Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 39 additions & 9 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ use arrow_select::take::take;
use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive};

pub use decimal::{DecimalCast, rescale_decimal};
pub use string::cast_single_string_to_boolean_default;

/// CastOptions provides a way to override the default cast behaviors
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
Expand Down Expand Up @@ -2464,7 +2465,7 @@ where
R::Native: NumCast,
{
from.try_unary(|value| {
num_traits::cast::cast::<T::Native, R::Native>(value).ok_or_else(|| {
num_cast::<T::Native, R::Native>(value).ok_or_else(|| {
ArrowError::CastError(format!(
"Can't cast value {:?} to type {}",
value,
Expand All @@ -2474,6 +2475,17 @@ where
})
}

/// Natural cast between numeric types
/// Return None if the input `value` can't be casted to type `O`.
#[inline]
pub fn num_cast<I, O>(value: I) -> Option<O>
where
I: NumCast,
O: NumCast,
{
num_traits::cast::cast::<I, O>(value)
}

// Natural cast between numeric types
// If the value of T can't be cast to R, it will be converted to null
fn numeric_cast<T, R>(from: &PrimitiveArray<T>) -> PrimitiveArray<R>
Expand All @@ -2483,7 +2495,7 @@ where
T::Native: NumCast,
R::Native: NumCast,
{
from.unary_opt::<_, R>(num_traits::cast::cast::<T::Native, R::Native>)
from.unary_opt::<_, R>(num_cast::<T::Native, R::Native>)
}

fn cast_numeric_to_binary<FROM: ArrowPrimitiveType, O: OffsetSizeTrait>(
Expand Down Expand Up @@ -2540,16 +2552,23 @@ where
for i in 0..from.len() {
if from.is_null(i) {
b.append_null();
} else if from.value(i) != T::default_value() {
b.append_value(true);
} else {
b.append_value(false);
b.append_value(cast_num_to_bool::<T::Native>(from.value(i)));
}
}

Ok(b.finish())
}

/// Cast a single numeric value to a boolean.
///
/// The result is `true` exactly when `value` compares unequal to `I::default()`.
#[inline]
pub fn cast_num_to_bool<I>(value: I) -> bool
where
    I: Default + PartialEq,
{
    let default_value = I::default();
    value.ne(&default_value)
}

/// Cast Boolean types to numeric
///
/// `false` returns 0 while `true` returns 1
Expand All @@ -2575,11 +2594,8 @@ where
let iter = (0..from.len()).map(|i| {
if from.is_null(i) {
None
} else if from.value(i) {
// a workaround to cast a primitive to T::Native, infallible
num_traits::cast::cast(1)
} else {
Some(T::default_value())
single_bool_to_numeric::<T::Native>(from.value(i))
}
});
// Benefit:
Expand All @@ -2589,6 +2605,20 @@ where
unsafe { PrimitiveArray::<T>::from_trusted_len_iter(iter) }
}

/// Cast a single bool value to a numeric value.
///
/// `false` yields `Some(O::default())`; `true` yields the result of casting the
/// literal `1` to `O`.
#[inline]
pub fn single_bool_to_numeric<O>(value: bool) -> Option<O>
where
    O: num_traits::NumCast + Default,
{
    if !value {
        return Some(O::default());
    }
    // a workaround to cast a primitive to type O, infallible
    num_traits::cast::cast(1)
}

/// Helper function to cast from one `BinaryArray` or `LargeBinaryArray` to `FixedSizeBinaryArray`.
fn cast_binary_to_fixed_size_binary<O: OffsetSizeTrait>(
array: &dyn Array,
Expand Down
36 changes: 24 additions & 12 deletions arrow-cast/src/cast/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,25 +401,37 @@ where
let output_array = array
.iter()
.map(|value| match value {
Some(value) => match value.to_ascii_lowercase().trim() {
"t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(Some(true)),
"f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => {
Ok(Some(false))
}
invalid_value => match cast_options.safe {
true => Ok(None),
false => Err(ArrowError::CastError(format!(
"Cannot cast value '{invalid_value}' to value of Boolean type",
))),
},
},
Some(value) => cast_single_string_to_boolean(value, cast_options),
None => Ok(None),
})
.collect::<Result<BooleanArray, _>>()?;

Ok(Arc::new(output_array))
}

/// Cast a single string to an optional boolean.
///
/// The input is ASCII-lowercased and trimmed before matching. Recognized truthy
/// spellings (`t`, `tr`, `tru`, `true`, `y`, `ye`, `yes`, `on`, `1`) give
/// `Ok(Some(true))`; recognized falsy spellings (`f`, `fa`, `fal`, `fals`, `false`,
/// `n`, `no`, `of`, `off`, `0`) give `Ok(Some(false))`.
///
/// # Errors
///
/// For any other input, returns `Ok(None)` when `cast_options.safe` is set and an
/// `ArrowError::CastError` otherwise.
fn cast_single_string_to_boolean(
    value: &str,
    cast_options: &CastOptions,
) -> Result<Option<bool>, ArrowError> {
    let lowered = value.to_ascii_lowercase();
    let parsed = match lowered.trim() {
        "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Some(true),
        "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => Some(false),
        // Unrecognized input is only an error when the caller opted out of safe mode.
        invalid_value if !cast_options.safe => {
            return Err(ArrowError::CastError(format!(
                "Cannot cast value '{invalid_value}' to value of Boolean type",
            )));
        }
        _ => None,
    };
    Ok(parsed)
}

/// Cast a single string to boolean with the default cast options (`safe = true`).
///
/// Returns the parsed boolean, or `None` when the underlying cast produced no value.
/// Any error from the underlying cast is also mapped to `None`.
pub fn cast_single_string_to_boolean_default(value: &str) -> Option<bool> {
    let default_options = CastOptions::default();
    match cast_single_string_to_boolean(value, &default_options) {
        Ok(parsed) => parsed,
        Err(_) => None,
    }
}

pub(crate) fn cast_utf8_to_boolean<OffsetSize>(
from: &dyn Array,
cast_options: &CastOptions,
Expand Down
2 changes: 1 addition & 1 deletion parquet-variant-compute/src/shred_variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1128,7 +1128,7 @@ mod tests {
.downcast_ref::<arrow::array::Int32Array>()
.unwrap();
assert_eq!(typed_value_int32.value(0), 42);
assert!(typed_value_int32.is_null(1)); // float doesn't convert to int32
assert_eq!(typed_value_int32.value(1), 3);
assert!(typed_value_int32.is_null(2)); // string doesn't convert to int32

// Test Float64 target
Expand Down
2 changes: 2 additions & 0 deletions parquet-variant/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ edition = { workspace = true }
rust-version = { workspace = true }

[dependencies]
arrow = { workspace = true , features = ["canonical_extension_types"] }
arrow-schema = { workspace = true }
chrono = { workspace = true }
half = { version = "2.1", default-features = false }
indexmap = "2.10.0"
num-traits = { version = "0.2", default-features = false }
uuid = { version = "1.18.0", features = ["v4"]}

simdutf8 = { workspace = true , optional = true }
Expand Down
17 changes: 0 additions & 17 deletions parquet-variant/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,6 @@ pub(crate) const fn expect_size_of<T>(expected: usize) {
}
}

/// Returns `true` when the magnitude of `n` fits in at most `N` bits.
///
/// The check counts the leading zero bits of `|n|` viewed as an unsigned 64-bit
/// value: `n` fits when at least `i64::BITS - N` of them are zero. For example,
/// `fits_precision::<10>` accepts `1023` and `-1023` but rejects `1024` and `-1024`.
///
/// Widths of `N >= i64::BITS` accept every `i64`. The subtraction saturates so that
/// `N > i64::BITS` cannot underflow (a panic in debug builds, a wrapped and therefore
/// wrong threshold in release builds).
pub(crate) fn fits_precision<const N: u32>(n: impl Into<i64>) -> bool {
    let required_leading_zeros = i64::BITS.saturating_sub(N);
    n.into().unsigned_abs().leading_zeros() >= required_leading_zeros
}

/// Parse a path string into a vector of [`VariantPathElement`].
///
/// # Syntax
Expand Down Expand Up @@ -274,16 +270,3 @@ fn parse_in_bracket(s: &str, i: usize) -> Result<(VariantPathElement<'_>, usize)

Ok((element, end + 1))
}

#[cfg(test)]
mod test {
    use super::*;

    /// `fits_precision::<10>` accepts magnitudes up to 1023 and rejects 1024, for both signs.
    #[test]
    fn test_fits_precision() {
        for accepted in [1023, -1023] {
            assert!(fits_precision::<10>(accepted));
        }
        for rejected in [1024, -1024] {
            assert!(!fits_precision::<10>(rejected));
        }
    }
}
Loading
Loading