Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 157 additions & 1 deletion arrow-select/src/interleave.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ pub fn interleave(
DataType::Struct(fields) => interleave_struct(fields, values, indices),
DataType::List(field) => interleave_list::<i32>(values, indices, field),
DataType::LargeList(field) => interleave_list::<i64>(values, indices, field),
DataType::ListView(field) => interleave_list_view::<i32>(values, indices, field),
DataType::LargeListView(field) => interleave_list_view::<i64>(values, indices, field),
_ => interleave_fallback(values, indices)
}
}
Expand Down Expand Up @@ -411,6 +413,56 @@ fn interleave_list<O: OffsetSizeTrait>(
Ok(Arc::new(list_array))
}

fn interleave_list_view<O: OffsetSizeTrait>(
values: &[&dyn Array],
indices: &[(usize, usize)],
field: &FieldRef,
) -> Result<ArrayRef, ArrowError> {
let interleaved = Interleave::<'_, GenericListViewArray<O>>::new(values, indices);

// Collect child indices for each referenced list element and build
// new offsets/sizes that point into the interleaved child array
let mut capacity = 0usize;
let mut offsets = Vec::with_capacity(indices.len());
let mut sizes = Vec::with_capacity(indices.len());
for &(array_idx, row_idx) in indices {
let list = interleaved.arrays[array_idx];
let size = list.sizes()[row_idx].as_usize();
offsets.push(
O::from_usize(capacity).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?,
);
sizes.push(O::from_usize(size).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?);
capacity += size;
}

// Build child indices for recursive interleave of child values
let mut child_indices = Vec::with_capacity(capacity);
for &(array_idx, row_idx) in indices {
let list = interleaved.arrays[array_idx];
let start = list.offsets()[row_idx].as_usize();
let size = list.sizes()[row_idx].as_usize();
child_indices.extend((start..start + size).map(|i| (array_idx, i)));
}

let child_arrays: Vec<&dyn Array> = interleaved
.arrays
.iter()
.map(|list| list.values().as_ref())
.collect();

let interleaved_values = interleave(&child_arrays, &child_indices)?;

let list_view_array = GenericListViewArray::<O>::new(
field.clone(),
offsets.into(),
sizes.into(),
interleaved_values,
interleaved.nulls,
);

Ok(Arc::new(list_view_array))
}

/// Fallback implementation of interleave using [`MutableArrayData`]
fn interleave_fallback(
values: &[&dyn Array],
Expand Down Expand Up @@ -580,7 +632,9 @@ pub fn interleave_record_batch(
mod tests {
use super::*;
use arrow_array::Int32RunArray;
use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder};
use arrow_array::builder::{
GenericListBuilder, Int32Builder, Int64Builder, PrimitiveRunBuilder,
};
use arrow_array::types::Int8Type;
use arrow_schema::Field;

Expand Down Expand Up @@ -769,6 +823,61 @@ mod tests {
test_interleave_lists::<i64>();
}

fn test_interleave_list_views<O: OffsetSizeTrait>() {
// [[1, 2], null, [3]]
let mut a = GenericListBuilder::<O, _>::new(Int32Builder::new());
a.values().append_value(1);
a.values().append_value(2);
a.append(true);
a.append(false);
a.values().append_value(3);
a.append(true);
let a: GenericListViewArray<O> = a.finish().into();

// [[4], null, [5, 6, null]]
let mut b = GenericListBuilder::<O, _>::new(Int32Builder::new());
b.values().append_value(4);
b.append(true);
b.append(false);
b.values().append_value(5);
b.values().append_value(6);
b.values().append_null();
b.append(true);
let b: GenericListViewArray<O> = b.finish().into();

let values = interleave(&[&a, &b], &[(0, 2), (0, 1), (1, 0), (1, 2), (1, 1)]).unwrap();
let v = values
.as_any()
.downcast_ref::<GenericListViewArray<O>>()
.unwrap();

// [[3], null, [4], [5, 6, null], null]
let mut expected = GenericListBuilder::<O, _>::new(Int32Builder::new());
expected.values().append_value(3);
expected.append(true);
expected.append(false);
expected.values().append_value(4);
expected.append(true);
expected.values().append_value(5);
expected.values().append_value(6);
expected.values().append_null();
expected.append(true);
expected.append(false);
let expected: GenericListViewArray<O> = expected.finish().into();

assert_eq!(v, &expected);
}

#[test]
fn test_list_views() {
test_interleave_list_views::<i32>();
}

#[test]
fn test_large_list_views() {
test_interleave_list_views::<i64>();
}

#[test]
fn test_struct_without_nulls() {
let fields = Fields::from(vec![
Expand Down Expand Up @@ -1489,4 +1598,51 @@ mod tests {
Err(ArrowError::OffsetOverflowError(_))
));
}

/// Regression test to show that ListView non-native implementation,
/// which falls through to the MutableArrayData fallback, is broken.
#[test]
fn test_list_view_interleave_fallback_is_broken() {
// Array a: [[1, 2], null, [3]]
let mut builder = GenericListBuilder::<i32, _>::new(Int64Builder::new());
builder.values().append_value(1);
builder.values().append_value(2);
builder.append(true);
builder.append(false);
builder.values().append_value(3);
builder.append(true);
let a: ListViewArray = builder.finish().into();

// Array b: [[4], null, [5, 6, 7]]
let mut builder = GenericListBuilder::<i32, _>::new(Int64Builder::new());
builder.values().append_value(4);
builder.append(true);
builder.append(false);
builder.values().append_value(5);
builder.values().append_value(6);
builder.values().append_value(7);
builder.append(true);
let b: ListViewArray = builder.finish().into();

let indices = &[(0, 2), (1, 0), (0, 0), (1, 2)];
let result = interleave(&[&a as _, &b as _], indices).unwrap();
let result_list_view = result.as_any().downcast_ref::<ListViewArray>().unwrap();

// Build expected: [[3], [4], [1, 2], [5, 6, 7]]
let mut expected_builder = GenericListBuilder::<i32, _>::new(Int64Builder::new());
expected_builder.values().append_value(3);
expected_builder.append(true);
expected_builder.values().append_value(4);
expected_builder.append(true);
expected_builder.values().append_value(1);
expected_builder.values().append_value(2);
expected_builder.append(true);
expected_builder.values().append_value(5);
expected_builder.values().append_value(6);
expected_builder.values().append_value(7);
expected_builder.append(true);
let expected: ListViewArray = expected_builder.finish().into();

assert_eq!(&expected, result_list_view);
}
}
7 changes: 7 additions & 0 deletions arrow/benches/interleave_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ fn add_benchmark(c: &mut Criterion) {
let list_i64_no_nulls =
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42);

let list_view_i64: ListViewArray =
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.1, 0.1, 20, 42).into();
let list_view_i64_no_nulls: ListViewArray =
create_primitive_list_array_with_seed::<i32, Int64Type>(8192, 0.0, 0.0, 20, 42).into();

let cases: &[(&str, &dyn Array)] = &[
("i32(0.0)", &i32),
("i32(0.5)", &i32_opt),
Expand All @@ -143,6 +148,8 @@ fn add_benchmark(c: &mut Criterion) {
),
("list<i64>(0.1,0.1,20)", &list_i64),
("list<i64>(0.0,0.0,20)", &list_i64_no_nulls),
("list_view<i64>(0.1,0.1,20)", &list_view_i64),
("list_view<i64>(0.0,0.0,20)", &list_view_i64_no_nulls),
];

for (prefix, base) in cases {
Expand Down
Loading