-
Notifications
You must be signed in to change notification settings - Fork 1.2k
perf(interleave): Optimize list interleave_list when child is primitive #10025
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
mapleFU
wants to merge
9
commits into
apache:main
Choose a base branch
from
mapleFU:optimize-list-interleave-primitive
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 7 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
64ab2be
Trying to batch and using MultipleArrayData to optimize interleave_list
mapleFU e98e758
Handles basic
mapleFU 99761a0
Revert when not primitive
mapleFU 161d29f
Type specific optimize
mapleFU f08e703
Optimize building nulls
mapleFU 270524b
Remove some rubbish
mapleFU 03a5c15
Add optimization for null_count
mapleFU 63c76e4
Apply suggestion
mapleFU 2a619c4
Merge branch 'main' into optimize-list-interleave-primitive
mapleFU File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,6 +23,8 @@ use arrow_array::builder::{BooleanBufferBuilder, PrimitiveBuilder}; | |
| use arrow_array::cast::AsArray; | ||
| use arrow_array::types::*; | ||
| use arrow_array::*; | ||
| use arrow_buffer::bit_mask::set_bits; | ||
| use arrow_buffer::bit_util; | ||
| use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer}; | ||
| use arrow_data::ByteView; | ||
| use arrow_data::transform::MutableArrayData; | ||
|
|
@@ -373,13 +375,89 @@ fn interleave_struct( | |
| Ok(Arc::new(struct_array)) | ||
| } | ||
|
|
||
| fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>( | ||
| interleaved: &Interleave<'_, GenericListArray<O>>, | ||
| indices: &[(usize, usize)], | ||
| capacity: usize, | ||
| data_type: &DataType, | ||
| ) -> ArrayRef { | ||
| let child_arrays: Vec<&PrimitiveArray<T>> = interleaved | ||
| .arrays | ||
| .iter() | ||
| .map(|list| list.values().as_primitive::<T>()) | ||
| .collect(); | ||
|
|
||
| let has_child_nulls = child_arrays.iter().any(|a| a.null_count() > 0); | ||
|
|
||
| // Build values buffer by copying contiguous slices | ||
| let mut values: Vec<T::Native> = Vec::with_capacity(capacity); | ||
| for &(array, row) in indices { | ||
| let o = interleaved.arrays[array].value_offsets(); | ||
| let start = o[row].as_usize(); | ||
| let end = o[row + 1].as_usize(); | ||
| if end > start { | ||
| values.extend_from_slice(&child_arrays[array].values()[start..end]); | ||
| } | ||
| } | ||
|
|
||
| // Build null buffer. Pre-allocate with 0x00 (all null), then: | ||
| // - Sources with nulls: set_bits ORs in valid bits from source. | ||
|
mapleFU marked this conversation as resolved.
Outdated
|
||
| // - Sources without nulls: set the bit range to all 1s directly. | ||
| let nulls = if has_child_nulls { | ||
| let null_byte_len = bit_util::ceil(capacity, 8); | ||
| let mut null_buf = MutableBuffer::new(null_byte_len); | ||
|
mapleFU marked this conversation as resolved.
Outdated
|
||
| null_buf.resize(null_byte_len, 0); | ||
|
mapleFU marked this conversation as resolved.
Outdated
|
||
|
|
||
| let mut offset_write = 0; | ||
| let mut null_count = 0usize; | ||
| for &(array, row) in indices { | ||
| let o = interleaved.arrays[array].value_offsets(); | ||
| let start = o[row].as_usize(); | ||
| let end = o[row + 1].as_usize(); | ||
| let len = end - start; | ||
| if len > 0 { | ||
| match child_arrays[array].nulls() { | ||
| Some(null_buffer) => { | ||
| null_count += set_bits( | ||
| null_buf.as_slice_mut(), | ||
| null_buffer.validity(), | ||
| offset_write, | ||
| null_buffer.offset() + start, | ||
| len, | ||
| ); | ||
| } | ||
| None => { | ||
| // For a non-nullable source, set the bit range to all 1s directly. | ||
| let buf = null_buf.as_slice_mut(); | ||
| (offset_write..offset_write + len).for_each(|i| bit_util::set_bit(buf, i)); | ||
| } | ||
| } | ||
| } | ||
| offset_write += len; | ||
| } | ||
|
|
||
| if null_count > 0 { | ||
| let bool_buf = BooleanBuffer::new(null_buf.into(), 0, capacity); | ||
| // SAFETY: null_count is accumulated from set_bits which correctly counts unset bits | ||
| Some(unsafe { NullBuffer::new_unchecked(bool_buf, null_count) }) | ||
| } else { | ||
| None | ||
| } | ||
| } else { | ||
| None | ||
| }; | ||
|
|
||
| Arc::new(PrimitiveArray::<T>::new(values.into(), nulls).with_data_type(data_type.clone())) | ||
| } | ||
|
|
||
| fn interleave_list<O: OffsetSizeTrait>( | ||
| values: &[&dyn Array], | ||
| indices: &[(usize, usize)], | ||
| field: &FieldRef, | ||
| ) -> Result<ArrayRef, ArrowError> { | ||
| let interleaved = Interleave::<'_, GenericListArray<O>>::new(values, indices); | ||
|
|
||
| // Step 1: compute output offsets and total child capacity | ||
| let mut capacity = 0usize; | ||
| let mut offsets = Vec::with_capacity(indices.len() + 1); | ||
| offsets.push(O::from_usize(0).unwrap()); | ||
|
|
@@ -392,29 +470,46 @@ fn interleave_list<O: OffsetSizeTrait>( | |
| ); | ||
| } | ||
|
|
||
| let mut child_indices = Vec::with_capacity(capacity); | ||
| for (array, row) in indices { | ||
| let list = interleaved.arrays[*array]; | ||
| let start = list.value_offsets()[*row].as_usize(); | ||
| let end = list.value_offsets()[*row + 1].as_usize(); | ||
| child_indices.extend((start..end).map(|i| (*array, i))); | ||
| // Step 2: build child values. | ||
| macro_rules! list_primitive_helper { | ||
| ($t:ty) => { | ||
| interleave_list_primitive_child::<O, $t>( | ||
| &interleaved, | ||
| indices, | ||
| capacity, | ||
| field.data_type(), | ||
| ) | ||
| }; | ||
| } | ||
|
|
||
| let child_arrays: Vec<&dyn Array> = interleaved | ||
| .arrays | ||
| .iter() | ||
| .map(|list| list.values().as_ref()) | ||
| .collect(); | ||
| let child_values = downcast_primitive! { | ||
| // For primitive child types, directly copy typed value slices and null bit | ||
| // ranges, avoiding both the intermediate child_indices Vec allocation and | ||
| // MutableArrayData's function pointer indirection. | ||
| field.data_type() => (list_primitive_helper), | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is just for type which could be copied fastly, for |
||
| _ => { | ||
| // For complex child types (nested lists, structs, views, dictionaries, etc.), | ||
| // use recursive interleave to benefit from type-specific optimizations. | ||
| let mut child_indices = Vec::with_capacity(capacity); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This keeps the previous code |
||
| for (array, row) in indices { | ||
| let list = interleaved.arrays[*array]; | ||
| let start = list.value_offsets()[*row].as_usize(); | ||
| let end = list.value_offsets()[*row + 1].as_usize(); | ||
| child_indices.extend((start..end).map(|i| (*array, i))); | ||
| } | ||
|
|
||
| let interleaved_values = interleave(&child_arrays, &child_indices)?; | ||
| let child_arrays: Vec<&dyn Array> = interleaved | ||
| .arrays | ||
| .iter() | ||
| .map(|list| list.values().as_ref()) | ||
| .collect(); | ||
| interleave(&child_arrays, &child_indices)? | ||
| } | ||
| }; | ||
|
|
||
| let offsets = OffsetBuffer::new(offsets.into()); | ||
| let list_array = GenericListArray::<O>::new( | ||
| field.clone(), | ||
| offsets, | ||
| interleaved_values, | ||
| interleaved.nulls, | ||
| ); | ||
| let list_array = | ||
| GenericListArray::<O>::new(field.clone(), offsets, child_values, interleaved.nulls); | ||
|
|
||
| Ok(Arc::new(list_array)) | ||
| } | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I used to uses
MutableArrayData, but it's about 15% slower than this implementation.