Skip to content

Commit 03a5c15

Browse files
committed
Add optimization for null_count
1 parent 270524b commit 03a5c15

1 file changed

Lines changed: 18 additions & 6 deletions

File tree

arrow-select/src/interleave.rs

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>(
379379
interleaved: &Interleave<'_, GenericListArray<O>>,
380380
indices: &[(usize, usize)],
381381
capacity: usize,
382+
data_type: &DataType,
382383
) -> ArrayRef {
383384
let child_arrays: Vec<&PrimitiveArray<T>> = interleaved
384385
.arrays
@@ -408,6 +409,7 @@ fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>(
408409
null_buf.resize(null_byte_len, 0);
409410

410411
let mut offset_write = 0;
412+
let mut null_count = 0usize;
411413
for &(array, row) in indices {
412414
let o = interleaved.arrays[array].value_offsets();
413415
let start = o[row].as_usize();
@@ -416,7 +418,7 @@ fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>(
416418
if len > 0 {
417419
match child_arrays[array].nulls() {
418420
Some(null_buffer) => {
419-
set_bits(
421+
null_count += set_bits(
420422
null_buf.as_slice_mut(),
421423
null_buffer.validity(),
422424
offset_write,
@@ -425,7 +427,7 @@ fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>(
425427
);
426428
}
427429
None => {
428-
// Slow path. For a non-nullable source, set the bit range to all 1s directly.
430+
// For a non-nullable source, set the bit range to all 1s directly.
429431
let buf = null_buf.as_slice_mut();
430432
(offset_write..offset_write + len).for_each(|i| bit_util::set_bit(buf, i));
431433
}
@@ -434,13 +436,18 @@ fn interleave_list_primitive_child<O: OffsetSizeTrait, T: ArrowPrimitiveType>(
434436
offset_write += len;
435437
}
436438

437-
let bool_buf = BooleanBuffer::new(null_buf.into(), 0, capacity);
438-
Some(NullBuffer::new(bool_buf))
439+
if null_count > 0 {
440+
let bool_buf = BooleanBuffer::new(null_buf.into(), 0, capacity);
441+
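// SAFETY: null_count sums the unset-bit counts returned by set_bits for every copied range; ranges from sources without a null buffer are written as all 1s and add no nulls (assumes the copied ranges cover all `capacity` bits).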
442+
Some(unsafe { NullBuffer::new_unchecked(bool_buf, null_count) })
443+
} else {
444+
None
445+
}
439446
} else {
440447
None
441448
};
442449

443-
Arc::new(PrimitiveArray::<T>::new(values.into(), nulls))
450+
Arc::new(PrimitiveArray::<T>::new(values.into(), nulls).with_data_type(data_type.clone()))
444451
}
445452

446453
fn interleave_list<O: OffsetSizeTrait>(
@@ -466,7 +473,12 @@ fn interleave_list<O: OffsetSizeTrait>(
466473
// Step 2: build child values.
467474
macro_rules! list_primitive_helper {
468475
($t:ty) => {
469-
interleave_list_primitive_child::<O, $t>(&interleaved, indices, capacity)
476+
interleave_list_primitive_child::<O, $t>(
477+
&interleaved,
478+
indices,
479+
capacity,
480+
field.data_type(),
481+
)
470482
};
471483
}
472484

0 commit comments

Comments
 (0)