Skip to content

Commit 1d874fa

Browse files
jorgecarleitaoalamb
authored andcommitted
ARROW-11005: [Rust] Remove indirection from take kernel
This PR is a small cleanup of the `take` kernel. 1. replaces `&Arc<Array>` by `&Array`. This is a small change in the API, but makes it more obvious that the only requirements for `take` is implementing the trait `Array`. 2. internal concrete implementations of `take` are no longer `dyn`, and instead expect the corresponding array types and return the corresponding array types. 3. Clarified in the documentation that not performing bound checks can lead to undefined behavior. 4. Added equality for `FixedSizeListArray`, that was missing. The rational for this PR is that it makes it more obvious the design of the module: each array type has its own implementation (e.g. `take_primitive<T>(array: &PrimitiveArray<T>, indices: ...) -> PrimitiveArray<T>`), and there is one `dyn` implementation, `take(array: &Array, indices: ...) -> ArrayRef`, that uses each type-specific implementation and wraps it on an `Arc<Array>` for the `dyn` behavior. This is mostly for simplicity and readability, but we could expose these methods publicly. Closes #8985 from jorgecarleitao/simpler_take Authored-by: Jorge C. Leitao <[email protected]> Signed-off-by: Andrew Lamb <[email protected]>
1 parent 53a36f5 commit 1d874fa

File tree

7 files changed

+186
-163
lines changed

7 files changed

+186
-163
lines changed

rust/arrow/benches/take_kernels.rs

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ use criterion::Criterion;
2222
use rand::distributions::{Alphanumeric, Distribution, Standard};
2323
use rand::Rng;
2424

25-
use std::sync::Arc;
26-
2725
extern crate arrow;
2826

2927
use arrow::array::*;
@@ -32,36 +30,32 @@ use arrow::datatypes::*;
3230
use arrow::util::test_util::seedable_rng;
3331

3432
// cast array from specified primitive array type to desired data type
35-
fn create_primitive<T>(size: usize) -> ArrayRef
33+
fn create_primitive<T>(size: usize) -> PrimitiveArray<T>
3634
where
3735
T: ArrowPrimitiveType,
3836
Standard: Distribution<T::Native>,
3937
PrimitiveArray<T>: std::convert::From<Vec<T::Native>>,
4038
{
41-
let array: PrimitiveArray<T> = seedable_rng()
39+
seedable_rng()
4240
.sample_iter(&Standard)
4341
.take(size)
4442
.map(Some)
45-
.collect();
46-
47-
Arc::new(array) as ArrayRef
43+
.collect()
4844
}
4945

5046
// cast array from specified primitive array type to desired data type
51-
fn create_boolean(size: usize) -> ArrayRef
47+
fn create_boolean(size: usize) -> BooleanArray
5248
where
5349
Standard: Distribution<bool>,
5450
{
55-
let array: BooleanArray = seedable_rng()
51+
seedable_rng()
5652
.sample_iter(&Standard)
5753
.take(size)
5854
.map(Some)
59-
.collect();
60-
61-
Arc::new(array) as ArrayRef
55+
.collect()
6256
}
6357

64-
fn create_strings(size: usize, null_density: f32) -> ArrayRef {
58+
fn create_strings(size: usize, null_density: f32) -> StringArray {
6559
let rng = &mut seedable_rng();
6660

6761
let mut builder = StringBuilder::new(size);
@@ -74,7 +68,7 @@ fn create_strings(size: usize, null_density: f32) -> ArrayRef {
7468
builder.append_null().unwrap()
7569
}
7670
}
77-
Arc::new(builder.finish())
71+
builder.finish()
7872
}
7973

8074
fn create_random_index(size: usize, null_density: f32) -> UInt32Array {
@@ -91,8 +85,8 @@ fn create_random_index(size: usize, null_density: f32) -> UInt32Array {
9185
builder.finish()
9286
}
9387

94-
fn bench_take(values: &ArrayRef, indices: &UInt32Array) {
95-
criterion::black_box(take(&values, &indices, None).unwrap());
88+
fn bench_take(values: &dyn Array, indices: &UInt32Array) {
89+
criterion::black_box(take(values, &indices, None).unwrap());
9690
}
9791

9892
fn add_benchmark(c: &mut Criterion) {

rust/arrow/src/array/equal/mod.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
2222
use super::{
2323
Array, ArrayData, BinaryOffsetSizeTrait, BooleanArray, DecimalArray,
24-
FixedSizeBinaryArray, GenericBinaryArray, GenericListArray, GenericStringArray,
25-
NullArray, OffsetSizeTrait, PrimitiveArray, StringOffsetSizeTrait, StructArray,
24+
FixedSizeBinaryArray, FixedSizeListArray, GenericBinaryArray, GenericListArray,
25+
GenericStringArray, NullArray, OffsetSizeTrait, PrimitiveArray,
26+
StringOffsetSizeTrait, StructArray,
2627
};
2728

2829
use crate::{
@@ -116,6 +117,12 @@ impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
116117
}
117118
}
118119

120+
impl PartialEq for FixedSizeListArray {
121+
fn eq(&self, other: &Self) -> bool {
122+
equal(self.data().as_ref(), other.data().as_ref())
123+
}
124+
}
125+
119126
impl PartialEq for StructArray {
120127
fn eq(&self, other: &Self) -> bool {
121128
equal(self.data().as_ref(), other.data().as_ref())

rust/arrow/src/compute/kernels/cast.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1123,7 +1123,7 @@ where
11231123
)
11241124
})?;
11251125

1126-
take(&cast_dict_values, u32_indicies, None)
1126+
take(cast_dict_values.as_ref(), u32_indicies, None)
11271127
}
11281128

11291129
/// Attempts to encode an array into an `ArrayDictionary` with index

rust/arrow/src/compute/kernels/sort.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ use TimeUnit::*;
3838
///
3939
pub fn sort(values: &ArrayRef, options: Option<SortOptions>) -> Result<ArrayRef> {
4040
let indices = sort_to_indices(values, options)?;
41-
take(values, &indices, None)
41+
take(values.as_ref(), &indices, None)
4242
}
4343

4444
// partition indices into non-NaN and NaN
@@ -626,7 +626,7 @@ pub fn lexsort(columns: &[SortColumn]) -> Result<Vec<ArrayRef>> {
626626
let indices = lexsort_to_indices(columns)?;
627627
columns
628628
.iter()
629-
.map(|c| take(&c.values, &indices, None))
629+
.map(|c| take(c.values.as_ref(), &indices, None))
630630
.collect()
631631
}
632632

0 commit comments

Comments
 (0)