Skip to content

Improve bulk quantiles #5

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ itertools = { version = "0.7.0", default-features = false }
indexmap = "1.0"

[dev-dependencies]
criterion = "0.2"
quickcheck = { version = "0.8.1", default-features = false }
ndarray-rand = "0.9"
approx = "0.3"
quickcheck_macros = "0.8"

[[bench]]
name = "sort"
harness = false
67 changes: 67 additions & 0 deletions benches/sort.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
extern crate criterion;
extern crate ndarray;
extern crate ndarray_stats;
extern crate rand;

use criterion::{
black_box, criterion_group, criterion_main, AxisScale, BatchSize, Criterion,
ParameterizedBenchmark, PlotConfiguration,
};
use ndarray::prelude::*;
use ndarray_stats::Sort1dExt;
use rand::prelude::*;

fn get_from_sorted_mut(c: &mut Criterion) {
let lens = vec![10, 100, 1000, 10000];
let benchmark = ParameterizedBenchmark::new(
"get_from_sorted_mut",
|bencher, &len| {
let mut rng = StdRng::seed_from_u64(42);
let mut data: Vec<_> = (0..len).collect();
data.shuffle(&mut rng);
let indices: Vec<_> = (0..len).step_by(len / 10).collect();
bencher.iter_batched(
|| Array1::from(data.clone()),
|mut arr| {
for &i in &indices {
black_box(arr.get_from_sorted_mut(i));
}
},
BatchSize::SmallInput,
)
},
lens,
)
.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
c.bench("get_from_sorted_mut", benchmark);
}

fn get_many_from_sorted_mut(c: &mut Criterion) {
let lens = vec![10, 100, 1000, 10000];
let benchmark = ParameterizedBenchmark::new(
"get_many_from_sorted_mut",
|bencher, &len| {
let mut rng = StdRng::seed_from_u64(42);
let mut data: Vec<_> = (0..len).collect();
data.shuffle(&mut rng);
let indices: Vec<_> = (0..len).step_by(len / 10).collect();
bencher.iter_batched(
|| Array1::from(data.clone()),
|mut arr| {
black_box(arr.get_many_from_sorted_mut(&indices));
},
BatchSize::SmallInput,
)
},
lens,
)
.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
c.bench("get_many_from_sorted_mut", benchmark);
}

criterion_group! {
name = benches;
config = Criterion::default();
targets = get_from_sorted_mut, get_many_from_sorted_mut
}
criterion_main!(benches);
140 changes: 77 additions & 63 deletions src/sort.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use indexmap::IndexMap;
use ndarray::prelude::*;
use ndarray::{s, Data, DataMut};
use ndarray::{Data, DataMut, Slice};
use rand::prelude::*;
use rand::thread_rng;
use std::iter;

/// Methods for sorting and partitioning 1-D arrays.
pub trait Sort1dExt<A, S>
Expand Down Expand Up @@ -121,11 +122,12 @@ where
let pivot_index = rng.gen_range(0, n);
let partition_index = self.partition_mut(pivot_index);
if i < partition_index {
self.slice_mut(s![..partition_index]).get_from_sorted_mut(i)
self.slice_axis_mut(Axis(0), Slice::from(..partition_index))
.get_from_sorted_mut(i)
} else if i == partition_index {
self[i].clone()
} else {
self.slice_mut(s![partition_index + 1..])
self.slice_axis_mut(Axis(0), Slice::from(partition_index + 1..))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see the rationale for all the changes you have proposed, but I feel I am missing something here: why is slice_axis_mut preferable to slice_mut?

Copy link
Author

@jturner314 jturner314 Apr 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just slightly lower overhead, that's all. .slice_mut() goes through all the n-dimensional slicing infrastructure and in the end calls .slice_axis_inplace(); .slice_axis_mut() jumps there directly. [Edit: In theory, the compiler should be able to eliminate the extra slicing infrastructure through constant propagation and inlining, but in practice, some overhead usually remains.] The performance difference is quite small in this case (~2% in the small array tests when I tried it earlier), so if .slice_mut() seems cleaner, that's fine too.

Copy link
Owner

@LukeMathWalker LukeMathWalker Apr 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Readibility-wise, I'd say they are the same. I was just curious to understand the rationale, your laser-focused optimization eye is something I am trying to learn from your PR reviews 😛

.get_from_sorted_mut(i - (partition_index + 1))
}
}
Expand Down Expand Up @@ -183,10 +185,11 @@ where
}

/// To retrieve multiple indexes from the sorted array in an optimized fashion,
/// [get_many_from_sorted_mut] first of all sorts the `indexes` vector.
/// [get_many_from_sorted_mut] first of all sorts and deduplicates the
/// `indexes` vector.
///
/// `get_many_from_sorted_mut_unchecked` does not perform this sorting,
/// assuming that the user has already taken care of it.
/// `get_many_from_sorted_mut_unchecked` does not perform this sorting and
/// deduplication, assuming that the user has already taken care of it.
///
/// Useful when you have to call [get_many_from_sorted_mut] multiple times
/// using the same indexes.
Expand All @@ -200,85 +203,96 @@ where
A: Ord + Clone,
S: DataMut<Elem = A>,
{
// The actual routine
let values = _get_many_from_sorted_mut_unchecked(array, indexes);

// We convert the vector to a more search-friendly IndexMap
let mut result = IndexMap::new();
for (index, value) in indexes.into_iter().zip(values.into_iter()) {
result.insert(*index, value);
if indexes.is_empty() {
return IndexMap::new();
}
result

// Since `!indexes.is_empty` and indexes must be in-bounds, `array` must be
// non-empty.
let mut values: Vec<_> = iter::repeat(array[0].clone()).take(indexes.len()).collect();
_get_many_from_sorted_mut_unchecked(array.view_mut(), &mut indexes.to_owned(), &mut values);

// We convert the vector to a more search-friendly `IndexMap`.
indexes.iter().cloned().zip(values.into_iter()).collect()
}

fn _get_many_from_sorted_mut_unchecked<A, S>(
array: &mut ArrayBase<S, Ix1>,
indexes: &[usize],
) -> Vec<A>
where
/// This is the recursive portion of `get_many_from_sorted_mut_unchecked`.
///
/// `indexes` is the list of indexes to get. `indexes` is mutable so that it
/// can be used as scratch space for this routine; the value of `indexes` after
/// calling this routine should be ignored.
///
/// `values` is a pre-allocated slice to use for writing the output. Its
/// initial element values are ignored.
fn _get_many_from_sorted_mut_unchecked<A>(
mut array: ArrayViewMut1<A>,
indexes: &mut [usize],
values: &mut [A],
) where
A: Ord + Clone,
S: DataMut<Elem = A>,
{
let n = array.len();
debug_assert!(n >= indexes.len()); // because indexes must be unique and in-bounds
debug_assert_eq!(indexes.len(), values.len());

// Nothing to do in this case
if indexes.len() == 0 || n == 0 {
return vec![];
if indexes.is_empty() {
// Nothing to do in this case.
return;
}

// We can only reach this point with indexes.len() == 1
// So it's safe to return a vector with a single value
// At this point, `n >= 1` since `indexes.len() >= 1`.
if n == 1 {
let value = array[0].clone();
return vec![value];
// We can only reach this point if `indexes.len() == 1`, so we only
// need to assign the single value, and then we're done.
debug_assert_eq!(indexes.len(), 1);
values[0] = array[0].clone();
return;
}

// We pick a random pivot index: the corresponding element is the pivot value
let mut rng = thread_rng();
let pivot_index = rng.gen_range(0, n);

// We partition the array with respect to the pivot value
// The pivot value moves to `array_partition_index`
// Elements strictly smaller than the pivot value have indexes < `array_partition_index`
// Elements greater or equal to the pivot value have indexes > `array_partition_index`
// We partition the array with respect to the pivot value.
// The pivot value moves to `array_partition_index`.
// Elements strictly smaller than the pivot value have indexes < `array_partition_index`.
// Elements greater or equal to the pivot value have indexes > `array_partition_index`.
let array_partition_index = array.partition_mut(pivot_index);

// We can use a divide et impera strategy, splitting the indexes we are searching
// in two chunks with respect to array_partition_index
let index_split = indexes.binary_search(&array_partition_index);
let (smaller_indexes, bigger_indexes) = match index_split {
Ok(index_split) => (&indexes[..index_split], &indexes[(index_split + 1)..]),
Err(index_split) => (&indexes[..index_split], &indexes[index_split..]),
// We use a divide-and-conquer strategy, splitting the indexes we are
// searching for (`indexes`) and the corresponding portions of the output
// slice (`values`) into pieces with respect to `array_partition_index`.
let (found_exact, index_split) = match indexes.binary_search(&array_partition_index) {
Ok(index) => (true, index),
Err(index) => (false, index),
};
let (smaller_indexes, other_indexes) = indexes.split_at_mut(index_split);
let (smaller_values, other_values) = values.split_at_mut(index_split);
let (bigger_indexes, bigger_values) = if found_exact {
other_values[0] = array[array_partition_index].clone(); // Write exactly found value.
(&mut other_indexes[1..], &mut other_values[1..])
} else {
(other_indexes, other_values)
};
// We are using a recursive search - to look for bigger_indexes in the right
// slice of the array we need to shift the indexes
let bigger_indexes: Vec<usize> = bigger_indexes
.into_iter()
.map(|x| x - array_partition_index - 1)
.collect();

// We search recursively for the values corresponding to strictly smaller indexes
// to the left of partition_index
let smaller_values = _get_many_from_sorted_mut_unchecked(
&mut array.slice_mut(s![..array_partition_index]),
// We search recursively for the values corresponding to strictly smaller
// indexes to the left of `partition_index`.
_get_many_from_sorted_mut_unchecked(
array.slice_axis_mut(Axis(0), Slice::from(..array_partition_index)),
smaller_indexes,
smaller_values,
);

// We search recursively for the values corresponding to strictly bigger indexes
// to the right of partition_index+1
let mut bigger_values = _get_many_from_sorted_mut_unchecked(
&mut array.slice_mut(s![(array_partition_index + 1)..]),
&bigger_indexes,
// We search recursively for the values corresponding to strictly bigger
// indexes to the right of `partition_index`. Since only the right portion
// of the array is passed in, the indexes need to be shifted by length of
// the removed portion.
bigger_indexes
.iter_mut()
.for_each(|x| *x -= array_partition_index + 1);
_get_many_from_sorted_mut_unchecked(
array.slice_axis_mut(Axis(0), Slice::from(array_partition_index + 1..)),
bigger_indexes,
bigger_values,
);

// We merge the results together, in the correct order
let mut results: Vec<A>;

results = smaller_values;
if index_split.is_ok() {
// Get the value associated to partition index
results.push(array[array_partition_index].clone());
}
results.append(&mut bigger_values);
results
}