-
Notifications
You must be signed in to change notification settings - Fork 13.9k
Add slice::sort_by_cached_key as a memoised sort_by_key #48639
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 10 commits
ea6a1bd
670e69e
b8452cc
9fbee35
21fde09
7dcfc07
bdcc6f9
f41a26f
b430cba
ca3bed0
9896b38
81edd17
785e3c3
eca1e18
9c7b69e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -102,6 +102,7 @@ use core::mem::size_of; | |
| use core::mem; | ||
| use core::ptr; | ||
| use core::slice as core_slice; | ||
| use core::{u8, u16, u32}; | ||
|
|
||
| use borrow::{Borrow, BorrowMut, ToOwned}; | ||
| use boxed::Box; | ||
|
|
@@ -1302,7 +1303,12 @@ impl<T> [T] { | |
|
|
||
| /// Sorts the slice with a key extraction function. | ||
| /// | ||
| /// This sort is stable (i.e. does not reorder equal elements) and `O(n log n)` worst-case. | ||
| /// This sort is stable (i.e. does not reorder equal elements) and `O(m n log(m n))` | ||
| /// worst-case, where the key function is `O(m)`. | ||
| /// | ||
| /// For expensive key functions (e.g. functions that are not simple property accesses or | ||
| /// basic operations), [`sort_by_cached_key`](#method.sort_by_cached_key) is likely to be | ||
| /// significantly faster, as it does not recompute element keys. | ||
|
||
| /// | ||
| /// When applicable, unstable sorting is preferred because it is generally faster than stable | ||
| /// sorting and it doesn't allocate auxiliary memory. | ||
|
|
@@ -1328,12 +1334,76 @@ impl<T> [T] { | |
| /// ``` | ||
| #[stable(feature = "slice_sort_by_key", since = "1.7.0")] | ||
| #[inline] | ||
| pub fn sort_by_key<B, F>(&mut self, mut f: F) | ||
| where F: FnMut(&T) -> B, B: Ord | ||
| pub fn sort_by_key<K, F>(&mut self, mut f: F) | ||
| where F: FnMut(&T) -> K, K: Ord | ||
| { | ||
| merge_sort(self, |a, b| f(a).lt(&f(b))); | ||
| } | ||
|
|
||
| /// Sorts the slice with a key extraction function. | ||
| /// | ||
| /// During sorting, the key function is called only once per element. | ||
| /// | ||
| /// This sort is stable (i.e. does not reorder equal elements) and `O(m n + n log n)` | ||
| /// worst-case, where the key function is `O(m)`. | ||
| /// | ||
| /// For simple key functions (e.g. functions that are property accesses or | ||
| /// basic operations), [`sort_by_key`](#method.sort_by_key) is likely to be | ||
| /// faster. | ||
| /// | ||
| /// # Current implementation | ||
| /// | ||
| /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters, | ||
| /// which combines the fast average case of randomized quicksort with the fast worst case of | ||
| /// heapsort, while achieving linear time on slices with certain patterns. It uses some | ||
| /// randomization to avoid degenerate cases, but with a fixed seed to always provide | ||
| /// deterministic behavior. | ||
| /// | ||
| /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the | ||
| /// length of the slice. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// let mut v = [-5i32, 4, 32, -3, 2]; | ||
|
||
| /// | ||
| /// v.sort_by_cached_key(|k| k.to_string()); | ||
| /// assert!(v == [-3, -5, 2, 32, 4]); | ||
| /// ``` | ||
| /// | ||
| /// [pdqsort]: https://github.com/orlp/pdqsort | ||
| #[unstable(feature = "slice_sort_by_cached_key", issue = "34447")] | ||
| #[inline] | ||
| pub fn sort_by_cached_key<K, F>(&mut self, f: F) | ||
| where F: FnMut(&T) -> K, K: Ord | ||
| { | ||
| // Helper macro for indexing our vector by the smallest possible type, to reduce allocation. | ||
| macro_rules! sort_by_key { | ||
| ($t:ty, $slice:ident, $f:ident) => ({ | ||
| let mut indices: Vec<_> = | ||
|
||
| $slice.iter().map($f).enumerate().map(|(i, k)| (k, i as $t)).collect(); | ||
| // The elements of `indices` are unique, as they are indexed, so any sort will be | ||
| // stable with respect to the original slice. We use `sort_unstable` here because it | ||
| // requires less memory allocation. | ||
| indices.sort_unstable(); | ||
| for i in 0..$slice.len() { | ||
|
||
| let mut index = indices[i].1; | ||
| while (index as usize) < i { | ||
| index = indices[index as usize].1; | ||
| } | ||
| indices[i].1 = index; | ||
| $slice.swap(i, index as usize); | ||
| } | ||
| }) | ||
| } | ||
|
||
|
|
||
| let len = self.len(); | ||
| if len <= ( u8::MAX as usize) { return sort_by_key!( u8, self, f) } | ||
| if len <= (u16::MAX as usize) { return sort_by_key!(u16, self, f) } | ||
| if len <= (u32::MAX as usize) { return sort_by_key!(u32, self, f) } | ||
| sort_by_key!(usize, self, f) | ||
|
||
| } | ||
|
|
||
| /// Sorts the slice, but may not preserve the order of equal elements. | ||
| /// | ||
| /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate), | ||
|
|
@@ -1410,7 +1480,7 @@ impl<T> [T] { | |
| /// elements. | ||
| /// | ||
| /// This sort is unstable (i.e. may reorder equal elements), in-place (i.e. does not allocate), | ||
| /// and `O(n log n)` worst-case. | ||
| /// and `O(m n log(m n))` worst-case, where the key function is `O(m)`. | ||
|
||
| /// | ||
| /// # Current implementation | ||
| /// | ||
|
|
@@ -1420,8 +1490,9 @@ impl<T> [T] { | |
| /// randomization to avoid degenerate cases, but with a fixed seed to always provide | ||
| /// deterministic behavior. | ||
| /// | ||
| /// It is typically faster than stable sorting, except in a few special cases, e.g. when the | ||
| /// slice consists of several concatenated sorted sequences. | ||
| /// Due to its key calling strategy, [`sort_unstable_by_key`](#method.sort_unstable_by_key) | ||
| /// is likely to be slower than [`sort_by_cached_key`](#method.sort_by_cached_key) in | ||
| /// cases where the key function is expensive. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
|
|
@@ -1435,9 +1506,8 @@ impl<T> [T] { | |
| /// [pdqsort]: https://github.com/orlp/pdqsort | ||
| #[stable(feature = "sort_unstable", since = "1.20.0")] | ||
| #[inline] | ||
| pub fn sort_unstable_by_key<B, F>(&mut self, f: F) | ||
| where F: FnMut(&T) -> B, | ||
| B: Ord | ||
| pub fn sort_unstable_by_key<K, F>(&mut self, f: F) | ||
| where F: FnMut(&T) -> K, K: Ord | ||
| { | ||
| core_slice::SliceExt::sort_unstable_by_key(self, f); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -425,6 +425,14 @@ fn test_sort() { | |
| v.sort_by(|a, b| b.cmp(a)); | ||
| assert!(v.windows(2).all(|w| w[0] >= w[1])); | ||
|
|
||
| // Sort in lexicographic order. | ||
| let mut v1 = orig.clone(); | ||
| let mut v2 = orig.clone(); | ||
| v1.sort_by_key(|x| x.to_string()); | ||
| v2.sort_by_cached_key(|x| x.to_string()); | ||
| assert!(v1.windows(2).all(|w| w[0].to_string() <= w[1].to_string())); | ||
| assert!(v1 == v2); | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point! |
||
| // Sort with many pre-sorted runs. | ||
| let mut v = orig.clone(); | ||
| v.sort(); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please write it as `O(m n log(m n))` to avoid ambiguity.