Commit f3bcd71

Emit sorted dir-diff result as soon as preceding results get ready
If the directory diff contains lots of changes across files, it's better to emit earlier results incrementally. The diff producer assigns an incremental index to each result, and the consumer puts the results into a BinaryHeap to reorder them by index. The idea is borrowed from the following thread: https://users.rust-lang.org/t/parallel-work-collected-sequentially/13504/3
1 parent 7c8cf98 commit f3bcd71
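
The pattern in a nutshell: the parallel producer tags each result with its index, and the serial consumer buffers out-of-order arrivals in a min-heap, releasing an item only once every lower index has already been emitted. Below is a minimal standalone sketch of that idea (illustrative only, using std's mpsc channel and Reverse for a min-heap; the commit's actual implementation is the reorder_by_index function in the diff further down):

use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::sync::mpsc;
use std::thread;

fn main() {
    let (send, recv) = mpsc::channel();

    // Producer: results arrive out of order, as they would from parallel
    // workers. Each result carries the index it was assigned.
    thread::spawn(move || {
        for (index, value) in [(2, "c"), (0, "a"), (3, "d"), (1, "b")] {
            send.send((index, value)).unwrap();
        }
    });

    // Consumer: buffer out-of-order results in a min-heap keyed by index and
    // emit each one as soon as all of its predecessors have been emitted.
    let mut next_index = 0;
    let mut pending = BinaryHeap::new();
    for (index, value) in recv {
        pending.push(Reverse((index, value)));
        while pending.peek().map_or(false, |Reverse((i, _))| *i == next_index) {
            let Reverse((i, v)) = pending.pop().unwrap();
            println!("{i}: {v}"); // printed in index order: 0, 1, 2, 3
            next_index = i + 1;
        }
    }
}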

File tree: 2 files changed (+103 -12 lines)


src/main.rs

Lines changed: 72 additions & 12 deletions
@@ -73,9 +73,11 @@ use crate::parse::syntax;
 #[global_allocator]
 static GLOBAL: MiMalloc = MiMalloc;
 
+use std::cmp::Ordering;
+use std::collections::BinaryHeap;
 use std::fs::Permissions;
 use std::path::Path;
-use std::{env, thread};
+use std::{env, iter, thread};
 
 use humansize::{format_size, BINARY};
 use owo_colors::OwoColorize;
@@ -261,15 +263,6 @@ fn main() {
                 .iter()
                 .any(|diff_result| diff_result.has_reportable_change());
             display::json::print_directory(results);
-        } else if display_options.sort_paths {
-            let result: Vec<DiffResult> = diff_iter.collect();
-            for diff_result in result {
-                print_diff_result(&display_options, &diff_result);
-
-                if diff_result.has_reportable_change() {
-                    encountered_changes = true;
-                }
-            }
         } else {
             // We want to diff files in the directory in
             // parallel, but print the results serially
@@ -280,11 +273,18 @@ fn main() {
 
             s.spawn(move || {
                 diff_iter
+                    .enumerate()
                     .try_for_each_with(send, |s, diff_result| s.send(diff_result))
                     .expect("Receiver should be connected")
             });
 
-            for diff_result in recv.into_iter() {
+            let serial_iter: Box<dyn Iterator<Item = _>> =
+                if display_options.sort_paths {
+                    Box::new(reorder_by_index(recv.into_iter(), 0))
+                } else {
+                    Box::new(recv.into_iter())
+                };
+            for (_, diff_result) in serial_iter {
                 print_diff_result(&display_options, &diff_result);
 
                 if diff_result.has_reportable_change() {
@@ -769,7 +769,7 @@ fn diff_directories<'a>(
     display_options: &DisplayOptions,
     diff_options: &DiffOptions,
     overrides: &[(LanguageOverride, Vec<glob::Pattern>)],
-) -> impl ParallelIterator<Item = DiffResult> + 'a {
+) -> impl IndexedParallelIterator<Item = DiffResult> + 'a {
     let diff_options = diff_options.clone();
     let display_options = display_options.clone();
     let overrides: Vec<_> = overrides.into();
@@ -927,6 +927,57 @@ fn print_diff_result(display_options: &DisplayOptions, summary: &DiffResult) {
     }
 }
 
+/// Sort items in the `source` iterator by the 0th `usize` field, yield when all
+/// preceding items are received.
+///
+/// The idea is borrowed from
+/// <https://users.rust-lang.org/t/parallel-work-collected-sequentially/13504/3>.
+fn reorder_by_index<T>(
+    source: impl Iterator<Item = (usize, T)>,
+    mut next_index: usize,
+) -> impl Iterator<Item = (usize, T)> {
+    struct WorkItem<T> {
+        index: usize,
+        value: T,
+    }
+
+    impl<T> Eq for WorkItem<T> {}
+
+    impl<T> Ord for WorkItem<T> {
+        fn cmp(&self, other: &Self) -> Ordering {
+            self.index.cmp(&other.index).reverse() // min heap
+        }
+    }
+
+    impl<T> PartialEq for WorkItem<T> {
+        fn eq(&self, other: &Self) -> bool {
+            self.index == other.index
+        }
+    }
+
+    impl<T> PartialOrd for WorkItem<T> {
+        fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+            Some(self.cmp(other))
+        }
+    }
+
+    let mut source = source.fuse();
+    let mut queue: BinaryHeap<WorkItem<T>> = BinaryHeap::new();
+    iter::from_fn(move || {
+        // Consume the source iterator until the next_index item is found.
+        while queue.peek().map_or(true, |item| item.index > next_index) {
+            if let Some((index, value)) = source.next() {
+                queue.push(WorkItem { index, value });
+            } else {
+                break;
+            }
+        }
+        let item = queue.pop()?;
+        next_index = item.index + 1;
+        Some((item.index, item.value))
+    })
+}
+
 #[cfg(test)]
 mod tests {
     use std::ffi::OsStr;
@@ -951,4 +1002,13 @@ mod tests {
         assert_eq!(res.lhs_positions, vec![]);
         assert_eq!(res.rhs_positions, vec![]);
     }
+
+    #[test]
+    fn test_reorder_by_index() {
+        let source = vec![(0, "a"), (4, "b"), (2, "c"), (1, "d"), (3, "e")];
+        let reordered: Vec<_> = reorder_by_index(source.iter().copied(), 0).collect();
+        let mut sorted = source.clone();
+        sorted.sort();
+        assert_eq!(reordered, sorted);
+    }
 }

tests/cli.rs

Lines changed: 31 additions & 0 deletions
@@ -185,6 +185,37 @@ fn directory_arguments() {
     cmd.assert().stdout(predicate_fn);
 }
 
+#[test]
+fn directory_arguments_sorted() {
+    let mut cmd = get_base_command();
+
+    cmd.arg("--sort-paths")
+        .arg("--display=inline")
+        .arg("sample_files/dir_before")
+        .arg("sample_files/dir_after");
+
+    let expected_files = [
+        "clojure.clj",
+        "foo.js",
+        "has_many_hunk.py",
+        "only_in_after_dir.rs",
+        "only_in_before.c",
+    ];
+    let predicate_fn = predicate::function(|output: &str| {
+        let mut file_linenos = Vec::new();
+        for name in expected_files {
+            if let Some(lineno) = output.lines().position(|line| line.starts_with(name)) {
+                file_linenos.push(lineno);
+            } else {
+                return false; // All files should be emitted
+            }
+        }
+        // Output should be sorted
+        file_linenos.windows(2).all(|w| w[0] < w[1])
+    });
+    cmd.assert().stdout(predicate_fn);
+}
+
 #[test]
 fn git_style_arguments_rename() {
     let mut cmd = get_base_command();
