diff --git a/Cargo.lock b/Cargo.lock index c9c3de2b29b..28286ceb130 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1567,6 +1567,7 @@ dependencies = [ "gix-trace 0.1.12", "gix-traverse 0.46.1", "gix-worktree 0.41.0", + "pretty_assertions", "smallvec", "thiserror 2.0.12", ] diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index 6731854c123..8a047a2fc3d 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -31,3 +31,4 @@ gix-fs = { path = "../gix-fs" } gix-index = { path = "../gix-index" } gix-odb = { path = "../gix-odb" } gix-testtools = { path = "../tests/tools" } +pretty_assertions = "1.4.0" diff --git a/gix-blame/src/file/function.rs b/gix-blame/src/file/function.rs index fbbb5c4248f..03429b61b12 100644 --- a/gix-blame/src/file/function.rs +++ b/gix-blame/src/file/function.rs @@ -37,7 +37,8 @@ use crate::{BlameEntry, Error, Options, Outcome, Statistics}; /// /// *For brevity, `HEAD` denotes the starting point of the blame operation. It could be any commit, or even commits that /// represent the worktree state. -/// We begin with a single *Unblamed Hunk* and a single suspect, usually the `HEAD` commit as the commit containing the +/// +/// We begin with one or more *Unblamed Hunks* and a single suspect, usually the `HEAD` commit as the commit containing the /// *Blamed File*, so that it contains the entire file, with the first commit being a candidate for the entire *Blamed File*. /// We traverse the commit graph starting at the first suspect, and see if there have been changes to `file_path`. /// If so, we have found a *Source File* and a *Suspect* commit, and have hunks that represent these changes. @@ -197,15 +198,16 @@ pub fn file( if let Some(range_in_suspect) = hunk.get_range(&suspect) { let range_in_blamed_file = hunk.range_in_blamed_file.clone(); - for (blamed_line_number, source_line_number) in range_in_blamed_file.zip(range_in_suspect.clone()) { - let source_token = source_lines_as_tokens[source_line_number as usize]; - let blame_token = blamed_lines_as_tokens[blamed_line_number as usize]; - - let source_line = BString::new(source_interner[source_token].into()); - let blamed_line = BString::new(blamed_interner[blame_token].into()); + let source_lines = range_in_suspect + .clone() + .map(|i| BString::new(source_interner[source_lines_as_tokens[i as usize]].into())) + .collect::>(); + let blamed_lines = range_in_blamed_file + .clone() + .map(|i| BString::new(blamed_interner[blamed_lines_as_tokens[i as usize]].into())) + .collect::>(); - assert_eq!(source_line, blamed_line); - } + assert_eq!(source_lines, blamed_lines); } } } @@ -302,33 +304,6 @@ pub fn file( unblamed_hunk.remove_blame(suspect); true }); - - // This block asserts that line ranges for each suspect never overlap. If they did overlap - // this would mean that the same line in a *Source File* would map to more than one line in - // the *Blamed File* and this is not possible. - #[cfg(debug_assertions)] - { - let ranges = hunks_to_blame.iter().fold( - std::collections::BTreeMap::>>::new(), - |mut acc, hunk| { - for (suspect, range) in hunk.suspects.clone() { - acc.entry(suspect).or_default().push(range); - } - - acc - }, - ); - - for (_, mut ranges) in ranges { - ranges.sort_by(|a, b| a.start.cmp(&b.start)); - - for window in ranges.windows(2) { - if let [a, b] = window { - assert!(a.end <= b.start, "#{hunks_to_blame:#?}"); - } - } - } - } } debug_assert_eq!( diff --git a/gix-blame/src/file/mod.rs b/gix-blame/src/file/mod.rs index 7470d71e037..935fb09eed8 100644 --- a/gix-blame/src/file/mod.rs +++ b/gix-blame/src/file/mod.rs @@ -8,11 +8,12 @@ use crate::types::{BlameEntry, Change, Either, LineRange, Offset, UnblamedHunk}; pub(super) mod function; -/// Compare a section from the *Blamed File* (`hunk`) with a change from a diff and see if there -/// is an intersection with `change`. Based on that intersection, we may generate a [`BlameEntry`] for `out` -/// and/or split the `hunk` into multiple. +/// Compare a section from a potential *Source File* (`hunk`) with a change from a diff and see if +/// there is an intersection with `change`. Based on that intersection, we may generate a +/// [`BlameEntry`] for `out` and/or split the `hunk` into multiple. /// -/// This is the core of the blame implementation as it matches regions in *Source File* to the *Blamed File*. +/// This is the core of the blame implementation as it matches regions in *Blamed File* to +/// corresponding regions in one or more than one *Source File*. fn process_change( new_hunks_to_blame: &mut Vec, offset: &mut Offset, @@ -320,36 +321,41 @@ fn process_change( /// Consume `hunks_to_blame` and `changes` to pair up matches ranges (also overlapping) with each other. /// Once a match is found, it's pushed onto `out`. +/// +/// `process_changes` assumes that ranges coming from the same *Source File* can and do +/// occasionally overlap. If it were a desirable property of the blame algorithm as a whole to +/// never have two different lines from a *Blamed File* mapped to the same line in a *Source File*, +/// this property would need to be enforced at a higher level than `process_changes`. +/// Then the nested loops could potentially be flattened into one. fn process_changes( hunks_to_blame: Vec, changes: Vec, suspect: ObjectId, parent: ObjectId, ) -> Vec { - let mut hunks_iter = hunks_to_blame.into_iter(); - let mut changes_iter = changes.into_iter(); + let mut new_hunks_to_blame = Vec::new(); - let mut hunk = hunks_iter.next(); - let mut change = changes_iter.next(); + for mut hunk in hunks_to_blame.into_iter().map(Some) { + let mut offset_in_destination = Offset::Added(0); - let mut new_hunks_to_blame = Vec::new(); - let mut offset_in_destination = Offset::Added(0); - - loop { - (hunk, change) = process_change( - &mut new_hunks_to_blame, - &mut offset_in_destination, - suspect, - parent, - hunk, - change, - ); - - hunk = hunk.or_else(|| hunks_iter.next()); - change = change.or_else(|| changes_iter.next()); - - if hunk.is_none() && change.is_none() { - break; + let mut changes_iter = changes.iter().cloned(); + let mut change = changes_iter.next(); + + loop { + (hunk, change) = process_change( + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + parent, + hunk, + change, + ); + + change = change.or_else(|| changes_iter.next()); + + if hunk.is_none() { + break; + } } } new_hunks_to_blame diff --git a/gix-blame/src/file/tests.rs b/gix-blame/src/file/tests.rs index 02e7d89ebd1..9185ca50633 100644 --- a/gix-blame/src/file/tests.rs +++ b/gix-blame/src/file/tests.rs @@ -959,6 +959,8 @@ mod process_change { } mod process_changes { + use pretty_assertions::assert_eq; + use crate::file::{ process_changes, tests::{new_unblamed_hunk, one_sha, zero_sha}, @@ -1337,4 +1339,38 @@ mod process_changes { ] ); } + + #[test] + fn subsequent_hunks_overlapping_end_of_addition() { + let suspect = zero_sha(); + let parent = one_sha(); + let hunks_to_blame = vec![ + new_unblamed_hunk(13..16, suspect, Offset::Added(0)), + new_unblamed_hunk(10..17, suspect, Offset::Added(0)), + ]; + let changes = vec![Change::AddedOrReplaced(10..14, 0)]; + let new_hunks_to_blame = process_changes(hunks_to_blame, changes, suspect, parent); + + assert_eq!( + new_hunks_to_blame, + [ + UnblamedHunk { + range_in_blamed_file: 13..14, + suspects: [(suspect, 13..14)].into() + }, + UnblamedHunk { + range_in_blamed_file: 14..16, + suspects: [(parent, 10..12)].into(), + }, + UnblamedHunk { + range_in_blamed_file: 10..14, + suspects: [(suspect, 10..14)].into(), + }, + UnblamedHunk { + range_in_blamed_file: 14..17, + suspects: [(parent, 10..13)].into(), + }, + ] + ); + } } diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index aca4299d41c..e811ab88ddb 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -2,15 +2,15 @@ //! //! ### Terminology //! -//! * **Source File** +//! * **Blamed File** //! - The file as it exists in `HEAD`. //! - the initial state with all lines that we need to associate with a *Source File*. -//! * **Blamed File** -//! - A file at a version (i.e. commit) that introduces hunks into the final 'image'. +//! * **Source File** +//! - A file at a version (i.e., commit) that introduces hunks into the final 'image' of the *Blamed File*. //! * **Suspects** //! - The versions of the files that can contain hunks that we could use in the final 'image' //! - multiple at the same time as the commit-graph may split up. -//! - turns into *Source File* once we have found an association into the *Blamed File*. +//! - They turn into a *Source File* once we have found an association into the *Blamed File*. #![deny(rust_2018_idioms, missing_docs)] #![forbid(unsafe_code)] diff --git a/gix-blame/src/types.rs b/gix-blame/src/types.rs index d60b723e598..84b107b6f8d 100644 --- a/gix-blame/src/types.rs +++ b/gix-blame/src/types.rs @@ -353,7 +353,10 @@ pub(crate) enum Either { } /// A single change between two blobs, or an unchanged region. -#[derive(Debug, PartialEq)] +/// +/// Line numbers refer to the file that is referred to as `after` or `NewOrDestination`, depending +/// on the context. +#[derive(Clone, Debug, PartialEq)] pub enum Change { /// A range of tokens that wasn't changed. Unchanged(Range),