Skip to content

Commit a6ab856

Browse files
committed
patience diff: remove myers fallback
When patience_diff() is called on a range of lines that does not contain any unique context lines, it falls back to calling the myers algorithm on that region. The myers implementation calls xdl_optimize_ctxs(), but because it is called on a subset of the input it cannot optimize the context lines as well as it does when called on the whole file. In particular, insignificant blank lines that would be ignored as matches by the myers algorithm when it is run on the whole file will be considered matches within a smaller region. This has the unfortunate effect of matching blank lines as context when they are not meaningful, generating sub-optimal diffs. Instead, when there are no unique context lines within a region, just find the leading, trailing and inter-hunk context. This is more in keeping with the patience algorithm outlined by Bram Cohen[1]. Note that the interhunk context code rarely finds any interhunk context. In git it finds interhunk context 158 times out of the 8598 hunks where there are no unique lines (there are 368157 hunks in total). Trailing context is even rarer, with just 101 instances in the 8598 hunks with no unique lines. There are no instances of leading context, which seems surprising, but leading context is handled correctly in tests added in this patch. [1] https://bramcohen.livejournal.com/73318.html Signed-off-by: Phillip Wood <[email protected]>
1 parent 133d151 commit a6ab856

File tree

2 files changed

+112
-12
lines changed

2 files changed

+112
-12
lines changed

t/t4033-diff-patience.sh

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,46 @@ test_diff_frobnitz "patience"
1717

1818
test_diff_unique "patience"
1919

20+
test_expect_success 'non unique context between deletion and addition' '
21+
test_write_lines a b a b c d c d >file &&
22+
git add file &&
23+
test_write_lines a b c d e c d >file &&
24+
git diff --diff-algorithm=patience file >actual &&
25+
sed -ne "/^@@/,\$p" actual >hunk &&
26+
cat >expect <<-\EOF &&
27+
@@ -1,8 +1,7 @@
28+
a
29+
b
30+
-a
31+
-b
32+
c
33+
d
34+
+e
35+
c
36+
d
37+
EOF
38+
test_cmp expect hunk
39+
'
40+
41+
test_expect_success 'non unique context between additon and deletion' '
42+
test_write_lines a b a b c d c d >file &&
43+
git add file &&
44+
test_write_lines a b e a b c d >file &&
45+
git diff --diff-algorithm=patience file >actual &&
46+
sed -ne "/^@@/,\$p" actual >hunk &&
47+
cat >expect <<-\EOF &&
48+
@@ -1,8 +1,7 @@
49+
a
50+
b
51+
+e
52+
a
53+
b
54+
c
55+
d
56+
-c
57+
-d
58+
EOF
59+
test_cmp expect hunk
60+
'
61+
2062
test_done

xdiff/xpatience.c

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
*/
2222

2323
#include "xinclude.h"
24+
#include "xtypes.h"
2425

2526
/*
2627
* The basic idea of patience diff is to find lines that are unique in
@@ -303,16 +304,14 @@ static int walk_common_sequence(struct hashmap *map, struct entry *first,
303304
}
304305
}
305306

306-
static int fall_back_to_classic_diff(struct hashmap *map,
307-
int line1, int count1, int line2, int count2)
307+
static bool regions_match(xrecord_t *a, xrecord_t *b, int count)
308308
{
309-
xpparam_t xpp;
310-
311-
memset(&xpp, 0, sizeof(xpp));
312-
xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
309+
for (int i = 0; i < count; i++) {
310+
if (a[i].ha != b[i].ha)
311+
return false;
312+
}
313313

314-
return xdl_fall_back_diff(map->env, &xpp,
315-
line1, count1, line2, count2);
314+
return true;
316315
}
317316

318317
/*
@@ -357,12 +356,71 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env,
357356
result = find_longest_common_sequence(&map, &first);
358357
if (result)
359358
goto out;
360-
if (first)
359+
if (first) {
361360
result = walk_common_sequence(&map, first,
362361
line1, count1, line2, count2);
363-
else
364-
result = fall_back_to_classic_diff(&map,
365-
line1, count1, line2, count2);
362+
} else {
363+
xrecord_t *rec1 = env->xdf1.recs, *rec2 = env->xdf2.recs;
364+
long i1 = line1 - 2, i2 = line2 - 2;
365+
long overlap1, overlap2;
366+
367+
/* Find trailing context */
368+
while (count1 && count2 &&
369+
rec1[i1 + count1].ha == rec2[i2 + count2].ha) {
370+
count1--;
371+
count2--;
372+
}
373+
/* Find leading context */
374+
while (count1 && count2 &&
375+
rec1[line1 - 1].ha == rec2[line2 - 1].ha) {
376+
count1--;
377+
count2--;
378+
line1++;
379+
line2++;
380+
}
381+
/*
382+
* Find inter-hunk context. First see any of the
383+
* trailing deletions match leading additions, then
384+
* check to see if any trailing additions match leading
385+
* deletions and take the longest overlap.
386+
*/
387+
overlap1 = count1 > count2 ? count2 : count1;
388+
if (overlap1)
389+
overlap1--;
390+
i1 = line1 + (count1 - overlap1) - 1;
391+
i2 = line2 - 1;
392+
while (overlap1) {
393+
if (regions_match(&rec1[i1], &rec2[i2], overlap1))
394+
break;
395+
overlap1--;
396+
i1++;
397+
}
398+
overlap2 = count1 > count2 ? count2 : count1;
399+
if (overlap2)
400+
overlap2--;
401+
i1 = line1 - 1;
402+
i2 = line2 + (count2 - overlap2) - 1;
403+
while (overlap2) {
404+
if (regions_match(&rec1[i1], &rec2[i2], overlap2))
405+
break;
406+
overlap2--;
407+
i2++;
408+
}
409+
if (overlap1 > overlap2) {
410+
count1 -= overlap1;
411+
count2 -= overlap1;
412+
line2 += overlap1;
413+
} else {
414+
count1 -= overlap2;
415+
line1 += overlap2;
416+
count2 -= overlap2;
417+
}
418+
419+
while (count1--)
420+
env->xdf1.changed[line1++ - 1] = true;
421+
while (count2--)
422+
env->xdf2.changed[line2++ - 1] = true;
423+
}
366424
out:
367425
xdl_free(map.entries);
368426
return result;

0 commit comments

Comments
 (0)