From de93e9121fd5313523d526607eb83ef38127b3c1 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Wed, 27 Dec 2023 20:52:53 +0000 Subject: [PATCH 1/2] Fix core algorithm implementation being mirrored relative to the Myers algorithm It makes it harder than it needs to be to reason about the algorithm when everything is flipped relative to Myers' paper. In Myers' paper, we keep track in an array of the index each diagonal has reached in the old string, and positive diagonal numbers represent diagonals where we have done more deletions than insertions. In JsDiff as it exists on master, we keep track in an array of the index each diagonal has reached in the NEW string, and positive diagonal numbers represent diagonals where we have done more insertions than deletions. Everything is mirrored, and this also causes an actual behaviour mismatch between JsDiff and an accurate Myers diff implementation - namely that when we diff e.g. 'abcd' against 'acbd' we output a diff that does an insertion and then later a deletion, rather than a deletion first and an insertion later like it should be. This patch makes the code in base.js be a closer match to what's in Myers's paper, which should make comparing this to other implementations or adding any of the optimizations proposed in Myers's paper easier. For now, this DOESN'T change any behaviour (I've added a hack, noted with a TODO, to ensure this) although it'll now be straightforward to fix the bug mentioned above. --- src/diff/base.js | 49 +++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/src/diff/base.js b/src/diff/base.js index b129a759..d4acc406 100644 --- a/src/diff/base.js +++ b/src/diff/base.js @@ -34,11 +34,11 @@ Diff.prototype = { maxEditLength = Math.min(maxEditLength, options.maxEditLength); } - let bestPath = [{ newPos: -1, lastComponent: undefined }]; + let bestPath = [{ oldPos: -1, lastComponent: undefined }]; // Seed editLength = 0, i.e. the content starts with the same values - let oldPos = this.extractCommon(bestPath[0], newString, oldString, 0); - if (bestPath[0].newPos + 1 >= newLen && oldPos + 1 >= oldLen) { + let newPos = this.extractCommon(bestPath[0], newString, oldString, 0); + if (bestPath[0].oldPos + 1 >= oldLen && newPos + 1 >= newLen) { // Identity per the equality and tokenizer return done([{value: this.join(newString), count: newString.length}]); } @@ -47,16 +47,21 @@ Diff.prototype = { function execEditLength() { for (let diagonalPath = -1 * editLength; diagonalPath <= editLength; diagonalPath += 2) { let basePath; - let addPath = bestPath[diagonalPath - 1], - removePath = bestPath[diagonalPath + 1], - oldPos = (removePath ? removePath.newPos : 0) - diagonalPath; - if (addPath) { + let removePath = bestPath[diagonalPath - 1], + addPath = bestPath[diagonalPath + 1]; + if (removePath) { // No one else is going to attempt to use this value, clear it bestPath[diagonalPath - 1] = undefined; } - let canAdd = addPath && addPath.newPos + 1 < newLen, - canRemove = removePath && 0 <= oldPos && oldPos < oldLen; + let canAdd = false; + if (addPath) { + // what newPos will be after we do an insertion: + const addPathNewPos = addPath.oldPos - diagonalPath; + canAdd = addPath && 0 <= addPathNewPos && addPathNewPos < newLen; + } + + let canRemove = removePath && removePath.oldPos + 1 < oldLen; if (!canAdd && !canRemove) { // If this path is a terminal then prune bestPath[diagonalPath] = undefined; @@ -66,16 +71,18 @@ Diff.prototype = { // Select the diagonal that we want to branch from. We select the prior // path whose position in the new string is the farthest from the origin // and does not pass the bounds of the diff graph - if (!canAdd || (canRemove && addPath.newPos < removePath.newPos)) { - basePath = self.addToPath(removePath, undefined, true, 0); + // TODO: Remove the `+ 1` here to make behavior match Myers algorithm + // and prefer to order removals before insertions. + if (!canRemove || (canAdd && removePath.oldPos + 1 < addPath.oldPos)) { + basePath = self.addToPath(addPath, true, undefined, 0); } else { - basePath = self.addToPath(addPath, true, undefined, 1); + basePath = self.addToPath(removePath, undefined, true, 1); } - oldPos = self.extractCommon(basePath, newString, oldString, diagonalPath); + newPos = self.extractCommon(basePath, newString, oldString, diagonalPath); // If we have hit the end of both strings, then we are done - if (basePath.newPos + 1 >= newLen && oldPos + 1 >= oldLen) { + if (basePath.oldPos + 1 >= oldLen && newPos + 1 >= newLen) { return done(buildValues(self, basePath.lastComponent, newString, oldString, self.useLongestToken)); } else { // Otherwise track this path as a potential candidate and continue. @@ -112,16 +119,16 @@ Diff.prototype = { } }, - addToPath(path, added, removed, newPosInc) { + addToPath(path, added, removed, oldPosInc) { let last = path.lastComponent; if (last && last.added === added && last.removed === removed) { return { - newPos: path.newPos + newPosInc, + oldPos: path.oldPos + oldPosInc, lastComponent: {count: last.count + 1, added: added, removed: removed, previousComponent: last.previousComponent } }; } else { return { - newPos: path.newPos + newPosInc, + oldPos: path.oldPos + oldPosInc, lastComponent: {count: 1, added: added, removed: removed, previousComponent: last } }; } @@ -129,8 +136,8 @@ Diff.prototype = { extractCommon(basePath, newString, oldString, diagonalPath) { let newLen = newString.length, oldLen = oldString.length, - newPos = basePath.newPos, - oldPos = newPos - diagonalPath, + oldPos = basePath.oldPos, + newPos = oldPos - diagonalPath, commonCount = 0; while (newPos + 1 < newLen && oldPos + 1 < oldLen && this.equals(newString[newPos + 1], oldString[oldPos + 1])) { @@ -143,8 +150,8 @@ Diff.prototype = { basePath.lastComponent = {count: commonCount, previousComponent: basePath.lastComponent}; } - basePath.newPos = newPos; - return oldPos; + basePath.oldPos = oldPos; + return newPos; }, equals(left, right) { From 9d8384d32002b415bb9988c534e9b5c25c717e22 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Wed, 27 Dec 2023 20:57:56 +0000 Subject: [PATCH 2/2] Fix comment --- src/diff/base.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diff/base.js b/src/diff/base.js index d4acc406..0dcd3a2a 100644 --- a/src/diff/base.js +++ b/src/diff/base.js @@ -69,7 +69,7 @@ Diff.prototype = { } // Select the diagonal that we want to branch from. We select the prior - // path whose position in the new string is the farthest from the origin + // path whose position in the old string is the farthest from the origin // and does not pass the bounds of the diff graph // TODO: Remove the `+ 1` here to make behavior match Myers algorithm // and prefer to order removals before insertions.