sergi · ShoshinNikita · Apr 7, 2021 · Apr 7, 2021 · Apr 7, 2021 · findleyr
diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
@@ -34,8 +34,6 @@ const (
 	DiffInsert Operation = 1
 	// DiffEqual item represents an equal diff.
 	DiffEqual Operation = 0
-	//IndexSeparator is used to seperate the array indexes in an index string
-	IndexSeparator = ","
 )
 
 // Diff represents one diff operation
@@ -195,7 +193,7 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
 // diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
 func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
 	// Scan the text on a line-by-line basis first.
-	text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2))
+	text1, text2, linearray := dmp.diffLinesToRunes(text1, text2)
 
 	diffs := dmp.diffMainRunes(text1, text2, false, deadline)
 
@@ -392,28 +390,88 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
 // DiffLinesToChars splits two texts into a list of strings, and educes the texts to a string of hashes where each Unicode character represents one line.
 // It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
 func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
-	chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
-	return chars1, chars2, lineArray
+	chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
+	return string(chars1), string(chars2), lineArray
 }
 
-// DiffLinesToRunes splits two texts into a list of runes.
+// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
 func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
-	chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
-	return []rune(chars1), []rune(chars2), lineArray
+	// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
+	lineArray := []string{""}    // e.g. lineArray[4] == 'Hello\n'
+	lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
+
+	chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash)
+	chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash)
+
+	return chars1, chars2, lineArray
+}
+
+func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) {
+	return dmp.DiffLinesToRunes(string(text1), string(text2))
+}
+
+// diffLinesToRunesMunge splits a text into an array of line strings, and reduces the text to a []rune where each rune represents one line.
+// We use strings instead of []runes as input mainly because you can't use []rune as a map key.
+func (dmp *DiffMatchPatch) diffLinesToRunesMunge(text string, lineArray *[]string, lineHash map[string]int) []rune {
+	// Walk the text, pulling out a substring for each line. text.split('\n') would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
+	lineStart := 0
+	lineEnd := -1
+	runes := []rune{}
+
+	for lineEnd < len(text)-1 {
+		lineEnd = indexOf(text, "\n", lineStart)
+
+		if lineEnd == -1 {
+			lineEnd = len(text) - 1
+		}
+
+		line := text[lineStart : lineEnd+1]
+		lineStart = lineEnd + 1
+		lineValue, ok := lineHash[line]
+		if !ok {
+			checkLineArray(lineArray)
+
+			*lineArray = append(*lineArray, line)
+			lineValue = len(*lineArray) - 1
+			lineHash[line] = lineValue
+		}
+		runes = append(runes, rune(lineValue))
+	}
+
+	return runes
+}
+
+// checkLineArray checks the size of the slice and ensures that the index of the next element
+// will be a valid rune.
+func checkLineArray(a *[]string) {
+	// Runes in this range are invalid: utf8.ValidRune() returns false for them.
+	const (
+		surrogateMin = 0xD800
+		surrogateMax = 0xDFFF
+	)
+
+	// Check the index of the next element
+	switch len(*a) {
+	case surrogateMin:
+		// Skip invalid runes.
+		padding := [surrogateMax - surrogateMin + 1]string{}
+		*a = append(*a, padding[:]...)
+
+	case utf8.MaxRune + 1:
+		// We can't do anything about it.
+		panic(fmt.Sprintf("rune can't be more than %d", utf8.MaxRune))
+	}
 }
 
 // DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
 func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
 	hydrated := make([]Diff, 0, len(diffs))
 	for _, aDiff := range diffs {
-		chars := strings.Split(aDiff.Text, IndexSeparator)
+		chars := aDiff.Text
 		text := make([]string, len(chars))
 
 		for i, r := range chars {
-			i1, err := strconv.Atoi(r)
-			if err == nil {
-				text[i] = lineArray[i1]
-			}
+			text[i] = lineArray[r]
 		}
 
 		aDiff.Text = strings.Join(text, "")
@@ -1307,46 +1365,3 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Di
 
 	return diffs, nil
 }
-
-// diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
-func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
-	// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
-	lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
-
-	//Each string has the index of lineArray which it points to
-	strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
-	strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
-
-	return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
-}
-
-// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
-func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
-	// Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
-	lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
-	lineStart := 0
-	lineEnd := -1
-	strs := []uint32{}
-
-	for lineEnd < len(text)-1 {
-		lineEnd = indexOf(text, "\n", lineStart)
-
-		if lineEnd == -1 {
-			lineEnd = len(text) - 1
-		}
-
-		line := text[lineStart : lineEnd+1]
-		lineStart = lineEnd + 1
-		lineValue, ok := lineHash[line]
-
-		if ok {
-			strs = append(strs, uint32(lineValue))
-		} else {
-			*lineArray = append(*lineArray, line)
-			lineHash[line] = len(*lineArray) - 1
-			strs = append(strs, uint32(len(*lineArray)-1))
-		}
-	}
-
-	return strs
-}
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
@@ -314,10 +314,10 @@ func TestDiffLinesToChars(t *testing.T) {
 	dmp := New()
 
 	for i, tc := range []TestCase{
-		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
-		{"a", "b", "1", "2", []string{"", "a", "b"}},
+		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\u0001\u0002\u0003\u0003", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
+		{"a", "b", "\u0001", "\u0002", []string{"", "a", "b"}},
 		// Omit final newline.
-		{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
+		{"alpha\nbeta\nalpha", "", "\u0001\u0002\u0003", "", []string{"", "alpha\n", "beta\n", "alpha"}},
 	} {
 		actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
 		assert.Equal(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -330,14 +330,14 @@ func TestDiffLinesToChars(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	var charList []string
+	var charList []rune
 	for x := 1; x < n+1; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, strconv.Itoa(x))
+		charList = append(charList, rune(x))
 	}
 	lines := strings.Join(lineList, "")
-	chars := strings.Join(charList[:], ",")
-	assert.Equal(t, n, len(strings.Split(chars, ",")))
+	chars := string(charList)
+	assert.Equal(t, n, utf8.RuneCountInString(chars))
 
 	actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
 	assert.Equal(t, chars, actualChars1)
@@ -358,8 +358,8 @@ func TestDiffCharsToLines(t *testing.T) {
 	for i, tc := range []TestCase{
 		{
 			Diffs: []Diff{
-				{DiffEqual, "1,2,1"},
-				{DiffInsert, "2,1,2"},
+				{DiffEqual, "\u0001\u0002\u0001"},
+				{DiffInsert, "\u0002\u0001\u0002"},
 			},
 			Lines: []string{"", "alpha\n", "beta\n"},
 
@@ -378,15 +378,14 @@ func TestDiffCharsToLines(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	charList := []string{}
+	charList := []rune{}
 	for x := 1; x <= n; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, strconv.Itoa(x))
+		charList = append(charList, rune(x))
 	}
 	assert.Equal(t, n, len(charList))
-	chars := strings.Join(charList[:], ",")
 
-	actual := dmp.DiffCharsToLines([]Diff{Diff{DiffDelete, chars}}, lineList)
+	actual := dmp.DiffCharsToLines([]Diff{Diff{DiffDelete, string(charList)}}, lineList)
 	assert.Equal(t, []Diff{Diff{DiffDelete, strings.Join(lineList, "")}}, actual)
 }
 
@@ -1419,16 +1418,12 @@ func TestDiffMainWithTimeout(t *testing.T) {
 }
 
 func TestDiffMainWithCheckLines(t *testing.T) {
+	// Test cases must be at least 100 chars long to pass the cutoff.
 	type TestCase struct {
 		Text1 string
 		Text2 string
 	}
-
-	dmp := New()
-	dmp.DiffTimeout = 0
-
-	// Test cases must be at least 100 chars long to pass the cutoff.
-	for i, tc := range []TestCase{
+	tests := []TestCase{
 		{
 			"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n",
 			"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n",
@@ -1441,7 +1436,39 @@ func TestDiffMainWithCheckLines(t *testing.T) {
 			"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n",
 			"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n",
 		},
-	} {
+	}
+	// Add a test case for issue #115.
+	func() {
+		const before = `package main
+
+import (
+	"fmt"
+)
+
+/*
+func upload(c echo.Context) error {
+	if err := r.ParseForm(); err != nil {
+		fmt.Fprintf(w, "ParseForm() err: %v", err)
+		return
+	}
+	fmt.Fprintf(w, "POST request successful")
+	path_ver := r.FormValue("path_ver")
+	ukclin_ver := r.FormValue("ukclin_ver")
+
+	fmt.Fprintf(w, "Name = %s\n", path_ver)
+	fmt.Fprintf(w, "Address = %s\n", ukclin_ver)
+}
+*/
+`
+		after := strings.ReplaceAll(before, "\n", "\r\n")
+
+		tests = append(tests, TestCase{Text1: before, Text2: after})
+	}()
+
+	dmp := New()
+	dmp.DiffTimeout = 0
+
+	for i, tc := range tests {
 		resultWithoutCheckLines := dmp.DiffMain(tc.Text1, tc.Text2, false)
 		resultWithCheckLines := dmp.DiffMain(tc.Text1, tc.Text2, true)
 

diff --git a/diffmatchpatch/stringutil.go b/diffmatchpatch/stringutil.go
@@ -9,7 +9,6 @@
 package diffmatchpatch
 
 import (
-	"strconv"
 	"strings"
 	"unicode/utf8"
 )
@@ -87,20 +86,3 @@ func runesIndex(r1, r2 []rune) int {
 	}
 	return -1
 }
-
-func intArrayToString(ns []uint32) string {
-	if len(ns) == 0 {
-		return ""
-	}
-
-	indexSeparator := IndexSeparator[0]
-
-	// Appr. 3 chars per num plus the comma.
-	b := []byte{}
-	for _, n := range ns {
-		b = strconv.AppendInt(b, int64(n), 10)
-		b = append(b, indexSeparator)
-	}
-	b = b[:len(b)-1]
-	return string(b)
-}