Skip to content

Commit 0a651d5

Browse files
committed
Optimize diffLinesToStringsMunge to use uint32 instead of string, and add extra test for rune conversion. Fixes #89.
1 parent f9beae7 commit 0a651d5

File tree

4 files changed

+64567
-18
lines changed

4 files changed

+64567
-18
lines changed

diffmatchpatch/diff.go

+14-18
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ const (
3434
DiffInsert Operation = 1
3535
// DiffEqual item represents an equal diff.
3636
DiffEqual Operation = 0
37-
//IndexSeperator is used to seperate the array indexes in an index string
38-
IndexSeperator = ","
37+
// IndexSeparator is used to separate the array indexes in an index string.
38+
IndexSeparator = ","
3939
)
4040

4141
// Diff represents one diff operation
@@ -122,7 +122,7 @@ func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, d
122122

123123
// Restore the prefix and suffix.
124124
if len(commonprefix) != 0 {
125-
diffs = append([]Diff{Diff{DiffEqual, string(commonprefix)}}, diffs...)
125+
diffs = append([]Diff{{DiffEqual, string(commonprefix)}}, diffs...)
126126
}
127127
if len(commonsuffix) != 0 {
128128
diffs = append(diffs, Diff{DiffEqual, string(commonsuffix)})
@@ -167,8 +167,8 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
167167
// Single character string.
168168
// After the previous speedup, the character can't be an equality.
169169
return []Diff{
170-
Diff{DiffDelete, string(text1)},
171-
Diff{DiffInsert, string(text2)},
170+
{DiffDelete, string(text1)},
171+
{DiffInsert, string(text2)},
172172
}
173173
// Check to see if the problem can be split in two.
174174
} else if hm := dmp.diffHalfMatch(text1, text2); hm != nil {
@@ -370,8 +370,8 @@ func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time)
370370
}
371371
// Diff took too long and hit the deadline or number of diffs equals number of characters, no commonality at all.
372372
return []Diff{
373-
Diff{DiffDelete, string(runes1)},
374-
Diff{DiffInsert, string(runes2)},
373+
{DiffDelete, string(runes1)},
374+
{DiffInsert, string(runes2)},
375375
}
376376
}
377377

@@ -406,7 +406,7 @@ func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune
406406
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
407407
hydrated := make([]Diff, 0, len(diffs))
408408
for _, aDiff := range diffs {
409-
chars := strings.Split(aDiff.Text, IndexSeperator)
409+
chars := strings.Split(aDiff.Text, IndexSeparator)
410410
text := make([]string, len(chars))
411411

412412
for i, r := range chars {
@@ -993,7 +993,7 @@ func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff {
993993
if x > 0 && diffs[x-1].Type == DiffEqual {
994994
diffs[x-1].Text += string(textInsert[:commonlength])
995995
} else {
996-
diffs = append([]Diff{Diff{DiffEqual, string(textInsert[:commonlength])}}, diffs...)
996+
diffs = append([]Diff{{DiffEqual, string(textInsert[:commonlength])}}, diffs...)
997997
pointer++
998998
}
999999
textInsert = textInsert[commonlength:]
@@ -1317,20 +1317,16 @@ func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, stri
13171317
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
13181318
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
13191319

1320-
//Adding a delimter to later get the strings as array
1321-
str1 := strings.Join(strIndexArray1[:], IndexSeperator)
1322-
str2 := strings.Join(strIndexArray2[:], IndexSeperator)
1323-
1324-
return str1, str2, lineArray
1320+
return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
13251321
}
13261322

13271323
// diffLinesToStringsMunge splits a text into lines, appends each previously unseen line to lineArray, and reduces the text to a []uint32 of indexes into lineArray.
1328-
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []string {
1324+
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
13291325
// Walk the text, pulling out a substring for each line. text.split('\n') would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
13301326
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
13311327
lineStart := 0
13321328
lineEnd := -1
1333-
strs := []string{}
1329+
strs := []uint32{}
13341330

13351331
for lineEnd < len(text)-1 {
13361332
lineEnd = indexOf(text, "\n", lineStart)
@@ -1344,11 +1340,11 @@ func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]str
13441340
lineValue, ok := lineHash[line]
13451341

13461342
if ok {
1347-
strs = append(strs, strconv.Itoa(lineValue))
1343+
strs = append(strs, uint32(lineValue))
13481344
} else {
13491345
*lineArray = append(*lineArray, line)
13501346
lineHash[line] = len(*lineArray) - 1
1351-
strs = append(strs, strconv.Itoa(len(*lineArray)-1))
1347+
strs = append(strs, uint32(len(*lineArray)-1))
13521348
}
13531349
}
13541350

diffmatchpatch/diff_test.go

+13
Original file line numberDiff line numberDiff line change
@@ -1453,6 +1453,19 @@ func TestDiffMainWithCheckLines(t *testing.T) {
14531453
}
14541454
}
14551455

1456+
func TestMassiveRuneDiffConversion(t *testing.T) {
1457+
sNew, err := ioutil.ReadFile("../testdata/fixture.go")
1458+
if err != nil {
1459+
panic(err)
1460+
}
1461+
1462+
dmp := New()
1463+
t1, t2, tt := dmp.DiffLinesToChars("", string(sNew))
1464+
diffs := dmp.DiffMain(t1, t2, false)
1465+
diffs = dmp.DiffCharsToLines(diffs, tt)
1466+
assert.NotEmpty(t, diffs)
1467+
}
1468+
14561469
func BenchmarkDiffMain(bench *testing.B) {
14571470
s1 := "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"
14581471
s2 := "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"

diffmatchpatch/stringutil.go

+18
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
package diffmatchpatch
1010

1111
import (
12+
"strconv"
1213
"strings"
1314
"unicode/utf8"
1415
)
@@ -86,3 +87,20 @@ func runesIndex(r1, r2 []rune) int {
8687
}
8788
return -1
8889
}
90+
91+
func intArrayToString(ns []uint32) string {
92+
if len(ns) == 0 {
93+
return ""
94+
}
95+
96+
indexSeparator := IndexSeparator[0]
97+
98+
// Appr. 3 chars per num plus the comma.
99+
b := []byte{}
100+
for _, n := range ns {
101+
b = strconv.AppendInt(b, int64(n), 10)
102+
b = append(b, indexSeparator)
103+
}
104+
b = b[:len(b)-1]
105+
return string(b)
106+
}

0 commit comments

Comments
 (0)