Skip to content

Commit 18a6fd4

Browse files
erifan01 authored and
ianlancetaylor committed
bytes, strings: moves indexRabinKarp function to internal/bytealg
In order to facilitate optimization of IndexAny and LastIndexAny, this patch moves three Rabin-Karp related functions indexRabinKarp, hashStr and hashStrRev in the strings package to internal/bytealg. There are also three functions in the bytes package with the same names and functions but different parameter types. To highlight this, this patch also moves them to internal/bytealg and gives them slightly different names. Related benchmark changes on amd64 and arm64: name old time/op new time/op delta pkg:strings goos:linux goarch:amd64 Index-16 14.0ns ± 1% 14.1ns ± 2% ~ (p=0.738 n=5+5) LastIndex-16 15.5ns ± 1% 15.7ns ± 4% ~ (p=0.897 n=5+5) pkg:bytes goos:linux goarch:amd64 Index/10-16 26.5ns ± 1% 26.5ns ± 0% ~ (p=0.873 n=5+5) Index/32-16 26.2ns ± 0% 25.7ns ± 0% -1.68% (p=0.008 n=5+5) Index/4K-16 5.12µs ± 4% 5.14µs ± 2% ~ (p=0.841 n=5+5) Index/4M-16 5.44ms ± 3% 5.34ms ± 2% ~ (p=0.056 n=5+5) Index/64M-16 85.8ms ± 3% 84.6ms ± 0% -1.37% (p=0.016 n=5+5) name old speed new speed delta pkg:bytes goos:linux goarch:amd64 Index/10-16 377MB/s ± 1% 377MB/s ± 0% ~ (p=1.000 n=5+5) Index/32-16 1.22GB/s ± 1% 1.24GB/s ± 0% +1.66% (p=0.008 n=5+5) Index/4K-16 800MB/s ± 4% 797MB/s ± 2% ~ (p=0.841 n=5+5) Index/4M-16 771MB/s ± 3% 786MB/s ± 2% ~ (p=0.056 n=5+5) Index/64M-16 783MB/s ± 3% 793MB/s ± 0% +1.36% (p=0.016 n=5+5) name old time/op new time/op delta pkg:strings goos:linux goarch:arm64 Index-8 22.6ns ± 0% 22.5ns ± 0% ~ (p=0.167 n=5+5) LastIndex-8 17.5ns ± 0% 17.5ns ± 0% ~ (all equal) pkg:bytes goos:linux goarch:arm64 Index/10-8 25.0ns ± 0% 25.0ns ± 0% ~ (all equal) Index/32-8 160ns ± 0% 160ns ± 0% ~ (all equal) Index/4K-8 6.26µs ± 0% 6.26µs ± 0% ~ (p=0.167 n=5+5) Index/4M-8 6.30ms ± 0% 6.31ms ± 0% ~ (p=1.000 n=5+5) Index/64M-8 101ms ± 0% 101ms ± 0% ~ (p=0.690 n=5+5) name old speed new speed delta pkg:bytes goos:linux goarch:arm64 Index/10-8 399MB/s ± 0% 400MB/s ± 0% +0.08% (p=0.008 n=5+5) Index/32-8 200MB/s ± 0% 200MB/s ± 0% ~ (p=0.127 n=4+5) Index/4K-8 654MB/s ± 0% 654MB/s ± 0% 
+0.01% (p=0.016 n=5+5) Index/4M-8 665MB/s ± 0% 665MB/s ± 0% ~ (p=0.833 n=5+5) Index/64M-8 665MB/s ± 0% 665MB/s ± 0% ~ (p=0.913 n=5+5) Change-Id: Icce3bc162bb8613ac36dc963a46c51f8e82ab842 Reviewed-on: https://go-review.googlesource.com/c/go/+/208638 Run-TryBot: eric fang <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Ian Lance Taylor <[email protected]>
1 parent cec0879 commit 18a6fd4

File tree

4 files changed

+156
-129
lines changed

4 files changed

+156
-129
lines changed

src/bytes/bytes.go

Lines changed: 4 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -117,17 +117,17 @@ func LastIndex(s, sep []byte) int {
117117
return -1
118118
}
119119
// Rabin-Karp search from the end of the string
120-
hashss, pow := hashStrRev(sep)
120+
hashss, pow := bytealg.HashStrRevBytes(sep)
121121
last := len(s) - n
122122
var h uint32
123123
for i := len(s) - 1; i >= last; i-- {
124-
h = h*primeRK + uint32(s[i])
124+
h = h*bytealg.PrimeRK + uint32(s[i])
125125
}
126126
if h == hashss && Equal(s[last:], sep) {
127127
return last
128128
}
129129
for i := last - 1; i >= 0; i-- {
130-
h *= primeRK
130+
h *= bytealg.PrimeRK
131131
h += uint32(s[i])
132132
h -= pow * uint32(s[i+n])
133133
if h == hashss && Equal(s[i:i+n], sep) {
@@ -1068,7 +1068,7 @@ func Index(s, sep []byte) int {
10681068
// we should cutover at even larger average skips,
10691069
// because Equal becomes that much more expensive.
10701070
// This code does not take that effect into account.
1071-
j := indexRabinKarp(s[i:], sep)
1071+
j := bytealg.IndexRabinKarpBytes(s[i:], sep)
10721072
if j < 0 {
10731073
return -1
10741074
}
@@ -1077,63 +1077,3 @@ func Index(s, sep []byte) int {
10771077
}
10781078
return -1
10791079
}
1080-
1081-
func indexRabinKarp(s, sep []byte) int {
1082-
// Rabin-Karp search
1083-
hashsep, pow := hashStr(sep)
1084-
n := len(sep)
1085-
var h uint32
1086-
for i := 0; i < n; i++ {
1087-
h = h*primeRK + uint32(s[i])
1088-
}
1089-
if h == hashsep && Equal(s[:n], sep) {
1090-
return 0
1091-
}
1092-
for i := n; i < len(s); {
1093-
h *= primeRK
1094-
h += uint32(s[i])
1095-
h -= pow * uint32(s[i-n])
1096-
i++
1097-
if h == hashsep && Equal(s[i-n:i], sep) {
1098-
return i - n
1099-
}
1100-
}
1101-
return -1
1102-
}
1103-
1104-
// primeRK is the prime base used in Rabin-Karp algorithm.
1105-
const primeRK = 16777619
1106-
1107-
// hashStr returns the hash and the appropriate multiplicative
1108-
// factor for use in Rabin-Karp algorithm.
1109-
func hashStr(sep []byte) (uint32, uint32) {
1110-
hash := uint32(0)
1111-
for i := 0; i < len(sep); i++ {
1112-
hash = hash*primeRK + uint32(sep[i])
1113-
}
1114-
var pow, sq uint32 = 1, primeRK
1115-
for i := len(sep); i > 0; i >>= 1 {
1116-
if i&1 != 0 {
1117-
pow *= sq
1118-
}
1119-
sq *= sq
1120-
}
1121-
return hash, pow
1122-
}
1123-
1124-
// hashStrRev returns the hash of the reverse of sep and the
1125-
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
1126-
func hashStrRev(sep []byte) (uint32, uint32) {
1127-
hash := uint32(0)
1128-
for i := len(sep) - 1; i >= 0; i-- {
1129-
hash = hash*primeRK + uint32(sep[i])
1130-
}
1131-
var pow, sq uint32 = 1, primeRK
1132-
for i := len(sep); i > 0; i >>= 1 {
1133-
if i&1 != 0 {
1134-
pow *= sq
1135-
}
1136-
sq *= sq
1137-
}
1138-
return hash, pow
1139-
}

src/bytes/bytes_test.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,10 @@ var indexTests = []BinOpTest{
141141
{"barfoobarfooyyyzzzyyyzzzyyyzzzyyyxxxzzzyyy", "x", 33},
142142
{"foofyfoobarfoobar", "y", 4},
143143
{"oooooooooooooooooooooo", "r", -1},
144-
// test fallback to Rabin-Karp.
145144
{"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
146145
{"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
146+
// test fallback to Rabin-Karp.
147+
{"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
147148
}
148149

149150
var lastIndexTests = []BinOpTest{
@@ -209,6 +210,27 @@ func runIndexTests(t *testing.T, f func(s, sep []byte) int, funcName string, tes
209210
t.Errorf("%s(%q,%q) = %v; want %v", funcName, a, b, actual, test.i)
210211
}
211212
}
213+
var allocTests = []struct {
214+
a []byte
215+
b []byte
216+
i int
217+
}{
218+
// case for function Index.
219+
{[]byte("000000000000000000000000000000000000000000000000000000000000000000000001"), []byte("0000000000000000000000000000000000000000000000000000000000000000001"), 5},
220+
// case for function LastIndex.
221+
{[]byte("000000000000000000000000000000000000000000000000000000000000000010000"), []byte("00000000000000000000000000000000000000000000000000000000000001"), 3},
222+
}
223+
allocs := testing.AllocsPerRun(100, func() {
224+
if i := Index(allocTests[1].a, allocTests[1].b); i != allocTests[1].i {
225+
t.Errorf("Index([]byte(%q), []byte(%q)) = %v; want %v", allocTests[1].a, allocTests[1].b, i, allocTests[1].i)
226+
}
227+
if i := LastIndex(allocTests[0].a, allocTests[0].b); i != allocTests[0].i {
228+
t.Errorf("LastIndex([]byte(%q), []byte(%q)) = %v; want %v", allocTests[0].a, allocTests[0].b, i, allocTests[0].i)
229+
}
230+
})
231+
if allocs != 0 {
232+
t.Errorf("expected no allocations, got %f", allocs)
233+
}
212234
}
213235

214236
func runIndexAnyTests(t *testing.T, f func(s []byte, chars string) int, funcName string, testCases []BinOpTest) {

src/internal/bytealg/bytealg.go

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,128 @@ const (
2121

2222
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
2323
var MaxLen int
24+
25+
// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
26+
// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
27+
// three of them without causing allocation?
28+
29+
// PrimeRK is the prime base used in Rabin-Karp algorithm.
30+
const PrimeRK = 16777619
31+
32+
// HashStrBytes returns the hash and the appropriate multiplicative
33+
// factor for use in Rabin-Karp algorithm.
34+
func HashStrBytes(sep []byte) (uint32, uint32) {
35+
hash := uint32(0)
36+
for i := 0; i < len(sep); i++ {
37+
hash = hash*PrimeRK + uint32(sep[i])
38+
}
39+
var pow, sq uint32 = 1, PrimeRK
40+
for i := len(sep); i > 0; i >>= 1 {
41+
if i&1 != 0 {
42+
pow *= sq
43+
}
44+
sq *= sq
45+
}
46+
return hash, pow
47+
}
48+
49+
// HashStr returns the hash and the appropriate multiplicative
50+
// factor for use in Rabin-Karp algorithm.
51+
func HashStr(sep string) (uint32, uint32) {
52+
hash := uint32(0)
53+
for i := 0; i < len(sep); i++ {
54+
hash = hash*PrimeRK + uint32(sep[i])
55+
}
56+
var pow, sq uint32 = 1, PrimeRK
57+
for i := len(sep); i > 0; i >>= 1 {
58+
if i&1 != 0 {
59+
pow *= sq
60+
}
61+
sq *= sq
62+
}
63+
return hash, pow
64+
}
65+
66+
// HashStrRevBytes returns the hash of the reverse of sep and the
67+
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
68+
func HashStrRevBytes(sep []byte) (uint32, uint32) {
69+
hash := uint32(0)
70+
for i := len(sep) - 1; i >= 0; i-- {
71+
hash = hash*PrimeRK + uint32(sep[i])
72+
}
73+
var pow, sq uint32 = 1, PrimeRK
74+
for i := len(sep); i > 0; i >>= 1 {
75+
if i&1 != 0 {
76+
pow *= sq
77+
}
78+
sq *= sq
79+
}
80+
return hash, pow
81+
}
82+
83+
// HashStrRev returns the hash of the reverse of sep and the
84+
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
85+
func HashStrRev(sep string) (uint32, uint32) {
86+
hash := uint32(0)
87+
for i := len(sep) - 1; i >= 0; i-- {
88+
hash = hash*PrimeRK + uint32(sep[i])
89+
}
90+
var pow, sq uint32 = 1, PrimeRK
91+
for i := len(sep); i > 0; i >>= 1 {
92+
if i&1 != 0 {
93+
pow *= sq
94+
}
95+
sq *= sq
96+
}
97+
return hash, pow
98+
}
99+
100+
// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
101+
// first occurrence of sep in s, or -1 if not present.
102+
func IndexRabinKarpBytes(s, sep []byte) int {
103+
// Rabin-Karp search
104+
hashsep, pow := HashStrBytes(sep)
105+
n := len(sep)
106+
var h uint32
107+
for i := 0; i < n; i++ {
108+
h = h*PrimeRK + uint32(s[i])
109+
}
110+
if h == hashsep && Equal(s[:n], sep) {
111+
return 0
112+
}
113+
for i := n; i < len(s); {
114+
h *= PrimeRK
115+
h += uint32(s[i])
116+
h -= pow * uint32(s[i-n])
117+
i++
118+
if h == hashsep && Equal(s[i-n:i], sep) {
119+
return i - n
120+
}
121+
}
122+
return -1
123+
}
124+
125+
// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
126+
// first occurrence of substr in s, or -1 if not present.
127+
func IndexRabinKarp(s, substr string) int {
128+
// Rabin-Karp search
129+
hashss, pow := HashStr(substr)
130+
n := len(substr)
131+
var h uint32
132+
for i := 0; i < n; i++ {
133+
h = h*PrimeRK + uint32(s[i])
134+
}
135+
if h == hashss && s[:n] == substr {
136+
return 0
137+
}
138+
for i := n; i < len(s); {
139+
h *= PrimeRK
140+
h += uint32(s[i])
141+
h -= pow * uint32(s[i-n])
142+
i++
143+
if h == hashss && s[i-n:i] == substr {
144+
return i - n
145+
}
146+
}
147+
return -1
148+
}

src/strings/strings.go

Lines changed: 4 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -36,43 +36,6 @@ func explode(s string, n int) []string {
3636
return a
3737
}
3838

39-
// primeRK is the prime base used in Rabin-Karp algorithm.
40-
const primeRK = 16777619
41-
42-
// hashStr returns the hash and the appropriate multiplicative
43-
// factor for use in Rabin-Karp algorithm.
44-
func hashStr(sep string) (uint32, uint32) {
45-
hash := uint32(0)
46-
for i := 0; i < len(sep); i++ {
47-
hash = hash*primeRK + uint32(sep[i])
48-
}
49-
var pow, sq uint32 = 1, primeRK
50-
for i := len(sep); i > 0; i >>= 1 {
51-
if i&1 != 0 {
52-
pow *= sq
53-
}
54-
sq *= sq
55-
}
56-
return hash, pow
57-
}
58-
59-
// hashStrRev returns the hash of the reverse of sep and the
60-
// appropriate multiplicative factor for use in Rabin-Karp algorithm.
61-
func hashStrRev(sep string) (uint32, uint32) {
62-
hash := uint32(0)
63-
for i := len(sep) - 1; i >= 0; i-- {
64-
hash = hash*primeRK + uint32(sep[i])
65-
}
66-
var pow, sq uint32 = 1, primeRK
67-
for i := len(sep); i > 0; i >>= 1 {
68-
if i&1 != 0 {
69-
pow *= sq
70-
}
71-
sq *= sq
72-
}
73-
return hash, pow
74-
}
75-
7639
// Count counts the number of non-overlapping instances of substr in s.
7740
// If substr is an empty string, Count returns 1 + the number of Unicode code points in s.
7841
func Count(s, substr string) int {
@@ -126,17 +89,17 @@ func LastIndex(s, substr string) int {
12689
return -1
12790
}
12891
// Rabin-Karp search from the end of the string
129-
hashss, pow := hashStrRev(substr)
92+
hashss, pow := bytealg.HashStrRev(substr)
13093
last := len(s) - n
13194
var h uint32
13295
for i := len(s) - 1; i >= last; i-- {
133-
h = h*primeRK + uint32(s[i])
96+
h = h*bytealg.PrimeRK + uint32(s[i])
13497
}
13598
if h == hashss && s[last:] == substr {
13699
return last
137100
}
138101
for i := last - 1; i >= 0; i-- {
139-
h *= primeRK
102+
h *= bytealg.PrimeRK
140103
h += uint32(s[i])
141104
h -= pow * uint32(s[i+n])
142105
if h == hashss && s[i:i+n] == substr {
@@ -1095,7 +1058,7 @@ func Index(s, substr string) int {
10951058
fails++
10961059
if fails >= 4+i>>4 && i < t {
10971060
// See comment in ../bytes/bytes.go.
1098-
j := indexRabinKarp(s[i:], substr)
1061+
j := bytealg.IndexRabinKarp(s[i:], substr)
10991062
if j < 0 {
11001063
return -1
11011064
}
@@ -1104,26 +1067,3 @@ func Index(s, substr string) int {
11041067
}
11051068
return -1
11061069
}
1107-
1108-
func indexRabinKarp(s, substr string) int {
1109-
// Rabin-Karp search
1110-
hashss, pow := hashStr(substr)
1111-
n := len(substr)
1112-
var h uint32
1113-
for i := 0; i < n; i++ {
1114-
h = h*primeRK + uint32(s[i])
1115-
}
1116-
if h == hashss && s[:n] == substr {
1117-
return 0
1118-
}
1119-
for i := n; i < len(s); {
1120-
h *= primeRK
1121-
h += uint32(s[i])
1122-
h -= pow * uint32(s[i-n])
1123-
i++
1124-
if h == hashss && s[i-n:i] == substr {
1125-
return i - n
1126-
}
1127-
}
1128-
return -1
1129-
}

0 commit comments

Comments
 (0)