Skip to content

Commit e607abb

Browse files
egonelbre authored and bradfitz committed
unicode: improve SimpleFold performance for ascii
This change significantly speeds up case-insensitive regexp matching.

benchmark                      old ns/op      new ns/op      delta
BenchmarkMatchEasy0i_32-8      2690           1473           -45.24%
BenchmarkMatchEasy0i_1K-8      80404          42269          -47.43%
BenchmarkMatchEasy0i_32K-8     3272187        2076118        -36.55%
BenchmarkMatchEasy0i_1M-8      104805990      66503805       -36.55%
BenchmarkMatchEasy0i_32M-8     3360192200     2126121600     -36.73%

benchmark                      old MB/s     new MB/s     speedup
BenchmarkMatchEasy0i_32-8      11.90        21.72        1.83x
BenchmarkMatchEasy0i_1K-8      12.74        24.23        1.90x
BenchmarkMatchEasy0i_32K-8     10.01        15.78        1.58x
BenchmarkMatchEasy0i_1M-8      10.00        15.77        1.58x
BenchmarkMatchEasy0i_32M-8     9.99         15.78        1.58x

Issue golang#13288

Change-Id: I94af7bb29e75d60b4f6ee760124867ab271b9642
Reviewed-on: https://go-review.googlesource.com/16943
Reviewed-by: Brad Fitzpatrick <[email protected]>
Run-TryBot: Brad Fitzpatrick <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
1 parent 6e4a861 commit e607abb

File tree

4 files changed

+161
-0
lines changed

4 files changed

+161
-0
lines changed

src/regexp/exec_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,7 @@ func benchmark(b *testing.B, re string, n int) {
672672

673673
const (
674674
easy0 = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
675+
easy0i = "(?i)ABCDEFGHIJklmnopqrstuvwxyz$"
675676
easy1 = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
676677
medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
677678
hard = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
@@ -682,6 +683,11 @@ func BenchmarkMatchEasy0_1K(b *testing.B) { benchmark(b, easy0, 1<<10) }
682683
// Each wrapper below forwards a pattern constant and a size n to benchmark;
// the suffix of the function name (_32, _1K, _32K, _1M, _32M) matches the
// value of n written as a shift expression.
func BenchmarkMatchEasy0_32K(b *testing.B) { benchmark(b, easy0, 32<<10) }
683684
func BenchmarkMatchEasy0_1M(b *testing.B) { benchmark(b, easy0, 1<<20) }
684685
func BenchmarkMatchEasy0_32M(b *testing.B) { benchmark(b, easy0, 32<<20) }
686+
// The Easy0i series repeats the Easy0 sizes with the easy0i pattern, which
// carries the (?i) flag and mixes upper- and lower-case letters, so the
// case-insensitive matching path is measured.
// NOTE(review): n is presumably the input length in bytes — confirm in benchmark.
func BenchmarkMatchEasy0i_32(b *testing.B) { benchmark(b, easy0i, 32<<0) }
687+
func BenchmarkMatchEasy0i_1K(b *testing.B) { benchmark(b, easy0i, 1<<10) }
688+
func BenchmarkMatchEasy0i_32K(b *testing.B) { benchmark(b, easy0i, 32<<10) }
689+
func BenchmarkMatchEasy0i_1M(b *testing.B) { benchmark(b, easy0i, 1<<20) }
690+
func BenchmarkMatchEasy0i_32M(b *testing.B) { benchmark(b, easy0i, 32<<20) }
685691
func BenchmarkMatchEasy1_32(b *testing.B) { benchmark(b, easy1, 32<<0) }
686692
func BenchmarkMatchEasy1_1K(b *testing.B) { benchmark(b, easy1, 1<<10) }
687693
func BenchmarkMatchEasy1_32K(b *testing.B) { benchmark(b, easy1, 32<<10) }

src/unicode/letter.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,10 @@ type foldPair struct {
332332
// SimpleFold('1') = '1'
333333
//
334334
func SimpleFold(r rune) rune {
335+
if int(r) < len(asciiFold) {
336+
return rune(asciiFold[r])
337+
}
338+
335339
// Consult caseOrbit table for special cases.
336340
lo := 0
337341
hi := len(caseOrbit)

src/unicode/maketables.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1172,6 +1172,7 @@ func printCasefold() {
11721172
}
11731173
}
11741174

1175+
printAsciiFold()
11751176
printCaseOrbit()
11761177

11771178
// Tables of category and script folding exceptions: code points
@@ -1269,6 +1270,25 @@ var comment = map[string]string{
12691270
"// If there is no entry for a script name, there are no such points.\n",
12701271
}
12711272

1273+
// printAsciiFold prints the declaration of the asciiFold array: one
// four-digit hexadecimal entry per code point from 0 through
// unicode.MaxASCII, in code point order, followed by the closing brace.
func printAsciiFold() {
1274+
printf("var asciiFold = [MaxASCII + 1]uint16{\n")
1275+
for i := rune(0); i <= unicode.MaxASCII; i++ {
1276+
c := chars[i]
1277+
// Start from the recorded caseOrbit value; zero means none is recorded.
// NOTE(review): caseOrbit is presumably filled in only for characters whose
// fold orbit has more than two members — confirm where chars is populated.
f := c.caseOrbit
1278+
if f == 0 {
1279+
// No orbit entry: use the lower-case mapping if it exists and differs
// from i, otherwise the upper-case mapping under the same conditions.
if c.lowerCase != i && c.lowerCase != 0 {
1280+
f = c.lowerCase
1281+
} else if c.upperCase != i && c.upperCase != 0 {
1282+
f = c.upperCase
1283+
} else {
1284+
// Neither mapping applies, so the entry is the code point itself.
f = i
1285+
}
1286+
}
1287+
printf("\t0x%04X,\n", f)
1288+
}
1289+
printf("}\n\n")
1290+
}
1291+
12721292
func printCaseOrbit() {
12731293
if *test {
12741294
for j := range chars {

src/unicode/tables.go

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6834,6 +6834,137 @@ var properties = [MaxLatin1 + 1]uint8{
68346834
0xFF: pLl | pp, // 'ÿ'
68356835
}
68366836

6837+
// asciiFold is indexed by code point (0 through MaxASCII) and holds the value
// SimpleFold returns for that code point; SimpleFold reads this table directly
// for any rune below len(asciiFold). Most entries are the code point itself or
// its opposite-case ASCII letter. Two entries lie outside ASCII, which is why
// the element type is uint16 rather than a single byte.
var asciiFold = [MaxASCII + 1]uint16{
6838+
0x0000,
6839+
0x0001,
6840+
0x0002,
6841+
0x0003,
6842+
0x0004,
6843+
0x0005,
6844+
0x0006,
6845+
0x0007,
6846+
0x0008,
6847+
0x0009,
6848+
0x000A,
6849+
0x000B,
6850+
0x000C,
6851+
0x000D,
6852+
0x000E,
6853+
0x000F,
6854+
0x0010,
6855+
0x0011,
6856+
0x0012,
6857+
0x0013,
6858+
0x0014,
6859+
0x0015,
6860+
0x0016,
6861+
0x0017,
6862+
0x0018,
6863+
0x0019,
6864+
0x001A,
6865+
0x001B,
6866+
0x001C,
6867+
0x001D,
6868+
0x001E,
6869+
0x001F,
6870+
0x0020,
6871+
0x0021,
6872+
0x0022,
6873+
0x0023,
6874+
0x0024,
6875+
0x0025,
6876+
0x0026,
6877+
0x0027,
6878+
0x0028,
6879+
0x0029,
6880+
0x002A,
6881+
0x002B,
6882+
0x002C,
6883+
0x002D,
6884+
0x002E,
6885+
0x002F,
6886+
0x0030,
6887+
0x0031,
6888+
0x0032,
6889+
0x0033,
6890+
0x0034,
6891+
0x0035,
6892+
0x0036,
6893+
0x0037,
6894+
0x0038,
6895+
0x0039,
6896+
0x003A,
6897+
0x003B,
6898+
0x003C,
6899+
0x003D,
6900+
0x003E,
6901+
0x003F,
6902+
0x0040,
6903+
0x0061,
6904+
0x0062,
6905+
0x0063,
6906+
0x0064,
6907+
0x0065,
6908+
0x0066,
6909+
0x0067,
6910+
0x0068,
6911+
0x0069,
6912+
0x006A,
6913+
0x006B,
6914+
0x006C,
6915+
0x006D,
6916+
0x006E,
6917+
0x006F,
6918+
0x0070,
6919+
0x0071,
6920+
0x0072,
6921+
0x0073,
6922+
0x0074,
6923+
0x0075,
6924+
0x0076,
6925+
0x0077,
6926+
0x0078,
6927+
0x0079,
6928+
0x007A,
6929+
0x005B,
6930+
0x005C,
6931+
0x005D,
6932+
0x005E,
6933+
0x005F,
6934+
0x0060,
6935+
0x0041,
6936+
0x0042,
6937+
0x0043,
6938+
0x0044,
6939+
0x0045,
6940+
0x0046,
6941+
0x0047,
6942+
0x0048,
6943+
0x0049,
6944+
0x004A,
6945+
0x212A, // index 0x6B ('k'): the entry is U+212A, outside the ASCII range
6946+
0x004C,
6947+
0x004D,
6948+
0x004E,
6949+
0x004F,
6950+
0x0050,
6951+
0x0051,
6952+
0x0052,
6953+
0x017F, // index 0x73 ('s'): the entry is U+017F, outside the ASCII range
6954+
0x0054,
6955+
0x0055,
6956+
0x0056,
6957+
0x0057,
6958+
0x0058,
6959+
0x0059,
6960+
0x005A,
6961+
0x007B,
6962+
0x007C,
6963+
0x007D,
6964+
0x007E,
6965+
0x007F,
6966+
}
6967+
68376968
var caseOrbit = []foldPair{
68386969
{0x004B, 0x006B},
68396970
{0x0053, 0x0073},

0 commit comments

Comments
 (0)