Skip to content

Commit fd2e1e7

Browse files
adonovan authored and mvdan committed
unicode/utf8: optimize Valid to parity with ValidString
The benchmarks added in this change revealed that ValidString runs ~17% faster than Valid([]byte) on the ASCII prefix of the input. Inspection of the assembly revealed that the code generated for p[8:] required recomputing the slice capacity to handle the cap=0 special case, which added an ADD -8 instruction. By making len=cap, the capacity becomes a common subexpression with the length, saving the ADD instruction. (Thanks to khr for the tip.)

Incidentally, I tried a number of other optimizations but was unable to make consistent gains across all benchmarks. The most promising was to retain the bitmask of non-ASCII bytes from the fast loop; the slow loop would shift it, and when it becomes zero, return to the fast loop. This made the MostlyASCII benchmark 4x faster, but made the other cases slower by up to 10%.

cpu: Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz

benchmark                                    old ns/op    new ns/op    delta
BenchmarkValidTenASCIIChars-16               4.09         4.06         -0.85%
BenchmarkValid100KASCIIChars-16              9325         7747         -16.92%
BenchmarkValidTenJapaneseChars-16            27.0         27.2         +0.85%
BenchmarkValidLongMostlyASCII-16             57277        58361        +1.89%
BenchmarkValidLongJapanese-16                94002        93131        -0.93%
BenchmarkValidStringTenASCIIChars-16         4.15         4.07         -1.74%
BenchmarkValidString100KASCIIChars-16        7980         8019         +0.49%
BenchmarkValidStringTenJapaneseChars-16      26.0         25.9         -0.38%
BenchmarkValidStringLongMostlyASCII-16       58550        58006        -0.93%
BenchmarkValidStringLongJapanese-16          97964        100038       +2.12%

Change-Id: Ic9d585dedd9af83c27dd791ecd805150ac949f15
Reviewed-on: https://go-review.googlesource.com/c/go/+/375594
Reviewed-by: Keith Randall <[email protected]>
Run-TryBot: Keith Randall <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Trust: Alex Rakoczy <[email protected]>
1 parent bebe9aa commit fd2e1e7

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

src/unicode/utf8/utf8.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,6 +475,11 @@ func RuneStart(b byte) bool { return b&0xC0 != 0x80 }
475475

476476
// Valid reports whether p consists entirely of valid UTF-8-encoded runes.
477477
func Valid(p []byte) bool {
478+
// This optimization avoids the need to recompute the capacity
479+
// when generating code for p[8:], bringing it to parity with
480+
// ValidString, which was 20% faster on long ASCII strings.
481+
p = p[:len(p):len(p)]
482+
478483
// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
479484
for len(p) >= 8 {
480485
// Combining two 32 bit loads allows the same code to be used

src/unicode/utf8/utf8_test.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package utf8_test
66

77
import (
88
"bytes"
9+
"strings"
910
"testing"
1011
"unicode"
1112
. "unicode/utf8"
@@ -554,32 +555,89 @@ func BenchmarkRuneCountInStringTenJapaneseChars(b *testing.B) {
554555
}
555556
}
556557

558+
// ascii100000 is a 100,000-byte ASCII string (the ten decimal digits
// repeated 10,000 times), used as input by the 100K ASCII benchmarks.
var ascii100000 = strings.Repeat("0123456789", 10000)
559+
557560
// BenchmarkValidTenASCIIChars measures Valid on a ten-byte, all-ASCII slice.
func BenchmarkValidTenASCIIChars(b *testing.B) {
	// Convert once, outside the loop, so only Valid is exercised per iteration.
	input := []byte("0123456789")
	for n := 0; n < b.N; n++ {
		Valid(input)
	}
}
563566

567+
func BenchmarkValid100KASCIIChars(b *testing.B) {
568+
s := []byte(ascii100000)
569+
for i := 0; i < b.N; i++ {
570+
Valid(s)
571+
}
572+
}
573+
564574
// BenchmarkValidTenJapaneseChars measures Valid on a slice holding ten
// Japanese (multi-byte) characters.
func BenchmarkValidTenJapaneseChars(b *testing.B) {
	// Convert once, outside the loop, so only Valid is exercised per iteration.
	input := []byte("日本語日本語日本語日")
	for n := 0; n < b.N; n++ {
		Valid(input)
	}
}
580+
func BenchmarkValidLongMostlyASCII(b *testing.B) {
581+
longMostlyASCII := []byte(longStringMostlyASCII)
582+
for i := 0; i < b.N; i++ {
583+
Valid(longMostlyASCII)
584+
}
585+
}
586+
587+
func BenchmarkValidLongJapanese(b *testing.B) {
588+
longJapanese := []byte(longStringJapanese)
589+
for i := 0; i < b.N; i++ {
590+
Valid(longJapanese)
591+
}
592+
}
570593

571594
// BenchmarkValidStringTenASCIIChars measures ValidString on a ten-byte,
// all-ASCII string literal.
func BenchmarkValidStringTenASCIIChars(b *testing.B) {
	for n := 0; n < b.N; n++ {
		ValidString("0123456789")
	}
}
576599

600+
func BenchmarkValidString100KASCIIChars(b *testing.B) {
601+
for i := 0; i < b.N; i++ {
602+
ValidString(ascii100000)
603+
}
604+
}
605+
577606
// BenchmarkValidStringTenJapaneseChars measures ValidString on a string
// literal holding ten Japanese (multi-byte) characters.
func BenchmarkValidStringTenJapaneseChars(b *testing.B) {
	for n := 0; n < b.N; n++ {
		ValidString("日本語日本語日本語日")
	}
}
582611

612+
func BenchmarkValidStringLongMostlyASCII(b *testing.B) {
613+
for i := 0; i < b.N; i++ {
614+
ValidString(longStringMostlyASCII)
615+
}
616+
}
617+
618+
func BenchmarkValidStringLongJapanese(b *testing.B) {
619+
for i := 0; i < b.N; i++ {
620+
ValidString(longStringJapanese)
621+
}
622+
}
623+
624+
// Long benchmark inputs, populated once by init.
var (
	longStringMostlyASCII string // ~100KB, ~97% ASCII
	longStringJapanese    string // ~100KB, non-ASCII
)

// init builds the two long benchmark inputs.
//
// longStringMostlyASCII is assembled chunk by chunk until it holds at least
// 100,000 bytes: every hundredth chunk (starting with the first) is the
// Japanese sample, all others are the ten ASCII digits.
// longStringJapanese is the Japanese sample repeated as many whole times as
// fit in 100,000 bytes.
func init() {
	const (
		japanese = "日本語日本語日本語日"
		digits   = "0123456789"
		target   = 100_000
	)

	var buf bytes.Buffer
	for n := 0; buf.Len() < target; n++ {
		chunk := digits
		if n%100 == 0 {
			chunk = japanese
		}
		buf.WriteString(chunk)
	}

	longStringMostlyASCII = buf.String()
	longStringJapanese = strings.Repeat(japanese, target/len(japanese))
}
640+
583641
func BenchmarkEncodeASCIIRune(b *testing.B) {
584642
buf := make([]byte, UTFMax)
585643
for i := 0; i < b.N; i++ {

0 commit comments

Comments
 (0)