diff options
author | Martin Möhrmann <moehrmann@google.com> | 2020-04-18 23:57:28 +0200 |
---|---|---|
committer | Martin Möhrmann <moehrmann@google.com> | 2020-04-22 14:14:34 +0000 |
commit | e93d5b5e0509c052afe23b05866e013dacc21f57 (patch) | |
tree | 39586ce1eb0db2c70678a3dd8283b2314954207d /src/unicode | |
parent | a45ea55da732198dca2457ecb7cabe4ab4a7a162 (diff) | |
download | go-e93d5b5e0509c052afe23b05866e013dacc21f57.tar.gz go-e93d5b5e0509c052afe23b05866e013dacc21f57.zip |
unicode/utf8: optimize Valid and ValidString for ASCII checks
Add a fastpath that uses 32bit loads and compares to check
8 ASCII characters per loop iteration.
This avoids the overhead of comparing and branching
for every byte individually.
Combining two 32bit loads into an uint32 allows the same
code to be used for 32bit and 64bit platforms.
amd64 (Intel i7-3520M):
name old time/op new time/op delta
ValidTenASCIIChars 15.6ns ± 4% 8.5ns ±14% -45.27% (p=0.000 n=10+10)
ValidTenJapaneseChars 50.0ns ± 2% 52.7ns ±15% ~ (p=0.469 n=10+10)
ValidStringTenASCIIChars 13.5ns ± 1% 7.9ns ± 5% -41.56% (p=0.000 n=10+10)
ValidStringTenJapaneseChars 46.3ns ± 2% 45.8ns ± 2% ~ (p=0.085 n=10+10)
arm (Raspberry Pi 3):
name old time/op new time/op delta
ValidTenASCIIChars 87.5ns ± 0% 58.5ns ± 0% -33.11% (p=0.000 n=9+10)
ValidTenJapaneseChars 359ns ± 0% 384ns ± 0% +6.96% (p=0.000 n=10+9)
ValidStringTenASCIIChars 87.5ns ± 0% 57.5ns ± 0% -34.31% (p=0.000 n=10+10)
ValidStringTenJapaneseChars 356ns ± 0% 377ns ± 0% +5.90% (p=0.000 n=10+10)
Change-Id: I9da942bddb250ee1f0ef7aabb4a8cb48edd9053e
Reviewed-on: https://go-review.googlesource.com/c/go/+/228823
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/unicode')
-rw-r--r-- | src/unicode/utf8/utf8.go | 28 |
1 files changed, 28 insertions, 0 deletions
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index b8368fce41..ef0d740960 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -448,6 +448,20 @@ func RuneStart(b byte) bool { return b&0xC0 != 0x80 } // Valid reports whether p consists entirely of valid UTF-8-encoded runes. func Valid(p []byte) bool { + // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. + for len(p) >= 8 { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + // The compiler can generate a 32bit load for first32 and second32 + // on many platforms. See test/codegen/memcombine.go. + first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 + second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24 + if (first32|second32)&0x80808080 != 0 { + // Found a non ASCII byte (>= RuneSelf). + break + } + p = p[8:] + } n := len(p) for i := 0; i < n; { pi := p[i] @@ -480,6 +494,20 @@ func Valid(p []byte) bool { // ValidString reports whether s consists entirely of valid UTF-8-encoded runes. func ValidString(s string) bool { + // Fast path. Check for and skip 8 bytes of ASCII characters per iteration. + for len(s) >= 8 { + // Combining two 32 bit loads allows the same code to be used + // for 32 and 64 bit platforms. + // The compiler can generate a 32bit load for first32 and second32 + // on many platforms. See test/codegen/memcombine.go. + first32 := uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24 + second32 := uint32(s[4]) | uint32(s[5])<<8 | uint32(s[6])<<16 | uint32(s[7])<<24 + if (first32|second32)&0x80808080 != 0 { + // Found a non ASCII byte (>= RuneSelf). + break + } + s = s[8:] + } n := len(s) for i := 0; i < n; { si := s[i] |