aboutsummaryrefslogtreecommitdiff
path: root/src/unicode
diff options
context:
space:
mode:
authorMartin Möhrmann <moehrmann@google.com>2020-04-18 23:57:28 +0200
committerMartin Möhrmann <moehrmann@google.com>2020-04-22 14:14:34 +0000
commite93d5b5e0509c052afe23b05866e013dacc21f57 (patch)
tree39586ce1eb0db2c70678a3dd8283b2314954207d /src/unicode
parenta45ea55da732198dca2457ecb7cabe4ab4a7a162 (diff)
downloadgo-e93d5b5e0509c052afe23b05866e013dacc21f57.tar.gz
go-e93d5b5e0509c052afe23b05866e013dacc21f57.zip
unicode/utf8: optimize Valid and ValidString for ASCII checks
Add a fastpath that uses 32bit loads and compares to check 8 ASCII characters per loop iteration. This avoids the overhead of comparing and branching for every byte individually. Combining two 32bit loads into an uint32 allows the same code to be used for 32bit and 64bit platforms. amd64 (Intel i7-3520M): name old time/op new time/op delta ValidTenASCIIChars 15.6ns ± 4% 8.5ns ±14% -45.27% (p=0.000 n=10+10) ValidTenJapaneseChars 50.0ns ± 2% 52.7ns ±15% ~ (p=0.469 n=10+10) ValidStringTenASCIIChars 13.5ns ± 1% 7.9ns ± 5% -41.56% (p=0.000 n=10+10) ValidStringTenJapaneseChars 46.3ns ± 2% 45.8ns ± 2% ~ (p=0.085 n=10+10) arm (Raspberry Pi 3): name old time/op new time/op delta ValidTenASCIIChars 87.5ns ± 0% 58.5ns ± 0% -33.11% (p=0.000 n=9+10) ValidTenJapaneseChars 359ns ± 0% 384ns ± 0% +6.96% (p=0.000 n=10+9) ValidStringTenASCIIChars 87.5ns ± 0% 57.5ns ± 0% -34.31% (p=0.000 n=10+10) ValidStringTenJapaneseChars 356ns ± 0% 377ns ± 0% +5.90% (p=0.000 n=10+10) Change-Id: I9da942bddb250ee1f0ef7aabb4a8cb48edd9053e Reviewed-on: https://go-review.googlesource.com/c/go/+/228823 Run-TryBot: Martin Möhrmann <moehrmann@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/unicode')
-rw-r--r--src/unicode/utf8/utf8.go28
1 files changed, 28 insertions, 0 deletions
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go
index b8368fce41..ef0d740960 100644
--- a/src/unicode/utf8/utf8.go
+++ b/src/unicode/utf8/utf8.go
@@ -448,6 +448,20 @@ func RuneStart(b byte) bool { return b&0xC0 != 0x80 }
// Valid reports whether p consists entirely of valid UTF-8-encoded runes.
func Valid(p []byte) bool {
+ // Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
+ for len(p) >= 8 {
+ // Combining two 32 bit loads allows the same code to be used
+ // for 32 and 64 bit platforms.
+ // The compiler can generate a 32bit load for first32 and second32
+ // on many platforms. See test/codegen/memcombine.go.
+ first32 := uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+ second32 := uint32(p[4]) | uint32(p[5])<<8 | uint32(p[6])<<16 | uint32(p[7])<<24
+ if (first32|second32)&0x80808080 != 0 {
+ // Found a non ASCII byte (>= RuneSelf).
+ break
+ }
+ p = p[8:]
+ }
n := len(p)
for i := 0; i < n; {
pi := p[i]
@@ -480,6 +494,20 @@ func Valid(p []byte) bool {
// ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
func ValidString(s string) bool {
+ // Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
+ for len(s) >= 8 {
+ // Combining two 32 bit loads allows the same code to be used
+ // for 32 and 64 bit platforms.
+ // The compiler can generate a 32bit load for first32 and second32
+ // on many platforms. See test/codegen/memcombine.go.
+ first32 := uint32(s[0]) | uint32(s[1])<<8 | uint32(s[2])<<16 | uint32(s[3])<<24
+ second32 := uint32(s[4]) | uint32(s[5])<<8 | uint32(s[6])<<16 | uint32(s[7])<<24
+ if (first32|second32)&0x80808080 != 0 {
+ // Found a non ASCII byte (>= RuneSelf).
+ break
+ }
+ s = s[8:]
+ }
n := len(s)
for i := 0; i < n; {
si := s[i]