aboutsummaryrefslogtreecommitdiff
path: root/src/unicode
diff options
context:
space:
mode:
authorMarcel van Lohuizen <mpvl@golang.org>2015-11-05 17:52:53 +0100
committerMarcel van Lohuizen <mpvl@golang.org>2015-11-16 11:29:13 +0000
commit9b299c1efd902842b788d7dc103512b6b2568ea9 (patch)
tree17b3d2e273385f06f0b866e931ef9ac88e8e8e97 /src/unicode
parent25a28da0807f3fa85588fb219f6fa40314bde675 (diff)
downloadgo-9b299c1efd902842b788d7dc103512b6b2568ea9.tar.gz
go-9b299c1efd902842b788d7dc103512b6b2568ea9.zip
unicode/utf8: removed uses of ranging over string
Ranging over string is much slower than using DecodeRuneInString. See golang.org/issue/13162. Replacing ranging over a string with the implementation of the Bytes counterpart results in the following performance improvements: RuneCountInStringTenASCIIChars-8 43.0ns ± 1% 16.4ns ± 2% -61.80% (p=0.000 n=7+8) RuneCountInStringTenJapaneseChars-8 161ns ± 2% 154ns ± 2% -4.58% (p=0.000 n=8+8) ValidStringTenASCIIChars-8 52.2ns ± 1% 13.2ns ± 1% -74.62% (p=0.001 n=7+7) ValidStringTenJapaneseChars-8 173ns ± 2% 153ns ± 2% -11.78% (p=0.000 n=7+8) Update golang/go#13162 Change-Id: Ifc40a6a94bb3317f1f2d929d310bd2694645e9f6 Reviewed-on: https://go-review.googlesource.com/16695 Reviewed-by: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src/unicode')
-rw-r--r--src/unicode/utf8/utf8.go24
1 files changed, 16 insertions, 8 deletions
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go
index 9ac37184d6..5d29ec080e 100644
--- a/src/unicode/utf8/utf8.go
+++ b/src/unicode/utf8/utf8.go
@@ -382,10 +382,16 @@ func RuneCount(p []byte) int {
// RuneCountInString is like RuneCount but its input is a string.
func RuneCountInString(s string) (n int) {
- for range s {
+ for i := 0; i < len(s); {
n++
+ if s[i] < RuneSelf {
+ i++
+ } else {
+ _, size := DecodeRuneInString(s[i:])
+ i += size
+ }
}
- return
+ return n
}
// RuneStart reports whether the byte could be the first byte of
@@ -415,16 +421,18 @@ func Valid(p []byte) bool {
// ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
func ValidString(s string) bool {
- for i, r := range s {
- if r == RuneError {
- // The RuneError value can be an error
- // sentinel value (if it's size 1) or the same
- // value encoded properly. Decode it to see if
- // it's the 1 byte sentinel value.
+ for i := 0; i < len(s); {
+ if s[i] < RuneSelf {
+ i++
+ } else {
_, size := DecodeRuneInString(s[i:])
if size == 1 {
+ // All valid runes of size 1 (those
+ // below RuneSelf) were handled above.
+ // This must be a RuneError.
return false
}
+ i += size
}
}
return true