diff options
author | erifan01 <eric.fang@arm.com> | 2019-11-21 14:38:25 +0800 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2020-03-04 17:36:05 +0000 |
commit | 18a6fd44bb9de9399dba86a4c1ae1e56f967bcda (patch) | |
tree | 52d4c6a7c8d9cbaff8f7717971305d7d88378a63 /src/strings | |
parent | cec08794ef325e84f141e1a7b4deca0bedaeab34 (diff) | |
download | go-18a6fd44bb9de9399dba86a4c1ae1e56f967bcda.tar.gz go-18a6fd44bb9de9399dba86a4c1ae1e56f967bcda.zip |
bytes, strings: moves indexRabinKarp function to internal/bytealg
In order to facilitate optimization of IndexAny and LastIndexAny, this patch moves
three Rabin-Karp related functions indexRabinKarp, hashStr and hashStrRev in strings
package to initernal/bytealg. There are also three functions in the bytes package with
the same names and functions but different parameter types. To highlight this, this
patch also moves them to internal/bytealg and gives them slightly different names.
Related benchmark changes on amd64 and arm64:
name old time/op new time/op delta
pkg:strings goos:linux goarch:amd64
Index-16 14.0ns ± 1% 14.1ns ± 2% ~ (p=0.738 n=5+5)
LastIndex-16 15.5ns ± 1% 15.7ns ± 4% ~ (p=0.897 n=5+5)
pkg:bytes goos:linux goarch:amd64
Index/10-16 26.5ns ± 1% 26.5ns ± 0% ~ (p=0.873 n=5+5)
Index/32-16 26.2ns ± 0% 25.7ns ± 0% -1.68% (p=0.008 n=5+5)
Index/4K-16 5.12µs ± 4% 5.14µs ± 2% ~ (p=0.841 n=5+5)
Index/4M-16 5.44ms ± 3% 5.34ms ± 2% ~ (p=0.056 n=5+5)
Index/64M-16 85.8ms ± 3% 84.6ms ± 0% -1.37% (p=0.016 n=5+5)
name old speed new speed delta
pkg:bytes goos:linux goarch:amd64
Index/10-16 377MB/s ± 1% 377MB/s ± 0% ~ (p=1.000 n=5+5)
Index/32-16 1.22GB/s ± 1% 1.24GB/s ± 0% +1.66% (p=0.008 n=5+5)
Index/4K-16 800MB/s ± 4% 797MB/s ± 2% ~ (p=0.841 n=5+5)
Index/4M-16 771MB/s ± 3% 786MB/s ± 2% ~ (p=0.056 n=5+5)
Index/64M-16 783MB/s ± 3% 793MB/s ± 0% +1.36% (p=0.016 n=5+5)
name old time/op new time/op delta
pkg:strings goos:linux goarch:arm64
Index-8 22.6ns ± 0% 22.5ns ± 0% ~ (p=0.167 n=5+5)
LastIndex-8 17.5ns ± 0% 17.5ns ± 0% ~ (all equal)
pkg:bytes goos:linux goarch:arm64
Index/10-8 25.0ns ± 0% 25.0ns ± 0% ~ (all equal)
Index/32-8 160ns ± 0% 160ns ± 0% ~ (all equal)
Index/4K-8 6.26µs ± 0% 6.26µs ± 0% ~ (p=0.167 n=5+5)
Index/4M-8 6.30ms ± 0% 6.31ms ± 0% ~ (p=1.000 n=5+5)
Index/64M-8 101ms ± 0% 101ms ± 0% ~ (p=0.690 n=5+5)
name old speed new speed delta
pkg:bytes goos:linux goarch:arm64
Index/10-8 399MB/s ± 0% 400MB/s ± 0% +0.08% (p=0.008 n=5+5)
Index/32-8 200MB/s ± 0% 200MB/s ± 0% ~ (p=0.127 n=4+5)
Index/4K-8 654MB/s ± 0% 654MB/s ± 0% +0.01% (p=0.016 n=5+5)
Index/4M-8 665MB/s ± 0% 665MB/s ± 0% ~ (p=0.833 n=5+5)
Index/64M-8 665MB/s ± 0% 665MB/s ± 0% ~ (p=0.913 n=5+5)
Change-Id: Icce3bc162bb8613ac36dc963a46c51f8e82ab842
Reviewed-on: https://go-review.googlesource.com/c/go/+/208638
Run-TryBot: eric fang <eric.fang@arm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/strings')
-rw-r--r-- | src/strings/strings.go | 68 |
1 files changed, 4 insertions, 64 deletions
diff --git a/src/strings/strings.go b/src/strings/strings.go index 238d657f61..7fb05b7d0e 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -36,43 +36,6 @@ func explode(s string, n int) []string { return a } -// primeRK is the prime base used in Rabin-Karp algorithm. -const primeRK = 16777619 - -// hashStr returns the hash and the appropriate multiplicative -// factor for use in Rabin-Karp algorithm. -func hashStr(sep string) (uint32, uint32) { - hash := uint32(0) - for i := 0; i < len(sep); i++ { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - -// hashStrRev returns the hash of the reverse of sep and the -// appropriate multiplicative factor for use in Rabin-Karp algorithm. -func hashStrRev(sep string) (uint32, uint32) { - hash := uint32(0) - for i := len(sep) - 1; i >= 0; i-- { - hash = hash*primeRK + uint32(sep[i]) - } - var pow, sq uint32 = 1, primeRK - for i := len(sep); i > 0; i >>= 1 { - if i&1 != 0 { - pow *= sq - } - sq *= sq - } - return hash, pow -} - // Count counts the number of non-overlapping instances of substr in s. // If substr is an empty string, Count returns 1 + the number of Unicode code points in s. func Count(s, substr string) int { @@ -126,17 +89,17 @@ func LastIndex(s, substr string) int { return -1 } // Rabin-Karp search from the end of the string - hashss, pow := hashStrRev(substr) + hashss, pow := bytealg.HashStrRev(substr) last := len(s) - n var h uint32 for i := len(s) - 1; i >= last; i-- { - h = h*primeRK + uint32(s[i]) + h = h*bytealg.PrimeRK + uint32(s[i]) } if h == hashss && s[last:] == substr { return last } for i := last - 1; i >= 0; i-- { - h *= primeRK + h *= bytealg.PrimeRK h += uint32(s[i]) h -= pow * uint32(s[i+n]) if h == hashss && s[i:i+n] == substr { @@ -1095,7 +1058,7 @@ func Index(s, substr string) int { fails++ if fails >= 4+i>>4 && i < t { // See comment in ../bytes/bytes.go. - j := indexRabinKarp(s[i:], substr) + j := bytealg.IndexRabinKarp(s[i:], substr) if j < 0 { return -1 } @@ -1104,26 +1067,3 @@ func Index(s, substr string) int { } return -1 } - -func indexRabinKarp(s, substr string) int { - // Rabin-Karp search - hashss, pow := hashStr(substr) - n := len(substr) - var h uint32 - for i := 0; i < n; i++ { - h = h*primeRK + uint32(s[i]) - } - if h == hashss && s[:n] == substr { - return 0 - } - for i := n; i < len(s); { - h *= primeRK - h += uint32(s[i]) - h -= pow * uint32(s[i-n]) - i++ - if h == hashss && s[i-n:i] == substr { - return i - n - } - } - return -1 -} |