diff options
author | Keith Randall <khr@golang.org> | 2018-03-04 09:47:47 -0800 |
---|---|---|
committer | Keith Randall <khr@golang.org> | 2018-03-04 19:49:44 +0000 |
commit | ee58eccc565c0871d3f16fd702fd8649a3fb61ea (patch) | |
tree | 837073b78954dc987cf575ff478faf6fdb8afb0e /src/bytes | |
parent | f6332bb84ad87e958290ae23b29a2b13a41ee2a2 (diff) | |
download | go-ee58eccc565c0871d3f16fd702fd8649a3fb61ea.tar.gz go-ee58eccc565c0871d3f16fd702fd8649a3fb61ea.zip |
internal/bytealg: move short string Index implementations into bytealg
Also move the arm64 CountByte implementation while we're here.
Fixes #19792
Change-Id: I1e0fdf1e03e3135af84150a2703b58dad1b0d57e
Reviewed-on: https://go-review.googlesource.com/98518
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/bytes')
-rw-r--r-- | src/bytes/bytes.go | 86 | ||||
-rw-r--r-- | src/bytes/bytes_amd64.go | 79 | ||||
-rw-r--r-- | src/bytes/bytes_arm64.go | 72 | ||||
-rw-r--r-- | src/bytes/bytes_arm64.s | 197 | ||||
-rw-r--r-- | src/bytes/bytes_generic.go | 59 | ||||
-rw-r--r-- | src/bytes/bytes_s390x.go | 80 |
6 files changed, 86 insertions, 487 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index 08d8260e9e..32bf6ab30d 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -829,6 +829,92 @@ func EqualFold(s, t []byte) bool { return len(s) == len(t) } +// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. +func Index(s, sep []byte) int { + n := len(sep) + switch { + case n == 0: + return 0 + case n == 1: + return IndexByte(s, sep[0]) + case n == len(s): + if Equal(sep, s) { + return 0 + } + return -1 + case n > len(s): + return -1 + case n <= bytealg.MaxLen: + // Use brute force when s and sep both are small + if len(s) <= bytealg.MaxBruteForce { + return bytealg.Index(s, sep) + } + c := sep[0] + i := 0 + t := s[:len(s)-n+1] + fails := 0 + for i < len(t) { + if t[i] != c { + // IndexByte is faster than bytealg.Index, so use it as long as + // we're not getting lots of false positives. + o := IndexByte(t[i:], c) + if o < 0 { + return -1 + } + i += o + } + if Equal(s[i:i+n], sep) { + return i + } + fails++ + i++ + // Switch to bytealg.Index when IndexByte produces too many false positives. + if fails > bytealg.Cutover(i) { + r := bytealg.Index(s[i:], sep) + if r >= 0 { + return r + i + } + return -1 + } + } + return -1 + } + c := sep[0] + i := 0 + fails := 0 + t := s[:len(s)-n+1] + for i < len(t) { + if t[i] != c { + o := IndexByte(t[i:], c) + if o < 0 { + break + } + i += o + } + if Equal(s[i:i+n], sep) { + return i + } + i++ + fails++ + if fails >= 4+i>>4 && i < len(t) { + // Give up on IndexByte, it isn't skipping ahead + // far enough to be better than Rabin-Karp. + // Experiments (using IndexPeriodic) suggest + // the cutover is about 16 byte skips. + // TODO: if large prefixes of sep are matching + // we should cutover at even larger average skips, + // because Equal becomes that much more expensive. + // This code does not take that effect into account. + j := indexRabinKarp(s[i:], sep) + if j < 0 { + return -1 + } + return i + j + } + } + return -1 +} + func indexRabinKarp(s, sep []byte) int { // Rabin-Karp search hashsep, pow := hashStr(sep) diff --git a/src/bytes/bytes_amd64.go b/src/bytes/bytes_amd64.go deleted file mode 100644 index 2fc88c68fc..0000000000 --- a/src/bytes/bytes_amd64.go +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bytes - -import "internal/cpu" - -//go:noescape - -// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s. -// indexShortStr requires 2 <= len(c) <= shortStringLen -func indexShortStr(s, c []byte) int // ../runtime/asm_amd64.s -func countByte(s []byte, c byte) int // ../runtime/asm_amd64.s - -var shortStringLen int - -func init() { - if cpu.X86.HasAVX2 { - shortStringLen = 63 - } else { - shortStringLen = 31 - } -} - -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep []byte) int { - n := len(sep) - switch { - case n == 0: - return 0 - case n == 1: - return IndexByte(s, sep[0]) - case n == len(s): - if Equal(sep, s) { - return 0 - } - return -1 - case n > len(s): - return -1 - case n <= shortStringLen: - // Use brute force when s and sep both are small - if len(s) <= 64 { - return indexShortStr(s, sep) - } - c := sep[0] - i := 0 - t := s[:len(s)-n+1] - fails := 0 - for i < len(t) { - if t[i] != c { - // IndexByte skips 16/32 bytes per iteration, - // so it's faster than indexShortStr. - o := IndexByte(t[i:], c) - if o < 0 { - return -1 - } - i += o - } - if Equal(s[i:i+n], sep) { - return i - } - fails++ - i++ - // Switch to indexShortStr when IndexByte produces too many false positives. - // Too many means more that 1 error per 8 characters. - // Allow some errors in the beginning. - if fails > (i+16)/8 { - r := indexShortStr(s[i:], sep) - if r >= 0 { - return r + i - } - return -1 - } - } - return -1 - } - return indexRabinKarp(s, sep) -} diff --git a/src/bytes/bytes_arm64.go b/src/bytes/bytes_arm64.go deleted file mode 100644 index 39e9562db1..0000000000 --- a/src/bytes/bytes_arm64.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bytes - -func countByte(s []byte, c byte) int // bytes_arm64.s - -// 8 bytes can be completely loaded into 1 register. -const shortStringLen = 8 - -//go:noescape -func indexShortStr(s, sep []byte) int - -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep []byte) int { - n := len(sep) - switch { - case n == 0: - return 0 - case n == 1: - return IndexByte(s, sep[0]) - case n == len(s): - if Equal(sep, s) { - return 0 - } - return -1 - case n > len(s): - return -1 - case n <= shortStringLen: - // Use brute force when both s and sep are small. - // Empirical data shows that it can get better - // performance when len(s) <= 16. - if len(s) <= 16 { - return indexShortStr(s, sep) - } - } - c := sep[0] - i := 0 - fails := 0 - t := s[:len(s)-n+1] - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) - if o < 0 { - break - } - i += o - } - if Equal(s[i:i+n], sep) { - return i - } - i++ - fails++ - if fails >= 4+i>>4 && i < len(t) { - // Give up on IndexByte, it isn't skipping ahead - // far enough to be better than Rabin-Karp. - // Experiments (using IndexPeriodic) suggest - // the cutover is about 16 byte skips. - // TODO: if large prefixes of sep are matching - // we should cutover at even larger average skips, - // because Equal becomes that much more expensive. - // This code does not take that effect into account. - j := indexRabinKarp(s[i:], sep) - if j < 0 { - return -1 - } - return i + j - } - } - return -1 -} diff --git a/src/bytes/bytes_arm64.s b/src/bytes/bytes_arm64.s deleted file mode 100644 index 84e96d52ce..0000000000 --- a/src/bytes/bytes_arm64.s +++ /dev/null @@ -1,197 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -#include "textflag.h" - -// countByte(s []byte, c byte) int -TEXT bytes·countByte(SB),NOSPLIT,$0-40 - MOVD s_base+0(FP), R0 - MOVD s_len+8(FP), R2 - MOVBU c+24(FP), R1 - // R11 = count of byte to search - MOVD $0, R11 - // short path to handle 0-byte case - CBZ R2, done - CMP $0x20, R2 - // jump directly to tail if length < 32 - BLO tail - ANDS $0x1f, R0, R9 - BEQ chunk - // Work with not 32-byte aligned head - BIC $0x1f, R0, R3 - ADD $0x20, R3 -head_loop: - MOVBU.P 1(R0), R5 - CMP R5, R1 - CINC EQ, R11, R11 - SUB $1, R2, R2 - CMP R0, R3 - BNE head_loop - // Work with 32-byte aligned chunks -chunk: - BIC $0x1f, R2, R9 - // The first chunk can also be the last - CBZ R9, tail - // R3 = end of 32-byte chunks - ADD R0, R9, R3 - MOVD $1, R5 - VMOV R5, V5.B16 - // R2 = length of tail - SUB R9, R2, R2 - // Duplicate R1 (byte to search) to 16 1-byte elements of V0 - VMOV R1, V0.B16 - // Clear the low 64-bit element of V7 and V8 - VEOR V7.B8, V7.B8, V7.B8 - VEOR V8.B8, V8.B8, V8.B8 - // Count the target byte in 32-byte chunk -chunk_loop: - VLD1.P (R0), [V1.B16, V2.B16] - CMP R0, R3 - VCMEQ V0.B16, V1.B16, V3.B16 - VCMEQ V0.B16, V2.B16, V4.B16 - // Clear the higher 7 bits - VAND V5.B16, V3.B16, V3.B16 - VAND V5.B16, V4.B16, V4.B16 - // Count lanes match the requested byte - VADDP V4.B16, V3.B16, V6.B16 // 32B->16B - VUADDLV V6.B16, V7 - // Accumulate the count in low 64-bit element of V8 when inside the loop - VADD V7, V8 - BNE chunk_loop - VMOV V8.D[0], R6 - ADD R6, R11, R11 - CBZ R2, done -tail: - // Work with tail shorter than 32 bytes - MOVBU.P 1(R0), R5 - SUB $1, R2, R2 - CMP R5, R1 - CINC EQ, R11, R11 - CBNZ R2, tail -done: - MOVD R11, ret+32(FP) - RET - -// indexShortStr(s, sep []byte) int -// precondition: 2 <= len(sep) <= 8 -TEXT bytes·indexShortStr(SB),NOSPLIT,$0-56 - // main idea is to load 'sep' into separate register(s) - // to avoid repeatedly re-load it again and again - // for sebsequent substring comparisons - MOVD s+0(FP), R0 - MOVD s_len+8(FP), R1 - MOVD sep+24(FP), R2 - MOVD sep_len+32(FP), R3 - SUB R3, R1, R4 - // R4 contains the start of last substring for comparsion - ADD R0, R4, R4 - ADD $1, R0, R8 - TBZ $3, R3, len_2_7 -len_8: - // R5 contains 8-byte sep - MOVD (R2), R5 -loop_8: - // R6 contains substring for comparison - MOVD.P 1(R0), R6 - CMP R5, R6 - BEQ found - CMP R4, R0 - BLS loop_8 - JMP not_found -len_2_7: - TBZ $2, R3, len_2_3 - TBZ $1, R3, len_4_5 - TBZ $0, R3, len_6 -len_7: - // R5 and R6 contain 7-byte sep - MOVWU (R2), R5 - // 1-byte overlap with R5 - MOVWU 3(R2), R6 -loop_7: - MOVWU.P 1(R0), R3 - CMP R5, R3 - BNE not_equal_7 - MOVWU 2(R0), R3 - CMP R6, R3 - BEQ found -not_equal_7: - CMP R4, R0 - BLS loop_7 - JMP not_found -len_6: - // R5 and R6 contain 6-byte sep - MOVWU (R2), R5 - MOVHU 4(R2), R6 -loop_6: - MOVWU.P 1(R0), R3 - CMP R5, R3 - BNE not_equal_6 - MOVHU 3(R0), R3 - CMP R6, R3 - BEQ found -not_equal_6: - CMP R4, R0 - BLS loop_6 - JMP not_found -len_4_5: - TBZ $0, R3, len_4 -len_5: - // R5 and R7 contain 5-byte sep - MOVWU (R2), R5 - MOVBU 4(R2), R7 -loop_5: - MOVWU.P 1(R0), R3 - CMP R5, R3 - BNE not_equal_5 - MOVBU 3(R0), R3 - CMP R7, R3 - BEQ found -not_equal_5: - CMP R4, R0 - BLS loop_5 - JMP not_found -len_4: - // R5 contains 4-byte sep - MOVWU (R2), R5 -loop_4: - MOVWU.P 1(R0), R6 - CMP R5, R6 - BEQ found - CMP R4, R0 - BLS loop_4 - JMP not_found -len_2_3: - TBZ $0, R3, len_2 -len_3: - // R6 and R7 contain 3-byte sep - MOVHU (R2), R6 - MOVBU 2(R2), R7 -loop_3: - MOVHU.P 1(R0), R3 - CMP R6, R3 - BNE not_equal_3 - MOVBU 1(R0), R3 - CMP R7, R3 - BEQ found -not_equal_3: - CMP R4, R0 - BLS loop_3 - JMP not_found -len_2: - // R5 contains 2-byte sep - MOVHU (R2), R5 -loop_2: - MOVHU.P 1(R0), R6 - CMP R5, R6 - BEQ found - CMP R4, R0 - BLS loop_2 -not_found: - MOVD $-1, R0 - MOVD R0, ret+48(FP) - RET -found: - SUB R8, R0, R0 - MOVD R0, ret+48(FP) - RET diff --git a/src/bytes/bytes_generic.go b/src/bytes/bytes_generic.go deleted file mode 100644 index 347d28473f..0000000000 --- a/src/bytes/bytes_generic.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64,!s390x,!arm64 - -package bytes - -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep []byte) int { - n := len(sep) - switch { - case n == 0: - return 0 - case n == 1: - return IndexByte(s, sep[0]) - case n == len(s): - if Equal(sep, s) { - return 0 - } - return -1 - case n > len(s): - return -1 - } - c := sep[0] - i := 0 - fails := 0 - t := s[:len(s)-n+1] - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) - if o < 0 { - break - } - i += o - } - if Equal(s[i:i+n], sep) { - return i - } - i++ - fails++ - if fails >= 4+i>>4 && i < len(t) { - // Give up on IndexByte, it isn't skipping ahead - // far enough to be better than Rabin-Karp. - // Experiments (using IndexPeriodic) suggest - // the cutover is about 16 byte skips. - // TODO: if large prefixes of sep are matching - // we should cutover at even larger average skips, - // because Equal becomes that much more expensive. - // This code does not take that effect into account. - j := indexRabinKarp(s[i:], sep) - if j < 0 { - return -1 - } - return i + j - } - } - return -1 -} diff --git a/src/bytes/bytes_s390x.go b/src/bytes/bytes_s390x.go deleted file mode 100644 index 84f040d43d..0000000000 --- a/src/bytes/bytes_s390x.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bytes - -//go:noescape - -// indexShortStr returns the index of the first instance of sep in s, -// or -1 if sep is not present in s. -// indexShortStr requires 2 <= len(sep) <= shortStringLen -func indexShortStr(s, c []byte) int // ../runtime/asm_s390x.s - -// supportsVX reports whether the vector facility is available. -// indexShortStr must not be called if the vector facility is not -// available. -func supportsVX() bool // ../runtime/asm_s390x.s - -var shortStringLen = -1 - -func init() { - if supportsVX() { - shortStringLen = 64 - } -} - -// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. -func Index(s, sep []byte) int { - n := len(sep) - switch { - case n == 0: - return 0 - case n == 1: - return IndexByte(s, sep[0]) - case n == len(s): - if Equal(sep, s) { - return 0 - } - return -1 - case n > len(s): - return -1 - case n <= shortStringLen: - // Use brute force when s and sep both are small - if len(s) <= 64 { - return indexShortStr(s, sep) - } - c := sep[0] - i := 0 - t := s[:len(s)-n+1] - fails := 0 - for i < len(t) { - if t[i] != c { - // IndexByte skips 16/32 bytes per iteration, - // so it's faster than indexShortStr. - o := IndexByte(t[i:], c) - if o < 0 { - return -1 - } - i += o - } - if Equal(s[i:i+n], sep) { - return i - } - fails++ - i++ - // Switch to indexShortStr when IndexByte produces too many false positives. - // Too many means more that 1 error per 8 characters. - // Allow some errors in the beginning. - if fails > (i+16)/8 { - r := indexShortStr(s[i:], sep) - if r >= 0 { - return r + i - } - return -1 - } - } - return -1 - } - return indexRabinKarp(s, sep) -} |