internal/bytealg: move short string Index implementations into bytealg

Also move the arm64 CountByte implementation while we're here. Fixes #19792 Change-Id: I1e0fdf1e03e3135af84150a2703b58dad1b0d57e Reviewed-on: https://go-review.googlesource.com/98518 Run-TryBot: Keith Randall <khr@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
author: Keith Randall <khr@golang.org> 2018-03-04 09:47:47 -0800
committer: Keith Randall <khr@golang.org> 2018-03-04 19:49:44 +0000
commit: ee58eccc565c0871d3f16fd702fd8649a3fb61ea (patch)
tree: 837073b78954dc987cf575ff478faf6fdb8afb0e /src/bytes
parent: f6332bb84ad87e958290ae23b29a2b13a41ee2a2 (diff)
download: go-ee58eccc565c0871d3f16fd702fd8649a3fb61ea.tar.gz
go-ee58eccc565c0871d3f16fd702fd8649a3fb61ea.zip
6 files changed, 86 insertions, 487 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index 08d8260e9e..32bf6ab30d 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -829,6 +829,92 @@ func EqualFold(s, t []byte) bool {
 	return len(s) == len(t)
 }
 
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep []byte) int {
+	n := len(sep)
+	switch {
+	case n == 0:
+		return 0
+	case n == 1:
+		return IndexByte(s, sep[0])
+	case n == len(s):
+		if Equal(sep, s) {
+			return 0
+		}
+		return -1
+	case n > len(s):
+		return -1
+	case n <= bytealg.MaxLen:
+		// Use brute force when s and sep both are small
+		if len(s) <= bytealg.MaxBruteForce {
+			return bytealg.Index(s, sep)
+		}
+		c := sep[0]
+		i := 0
+		t := s[:len(s)-n+1]
+		fails := 0
+		for i < len(t) {
+			if t[i] != c {
+				// IndexByte is faster than bytealg.Index, so use it as long as
+				// we're not getting lots of false positives.
+				o := IndexByte(t[i:], c)
+				if o < 0 {
+					return -1
+				}
+				i += o
+			}
+			if Equal(s[i:i+n], sep) {
+				return i
+			}
+			fails++
+			i++
+			// Switch to bytealg.Index when IndexByte produces too many false positives.
+			if fails > bytealg.Cutover(i) {
+				r := bytealg.Index(s[i:], sep)
+				if r >= 0 {
+					return r + i
+				}
+				return -1
+			}
+		}
+		return -1
+	}
+	c := sep[0]
+	i := 0
+	fails := 0
+	t := s[:len(s)-n+1]
+	for i < len(t) {
+		if t[i] != c {
+			o := IndexByte(t[i:], c)
+			if o < 0 {
+				break
+			}
+			i += o
+		}
+		if Equal(s[i:i+n], sep) {
+			return i
+		}
+		i++
+		fails++
+		if fails >= 4+i>>4 && i < len(t) {
+			// Give up on IndexByte, it isn't skipping ahead
+			// far enough to be better than Rabin-Karp.
+			// Experiments (using IndexPeriodic) suggest
+			// the cutover is about 16 byte skips.
+			// TODO: if large prefixes of sep are matching
+			// we should cutover at even larger average skips,
+			// because Equal becomes that much more expensive.
+			// This code does not take that effect into account.
+			j := indexRabinKarp(s[i:], sep)
+			if j < 0 {
+				return -1
+			}
+			return i + j
+		}
+	}
+	return -1
+}
+
 func indexRabinKarp(s, sep []byte) int {
 	// Rabin-Karp search
 	hashsep, pow := hashStr(sep)
diff --git a/src/bytes/bytes_amd64.go b/src/bytes/bytes_amd64.go
deleted file mode 100644
index 2fc88c68fc..0000000000
--- a/src/bytes/bytes_amd64.go
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bytes
-
-import "internal/cpu"
-
-//go:noescape
-
-// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
-// indexShortStr requires 2 <= len(c) <= shortStringLen
-func indexShortStr(s, c []byte) int  // ../runtime/asm_amd64.s
-func countByte(s []byte, c byte) int // ../runtime/asm_amd64.s
-
-var shortStringLen int
-
-func init() {
-	if cpu.X86.HasAVX2 {
-		shortStringLen = 63
-	} else {
-		shortStringLen = 31
-	}
-}
-
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep []byte) int {
-	n := len(sep)
-	switch {
-	case n == 0:
-		return 0
-	case n == 1:
-		return IndexByte(s, sep[0])
-	case n == len(s):
-		if Equal(sep, s) {
-			return 0
-		}
-		return -1
-	case n > len(s):
-		return -1
-	case n <= shortStringLen:
-		// Use brute force when s and sep both are small
-		if len(s) <= 64 {
-			return indexShortStr(s, sep)
-		}
-		c := sep[0]
-		i := 0
-		t := s[:len(s)-n+1]
-		fails := 0
-		for i < len(t) {
-			if t[i] != c {
-				// IndexByte skips 16/32 bytes per iteration,
-				// so it's faster than indexShortStr.
-				o := IndexByte(t[i:], c)
-				if o < 0 {
-					return -1
-				}
-				i += o
-			}
-			if Equal(s[i:i+n], sep) {
-				return i
-			}
-			fails++
-			i++
-			// Switch to indexShortStr when IndexByte produces too many false positives.
-			// Too many means more that 1 error per 8 characters.
-			// Allow some errors in the beginning.
-			if fails > (i+16)/8 {
-				r := indexShortStr(s[i:], sep)
-				if r >= 0 {
-					return r + i
-				}
-				return -1
-			}
-		}
-		return -1
-	}
-	return indexRabinKarp(s, sep)
-}
diff --git a/src/bytes/bytes_arm64.go b/src/bytes/bytes_arm64.go
deleted file mode 100644
index 39e9562db1..0000000000
--- a/src/bytes/bytes_arm64.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bytes
-
-func countByte(s []byte, c byte) int // bytes_arm64.s
-
-// 8 bytes can be completely loaded into 1 register.
-const shortStringLen = 8
-
-//go:noescape
-func indexShortStr(s, sep []byte) int
-
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep []byte) int {
-	n := len(sep)
-	switch {
-	case n == 0:
-		return 0
-	case n == 1:
-		return IndexByte(s, sep[0])
-	case n == len(s):
-		if Equal(sep, s) {
-			return 0
-		}
-		return -1
-	case n > len(s):
-		return -1
-	case n <= shortStringLen:
-		// Use brute force when both s and sep are small.
-		// Empirical data shows that it can get better
-		// performance when len(s) <= 16.
-		if len(s) <= 16 {
-			return indexShortStr(s, sep)
-		}
-	}
-	c := sep[0]
-	i := 0
-	fails := 0
-	t := s[:len(s)-n+1]
-	for i < len(t) {
-		if t[i] != c {
-			o := IndexByte(t[i:], c)
-			if o < 0 {
-				break
-			}
-			i += o
-		}
-		if Equal(s[i:i+n], sep) {
-			return i
-		}
-		i++
-		fails++
-		if fails >= 4+i>>4 && i < len(t) {
-			// Give up on IndexByte, it isn't skipping ahead
-			// far enough to be better than Rabin-Karp.
-			// Experiments (using IndexPeriodic) suggest
-			// the cutover is about 16 byte skips.
-			// TODO: if large prefixes of sep are matching
-			// we should cutover at even larger average skips,
-			// because Equal becomes that much more expensive.
-			// This code does not take that effect into account.
-			j := indexRabinKarp(s[i:], sep)
-			if j < 0 {
-				return -1
-			}
-			return i + j
-		}
-	}
-	return -1
-}
diff --git a/src/bytes/bytes_arm64.s b/src/bytes/bytes_arm64.s
deleted file mode 100644
index 84e96d52ce..0000000000
--- a/src/bytes/bytes_arm64.s
+++ /dev/null
@@ -1,197 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// countByte(s []byte, c byte) int
-TEXT bytes·countByte(SB),NOSPLIT,$0-40
-	MOVD	s_base+0(FP), R0
-	MOVD	s_len+8(FP), R2
-	MOVBU	c+24(FP), R1
-	// R11 = count of byte to search
-	MOVD	$0, R11
-	// short path to handle 0-byte case
-	CBZ	R2, done
-	CMP	$0x20, R2
-	// jump directly to tail if length < 32
-	BLO	tail
-	ANDS	$0x1f, R0, R9
-	BEQ	chunk
-	// Work with not 32-byte aligned head
-	BIC	$0x1f, R0, R3
-	ADD	$0x20, R3
-head_loop:
-	MOVBU.P	1(R0), R5
-	CMP	R5, R1
-	CINC	EQ, R11, R11
-	SUB	$1, R2, R2
-	CMP	R0, R3
-	BNE	head_loop
-	// Work with 32-byte aligned chunks
-chunk:
-	BIC	$0x1f, R2, R9
-	// The first chunk can also be the last
-	CBZ	R9, tail
-	// R3 = end of 32-byte chunks
-	ADD	R0, R9, R3
-	MOVD	$1, R5
-	VMOV	R5, V5.B16
-	// R2 = length of tail
-	SUB	R9, R2, R2
-	// Duplicate R1 (byte to search) to 16 1-byte elements of V0
-	VMOV	R1, V0.B16
-	// Clear the low 64-bit element of V7 and V8
-	VEOR	V7.B8, V7.B8, V7.B8
-	VEOR	V8.B8, V8.B8, V8.B8
-	// Count the target byte in 32-byte chunk
-chunk_loop:
-	VLD1.P	(R0), [V1.B16, V2.B16]
-	CMP	R0, R3
-	VCMEQ	V0.B16, V1.B16, V3.B16
-	VCMEQ	V0.B16, V2.B16, V4.B16
-	// Clear the higher 7 bits
-	VAND	V5.B16, V3.B16, V3.B16
-	VAND	V5.B16, V4.B16, V4.B16
-	// Count lanes match the requested byte
-	VADDP	V4.B16, V3.B16, V6.B16 // 32B->16B
-	VUADDLV	V6.B16, V7
-	// Accumulate the count in low 64-bit element of V8 when inside the loop
-	VADD	V7, V8
-	BNE	chunk_loop
-	VMOV	V8.D[0], R6
-	ADD	R6, R11, R11
-	CBZ	R2, done
-tail:
-	// Work with tail shorter than 32 bytes
-	MOVBU.P	1(R0), R5
-	SUB	$1, R2, R2
-	CMP	R5, R1
-	CINC	EQ, R11, R11
-	CBNZ	R2, tail
-done:
-	MOVD	R11, ret+32(FP)
-	RET
-
-// indexShortStr(s, sep []byte) int
-// precondition: 2 <= len(sep) <= 8
-TEXT bytes·indexShortStr(SB),NOSPLIT,$0-56
-	// main idea is to load 'sep' into separate register(s)
-	// to avoid repeatedly re-load it again and again
-	// for sebsequent substring comparisons
-	MOVD	s+0(FP), R0
-	MOVD	s_len+8(FP), R1
-	MOVD	sep+24(FP), R2
-	MOVD	sep_len+32(FP), R3
-	SUB	R3, R1, R4
-	// R4 contains the start of last substring for comparsion
-	ADD	R0, R4, R4
-	ADD	$1, R0, R8
-	TBZ	$3, R3, len_2_7
-len_8:
-	// R5 contains 8-byte sep
-	MOVD	(R2), R5
-loop_8:
-	// R6 contains substring for comparison
-	MOVD.P	1(R0), R6
-	CMP	R5, R6
-	BEQ	found
-	CMP	R4, R0
-	BLS	loop_8
-	JMP	not_found
-len_2_7:
-	TBZ	$2, R3, len_2_3
-	TBZ	$1, R3, len_4_5
-	TBZ	$0, R3, len_6
-len_7:
-	// R5 and R6 contain 7-byte sep
-	MOVWU	(R2), R5
-	// 1-byte overlap with R5
-	MOVWU	3(R2), R6
-loop_7:
-	MOVWU.P	1(R0), R3
-	CMP	R5, R3
-	BNE	not_equal_7
-	MOVWU	2(R0), R3
-	CMP	R6, R3
-	BEQ	found
-not_equal_7:
-	CMP	R4, R0
-	BLS	loop_7
-	JMP	not_found
-len_6:
-	// R5 and R6 contain 6-byte sep
-	MOVWU	(R2), R5
-	MOVHU	4(R2), R6
-loop_6:
-	MOVWU.P	1(R0), R3
-	CMP	R5, R3
-	BNE	not_equal_6
-	MOVHU	3(R0), R3
-	CMP	R6, R3
-	BEQ	found
-not_equal_6:
-	CMP	R4, R0
-	BLS	loop_6
-	JMP	not_found
-len_4_5:
-	TBZ	$0, R3, len_4
-len_5:
-	// R5 and R7 contain 5-byte sep
-	MOVWU	(R2), R5
-	MOVBU	4(R2), R7
-loop_5:
-	MOVWU.P	1(R0), R3
-	CMP	R5, R3
-	BNE	not_equal_5
-	MOVBU	3(R0), R3
-	CMP	R7, R3
-	BEQ	found
-not_equal_5:
-	CMP	R4, R0
-	BLS	loop_5
-	JMP	not_found
-len_4:
-	// R5 contains 4-byte sep
-	MOVWU	(R2), R5
-loop_4:
-	MOVWU.P	1(R0), R6
-	CMP	R5, R6
-	BEQ	found
-	CMP	R4, R0
-	BLS	loop_4
-	JMP	not_found
-len_2_3:
-	TBZ	$0, R3, len_2
-len_3:
-	// R6 and R7 contain 3-byte sep
-	MOVHU	(R2), R6
-	MOVBU	2(R2), R7
-loop_3:
-	MOVHU.P	1(R0), R3
-	CMP	R6, R3
-	BNE	not_equal_3
-	MOVBU	1(R0), R3
-	CMP	R7, R3
-	BEQ	found
-not_equal_3:
-	CMP	R4, R0
-	BLS	loop_3
-	JMP	not_found
-len_2:
-	// R5 contains 2-byte sep
-	MOVHU	(R2), R5
-loop_2:
-	MOVHU.P	1(R0), R6
-	CMP	R5, R6
-	BEQ	found
-	CMP	R4, R0
-	BLS	loop_2
-not_found:
-	MOVD	$-1, R0
-	MOVD	R0, ret+48(FP)
-	RET
-found:
-	SUB	R8, R0, R0
-	MOVD	R0, ret+48(FP)
-	RET
diff --git a/src/bytes/bytes_generic.go b/src/bytes/bytes_generic.go
deleted file mode 100644
index 347d28473f..0000000000
--- a/src/bytes/bytes_generic.go
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !amd64,!s390x,!arm64
-
-package bytes
-
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep []byte) int {
-	n := len(sep)
-	switch {
-	case n == 0:
-		return 0
-	case n == 1:
-		return IndexByte(s, sep[0])
-	case n == len(s):
-		if Equal(sep, s) {
-			return 0
-		}
-		return -1
-	case n > len(s):
-		return -1
-	}
-	c := sep[0]
-	i := 0
-	fails := 0
-	t := s[:len(s)-n+1]
-	for i < len(t) {
-		if t[i] != c {
-			o := IndexByte(t[i:], c)
-			if o < 0 {
-				break
-			}
-			i += o
-		}
-		if Equal(s[i:i+n], sep) {
-			return i
-		}
-		i++
-		fails++
-		if fails >= 4+i>>4 && i < len(t) {
-			// Give up on IndexByte, it isn't skipping ahead
-			// far enough to be better than Rabin-Karp.
-			// Experiments (using IndexPeriodic) suggest
-			// the cutover is about 16 byte skips.
-			// TODO: if large prefixes of sep are matching
-			// we should cutover at even larger average skips,
-			// because Equal becomes that much more expensive.
-			// This code does not take that effect into account.
-			j := indexRabinKarp(s[i:], sep)
-			if j < 0 {
-				return -1
-			}
-			return i + j
-		}
-	}
-	return -1
-}
diff --git a/src/bytes/bytes_s390x.go b/src/bytes/bytes_s390x.go
deleted file mode 100644
index 84f040d43d..0000000000
--- a/src/bytes/bytes_s390x.go
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bytes
-
-//go:noescape
-
-// indexShortStr returns the index of the first instance of sep in s,
-// or -1 if sep is not present in s.
-// indexShortStr requires 2 <= len(sep) <= shortStringLen
-func indexShortStr(s, c []byte) int // ../runtime/asm_s390x.s
-
-// supportsVX reports whether the vector facility is available.
-// indexShortStr must not be called if the vector facility is not
-// available.
-func supportsVX() bool // ../runtime/asm_s390x.s
-
-var shortStringLen = -1
-
-func init() {
-	if supportsVX() {
-		shortStringLen = 64
-	}
-}
-
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep []byte) int {
-	n := len(sep)
-	switch {
-	case n == 0:
-		return 0
-	case n == 1:
-		return IndexByte(s, sep[0])
-	case n == len(s):
-		if Equal(sep, s) {
-			return 0
-		}
-		return -1
-	case n > len(s):
-		return -1
-	case n <= shortStringLen:
-		// Use brute force when s and sep both are small
-		if len(s) <= 64 {
-			return indexShortStr(s, sep)
-		}
-		c := sep[0]
-		i := 0
-		t := s[:len(s)-n+1]
-		fails := 0
-		for i < len(t) {
-			if t[i] != c {
-				// IndexByte skips 16/32 bytes per iteration,
-				// so it's faster than indexShortStr.
-				o := IndexByte(t[i:], c)
-				if o < 0 {
-					return -1
-				}
-				i += o
-			}
-			if Equal(s[i:i+n], sep) {
-				return i
-			}
-			fails++
-			i++
-			// Switch to indexShortStr when IndexByte produces too many false positives.
-			// Too many means more that 1 error per 8 characters.
-			// Allow some errors in the beginning.
-			if fails > (i+16)/8 {
-				r := indexShortStr(s[i:], sep)
-				if r >= 0 {
-					return r + i
-				}
-				return -1
-			}
-		}
-		return -1
-	}
-	return indexRabinKarp(s, sep)
-}
author	Keith Randall <khr@golang.org>	2018-03-04 09:47:47 -0800
committer	Keith Randall <khr@golang.org>	2018-03-04 19:49:44 +0000
commit	ee58eccc565c0871d3f16fd702fd8649a3fb61ea (patch)
tree	837073b78954dc987cf575ff478faf6fdb8afb0e /src/bytes
parent	f6332bb84ad87e958290ae23b29a2b13a41ee2a2 (diff)
download	go-ee58eccc565c0871d3f16fd702fd8649a3fb61ea.tar.gz go-ee58eccc565c0871d3f16fd702fd8649a3fb61ea.zip