[release-branch.go1.15] bytes, internal/bytealg: fix incorrect IndexString usage

The IndexString implementation in the bytealg package requires that the string passed into it be in the range '2 <= len(s) <= MaxLen' where MaxLen may be any value (including 0). CL 156998 added calls to bytealg.IndexString where MaxLen was not first checked. This led to an illegal instruction on s390x with the vector facility disabled. This CL guards the calls to bytealg.IndexString with a MaxLen check. If the check fails then the code now falls back to the pre CL 156998 implementation (a loop over the runes in the string). Since the MaxLen check is now in place the generic implementation is no longer called so I have returned it to its original unimplemented state. In future we may want to drop MaxLen to prevent this kind of confusion. Fixes #41595. Change-Id: I81d88cf8c5ae143a8f5f460d18f8269cb6c0f28c Reviewed-on: https://go-review.googlesource.com/c/go/+/256921 Trust: Michael Munday <mike.munday@ibm.com> Run-TryBot: Michael Munday <mike.munday@ibm.com> Reviewed-by: Keith Randall <khr@golang.org> TryBot-Result: Go Bot <gobot@golang.org>
author: Michael Munday <mike.munday@ibm.com> 2020-09-23 03:58:52 -0700
committer: Dmitri Shuralyov <dmitshur@golang.org> 2020-10-07 21:02:58 +0000
commit: af06e65910fc976c63a65decd2881ba34114428f (patch)
tree: 0057f46f7a1920d142e4e83dde47847a961e666d
parent: 884179022e0f57d6cf26a5fb12695a33dc1780fc (diff)
download: go-af06e65910fc976c63a65decd2881ba34114428f.tar.gz
go-af06e65910fc976c63a65decd2881ba34114428f.zip
2 files changed, 34 insertions, 54 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index aa07b9fbc1..ce52649f13 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -227,19 +227,26 @@ func IndexAny(s []byte, chars string) int {
 			continue
 		}
 		r, width = utf8.DecodeRune(s[i:])
-		if r == utf8.RuneError {
-			for _, r = range chars {
-				if r == utf8.RuneError {
+		if r != utf8.RuneError {
+			// r is 2 to 4 bytes
+			if len(chars) == width {
+				if chars == string(r) {
 					return i
 				}
+				continue
+			}
+			// Use bytealg.IndexString for performance if available.
+			if bytealg.MaxLen >= width {
+				if bytealg.IndexString(chars, string(r)) >= 0 {
+					return i
+				}
+				continue
 			}
-			continue
 		}
-		// r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
-		// package should not import the strings package, use bytealg.IndexString
-		// instead. And this does not seem to lose much performance.
-		if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
-			return i
+		for _, ch := range chars {
+			if r == ch {
+				return i
+			}
 		}
 	}
 	return -1
@@ -304,19 +311,26 @@ func LastIndexAny(s []byte, chars string) int {
 		}
 		r, size := utf8.DecodeLastRune(s[:i])
 		i -= size
-		if r == utf8.RuneError {
-			for _, r = range chars {
-				if r == utf8.RuneError {
+		if r != utf8.RuneError {
+			// r is 2 to 4 bytes
+			if len(chars) == size {
+				if chars == string(r) {
 					return i
 				}
+				continue
+			}
+			// Use bytealg.IndexString for performance if available.
+			if bytealg.MaxLen >= size {
+				if bytealg.IndexString(chars, string(r)) >= 0 {
+					return i
+				}
+				continue
 			}
-			continue
 		}
-		// r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes
-		// package should not import the strings package, use bytealg.IndexString
-		// instead. And this does not seem to lose much performance.
-		if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 {
-			return i
+		for _, ch := range chars {
+			if r == ch {
+				return i
+			}
 		}
 	}
 	return -1
diff --git a/src/internal/bytealg/index_generic.go b/src/internal/bytealg/index_generic.go
index 83345f1013..98e859f925 100644
--- a/src/internal/bytealg/index_generic.go
+++ b/src/internal/bytealg/index_generic.go
@@ -16,42 +16,8 @@ func Index(a, b []byte) int {
 
 // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
 // Requires 2 <= len(b) <= MaxLen.
-func IndexString(s, substr string) int {
-	// This is a partial copy of strings.Index, here because bytes.IndexAny and bytes.LastIndexAny
-	// call bytealg.IndexString. Some platforms have an optimized assembly version of this function.
-	// This implementation is used for those that do not. Although the pure Go implementation here
-	// works for the case of len(b) > MaxLen, we do not require that its assembly implementation also
-	// supports the case of len(b) > MaxLen. And we do not guarantee that this function supports the
-	// case of len(b) > MaxLen.
-	n := len(substr)
-	c0 := substr[0]
-	c1 := substr[1]
-	i := 0
-	t := len(s) - n + 1
-	fails := 0
-	for i < t {
-		if s[i] != c0 {
-			o := IndexByteString(s[i:t], c0)
-			if o < 0 {
-				return -1
-			}
-			i += o
-		}
-		if s[i+1] == c1 && s[i:i+n] == substr {
-			return i
-		}
-		i++
-		fails++
-		if fails >= 4+i>>4 && i < t {
-			// See comment in src/bytes/bytes.go.
-			j := IndexRabinKarp(s[i:], substr)
-			if j < 0 {
-				return -1
-			}
-			return i + j
-		}
-	}
-	return -1
+func IndexString(a, b string) int {
+	panic("unimplemented")
 }
 
 // Cutover reports the number of failures of IndexByte we should tolerate
author	Michael Munday <mike.munday@ibm.com>	2020-09-23 03:58:52 -0700
committer	Dmitri Shuralyov <dmitshur@golang.org>	2020-10-07 21:02:58 +0000
commit	af06e65910fc976c63a65decd2881ba34114428f (patch)
tree	0057f46f7a1920d142e4e83dde47847a961e666d
parent	884179022e0f57d6cf26a5fb12695a33dc1780fc (diff)
download	go-af06e65910fc976c63a65decd2881ba34114428f.tar.gz go-af06e65910fc976c63a65decd2881ba34114428f.zip