From af06e65910fc976c63a65decd2881ba34114428f Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Wed, 23 Sep 2020 03:58:52 -0700 Subject: [release-branch.go1.15] bytes, internal/bytealg: fix incorrect IndexString usage The IndexString implementation in the bytealg package requires that the string passed into it be in the range '2 <= len(s) <= MaxLen' where MaxLen may be any value (including 0). CL 156998 added calls to bytealg.IndexString where MaxLen was not first checked. This led to an illegal instruction on s390x with the vector facility disabled. This CL guards the calls to bytealg.IndexString with a MaxLen check. If the check fails then the code now falls back to the pre CL 156998 implementation (a loop over the runes in the string). Since the MaxLen check is now in place the generic implementation is no longer called so I have returned it to its original unimplemented state. In future we may want to drop MaxLen to prevent this kind of confusion. Fixes #41595. Change-Id: I81d88cf8c5ae143a8f5f460d18f8269cb6c0f28c Reviewed-on: https://go-review.googlesource.com/c/go/+/256921 Trust: Michael Munday Run-TryBot: Michael Munday Reviewed-by: Keith Randall TryBot-Result: Go Bot --- src/bytes/bytes.go | 50 ++++++++++++++++++++++------------- src/internal/bytealg/index_generic.go | 38 ++------------------------ 2 files changed, 34 insertions(+), 54 deletions(-) diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index aa07b9fbc1..ce52649f13 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -227,19 +227,26 @@ func IndexAny(s []byte, chars string) int { continue } r, width = utf8.DecodeRune(s[i:]) - if r == utf8.RuneError { - for _, r = range chars { - if r == utf8.RuneError { + if r != utf8.RuneError { + // r is 2 to 4 bytes + if len(chars) == width { + if chars == string(r) { return i } + continue + } + // Use bytealg.IndexString for performance if available. + if bytealg.MaxLen >= width { + if bytealg.IndexString(chars, string(r)) >= 0 { + return i + } + continue } - continue } - // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes - // package should not import the strings package, use bytealg.IndexString - // instead. And this does not seem to lose much performance. - if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 { - return i + for _, ch := range chars { + if r == ch { + return i + } } } return -1 @@ -304,19 +311,26 @@ func LastIndexAny(s []byte, chars string) int { } r, size := utf8.DecodeLastRune(s[:i]) i -= size - if r == utf8.RuneError { - for _, r = range chars { - if r == utf8.RuneError { + if r != utf8.RuneError { + // r is 2 to 4 bytes + if len(chars) == size { + if chars == string(r) { return i } + continue + } + // Use bytealg.IndexString for performance if available. + if bytealg.MaxLen >= size { + if bytealg.IndexString(chars, string(r)) >= 0 { + return i + } + continue } - continue } - // r is 2 to 4 bytes. Using strings.Index is more reasonable, but as the bytes - // package should not import the strings package, use bytealg.IndexString - // instead. And this does not seem to lose much performance. - if chars == string(r) || bytealg.IndexString(chars, string(r)) >= 0 { - return i + for _, ch := range chars { + if r == ch { + return i + } } } return -1 diff --git a/src/internal/bytealg/index_generic.go b/src/internal/bytealg/index_generic.go index 83345f1013..98e859f925 100644 --- a/src/internal/bytealg/index_generic.go +++ b/src/internal/bytealg/index_generic.go @@ -16,42 +16,8 @@ func Index(a, b []byte) int { // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a. // Requires 2 <= len(b) <= MaxLen. -func IndexString(s, substr string) int { - // This is a partial copy of strings.Index, here because bytes.IndexAny and bytes.LastIndexAny - // call bytealg.IndexString. Some platforms have an optimized assembly version of this function. - // This implementation is used for those that do not. Although the pure Go implementation here - // works for the case of len(b) > MaxLen, we do not require that its assembly implementation also - // supports the case of len(b) > MaxLen. And we do not guarantee that this function supports the - // case of len(b) > MaxLen. - n := len(substr) - c0 := substr[0] - c1 := substr[1] - i := 0 - t := len(s) - n + 1 - fails := 0 - for i < t { - if s[i] != c0 { - o := IndexByteString(s[i:t], c0) - if o < 0 { - return -1 - } - i += o - } - if s[i+1] == c1 && s[i:i+n] == substr { - return i - } - i++ - fails++ - if fails >= 4+i>>4 && i < t { - // See comment in src/bytes/bytes.go. - j := IndexRabinKarp(s[i:], substr) - if j < 0 { - return -1 - } - return i + j - } - } - return -1 +func IndexString(a, b string) int { + panic("unimplemented") } // Cutover reports the number of failures of IndexByte we should tolerate -- cgit v1.2.3-54-g00ecf