bytes: optimize ToLower and ToUpper for ASCII-only case

Follow what CL 68370 and CL 76470 did for the respective functions in package strings. Also adjust godoc strings to match the respective strings functions and mention the special case for ASCII-only byte slices which don't need conversion. name old time/op new time/op delta ToUpper/#00-8 9.35ns ± 3% 6.08ns ± 2% -35.04% (p=0.000 n=9+9) ToUpper/ONLYUPPER-8 77.7ns ± 1% 16.9ns ± 2% -78.22% (p=0.000 n=10+10) ToUpper/abc-8 36.5ns ± 1% 22.1ns ± 1% -39.43% (p=0.000 n=10+8) ToUpper/AbC123-8 56.9ns ± 2% 28.2ns ± 2% -50.54% (p=0.000 n=8+10) ToUpper/azAZ09_-8 62.3ns ± 1% 26.9ns ± 1% -56.82% (p=0.000 n=9+10) ToUpper/longStrinGwitHmixofsmaLLandcAps-8 219ns ± 2% 63ns ± 2% -71.17% (p=0.000 n=10+10) ToUpper/longɐstringɐwithɐnonasciiⱯchars-8 367ns ± 2% 374ns ± 3% +2.05% (p=0.000 n=9+10) ToUpper/ɐɐɐɐɐ-8 200ns ± 1% 206ns ± 1% +2.49% (p=0.000 n=10+10) ToUpper/a\u0080\U0010ffff-8 90.4ns ± 1% 93.8ns ± 0% +3.82% (p=0.000 n=10+7) ToLower/#00-8 9.59ns ± 1% 6.13ns ± 2% -36.08% (p=0.000 n=10+10) ToLower/abc-8 36.4ns ± 1% 10.4ns ± 1% -71.50% (p=0.000 n=10+10) ToLower/AbC123-8 55.8ns ± 1% 27.5ns ± 1% -50.61% (p=0.000 n=10+10) ToLower/azAZ09_-8 61.7ns ± 1% 30.2ns ± 1% -50.98% (p=0.000 n=8+10) ToLower/longStrinGwitHmixofsmaLLandcAps-8 226ns ± 1% 64ns ± 1% -71.53% (p=0.000 n=10+9) ToLower/LONGⱯSTRINGⱯWITHⱯNONASCIIⱯCHARS-8 354ns ± 0% 361ns ± 0% +2.18% (p=0.000 n=10+10) ToLower/ⱭⱭⱭⱭⱭ-8 180ns ± 1% 186ns ± 0% +3.45% (p=0.000 n=10+9) ToLower/A\u0080\U0010ffff-8 91.7ns ± 0% 94.5ns ± 0% +2.99% (p=0.000 n=10+10) Change-Id: Ifdb8ae328ff9feacd1c170db8eebbf98c399e204 Reviewed-on: https://go-review.googlesource.com/c/go/+/170954 Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
author: Tobias Klauser <tklauser@distanz.ch> 2019-04-08 15:53:35 +0200
committer: Tobias Klauser <tobias.klauser@gmail.com> 2019-04-09 05:45:34 +0000
commit: 08e1823a632783e3f71b358f2f546ab0f13a6d98 (patch)
tree: b27de820f3df1ce2782c2fc7310d8ea1ee949146 /src/bytes
parent: 2ab75c0f40fa452d275da896017cf7222fb7ca30 (diff)
download: go-08e1823a632783e3f71b358f2f546ab0f13a6d98.tar.gz
go-08e1823a632783e3f71b358f2f546ab0f13a6d98.zip
2 files changed, 94 insertions, 4 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index bdd55fca4a..22aeded5e1 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -521,11 +521,66 @@ func Repeat(b []byte, count int) []byte {
 	return nb
 }
 
-// ToUpper treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters within it mapped to their upper case.
-func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
+// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to
+// their upper case.
+func ToUpper(s []byte) []byte {
+	isASCII, hasLower := true, false
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if c >= utf8.RuneSelf {
+			isASCII = false
+			break
+		}
+		hasLower = hasLower || ('a' <= c && c <= 'z')
+	}
+
+	if isASCII { // optimize for ASCII-only byte slices.
+		if !hasLower {
+			// Just return a copy.
+			return append([]byte(""), s...)
+		}
+		b := make([]byte, len(s))
+		for i := 0; i < len(s); i++ {
+			c := s[i]
+			if 'a' <= c && c <= 'z' {
+				c -= 'a' - 'A'
+			}
+			b[i] = c
+		}
+		return b
+	}
+	return Map(unicode.ToUpper, s)
+}
 
-// ToLower treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their lower case.
-func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
+// ToLower returns a copy of the byte slice s with all Unicode letters mapped to
+// their lower case.
+func ToLower(s []byte) []byte {
+	isASCII, hasUpper := true, false
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if c >= utf8.RuneSelf {
+			isASCII = false
+			break
+		}
+		hasUpper = hasUpper || ('A' <= c && c <= 'Z')
+	}
+
+	if isASCII { // optimize for ASCII-only byte slices.
+		if !hasUpper {
+			return append([]byte(""), s...)
+		}
+		b := make([]byte, len(s))
+		for i := 0; i < len(s); i++ {
+			c := s[i]
+			if 'A' <= c && c <= 'Z' {
+				c += 'a' - 'A'
+			}
+			b[i] = c
+		}
+		return b
+	}
+	return Map(unicode.ToLower, s)
+}
 
 // ToTitle treats s as UTF-8-encoded bytes and returns a copy with all the Unicode letters mapped to their title case.
 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index d760d4b52a..340810facf 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -891,10 +891,14 @@ type StringTest struct {
 
 var upperTests = []StringTest{
 	{"", []byte("")},
+	{"ONLYUPPER", []byte("ONLYUPPER")},
 	{"abc", []byte("ABC")},
 	{"AbC123", []byte("ABC123")},
 	{"azAZ09_", []byte("AZAZ09_")},
+	{"longStrinGwitHmixofsmaLLandcAps", []byte("LONGSTRINGWITHMIXOFSMALLANDCAPS")},
+	{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", []byte("LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS")},
 	{"\u0250\u0250\u0250\u0250\u0250", []byte("\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F")}, // grows one byte per char
+	{"a\u0080\U0010FFFF", []byte("A\u0080\U0010FFFF")},                           // test utf8.RuneSelf and utf8.MaxRune
 }
 
 var lowerTests = []StringTest{
@@ -902,7 +906,10 @@ var lowerTests = []StringTest{
 	{"abc", []byte("abc")},
 	{"AbC123", []byte("abc123")},
 	{"azAZ09_", []byte("azaz09_")},
+	{"longStrinGwitHmixofsmaLLandcAps", []byte("longstringwithmixofsmallandcaps")},
+	{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", []byte("long\u0250string\u0250with\u0250nonascii\u0250chars")},
 	{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", []byte("\u0251\u0251\u0251\u0251\u0251")}, // shrinks one byte per char
+	{"A\u0080\U0010FFFF", []byte("a\u0080\U0010FFFF")},                           // test utf8.RuneSelf and utf8.MaxRune
 }
 
 const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
@@ -1029,6 +1036,34 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest
 
 func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) }
 
+func BenchmarkToUpper(b *testing.B) {
+	for _, tc := range upperTests {
+		tin := []byte(tc.in)
+		b.Run(tc.in, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				actual := ToUpper(tin)
+				if !Equal(actual, tc.out) {
+					b.Errorf("ToUpper(%q) = %q; want %q", tc.in, actual, tc.out)
+				}
+			}
+		})
+	}
+}
+
+func BenchmarkToLower(b *testing.B) {
+	for _, tc := range lowerTests {
+		tin := []byte(tc.in)
+		b.Run(tc.in, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				actual := ToLower(tin)
+				if !Equal(actual, tc.out) {
+					b.Errorf("ToLower(%q) = %q; want %q", tc.in, actual, tc.out)
+				}
+			}
+		})
+	}
+}
+
 func TestTrimSpace(t *testing.T) { runStringTests(t, TrimSpace, "TrimSpace", trimSpaceTests) }
 
 type RepeatTest struct {
author	Tobias Klauser <tklauser@distanz.ch>	2019-04-08 15:53:35 +0200
committer	Tobias Klauser <tobias.klauser@gmail.com>	2019-04-09 05:45:34 +0000
commit	08e1823a632783e3f71b358f2f546ab0f13a6d98 (patch)
tree	b27de820f3df1ce2782c2fc7310d8ea1ee949146 /src/bytes
parent	2ab75c0f40fa452d275da896017cf7222fb7ca30 (diff)
download	go-08e1823a632783e3f71b358f2f546ab0f13a6d98.tar.gz go-08e1823a632783e3f71b358f2f546ab0f13a6d98.zip