bytes, strings: speed up TrimSpace 4-5x for common ASCII cases

This change adds a fast path for ASCII strings to both strings.TrimSpace and bytes.TrimSpace. It doesn't slow down the non-ASCII path much, if at all. I added benchmarks for strings.TrimSpace as it didn't have any, and I fleshed out the benchmarks for bytes.TrimSpace as it just had one case (for ASCII). The benchmarks (and the code!) are now the same between the two versions. Below are the benchmark results: strings.TrimSpace: name old time/op new time/op delta TrimSpace/NoTrim-8 18.6ns ± 0% 3.8ns ± 0% -79.53% (p=0.000 n=5+4) TrimSpace/ASCII-8 33.5ns ± 2% 6.0ns ± 3% -82.05% (p=0.008 n=5+5) TrimSpace/SomeNonASCII-8 97.1ns ± 1% 88.6ns ± 1% -8.68% (p=0.008 n=5+5) TrimSpace/JustNonASCII-8 144ns ± 0% 143ns ± 0% ~ (p=0.079 n=4+5) bytes.TrimSpace: name old time/op new time/op delta TrimSpace/NoTrim-8 18.9ns ± 1% 4.1ns ± 1% -78.34% (p=0.008 n=5+5) TrimSpace/ASCII-8 29.9ns ± 0% 6.3ns ± 1% -79.06% (p=0.008 n=5+5) TrimSpace/SomeNonASCII-8 91.5ns ± 0% 82.3ns ± 0% -10.03% (p=0.008 n=5+5) TrimSpace/JustNonASCII-8 150ns ± 0% 150ns ± 0% ~ (all equal) Fixes #29122 Change-Id: Ica45cd86a219cadf60173ec9db260133cd1d7951 Reviewed-on: https://go-review.googlesource.com/c/go/+/152917 Reviewed-by: Daniel Martí <mvdan@mvdan.cc> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org>
author: Ben Hoyt <benhoyt@gmail.com> 2018-12-06 08:53:29 -0500
committer: Brad Fitzpatrick <bradfitz@golang.org> 2019-03-12 15:52:17 +0000
commit: 4b4f222a0dd8765e5b493d458fa352ea22045575 (patch)
tree: 8546c03668ded903e08c74935566b16146f5166f /src/bytes
parent: 10aede26d0603c16f6f66c87a84bccfeb2e0c8e0 (diff)
download: go-4b4f222a0dd8765e5b493d458fa352ea22045575.tar.gz
go-4b4f222a0dd8765e5b493d458fa352ea22045575.zip
2 files changed, 45 insertions, 4 deletions
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index 6fcebe6593..08fc14d837 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -759,7 +759,36 @@ func TrimRight(s []byte, cutset string) []byte {
 // TrimSpace returns a subslice of s by slicing off all leading and
 // trailing white space, as defined by Unicode.
 func TrimSpace(s []byte) []byte {
-	return TrimFunc(s, unicode.IsSpace)
+	// Fast path for ASCII: look for the first ASCII non-space byte
+	start := 0
+	for ; start < len(s); start++ {
+		c := s[start]
+		if c >= utf8.RuneSelf {
+			// If we run into a non-ASCII byte, fall back to the
+			// slower unicode-aware method on the remaining bytes
+			return TrimFunc(s[start:], unicode.IsSpace)
+		}
+		if asciiSpace[c] == 0 {
+			break
+		}
+	}
+
+	// Now look for the first ASCII non-space byte from the end
+	stop := len(s)
+	for ; stop > start; stop-- {
+		c := s[stop-1]
+		if c >= utf8.RuneSelf {
+			return TrimFunc(s[start:stop], unicode.IsSpace)
+		}
+		if asciiSpace[c] == 0 {
+			break
+		}
+	}
+
+	// At this point s[start:stop] starts and ends with an ASCII
+	// non-space bytes, so we're done. Non-ASCII cases have already
+	// been handled above.
+	return s[start:stop]
 }
 
 // Runes interprets s as a sequence of UTF-8-encoded code points.
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index 80a54f6118..98ba95009d 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -1617,9 +1617,21 @@ func BenchmarkFieldsFunc(b *testing.B) {
 }
 
 func BenchmarkTrimSpace(b *testing.B) {
-	s := []byte("  Some text.  \n")
-	for i := 0; i < b.N; i++ {
-		TrimSpace(s)
+	tests := []struct {
+		name  string
+		input []byte
+	}{
+		{"NoTrim", []byte("typical")},
+		{"ASCII", []byte("  foo bar  ")},
+		{"SomeNonASCII", []byte("    \u2000\t\r\n x\t\t\r\r\ny\n \u3000    ")},
+		{"JustNonASCII", []byte("\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000")},
+	}
+	for _, test := range tests {
+		b.Run(test.name, func(b *testing.B) {
+			for i := 0; i < b.N; i++ {
+				TrimSpace(test.input)
+			}
+		})
 	}
 }
author	Ben Hoyt <benhoyt@gmail.com>	2018-12-06 08:53:29 -0500
committer	Brad Fitzpatrick <bradfitz@golang.org>	2019-03-12 15:52:17 +0000
commit	4b4f222a0dd8765e5b493d458fa352ea22045575 (patch)
tree	8546c03668ded903e08c74935566b16146f5166f /src/bytes
parent	10aede26d0603c16f6f66c87a84bccfeb2e0c8e0 (diff)
download	go-4b4f222a0dd8765e5b493d458fa352ea22045575.tar.gz go-4b4f222a0dd8765e5b493d458fa352ea22045575.zip