diff options
author | Martin Möhrmann <moehrmann@google.com> | 2018-05-04 06:54:18 +0200 |
---|---|---|
committer | Andrew Bonventre <andybons@golang.org> | 2018-05-26 22:48:56 +0000 |
commit | 408b3d7a507c0f4e0f49d80f6fb280232b99afc9 (patch) | |
tree | ec838d7dd852052274298521bb3cfb3fcef57837 | |
parent | f31a132e58427ed331005f1f9c53737ce4414577 (diff) | |
download | go-408b3d7a507c0f4e0f49d80f6fb280232b99afc9.tar.gz go-408b3d7a507c0f4e0f49d80f6fb280232b99afc9.zip |
[release-branch.go1.9] strings: fix encoding of \u0080 in map
Fix encoding of PAD (U+0080) which has the same value as utf8.RuneSelf
being incorrectly encoded as \x80 in strings.Map due to using <= instead
of a < comparison operator to check one byte encodings for utf8.
Fixes golang/go#25573
Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc
Reviewed-on: https://go-review.googlesource.com/111286
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
(cherry picked from commit 8c62fc0ca3c96ecbd3a6e81546aa8c53e32ff500)
Reviewed-on: https://go-review.googlesource.com/114636
Run-TryBot: Andrew Bonventre <andybons@golang.org>
Reviewed-by: Andrew Bonventre <andybons@golang.org>
-rw-r--r-- | src/strings/strings.go | 4 | ||||
-rw-r--r-- | src/strings/strings_test.go | 23 |
2 files changed, 25 insertions, 2 deletions
diff --git a/src/strings/strings.go b/src/strings/strings.go index 0c836c09d4..0d660b6217 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -523,7 +523,7 @@ func Map(mapping func(rune) rune, s string) string { b = make([]byte, len(s)+utf8.UTFMax) nbytes = copy(b, s[:i]) if r >= 0 { - if r <= utf8.RuneSelf { + if r < utf8.RuneSelf { b[nbytes] = byte(r) nbytes++ } else { @@ -553,7 +553,7 @@ func Map(mapping func(rune) rune, s string) string { r := mapping(c) // common case - if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) { + if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) { b[nbytes] = byte(r) nbytes++ continue diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index 869be9c477..b5fbe64594 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -522,6 +522,7 @@ var upperTests = []StringTest{ {"AbC123", "ABC123"}, {"azAZ09_", "AZAZ09_"}, {"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char + {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune } var lowerTests = []StringTest{ @@ -530,6 +531,7 @@ var lowerTests = []StringTest{ {"AbC123", "abc123"}, {"azAZ09_", "azaz09_"}, {"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char + {"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune } const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000" @@ -642,6 +644,27 @@ func TestMap(t *testing.T) { if m != expect { t.Errorf("replace invalid sequence: expected %q got %q", expect, m) } + + // 8. Check utf8.RuneSelf and utf8.MaxRune encoding + encode := func(r rune) rune { + switch r { + case utf8.RuneSelf: + return unicode.MaxRune + case unicode.MaxRune: + return utf8.RuneSelf + } + return r + } + s := string(utf8.RuneSelf) + string(utf8.MaxRune) + r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s + m = Map(encode, s) + if m != r { + t.Errorf("encoding not handled correctly: expected %q got %q", r, m) + } + m = Map(encode, r) + if m != s { + t.Errorf("encoding not handled correctly: expected %q got %q", s, m) + } } func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) } |