diff options
author | Martin Möhrmann <moehrmann@google.com> | 2018-05-04 06:54:18 +0200 |
---|---|---|
committer | Andrew Bonventre <andybons@golang.org> | 2018-05-26 22:49:02 +0000 |
commit | 2bded9dc19a65005fbcb37ab1e51a9237998b09e (patch) | |
tree | 51ec333bbd5f278d89a866017b9460bfde274520 | |
parent | 48ee689f19c0b27c8a7b2de9d3b26fe6c0d4fa5d (diff) | |
download | go-2bded9dc19a65005fbcb37ab1e51a9237998b09e.tar.gz go-2bded9dc19a65005fbcb37ab1e51a9237998b09e.zip |
[release-branch.go1.10] strings: fix encoding of \u0080 in map
Fix the encoding of PAD (U+0080), whose value equals utf8.RuneSelf.
strings.Map incorrectly encoded it as the single byte \x80 because the
check for one-byte UTF-8 encodings used <= instead of <.
Fixes golang/go#25479
Change-Id: Ib6c7d1f425a7ba81e431b6d64009e713d94ea3bc
Reviewed-on: https://go-review.googlesource.com/111286
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
(cherry picked from commit 8c62fc0ca3c96ecbd3a6e81546aa8c53e32ff500)
Reviewed-on: https://go-review.googlesource.com/114635
Run-TryBot: Andrew Bonventre <andybons@golang.org>
-rw-r--r-- | src/strings/strings.go | 4 | ||||
-rw-r--r-- | src/strings/strings_test.go | 23 |
2 files changed, 25 insertions, 2 deletions
diff --git a/src/strings/strings.go b/src/strings/strings.go
index 02c032046b..05e8243b2e 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -474,7 +474,7 @@ func Map(mapping func(rune) rune, s string) string {
 		b = make([]byte, len(s)+utf8.UTFMax)
 		nbytes = copy(b, s[:i])
 		if r >= 0 {
-			if r <= utf8.RuneSelf {
+			if r < utf8.RuneSelf {
 				b[nbytes] = byte(r)
 				nbytes++
 			} else {
@@ -504,7 +504,7 @@ func Map(mapping func(rune) rune, s string) string {
 		r := mapping(c)
 
 		// common case
-		if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) {
+		if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) {
 			b[nbytes] = byte(r)
 			nbytes++
 			continue
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go
index d8fcb62a87..6c1193873b 100644
--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@@ -528,6 +528,7 @@ var upperTests = []StringTest{
 	{"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"},
 	{"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS"},
 	{"\u0250\u0250\u0250\u0250\u0250", "\u2C6F\u2C6F\u2C6F\u2C6F\u2C6F"}, // grows one byte per char
+	{"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
 }
 
 var lowerTests = []StringTest{
@@ -538,6 +539,7 @@ var lowerTests = []StringTest{
 	{"longStrinGwitHmixofsmaLLandcAps", "longstringwithmixofsmallandcaps"},
 	{"LONG\u2C6FSTRING\u2C6FWITH\u2C6FNONASCII\u2C6FCHARS", "long\u0250string\u0250with\u0250nonascii\u0250chars"},
 	{"\u2C6D\u2C6D\u2C6D\u2C6D\u2C6D", "\u0251\u0251\u0251\u0251\u0251"}, // shrinks one byte per char
+	{"A\u0080\U0010FFFF", "a\u0080\U0010FFFF"}, // test utf8.RuneSelf and utf8.MaxRune
 }
 
 const space = "\t\v\r\f\n\u0085\u00a0\u2000\u3000"
@@ -650,6 +652,27 @@ func TestMap(t *testing.T) {
 	if m != expect {
 		t.Errorf("replace invalid sequence: expected %q got %q", expect, m)
 	}
+
+	// 8. Check utf8.RuneSelf and utf8.MaxRune encoding
+	encode := func(r rune) rune {
+		switch r {
+		case utf8.RuneSelf:
+			return unicode.MaxRune
+		case unicode.MaxRune:
+			return utf8.RuneSelf
+		}
+		return r
+	}
+	s := string(utf8.RuneSelf) + string(utf8.MaxRune)
+	r := string(utf8.MaxRune) + string(utf8.RuneSelf) // reverse of s
+	m = Map(encode, s)
+	if m != r {
+		t.Errorf("encoding not handled correctly: expected %q got %q", r, m)
+	}
+	m = Map(encode, r)
+	if m != s {
+		t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
+	}
 }
 
 func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }