aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Martí <mvdan@mvdan.cc>2020-03-27 23:56:09 +0000
committerDmitri Shuralyov <dmitshur@golang.org>2020-05-27 20:03:49 +0000
commit846c00ed3d27a764abeaf0d153adb996751f79aa (patch)
treee0f3e47ebb39755f00b70db7bab8d1e444bdaab5
parent8ca58ff90b790062facf1084d23ba97453b51b91 (diff)
downloadgo-846c00ed3d27a764abeaf0d153adb996751f79aa.tar.gz
go-846c00ed3d27a764abeaf0d153adb996751f79aa.zip
[release-branch.go1.14] encoding/json: don't mangle strings in an edge case when decoding
The added comment contains some context. The original optimization assumed that each call to unquoteBytes (or unquote) followed its corresponding call to rescanLiteral. Otherwise, unquoting a literal might use d.safeUnquote from another re-scanned literal. Unfortunately, this assumption is wrong. When decoding {"foo": "bar"} into a map[T]string where T implements TextUnmarshaler, the sequence of calls would be as follows: 1) rescanLiteral "foo" 2) unquoteBytes "foo" 3) rescanLiteral "bar" 4) unquoteBytes "foo" (for UnmarshalText) 5) unquoteBytes "bar" Note that the call to UnmarshalText happens in literalStore, which repeats the work to unquote the input string literal. But, since that happens after we've re-scanned "bar", we're using the wrong safeUnquote field value. In the added test case, the second string had a non-zero number of safe bytes, and the first string had none since it was all non-ASCII. Thus, "safely" unquoting a number of the first string's bytes could cut a rune in half, and thus mangle the runes. A rather simple fix, without a full revert, is to only allow one use of safeUnquote per call to unquoteBytes. Each call to rescanLiteral when we have a string is soon followed by a call to unquoteBytes, so it's no longer possible for us to use the wrong index. Also add a test case from #38126, which is the same underlying bug, but affecting the ",string" option. Before the fix, the test would fail, just like in the original two issues: --- FAIL: TestUnmarshalRescanLiteralMangledUnquote (0.00s) decode_test.go:2443: Key "开源" does not exist in map: map[开���:12345开源] decode_test.go:2458: Unmarshal unexpected error: json: invalid use of ,string struct tag, trying to unmarshal "\"aaa\tbbb\"" into string Fixes #38106. For #38105. For #38126. Change-Id: I761e54924e9a971a4f9eaa70bbf72014bb1476e6 Reviewed-on: https://go-review.googlesource.com/c/go/+/226218 Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com> (cherry picked from commit 55361a26177b3faf151a1d35467db5d403b51f22) Reviewed-on: https://go-review.googlesource.com/c/go/+/233057 Run-TryBot: Dmitri Shuralyov <dmitshur@golang.org> Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
-rw-r--r--src/encoding/json/decode.go5
-rw-r--r--src/encoding/json/decode_test.go33
2 files changed, 36 insertions, 2 deletions
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index b43484692e..b60e2bb0b2 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -1217,6 +1217,11 @@ func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) {
if r == -1 {
return s, true
}
+ // Only perform up to one safe unquote for each re-scanned string
+ // literal. In some edge cases, the decoder unquotes a literal a second
+ // time, even after another literal has been re-scanned. Thus, only the
+ // first unquote can safely use safeUnquote.
+ d.safeUnquote = 0
b := make([]byte, len(s)+2*utf8.UTFMax)
w := copy(b, s[0:r])
diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go
index 498bd97b46..a49181e982 100644
--- a/src/encoding/json/decode_test.go
+++ b/src/encoding/json/decode_test.go
@@ -2419,7 +2419,7 @@ func (m *textUnmarshalerString) UnmarshalText(text []byte) error {
return nil
}
-// Test unmarshal to a map, with map key is a user defined type.
+// Test unmarshal to a map, where the map key is a user defined type.
// See golang.org/issues/34437.
func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) {
var p map[textUnmarshalerString]string
@@ -2428,6 +2428,35 @@ func TestUnmarshalMapWithTextUnmarshalerStringKey(t *testing.T) {
}
if _, ok := p["foo"]; !ok {
- t.Errorf(`Key "foo" is not existed in map: %v`, p)
+ t.Errorf(`Key "foo" does not exist in map: %v`, p)
+ }
+}
+
+func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) {
+ // See golang.org/issues/38105.
+ var p map[textUnmarshalerString]string
+ if err := Unmarshal([]byte(`{"开源":"12345开源"}`), &p); err != nil {
+ t.Fatalf("Unmarshal unexpected error: %v", err)
+ }
+ if _, ok := p["开源"]; !ok {
+ t.Errorf(`Key "开源" does not exist in map: %v`, p)
+ }
+
+ // See golang.org/issues/38126.
+ type T struct {
+ F1 string `json:"F1,string"`
+ }
+ t1 := T{"aaa\tbbb"}
+
+ b, err := Marshal(t1)
+ if err != nil {
+ t.Fatalf("Marshal unexpected error: %v", err)
+ }
+ var t2 T
+ if err := Unmarshal(b, &t2); err != nil {
+ t.Fatalf("Unmarshal unexpected error: %v", err)
+ }
+ if t1 != t2 {
+ t.Errorf("Marshal and Unmarshal roundtrip mismatch: want %q got %q", t1, t2)
}
}