aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Filippo Valsorda <filippo@golang.org> 2019-08-20 17:29:04 -0400
committer Andrew Bonventre <andybons@golang.org> 2019-08-21 15:20:00 +0000
commit eee07a8e685e72c287ca4a0e7a049ce941ef6ab3 (patch)
tree a5f419187df6ee5691e8386e2f777077219064ed
parent c61c29fe563134f752ea14e794d08031982145e8 (diff)
download go-eee07a8e685e72c287ca4a0e7a049ce941ef6ab3.tar.gz
go-eee07a8e685e72c287ca4a0e7a049ce941ef6ab3.zip
Revert "encoding/json: avoid work when unquoting strings"
This reverts CL 151157.

CL 151157 introduced a crash when decoding into ",string" fields. It came with a moderate speedup, so at this stage of the release cycle let's just revert it, and reapply it in Go 1.14 with the fix in CL 190659.

Also applied the test cases from CL 190659.

Updates #33728

Change-Id: Ie46e2bc15224b251888580daf6b79d5865f3878e
Reviewed-on: https://go-review.googlesource.com/c/go/+/190909
Run-TryBot: Andrew Bonventre <andybons@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Andrew Bonventre <andybons@golang.org>
-rw-r--r-- src/encoding/json/decode.go 68
-rw-r--r-- src/encoding/json/decode_test.go 2
2 files changed, 36 insertions, 34 deletions
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go
index bdd94e34ce..cbd71acfc6 100644
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -272,9 +272,6 @@ type decodeState struct {
savedError error
useNumber bool
disallowUnknownFields bool
- // safeUnquote is the number of current string literal bytes that don't
- // need to be unquoted. When negative, no bytes need unquoting.
- safeUnquote int
}
// readIndex returns the position of the last byte read.
@@ -376,27 +373,13 @@ func (d *decodeState) rescanLiteral() {
Switch:
switch data[i-1] {
case '"': // string
- // safeUnquote is initialized at -1, which means that all bytes
- // checked so far can be unquoted at a later time with no work
- // at all. When reaching the closing '"', if safeUnquote is
- // still -1, all bytes can be unquoted with no work. Otherwise,
- // only those bytes up until the first '\\' or non-ascii rune
- // can be safely unquoted.
- safeUnquote := -1
for ; i < len(data); i++ {
- if c := data[i]; c == '\\' {
- if safeUnquote < 0 { // first unsafe byte
- safeUnquote = int(i - d.off)
- }
+ switch data[i] {
+ case '\\':
i++ // escaped char
- } else if c == '"' {
- d.safeUnquote = safeUnquote
+ case '"':
i++ // tokenize the closing quote too
break Switch
- } else if c >= utf8.RuneSelf {
- if safeUnquote < 0 { // first unsafe byte
- safeUnquote = int(i - d.off)
- }
}
}
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number
@@ -749,7 +732,7 @@ func (d *decodeState) object(v reflect.Value) error {
start := d.readIndex()
d.rescanLiteral()
item := d.data[start:d.readIndex()]
- key, ok := d.unquoteBytes(item)
+ key, ok := unquoteBytes(item)
if !ok {
panic(phasePanicMsg)
}
@@ -950,7 +933,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
return nil
}
- s, ok := d.unquoteBytes(item)
+ s, ok := unquoteBytes(item)
if !ok {
if fromQuoted {
return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -1001,7 +984,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
}
case '"': // string
- s, ok := d.unquoteBytes(item)
+ s, ok := unquoteBytes(item)
if !ok {
if fromQuoted {
return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -1159,7 +1142,7 @@ func (d *decodeState) objectInterface() map[string]interface{} {
start := d.readIndex()
d.rescanLiteral()
item := d.data[start:d.readIndex()]
- key, ok := d.unquote(item)
+ key, ok := unquote(item)
if !ok {
panic(phasePanicMsg)
}
@@ -1208,7 +1191,7 @@ func (d *decodeState) literalInterface() interface{} {
return c == 't'
case '"': // string
- s, ok := d.unquote(item)
+ s, ok := unquote(item)
if !ok {
panic(phasePanicMsg)
}
@@ -1251,21 +1234,38 @@ func getu4(s []byte) rune {
// unquote converts a quoted JSON string literal s into an actual string t.
// The rules are different than for Go, so cannot use strconv.Unquote.
-func (d *decodeState) unquote(s []byte) (t string, ok bool) {
- s, ok = d.unquoteBytes(s)
+func unquote(s []byte) (t string, ok bool) {
+ s, ok = unquoteBytes(s)
t = string(s)
return
}
-func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) {
- r := d.safeUnquote
- // The bytes have been scanned, so we know that the first and last bytes
- // are double quotes.
+func unquoteBytes(s []byte) (t []byte, ok bool) {
+ if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
+ return
+ }
s = s[1 : len(s)-1]
- // If there are no unusual characters, no unquoting is needed, so return
- // a slice of the original bytes.
- if r == -1 {
+ // Check for unusual characters. If there are none,
+ // then no unquoting is needed, so return a slice of the
+ // original bytes.
+ r := 0
+ for r < len(s) {
+ c := s[r]
+ if c == '\\' || c == '"' || c < ' ' {
+ break
+ }
+ if c < utf8.RuneSelf {
+ r++
+ continue
+ }
+ rr, size := utf8.DecodeRune(s[r:])
+ if rr == utf8.RuneError && size == 1 {
+ break
+ }
+ r += size
+ }
+ if r == len(s) {
return s, true
}
diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go
index 719a9fa290..3f25893b41 100644
--- a/src/encoding/json/decode_test.go
+++ b/src/encoding/json/decode_test.go
@@ -1250,6 +1250,8 @@ var wrongStringTests = []wrongStringTest{
{`{"result":"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "foo" into string`},
{`{"result":"123"}`, `json: invalid use of ,string struct tag, trying to unmarshal "123" into string`},
{`{"result":123}`, `json: invalid use of ,string struct tag, trying to unmarshal unquoted value into string`},
+ {`{"result":"\""}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"" into string`},
+ {`{"result":"\"foo"}`, `json: invalid use of ,string struct tag, trying to unmarshal "\"foo" into string`},
}
// If people misuse the ,string modifier, the error message should be