strconv: reject surrogate halves in Unquote

Unquote implements unescaping a "single-quoted, doubled-quoted, or backquoted Go string literal". Therefore, it should reject anything that the Go specification explicitly forbids. The section on "Rune literals" explicitly rejects rune values "above 0x10FFFF and surrogate halves". We properly checked for the previous condition, but were failing to check for the latter. In general, "r > utf8.MaxRune" is probably the wrong check, while !utf8.ValidRune(r) is the more correct check. We make changes to both UnquoteChar and appendEscapedRune to use the correct check. The change to appendEscapedRune is technically a noop since callers of that function already guarantee that the provided rune is valid. Fixes #47853 Change-Id: Ib8977e56b91943ec8ada821b8d217b5e9a66f950 Reviewed-on: https://go-review.googlesource.com/c/go/+/343877 Trust: Joe Tsai <joetsai@digital-static.net> Run-TryBot: Joe Tsai <joetsai@digital-static.net> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Rob Pike <r@golang.org>
author: Joe Tsai <joetsai@digital-static.net> 2021-08-20 12:50:02 -0700
committer: Joe Tsai <joetsai@digital-static.net> 2021-08-21 18:23:58 +0000
commit: 6e50991d2a38058d0824f8b086677fbe9774c9f9 (patch)
tree: 5284a7438243dca0c5a4e2a012d076d7a4cf50da /src/strconv
parent: 8fff20ffebadfea2c5c3dd58f7007246f3408495 (diff)
download: go-6e50991d2a38058d0824f8b086677fbe9774c9f9.tar.gz
go-6e50991d2a38058d0824f8b086677fbe9774c9f9.zip
2 files changed, 5 insertions, 2 deletions
diff --git a/src/strconv/quote.go b/src/strconv/quote.go
index b3bbb1612b..d2814b92da 100644
--- a/src/strconv/quote.go
+++ b/src/strconv/quote.go
@@ -103,7 +103,7 @@ func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bo
 			buf = append(buf, `\x`...)
 			buf = append(buf, lowerhex[byte(r)>>4])
 			buf = append(buf, lowerhex[byte(r)&0xF])
-		case r > utf8.MaxRune:
+		case !utf8.ValidRune(r):
 			r = 0xFFFD
 			fallthrough
 		case r < 0x10000:
@@ -322,7 +322,7 @@ func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string,
 			value = v
 			break
 		}
-		if v > utf8.MaxRune {
+		if !utf8.ValidRune(v) {
 			err = ErrSyntax
 			return
 		}
diff --git a/src/strconv/quote_test.go b/src/strconv/quote_test.go
index 4750be2740..81fc8f79e1 100644
--- a/src/strconv/quote_test.go
+++ b/src/strconv/quote_test.go
@@ -131,6 +131,7 @@ var quoterunetests = []quoteRuneTest{
 	{'\\', `'\\'`, `'\\'`, `'\\'`},
 	{0xFF, `'ÿ'`, `'\u00ff'`, `'ÿ'`},
 	{0x263a, `'☺'`, `'\u263a'`, `'☺'`},
+	{0xdead, `'�'`, `'\ufffd'`, `'�'`},
 	{0xfffd, `'�'`, `'\ufffd'`, `'�'`},
 	{0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`, `'\U0010ffff'`},
 	{0x0010ffff + 1, `'�'`, `'\ufffd'`, `'�'`},
@@ -305,6 +306,8 @@ var misquoted = []string{
 	"\"\n\"",
 	"\"\\n\n\"",
 	"'\n'",
+	`"\udead"`,
+	`"\ud83d\ude4f"`,
 }
 
 func TestUnquote(t *testing.T) {
author	Joe Tsai <joetsai@digital-static.net>	2021-08-20 12:50:02 -0700
committer	Joe Tsai <joetsai@digital-static.net>	2021-08-21 18:23:58 +0000
commit	6e50991d2a38058d0824f8b086677fbe9774c9f9 (patch)
tree	5284a7438243dca0c5a4e2a012d076d7a4cf50da /src/strconv
parent	8fff20ffebadfea2c5c3dd58f7007246f3408495 (diff)
download	go-6e50991d2a38058d0824f8b086677fbe9774c9f9.tar.gz go-6e50991d2a38058d0824f8b086677fbe9774c9f9.zip