about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Robert Griesemer <gri@golang.org> 2016-06-08 16:08:34 -0700
committer Matthew Dempsky <mdempsky@google.com> 2016-08-16 14:32:07 -0700
commit 3a7da56582c4909803ba72cdc3503d6c25cb9c62 (patch)
tree 805cfec090dc55a257e5e4cf970feb6a20151a32
parent f363ad2a55bd61c6cce5c3af107b11cc3fbeeb83 (diff)
download go-3a7da56582c4909803ba72cdc3503d6c25cb9c62.tar.gz
go-3a7da56582c4909803ba72cdc3503d6c25cb9c62.zip
cmd/compile/internal/syntax: fix many string/rune literal corner cases
+ many more test cases
-rw-r--r-- src/cmd/compile/internal/syntax/scanner.go 68
-rw-r--r-- src/cmd/compile/internal/syntax/scanner_test.go 108
-rw-r--r-- src/cmd/compile/internal/syntax/source.go 2
3 files changed, 126 insertions, 52 deletions
diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go
index aaf254f051..b9d586d89f 100644
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@@ -450,10 +450,12 @@ func (s *scanner) number(c rune) {
func (s *scanner) stdString() {
s.startLit()
+
for {
r := s.getr()
- if r == '\\' && !s.escape('"') {
- continue // error already reported
+ if r == '\\' {
+ s.escape('"')
+ continue
}
if r == '"' {
break
@@ -463,11 +465,13 @@ func (s *scanner) stdString() {
break
}
}
+
s.lit = string(s.stopLit())
}
func (s *scanner) rawString() {
s.startLit()
+
for {
r := s.getr()
if r == '`' {
@@ -477,22 +481,37 @@ func (s *scanner) rawString() {
s.error("string not terminated")
break
}
- // TODO(gri) deal with CRs (or don't?)
}
+ // We leave CRs in the string since they are part of the
+ // literal (even though they are not part of the literal
+ // value).
+
s.lit = string(s.stopLit())
}
func (s *scanner) rune() {
s.startLit()
+
r := s.getr()
- if r == '\\' && !s.escape('\'') {
- panic(0)
- }
- c := s.getr()
- if c != '\'' {
- panic(c)
+ if r != '\'' {
+ ok := true
+ if r == '\\' {
+ ok = s.escape('\'')
+ }
+ r = s.getr()
+ if r != '\'' {
+ // only report error if we're ok so far
+ if ok {
+ s.error("missing '")
+ }
+ s.ungetr()
+ }
+ } else {
+ s.error("empty character literal")
}
+
s.lit = string(s.stopLit())
+ return
}
func (s *scanner) lineComment() {
@@ -574,19 +593,15 @@ func (s *scanner) escape(quote rune) bool {
c = s.getr()
n, base, max = 8, 16, unicode.MaxRune
default:
- var msg string
- if c >= 0 {
- msg = "unknown escape sequence"
- } else {
- msg = "escape sequence not terminated"
+ if c < 0 {
+ return true // complain in caller about EOF
}
- s.error(msg)
+ s.error("unknown escape sequence")
return false
}
var x uint32
-loop:
- for ; n > 0; n-- {
+ for i := n; i > 0; i-- {
d := base
switch {
case isDigit(c):
@@ -597,14 +612,16 @@ loop:
d = uint32(c) - ('A' - 10)
}
if d >= base {
- var msg string
- if c >= 0 {
- msg = fmt.Sprintf("illegal character %#U in escape sequence", c)
+ if c < 0 {
+ return true // complain in caller about EOF
+ }
+ if c != quote {
+ s.error(fmt.Sprintf("illegal character %#U in escape sequence", c))
} else {
- msg = "escape sequence not terminated"
+ s.error("escape sequence incomplete")
}
- s.error(msg)
- break loop
+ s.ungetr()
+ return false
}
// d < base
x = x*base + d
@@ -612,6 +629,11 @@ loop:
}
s.ungetr()
+ if x > max && n == 3 {
+ s.error(fmt.Sprintf("octal escape value > 255: %d", x))
+ return false
+ }
+
if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
s.error("escape sequence is invalid Unicode code point")
return false
diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go
index 89ba04022e..297b61428a 100644
--- a/src/cmd/compile/internal/syntax/scanner_test.go
+++ b/src/cmd/compile/internal/syntax/scanner_test.go
@@ -255,44 +255,96 @@ var sampleTokens = [...]struct {
func TestScanErrors(t *testing.T) {
for _, test := range []struct {
- src, msg string
+ src, msg string
+ pos, line int
}{
+ // Note: Positions for lexical errors are the earliest position
+ // where the error is apparent, not the beginning of the respective
+ // token.
+
// rune-level errors
- {"fo\x00o", "invalid NUL character"},
- {"fo\ufeffo", "invalid BOM in the middle of the file"},
- {"\xff", "invalid UTF-8 encoding"},
+ {"fo\x00o", "invalid NUL character", 2, 1},
+ {"foo\n\ufeff bar", "invalid BOM in the middle of the file", 4, 2},
+ {"foo\n\n\xff ", "invalid UTF-8 encoding", 5, 3},
// token-level errors
- {"~", "bitwise complement operator is ^"},
- {"$", "invalid rune '$'"},
- {"0xyz", "malformed hex constant"},
- {"08", "malformed octal constant"},
- {"1.0e+x", "malformed floating-point constant exponent"},
- {`"foo`, "string not terminated"},
- {"`foo", "string not terminated"},
- {"/* foo", "comment not terminated"},
- {`"foo\z"`, "unknown escape sequence"},
- // {`"\x`, "escape sequence not terminated"},
- {`"\x"`, "illegal character U+0022 '\"' in escape sequence"},
- {`"\Uffffffff"`, "escape sequence is invalid Unicode code point"},
+ {"x + ~y", "bitwise complement operator is ^", 4, 1},
+ {"foo$bar = 0", "invalid rune '$'", 3, 1},
+ {"const x = 0xyz", "malformed hex constant", 12, 1},
+ {"0123456789", "malformed octal constant", 10, 1},
+ {"0123456789. /*", "comment not terminated", 14, 1}, // valid float constant
+ {"0123456789e0 /*", "comment not terminated", 15, 1}, // valid float constant
+ {"var a, b = 08, 07\n", "malformed octal constant", 13, 1},
+ {"(x + 1.0e+x)", "malformed floating-point constant exponent", 10, 1},
+
+ {`''`, "empty character literal", 1, 1},
+ {`'\`, "missing '", 2, 1},
+ {`'\'`, "missing '", 3, 1},
+ {`'\x`, "missing '", 3, 1},
+ {`'\x'`, "escape sequence incomplete", 3, 1},
+ {`'\y'`, "unknown escape sequence", 2, 1},
+ {`'\x0'`, "escape sequence incomplete", 4, 1},
+ {`'\00'`, "escape sequence incomplete", 4, 1},
+ {`'\377' /*`, "comment not terminated", 9, 1}, // valid octal escape
+ {`'\378`, "illegal character U+0038 '8' in escape sequence", 4, 1},
+ {`'\400'`, "octal escape value > 255: 256", 5, 1},
+ {`'xx`, "missing '", 2, 1},
+
+ {`"`, "string not terminated", 1, 1},
+ {`"foo`, "string not terminated", 4, 1},
+ {"`", "string not terminated", 1, 1},
+ {"`foo", "string not terminated", 4, 1},
+ {"/*/", "comment not terminated", 3, 1},
+ {"/*\n\nfoo", "comment not terminated", 7, 3},
+ {`"\`, "string not terminated", 2, 1},
+ {`"\"`, "string not terminated", 3, 1},
+ {`"\x`, "string not terminated", 3, 1},
+ {`"\x"`, "escape sequence incomplete", 3, 1},
+ {`"\y"`, "unknown escape sequence", 2, 1},
+ {`"\x0"`, "escape sequence incomplete", 4, 1},
+ {`"\00"`, "escape sequence incomplete", 4, 1},
+ {`"\377" /*`, "comment not terminated", 9, 1}, // valid octal escape
+ {`"\378"`, "illegal character U+0038 '8' in escape sequence", 4, 1},
+ {`"\400"`, "octal escape value > 255: 256", 5, 1},
+
+ {`s := "foo\z"`, "unknown escape sequence", 10, 1},
+ {`s := "foo\z00\nbar"`, "unknown escape sequence", 10, 1},
+ {`"\x`, "string not terminated", 3, 1},
+ {`"\x"`, "escape sequence incomplete", 3, 1},
+ {`var s string = "\x"`, "escape sequence incomplete", 18, 1},
+ {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point", 18, 1},
// former problem cases
- {"\xef", "invalid UTF-8 encoding"},
+ {"package p\n\n\xef", "invalid UTF-8 encoding", 11, 3},
} {
var s scanner
- hasError := false
- s.init(&bytesReader{[]byte(test.src)}, func(_, line int, msg string) {
- hasError = true
- // TODO(gri) test exact position as well
- if line != 1 {
- t.Errorf("got line = %d; want 1", line)
- }
- if msg != test.msg {
- t.Errorf("got msg = %q; want %q", msg, test.msg)
+ nerrors := 0
+ s.init(&bytesReader{[]byte(test.src)}, func(pos, line int, msg string) {
+ nerrors++
+ // only check the first error
+ if nerrors == 1 {
+ if msg != test.msg {
+ t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
+ }
+ if pos != test.pos {
+ t.Errorf("%q: got pos = %d; want %d", test.src, pos, test.pos)
+ }
+ if line != test.line {
+ t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
+ }
+ } else if nerrors > 1 {
+ t.Errorf("%q: got unexpected %q at pos = %d, line = %d", test.src, msg, pos, line)
}
})
- s.next()
- if !hasError {
+
+ for {
+ s.next()
+ if s.tok == _EOF {
+ break
+ }
+ }
+
+ if nerrors == 0 {
t.Errorf("%q: got no error; want %q", test.src, test.msg)
}
}
diff --git a/src/cmd/compile/internal/syntax/source.go b/src/cmd/compile/internal/syntax/source.go
index 38692c33be..c72389b171 100644
--- a/src/cmd/compile/internal/syntax/source.go
+++ b/src/cmd/compile/internal/syntax/source.go
@@ -58,7 +58,7 @@ func (s *source) error_at(pos, line int, msg string) {
}
func (s *source) pos() int {
- return s.offs + s.r
+ return s.offs + s.r0
}
func (s *source) ungetr() {