diff options
author | Robert Griesemer <gri@golang.org> | 2010-02-16 17:39:44 -0800 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2010-02-16 17:39:44 -0800 |
commit | 855986d54f7831fa45f3b30cb9732d0d5758fc88 (patch) | |
tree | 327da5ee8ac60ad9520761e7e5f9d35b2b876b57 | |
parent | 401062f720065697bf3461d8d705bf4615320ff0 (diff) | |
download | go-855986d54f7831fa45f3b30cb9732d0d5758fc88.tar.gz go-855986d54f7831fa45f3b30cb9732d0d5758fc88.zip |
go/scanner: comply with spec changes (do not allow NUL chars)
and complain about illegal UTF-8 code sequences
R=rsc
CC=golang-dev
https://golang.org/cl/209043
-rw-r--r-- | src/pkg/go/scanner/scanner.go | 5 | ||||
-rw-r--r-- | src/pkg/go/scanner/scanner_test.go | 35 |
2 files changed, 24 insertions, 16 deletions
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go index 7a21205a95..b2e120179d 100644 --- a/src/pkg/go/scanner/scanner.go +++ b/src/pkg/go/scanner/scanner.go @@ -48,12 +48,17 @@ func (S *Scanner) next() { S.pos.Column++ r, w := int(S.src[S.offset]), 1 switch { + case r == 0: + S.error(S.pos, "illegal character NUL") case r == '\n': S.pos.Line++ S.pos.Column = 0 case r >= 0x80: // not ASCII r, w = utf8.DecodeRune(S.src[S.offset:]) + if r == utf8.RuneError && w == 1 { + S.error(S.pos, "illegal UTF-8 encoding") + } } S.offset += w S.ch = r diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go index 56091a39f8..762252488a 100644 --- a/src/pkg/go/scanner/scanner_test.go +++ b/src/pkg/go/scanner/scanner_test.go @@ -551,7 +551,7 @@ func (h *errorCollector) Error(pos token.Position, msg string) { } -func checkError(t *testing.T, src string, tok token.Token, err string) { +func checkError(t *testing.T, src string, tok token.Token, pos int, err string) { var s Scanner var h errorCollector s.Init("", strings.Bytes(src), &h, ScanComments) @@ -573,8 +573,8 @@ func checkError(t *testing.T, src string, tok token.Token, err string) { if h.msg != err { t.Errorf("%q: got msg %q, expected %q", src, h.msg, err) } - if h.pos.Offset != 0 { - t.Errorf("%q: got offset %d, expected 0", src, h.pos.Offset) + if h.pos.Offset != pos { + t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos) } } @@ -582,27 +582,30 @@ func checkError(t *testing.T, src string, tok token.Token, err string) { type srcerr struct { src string tok token.Token + pos int err string } var errors = []srcerr{ - srcerr{"\"\"", token.STRING, ""}, - srcerr{"\"", token.STRING, "string not terminated"}, - srcerr{"/**/", token.COMMENT, ""}, - srcerr{"/*", token.COMMENT, "comment not terminated"}, - srcerr{"//\n", token.COMMENT, ""}, - srcerr{"//", token.COMMENT, "comment not terminated"}, - srcerr{"077", token.INT, ""}, - srcerr{"078.", token.FLOAT, ""}, - srcerr{"07801234567.", token.FLOAT, ""}, - srcerr{"078e0", token.FLOAT, ""}, - srcerr{"078", token.INT, "illegal octal number"}, - srcerr{"07800000009", token.INT, "illegal octal number"}, + srcerr{"\"\"", token.STRING, 0, ""}, + srcerr{"\"", token.STRING, 0, "string not terminated"}, + srcerr{"/**/", token.COMMENT, 0, ""}, + srcerr{"/*", token.COMMENT, 0, "comment not terminated"}, + srcerr{"//\n", token.COMMENT, 0, ""}, + srcerr{"//", token.COMMENT, 0, "comment not terminated"}, + srcerr{"077", token.INT, 0, ""}, + srcerr{"078.", token.FLOAT, 0, ""}, + srcerr{"07801234567.", token.FLOAT, 0, ""}, + srcerr{"078e0", token.FLOAT, 0, ""}, + srcerr{"078", token.INT, 0, "illegal octal number"}, + srcerr{"07800000009", token.INT, 0, "illegal octal number"}, + srcerr{"\"abc\x00def\"", token.STRING, 4, "illegal character NUL"}, + srcerr{"\"abc\x80def\"", token.STRING, 4, "illegal UTF-8 encoding"}, } func TestScanErrors(t *testing.T) { for _, e := range errors { - checkError(t, e.src, e.tok, e.err) + checkError(t, e.src, e.tok, e.pos, e.err) } } |