diff options
author | Robert Griesemer <gri@golang.org> | 2010-02-22 14:21:59 -0800 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2010-02-22 14:21:59 -0800 |
commit | 22e960547f5f14caf2dd401b20ebfe64749fa7b2 (patch) | |
tree | 1bb86f4fd921c3b5b82fbb57b28a0953309a1c6b | |
parent | 0485a999ff078c760a9cd4013b0f21b6ed90ffda (diff) | |
download | go-22e960547f5f14caf2dd401b20ebfe64749fa7b2.tar.gz go-22e960547f5f14caf2dd401b20ebfe64749fa7b2.zip |
scanner: match go/scanner and disallow NUL character;
also check for illegal UTF-8 sequences
R=rsc
CC=golang-dev
https://golang.org/cl/218061
-rw-r--r-- | src/pkg/scanner/scanner.go | 16 | ||||
-rw-r--r-- | src/pkg/scanner/scanner_test.go | 7 |
2 files changed, 17 insertions, 6 deletions
diff --git a/src/pkg/scanner/scanner.go b/src/pkg/scanner/scanner.go index c4233aa581..c9b46f0ea3 100644 --- a/src/pkg/scanner/scanner.go +++ b/src/pkg/scanner/scanner.go @@ -2,9 +2,10 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// A general-purpose scanner for text. Takes an io.Reader -// providing the source which then can be tokenized through -// repeated calls to the Scan function. +// A general-purpose scanner for UTF-8 encoded text. Takes an io.Reader +// providing the source which then can be tokenized through repeated +// calls to the Scan function. For compatibility with existing tools, +// the NUL character is not allowed (implementation restriction). // // By default, a Scanner skips white space and comments and // recognizes literals as defined by the Go language spec. @@ -245,13 +246,20 @@ func (s *Scanner) next() int { // uncommon case: not ASCII var width int ch, width = utf8.DecodeRune(s.srcBuf[s.srcPos:s.srcEnd]) + if ch == utf8.RuneError && width == 1 { + s.error("illegal UTF-8 encoding") + } s.srcPos += width - 1 } } s.srcPos++ s.column++ - if ch == '\n' { + switch ch { + case 0: + // implementation restriction for compatibility with other tools + s.error("illegal character NUL") + case '\n': s.line++ s.column = 0 } diff --git a/src/pkg/scanner/scanner_test.go b/src/pkg/scanner/scanner_test.go index 926048010f..563ceea0cc 100644 --- a/src/pkg/scanner/scanner_test.go +++ b/src/pkg/scanner/scanner_test.go @@ -226,7 +226,7 @@ var tokenList = []token{ token{String, "`" + f100 + "`"}, token{Comment, "// individual characters\n"}, - token{'\x00', "\x00"}, + // NUL character is not allowed token{'\x01', "\x01"}, token{' ' - 1, string(' ' - 1)}, token{'+', "+"}, @@ -390,7 +390,8 @@ func TestScanNext(t *testing.T) { func TestScanWhitespace(t *testing.T) { var buf bytes.Buffer var ws uint64 - for ch := byte(0); ch < ' '; ch++ { + // start at 1, NUL character is not allowed + for ch := byte(1); ch < ' '; ch++ { buf.WriteByte(ch) ws |= 1 << ch } @@ -442,6 +443,8 @@ func TestError(t *testing.T) { testError(t, "`abc", "literal not terminated", String) testError(t, `//`, "comment not terminated", EOF) testError(t, `/*/`, "comment not terminated", EOF) + testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String) + testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String) } |