diff options
-rw-r--r-- | src/cmd/compile/internal/syntax/parser.go | 4 | ||||
-rw-r--r-- | src/cmd/compile/internal/syntax/parser_test.go | 4 | ||||
-rw-r--r-- | src/cmd/compile/internal/syntax/scanner_test.go | 3 | ||||
-rw-r--r-- | src/cmd/compile/internal/syntax/source.go | 82 |
4 files changed, 54 insertions, 39 deletions
diff --git a/src/cmd/compile/internal/syntax/parser.go b/src/cmd/compile/internal/syntax/parser.go index 5960b45fbb..86d2d6a503 100644 --- a/src/cmd/compile/internal/syntax/parser.go +++ b/src/cmd/compile/internal/syntax/parser.go @@ -26,11 +26,11 @@ type parser struct { func (p *parser) init(src io.Reader, errh ErrorHandler) { p.scanner.init(src, func(pos, line int, msg string) { p.nerrors++ - if errh != nil { + if !debug && errh != nil { errh(pos, line, msg) return } - fmt.Printf("%d: %s\n", line, msg) + panic(fmt.Sprintf("%d: %s\n", line, msg)) }) p.fnest = 0 diff --git a/src/cmd/compile/internal/syntax/parser_test.go b/src/cmd/compile/internal/syntax/parser_test.go index 1cf26c4c47..12fc019414 100644 --- a/src/cmd/compile/internal/syntax/parser_test.go +++ b/src/cmd/compile/internal/syntax/parser_test.go @@ -29,6 +29,10 @@ func TestParse(t *testing.T) { } func TestStdLib(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode") + } + var m1 runtime.MemStats runtime.ReadMemStats(&m1) start := time.Now() diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go index 7c8545c3f6..89ba04022e 100644 --- a/src/cmd/compile/internal/syntax/scanner_test.go +++ b/src/cmd/compile/internal/syntax/scanner_test.go @@ -275,6 +275,9 @@ func TestScanErrors(t *testing.T) { // {`"\x`, "escape sequence not terminated"}, {`"\x"`, "illegal character U+0022 '\"' in escape sequence"}, {`"\Uffffffff"`, "escape sequence is invalid Unicode code point"}, + + // former problem cases + {"\xef", "invalid UTF-8 encoding"}, } { var s scanner hasError := false diff --git a/src/cmd/compile/internal/syntax/source.go b/src/cmd/compile/internal/syntax/source.go index a04c089367..38692c33be 100644 --- a/src/cmd/compile/internal/syntax/source.go +++ b/src/cmd/compile/internal/syntax/source.go @@ -66,50 +66,58 @@ func (s *source) ungetr() { } func (s *source) getr() rune { - for { - s.r0, s.line0 = s.r, s.line - - // common case: ASCII and enough bytes - if b := s.buf[s.r]; b < utf8.RuneSelf { - s.r++ - if b == 0 { - s.error("invalid NUL character") - continue - } - if b == '\n' { - s.line++ - } - return rune(b) - } +redo: + s.r0, s.line0 = s.r, s.line - // uncommon case: not ASCII or not enough bytes - r, w := utf8.DecodeRune(s.buf[s.r:s.w]) // optimistically assume valid rune - if r != utf8.RuneError || w > 1 { - s.r += w - // BOM's are only allowed as the first character in a file - const BOM = 0xfeff - if r == BOM && s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1) - s.error("invalid BOM in the middle of the file") - continue - } - return r - } + // TODO(gri) We could avoid at least one test that is always taken + // in the for loop below by duplicating the common case code (ASCII) + // here since we always have at least the sentinel (utf8.RuneSelf) + // in the buffer. Measure and optimize eventually. - if w == 0 && s.err != nil { - if s.err != io.EOF { - s.error(s.err.Error()) - } - return -1 + // make sure we have at least one rune in buffer, or we are at EOF + for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) { + s.fill() // s.w-s.r < len(s.buf) => buffer is not full + } + + // common case: ASCII and enough bytes + // (invariant: s.buf[s.w] == utf8.RuneSelf) + if b := s.buf[s.r]; b < utf8.RuneSelf { + s.r++ + if b == 0 { + s.error("invalid NUL character") + goto redo } + if b == '\n' { + s.line++ + } + return rune(b) + } - if w == 1 && (s.r+utf8.UTFMax <= s.w || utf8.FullRune(s.buf[s.r:s.w])) { - s.r++ - s.error("invalid UTF-8 encoding") - continue + // EOF + if s.r == s.w { + if s.err != io.EOF { + s.error(s.err.Error()) } + return -1 + } + + // uncommon case: not ASCII + r, w := utf8.DecodeRune(s.buf[s.r:s.w]) + s.r += w - s.fill() + if r == utf8.RuneError && w == 1 { + s.error("invalid UTF-8 encoding") + goto redo } + + // BOM's are only allowed as the first character in a file + const BOM = 0xfeff + if r == BOM && s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1) + s.error("invalid BOM in the middle of the file") + goto redo + } + + return r } func (s *source) fill() { |