aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/cmd/compile/internal/syntax/parser.go4
-rw-r--r--src/cmd/compile/internal/syntax/parser_test.go4
-rw-r--r--src/cmd/compile/internal/syntax/scanner_test.go3
-rw-r--r--src/cmd/compile/internal/syntax/source.go82
4 files changed, 54 insertions, 39 deletions
diff --git a/src/cmd/compile/internal/syntax/parser.go b/src/cmd/compile/internal/syntax/parser.go
index 5960b45fbb..86d2d6a503 100644
--- a/src/cmd/compile/internal/syntax/parser.go
+++ b/src/cmd/compile/internal/syntax/parser.go
@@ -26,11 +26,11 @@ type parser struct {
func (p *parser) init(src io.Reader, errh ErrorHandler) {
p.scanner.init(src, func(pos, line int, msg string) {
p.nerrors++
- if errh != nil {
+ if !debug && errh != nil {
errh(pos, line, msg)
return
}
- fmt.Printf("%d: %s\n", line, msg)
+ panic(fmt.Sprintf("%d: %s\n", line, msg))
})
p.fnest = 0
diff --git a/src/cmd/compile/internal/syntax/parser_test.go b/src/cmd/compile/internal/syntax/parser_test.go
index 1cf26c4c47..12fc019414 100644
--- a/src/cmd/compile/internal/syntax/parser_test.go
+++ b/src/cmd/compile/internal/syntax/parser_test.go
@@ -29,6 +29,10 @@ func TestParse(t *testing.T) {
}
func TestStdLib(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping test in short mode")
+ }
+
var m1 runtime.MemStats
runtime.ReadMemStats(&m1)
start := time.Now()
diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go
index 7c8545c3f6..89ba04022e 100644
--- a/src/cmd/compile/internal/syntax/scanner_test.go
+++ b/src/cmd/compile/internal/syntax/scanner_test.go
@@ -275,6 +275,9 @@ func TestScanErrors(t *testing.T) {
// {`"\x`, "escape sequence not terminated"},
{`"\x"`, "illegal character U+0022 '\"' in escape sequence"},
{`"\Uffffffff"`, "escape sequence is invalid Unicode code point"},
+
+ // former problem cases
+ {"\xef", "invalid UTF-8 encoding"},
} {
var s scanner
hasError := false
diff --git a/src/cmd/compile/internal/syntax/source.go b/src/cmd/compile/internal/syntax/source.go
index a04c089367..38692c33be 100644
--- a/src/cmd/compile/internal/syntax/source.go
+++ b/src/cmd/compile/internal/syntax/source.go
@@ -66,50 +66,58 @@ func (s *source) ungetr() {
}
func (s *source) getr() rune {
- for {
- s.r0, s.line0 = s.r, s.line
-
- // common case: ASCII and enough bytes
- if b := s.buf[s.r]; b < utf8.RuneSelf {
- s.r++
- if b == 0 {
- s.error("invalid NUL character")
- continue
- }
- if b == '\n' {
- s.line++
- }
- return rune(b)
- }
+redo:
+ s.r0, s.line0 = s.r, s.line
- // uncommon case: not ASCII or not enough bytes
- r, w := utf8.DecodeRune(s.buf[s.r:s.w]) // optimistically assume valid rune
- if r != utf8.RuneError || w > 1 {
- s.r += w
- // BOM's are only allowed as the first character in a file
- const BOM = 0xfeff
- if r == BOM && s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
- s.error("invalid BOM in the middle of the file")
- continue
- }
- return r
- }
+ // TODO(gri) We could avoid at least one test that is always taken
+ // in the for loop below by duplicating the common case code (ASCII)
+ // here since we always have at least the sentinel (utf8.RuneSelf)
+ // in the buffer. Measure and optimize eventually.
- if w == 0 && s.err != nil {
- if s.err != io.EOF {
- s.error(s.err.Error())
- }
- return -1
+ // make sure we have at least one rune in buffer, or we are at EOF
+ for s.r+utf8.UTFMax > s.w && !utf8.FullRune(s.buf[s.r:s.w]) && s.err == nil && s.w-s.r < len(s.buf) {
+ s.fill() // s.w-s.r < len(s.buf) => buffer is not full
+ }
+
+ // common case: ASCII and enough bytes
+ // (invariant: s.buf[s.w] == utf8.RuneSelf)
+ if b := s.buf[s.r]; b < utf8.RuneSelf {
+ s.r++
+ if b == 0 {
+ s.error("invalid NUL character")
+ goto redo
}
+ if b == '\n' {
+ s.line++
+ }
+ return rune(b)
+ }
- if w == 1 && (s.r+utf8.UTFMax <= s.w || utf8.FullRune(s.buf[s.r:s.w])) {
- s.r++
- s.error("invalid UTF-8 encoding")
- continue
+ // EOF
+ if s.r == s.w {
+ if s.err != io.EOF {
+ s.error(s.err.Error())
}
+ return -1
+ }
+
+ // uncommon case: not ASCII
+ r, w := utf8.DecodeRune(s.buf[s.r:s.w])
+ s.r += w
- s.fill()
+ if r == utf8.RuneError && w == 1 {
+ s.error("invalid UTF-8 encoding")
+ goto redo
}
+
+ // BOM's are only allowed as the first character in a file
+ const BOM = 0xfeff
+ if r == BOM && s.r0 > 0 { // s.r0 is always > 0 after 1st character (fill will set it to 1)
+ s.error("invalid BOM in the middle of the file")
+ goto redo
+ }
+
+ return r
}
func (s *source) fill() {