diff options
author | Robert Griesemer <gri@golang.org> | 2020-02-10 21:32:04 -0800 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2020-03-04 23:08:10 +0000 |
commit | 17e6252c051e09a3a433620182fc54952a402d73 (patch) | |
tree | a599c4045ed0d5c5f75c20685c39c934b0e7f0fa /src/cmd/compile/internal/syntax | |
parent | 19ed0d993cf7b0df804c4c2e96dc674da4059e03 (diff) | |
download | go-17e6252c051e09a3a433620182fc54952a402d73.tar.gz go-17e6252c051e09a3a433620182fc54952a402d73.zip |
cmd/compile/internal/syntax: improved scanner tests
This is one of several changes that were part of a larger rewrite
which I made in early 2019 after switching to the new number literal
syntax implementation. The purpose of the rewrite was to simplify
reading of source code (Unicode character by character) and speed up
the scanner but was never submitted for review due to other priorities.
Part 1 of 3:
This change contains improvements to the scanner tests.
Change-Id: Iecfcaef00fdeb690b0db786edbd52e828417141b
Reviewed-on: https://go-review.googlesource.com/c/go/+/221601
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Diffstat (limited to 'src/cmd/compile/internal/syntax')
-rw-r--r-- | src/cmd/compile/internal/syntax/scanner_test.go | 151 |
1 files changed, 97 insertions, 54 deletions
diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go index d76231a4af..612c59507e 100644 --- a/src/cmd/compile/internal/syntax/scanner_test.go +++ b/src/cmd/compile/internal/syntax/scanner_test.go @@ -12,19 +12,59 @@ import ( "testing" ) +// errh is a default error handler for basic tests. +func errh(line, col uint, msg string) { + panic(fmt.Sprintf("%d:%d: %s", line, col, msg)) +} + +// Don't bother with other tests if TestSmoke doesn't pass. +func TestSmoke(t *testing.T) { + const src = "if (+foo\t+=..123/***/4.2_0e-0i'a'`raw`\"string\" ;//$" + tokens := []token{_If, _Lparen, _Operator, _Name, _AssignOp, _Dot, _Literal, _Literal, _Literal, _Literal, _Literal, _Semi, _EOF} + + var got scanner + got.init(strings.NewReader(src), errh, 0) + for _, want := range tokens { + got.next() + if got.tok != want { + t.Errorf("%d:%d: got %s; want %s", got.line, got.col, got.tok, want) + continue + } + } +} + +// Once TestSmoke passes, run TestTokens next. +func TestTokens(t *testing.T) { + var got scanner + for _, want := range sampleTokens { + got.init(strings.NewReader(want.src), func(line, col uint, msg string) { + t.Errorf("%s:%d:%d: %s", want.src, line, col, msg) + }, 0) + got.next() + if got.tok != want.tok { + t.Errorf("%s: got %s; want %s", want.src, got.tok, want.tok) + continue + } + if (got.tok == _Name || got.tok == _Literal) && got.lit != want.src { + t.Errorf("%s: got %q; want %q", want.src, got.lit, want.src) + } + } +} + func TestScanner(t *testing.T) { if testing.Short() { t.Skip("skipping test in short mode") } - src, err := os.Open("parser.go") + filename := *src_ // can be changed via -src flag + src, err := os.Open(filename) if err != nil { t.Fatal(err) } defer src.Close() var s scanner - s.init(src, nil, 0) + s.init(src, errh, 0) for { s.next() if s.tok == _EOF { @@ -34,64 +74,66 @@ func TestScanner(t *testing.T) { continue } switch s.tok { - case _Name: - fmt.Println(s.line, s.tok, "=>", s.lit) + case _Name, _Literal: + fmt.Printf("%s:%d:%d: %s => %s\n", filename, s.line, s.col, s.tok, s.lit) case _Operator: - fmt.Println(s.line, s.tok, "=>", s.op, s.prec) + fmt.Printf("%s:%d:%d: %s => %s (prec = %d)\n", filename, s.line, s.col, s.tok, s.op, s.prec) default: - fmt.Println(s.line, s.tok) + fmt.Printf("%s:%d:%d: %s\n", filename, s.line, s.col, s.tok) } } } -func TestTokens(t *testing.T) { +func TestEmbeddedTokens(t *testing.T) { // make source var buf bytes.Buffer for i, s := range sampleTokens { - buf.WriteString("\t\t\t\t"[:i&3]) // leading indentation - buf.WriteString(s.src) // token - buf.WriteString(" "[:i&7]) // trailing spaces - fmt.Fprintf(&buf, "/*line foo:%d */ // bar\n", i+linebase) // comments (don't crash w/o directive handler) + buf.WriteString("\t\t\t\t"[:i&3]) // leading indentation + buf.WriteString(s.src) // token + buf.WriteString(" "[:i&7]) // trailing spaces + buf.WriteString(fmt.Sprintf("/*line foo:%d */ // bar\n", i)) // comments + newline (don't crash w/o directive handler) } // scan source var got scanner + var src string got.init(&buf, func(line, col uint, msg string) { - t.Fatalf("%d:%d: %s", line, col, msg) + t.Fatalf("%s:%d:%d: %s", src, line, col, msg) }, 0) got.next() for i, want := range sampleTokens { + src = want.src nlsemi := false - if got.line != uint(i+linebase) { - t.Errorf("got line %d; want %d", got.line, i+linebase) + if got.line-linebase != uint(i) { + t.Errorf("%s: got line %d; want %d", src, got.line-linebase, i) } if got.tok != want.tok { - t.Errorf("got tok = %s; want %s", got.tok, want.tok) + t.Errorf("%s: got tok %s; want %s", src, got.tok, want.tok) continue } switch want.tok { case _Semi: if got.lit != "semicolon" { - t.Errorf("got %s; want semicolon", got.lit) + t.Errorf("%s: got %s; want semicolon", src, got.lit) } case _Name, _Literal: if got.lit != want.src { - t.Errorf("got lit = %q; want %q", got.lit, want.src) + t.Errorf("%s: got lit %q; want %q", src, got.lit, want.src) continue } nlsemi = true case _Operator, _AssignOp, _IncOp: if got.op != want.op { - t.Errorf("got op = %s; want %s", got.op, want.op) + t.Errorf("%s: got op %s; want %s", src, got.op, want.op) continue } if got.prec != want.prec { - t.Errorf("got prec = %d; want %d", got.prec, want.prec) + t.Errorf("%s: got prec %d; want %d", src, got.prec, want.prec) continue } nlsemi = want.tok == _IncOp @@ -103,11 +145,11 @@ func TestTokens(t *testing.T) { if nlsemi { got.next() if got.tok != _Semi { - t.Errorf("got tok = %s; want ;", got.tok) + t.Errorf("%s: got tok %s; want ;", src, got.tok) continue } if got.lit != "newline" { - t.Errorf("got %s; want newline", got.lit) + t.Errorf("%s: got %s; want newline", src, got.lit) } } @@ -299,7 +341,7 @@ func TestComments(t *testing.T) { {"//", comment{0, 0, "//"}}, /*-style comments */ - {"/* regular comment */", comment{0, 0, "/* regular comment */"}}, + {"123/* regular comment */", comment{0, 3, "/* regular comment */"}}, {"package p /* regular comment", comment{0, 0, ""}}, {"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}}, {"\n\n/**/", comment{2, 0, "/**/"}}, @@ -307,17 +349,16 @@ func TestComments(t *testing.T) { } { var s scanner var got comment - s.init(strings.NewReader(test.src), - func(line, col uint, msg string) { - if msg[0] != '/' { - // error - if msg != "comment not terminated" { - t.Errorf("%q: %s", test.src, msg) - } - return + s.init(strings.NewReader(test.src), func(line, col uint, msg string) { + if msg[0] != '/' { + // error + if msg != "comment not terminated" { + t.Errorf("%q: %s", test.src, msg) } - got = comment{line - linebase, col - colbase, msg} // keep last one - }, comments) + return + } + got = comment{line - linebase, col - colbase, msg} // keep last one + }, comments) for { s.next() @@ -542,7 +583,7 @@ func TestNumbers(t *testing.T) { func TestScanErrors(t *testing.T) { for _, test := range []struct { - src, msg string + src, err string line, col uint // 0-based }{ // Note: Positions for lexical errors are the earliest position @@ -582,7 +623,7 @@ func TestScanErrors(t *testing.T) { {`'xx`, "invalid character literal (missing closing ')", 0, 0}, {`'xx'`, "invalid character literal (more than one character)", 0, 0}, - {"\"\n", "newline in string", 0, 1}, + {"\n \"foo\n", "newline in string", 1, 7}, {`"`, "string not terminated", 0, 0}, {`"foo`, "string not terminated", 0, 0}, {"`", "string not terminated", 0, 0}, @@ -607,27 +648,19 @@ func TestScanErrors(t *testing.T) { {`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18}, {`return "\Uffffffff"`, "escape sequence is invalid Unicode code point U+FFFFFFFF", 0, 18}, + {"0b.0", "invalid radix point in binary literal", 0, 2}, + {"0x.p0\n", "hexadecimal literal has no digits", 0, 3}, + // former problem cases {"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0}, } { var s scanner - nerrors := 0 - s.init(strings.NewReader(test.src), func(line, col uint, msg string) { - nerrors++ - // only check the first error - if nerrors == 1 { - if msg != test.msg { - t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg) - } - if line != test.line+linebase { - t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase) - } - if col != test.col+colbase { - t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase) - } - } else if nerrors > 1 { - // TODO(gri) make this use position info - t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line) + var line, col uint + var err string + s.init(strings.NewReader(test.src), func(l, c uint, msg string) { + if err == "" { + line, col = l-linebase, c-colbase + err = msg } }, 0) @@ -638,8 +671,18 @@ func TestScanErrors(t *testing.T) { } } - if nerrors == 0 { - t.Errorf("%q: got no error; want %q", test.src, test.msg) + if err != "" { + if err != test.err { + t.Errorf("%q: got err = %q; want %q", test.src, err, test.err) + } + if line != test.line { + t.Errorf("%q: got line = %d; want %d", test.src, line, test.line) + } + if col != test.col { + t.Errorf("%q: got col = %d; want %d", test.src, col, test.col) + } + } else { + t.Errorf("%q: got no error; want %q", test.src, test.err) } } } @@ -648,7 +691,7 @@ func TestIssue21938(t *testing.T) { s := "/*" + strings.Repeat(" ", 4089) + "*/ .5" var got scanner - got.init(strings.NewReader(s), nil, 0) + got.init(strings.NewReader(s), errh, 0) got.next() if got.tok != _Literal || got.lit != ".5" { |