cmd/compile/internal/syntax: improved scanner tests

This is one of several changes that were part of a larger rewrite which I made in early 2019 after switching to the new number literal syntax implementation. The purpose of the rewrite was to simplify reading of source code (Unicode character by character) and speed up the scanner but was never submitted for review due to other priorities. Part 1 of 3: This change contains improvements to the scanner tests. Change-Id: Iecfcaef00fdeb690b0db786edbd52e828417141b Reviewed-on: https://go-review.googlesource.com/c/go/+/221601 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
author: Robert Griesemer <gri@golang.org> 2020-02-10 21:32:04 -0800
committer: Robert Griesemer <gri@golang.org> 2020-03-04 23:08:10 +0000
commit: 17e6252c051e09a3a433620182fc54952a402d73 (patch)
tree: a599c4045ed0d5c5f75c20685c39c934b0e7f0fa /src/cmd/compile/internal/syntax
parent: 19ed0d993cf7b0df804c4c2e96dc674da4059e03 (diff)
download: go-17e6252c051e09a3a433620182fc54952a402d73.tar.gz
go-17e6252c051e09a3a433620182fc54952a402d73.zip
1 files changed, 97 insertions, 54 deletions
diff --git a/src/cmd/compile/internal/syntax/scanner_test.go b/src/cmd/compile/internal/syntax/scanner_test.go
index d76231a4af..612c59507e 100644
--- a/src/cmd/compile/internal/syntax/scanner_test.go
+++ b/src/cmd/compile/internal/syntax/scanner_test.go
@@ -12,19 +12,59 @@ import (
 	"testing"
 )
 
+// errh is a default error handler for basic tests.
+func errh(line, col uint, msg string) {
+	panic(fmt.Sprintf("%d:%d: %s", line, col, msg))
+}
+
+// Don't bother with other tests if TestSmoke doesn't pass.
+func TestSmoke(t *testing.T) {
+	const src = "if (+foo\t+=..123/***/4.2_0e-0i'a'`raw`\"string\" ;//$"
+	tokens := []token{_If, _Lparen, _Operator, _Name, _AssignOp, _Dot, _Literal, _Literal, _Literal, _Literal, _Literal, _Semi, _EOF}
+
+	var got scanner
+	got.init(strings.NewReader(src), errh, 0)
+	for _, want := range tokens {
+		got.next()
+		if got.tok != want {
+			t.Errorf("%d:%d: got %s; want %s", got.line, got.col, got.tok, want)
+			continue
+		}
+	}
+}
+
+// Once TestSmoke passes, run TestTokens next.
+func TestTokens(t *testing.T) {
+	var got scanner
+	for _, want := range sampleTokens {
+		got.init(strings.NewReader(want.src), func(line, col uint, msg string) {
+			t.Errorf("%s:%d:%d: %s", want.src, line, col, msg)
+		}, 0)
+		got.next()
+		if got.tok != want.tok {
+			t.Errorf("%s: got %s; want %s", want.src, got.tok, want.tok)
+			continue
+		}
+		if (got.tok == _Name || got.tok == _Literal) && got.lit != want.src {
+			t.Errorf("%s: got %q; want %q", want.src, got.lit, want.src)
+		}
+	}
+}
+
 func TestScanner(t *testing.T) {
 	if testing.Short() {
 		t.Skip("skipping test in short mode")
 	}
 
-	src, err := os.Open("parser.go")
+	filename := *src_ // can be changed via -src flag
+	src, err := os.Open(filename)
 	if err != nil {
 		t.Fatal(err)
 	}
 	defer src.Close()
 
 	var s scanner
-	s.init(src, nil, 0)
+	s.init(src, errh, 0)
 	for {
 		s.next()
 		if s.tok == _EOF {
@@ -34,64 +74,66 @@ func TestScanner(t *testing.T) {
 			continue
 		}
 		switch s.tok {
-		case _Name:
-			fmt.Println(s.line, s.tok, "=>", s.lit)
+		case _Name, _Literal:
+			fmt.Printf("%s:%d:%d: %s => %s\n", filename, s.line, s.col, s.tok, s.lit)
 		case _Operator:
-			fmt.Println(s.line, s.tok, "=>", s.op, s.prec)
+			fmt.Printf("%s:%d:%d: %s => %s (prec = %d)\n", filename, s.line, s.col, s.tok, s.op, s.prec)
 		default:
-			fmt.Println(s.line, s.tok)
+			fmt.Printf("%s:%d:%d: %s\n", filename, s.line, s.col, s.tok)
 		}
 	}
 }
 
-func TestTokens(t *testing.T) {
+func TestEmbeddedTokens(t *testing.T) {
 	// make source
 	var buf bytes.Buffer
 	for i, s := range sampleTokens {
-		buf.WriteString("\t\t\t\t"[:i&3])                          // leading indentation
-		buf.WriteString(s.src)                                     // token
-		buf.WriteString("        "[:i&7])                          // trailing spaces
-		fmt.Fprintf(&buf, "/*line foo:%d */ // bar\n", i+linebase) // comments (don't crash w/o directive handler)
+		buf.WriteString("\t\t\t\t"[:i&3])                            // leading indentation
+		buf.WriteString(s.src)                                       // token
+		buf.WriteString("        "[:i&7])                            // trailing spaces
+		buf.WriteString(fmt.Sprintf("/*line foo:%d */ // bar\n", i)) // comments + newline (don't crash w/o directive handler)
 	}
 
 	// scan source
 	var got scanner
+	var src string
 	got.init(&buf, func(line, col uint, msg string) {
-		t.Fatalf("%d:%d: %s", line, col, msg)
+		t.Fatalf("%s:%d:%d: %s", src, line, col, msg)
 	}, 0)
 	got.next()
 	for i, want := range sampleTokens {
+		src = want.src
 		nlsemi := false
 
-		if got.line != uint(i+linebase) {
-			t.Errorf("got line %d; want %d", got.line, i+linebase)
+		if got.line-linebase != uint(i) {
+			t.Errorf("%s: got line %d; want %d", src, got.line-linebase, i)
 		}
 
 		if got.tok != want.tok {
-			t.Errorf("got tok = %s; want %s", got.tok, want.tok)
+			t.Errorf("%s: got tok %s; want %s", src, got.tok, want.tok)
 			continue
 		}
 
 		switch want.tok {
 		case _Semi:
 			if got.lit != "semicolon" {
-				t.Errorf("got %s; want semicolon", got.lit)
+				t.Errorf("%s: got %s; want semicolon", src, got.lit)
 			}
 
 		case _Name, _Literal:
 			if got.lit != want.src {
-				t.Errorf("got lit = %q; want %q", got.lit, want.src)
+				t.Errorf("%s: got lit %q; want %q", src, got.lit, want.src)
 				continue
 			}
 			nlsemi = true
 
 		case _Operator, _AssignOp, _IncOp:
 			if got.op != want.op {
-				t.Errorf("got op = %s; want %s", got.op, want.op)
+				t.Errorf("%s: got op %s; want %s", src, got.op, want.op)
 				continue
 			}
 			if got.prec != want.prec {
-				t.Errorf("got prec = %d; want %d", got.prec, want.prec)
+				t.Errorf("%s: got prec %d; want %d", src, got.prec, want.prec)
 				continue
 			}
 			nlsemi = want.tok == _IncOp
@@ -103,11 +145,11 @@ func TestTokens(t *testing.T) {
 		if nlsemi {
 			got.next()
 			if got.tok != _Semi {
-				t.Errorf("got tok = %s; want ;", got.tok)
+				t.Errorf("%s: got tok %s; want ;", src, got.tok)
 				continue
 			}
 			if got.lit != "newline" {
-				t.Errorf("got %s; want newline", got.lit)
+				t.Errorf("%s: got %s; want newline", src, got.lit)
 			}
 		}
 
@@ -299,7 +341,7 @@ func TestComments(t *testing.T) {
 		{"//", comment{0, 0, "//"}},
 
 		/*-style comments */
-		{"/* regular comment */", comment{0, 0, "/* regular comment */"}},
+		{"123/* regular comment */", comment{0, 3, "/* regular comment */"}},
 		{"package p /* regular comment", comment{0, 0, ""}},
 		{"\n\n\n/*\n*//* want this one */", comment{4, 2, "/* want this one */"}},
 		{"\n\n/**/", comment{2, 0, "/**/"}},
@@ -307,17 +349,16 @@ func TestComments(t *testing.T) {
 	} {
 		var s scanner
 		var got comment
-		s.init(strings.NewReader(test.src),
-			func(line, col uint, msg string) {
-				if msg[0] != '/' {
-					// error
-					if msg != "comment not terminated" {
-						t.Errorf("%q: %s", test.src, msg)
-					}
-					return
+		s.init(strings.NewReader(test.src), func(line, col uint, msg string) {
+			if msg[0] != '/' {
+				// error
+				if msg != "comment not terminated" {
+					t.Errorf("%q: %s", test.src, msg)
 				}
-				got = comment{line - linebase, col - colbase, msg} // keep last one
-			}, comments)
+				return
+			}
+			got = comment{line - linebase, col - colbase, msg} // keep last one
+		}, comments)
 
 		for {
 			s.next()
@@ -542,7 +583,7 @@ func TestNumbers(t *testing.T) {
 
 func TestScanErrors(t *testing.T) {
 	for _, test := range []struct {
-		src, msg  string
+		src, err  string
 		line, col uint // 0-based
 	}{
 		// Note: Positions for lexical errors are the earliest position
@@ -582,7 +623,7 @@ func TestScanErrors(t *testing.T) {
 		{`'xx`, "invalid character literal (missing closing ')", 0, 0},
 		{`'xx'`, "invalid character literal (more than one character)", 0, 0},
 
-		{"\"\n", "newline in string", 0, 1},
+		{"\n   \"foo\n", "newline in string", 1, 7},
 		{`"`, "string not terminated", 0, 0},
 		{`"foo`, "string not terminated", 0, 0},
 		{"`", "string not terminated", 0, 0},
@@ -607,27 +648,19 @@ func TestScanErrors(t *testing.T) {
 		{`var s string = "\x"`, "non-hex character in escape sequence: \"", 0, 18},
 		{`return "\Uffffffff"`, "escape sequence is invalid Unicode code point U+FFFFFFFF", 0, 18},
 
+		{"0b.0", "invalid radix point in binary literal", 0, 2},
+		{"0x.p0\n", "hexadecimal literal has no digits", 0, 3},
+
 		// former problem cases
 		{"package p\n\n\xef", "invalid UTF-8 encoding", 2, 0},
 	} {
 		var s scanner
-		nerrors := 0
-		s.init(strings.NewReader(test.src), func(line, col uint, msg string) {
-			nerrors++
-			// only check the first error
-			if nerrors == 1 {
-				if msg != test.msg {
-					t.Errorf("%q: got msg = %q; want %q", test.src, msg, test.msg)
-				}
-				if line != test.line+linebase {
-					t.Errorf("%q: got line = %d; want %d", test.src, line, test.line+linebase)
-				}
-				if col != test.col+colbase {
-					t.Errorf("%q: got col = %d; want %d", test.src, col, test.col+colbase)
-				}
-			} else if nerrors > 1 {
-				// TODO(gri) make this use position info
-				t.Errorf("%q: got unexpected %q at line = %d", test.src, msg, line)
+		var line, col uint
+		var err string
+		s.init(strings.NewReader(test.src), func(l, c uint, msg string) {
+			if err == "" {
+				line, col = l-linebase, c-colbase
+				err = msg
 			}
 		}, 0)
 
@@ -638,8 +671,18 @@ func TestScanErrors(t *testing.T) {
 			}
 		}
 
-		if nerrors == 0 {
-			t.Errorf("%q: got no error; want %q", test.src, test.msg)
+		if err != "" {
+			if err != test.err {
+				t.Errorf("%q: got err = %q; want %q", test.src, err, test.err)
+			}
+			if line != test.line {
+				t.Errorf("%q: got line = %d; want %d", test.src, line, test.line)
+			}
+			if col != test.col {
+				t.Errorf("%q: got col = %d; want %d", test.src, col, test.col)
+			}
+		} else {
+			t.Errorf("%q: got no error; want %q", test.src, test.err)
 		}
 	}
 }
@@ -648,7 +691,7 @@ func TestIssue21938(t *testing.T) {
 	s := "/*" + strings.Repeat(" ", 4089) + "*/ .5"
 
 	var got scanner
-	got.init(strings.NewReader(s), nil, 0)
+	got.init(strings.NewReader(s), errh, 0)
 	got.next()
 
 	if got.tok != _Literal || got.lit != ".5" {
author	Robert Griesemer <gri@golang.org>	2020-02-10 21:32:04 -0800
committer	Robert Griesemer <gri@golang.org>	2020-03-04 23:08:10 +0000
commit	17e6252c051e09a3a433620182fc54952a402d73 (patch)
tree	a599c4045ed0d5c5f75c20685c39c934b0e7f0fa /src/cmd/compile/internal/syntax
parent	19ed0d993cf7b0df804c4c2e96dc674da4059e03 (diff)
download	go-17e6252c051e09a3a433620182fc54952a402d73.tar.gz go-17e6252c051e09a3a433620182fc54952a402d73.zip