cmd/compile/internal/syntax: implement comment reporting in scanner

R=go1.11 In order to collect comments in the AST and for error testing purposes, the scanner needs to not only recognize and skip comments, but also be able to report them if so desired. This change adds a mode flag to the scanner's init function which controls the scanner behavior around comments. In the common case where comments are not needed, there must be no significant overhead. Thus, comments are reported via a handler upcall rather than being returned as a _Comment token (which the parser would have to filter out with every scanner.next() call). Because the handlers for error messages, directives, and comments all look the same (they take a position and text), and because directives look like comments, and errors never start with a '/', this change simplifies the scanner's init call to only take one (error) handler instead of 2 or 3 different handlers with identical signature. It is trivial in the handler to determine if we have an error, directive, or general comment. Finally, because directives are comments, when reporting directives the full comment text is returned now rather than just the directive text. This simplifies the implementation and makes the scanner API more regular. Furthermore, it provides important information about the comment style used by a directive, which may matter eventually when we fully implement /*line file:line:col*/ directives. Change-Id: I2adbfcebecd615e4237ed3a832b6ceb9518bf09c Reviewed-on: https://go-review.googlesource.com/88215 Reviewed-by: Matthew Dempsky <mdempsky@google.com>
author: Robert Griesemer <gri@golang.org> 2018-01-17 21:42:51 -0800
committer: Robert Griesemer <gri@golang.org> 2018-02-12 22:57:57 +0000
commit: b8906889861d0efaf5682a7d26417111eaba3480 (patch)
tree: 7d7870c860fd8f586f958259cc091ad662a71055 /src/cmd/compile/internal/syntax/scanner.go
parent: 670494827c42d4ac64a52dfa909cf6048308e133 (diff)
download: go-b8906889861d0efaf5682a7d26417111eaba3480.tar.gz
go-b8906889861d0efaf5682a7d26417111eaba3480.zip
1 files changed, 61 insertions, 33 deletions
diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go
index be406d9753..1e0ff2e3cc 100644
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@@ -19,9 +19,17 @@ import (
 	"unicode/utf8"
 )
 
+// The mode flags below control which comments are reported
+// by calling the error handler. If no flag is set, comments
+// are ignored.
+const (
+	comments   uint = 1 << iota // call handler for all comments
+	directives                  // call handler for directives only
+)
+
 type scanner struct {
 	source
-	pragh  func(line, col uint, msg string)
+	mode   uint
 	nlsemi bool // if set '\n' and EOF translate to ';'
 
 	// current token, valid after calling next()
@@ -33,25 +41,32 @@ type scanner struct {
 	prec      int      // valid if tok is _Operator, _AssignOp, or _IncOp
 }
 
-func (s *scanner) init(src io.Reader, errh, pragh func(line, col uint, msg string)) {
+func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mode uint) {
 	s.source.init(src, errh)
-	s.pragh = pragh
+	s.mode = mode
 	s.nlsemi = false
 }
 
 // next advances the scanner by reading the next token.
 //
-// If a read, source encoding, or lexical error occurs, next
-// calls the error handler installed with init. The handler
-// must exist.
+// If a read, source encoding, or lexical error occurs, next calls
+// the installed error handler with the respective error position
+// and message. The error message is guaranteed to be non-empty and
+// never starts with a '/'. The error handler must exist.
+//
+// If the scanner mode includes the comments flag and a comment
+// (including comments containing directives) is encountered, the
+// error handler is also called with each comment position and text
+// (including opening /* or // and closing */, but without a newline
+// at the end of line comments). Comment text always starts with a /
+// which can be used to distinguish these handler calls from errors.
 //
-// If a //line or //go: directive is encountered at the start
-// of a line, next calls the directive handler pragh installed
-// with init, if not nil.
+// If the scanner mode includes the directives (but not the comments)
+// flag, only comments containing a //line, /*line, or //go: directive
+// are reported, in the same way as regular comments. Directives in
+// //-style comments are only recognized if they are at the beginning
+// of a line.
 //
-// The (line, col) position passed to the error and directive
-// handler is always at or after the current source reading
-// position.
 func (s *scanner) next() {
 	nlsemi := s.nlsemi
 	s.nlsemi = false
@@ -565,6 +580,10 @@ func (s *scanner) rawString() {
 	s.tok = _Literal
 }
 
+func (s *scanner) comment(text string) {
+	s.errh(s.line, s.col, text)
+}
+
 func (s *scanner) skipLine(r rune) {
 	for r >= 0 {
 		if r == '\n' {
@@ -578,14 +597,20 @@ func (s *scanner) skipLine(r rune) {
 func (s *scanner) lineComment() {
 	r := s.getr()
 
+	if s.mode&comments != 0 {
+		s.startLit()
+		s.skipLine(r)
+		s.comment("//" + string(s.stopLit()))
+		return
+	}
+
 	// directives must start at the beginning of the line (s.col == colbase)
-	if s.col != colbase || s.pragh == nil || (r != 'g' && r != 'l') {
+	if s.mode&directives == 0 || s.col != colbase || (r != 'g' && r != 'l') {
 		s.skipLine(r)
 		return
 	}
-	// s.col == colbase && s.pragh != nil && (r == 'g' || r == 'l')
 
-	// recognize directives
+	// recognize go: or line directives
 	prefix := "go:"
 	if r == 'l' {
 		prefix = "line "
@@ -598,38 +623,43 @@ func (s *scanner) lineComment() {
 		r = s.getr()
 	}
 
-	// directive text without line ending (which may be "\r\n" if Windows),
+	// directive text
 	s.startLit()
 	s.skipLine(r)
-	text := s.stopLit()
-	if i := len(text) - 1; i >= 0 && text[i] == '\r' {
-		text = text[:i]
-	}
-
-	s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after //
+	s.comment("//" + prefix + string(s.stopLit()))
 }
 
-func (s *scanner) skipComment(r rune) {
+func (s *scanner) skipComment(r rune) bool {
 	for r >= 0 {
 		for r == '*' {
 			r = s.getr()
 			if r == '/' {
-				return
+				return true
 			}
 		}
 		r = s.getr()
 	}
 	s.errh(s.line, s.col, "comment not terminated")
+	return false
 }
 
 func (s *scanner) fullComment() {
 	r := s.getr()
 
-	if s.pragh == nil || r != 'l' {
+	if s.mode&comments != 0 {
+		s.startLit()
+		if s.skipComment(r) {
+			s.comment("/*" + string(s.stopLit()))
+		} else {
+			s.killLit() // not a complete comment - ignore
+		}
+		return
+	}
+
+	if s.mode&directives == 0 || r != 'l' {
 		s.skipComment(r)
 		return
 	}
-	// s.pragh != nil && r == 'l'
 
 	// recognize line directive
 	const prefix = "line "
@@ -641,15 +671,13 @@ func (s *scanner) fullComment() {
 		r = s.getr()
 	}
 
-	// directive text without comment ending
+	// directive text
 	s.startLit()
-	s.skipComment(r)
-	text := s.stopLit()
-	if i := len(text) - 2; i >= 0 && text[i] == '*' && text[i+1] == '/' {
-		text = text[:i]
+	if s.skipComment(r) {
+		s.comment("/*" + prefix + string(s.stopLit()))
+	} else {
+		s.killLit() // not a complete comment - ignore
 	}
-
-	s.pragh(s.line, s.col+2, prefix+string(text)) // +2 since directive text starts after /*
 }
 
 func (s *scanner) escape(quote rune) bool {
author	Robert Griesemer <gri@golang.org>	2018-01-17 21:42:51 -0800
committer	Robert Griesemer <gri@golang.org>	2018-02-12 22:57:57 +0000
commit	b8906889861d0efaf5682a7d26417111eaba3480 (patch)
tree	7d7870c860fd8f586f958259cc091ad662a71055 /src/cmd/compile/internal/syntax/scanner.go
parent	670494827c42d4ac64a52dfa909cf6048308e133 (diff)
download	go-b8906889861d0efaf5682a7d26417111eaba3480.tar.gz go-b8906889861d0efaf5682a7d26417111eaba3480.zip