aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robert Griesemer <gri@golang.org> 2010-11-02 10:38:07 -0700
committer Robert Griesemer <gri@golang.org> 2010-11-02 10:38:07 -0700
commit 396228a6525b211e0c368f255724eeefef264062 (patch)
tree 52061abf037e2057988ab48b6965e6aee70f3d23
parent 0808b199e0f6c3143c706a3a489dc727868b19fc (diff)
download go-396228a6525b211e0c368f255724eeefef264062.tar.gz
go-396228a6525b211e0c368f255724eeefef264062.zip
go/scanner: removed scanner/internal-only uses of token.Position
First step towards a more light-weight implementation of token.Position:
- only use token.Position for reporting token and error position
- use offsets only for scanner control
- no interface changes yet

R=rsc
CC=golang-dev
https://golang.org/cl/2825041
-rw-r--r-- src/pkg/go/scanner/scanner.go 225
-rw-r--r-- src/pkg/go/scanner/scanner_test.go 20
2 files changed, 141 insertions, 104 deletions
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go
index 663636c46e..ab11714705 100644
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -29,10 +29,14 @@ type Scanner struct {
mode uint // scanning mode
// scanning state
- pos token.Position // previous reading position (position before ch)
- offset int // current reading offset (position after ch)
- ch int // one char look-ahead
- insertSemi bool // insert a semicolon before next newline
+ filename string // current filename; may change via //line filename:line comment
+ line int // current line
+ column int // current column
+
+ ch int // current character
+ offset int // character offset
+ rdOffset int // reading offset (position after current character)
+ insertSemi bool // insert a semicolon before next newline
// public state - ok to modify
ErrorCount int // number of errors encountered
@@ -43,29 +47,31 @@ type Scanner struct {
// S.ch < 0 means end-of-file.
//
func (S *Scanner) next() {
- if S.offset < len(S.src) {
- S.pos.Offset = S.offset
- S.pos.Column++
+ S.column++
+ if S.rdOffset < len(S.src) {
+ S.offset = S.rdOffset
if S.ch == '\n' {
- // next character starts a new line
- S.pos.Line++
- S.pos.Column = 1
+ S.line++
+ S.column = 1
}
- r, w := int(S.src[S.offset]), 1
+ r, w := int(S.src[S.rdOffset]), 1
switch {
case r == 0:
- S.error(S.pos, "illegal character NUL")
+ S.error("illegal character NUL")
case r >= 0x80:
// not ASCII
- r, w = utf8.DecodeRune(S.src[S.offset:])
+ r, w = utf8.DecodeRune(S.src[S.rdOffset:])
if r == utf8.RuneError && w == 1 {
- S.error(S.pos, "illegal UTF-8 encoding")
+ S.error("illegal UTF-8 encoding")
}
}
- S.offset += w
+ S.rdOffset += w
S.ch = r
} else {
- S.pos.Offset = len(S.src)
+ S.offset = len(S.src)
+ if S.ch == '\n' {
+ S.column = 1
+ }
S.ch = -1 // eof
}
}
@@ -94,9 +100,17 @@ func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint)
S.src = src
S.err = err
S.mode = mode
- S.pos = token.Position{filename, 0, 1, 0}
+
+ S.filename = filename
+ S.line = 1
+ S.column = 0
+
+ S.ch = ' '
S.offset = 0
+ S.rdOffset = 0
+ S.insertSemi = false
S.ErrorCount = 0
+
S.next()
}
@@ -131,7 +145,12 @@ func charString(ch int) string {
}
-func (S *Scanner) error(pos token.Position, msg string) {
+func (S *Scanner) error(msg string) {
+ S.errorAt(token.Position{S.filename, S.offset, S.line, S.column}, msg)
+}
+
+
+func (S *Scanner) errorAt(pos token.Position, msg string) {
if S.err != nil {
S.err.Error(pos, msg)
}
@@ -139,18 +158,28 @@ func (S *Scanner) error(pos token.Position, msg string) {
}
-func (S *Scanner) expect(ch int) {
- if S.ch != ch {
- S.error(S.pos, "expected "+charString(ch)+", found "+charString(S.ch))
+var prefix = []byte("//line ")
+
+func (S *Scanner) interpretLineComment(text []byte) {
+ if bytes.HasPrefix(text, prefix) {
+ // get filename and line number, if any
+ if i := bytes.Index(text, []byte{':'}); i > 0 {
+ if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
+ // valid //line filename:line comment;
+ // update scanner position
+ S.filename = string(text[len(prefix):i])
+ S.line = line - 1 // -1 since the '\n' has not been consumed yet
+ }
+ }
}
- S.next() // always make progress
}
-var prefix = []byte("line ")
-
-func (S *Scanner) scanComment(pos token.Position) {
- // first '/' already consumed
+func (S *Scanner) scanComment() {
+ // initial '/' already consumed; S.ch == '/' || S.ch == '*'
+ offs := S.offset - 1 // position of initial '/'
+ col := S.column - 1
+ pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
if S.ch == '/' {
//-style comment
@@ -159,21 +188,9 @@ func (S *Scanner) scanComment(pos token.Position) {
if S.ch == '\n' {
// '\n' is not part of the comment for purposes of scanning
// (the comment ends on the same line where it started)
- if pos.Column == 1 {
- text := S.src[pos.Offset+2 : S.pos.Offset]
- if bytes.HasPrefix(text, prefix) {
- // comment starts at beginning of line with "//line ";
- // get filename and line number, if any
- i := bytes.Index(text, []byte{':'})
- if i >= 0 {
- if line, err := strconv.Atoi(string(text[i+1:])); err == nil && line > 0 {
- // valid //line filename:line comment;
- // update scanner position
- S.pos.Filename = string(text[len(prefix):i])
- S.pos.Line = line - 1 // -1 since the '\n' has not been consumed yet
- }
- }
- }
+ if col == 1 {
+ // comment starts at the beginning of the current line
+ S.interpretLineComment(S.src[offs:S.offset])
}
return
}
@@ -181,7 +198,7 @@ func (S *Scanner) scanComment(pos token.Position) {
} else {
/*-style comment */
- S.expect('*')
+ S.next()
for S.ch >= 0 {
ch := S.ch
S.next()
@@ -192,47 +209,56 @@ func (S *Scanner) scanComment(pos token.Position) {
}
}
- S.error(pos, "comment not terminated")
+ S.errorAt(pos, "comment not terminated")
}
-func (S *Scanner) findLineEnd(pos token.Position) bool {
- // initial '/' already consumed; pos is position of '/'
+func (S *Scanner) findLineEnd() bool {
+ // initial '/' already consumed
+
+ defer func(line, col, offs int) {
+ // reset scanner state to where it was upon calling findLineEnd
+ // (we don't scan //line comments and ignore errors thus
+ // S.filename and S.ErrorCount don't change)
+ S.line = line
+ S.column = col
+ S.ch = '/'
+ S.offset = offs
+ S.rdOffset = offs + 1
+ S.next() // consume initial '/' again
+ }(S.line, S.column-1, S.offset-1)
// read ahead until a newline, EOF, or non-comment token is found
- lineend := false
- for pos1 := pos; S.ch == '/' || S.ch == '*'; {
+ for S.ch == '/' || S.ch == '*' {
if S.ch == '/' {
//-style comment always contains a newline
- lineend = true
- break
+ return true
}
- S.scanComment(pos1)
- if pos1.Line < S.pos.Line {
- /*-style comment contained a newline */
- lineend = true
- break
+ /*-style comment: look for newline */
+ S.next()
+ for S.ch >= 0 {
+ ch := S.ch
+ if ch == '\n' {
+ return true
+ }
+ S.next()
+ if ch == '*' && S.ch == '/' {
+ S.next()
+ break
+ }
}
S.skipWhitespace() // S.insertSemi is set
if S.ch < 0 || S.ch == '\n' {
- // line end
- lineend = true
- break
+ return true
}
if S.ch != '/' {
// non-comment token
- break
+ return false
}
- pos1 = S.pos
S.next() // consume '/'
}
- // reset position to where it was upon calling findLineEnd
- S.pos = pos
- S.offset = pos.Offset + 1
- S.next() // consume initial '/' again
-
- return lineend
+ return false
}
@@ -247,11 +273,11 @@ func isDigit(ch int) bool {
func (S *Scanner) scanIdentifier() token.Token {
- pos := S.pos.Offset
+ offs := S.offset
for isLetter(S.ch) || isDigit(S.ch) {
S.next()
}
- return token.Lookup(S.src[pos:S.pos.Offset])
+ return token.Lookup(S.src[offs:S.offset])
}
@@ -275,7 +301,7 @@ func (S *Scanner) scanMantissa(base int) {
}
-func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.Token {
+func (S *Scanner) scanNumber(seenDecimalPoint bool) token.Token {
// digitVal(S.ch) < 10
tok := token.INT
@@ -287,6 +313,7 @@ func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.To
if S.ch == '0' {
// int or float
+ pos := token.Position{S.filename, S.offset, S.line, S.column}
S.next()
if S.ch == 'x' || S.ch == 'X' {
// hexadecimal int
@@ -306,7 +333,7 @@ func (S *Scanner) scanNumber(pos token.Position, seenDecimalPoint bool) token.To
}
// octal int
if seenDecimalDigit {
- S.error(pos, "illegal octal number")
+ S.errorAt(pos, "illegal octal number")
}
}
goto exit
@@ -343,7 +370,7 @@ exit:
func (S *Scanner) scanEscape(quote int) {
- pos := S.pos
+ pos := token.Position{S.filename, S.offset, S.line, S.column}
var i, base, max uint32
switch S.ch {
@@ -363,7 +390,7 @@ func (S *Scanner) scanEscape(quote int) {
i, base, max = 8, 16, unicode.MaxRune
default:
S.next() // always make progress
- S.error(pos, "unknown escape sequence")
+ S.errorAt(pos, "unknown escape sequence")
return
}
@@ -371,7 +398,7 @@ func (S *Scanner) scanEscape(quote int) {
for ; i > 0 && S.ch != quote && S.ch >= 0; i-- {
d := uint32(digitVal(S.ch))
if d >= base {
- S.error(S.pos, "illegal character in escape sequence")
+ S.error("illegal character in escape sequence")
break
}
x = x*base + d
@@ -382,13 +409,14 @@ func (S *Scanner) scanEscape(quote int) {
S.next()
}
if x > max || 0xd800 <= x && x < 0xe000 {
- S.error(pos, "escape sequence is invalid Unicode code point")
+ S.errorAt(pos, "escape sequence is invalid Unicode code point")
}
}
-func (S *Scanner) scanChar(pos token.Position) {
- // '\'' already consumed
+func (S *Scanner) scanChar() {
+ // '\'' opening already consumed
+ pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
n := 0
for S.ch != '\'' {
@@ -396,7 +424,7 @@ func (S *Scanner) scanChar(pos token.Position) {
n++
S.next()
if ch == '\n' || ch < 0 {
- S.error(pos, "character literal not terminated")
+ S.errorAt(pos, "character literal not terminated")
n = 1
break
}
@@ -408,19 +436,20 @@ func (S *Scanner) scanChar(pos token.Position) {
S.next()
if n != 1 {
- S.error(pos, "illegal character literal")
+ S.errorAt(pos, "illegal character literal")
}
}
-func (S *Scanner) scanString(pos token.Position) {
- // '"' already consumed
+func (S *Scanner) scanString() {
+ // '"' opening already consumed
+ pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
for S.ch != '"' {
ch := S.ch
S.next()
if ch == '\n' || ch < 0 {
- S.error(pos, "string not terminated")
+ S.errorAt(pos, "string not terminated")
break
}
if ch == '\\' {
@@ -432,14 +461,15 @@ func (S *Scanner) scanString(pos token.Position) {
}
-func (S *Scanner) scanRawString(pos token.Position) {
- // '`' already consumed
+func (S *Scanner) scanRawString() {
+ // '`' opening already consumed
+ pos := token.Position{S.filename, S.offset - 1, S.line, S.column - 1}
for S.ch != '`' {
ch := S.ch
S.next()
if ch < 0 {
- S.error(pos, "string not terminated")
+ S.errorAt(pos, "string not terminated")
break
}
}
@@ -524,7 +554,8 @@ scanAgain:
// current token start
insertSemi := false
- pos, tok = S.pos, token.ILLEGAL
+ pos, tok = token.Position{S.filename, S.offset, S.line, S.column}, token.ILLEGAL
+ offs := S.offset
// determine token value
switch ch := S.ch; {
@@ -536,7 +567,7 @@ scanAgain:
}
case digitVal(ch) < 10:
insertSemi = true
- tok = S.scanNumber(pos, false)
+ tok = S.scanNumber(false)
default:
S.next() // always make progress
switch ch {
@@ -555,21 +586,21 @@ scanAgain:
case '"':
insertSemi = true
tok = token.STRING
- S.scanString(pos)
+ S.scanString()
case '\'':
insertSemi = true
tok = token.CHAR
- S.scanChar(pos)
+ S.scanChar()
case '`':
insertSemi = true
tok = token.STRING
- S.scanRawString(pos)
+ S.scanRawString()
case ':':
tok = S.switch2(token.COLON, token.DEFINE)
case '.':
if digitVal(S.ch) < 10 {
insertSemi = true
- tok = S.scanNumber(pos, true)
+ tok = S.scanNumber(true)
} else if S.ch == '.' {
S.next()
if S.ch == '.' {
@@ -613,15 +644,19 @@ scanAgain:
case '/':
if S.ch == '/' || S.ch == '*' {
// comment
- if S.insertSemi && S.findLineEnd(pos) {
+ line := S.line
+ col := S.column - 1 // beginning of comment
+ if S.insertSemi && S.findLineEnd() {
// reset position to the beginning of the comment
- S.pos = pos
- S.offset = pos.Offset + 1
+ S.line = line
+ S.column = col
S.ch = '/'
+ S.offset = offs
+ S.rdOffset = offs + 1
S.insertSemi = false // newline consumed
return pos, token.SEMICOLON, newline
}
- S.scanComment(pos)
+ S.scanComment()
if S.mode&ScanComments == 0 {
// skip comment
S.insertSemi = false // newline consumed
@@ -659,7 +694,7 @@ scanAgain:
tok = S.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
default:
if S.mode&AllowIllegalChars == 0 {
- S.error(pos, "illegal character "+charString(ch))
+ S.errorAt(pos, "illegal character "+charString(ch))
}
insertSemi = S.insertSemi // preserve insertSemi info
}
@@ -668,7 +703,7 @@ scanAgain:
if S.mode&InsertSemis != 0 {
S.insertSemi = insertSemi
}
- return pos, tok, S.src[pos.Offset:S.pos.Offset]
+ return pos, tok, S.src[offs:S.offset]
}
diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go
index 794b191e83..c40753fb03 100644
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -198,16 +198,16 @@ func newlineCount(s string) int {
func checkPos(t *testing.T, lit string, pos, expected token.Position) {
if pos.Filename != expected.Filename {
- t.Errorf("bad filename for %s: got %s, expected %s", lit, pos.Filename, expected.Filename)
+ t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename)
}
if pos.Offset != expected.Offset {
- t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, expected.Offset)
+ t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset)
}
if pos.Line != expected.Line {
- t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, expected.Line)
+ t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line)
}
if pos.Column != expected.Column {
- t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, expected.Column)
+ t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column)
}
}
@@ -276,15 +276,15 @@ func checkSemi(t *testing.T, line string, mode uint) {
semiLit = ";"
}
// next token must be a semicolon
- offs := pos.Offset + 1
+ semiPos := pos
+ semiPos.Offset++
+ semiPos.Column++
pos, tok, lit = S.Scan()
if tok == token.SEMICOLON {
- if pos.Offset != offs {
- t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, offs)
- }
if string(lit) != semiLit {
t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit)
}
+ checkPos(t, line, pos, semiPos)
} else {
t.Errorf("bad token for %q: got %s, expected ;", line, tok.String())
}
@@ -399,11 +399,13 @@ var lines = []string{
"foo$/*\n*/",
"foo$/*comment*/ \n",
"foo$/*\n*/ ",
+
"foo $// comment\n",
"foo $/*comment*/\n",
"foo $/*\n*/",
-
+ "foo $/* */ /* \n */ bar$/**/\n",
"foo $/*0*/ /*1*/ /*2*/\n",
+
"foo $/*comment*/ \n",
"foo $/*0*/ /*1*/ /*2*/ \n",
"foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n",