about | summary | refs | log | tree | commit | diff
path: root/src/cmd/compile/internal/syntax/scanner.go
diff options
context:
space:
mode:
author: Robert Griesemer <gri@golang.org> 2019-01-12 20:33:58 -0800
committer: Robert Griesemer <gri@golang.org> 2019-02-11 23:22:50 +0000
commit: ceb849dd97aebf08eee5f3683619494c56190f81 (patch)
tree: 6616c63bbe055299c882fe78bc4514ce16ca615a /src/cmd/compile/internal/syntax/scanner.go
parent: 7bc2aa670f47266d3c5a840d748a1f2e805b89d7 (diff)
download: go-ceb849dd97aebf08eee5f3683619494c56190f81.tar.gz
download: go-ceb849dd97aebf08eee5f3683619494c56190f81.zip
cmd/compile: accept new Go2 number literals
This CL introduces compiler support for the new binary and octal integer
literals, hexadecimal floats, and digit separators for all number literals.

The new Go 2 number literal scanner accepts the following liberal format:

    number   = [ prefix ] digits [ "." digits ] [ exponent ] [ "i" ] .
    prefix   = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
    digits   = { digit | "_" } .
    exponent = ( "e" | "E" | "p" | "P" ) [ "+" | "-" ] digits .

If the number starts with "0x" or "0X", digit is any hexadecimal digit;
otherwise, digit is any decimal digit. If the accepted number is not valid,
errors are reported accordingly.

See the new test cases in scanner_test.go for a selection of valid and
invalid numbers and the respective error messages.

R=Go1.13

Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.

Change-Id: Ic8febc7bd4dc5186b16a8c8897691e81125cf0ca
Reviewed-on: https://go-review.googlesource.com/c/157677
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/syntax/scanner.go')
-rw-r--r-- src/cmd/compile/internal/syntax/scanner.go 233
1 files changed, 167 insertions, 66 deletions
diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go
index 112afa5eb6..0a77d48b3d 100644
--- a/src/cmd/compile/internal/syntax/scanner.go
+++ b/src/cmd/compile/internal/syntax/scanner.go
@@ -47,6 +47,10 @@ func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mod
s.nlsemi = false
}
+func (s *scanner) errorf(format string, args ...interface{}) {
+ s.error(fmt.Sprintf(format, args...))
+}
+
// next advances the scanner by reading the next token.
//
// If a read, source encoding, or lexical error occurs, next calls
@@ -149,8 +153,9 @@ redo:
case '.':
c = s.getr()
- if isDigit(c) {
- s.unread(1)
+ if isDecimal(c) {
+ s.ungetr()
+ s.unread(1) // correct position of '.' (needed by startLit in number)
s.number('.')
break
}
@@ -304,7 +309,7 @@ redo:
default:
s.tok = 0
- s.error(fmt.Sprintf("invalid character %#U", c))
+ s.errorf("invalid character %#U", c)
goto redo
}
@@ -320,11 +325,7 @@ assignop:
}
func isLetter(c rune) bool {
- return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_'
-}
-
-func isDigit(c rune) bool {
- return '0' <= c && c <= '9'
+ return 'a' <= lower(c) && lower(c) <= 'z' || c == '_'
}
func (s *scanner) ident() {
@@ -332,7 +333,7 @@ func (s *scanner) ident() {
// accelerate common case (7bit ASCII)
c := s.getr()
- for isLetter(c) || isDigit(c) {
+ for isLetter(c) || isDecimal(c) {
c = s.getr()
}
@@ -372,10 +373,10 @@ func (s *scanner) isIdentRune(c rune, first bool) bool {
// ok
case unicode.IsDigit(c):
if first {
- s.error(fmt.Sprintf("identifier cannot begin with digit %#U", c))
+ s.errorf("identifier cannot begin with digit %#U", c)
}
case c >= utf8.RuneSelf:
- s.error(fmt.Sprintf("invalid identifier character %#U", c))
+ s.errorf("invalid identifier character %#U", c)
default:
return false
}
@@ -401,86 +402,188 @@ func init() {
}
}
+func lower(c rune) rune { return ('a' - 'A') | c } // returns lower-case c iff c is ASCII letter
+func isDecimal(c rune) bool { return '0' <= c && c <= '9' }
+func isHex(c rune) bool { return '0' <= c && c <= '9' || 'a' <= lower(c) && lower(c) <= 'f' }
+
+// digits accepts the sequence { digit | '_' } starting with c0.
+// If base <= 10, digits accepts any decimal digit but records
+// the index (relative to the literal start) of a digit >= base
+// in *invalid, if *invalid < 0.
+// digits returns the first rune that is not part of the sequence
+// anymore, and a bitset describing whether the sequence contained
+// digits (bit 0 is set), or separators '_' (bit 1 is set).
+func (s *scanner) digits(c0 rune, base int, invalid *int) (c rune, digsep int) {
+ c = c0
+ if base <= 10 {
+ max := rune('0' + base)
+ for isDecimal(c) || c == '_' {
+ ds := 1
+ if c == '_' {
+ ds = 2
+ } else if c >= max && *invalid < 0 {
+ *invalid = int(s.col0 - s.col) // record invalid rune index
+ }
+ digsep |= ds
+ c = s.getr()
+ }
+ } else {
+ for isHex(c) || c == '_' {
+ ds := 1
+ if c == '_' {
+ ds = 2
+ }
+ digsep |= ds
+ c = s.getr()
+ }
+ }
+ return
+}
+
func (s *scanner) number(c rune) {
s.startLit()
+ base := 10 // number base
+ prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
+ digsep := 0 // bit 0: digit present, bit 1: '_' present
+ invalid := -1 // index of invalid digit in literal, or < 0
+
+ // integer part
+ var ds int
if c != '.' {
- s.kind = IntLit // until proven otherwise
+ s.kind = IntLit
if c == '0' {
c = s.getr()
- if c == 'x' || c == 'X' {
- // hex
+ switch lower(c) {
+ case 'x':
c = s.getr()
- hasDigit := false
- for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' {
- c = s.getr()
- hasDigit = true
- }
- if !hasDigit {
- s.error("malformed hex constant")
- }
- goto done
- }
-
- // decimal 0, octal, or float
- has8or9 := false
- for isDigit(c) {
- if c > '7' {
- has8or9 = true
- }
+ base, prefix = 16, 'x'
+ case 'o':
c = s.getr()
- }
- if c != '.' && c != 'e' && c != 'E' && c != 'i' {
- // octal
- if has8or9 {
- s.error("malformed octal constant")
- }
- goto done
- }
-
- } else {
- // decimal or float
- for isDigit(c) {
+ base, prefix = 8, 'o'
+ case 'b':
c = s.getr()
+ base, prefix = 2, 'b'
+ default:
+ base, prefix = 8, '0'
+ digsep = 1 // leading 0
}
}
+ c, ds = s.digits(c, base, &invalid)
+ digsep |= ds
}
- // float
+ // fractional part
if c == '.' {
s.kind = FloatLit
- c = s.getr()
- for isDigit(c) {
- c = s.getr()
+ if prefix == 'o' || prefix == 'b' {
+ s.error("invalid radix point in " + litname(prefix))
}
+ c, ds = s.digits(s.getr(), base, &invalid)
+ digsep |= ds
+ }
+
+ if digsep&1 == 0 {
+ s.error(litname(prefix) + " has no digits")
}
// exponent
- if c == 'e' || c == 'E' {
- s.kind = FloatLit
+ if e := lower(c); e == 'e' || e == 'p' {
+ switch {
+ case e == 'e' && prefix != 0 && prefix != '0':
+ s.errorf("%q exponent requires decimal mantissa", c)
+ case e == 'p' && prefix != 'x':
+ s.errorf("%q exponent requires hexadecimal mantissa", c)
+ }
c = s.getr()
- if c == '-' || c == '+' {
+ s.kind = FloatLit
+ if c == '+' || c == '-' {
c = s.getr()
}
- if !isDigit(c) {
- s.error("malformed floating-point constant exponent")
- }
- for isDigit(c) {
- c = s.getr()
+ c, ds = s.digits(c, 10, nil)
+ digsep |= ds
+ if ds&1 == 0 {
+ s.error("exponent has no digits")
}
+ } else if prefix == 'x' && s.kind == FloatLit {
+ s.error("hexadecimal mantissa requires a 'p' exponent")
}
- // complex
+ // suffix 'i'
if c == 'i' {
s.kind = ImagLit
- s.getr()
+ if prefix != 0 && prefix != '0' {
+ s.error("invalid suffix 'i' on " + litname(prefix))
+ }
+ c = s.getr()
}
-
-done:
s.ungetr()
+
s.nlsemi = true
s.lit = string(s.stopLit())
s.tok = _Literal
+
+ if s.kind == IntLit && invalid >= 0 {
+ s.errh(s.line, s.col+uint(invalid), fmt.Sprintf("invalid digit %q in %s", s.lit[invalid], litname(prefix)))
+ }
+
+ if digsep&2 != 0 {
+ if i := invalidSep(s.lit); i >= 0 {
+ s.errh(s.line, s.col+uint(i), "'_' must separate successive digits")
+ }
+ }
+}
+
+func litname(prefix rune) string {
+ switch prefix {
+ case 'x':
+ return "hexadecimal literal"
+ case 'o', '0':
+ return "octal literal"
+ case 'b':
+ return "binary literal"
+ }
+ return "decimal literal"
+}
+
+// invalidSep returns the index of the first invalid separator in x, or -1.
+func invalidSep(x string) int {
+ x1 := ' ' // prefix char, we only care if it's 'x'
+ d := '.' // digit, one of '_', '0' (a digit), or '.' (anything else)
+ i := 0
+
+ // a prefix counts as a digit
+ if len(x) >= 2 && x[0] == '0' {
+ x1 = lower(rune(x[1]))
+ if x1 == 'x' || x1 == 'o' || x1 == 'b' {
+ d = '0'
+ i = 2
+ }
+ }
+
+ // mantissa and exponent
+ for ; i < len(x); i++ {
+ p := d // previous digit
+ d = rune(x[i])
+ switch {
+ case d == '_':
+ if p != '0' {
+ return i
+ }
+ case isDecimal(d) || x1 == 'x' && isHex(d):
+ d = '0'
+ default:
+ if p == '_' {
+ return i - 1
+ }
+ d = '.'
+ }
+ }
+ if d == '_' {
+ return len(x) - 1
+ }
+
+ return -1
}
func (s *scanner) rune() {
@@ -713,12 +816,10 @@ func (s *scanner) escape(quote rune) bool {
for i := n; i > 0; i-- {
d := base
switch {
- case isDigit(c):
+ case isDecimal(c):
d = uint32(c) - '0'
- case 'a' <= c && c <= 'f':
- d = uint32(c) - ('a' - 10)
- case 'A' <= c && c <= 'F':
- d = uint32(c) - ('A' - 10)
+ case 'a' <= lower(c) && lower(c) <= 'f':
+ d = uint32(lower(c)) - ('a' - 10)
}
if d >= base {
if c < 0 {
@@ -728,7 +829,7 @@ func (s *scanner) escape(quote rune) bool {
if base == 8 {
kind = "octal"
}
- s.error(fmt.Sprintf("non-%s character in escape sequence: %c", kind, c))
+ s.errorf("non-%s character in escape sequence: %c", kind, c)
s.ungetr()
return false
}
@@ -739,7 +840,7 @@ func (s *scanner) escape(quote rune) bool {
s.ungetr()
if x > max && base == 8 {
- s.error(fmt.Sprintf("octal escape value > 255: %d", x))
+ s.errorf("octal escape value > 255: %d", x)
return false
}