diff options
author | Robert Griesemer <gri@golang.org> | 2019-01-12 20:33:58 -0800 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2019-02-11 23:22:50 +0000 |
commit | ceb849dd97aebf08eee5f3683619494c56190f81 (patch) | |
tree | 6616c63bbe055299c882fe78bc4514ce16ca615a /src/cmd/compile/internal/syntax/scanner.go | |
parent | 7bc2aa670f47266d3c5a840d748a1f2e805b89d7 (diff) | |
download | go-ceb849dd97aebf08eee5f3683619494c56190f81.tar.gz go-ceb849dd97aebf08eee5f3683619494c56190f81.zip |
cmd/compile: accept new Go2 number literals
This CL introduces compiler support for the new binary and octal integer
literals, hexadecimal floats, and digit separators for all number literals.
The new Go 2 number literal scanner accepts the following liberal format:
number = [ prefix ] digits [ "." digits ] [ exponent ] [ "i" ] .
prefix = "0" [ "b" | "B" | "o" | "O" | "x" | "X" ] .
digits = { digit | "_" } .
exponent = ( "e" | "E" | "p" | "P" ) [ "+" | "-" ] digits .
If the number starts with "0x" or "0X", digit is any hexadecimal digit;
otherwise, digit is any decimal digit. If the accepted number is not valid,
errors are reported accordingly.
See the new test cases in scanner_test.go for a selection of valid and
invalid numbers and the respective error messages.
R=Go1.13
Updates #12711.
Updates #19308.
Updates #28493.
Updates #29008.
Change-Id: Ic8febc7bd4dc5186b16a8c8897691e81125cf0ca
Reviewed-on: https://go-review.googlesource.com/c/157677
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/syntax/scanner.go')
-rw-r--r-- | src/cmd/compile/internal/syntax/scanner.go | 233 |
1 files changed, 167 insertions, 66 deletions
diff --git a/src/cmd/compile/internal/syntax/scanner.go b/src/cmd/compile/internal/syntax/scanner.go index 112afa5eb6..0a77d48b3d 100644 --- a/src/cmd/compile/internal/syntax/scanner.go +++ b/src/cmd/compile/internal/syntax/scanner.go @@ -47,6 +47,10 @@ func (s *scanner) init(src io.Reader, errh func(line, col uint, msg string), mod s.nlsemi = false } +func (s *scanner) errorf(format string, args ...interface{}) { + s.error(fmt.Sprintf(format, args...)) +} + // next advances the scanner by reading the next token. // // If a read, source encoding, or lexical error occurs, next calls @@ -149,8 +153,9 @@ redo: case '.': c = s.getr() - if isDigit(c) { - s.unread(1) + if isDecimal(c) { + s.ungetr() + s.unread(1) // correct position of '.' (needed by startLit in number) s.number('.') break } @@ -304,7 +309,7 @@ redo: default: s.tok = 0 - s.error(fmt.Sprintf("invalid character %#U", c)) + s.errorf("invalid character %#U", c) goto redo } @@ -320,11 +325,7 @@ assignop: } func isLetter(c rune) bool { - return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' -} - -func isDigit(c rune) bool { - return '0' <= c && c <= '9' + return 'a' <= lower(c) && lower(c) <= 'z' || c == '_' } func (s *scanner) ident() { @@ -332,7 +333,7 @@ func (s *scanner) ident() { // accelerate common case (7bit ASCII) c := s.getr() - for isLetter(c) || isDigit(c) { + for isLetter(c) || isDecimal(c) { c = s.getr() } @@ -372,10 +373,10 @@ func (s *scanner) isIdentRune(c rune, first bool) bool { // ok case unicode.IsDigit(c): if first { - s.error(fmt.Sprintf("identifier cannot begin with digit %#U", c)) + s.errorf("identifier cannot begin with digit %#U", c) } case c >= utf8.RuneSelf: - s.error(fmt.Sprintf("invalid identifier character %#U", c)) + s.errorf("invalid identifier character %#U", c) default: return false } @@ -401,86 +402,188 @@ func init() { } } +func lower(c rune) rune { return ('a' - 'A') | c } // returns lower-case c iff c is ASCII letter +func isDecimal(c rune) bool { return '0' <= 
c && c <= '9' } +func isHex(c rune) bool { return '0' <= c && c <= '9' || 'a' <= lower(c) && lower(c) <= 'f' } + +// digits accepts the sequence { digit | '_' } starting with c0. +// If base <= 10, digits accepts any decimal digit but records +// the index (relative to the literal start) of a digit >= base +// in *invalid, if *invalid < 0. +// digits returns the first rune that is not part of the sequence +// anymore, and a bitset describing whether the sequence contained +// digits (bit 0 is set), or separators '_' (bit 1 is set). +func (s *scanner) digits(c0 rune, base int, invalid *int) (c rune, digsep int) { + c = c0 + if base <= 10 { + max := rune('0' + base) + for isDecimal(c) || c == '_' { + ds := 1 + if c == '_' { + ds = 2 + } else if c >= max && *invalid < 0 { + *invalid = int(s.col0 - s.col) // record invalid rune index + } + digsep |= ds + c = s.getr() + } + } else { + for isHex(c) || c == '_' { + ds := 1 + if c == '_' { + ds = 2 + } + digsep |= ds + c = s.getr() + } + } + return +} + func (s *scanner) number(c rune) { s.startLit() + base := 10 // number base + prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b' + digsep := 0 // bit 0: digit present, bit 1: '_' present + invalid := -1 // index of invalid digit in literal, or < 0 + + // integer part + var ds int if c != '.' { - s.kind = IntLit // until proven otherwise + s.kind = IntLit if c == '0' { c = s.getr() - if c == 'x' || c == 'X' { - // hex + switch lower(c) { + case 'x': c = s.getr() - hasDigit := false - for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { - c = s.getr() - hasDigit = true - } - if !hasDigit { - s.error("malformed hex constant") - } - goto done - } - - // decimal 0, octal, or float - has8or9 := false - for isDigit(c) { - if c > '7' { - has8or9 = true - } + base, prefix = 16, 'x' + case 'o': c = s.getr() - } - if c != '.' 
&& c != 'e' && c != 'E' && c != 'i' { - // octal - if has8or9 { - s.error("malformed octal constant") - } - goto done - } - - } else { - // decimal or float - for isDigit(c) { + base, prefix = 8, 'o' + case 'b': c = s.getr() + base, prefix = 2, 'b' + default: + base, prefix = 8, '0' + digsep = 1 // leading 0 } } + c, ds = s.digits(c, base, &invalid) + digsep |= ds } - // float + // fractional part if c == '.' { s.kind = FloatLit - c = s.getr() - for isDigit(c) { - c = s.getr() + if prefix == 'o' || prefix == 'b' { + s.error("invalid radix point in " + litname(prefix)) } + c, ds = s.digits(s.getr(), base, &invalid) + digsep |= ds + } + + if digsep&1 == 0 { + s.error(litname(prefix) + " has no digits") } // exponent - if c == 'e' || c == 'E' { - s.kind = FloatLit + if e := lower(c); e == 'e' || e == 'p' { + switch { + case e == 'e' && prefix != 0 && prefix != '0': + s.errorf("%q exponent requires decimal mantissa", c) + case e == 'p' && prefix != 'x': + s.errorf("%q exponent requires hexadecimal mantissa", c) + } c = s.getr() - if c == '-' || c == '+' { + s.kind = FloatLit + if c == '+' || c == '-' { c = s.getr() } - if !isDigit(c) { - s.error("malformed floating-point constant exponent") - } - for isDigit(c) { - c = s.getr() + c, ds = s.digits(c, 10, nil) + digsep |= ds + if ds&1 == 0 { + s.error("exponent has no digits") } + } else if prefix == 'x' && s.kind == FloatLit { + s.error("hexadecimal mantissa requires a 'p' exponent") } - // complex + // suffix 'i' if c == 'i' { s.kind = ImagLit - s.getr() + if prefix != 0 && prefix != '0' { + s.error("invalid suffix 'i' on " + litname(prefix)) + } + c = s.getr() } - -done: s.ungetr() + s.nlsemi = true s.lit = string(s.stopLit()) s.tok = _Literal + + if s.kind == IntLit && invalid >= 0 { + s.errh(s.line, s.col+uint(invalid), fmt.Sprintf("invalid digit %q in %s", s.lit[invalid], litname(prefix))) + } + + if digsep&2 != 0 { + if i := invalidSep(s.lit); i >= 0 { + s.errh(s.line, s.col+uint(i), "'_' must separate successive 
digits") + } + } +} + +func litname(prefix rune) string { + switch prefix { + case 'x': + return "hexadecimal literal" + case 'o', '0': + return "octal literal" + case 'b': + return "binary literal" + } + return "decimal literal" +} + +// invalidSep returns the index of the first invalid separator in x, or -1. +func invalidSep(x string) int { + x1 := ' ' // prefix char, we only care if it's 'x' + d := '.' // digit, one of '_', '0' (a digit), or '.' (anything else) + i := 0 + + // a prefix counts as a digit + if len(x) >= 2 && x[0] == '0' { + x1 = lower(rune(x[1])) + if x1 == 'x' || x1 == 'o' || x1 == 'b' { + d = '0' + i = 2 + } + } + + // mantissa and exponent + for ; i < len(x); i++ { + p := d // previous digit + d = rune(x[i]) + switch { + case d == '_': + if p != '0' { + return i + } + case isDecimal(d) || x1 == 'x' && isHex(d): + d = '0' + default: + if p == '_' { + return i - 1 + } + d = '.' + } + } + if d == '_' { + return len(x) - 1 + } + + return -1 } func (s *scanner) rune() { @@ -713,12 +816,10 @@ func (s *scanner) escape(quote rune) bool { for i := n; i > 0; i-- { d := base switch { - case isDigit(c): + case isDecimal(c): d = uint32(c) - '0' - case 'a' <= c && c <= 'f': - d = uint32(c) - ('a' - 10) - case 'A' <= c && c <= 'F': - d = uint32(c) - ('A' - 10) + case 'a' <= lower(c) && lower(c) <= 'f': + d = uint32(lower(c)) - ('a' - 10) } if d >= base { if c < 0 { @@ -728,7 +829,7 @@ func (s *scanner) escape(quote rune) bool { if base == 8 { kind = "octal" } - s.error(fmt.Sprintf("non-%s character in escape sequence: %c", kind, c)) + s.errorf("non-%s character in escape sequence: %c", kind, c) s.ungetr() return false } @@ -739,7 +840,7 @@ func (s *scanner) escape(quote rune) bool { s.ungetr() if x > max && base == 8 { - s.error(fmt.Sprintf("octal escape value > 255: %d", x)) + s.errorf("octal escape value > 255: %d", x) return false } |