aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Griesemer <gri@golang.org>2019-02-20 10:44:52 -0800
committerRobert Griesemer <gri@golang.org>2019-02-20 20:23:28 +0000
commit34fb5855eb73fe04ee0bcfc0d9ca8be5440a560b (patch)
treef453b91c88ae53aa14b6759a9bdd9cd1353721cc
parent153c0da89bca6726545cf4451053235b552d3d51 (diff)
downloadgo-34fb5855eb73fe04ee0bcfc0d9ca8be5440a560b.tar.gz
go-34fb5855eb73fe04ee0bcfc0d9ca8be5440a560b.zip
text/scanner: don't liberally consume (invalid) floats or underbars
This is a follow-up on https://golang.org/cl/161199 which introduced the new Go 2 number literals to text/scanner. That change introduced a bug by allowing decimal and hexadecimal floats to be consumed even if the scanner was not configured to accept floats. This CL changes the code to not consume a radix dot '.' or exponent unless the scanner is configured to accept floats. This CL also introduces a new mode "AllowNumberbars" which controls whether underbars '_' are permitted as digit separators in numbers or not. There is a possibility that we may need to refine text/scanner further (e.g., the Float mode now includes hexadecimal floats which it didn't recognize before). We're very early in the cycle, so let's see how it goes. RELNOTE=yes Updates #12711. Updates #19308. Updates #28493. Updates #29008. Fixes #30320. Change-Id: I6481d314f0384e09ef6803ffad38dc529b1e89a3 Reviewed-on: https://go-review.googlesource.com/c/163079 Reviewed-by: Ian Lance Taylor <iant@golang.org>
-rw-r--r--api/except.txt1
-rw-r--r--api/next.txt3
-rw-r--r--src/text/scanner/scanner.go48
-rw-r--r--src/text/scanner/scanner_test.go37
4 files changed, 68 insertions, 21 deletions
diff --git a/api/except.txt b/api/except.txt
index 637be18135..a608d5783e 100644
--- a/api/except.txt
+++ b/api/except.txt
@@ -457,3 +457,4 @@ pkg syscall (freebsd-arm-cgo), type Stat_t struct, Nlink uint16
pkg syscall (freebsd-arm-cgo), type Stat_t struct, Rdev uint32
pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntfromname [88]int8
pkg syscall (freebsd-arm-cgo), type Statfs_t struct, Mntonname [88]int8
+pkg text/scanner, const GoTokens = 1012 \ No newline at end of file
diff --git a/api/next.txt b/api/next.txt
index e69de29bb2..aaea62d70b 100644
--- a/api/next.txt
+++ b/api/next.txt
@@ -0,0 +1,3 @@
+pkg text/scanner, const AllowNumberbars = 1024
+pkg text/scanner, const AllowNumberbars ideal-int
+pkg text/scanner, const GoTokens = 2036
diff --git a/src/text/scanner/scanner.go b/src/text/scanner/scanner.go
index 38c27f6a08..8db0ed28d3 100644
--- a/src/text/scanner/scanner.go
+++ b/src/text/scanner/scanner.go
@@ -59,15 +59,16 @@ func (pos Position) String() string {
// "foo" is scanned as the token sequence '"' Ident '"'.
//
const (
- ScanIdents = 1 << -Ident
- ScanInts = 1 << -Int
- ScanFloats = 1 << -Float // includes Ints
- ScanChars = 1 << -Char
- ScanStrings = 1 << -String
- ScanRawStrings = 1 << -RawString
- ScanComments = 1 << -Comment
- SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space
- GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments
+ ScanIdents = 1 << -Ident
+ ScanInts = 1 << -Int
+ ScanFloats = 1 << -Float // includes Ints and hexadecimal floats
+ ScanChars = 1 << -Char
+ ScanStrings = 1 << -String
+ ScanRawStrings = 1 << -RawString
+ ScanComments = 1 << -Comment
+ SkipComments = 1 << -skipComment // if set with ScanComments, comments become white space
+ AllowNumberbars = 1 << -allowNumberbars // if set, number literals may contain underbars as digit separators
+ GoTokens = ScanIdents | ScanFloats | ScanChars | ScanStrings | ScanRawStrings | ScanComments | SkipComments | AllowNumberbars
)
// The result of Scan is one of these tokens or a Unicode character.
@@ -80,7 +81,10 @@ const (
String
RawString
Comment
+
+ // internal use only
skipComment
+ allowNumberbars
)
var tokenString = map[rune]string{
@@ -359,7 +363,8 @@ func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case c
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
func isHex(ch rune) bool { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }
-// digits accepts the sequence { digit | '_' } starting with ch0.
+// digits accepts the sequence { digit } (if AllowNumberbars is not set)
+// or { digit | '_' } (if AllowNumberbars is set), starting with ch0.
// If base <= 10, digits accepts any decimal digit but records
// the first invalid digit >= base in *invalid if *invalid == 0.
// digits returns the first rune that is not part of the sequence
@@ -369,7 +374,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
ch = ch0
if base <= 10 {
max := rune('0' + base)
- for isDecimal(ch) || ch == '_' {
+ for isDecimal(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
ds := 1
if ch == '_' {
ds = 2
@@ -380,7 +385,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
ch = s.next()
}
} else {
- for isHex(ch) || ch == '_' {
+ for isHex(ch) || ch == '_' && s.Mode&AllowNumberbars != 0 {
ds := 1
if ch == '_' {
ds = 2
@@ -392,7 +397,7 @@ func (s *Scanner) digits(ch0 rune, base int, invalid *rune) (ch rune, digsep int
return
}
-func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
+func (s *Scanner) scanNumber(ch rune, seenDot bool) (rune, rune) {
base := 10 // number base
prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
digsep := 0 // bit 0: digit present, bit 1: '_' present
@@ -401,7 +406,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
// integer part
var tok rune
var ds int
- if integerPart {
+ if !seenDot {
tok = Int
if ch == '0' {
ch = s.next()
@@ -422,17 +427,18 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
}
ch, ds = s.digits(ch, base, &invalid)
digsep |= ds
+ if ch == '.' && s.Mode&ScanFloats != 0 {
+ ch = s.next()
+ seenDot = true
+ }
}
// fractional part
- if !integerPart || ch == '.' {
+ if seenDot {
tok = Float
if prefix == 'o' || prefix == 'b' {
s.error("invalid radix point in " + litname(prefix))
}
- if ch == '.' {
- ch = s.next()
- }
ch, ds = s.digits(ch, base, &invalid)
digsep |= ds
}
@@ -442,7 +448,7 @@ func (s *Scanner) scanNumber(ch rune, integerPart bool) (rune, rune) {
}
// exponent
- if e := lower(ch); e == 'e' || e == 'p' {
+ if e := lower(ch); (e == 'e' || e == 'p') && s.Mode&ScanFloats != 0 {
switch {
case e == 'e' && prefix != 0 && prefix != '0':
s.errorf("%q exponent requires decimal mantissa", ch)
@@ -682,7 +688,7 @@ redo:
}
case isDecimal(ch):
if s.Mode&(ScanInts|ScanFloats) != 0 {
- tok, ch = s.scanNumber(ch, true)
+ tok, ch = s.scanNumber(ch, false)
} else {
ch = s.next()
}
@@ -705,7 +711,7 @@ redo:
case '.':
ch = s.next()
if isDecimal(ch) && s.Mode&ScanFloats != 0 {
- tok, ch = s.scanNumber(ch, false)
+ tok, ch = s.scanNumber(ch, true)
}
case '/':
ch = s.next()
diff --git a/src/text/scanner/scanner_test.go b/src/text/scanner/scanner_test.go
index 58db8e1971..6ae8fd9a08 100644
--- a/src/text/scanner/scanner_test.go
+++ b/src/text/scanner/scanner_test.go
@@ -877,3 +877,40 @@ func TestNumbers(t *testing.T) {
}
}
}
+
+func TestIssue30320(t *testing.T) {
+ for _, test := range []struct {
+ in, want string
+ mode uint
+ }{
+ {"foo01.bar31.xx-0-1-1-0", "01 31 0 1 1 0", ScanInts},
+ {"foo0/12/0/5.67", "0 12 0 5 67", ScanInts},
+ {"xxx1e0yyy", "1 0", ScanInts},
+ {"1_2", "1 2", ScanInts}, // don't consume _ as part of a number if not explicitly enabled
+ {"1_2", "1_2", ScanInts | AllowNumberbars},
+ {"xxx1.0yyy2e3ee", "1 0 2 3", ScanInts},
+ {"xxx1.0yyy2e3ee", "1.0 2e3", ScanFloats},
+ } {
+ got := extractInts(test.in, test.mode)
+ if got != test.want {
+ t.Errorf("%q: got %q; want %q", test.in, got, test.want)
+ }
+ }
+}
+
+func extractInts(t string, mode uint) (res string) {
+ var s Scanner
+ s.Init(strings.NewReader(t))
+ s.Mode = mode
+ for {
+ switch tok := s.Scan(); tok {
+ case Int, Float:
+ if len(res) > 0 {
+ res += " "
+ }
+ res += s.TokenText()
+ case EOF:
+ return
+ }
+ }
+}