aboutsummaryrefslogtreecommitdiff
path: root/src/regexp/syntax/parse.go
diff options
context:
space:
mode:
authorPaul Wankadia <junyer@google.com>2016-01-07 18:55:18 +1100
committerRuss Cox <rsc@golang.org>2016-01-08 16:41:46 +0000
commit5ccaf0255b75063a9c685009e77cee24e26a509e (patch)
treecac4bb9ee96af770689d6238dad378faab38a349 /src/regexp/syntax/parse.go
parent8ae584f21e32ab1a07d9d5d4d39cb5f7530c49df (diff)
downloadgo-5ccaf0255b75063a9c685009e77cee24e26a509e.tar.gz
go-5ccaf0255b75063a9c685009e77cee24e26a509e.zip
regexp/syntax: fix factoring of common prefixes in alternations
In the past, `a.*?c|a.*?b` was factored to `a.*?[bc]`. Thus, given "abc" as its input string, the automaton would consume "ab" and then stop (when unanchored) whereas it should consume all of "abc" as per leftmost semantics. Fixes #13812. Change-Id: I67ac0a353d7793b3d0c9c4aaf22d157621dfe784 Reviewed-on: https://go-review.googlesource.com/18357 Reviewed-by: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src/regexp/syntax/parse.go')
-rw-r--r--src/regexp/syntax/parse.go15
1 files changed, 11 insertions, 4 deletions
diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go
index c2b92c1d44..f38bbf66e3 100644
--- a/src/regexp/syntax/parse.go
+++ b/src/regexp/syntax/parse.go
@@ -470,9 +470,14 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
}
sub = out
- // Round 2: Factor out common complex prefixes,
- // just the first piece of each concatenation,
- // whatever it is. This is good enough a lot of the time.
+ // Round 2: Factor out common simple prefixes,
+ // just the first piece of each concatenation.
+ // This will be good enough a lot of the time.
+ //
+ // Complex subexpressions (e.g. involving quantifiers)
+ // are not safe to factor because that collapses their
+ // distinct paths through the automaton, which affects
+ // correctness in some cases.
start = 0
out = sub[:0]
var first *Regexp
@@ -485,7 +490,9 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
var ifirst *Regexp
if i < len(sub) {
ifirst = p.leadingRegexp(sub[i])
- if first != nil && first.Equal(ifirst) {
+ if first != nil && first.Equal(ifirst) &&
+ // first must be a character class OR a fixed repeat of a character class.
+ (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) {
continue
}
}