diff options
author | Paul Wankadia <junyer@google.com> | 2016-01-07 18:55:18 +1100 |
---|---|---|
committer | Russ Cox <rsc@golang.org> | 2016-01-08 16:41:46 +0000 |
commit | 5ccaf0255b75063a9c685009e77cee24e26a509e (patch) | |
tree | cac4bb9ee96af770689d6238dad378faab38a349 /src/regexp/syntax/parse.go | |
parent | 8ae584f21e32ab1a07d9d5d4d39cb5f7530c49df (diff) | |
download | go-5ccaf0255b75063a9c685009e77cee24e26a509e.tar.gz go-5ccaf0255b75063a9c685009e77cee24e26a509e.zip |
regexp/syntax: fix factoring of common prefixes in alternations
In the past, `a.*?c|a.*?b` was factored to `a.*?[bc]`. Thus, given
"abc" as its input string, the automaton would consume "ab" and
then stop (when unanchored) whereas it should consume all of "abc"
as per leftmost semantics.
Fixes #13812.
Change-Id: I67ac0a353d7793b3d0c9c4aaf22d157621dfe784
Reviewed-on: https://go-review.googlesource.com/18357
Reviewed-by: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src/regexp/syntax/parse.go')
-rw-r--r-- | src/regexp/syntax/parse.go | 15 |
1 files changed, 11 insertions, 4 deletions
diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go index c2b92c1d44..f38bbf66e3 100644 --- a/src/regexp/syntax/parse.go +++ b/src/regexp/syntax/parse.go @@ -470,9 +470,14 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { } sub = out - // Round 2: Factor out common complex prefixes, - // just the first piece of each concatenation, - // whatever it is. This is good enough a lot of the time. + // Round 2: Factor out common simple prefixes, + // just the first piece of each concatenation. + // This will be good enough a lot of the time. + // + // Complex subexpressions (e.g. involving quantifiers) + // are not safe to factor because that collapses their + // distinct paths through the automaton, which affects + // correctness in some cases. start = 0 out = sub[:0] var first *Regexp @@ -485,7 +490,9 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { var ifirst *Regexp if i < len(sub) { ifirst = p.leadingRegexp(sub[i]) - if first != nil && first.Equal(ifirst) { + if first != nil && first.Equal(ifirst) && + // first must be a character class OR a fixed repeat of a character class. + (isCharClass(first) || (first.Op == OpRepeat && first.Min == first.Max && isCharClass(first.Sub[0]))) { continue } } |