diff options
author | Andy Balholm <andy@balholm.com> | 2020-06-15 15:54:49 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2020-06-18 00:40:11 +0000 |
commit | 0ce1ffc49d3935a41e1fff2aec224c254a9fad1d (patch) | |
tree | 2abff935d21f788f5483a1e4e2bd2c5114bbf8a5 /src/regexp | |
parent | 8b98498a5833111402a2fe8f13a6605e071994b6 (diff) | |
download | go-0ce1ffc49d3935a41e1fff2aec224c254a9fad1d.tar.gz go-0ce1ffc49d3935a41e1fff2aec224c254a9fad1d.zip |
regexp/syntax: append patchLists in constant time
By keeping a tail pointer, we can append to a patchList in constant
time, rather than in time proportional to the length of the list. This
gets rid of the quadratic compile times we were seeing for long series
of alternations.
This is basically the same change as
https://github.com/google/re2/commit/e9d517989f66f2e0a24cde42f4d2424dd3e4a9b9.
Fixes #39542.
Change-Id: Ib4ca0ca9c55abd1594df1984653c7d311ccf7572
Reviewed-on: https://go-review.googlesource.com/c/go/+/238079
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/regexp')
-rw-r--r-- | src/regexp/syntax/compile.go | 68 |
1 files changed, 29 insertions, 39 deletions
diff --git a/src/regexp/syntax/compile.go b/src/regexp/syntax/compile.go index 1d8ab87a6d..7524d628fe 100644 --- a/src/regexp/syntax/compile.go +++ b/src/regexp/syntax/compile.go @@ -12,57 +12,47 @@ import "unicode" // See https://swtch.com/~rsc/regexp/regexp1.html for inspiration. // // These aren't really pointers: they're integers, so we can reinterpret them -// this way without using package unsafe. A value l denotes -// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). -// l == 0 denotes the empty list, okay because we start every program +// this way without using package unsafe. A value l.head denotes +// p.inst[l.head>>1].Out (l.head&1==0) or .Arg (l.head&1==1). +// head == 0 denotes the empty list, okay because we start every program // with a fail instruction, so we'll never want to point at its output link. -type patchList uint32 +type patchList struct { + head, tail uint32 +} -func (l patchList) next(p *Prog) patchList { - i := &p.Inst[l>>1] - if l&1 == 0 { - return patchList(i.Out) - } - return patchList(i.Arg) +func makePatchList(n uint32) patchList { + return patchList{n, n} } func (l patchList) patch(p *Prog, val uint32) { - for l != 0 { - i := &p.Inst[l>>1] - if l&1 == 0 { - l = patchList(i.Out) + head := l.head + for head != 0 { + i := &p.Inst[head>>1] + if head&1 == 0 { + head = i.Out i.Out = val } else { - l = patchList(i.Arg) + head = i.Arg i.Arg = val } } } func (l1 patchList) append(p *Prog, l2 patchList) patchList { - if l1 == 0 { + if l1.head == 0 { return l2 } - if l2 == 0 { + if l2.head == 0 { return l1 } - last := l1 - for { - next := last.next(p) - if next == 0 { - break - } - last = next - } - - i := &p.Inst[last>>1] - if last&1 == 0 { - i.Out = uint32(l2) + i := &p.Inst[l1.tail>>1] + if l1.tail&1 == 0 { + i.Out = l2.head } else { - i.Arg = uint32(l2) + i.Arg = l2.head } - return l1 + return patchList{l1.head, l2.tail} } // A frag represents a compiled program fragment. @@ -176,7 +166,7 @@ func (c *compiler) inst(op InstOp) frag { func (c *compiler) nop() frag { f := c.inst(InstNop) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) return f } @@ -186,7 +176,7 @@ func (c *compiler) fail() frag { func (c *compiler) cap(arg uint32) frag { f := c.inst(InstCapture) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) c.p.Inst[f.i].Arg = arg if c.p.NumCap < int(arg)+1 { @@ -229,10 +219,10 @@ func (c *compiler) quest(f1 frag, nongreedy bool) frag { i := &c.p.Inst[f.i] if nongreedy { i.Arg = f1.i - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) } else { i.Out = f1.i - f.out = patchList(f.i<<1 | 1) + f.out = makePatchList(f.i<<1 | 1) } f.out = f.out.append(c.p, f1.out) return f @@ -243,10 +233,10 @@ func (c *compiler) star(f1 frag, nongreedy bool) frag { i := &c.p.Inst[f.i] if nongreedy { i.Arg = f1.i - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) } else { i.Out = f1.i - f.out = patchList(f.i<<1 | 1) + f.out = makePatchList(f.i<<1 | 1) } f1.out.patch(c.p, f.i) return f @@ -259,7 +249,7 @@ func (c *compiler) plus(f1 frag, nongreedy bool) frag { func (c *compiler) empty(op EmptyOp) frag { f := c.inst(InstEmptyWidth) c.p.Inst[f.i].Arg = uint32(op) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) return f } @@ -273,7 +263,7 @@ func (c *compiler) rune(r []rune, flags Flags) frag { flags &^= FoldCase } i.Arg = uint32(flags) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) // Special cases for exec machine. switch { |