aboutsummaryrefslogtreecommitdiff
path: root/src/regexp
diff options
context:
space:
mode:
authorAndy Balholm <andy@balholm.com>2020-06-15 15:54:49 -0700
committerIan Lance Taylor <iant@golang.org>2020-06-18 00:40:11 +0000
commit0ce1ffc49d3935a41e1fff2aec224c254a9fad1d (patch)
tree2abff935d21f788f5483a1e4e2bd2c5114bbf8a5 /src/regexp
parent8b98498a5833111402a2fe8f13a6605e071994b6 (diff)
downloadgo-0ce1ffc49d3935a41e1fff2aec224c254a9fad1d.tar.gz
go-0ce1ffc49d3935a41e1fff2aec224c254a9fad1d.zip
regexp/syntax: append patchLists in constant time
By keeping a tail pointer, we can append to a patchList in constant time, rather than in time proportional to the length of the list. This gets rid of the quadratic compile times we were seeing for long series of alternations. This is basically the same change as https://github.com/google/re2/commit/e9d517989f66f2e0a24cde42f4d2424dd3e4a9b9. Fixes #39542. Change-Id: Ib4ca0ca9c55abd1594df1984653c7d311ccf7572 Reviewed-on: https://go-review.googlesource.com/c/go/+/238079 Run-TryBot: Ian Lance Taylor <iant@golang.org> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/regexp')
-rw-r--r--src/regexp/syntax/compile.go68
1 files changed, 29 insertions, 39 deletions
diff --git a/src/regexp/syntax/compile.go b/src/regexp/syntax/compile.go
index 1d8ab87a6d..7524d628fe 100644
--- a/src/regexp/syntax/compile.go
+++ b/src/regexp/syntax/compile.go
@@ -12,57 +12,47 @@ import "unicode"
// See https://swtch.com/~rsc/regexp/regexp1.html for inspiration.
//
// These aren't really pointers: they're integers, so we can reinterpret them
-// this way without using package unsafe. A value l denotes
-// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
-// l == 0 denotes the empty list, okay because we start every program
+// this way without using package unsafe. A value l.head denotes
+// p.inst[l.head>>1].Out (l.head&1==0) or .Arg (l.head&1==1).
+// head == 0 denotes the empty list, okay because we start every program
// with a fail instruction, so we'll never want to point at its output link.
-type patchList uint32
+type patchList struct {
+ head, tail uint32
+}
-func (l patchList) next(p *Prog) patchList {
- i := &p.Inst[l>>1]
- if l&1 == 0 {
- return patchList(i.Out)
- }
- return patchList(i.Arg)
+func makePatchList(n uint32) patchList {
+ return patchList{n, n}
}
func (l patchList) patch(p *Prog, val uint32) {
- for l != 0 {
- i := &p.Inst[l>>1]
- if l&1 == 0 {
- l = patchList(i.Out)
+ head := l.head
+ for head != 0 {
+ i := &p.Inst[head>>1]
+ if head&1 == 0 {
+ head = i.Out
i.Out = val
} else {
- l = patchList(i.Arg)
+ head = i.Arg
i.Arg = val
}
}
}
func (l1 patchList) append(p *Prog, l2 patchList) patchList {
- if l1 == 0 {
+ if l1.head == 0 {
return l2
}
- if l2 == 0 {
+ if l2.head == 0 {
return l1
}
- last := l1
- for {
- next := last.next(p)
- if next == 0 {
- break
- }
- last = next
- }
-
- i := &p.Inst[last>>1]
- if last&1 == 0 {
- i.Out = uint32(l2)
+ i := &p.Inst[l1.tail>>1]
+ if l1.tail&1 == 0 {
+ i.Out = l2.head
} else {
- i.Arg = uint32(l2)
+ i.Arg = l2.head
}
- return l1
+ return patchList{l1.head, l2.tail}
}
// A frag represents a compiled program fragment.
@@ -176,7 +166,7 @@ func (c *compiler) inst(op InstOp) frag {
func (c *compiler) nop() frag {
f := c.inst(InstNop)
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
return f
}
@@ -186,7 +176,7 @@ func (c *compiler) fail() frag {
func (c *compiler) cap(arg uint32) frag {
f := c.inst(InstCapture)
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
c.p.Inst[f.i].Arg = arg
if c.p.NumCap < int(arg)+1 {
@@ -229,10 +219,10 @@ func (c *compiler) quest(f1 frag, nongreedy bool) frag {
i := &c.p.Inst[f.i]
if nongreedy {
i.Arg = f1.i
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
} else {
i.Out = f1.i
- f.out = patchList(f.i<<1 | 1)
+ f.out = makePatchList(f.i<<1 | 1)
}
f.out = f.out.append(c.p, f1.out)
return f
@@ -243,10 +233,10 @@ func (c *compiler) star(f1 frag, nongreedy bool) frag {
i := &c.p.Inst[f.i]
if nongreedy {
i.Arg = f1.i
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
} else {
i.Out = f1.i
- f.out = patchList(f.i<<1 | 1)
+ f.out = makePatchList(f.i<<1 | 1)
}
f1.out.patch(c.p, f.i)
return f
@@ -259,7 +249,7 @@ func (c *compiler) plus(f1 frag, nongreedy bool) frag {
func (c *compiler) empty(op EmptyOp) frag {
f := c.inst(InstEmptyWidth)
c.p.Inst[f.i].Arg = uint32(op)
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
return f
}
@@ -273,7 +263,7 @@ func (c *compiler) rune(r []rune, flags Flags) frag {
flags &^= FoldCase
}
i.Arg = uint32(flags)
- f.out = patchList(f.i << 1)
+ f.out = makePatchList(f.i << 1)
// Special cases for exec machine.
switch {