diff options
author | Rob Pike <r@golang.org> | 2010-02-17 08:49:00 +1100 |
---|---|---|
committer | Rob Pike <r@golang.org> | 2010-02-17 08:49:00 +1100 |
commit | 7db472fd342dd93394a61ea39b583eedf78d21e2 (patch) | |
tree | ed2814443ee49d9894a9f05e0413a7cb02c6f519 | |
parent | ca075494a65f8b30abc9dfeb76b5144b9f14ed1e (diff) | |
download | go-7db472fd342dd93394a61ea39b583eedf78d21e2.tar.gz go-7db472fd342dd93394a61ea39b583eedf78d21e2.zip |
The prefix optimization applies only to the first iteration.
Fixes #596.
R=rsc
CC=golang-dev
https://golang.org/cl/206101
-rw-r--r-- | src/pkg/regexp/all_test.go | 5 | ||||
-rw-r--r-- | src/pkg/regexp/regexp.go | 18 |
2 files changed, 17 insertions, 6 deletions
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go index 05bba73765..4570410f95 100644 --- a/src/pkg/regexp/all_test.go +++ b/src/pkg/regexp/all_test.go @@ -30,7 +30,6 @@ var good_re = []string{ `[^\n]`, } -// TODO: nice to do this with a map type stringError struct { re string err os.Error @@ -97,6 +96,10 @@ var matches = []tester{ tester{`[.]`, ".", vec{0, 1}}, tester{`/$`, "/abc/", vec{4, 5}}, tester{`/$`, "/abc", vec{}}, + + // fixed bugs + tester{`ab$`, "cab", vec{1, 3}}, + tester{`axxb$`, "axxcb", vec{}}, } func compileTest(t *testing.T, expr string, error os.Error) *Regexp { diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index 373d6b1af1..b3525396c9 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -75,8 +75,9 @@ type Regexp struct { prefix string // initial plain text string prefixBytes []byte // initial plain text bytes inst *vector.Vector - start instr - nbra int // number of brackets in expression, for subexpressions + start instr // first instruction of machine + prefixStart instr // where to start if there is a prefix + nbra int // number of brackets in expression, for subexpressions } const ( @@ -650,8 +651,8 @@ Loop: b = bytes.Add(b, utf[0:n]) i = inst.next().index() } - // point start instruction to first non-CHAR - re.inst.At(0).(instr).setNext(re.inst.At(i).(instr)) + // point prefixStart instruction to first non-CHAR after prefix + re.prefixStart = re.inst.At(i).(instr) re.prefixBytes = b re.prefix = string(b) } @@ -807,6 +808,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int { end = len(bytestr) } // fast check for initial plain substring + prefixed := false // has this iteration begun by skipping a prefix? if re.prefix != "" { var advance int if bytestr == nil { @@ -818,6 +820,7 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int { return []int{} } pos += advance + len(re.prefix) + prefixed = true } arena := &matchArena{nil, 2 * (re.nbra + 1)} for pos <= end { @@ -825,7 +828,12 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int { // prime the pump if we haven't seen a match yet match := arena.noMatch() match.m[0] = pos - s[out] = arena.addState(s[out], re.start.next(), match, pos, end) + if prefixed { + s[out] = arena.addState(s[out], re.prefixStart, match, pos, end) + prefixed = false // next iteration should start at beginning of machine. + } else { + s[out] = arena.addState(s[out], re.start.next(), match, pos, end) + } arena.free(match) // if addState saved it, ref was incremented } in, out = out, in // old out state is new in state |