diff options
author | Marcel van Lohuizen <mpvl@golang.org> | 2011-09-05 19:09:20 +0200 |
---|---|---|
committer | Marcel van Lohuizen <mpvl@golang.org> | 2011-09-05 19:09:20 +0200 |
commit | d5e24b697521617f83b686656385a1ceb8844961 (patch) | |
tree | 36c8d72f8918bcb0fd9eb2fe90fee5f4774fef45 | |
parent | c7f6f9f318d8ce38ec43ef7e7299cb1a9df5622e (diff) | |
download | go-d5e24b697521617f83b686656385a1ceb8844961.tar.gz go-d5e24b697521617f83b686656385a1ceb8844961.zip |
exp/norm: performance improvements of quickSpan
- fixed performance bug that could lead to O(n^2) behavior
- performance improvement for ASCII case
R=r, r
CC=golang-dev
https://golang.org/cl/4956060
-rw-r--r-- | src/pkg/exp/norm/normalize.go | 27 | ||||
-rw-r--r-- | src/pkg/exp/norm/normalize_test.go | 5 |
2 files changed, 25 insertions, 7 deletions
diff --git a/src/pkg/exp/norm/normalize.go b/src/pkg/exp/norm/normalize.go index 749d3aa30d..0bbf2547b9 100644 --- a/src/pkg/exp/norm/normalize.go +++ b/src/pkg/exp/norm/normalize.go @@ -198,12 +198,16 @@ func (f Form) QuickSpan(b []byte) int { func quickSpan(fd *formInfo, b []byte) int { var lastCC uint8 var lastSegStart int - i := 0 + var i, nc int for i < len(b) { if b[i] < utf8.RuneSelf { - lastSegStart = i - i++ + // Keep the loop tight for ASCII processing, as this is where + // most of the time is spent for this case. + for i++; i < len(b) && b[i] < utf8.RuneSelf; i++ { + } + lastSegStart = i - 1 lastCC = 0 + nc = 0 continue } info := fd.info(b[i:]) @@ -212,9 +216,6 @@ func quickSpan(fd *formInfo, b []byte) int { return len(b) } cc := info.ccc - if lastCC > cc && cc != 0 { - return lastSegStart - } if fd.composing { if !info.flags.isYesC() { break @@ -224,8 +225,20 @@ func quickSpan(fd *formInfo, b []byte) int { break } } - if !fd.composing && cc == 0 { + if cc == 0 { lastSegStart = i + nc = 0 + } else { + if nc >= maxCombiningChars { + lastSegStart = i + lastCC = cc + nc = 1 + } else { + if lastCC > cc { + return lastSegStart + } + nc++ + } } lastCC = cc i += int(info.size) diff --git a/src/pkg/exp/norm/normalize_test.go b/src/pkg/exp/norm/normalize_test.go index 9159a90c4d..6e8650d59d 100644 --- a/src/pkg/exp/norm/normalize_test.go +++ b/src/pkg/exp/norm/normalize_test.go @@ -220,6 +220,11 @@ var quickSpanTests = []PositionTest{ // incorrectly ordered combining characters {"\u0300\u0316", 0, ""}, {"\u0300\u0316cd", 0, ""}, + // have a maximum number of combining characters. + {strings.Repeat("\u035D", 30) + "\u035B", 62, ""}, + {"a" + strings.Repeat("\u035D", 30) + "\u035B", 63, ""}, + {"Ɵ" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""}, + {"aa" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""}, } var quickSpanNFDTests = []PositionTest{ |