aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarcel van Lohuizen <mpvl@golang.org>2011-09-05 19:09:20 +0200
committerMarcel van Lohuizen <mpvl@golang.org>2011-09-05 19:09:20 +0200
commitd5e24b697521617f83b686656385a1ceb8844961 (patch)
tree36c8d72f8918bcb0fd9eb2fe90fee5f4774fef45
parentc7f6f9f318d8ce38ec43ef7e7299cb1a9df5622e (diff)
downloadgo-d5e24b697521617f83b686656385a1ceb8844961.tar.gz
go-d5e24b697521617f83b686656385a1ceb8844961.zip
exp/norm: performance improvements of quickSpan
- fixed performance bug that could lead to O(n^2) behavior - performance improvement for ASCII case R=r, r CC=golang-dev https://golang.org/cl/4956060
-rw-r--r--src/pkg/exp/norm/normalize.go27
-rw-r--r--src/pkg/exp/norm/normalize_test.go5
2 files changed, 25 insertions, 7 deletions
diff --git a/src/pkg/exp/norm/normalize.go b/src/pkg/exp/norm/normalize.go
index 749d3aa30d..0bbf2547b9 100644
--- a/src/pkg/exp/norm/normalize.go
+++ b/src/pkg/exp/norm/normalize.go
@@ -198,12 +198,16 @@ func (f Form) QuickSpan(b []byte) int {
func quickSpan(fd *formInfo, b []byte) int {
var lastCC uint8
var lastSegStart int
- i := 0
+ var i, nc int
for i < len(b) {
if b[i] < utf8.RuneSelf {
- lastSegStart = i
- i++
+ // Keep the loop tight for ASCII processing, as this is where
+ // most of the time is spent for this case.
+ for i++; i < len(b) && b[i] < utf8.RuneSelf; i++ {
+ }
+ lastSegStart = i - 1
lastCC = 0
+ nc = 0
continue
}
info := fd.info(b[i:])
@@ -212,9 +216,6 @@ func quickSpan(fd *formInfo, b []byte) int {
return len(b)
}
cc := info.ccc
- if lastCC > cc && cc != 0 {
- return lastSegStart
- }
if fd.composing {
if !info.flags.isYesC() {
break
@@ -224,8 +225,20 @@ func quickSpan(fd *formInfo, b []byte) int {
break
}
}
- if !fd.composing && cc == 0 {
+ if cc == 0 {
lastSegStart = i
+ nc = 0
+ } else {
+ if nc >= maxCombiningChars {
+ lastSegStart = i
+ lastCC = cc
+ nc = 1
+ } else {
+ if lastCC > cc {
+ return lastSegStart
+ }
+ nc++
+ }
}
lastCC = cc
i += int(info.size)
diff --git a/src/pkg/exp/norm/normalize_test.go b/src/pkg/exp/norm/normalize_test.go
index 9159a90c4d..6e8650d59d 100644
--- a/src/pkg/exp/norm/normalize_test.go
+++ b/src/pkg/exp/norm/normalize_test.go
@@ -220,6 +220,11 @@ var quickSpanTests = []PositionTest{
// incorrectly ordered combining characters
{"\u0300\u0316", 0, ""},
{"\u0300\u0316cd", 0, ""},
+ // have a maximum number of combining characters.
+ {strings.Repeat("\u035D", 30) + "\u035B", 62, ""},
+ {"a" + strings.Repeat("\u035D", 30) + "\u035B", 63, ""},
+ {"Ɵ" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
+ {"aa" + strings.Repeat("\u035D", 30) + "\u035B", 64, ""},
}
var quickSpanNFDTests = []PositionTest{