diff options
author | Egon Elbre <egonelbre@gmail.com> | 2015-11-17 16:51:23 +0200 |
---|---|---|
committer | Brad Fitzpatrick <bradfitz@golang.org> | 2016-04-26 21:59:50 +0000 |
commit | e607abbfd6e0550c13f4fa7b666d033eb9b14759 (patch) | |
tree | 928cf2b0a6713490c0a8dcf9510cbc5b6c19a3f6 /src/unicode | |
parent | 6e4a8615f652a2020471622354be6d890404020c (diff) | |
download | go-e607abbfd6e0550c13f4fa7b666d033eb9b14759.tar.gz go-e607abbfd6e0550c13f4fa7b666d033eb9b14759.zip |
unicode: improve SimpleFold performance for ascii
This change significantly speeds up case-insensitive regexp matching.
benchmark old ns/op new ns/op delta
BenchmarkMatchEasy0i_32-8 2690 1473 -45.24%
BenchmarkMatchEasy0i_1K-8 80404 42269 -47.43%
BenchmarkMatchEasy0i_32K-8 3272187 2076118 -36.55%
BenchmarkMatchEasy0i_1M-8 104805990 66503805 -36.55%
BenchmarkMatchEasy0i_32M-8 3360192200 2126121600 -36.73%
benchmark old MB/s new MB/s speedup
BenchmarkMatchEasy0i_32-8 11.90 21.72 1.83x
BenchmarkMatchEasy0i_1K-8 12.74 24.23 1.90x
BenchmarkMatchEasy0i_32K-8 10.01 15.78 1.58x
BenchmarkMatchEasy0i_1M-8 10.00 15.77 1.58x
BenchmarkMatchEasy0i_32M-8 9.99 15.78 1.58x
Issue #13288
Change-Id: I94af7bb29e75d60b4f6ee760124867ab271b9642
Reviewed-on: https://go-review.googlesource.com/16943
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/unicode')
-rw-r--r-- | src/unicode/letter.go | 4 | ||||
-rw-r--r-- | src/unicode/maketables.go | 20 | ||||
-rw-r--r-- | src/unicode/tables.go | 131 |
3 files changed, 155 insertions, 0 deletions
diff --git a/src/unicode/letter.go b/src/unicode/letter.go index ffa083eb57..8aec920d22 100644 --- a/src/unicode/letter.go +++ b/src/unicode/letter.go @@ -332,6 +332,10 @@ type foldPair struct { // SimpleFold('1') = '1' // func SimpleFold(r rune) rune { + if int(r) < len(asciiFold) { + return rune(asciiFold[r]) + } + // Consult caseOrbit table for special cases. lo := 0 hi := len(caseOrbit) diff --git a/src/unicode/maketables.go b/src/unicode/maketables.go index 328c75ed63..f364515c90 100644 --- a/src/unicode/maketables.go +++ b/src/unicode/maketables.go @@ -1172,6 +1172,7 @@ func printCasefold() { } } + printAsciiFold() printCaseOrbit() // Tables of category and script folding exceptions: code points @@ -1269,6 +1270,25 @@ var comment = map[string]string{ "// If there is no entry for a script name, there are no such points.\n", } +func printAsciiFold() { + printf("var asciiFold = [MaxASCII + 1]uint16{\n") + for i := rune(0); i <= unicode.MaxASCII; i++ { + c := chars[i] + f := c.caseOrbit + if f == 0 { + if c.lowerCase != i && c.lowerCase != 0 { + f = c.lowerCase + } else if c.upperCase != i && c.upperCase != 0 { + f = c.upperCase + } else { + f = i + } + } + printf("\t0x%04X,\n", f) + } + printf("}\n\n") +} + func printCaseOrbit() { if *test { for j := range chars { diff --git a/src/unicode/tables.go b/src/unicode/tables.go index 8bb42062f9..c04d69a6ff 100644 --- a/src/unicode/tables.go +++ b/src/unicode/tables.go @@ -6834,6 +6834,137 @@ var properties = [MaxLatin1 + 1]uint8{ 0xFF: pLl | pp, // 'ΓΏ' } +var asciiFold = [MaxASCII + 1]uint16{ + 0x0000, + 0x0001, + 0x0002, + 0x0003, + 0x0004, + 0x0005, + 0x0006, + 0x0007, + 0x0008, + 0x0009, + 0x000A, + 0x000B, + 0x000C, + 0x000D, + 0x000E, + 0x000F, + 0x0010, + 0x0011, + 0x0012, + 0x0013, + 0x0014, + 0x0015, + 0x0016, + 0x0017, + 0x0018, + 0x0019, + 0x001A, + 0x001B, + 0x001C, + 0x001D, + 0x001E, + 0x001F, + 0x0020, + 0x0021, + 0x0022, + 0x0023, + 0x0024, + 0x0025, + 0x0026, + 0x0027, + 0x0028, + 0x0029, + 0x002A, + 0x002B, + 0x002C, + 0x002D, + 0x002E, + 0x002F, + 0x0030, + 0x0031, + 0x0032, + 0x0033, + 0x0034, + 0x0035, + 0x0036, + 0x0037, + 0x0038, + 0x0039, + 0x003A, + 0x003B, + 0x003C, + 0x003D, + 0x003E, + 0x003F, + 0x0040, + 0x0061, + 0x0062, + 0x0063, + 0x0064, + 0x0065, + 0x0066, + 0x0067, + 0x0068, + 0x0069, + 0x006A, + 0x006B, + 0x006C, + 0x006D, + 0x006E, + 0x006F, + 0x0070, + 0x0071, + 0x0072, + 0x0073, + 0x0074, + 0x0075, + 0x0076, + 0x0077, + 0x0078, + 0x0079, + 0x007A, + 0x005B, + 0x005C, + 0x005D, + 0x005E, + 0x005F, + 0x0060, + 0x0041, + 0x0042, + 0x0043, + 0x0044, + 0x0045, + 0x0046, + 0x0047, + 0x0048, + 0x0049, + 0x004A, + 0x212A, + 0x004C, + 0x004D, + 0x004E, + 0x004F, + 0x0050, + 0x0051, + 0x0052, + 0x017F, + 0x0054, + 0x0055, + 0x0056, + 0x0057, + 0x0058, + 0x0059, + 0x005A, + 0x007B, + 0x007C, + 0x007D, + 0x007E, + 0x007F, +} + var caseOrbit = []foldPair{ {0x004B, 0x006B}, {0x0053, 0x0073}, |