diff options
author | Russ Cox <rsc@golang.org> | 2011-10-25 22:26:12 -0700 |
---|---|---|
committer | Russ Cox <rsc@golang.org> | 2011-10-25 22:26:12 -0700 |
commit | c945f77f41fc69b652dc359e52a32f031ca5c730 (patch) | |
tree | 299c56512299a58eb361132a3bdad2444824e3b6 | |
parent | b50a847c3cf4ffa9064f03652126ef603efa3cf5 (diff) | |
download | go-c945f77f41fc69b652dc359e52a32f031ca5c730.tar.gz go-c945f77f41fc69b652dc359e52a32f031ca5c730.zip |
exp/norm: use rune
Nothing terribly interesting here. (!)
Since the public APIs are all in terms of UTF-8,
the changes are all internal only.
R=mpvl, gri, r
CC=golang-dev
https://golang.org/cl/5309042
-rw-r--r-- | src/pkg/exp/norm/composition.go | 24 | ||||
-rw-r--r-- | src/pkg/exp/norm/composition_test.go | 62 | ||||
-rw-r--r-- | src/pkg/exp/norm/maketables.go | 54 | ||||
-rw-r--r-- | src/pkg/exp/norm/maketesttables.go | 2 | ||||
-rw-r--r-- | src/pkg/exp/norm/normalize_test.go | 8 | ||||
-rw-r--r-- | src/pkg/exp/norm/normregtest.go | 20 | ||||
-rw-r--r-- | src/pkg/exp/norm/trie_test.go | 12 | ||||
-rw-r--r-- | src/pkg/exp/norm/triedata_test.go | 2 | ||||
-rw-r--r-- | src/pkg/exp/norm/triegen.go | 4 |
9 files changed, 94 insertions, 94 deletions
diff --git a/src/pkg/exp/norm/composition.go b/src/pkg/exp/norm/composition.go index 1d722230d6..7965ffc574 100644 --- a/src/pkg/exp/norm/composition.go +++ b/src/pkg/exp/norm/composition.go @@ -126,26 +126,26 @@ func (rb *reorderBuffer) insert(src input, i int, info runeInfo) bool { } // appendRune inserts a rune at the end of the buffer. It is used for Hangul. -func (rb *reorderBuffer) appendRune(rune uint32) { +func (rb *reorderBuffer) appendRune(r uint32) { bn := rb.nbyte - sz := utf8.EncodeRune(rb.byte[bn:], int(rune)) + sz := utf8.EncodeRune(rb.byte[bn:], rune(r)) rb.nbyte += utf8.UTFMax rb.rune[rb.nrune] = runeInfo{bn, uint8(sz), 0, 0} rb.nrune++ } // assignRune sets a rune at position pos. It is used for Hangul and recomposition. -func (rb *reorderBuffer) assignRune(pos int, rune uint32) { +func (rb *reorderBuffer) assignRune(pos int, r uint32) { bn := rb.rune[pos].pos - sz := utf8.EncodeRune(rb.byte[bn:], int(rune)) + sz := utf8.EncodeRune(rb.byte[bn:], rune(r)) rb.rune[pos] = runeInfo{bn, uint8(sz), 0, 0} } // runeAt returns the rune at position n. It is used for Hangul and recomposition. func (rb *reorderBuffer) runeAt(n int) uint32 { inf := rb.rune[n] - rune, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size]) - return uint32(rune) + r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size]) + return uint32(r) } // bytesAt returns the UTF-8 encoding of the rune at position n. @@ -237,17 +237,17 @@ func isHangulWithoutJamoT(b []byte) bool { // decomposeHangul algorithmically decomposes a Hangul rune into // its Jamo components. // See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul. -func (rb *reorderBuffer) decomposeHangul(rune uint32) bool { +func (rb *reorderBuffer) decomposeHangul(r uint32) bool { b := rb.rune[:] n := rb.nrune if n+3 > len(b) { return false } - rune -= hangulBase - x := rune % jamoTCount - rune /= jamoTCount - rb.appendRune(jamoLBase + rune/jamoVCount) - rb.appendRune(jamoVBase + rune%jamoVCount) + r -= hangulBase + x := r % jamoTCount + r /= jamoTCount + rb.appendRune(jamoLBase + r/jamoVCount) + rb.appendRune(jamoVBase + r%jamoVCount) if x != 0 { rb.appendRune(jamoTBase + x) } diff --git a/src/pkg/exp/norm/composition_test.go b/src/pkg/exp/norm/composition_test.go index ce9caaff16..e32380d7af 100644 --- a/src/pkg/exp/norm/composition_test.go +++ b/src/pkg/exp/norm/composition_test.go @@ -8,14 +8,14 @@ import "testing" // TestCase is used for most tests. type TestCase struct { - in []int - out []int + in []rune + out []rune } -type insertFunc func(rb *reorderBuffer, rune int) bool +type insertFunc func(rb *reorderBuffer, r rune) bool -func insert(rb *reorderBuffer, rune int) bool { - src := inputString(string(rune)) +func insert(rb *reorderBuffer, r rune) bool { + src := inputString(string(r)) return rb.insert(src, 0, rb.f.info(src, 0)) } @@ -39,7 +39,7 @@ func runTests(t *testing.T, name string, fm Form, f insertFunc, tests []TestCase continue } for j, want := range test.out { - found := int(rb.runeAt(j)) + found := rune(rb.runeAt(j)) if found != want { t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, found, want) } @@ -57,7 +57,7 @@ func TestFlush(t *testing.T) { t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", len(out)) } - for _, r := range []int("world!") { + for _, r := range []rune("world!") { insert(&rb, r) } @@ -76,14 +76,14 @@ func TestFlush(t *testing.T) { } var insertTests = []TestCase{ - {[]int{'a'}, []int{'a'}}, - {[]int{0x300}, []int{0x300}}, - {[]int{0x300, 0x316}, []int{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220 - {[]int{0x316, 0x300}, []int{0x316, 0x300}}, - {[]int{0x41, 0x316, 0x300}, []int{0x41, 0x316, 0x300}}, - {[]int{0x41, 0x300, 0x316}, []int{0x41, 0x316, 0x300}}, - {[]int{0x300, 0x316, 0x41}, []int{0x316, 0x300, 0x41}}, - {[]int{0x41, 0x300, 0x40, 0x316}, []int{0x41, 0x300, 0x40, 0x316}}, + {[]rune{'a'}, []rune{'a'}}, + {[]rune{0x300}, []rune{0x300}}, + {[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220 + {[]rune{0x316, 0x300}, []rune{0x316, 0x300}}, + {[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}}, + {[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}}, + {[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}}, + {[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}}, } func TestInsert(t *testing.T) { @@ -91,18 +91,18 @@ func TestInsert(t *testing.T) { } var decompositionNFDTest = []TestCase{ - {[]int{0xC0}, []int{0x41, 0x300}}, - {[]int{0xAC00}, []int{0x1100, 0x1161}}, - {[]int{0x01C4}, []int{0x01C4}}, - {[]int{0x320E}, []int{0x320E}}, - {[]int("음ẻ과"), []int{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}}, + {[]rune{0xC0}, []rune{0x41, 0x300}}, + {[]rune{0xAC00}, []rune{0x1100, 0x1161}}, + {[]rune{0x01C4}, []rune{0x01C4}}, + {[]rune{0x320E}, []rune{0x320E}}, + {[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}}, } var decompositionNFKDTest = []TestCase{ - {[]int{0xC0}, []int{0x41, 0x300}}, - {[]int{0xAC00}, []int{0x1100, 0x1161}}, - {[]int{0x01C4}, []int{0x44, 0x5A, 0x030C}}, - {[]int{0x320E}, []int{0x28, 0x1100, 0x1161, 0x29}}, + {[]rune{0xC0}, []rune{0x41, 0x300}}, + {[]rune{0xAC00}, []rune{0x1100, 0x1161}}, + {[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}}, + {[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}}, } func TestDecomposition(t *testing.T) { @@ -111,15 +111,15 @@ func TestDecomposition(t *testing.T) { } var compositionTest = []TestCase{ - {[]int{0x41, 0x300}, []int{0xC0}}, - {[]int{0x41, 0x316}, []int{0x41, 0x316}}, - {[]int{0x41, 0x300, 0x35D}, []int{0xC0, 0x35D}}, - {[]int{0x41, 0x316, 0x300}, []int{0xC0, 0x316}}, + {[]rune{0x41, 0x300}, []rune{0xC0}}, + {[]rune{0x41, 0x316}, []rune{0x41, 0x316}}, + {[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}}, + {[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}}, // blocking starter - {[]int{0x41, 0x316, 0x40, 0x300}, []int{0x41, 0x316, 0x40, 0x300}}, - {[]int{0x1100, 0x1161}, []int{0xAC00}}, + {[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}}, + {[]rune{0x1100, 0x1161}, []rune{0xAC00}}, // parenthesized Hangul, alternate between ASCII and Hangul. - {[]int{0x28, 0x1100, 0x1161, 0x29}, []int{0x28, 0xAC00, 0x29}}, + {[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}}, } func TestComposition(t *testing.T) { diff --git a/src/pkg/exp/norm/maketables.go b/src/pkg/exp/norm/maketables.go index 14718c5cd2..626f324a54 100644 --- a/src/pkg/exp/norm/maketables.go +++ b/src/pkg/exp/norm/maketables.go @@ -119,7 +119,7 @@ const ( // This contains only the properties we're interested in. type Char struct { name string - codePoint int // if zero, this index is not a valid code point. + codePoint rune // if zero, this index is not a valid code point. ccc uint8 // canonical combining class excludeInComp bool // from CompositionExclusions.txt compatDecomp bool // it has a compatibility expansion @@ -160,7 +160,7 @@ const ( SMissing ) -var lastChar int = 0 +var lastChar = rune('\u0000') func (c Char) isValid() bool { return c.codePoint != 0 && c.state != SMissing @@ -193,7 +193,7 @@ func (f FormInfo) String() string { return buf.String() } -type Decomposition []int +type Decomposition []rune func (d Decomposition) String() string { return fmt.Sprintf("%.4X", d) @@ -220,7 +220,7 @@ func openReader(file string) (input io.ReadCloser) { return } -func parseDecomposition(s string, skipfirst bool) (a []int, e os.Error) { +func parseDecomposition(s string, skipfirst bool) (a []rune, e os.Error) { decomp := strings.Split(s, " ") if len(decomp) > 0 && skipfirst { decomp = decomp[1:] @@ -230,7 +230,7 @@ func parseDecomposition(s string, skipfirst bool) (a []int, e os.Error) { if err != nil { return a, err } - a = append(a, int(point)) + a = append(a, rune(point)) } return a, nil } @@ -260,7 +260,7 @@ func parseCharacter(line string) { state = SLast } firstChar := lastChar + 1 - lastChar = int(point) + lastChar = rune(point) if state != SLast { firstChar = lastChar } @@ -370,8 +370,8 @@ func loadCompositionExclusions() { // hasCompatDecomp returns true if any of the recursive // decompositions contains a compatibility expansion. // In this case, the character may not occur in NFK*. -func hasCompatDecomp(rune int) bool { - c := &chars[rune] +func hasCompatDecomp(r rune) bool { + c := &chars[r] if c.compatDecomp { return true } @@ -396,19 +396,19 @@ const ( JamoTEnd = 0x11C3 ) -func isHangul(rune int) bool { - return HangulBase <= rune && rune < HangulEnd +func isHangul(r rune) bool { + return HangulBase <= r && r < HangulEnd } -func ccc(rune int) uint8 { - return chars[rune].ccc +func ccc(r rune) uint8 { + return chars[r].ccc } // Insert a rune in a buffer, ordered by Canonical Combining Class. -func insertOrdered(b Decomposition, rune int) Decomposition { +func insertOrdered(b Decomposition, r rune) Decomposition { n := len(b) b = append(b, 0) - cc := ccc(rune) + cc := ccc(r) if cc > 0 { // Use bubble sort. for ; n > 0; n-- { @@ -418,18 +418,18 @@ func insertOrdered(b Decomposition, rune int) Decomposition { b[n] = b[n-1] } } - b[n] = rune + b[n] = r return b } // Recursively decompose. -func decomposeRecursive(form int, rune int, d Decomposition) Decomposition { - if isHangul(rune) { +func decomposeRecursive(form int, r rune, d Decomposition) Decomposition { + if isHangul(r) { return d } - dcomp := chars[rune].forms[form].decomp + dcomp := chars[r].forms[form].decomp if len(dcomp) == 0 { - return insertOrdered(d, rune) + return insertOrdered(d, r) } for _, c := range dcomp { d = decomposeRecursive(form, c, d) @@ -475,8 +475,8 @@ func completeCharFields(form int) { f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint) } - for _, rune := range f.decomp { - chars[rune].forms[form].inDecomp = true + for _, r := range f.decomp { + chars[r].forms[form].inDecomp = true } } @@ -505,7 +505,7 @@ func completeCharFields(form int) { switch { case len(f.decomp) > 0: f.quickCheck[MDecomposed] = QCNo - case isHangul(i): + case isHangul(rune(i)): f.quickCheck[MDecomposed] = QCNo default: f.quickCheck[MDecomposed] = QCYes @@ -588,7 +588,7 @@ func printCharInfoTables() int { for i, char := range chars { v := makeCharInfo(char) if v != 0 { - t.insert(i, v) + t.insert(rune(i), v) } } return t.printTables("charInfo") @@ -606,7 +606,7 @@ func printDecompositionTables() int { for _, c := range chars { for f := 0; f < 2; f++ { d := c.forms[f].expandedDecomp - s := string([]int(d)) + s := string([]rune(d)) if _, ok := positionMap[s]; !ok { p := decompositions.Len() decompositions.WriteByte(uint8(len(s))) @@ -624,7 +624,7 @@ func printDecompositionTables() int { for i, c := range chars { d := c.forms[FCanonical].expandedDecomp if len(d) != 0 { - nfcT.insert(i, positionMap[string([]int(d))]) + nfcT.insert(rune(i), positionMap[string([]rune(d))]) if ccc(c.codePoint) != ccc(d[0]) { // We assume the lead ccc of a decomposition is !=0 in this case. if ccc(d[0]) == 0 { @@ -634,7 +634,7 @@ func printDecompositionTables() int { } d = c.forms[FCompatibility].expandedDecomp if len(d) != 0 { - nfkcT.insert(i, positionMap[string([]int(d))]) + nfkcT.insert(rune(i), positionMap[string([]rune(d))]) if ccc(c.codePoint) != ccc(d[0]) { // We assume the lead ccc of a decomposition is !=0 in this case. if ccc(d[0]) == 0 { @@ -752,7 +752,7 @@ func verifyComputed() { for i, c := range chars { for _, f := range c.forms { isNo := (f.quickCheck[MDecomposed] == QCNo) - if (len(f.decomp) > 0) != isNo && !isHangul(i) { + if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) { log.Fatalf("%U: NF*D must be no if rune decomposes", i) } diff --git a/src/pkg/exp/norm/maketesttables.go b/src/pkg/exp/norm/maketesttables.go index fdcc114be2..20eb889dde 100644 --- a/src/pkg/exp/norm/maketesttables.go +++ b/src/pkg/exp/norm/maketesttables.go @@ -16,7 +16,7 @@ func main() { // We take the smallest, largest and an arbitrary value for each // of the UTF-8 sequence lengths. -var testRunes = []int{ +var testRunes = []rune{ 0x01, 0x0C, 0x7F, // 1-byte sequences 0x80, 0x100, 0x7FF, // 2-byte sequences 0x800, 0x999, 0xFFFF, // 3-byte sequences diff --git a/src/pkg/exp/norm/normalize_test.go b/src/pkg/exp/norm/normalize_test.go index e374edf0ab..6bd5292d3f 100644 --- a/src/pkg/exp/norm/normalize_test.go +++ b/src/pkg/exp/norm/normalize_test.go @@ -28,13 +28,13 @@ func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []Pos if pos != test.pos { t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos) } - runes := []int(test.buffer) + runes := []rune(test.buffer) if rb.nrune != len(runes) { t.Errorf("%s:%d: reorder buffer lenght is %d; want %d", name, i, rb.nrune, len(runes)) continue } for j, want := range runes { - found := int(rb.runeAt(j)) + found := rune(rb.runeAt(j)) if found != want { t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want) } @@ -385,8 +385,8 @@ func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []Ap } if outs != test.out { // Find first rune that differs and show context. - ir := []int(outs) - ig := []int(test.out) + ir := []rune(outs) + ig := []rune(test.out) for j := 0; j < len(ir) && j < len(ig); j++ { if ir[j] == ig[j] { continue diff --git a/src/pkg/exp/norm/normregtest.go b/src/pkg/exp/norm/normregtest.go index cbd73ffa75..cf3b34023b 100644 --- a/src/pkg/exp/norm/normregtest.go +++ b/src/pkg/exp/norm/normregtest.go @@ -103,7 +103,7 @@ type Test struct { name string partnr int number int - rune int // used for character by character test + r rune // used for character by character test cols [cMaxColumns]string // Each has 5 entries, see below. } @@ -174,12 +174,12 @@ func loadTestData() { if err != nil { logger.Fatal(err) } - if test.rune == 0 { + if test.r == 0 { // save for CharacterByCharacterTests - test.rune = int(r) + test.r = int(r) } var buf [utf8.UTFMax]byte - sz := utf8.EncodeRune(buf[:], int(r)) + sz := utf8.EncodeRune(buf[:], rune(r)) test.cols[j-1] += string(buf[:sz]) } } @@ -198,7 +198,7 @@ func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) { if errorCount > 20 { return } - st, sr, sg := []int(test), []int(result), []int(gold) + st, sr, sg := []rune(test), []rune(result), []rune(gold) logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s", t.Name(), name, fstr[f], st, sr, sg, t.name) } @@ -210,7 +210,7 @@ func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bo if errorCount > 20 { return } - logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []int(test), result, want) + logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want) } } @@ -243,13 +243,13 @@ func CharacterByCharacterTests() { tests := part[1].tests last := 0 for i := 0; i <= len(tests); i++ { // last one is special case - var rune int + var r int if i == len(tests) { - rune = 0x2FA1E // Don't have to go to 0x10FFFF + r = 0x2FA1E // Don't have to go to 0x10FFFF } else { - rune = tests[i].rune + r = tests[i].r } - for last++; last < rune; last++ { + for last++; last < r; last++ { // Check all characters that were not explicitly listed in the test. t := &Test{partnr: 1, number: -1} char := string(last) diff --git a/src/pkg/exp/norm/trie_test.go b/src/pkg/exp/norm/trie_test.go index 5649fb7eea..bbd5c03e7b 100644 --- a/src/pkg/exp/norm/trie_test.go +++ b/src/pkg/exp/norm/trie_test.go @@ -73,15 +73,15 @@ var tests = []trietest{ {1, []byte{t6, tx, tx, tx, tx, tx}}, } -func mkUtf8(rune int) ([]byte, int) { +func mkUTF8(r rune) ([]byte, int) { var b [utf8.UTFMax]byte - sz := utf8.EncodeRune(b[:], rune) + sz := utf8.EncodeRune(b[:], r) return b[:sz], sz } func TestLookup(t *testing.T) { for i, tt := range testRunes { - b, szg := mkUtf8(tt) + b, szg := mkUTF8(tt) v, szt := testdata.lookup(b) if int(v) != i { t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i) @@ -103,7 +103,7 @@ func TestLookup(t *testing.T) { func TestLookupUnsafe(t *testing.T) { for i, tt := range testRunes { - b, _ := mkUtf8(tt) + b, _ := mkUTF8(tt) v := testdata.lookupUnsafe(b) if int(v) != i { t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i) @@ -113,7 +113,7 @@ func TestLookupUnsafe(t *testing.T) { func TestLookupString(t *testing.T) { for i, tt := range testRunes { - b, szg := mkUtf8(tt) + b, szg := mkUTF8(tt) v, szt := testdata.lookupString(string(b)) if int(v) != i { t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i) @@ -135,7 +135,7 @@ func TestLookupString(t *testing.T) { func TestLookupStringUnsafe(t *testing.T) { for i, tt := range testRunes { - b, _ := mkUtf8(tt) + b, _ := mkUTF8(tt) v := testdata.lookupStringUnsafe(string(b)) if int(v) != i { t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i) diff --git a/src/pkg/exp/norm/triedata_test.go b/src/pkg/exp/norm/triedata_test.go index e8898e5d42..7f6276096c 100644 --- a/src/pkg/exp/norm/triedata_test.go +++ b/src/pkg/exp/norm/triedata_test.go @@ -4,7 +4,7 @@ package norm -var testRunes = []int{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533} +var testRunes = []rune{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533} // testdataValues: 192 entries, 384 bytes // Block 2 is the null block. diff --git a/src/pkg/exp/norm/triegen.go b/src/pkg/exp/norm/triegen.go index 515e1c7860..56cba32196 100644 --- a/src/pkg/exp/norm/triegen.go +++ b/src/pkg/exp/norm/triegen.go @@ -94,9 +94,9 @@ func (n trieNode) countSparseEntries() int { return count } -func (n *trieNode) insert(rune int, value uint16) { +func (n *trieNode) insert(r rune, value uint16) { var p [utf8.UTFMax]byte - sz := utf8.EncodeRune(p[:], rune) + sz := utf8.EncodeRune(p[:], r) for i := 0; i < sz; i++ { if n.leaf { |