diff options
author | Russ Cox <rsc@golang.org> | 2011-10-25 22:22:09 -0700 |
---|---|---|
committer | Russ Cox <rsc@golang.org> | 2011-10-25 22:22:09 -0700 |
commit | 8f5718176fdd3040d874f85fbd5c825fbd359173 (patch) | |
tree | 5251110e65b4820c8e38645048fa38a5869dc317 | |
parent | 0e513317b1fe148b4fd6604455bc89ecf44ed088 (diff) | |
download | go-8f5718176fdd3040d874f85fbd5c825fbd359173.tar.gz go-8f5718176fdd3040d874f85fbd5c825fbd359173.zip |
bytes, strings: use rune
Various rune-based APIs change.
R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5306044
-rw-r--r-- | src/pkg/bytes/buffer.go | 6 | ||||
-rw-r--r-- | src/pkg/bytes/buffer_test.go | 6 | ||||
-rw-r--r-- | src/pkg/bytes/bytes.go | 121 | ||||
-rw-r--r-- | src/pkg/bytes/bytes_test.go | 60 | ||||
-rw-r--r-- | src/pkg/strings/reader.go | 6 | ||||
-rw-r--r-- | src/pkg/strings/replace_test.go | 6 | ||||
-rw-r--r-- | src/pkg/strings/strings.go | 95 | ||||
-rw-r--r-- | src/pkg/strings/strings_test.go | 76 |
8 files changed, 189 insertions, 187 deletions
diff --git a/src/pkg/bytes/buffer.go b/src/pkg/bytes/buffer.go index 975031bfa4..c2a8c9fe59 100644 --- a/src/pkg/bytes/buffer.go +++ b/src/pkg/bytes/buffer.go @@ -188,7 +188,7 @@ func (b *Buffer) WriteByte(c byte) os.Error { // code point r to the buffer, returning its length and // an error, which is always nil but is included // to match bufio.Writer's WriteRune. -func (b *Buffer) WriteRune(r int) (n int, err os.Error) { +func (b *Buffer) WriteRune(r rune) (n int, err os.Error) { if r < utf8.RuneSelf { b.WriteByte(byte(r)) return 1, nil @@ -255,7 +255,7 @@ func (b *Buffer) ReadByte() (c byte, err os.Error) { // If no bytes are available, the error returned is os.EOF. // If the bytes are an erroneous UTF-8 encoding, it // consumes one byte and returns U+FFFD, 1. -func (b *Buffer) ReadRune() (r int, size int, err os.Error) { +func (b *Buffer) ReadRune() (r rune, size int, err os.Error) { b.lastRead = opInvalid if b.off >= len(b.buf) { // Buffer is empty, reset to recover space. @@ -266,7 +266,7 @@ func (b *Buffer) ReadRune() (r int, size int, err os.Error) { c := b.buf[b.off] if c < utf8.RuneSelf { b.off++ - return int(c), 1, nil + return rune(c), 1, nil } r, n := utf8.DecodeRune(b.buf[b.off:]) b.off += n diff --git a/src/pkg/bytes/buffer_test.go b/src/pkg/bytes/buffer_test.go index 06d2a65c67..ee38e084a5 100644 --- a/src/pkg/bytes/buffer_test.go +++ b/src/pkg/bytes/buffer_test.go @@ -264,7 +264,7 @@ func TestRuneIO(t *testing.T) { b := make([]byte, utf8.UTFMax*NRune) var buf Buffer n := 0 - for r := 0; r < NRune; r++ { + for r := rune(0); r < NRune; r++ { size := utf8.EncodeRune(b[n:], r) nbytes, err := buf.WriteRune(r) if err != nil { @@ -284,7 +284,7 @@ func TestRuneIO(t *testing.T) { p := make([]byte, utf8.UTFMax) // Read it back with ReadRune - for r := 0; r < NRune; r++ { + for r := rune(0); r < NRune; r++ { size := utf8.EncodeRune(p, r) nr, nbytes, err := buf.ReadRune() if nr != r || nbytes != size || err != nil { @@ -295,7 +295,7 @@ func TestRuneIO(t *testing.T) { // Check that UnreadRune works buf.Reset() buf.Write(b) - for r := 0; r < NRune; r++ { + for r := rune(0); r < NRune; r++ { r1, size, _ := buf.ReadRune() if err := buf.UnreadRune(); err != nil { t.Fatalf("UnreadRune(%U) got error %q", r, err) diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go index 2fb456900a..ac8320fe6b 100644 --- a/src/pkg/bytes/bytes.go +++ b/src/pkg/bytes/bytes.go @@ -130,10 +130,10 @@ func LastIndex(s, sep []byte) int { // IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points. // It returns the byte index of the first occurrence in s of the given rune. // It returns -1 if rune is not present in s. -func IndexRune(s []byte, rune int) int { +func IndexRune(s []byte, r rune) int { for i := 0; i < len(s); { - r, size := utf8.DecodeRune(s[i:]) - if r == rune { + r1, size := utf8.DecodeRune(s[i:]) + if r == r1 { return i } i += size @@ -147,16 +147,17 @@ func IndexRune(s []byte, rune int) int { // point in common. func IndexAny(s []byte, chars string) int { if len(chars) > 0 { - var rune, width int + var r rune + var width int for i := 0; i < len(s); i += width { - rune = int(s[i]) - if rune < utf8.RuneSelf { + r = rune(s[i]) + if r < utf8.RuneSelf { width = 1 } else { - rune, width = utf8.DecodeRune(s[i:]) + r, width = utf8.DecodeRune(s[i:]) } - for _, r := range chars { - if rune == r { + for _, ch := range chars { + if r == ch { return i } } @@ -172,10 +173,10 @@ func IndexAny(s []byte, chars string) int { func LastIndexAny(s []byte, chars string) int { if len(chars) > 0 { for i := len(s); i > 0; { - rune, size := utf8.DecodeLastRune(s[0:i]) + r, size := utf8.DecodeLastRune(s[0:i]) i -= size - for _, m := range chars { - if rune == m { + for _, ch := range chars { + if r == ch { return i } } @@ -256,13 +257,13 @@ func Fields(s []byte) [][]byte { // It splits the array s at each run of code points c satisfying f(c) and // returns a slice of subarrays of s. If no code points in s satisfy f(c), an // empty slice is returned. -func FieldsFunc(s []byte, f func(int) bool) [][]byte { +func FieldsFunc(s []byte, f func(rune) bool) [][]byte { n := 0 inField := false for i := 0; i < len(s); { - rune, size := utf8.DecodeRune(s[i:]) + r, size := utf8.DecodeRune(s[i:]) wasInField := inField - inField = !f(rune) + inField = !f(r) if inField && !wasInField { n++ } @@ -273,13 +274,13 @@ func FieldsFunc(s []byte, f func(int) bool) [][]byte { na := 0 fieldStart := -1 for i := 0; i <= len(s) && na < n; { - rune, size := utf8.DecodeRune(s[i:]) - if fieldStart < 0 && size > 0 && !f(rune) { + r, size := utf8.DecodeRune(s[i:]) + if fieldStart < 0 && size > 0 && !f(r) { fieldStart = i i += size continue } - if fieldStart >= 0 && (size == 0 || f(rune)) { + if fieldStart >= 0 && (size == 0 || f(r)) { a[na] = s[fieldStart:i] na++ fieldStart = -1 @@ -329,7 +330,7 @@ func HasSuffix(s, suffix []byte) bool { // according to the mapping function. If mapping returns a negative value, the character is // dropped from the string with no replacement. The characters in s and the // output are interpreted as UTF-8-encoded Unicode code points. -func Map(mapping func(rune int) int, s []byte) []byte { +func Map(mapping func(r rune) rune, s []byte) []byte { // In the worst case, the array can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's // fine. It could also shrink but that falls out naturally. @@ -338,20 +339,20 @@ func Map(mapping func(rune int) int, s []byte) []byte { b := make([]byte, maxbytes) for i := 0; i < len(s); { wid := 1 - rune := int(s[i]) - if rune >= utf8.RuneSelf { - rune, wid = utf8.DecodeRune(s[i:]) + r := rune(s[i]) + if r >= utf8.RuneSelf { + r, wid = utf8.DecodeRune(s[i:]) } - rune = mapping(rune) - if rune >= 0 { - if nbytes+utf8.RuneLen(rune) > maxbytes { + r = mapping(r) + if r >= 0 { + if nbytes+utf8.RuneLen(r) > maxbytes { // Grow the buffer. maxbytes = maxbytes*2 + utf8.UTFMax nb := make([]byte, maxbytes) copy(nb, b[0:nbytes]) b = nb } - nbytes += utf8.EncodeRune(b[nbytes:maxbytes], rune) + nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) } i += wid } @@ -383,44 +384,44 @@ func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } // ToUpperSpecial returns a copy of the byte array s with all Unicode letters mapped to their // upper case, giving priority to the special casing rules. func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r int) int { return _case.ToUpper(r) }, s) + return Map(func(r rune) rune { return _case.ToUpper(r) }, s) } // ToLowerSpecial returns a copy of the byte array s with all Unicode letters mapped to their // lower case, giving priority to the special casing rules. func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r int) int { return _case.ToLower(r) }, s) + return Map(func(r rune) rune { return _case.ToLower(r) }, s) } // ToTitleSpecial returns a copy of the byte array s with all Unicode letters mapped to their // title case, giving priority to the special casing rules. func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte { - return Map(func(r int) int { return _case.ToTitle(r) }, s) + return Map(func(r rune) rune { return _case.ToTitle(r) }, s) } // isSeparator reports whether the rune could mark a word boundary. // TODO: update when package unicode captures more of the properties. -func isSeparator(rune int) bool { +func isSeparator(r rune) bool { // ASCII alphanumerics and underscore are not separators - if rune <= 0x7F { + if r <= 0x7F { switch { - case '0' <= rune && rune <= '9': + case '0' <= r && r <= '9': return false - case 'a' <= rune && rune <= 'z': + case 'a' <= r && r <= 'z': return false - case 'A' <= rune && rune <= 'Z': + case 'A' <= r && r <= 'Z': return false - case rune == '_': + case r == '_': return false } return true } // Letters and digits are not separators - if unicode.IsLetter(rune) || unicode.IsDigit(rune) { + if unicode.IsLetter(r) || unicode.IsDigit(r) { return false } // Otherwise, all we can do for now is treat spaces as separators. - return unicode.IsSpace(rune) + return unicode.IsSpace(r) } // BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly. @@ -431,9 +432,9 @@ func Title(s []byte) []byte { // Use a closure here to remember state. // Hackish but effective. Depends on Map scanning in order and calling // the closure once per rune. - prev := ' ' + prev := rune(' ') return Map( - func(r int) int { + func(r rune) rune { if isSeparator(prev) { prev = r return unicode.ToTitle(r) @@ -446,7 +447,7 @@ func Title(s []byte) []byte { // TrimLeftFunc returns a subslice of s by slicing off all leading UTF-8-encoded // Unicode code points c that satisfy f(c). -func TrimLeftFunc(s []byte, f func(r int) bool) []byte { +func TrimLeftFunc(s []byte, f func(r rune) bool) []byte { i := indexFunc(s, f, false) if i == -1 { return nil @@ -456,7 +457,7 @@ func TrimLeftFunc(s []byte, f func(r int) bool) []byte { // TrimRightFunc returns a subslice of s by slicing off all trailing UTF-8 // encoded Unicode code points c that satisfy f(c). -func TrimRightFunc(s []byte, f func(r int) bool) []byte { +func TrimRightFunc(s []byte, f func(r rune) bool) []byte { i := lastIndexFunc(s, f, false) if i >= 0 && s[i] >= utf8.RuneSelf { _, wid := utf8.DecodeRune(s[i:]) @@ -469,36 +470,36 @@ func TrimRightFunc(s []byte, f func(r int) bool) []byte { // TrimFunc returns a subslice of s by slicing off all leading and trailing // UTF-8-encoded Unicode code points c that satisfy f(c). -func TrimFunc(s []byte, f func(r int) bool) []byte { +func TrimFunc(s []byte, f func(r rune) bool) []byte { return TrimRightFunc(TrimLeftFunc(s, f), f) } // IndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. // It returns the byte index in s of the first Unicode // code point satisfying f(c), or -1 if none do. -func IndexFunc(s []byte, f func(r int) bool) int { +func IndexFunc(s []byte, f func(r rune) bool) int { return indexFunc(s, f, true) } // LastIndexFunc interprets s as a sequence of UTF-8-encoded Unicode code points. // It returns the byte index in s of the last Unicode // code point satisfying f(c), or -1 if none do. -func LastIndexFunc(s []byte, f func(r int) bool) int { +func LastIndexFunc(s []byte, f func(r rune) bool) int { return lastIndexFunc(s, f, true) } // indexFunc is the same as IndexFunc except that if // truth==false, the sense of the predicate function is // inverted. -func indexFunc(s []byte, f func(r int) bool, truth bool) int { +func indexFunc(s []byte, f func(r rune) bool, truth bool) int { start := 0 for start < len(s) { wid := 1 - rune := int(s[start]) - if rune >= utf8.RuneSelf { - rune, wid = utf8.DecodeRune(s[start:]) + r := rune(s[start]) + if r >= utf8.RuneSelf { + r, wid = utf8.DecodeRune(s[start:]) } - if f(rune) == truth { + if f(r) == truth { return start } start += wid @@ -509,21 +510,21 @@ func indexFunc(s []byte, f func(r int) bool, truth bool) int { // lastIndexFunc is the same as LastIndexFunc except that if // truth==false, the sense of the predicate function is // inverted. -func lastIndexFunc(s []byte, f func(r int) bool, truth bool) int { +func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int { for i := len(s); i > 0; { - rune, size := utf8.DecodeLastRune(s[0:i]) + r, size := utf8.DecodeLastRune(s[0:i]) i -= size - if f(rune) == truth { + if f(r) == truth { return i } } return -1 } -func makeCutsetFunc(cutset string) func(rune int) bool { - return func(rune int) bool { +func makeCutsetFunc(cutset string) func(r rune) bool { + return func(r rune) bool { for _, c := range cutset { - if c == rune { + if c == r { return true } } @@ -556,8 +557,8 @@ func TrimSpace(s []byte) []byte { } // Runes returns a slice of runes (Unicode code points) equivalent to s. -func Runes(s []byte) []int { - t := make([]int, utf8.RuneCount(s)) +func Runes(s []byte) []rune { + t := make([]rune, utf8.RuneCount(s)) i := 0 for len(s) > 0 { r, l := utf8.DecodeRune(s) @@ -614,15 +615,15 @@ func Replace(s, old, new []byte, n int) []byte { func EqualFold(s, t []byte) bool { for len(s) != 0 && len(t) != 0 { // Extract first rune from each. - var sr, tr int + var sr, tr rune if s[0] < utf8.RuneSelf { - sr, s = int(s[0]), s[1:] + sr, s = rune(s[0]), s[1:] } else { r, size := utf8.DecodeRune(s) sr, s = r, s[size:] } if t[0] < utf8.RuneSelf { - tr, t = int(t[0]), t[1:] + tr, t = rune(t[0]), t[1:] } else { r, size := utf8.DecodeRune(t) tr, t = r, t[size:] diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go index ce3f37e4de..62f258de8a 100644 --- a/src/pkg/bytes/bytes_test.go +++ b/src/pkg/bytes/bytes_test.go @@ -444,7 +444,7 @@ func TestFields(t *testing.T) { } func TestFieldsFunc(t *testing.T) { - pred := func(c int) bool { return c == 'X' } + pred := func(c rune) bool { return c == 'X' } var fieldsFuncTests = []FieldsTest{ {"", []string{}}, {"XX", []string{}}, @@ -514,24 +514,24 @@ func runStringTests(t *testing.T, f func([]byte) []byte, funcName string, testCa } } -func tenRunes(rune int) string { - r := make([]int, 10) - for i := range r { - r[i] = rune +func tenRunes(r rune) string { + runes := make([]rune, 10) + for i := range runes { + runes[i] = r } - return string(r) + return string(runes) } // User-defined self-inverse mapping function -func rot13(rune int) int { - step := 13 - if rune >= 'a' && rune <= 'z' { - return ((rune - 'a' + step) % 26) + 'a' +func rot13(r rune) rune { + const step = 13 + if r >= 'a' && r <= 'z' { + return ((r - 'a' + step) % 26) + 'a' } - if rune >= 'A' && rune <= 'Z' { - return ((rune - 'A' + step) % 26) + 'A' + if r >= 'A' && r <= 'Z' { + return ((r - 'A' + step) % 26) + 'A' } - return rune + return r } func TestMap(t *testing.T) { @@ -539,7 +539,7 @@ func TestMap(t *testing.T) { a := tenRunes('a') // 1. Grow. This triggers two reallocations in Map. - maxRune := func(rune int) int { return unicode.MaxRune } + maxRune := func(r rune) rune { return unicode.MaxRune } m := Map(maxRune, []byte(a)) expect := tenRunes(unicode.MaxRune) if string(m) != expect { @@ -547,7 +547,7 @@ func TestMap(t *testing.T) { } // 2. Shrink - minRune := func(rune int) int { return 'a' } + minRune := func(r rune) rune { return 'a' } m = Map(minRune, []byte(tenRunes(unicode.MaxRune))) expect = a if string(m) != expect { @@ -569,9 +569,9 @@ func TestMap(t *testing.T) { } // 5. Drop - dropNotLatin := func(rune int) int { - if unicode.Is(unicode.Latin, rune) { - return rune + dropNotLatin := func(r rune) rune { + if unicode.Is(unicode.Latin, r) { + return r } return -1 } @@ -615,7 +615,7 @@ func TestRepeat(t *testing.T) { } } -func runesEqual(a, b []int) bool { +func runesEqual(a, b []rune) bool { if len(a) != len(b) { return false } @@ -629,18 +629,18 @@ func runesEqual(a, b []int) bool { type RunesTest struct { in string - out []int + out []rune lossy bool } var RunesTests = []RunesTest{ - {"", []int{}, false}, - {" ", []int{32}, false}, - {"ABC", []int{65, 66, 67}, false}, - {"abc", []int{97, 98, 99}, false}, - {"\u65e5\u672c\u8a9e", []int{26085, 26412, 35486}, false}, - {"ab\x80c", []int{97, 98, 0xFFFD, 99}, true}, - {"ab\xc0c", []int{97, 98, 0xFFFD, 99}, true}, + {"", []rune{}, false}, + {" ", []rune{32}, false}, + {"ABC", []rune{65, 66, 67}, false}, + {"abc", []rune{97, 98, 99}, false}, + {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false}, + {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true}, + {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true}, } func TestRunes(t *testing.T) { @@ -711,7 +711,7 @@ func TestTrim(t *testing.T) { } type predicate struct { - f func(r int) bool + f func(r rune) bool name string } @@ -719,7 +719,7 @@ var isSpace = predicate{unicode.IsSpace, "IsSpace"} var isDigit = predicate{unicode.IsDigit, "IsDigit"} var isUpper = predicate{unicode.IsUpper, "IsUpper"} var isValidRune = predicate{ - func(r int) bool { + func(r rune) bool { return r != utf8.RuneError }, "IsValidRune", @@ -732,7 +732,7 @@ type TrimFuncTest struct { func not(p predicate) predicate { return predicate{ - func(r int) bool { + func(r rune) bool { return !p.f(r) }, "not " + p.name, diff --git a/src/pkg/strings/reader.go b/src/pkg/strings/reader.go index eb515de006..f4385a437a 100644 --- a/src/pkg/strings/reader.go +++ b/src/pkg/strings/reader.go @@ -60,16 +60,16 @@ func (r *Reader) UnreadByte() os.Error { // If no bytes are available, the error returned is os.EOF. // If the bytes are an erroneous UTF-8 encoding, it // consumes one byte and returns U+FFFD, 1. -func (r *Reader) ReadRune() (rune int, size int, err os.Error) { +func (r *Reader) ReadRune() (ch rune, size int, err os.Error) { if r.i >= len(r.s) { return 0, 0, os.EOF } r.prevRune = r.i if c := r.s[r.i]; c < utf8.RuneSelf { r.i++ - return int(c), 1, nil + return rune(c), 1, nil } - rune, size = utf8.DecodeRuneInString(r.s[r.i:]) + ch, size = utf8.DecodeRuneInString(r.s[r.i:]) r.i += size return } diff --git a/src/pkg/strings/replace_test.go b/src/pkg/strings/replace_test.go index e337856c64..23c7e2e533 100644 --- a/src/pkg/strings/replace_test.go +++ b/src/pkg/strings/replace_test.go @@ -159,12 +159,12 @@ func BenchmarkByteByteReplaces(b *testing.B) { // BenchmarkByteByteMap compares byteByteImpl against Map. func BenchmarkByteByteMap(b *testing.B) { str := Repeat("a", 100) + Repeat("b", 100) - fn := func(r int) int { + fn := func(r rune) rune { switch r { case 'a': - return int('A') + return 'A' case 'b': - return int('B') + return 'B' } return r } diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go index 58301febdf..4f6e8a6fe3 100644 --- a/src/pkg/strings/strings.go +++ b/src/pkg/strings/strings.go @@ -21,11 +21,12 @@ func explode(s string, n int) []string { n = l } a := make([]string, n) - var size, rune int + var size int + var ch rune i, cur := 0, 0 for ; i+1 < n; i++ { - rune, size = utf8.DecodeRuneInString(s[cur:]) - a[i] = string(rune) + ch, size = utf8.DecodeRuneInString(s[cur:]) + a[i] = string(ch) cur += size } // add the rest, if there is any @@ -117,11 +118,11 @@ func LastIndex(s, sep string) int { } // IndexRune returns the index of the first instance of the Unicode code point -// rune, or -1 if rune is not present in s. -func IndexRune(s string, rune int) int { +// r, or -1 if rune is not present in s. +func IndexRune(s string, r rune) int { switch { - case rune < 0x80: - b := byte(rune) + case r < 0x80: + b := byte(r) for i := 0; i < len(s); i++ { if s[i] == b { return i @@ -129,7 +130,7 @@ func IndexRune(s string, rune int) int { } default: for i, c := range s { - if c == rune { + if c == r { return i } } @@ -241,7 +242,7 @@ func Fields(s string) []string { // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) // and returns an array of slices of s. If all code points in s satisfy f(c) or the // string is empty, an empty slice is returned. -func FieldsFunc(s string, f func(int) bool) []string { +func FieldsFunc(s string, f func(rune) bool) []string { // First count the fields. n := 0 inField := false @@ -310,7 +311,7 @@ func HasSuffix(s, suffix string) bool { // Map returns a copy of the string s with all its characters modified // according to the mapping function. If mapping returns a negative value, the character is // dropped from the string with no replacement. -func Map(mapping func(rune int) int, s string) string { +func Map(mapping func(rune) rune, s string) string { // In the worst case, the string can grow when mapped, making // things unpleasant. But it's so rare we barge in assuming it's // fine. It could also shrink but that falls out naturally. @@ -321,18 +322,18 @@ func Map(mapping func(rune int) int, s string) string { var b []byte for i, c := range s { - rune := mapping(c) + r := mapping(c) if b == nil { - if rune == c { + if r == c { continue } b = make([]byte, maxbytes) nbytes = copy(b, s[:i]) } - if rune >= 0 { + if r >= 0 { wid := 1 - if rune >= utf8.RuneSelf { - wid = utf8.RuneLen(rune) + if r >= utf8.RuneSelf { + wid = utf8.RuneLen(r) } if nbytes+wid > maxbytes { // Grow the buffer. @@ -341,7 +342,7 @@ func Map(mapping func(rune int) int, s string) string { copy(nb, b[0:nbytes]) b = nb } - nbytes += utf8.EncodeRune(b[nbytes:maxbytes], rune) + nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r) } } if b == nil { @@ -375,44 +376,44 @@ func ToTitle(s string) string { return Map(unicode.ToTitle, s) } // ToUpperSpecial returns a copy of the string s with all Unicode letters mapped to their // upper case, giving priority to the special casing rules. func ToUpperSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r int) int { return _case.ToUpper(r) }, s) + return Map(func(r rune) rune { return _case.ToUpper(r) }, s) } // ToLowerSpecial returns a copy of the string s with all Unicode letters mapped to their // lower case, giving priority to the special casing rules. func ToLowerSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r int) int { return _case.ToLower(r) }, s) + return Map(func(r rune) rune { return _case.ToLower(r) }, s) } // ToTitleSpecial returns a copy of the string s with all Unicode letters mapped to their // title case, giving priority to the special casing rules. func ToTitleSpecial(_case unicode.SpecialCase, s string) string { - return Map(func(r int) int { return _case.ToTitle(r) }, s) + return Map(func(r rune) rune { return _case.ToTitle(r) }, s) } // isSeparator reports whether the rune could mark a word boundary. // TODO: update when package unicode captures more of the properties. -func isSeparator(rune int) bool { +func isSeparator(r rune) bool { // ASCII alphanumerics and underscore are not separators - if rune <= 0x7F { + if r <= 0x7F { switch { - case '0' <= rune && rune <= '9': + case '0' <= r && r <= '9': return false - case 'a' <= rune && rune <= 'z': + case 'a' <= r && r <= 'z': return false - case 'A' <= rune && rune <= 'Z': + case 'A' <= r && r <= 'Z': return false - case rune == '_': + case r == '_': return false } return true } // Letters and digits are not separators - if unicode.IsLetter(rune) || unicode.IsDigit(rune) { + if unicode.IsLetter(r) || unicode.IsDigit(r) { return false } // Otherwise, all we can do for now is treat spaces as separators. - return unicode.IsSpace(rune) + return unicode.IsSpace(r) } // BUG(r): The rule Title uses for word boundaries does not handle Unicode punctuation properly. @@ -423,9 +424,9 @@ func Title(s string) string { // Use a closure here to remember state. // Hackish but effective. Depends on Map scanning in order and calling // the closure once per rune. - prev := ' ' + prev := rune(' ') return Map( - func(r int) int { + func(r rune) rune { if isSeparator(prev) { prev = r return unicode.ToTitle(r) @@ -438,7 +439,7 @@ func Title(s string) string { // TrimLeftFunc returns a slice of the string s with all leading // Unicode code points c satisfying f(c) removed. -func TrimLeftFunc(s string, f func(r int) bool) string { +func TrimLeftFunc(s string, f func(rune) bool) string { i := indexFunc(s, f, false) if i == -1 { return "" @@ -448,7 +449,7 @@ func TrimLeftFunc(s string, f func(r int) bool) string { // TrimRightFunc returns a slice of the string s with all trailing // Unicode code points c satisfying f(c) removed. -func TrimRightFunc(s string, f func(r int) bool) string { +func TrimRightFunc(s string, f func(rune) bool) string { i := lastIndexFunc(s, f, false) if i >= 0 && s[i] >= utf8.RuneSelf { _, wid := utf8.DecodeRuneInString(s[i:]) @@ -461,34 +462,34 @@ func TrimRightFunc(s string, f func(r int) bool) string { // TrimFunc returns a slice of the string s with all leading // and trailing Unicode code points c satisfying f(c) removed. -func TrimFunc(s string, f func(r int) bool) string { +func TrimFunc(s string, f func(rune) bool) string { return TrimRightFunc(TrimLeftFunc(s, f), f) } // IndexFunc returns the index into s of the first Unicode // code point satisfying f(c), or -1 if none do. -func IndexFunc(s string, f func(r int) bool) int { +func IndexFunc(s string, f func(rune) bool) int { return indexFunc(s, f, true) } // LastIndexFunc returns the index into s of the last // Unicode code point satisfying f(c), or -1 if none do. -func LastIndexFunc(s string, f func(r int) bool) int { +func LastIndexFunc(s string, f func(rune) bool) int { return lastIndexFunc(s, f, true) } // indexFunc is the same as IndexFunc except that if // truth==false, the sense of the predicate function is // inverted. -func indexFunc(s string, f func(r int) bool, truth bool) int { +func indexFunc(s string, f func(rune) bool, truth bool) int { start := 0 for start < len(s) { wid := 1 - rune := int(s[start]) - if rune >= utf8.RuneSelf { - rune, wid = utf8.DecodeRuneInString(s[start:]) + r := rune(s[start]) + if r >= utf8.RuneSelf { + r, wid = utf8.DecodeRuneInString(s[start:]) } - if f(rune) == truth { + if f(r) == truth { return start } start += wid @@ -499,19 +500,19 @@ func indexFunc(s string, f func(r int) bool, truth bool) int { // lastIndexFunc is the same as LastIndexFunc except that if // truth==false, the sense of the predicate function is // inverted. -func lastIndexFunc(s string, f func(r int) bool, truth bool) int { +func lastIndexFunc(s string, f func(rune) bool, truth bool) int { for i := len(s); i > 0; { - rune, size := utf8.DecodeLastRuneInString(s[0:i]) + r, size := utf8.DecodeLastRuneInString(s[0:i]) i -= size - if f(rune) == truth { + if f(r) == truth { return i } } return -1 } -func makeCutsetFunc(cutset string) func(rune int) bool { - return func(rune int) bool { return IndexRune(cutset, rune) != -1 } +func makeCutsetFunc(cutset string) func(rune) bool { + return func(r rune) bool { return IndexRune(cutset, r) != -1 } } // Trim returns a slice of the string s with all leading and @@ -589,15 +590,15 @@ func Replace(s, old, new string, n int) string { func EqualFold(s, t string) bool { for s != "" && t != "" { // Extract first rune from each string. - var sr, tr int + var sr, tr rune if s[0] < utf8.RuneSelf { - sr, s = int(s[0]), s[1:] + sr, s = rune(s[0]), s[1:] } else { r, size := utf8.DecodeRuneInString(s) sr, s = r, s[size:] } if t[0] < utf8.RuneSelf { - tr, t = int(t[0]), t[1:] + tr, t = rune(t[0]), t[1:] } else { r, size := utf8.DecodeRuneInString(t) tr, t = r, t[size:] diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go index 0859ddd962..4132996c19 100644 --- a/src/pkg/strings/strings_test.go +++ b/src/pkg/strings/strings_test.go @@ -122,7 +122,7 @@ func TestLastIndexAny(t *testing.T) { runIndexTests(t, LastIndexAny, "LastIndexA var indexRuneTests = []struct { s string - rune int + rune rune out int }{ {"a A x", 'A', 2}, @@ -312,7 +312,7 @@ var FieldsFuncTests = []FieldsTest{ } func TestFieldsFunc(t *testing.T) { - pred := func(c int) bool { return c == 'X' } + pred := func(c rune) bool { return c == 'X' } for _, tt := range FieldsFuncTests { a := FieldsFunc(tt.s, pred) if !eq(a, tt.a) { @@ -374,31 +374,31 @@ var trimSpaceTests = []StringTest{ {"x ☺ ", "x ☺"}, } -func tenRunes(rune int) string { - r := make([]int, 10) +func tenRunes(ch rune) string { + r := make([]rune, 10) for i := range r { - r[i] = rune + r[i] = ch } return string(r) } // User-defined self-inverse mapping function -func rot13(rune int) int { - step := 13 - if rune >= 'a' && rune <= 'z' { - return ((rune - 'a' + step) % 26) + 'a' +func rot13(r rune) rune { + step := rune(13) + if r >= 'a' && r <= 'z' { + return ((r - 'a' + step) % 26) + 'a' } - if rune >= 'A' && rune <= 'Z' { - return ((rune - 'A' + step) % 26) + 'A' + if r >= 'A' && r <= 'Z' { + return ((r - 'A' + step) % 26) + 'A' } - return rune + return r } func TestMap(t *testing.T) { // Run a couple of awful growth/shrinkage tests a := tenRunes('a') // 1. Grow. This triggers two reallocations in Map. - maxRune := func(rune int) int { return unicode.MaxRune } + maxRune := func(rune) rune { return unicode.MaxRune } m := Map(maxRune, a) expect := tenRunes(unicode.MaxRune) if m != expect { @@ -406,7 +406,7 @@ func TestMap(t *testing.T) { } // 2. Shrink - minRune := func(rune int) int { return 'a' } + minRune := func(rune) rune { return 'a' } m = Map(minRune, tenRunes(unicode.MaxRune)) expect = a if m != expect { @@ -428,9 +428,9 @@ func TestMap(t *testing.T) { } // 5. Drop - dropNotLatin := func(rune int) int { - if unicode.Is(unicode.Latin, rune) { - return rune + dropNotLatin := func(r rune) rune { + if unicode.Is(unicode.Latin, r) { + return r } return -1 } @@ -441,8 +441,8 @@ func TestMap(t *testing.T) { } // 6. Identity - identity := func(rune int) int { - return rune + identity := func(r rune) rune { + return r } orig := "Input string that we expect not to be copied." m = Map(identity, orig) @@ -457,8 +457,8 @@ func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTest func TestToLower(t *testing.T) { runStringTests(t, ToLower, "ToLower", lowerTests) } func BenchmarkMapNoChanges(b *testing.B) { - identity := func(rune int) int { - return rune + identity := func(r rune) rune { + return r } for i := 0; i < b.N; i++ { Map(identity, "Some string that won't be modified.") @@ -536,7 +536,7 @@ func TestTrim(t *testing.T) { } type predicate struct { - f func(r int) bool + f func(rune) bool name string } @@ -544,7 +544,7 @@ var isSpace = predicate{unicode.IsSpace, "IsSpace"} var isDigit = predicate{unicode.IsDigit, "IsDigit"} var isUpper = predicate{unicode.IsUpper, "IsUpper"} var isValidRune = predicate{ - func(r int) bool { + func(r rune) bool { return r != utf8.RuneError }, "IsValidRune", @@ -552,7 +552,7 @@ var isValidRune = predicate{ func not(p predicate) predicate { return predicate{ - func(r int) bool { + func(r rune) bool { return !p.f(r) }, "not " + p.name, @@ -645,9 +645,9 @@ func TestCaseConsistency(t *testing.T) { if testing.Short() { numRunes = 1000 } - a := make([]int, numRunes) + a := make([]rune, numRunes) for i := range a { - a[i] = i + a[i] = rune(i) } s := string(a) // convert the cases. @@ -706,7 +706,7 @@ func TestRepeat(t *testing.T) { } } -func runesEqual(a, b []int) bool { +func runesEqual(a, b []rune) bool { if len(a) != len(b) { return false } @@ -720,30 +720,30 @@ func runesEqual(a, b []int) bool { var RunesTests = []struct { in string - out []int + out []rune lossy bool }{ - {"", []int{}, false}, - {" ", []int{32}, false}, - {"ABC", []int{65, 66, 67}, false}, - {"abc", []int{97, 98, 99}, false}, - {"\u65e5\u672c\u8a9e", []int{26085, 26412, 35486}, false}, - {"ab\x80c", []int{97, 98, 0xFFFD, 99}, true}, - {"ab\xc0c", []int{97, 98, 0xFFFD, 99}, true}, + {"", []rune{}, false}, + {" ", []rune{32}, false}, + {"ABC", []rune{65, 66, 67}, false}, + {"abc", []rune{97, 98, 99}, false}, + {"\u65e5\u672c\u8a9e", []rune{26085, 26412, 35486}, false}, + {"ab\x80c", []rune{97, 98, 0xFFFD, 99}, true}, + {"ab\xc0c", []rune{97, 98, 0xFFFD, 99}, true}, } func TestRunes(t *testing.T) { for _, tt := range RunesTests { - a := []int(tt.in) + a := []rune(tt.in) if !runesEqual(a, tt.out) { - t.Errorf("[]int(%q) = %v; want %v", tt.in, a, tt.out) + t.Errorf("[]rune(%q) = %v; want %v", tt.in, a, tt.out) continue } if !tt.lossy { // can only test reassembly if we didn't lose information s := string(a) if s != tt.in { - t.Errorf("string([]int(%q)) = %x; want %x", tt.in, s, tt.in) + t.Errorf("string([]rune(%q)) = %x; want %x", tt.in, s, tt.in) } } } |