aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Pike <r@golang.org>2010-09-25 08:52:29 +1000
committerRob Pike <r@golang.org>2010-09-25 08:52:29 +1000
commit2f80d328e87b8155239ed950f4edaf02f1527dea (patch)
tree1705fbc55007e8f34be3eb5f7ab1bf22490f137c
parentb1f44a120f55ad163bb9516ede57277cddc8e50b (diff)
downloadgo-2f80d328e87b8155239ed950f4edaf02f1527dea.tar.gz
go-2f80d328e87b8155239ed950f4edaf02f1527dea.zip
utf8.String: Slice(i,j)
R=rsc CC=golang-dev https://golang.org/cl/2225048
-rw-r--r--src/pkg/utf8/string.go38
-rw-r--r--src/pkg/utf8/string_test.go39
2 files changed, 76 insertions, 1 deletions
diff --git a/src/pkg/utf8/string.go b/src/pkg/utf8/string.go
index ce74f720db..59107ac19d 100644
--- a/src/pkg/utf8/string.go
+++ b/src/pkg/utf8/string.go
@@ -11,6 +11,8 @@ package utf8
// O(N) in the length of the string, but the overhead is less than always
// scanning from the beginning.
// If the string is ASCII, random access is O(1).
+// Unlike the built-in string type, String has internal mutable state and
+// is not thread-safe.
type String struct {
str string
numRunes int
@@ -55,6 +57,39 @@ func (s *String) IsASCII() bool {
return s.width == 0
}
+// Slice returns the string sliced at rune positions [i:j].
+func (s *String) Slice(i, j int) string {
+ // ASCII is easy. Let the compiler catch the indexing error if there is one.
+ if j < s.nonASCII {
+ return s.str[i:j]
+ }
+ if i < 0 || j > s.numRunes || i > j {
+ panic(sliceOutOfRange)
+ }
+ if i == j {
+ return ""
+ }
+ // For non-ASCII, after At(i), bytePos is always the position of the indexed character.
+ var low, high int
+ switch {
+ case i < s.nonASCII:
+ low = i
+ case i == s.numRunes:
+ low = len(s.str)
+ default:
+ s.At(i)
+ low = s.bytePos
+ }
+ switch {
+ case j == s.numRunes:
+ high = len(s.str)
+ default:
+ s.At(j)
+ high = s.bytePos
+ }
+ return s.str[low:high]
+}
+
// At returns the rune with index i in the String. The sequence of runes is the same
// as iterating over the contents with a "for range" clause.
func (s *String) At(i int) int {
@@ -163,4 +198,5 @@ func (err error) String() string {
func (err error) RunTimeError() {
}
-var outOfRange = error("utf8.String: index out of Range")
+var outOfRange = error("utf8.String: index out of range")
+var sliceOutOfRange = error("utf8.String: slice index out of range")
diff --git a/src/pkg/utf8/string_test.go b/src/pkg/utf8/string_test.go
index 7ce8f8a7ea..484d46fbff 100644
--- a/src/pkg/utf8/string_test.go
+++ b/src/pkg/utf8/string_test.go
@@ -68,3 +68,42 @@ func TestRandomAccess(t *testing.T) {
}
}
}
+
+func TestRandomSliceAccess(t *testing.T) {
+ for _, s := range testStrings {
+ if len(s) == 0 || s[0] == '\x80' { // the bad-UTF-8 string fools this simple test
+ continue
+ }
+ runes := []int(s)
+ str := NewString(s)
+ if str.RuneCount() != len(runes) {
+ t.Error("%s: expected %d runes; got %d", s, len(runes), str.RuneCount())
+ break
+ }
+ for k := 0; k < randCount; k++ {
+ i := rand.Intn(len(runes))
+ j := rand.Intn(len(runes) + 1)
+ if i > j { // include empty strings
+ continue
+ }
+ expect := string(runes[i:j])
+ got := str.Slice(i, j)
+ if got != expect {
+ t.Errorf("%s[%d:%d]: expected %q got %q", s, i, j, expect, got)
+ }
+ }
+ }
+}
+
+func TestLimitSliceAccess(t *testing.T) {
+ for _, s := range testStrings {
+ str := NewString(s)
+ if str.Slice(0, 0) != "" {
+ t.Error("failure with empty slice at beginning")
+ }
+ nr := RuneCountInString(s)
+ if str.Slice(nr, nr) != "" {
+ t.Error("failure with empty slice at end")
+ }
+ }
+}