aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRuss Cox <rsc@golang.org>2010-03-16 18:44:37 -0700
committerRuss Cox <rsc@golang.org>2010-03-16 18:44:37 -0700
commitc75f891a720f00d4ce1c44e12ab7337cf7efc118 (patch)
tree03cbdd62998455169f9af07bc7d19dd767f860a5
parent67148530d2def7ba8f0a1700b19a488dec652366 (diff)
downloadgo-c75f891a720f00d4ce1c44e12ab7337cf7efc118.tar.gz
go-c75f891a720f00d4ce1c44e12ab7337cf7efc118.zip
utf16: new package
needed for interacting with various legacy interfaces, like Windows and the Mac OS clipboard. R=r CC=golang-dev https://golang.org/cl/595041
-rw-r--r--src/pkg/Makefile1
-rw-r--r--src/pkg/utf16/Makefile11
-rw-r--r--src/pkg/utf16/utf16.go74
-rw-r--r--src/pkg/utf16/utf16_test.go81
4 files changed, 167 insertions, 0 deletions
diff --git a/src/pkg/Makefile b/src/pkg/Makefile
index d3f0906cf6..0807d6f937 100644
--- a/src/pkg/Makefile
+++ b/src/pkg/Makefile
@@ -117,6 +117,7 @@ DIRS=\
testing/script\
time\
unicode\
+ utf16\
utf8\
websocket\
xgb\
diff --git a/src/pkg/utf16/Makefile b/src/pkg/utf16/Makefile
new file mode 100644
index 0000000000..29e4005032
--- /dev/null
+++ b/src/pkg/utf16/Makefile
@@ -0,0 +1,11 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../Make.$(GOARCH)
+
+TARG=utf16
+GOFILES=\
+ utf16.go\
+
+include ../../Make.pkg
diff --git a/src/pkg/utf16/utf16.go b/src/pkg/utf16/utf16.go
new file mode 100644
index 0000000000..3031624526
--- /dev/null
+++ b/src/pkg/utf16/utf16.go
@@ -0,0 +1,74 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package utf16 implements encoding and decoding of UTF-16 sequences.
+package utf16
+
+import "unicode"
+
+const (
+ // 0xd800-0xdc00 encodes the high 10 bits of a pair.
+ // 0xdc00-0xe000 encodes the low 10 bits of a pair.
+ // the value is those 20 bits plus 0x10000.
+ surr1 = 0xd800
+ surr2 = 0xdc00
+ surr3 = 0xe000
+
+ surrSelf = 0x10000
+)
+
+// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
+func Encode(s []int) []uint16 {
+ n := len(s)
+ for _, v := range s {
+ if v >= surrSelf {
+ n++
+ }
+ }
+
+ a := make([]uint16, n)
+ n = 0
+ for _, v := range s {
+ switch {
+ case v < 0, surr1 <= v && v < surr3, v > unicode.MaxRune:
+ v = unicode.ReplacementChar
+ fallthrough
+ case v < surrSelf:
+ a[n] = uint16(v)
+ n++
+ default:
+ v -= surrSelf
+ a[n] = uint16(surr1 + (v>>10)&0x3ff)
+ a[n+1] = uint16(surr2 + v&0x3ff)
+ n += 2
+ }
+ }
+ return a[0:n]
+}
+
+// Decode returns the Unicode code point sequence represented
+// by the UTF-16 encoding s.
+func Decode(s []uint16) []int {
+ a := make([]int, len(s))
+ n := 0
+ for i := 0; i < len(s); i++ {
+ switch r := s[i]; {
+ case surr1 <= r && r < surr2 && i+1 < len(s) &&
+ surr2 <= s[i+1] && s[i+1] < surr3:
+ // valid surrogate sequence
+ a[n] = (int(r)-surr1)<<10 | (int(s[i+1]) - surr2) + 0x10000
+ i++
+ n++
+ case surr1 <= r && r < surr3:
+ // invalid surrogate sequence
+ a[n] = unicode.ReplacementChar
+ n++
+ default:
+ // normal rune
+ a[n] = int(r)
+ n++
+ }
+ }
+ return a[0:n]
+}
diff --git a/src/pkg/utf16/utf16_test.go b/src/pkg/utf16/utf16_test.go
new file mode 100644
index 0000000000..c6e269aad0
--- /dev/null
+++ b/src/pkg/utf16/utf16_test.go
@@ -0,0 +1,81 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package utf16
+
+import (
+ "fmt"
+ "reflect"
+ "testing"
+)
+
+type encodeTest struct {
+ in []int
+ out []uint16
+}
+
+var encodeTests = []encodeTest{
+ encodeTest{[]int{1, 2, 3, 4}, []uint16{1, 2, 3, 4}},
+ encodeTest{[]int{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
+ []uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}},
+ encodeTest{[]int{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
+ []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}},
+}
+
+func TestEncode(t *testing.T) {
+ for _, tt := range encodeTests {
+ out := Encode(tt.in)
+ if !reflect.DeepEqual(out, tt.out) {
+ t.Errorf("Encode(%v) = %v; want %v", hex(tt.in), hex16(out), hex16(tt.out))
+ }
+ }
+}
+
+type decodeTest struct {
+ in []uint16
+ out []int
+}
+
+var decodeTests = []decodeTest{
+ decodeTest{[]uint16{1, 2, 3, 4}, []int{1, 2, 3, 4}},
+ decodeTest{[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
+ []int{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}},
+ decodeTest{[]uint16{0xd800, 'a'}, []int{0xfffd, 'a'}},
+ decodeTest{[]uint16{0xdfff}, []int{0xfffd}},
+}
+
+func TestDecode(t *testing.T) {
+ for _, tt := range decodeTests {
+ out := Decode(tt.in)
+ if !reflect.DeepEqual(out, tt.out) {
+ t.Errorf("Decode(%v) = %v; want %v", hex16(tt.in), hex(out), hex(tt.out))
+ }
+ }
+}
+
+type hex []int
+
+func (h hex) Format(f fmt.State, c int) {
+ fmt.Fprint(f, "[")
+ for i, v := range h {
+ if i > 0 {
+ fmt.Fprint(f, " ")
+ }
+ fmt.Fprintf(f, "%x", v)
+ }
+ fmt.Fprint(f, "]")
+}
+
+type hex16 []uint16
+
+func (h hex16) Format(f fmt.State, c int) {
+ fmt.Fprint(f, "[")
+ for i, v := range h {
+ if i > 0 {
+ fmt.Fprint(f, " ")
+ }
+ fmt.Fprintf(f, "%x", v)
+ }
+ fmt.Fprint(f, "]")
+}