aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRob Pike <r@golang.org>2010-12-06 14:23:37 -0500
committerRob Pike <r@golang.org>2010-12-06 14:23:37 -0500
commit730e39cd134ba13d1df4a5c0645d6a083fb540bc (patch)
tree7a66f079dbeb9533fe50ae284cbe99ea5e6e6f8e
parente2d1595c819ea6baeec42bae2107c141eb935b9e (diff)
downloadgo-730e39cd134ba13d1df4a5c0645d6a083fb540bc.tar.gz
go-730e39cd134ba13d1df4a5c0645d6a083fb540bc.zip
fmt: add %U format for standard Unicode representation of integer values.
fmt.Printf("%U", 1) yields "U+0001" It's essentially "U+%.4x" but lets you override the precision works in scan, too. R=rsc CC=golang-dev https://golang.org/cl/3423043
-rw-r--r--src/pkg/fmt/doc.go1
-rw-r--r--src/pkg/fmt/fmt_test.go8
-rw-r--r--src/pkg/fmt/format.go8
-rw-r--r--src/pkg/fmt/print.go21
-rw-r--r--src/pkg/fmt/scan.go37
-rw-r--r--src/pkg/fmt/scan_test.go2
6 files changed, 67 insertions, 10 deletions
diff --git a/src/pkg/fmt/doc.go b/src/pkg/fmt/doc.go
index 15aae50e3d..f3067eac9f 100644
--- a/src/pkg/fmt/doc.go
+++ b/src/pkg/fmt/doc.go
@@ -26,6 +26,7 @@
%o base 8
%x base 16, with lower-case letters for a-f
%X base 16, with upper-case letters for A-F
+ %U unicode format: U+1234; same as "U+%x" with 4 digits default
Floating-point and complex constituents:
%e scientific notation, e.g. -1234.456e+78
%E scientific notation, e.g. -1234.456E+78
diff --git a/src/pkg/fmt/fmt_test.go b/src/pkg/fmt/fmt_test.go
index fbc2536ee1..d87b93a795 100644
--- a/src/pkg/fmt/fmt_test.go
+++ b/src/pkg/fmt/fmt_test.go
@@ -161,6 +161,14 @@ var fmttests = []fmtTest{
{"% d", 0, " 0"},
{"% d", 12345, " 12345"},
+ // unicode format
+ {"%U", 0x1, "U+0001"},
+ {"%.8U", 0x2, "U+00000002"},
+ {"%U", 0x1234, "U+1234"},
+ {"%U", 0x12345, "U+12345"},
+ {"%10.6U", 0xABC, " U+000ABC"},
+ {"%-10.6U", 0xABC, "U+000ABC "},
+
// floats
{"%+.3e", 0.0, "+0.000e+00"},
{"%+.3e", 1.0, "+1.000e+00"},
diff --git a/src/pkg/fmt/format.go b/src/pkg/fmt/format.go
index 010280bf85..0121dda31d 100644
--- a/src/pkg/fmt/format.go
+++ b/src/pkg/fmt/format.go
@@ -49,6 +49,7 @@ type fmt struct {
plus bool
sharp bool
space bool
+ unicode bool
zero bool
}
@@ -61,6 +62,7 @@ func (f *fmt) clearflags() {
f.plus = false
f.sharp = false
f.space = false
+ f.unicode = false
f.zero = false
}
@@ -213,6 +215,12 @@ func (f *fmt) integer(a int64, base uint64, signedness bool, digits string) {
buf[i] = '0'
}
}
+ if f.unicode {
+ i--
+ buf[i] = '+'
+ i--
+ buf[i] = 'U'
+ }
if negative {
i--
diff --git a/src/pkg/fmt/print.go b/src/pkg/fmt/print.go
index 3bb14eeb14..7ac6648c70 100644
--- a/src/pkg/fmt/print.go
+++ b/src/pkg/fmt/print.go
@@ -316,6 +316,8 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
p.fmt.integer(v, 8, signed, ldigits)
case 'x':
p.fmt.integer(v, 16, signed, ldigits)
+ case 'U':
+ p.fmtUnicode(v)
case 'X':
p.fmt.integer(v, 16, signed, udigits)
default:
@@ -323,7 +325,7 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) {
}
}
-// fmt_sharpHex64 formats a uint64 in hexadecimal and prefixes it with 0x by
+// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by
// temporarily turning on the sharp flag.
func (p *pp) fmt0x64(v uint64) {
sharp := p.fmt.sharp
@@ -332,6 +334,23 @@ func (p *pp) fmt0x64(v uint64) {
p.fmt.sharp = sharp
}
+// fmtUnicode formats a uint64 in U+1234 form by
+// temporarily turning on the unicode flag and tweaking the precision.
+func (p *pp) fmtUnicode(v int64) {
+ precPresent := p.fmt.precPresent
+ prec := p.fmt.prec
+ if !precPresent {
+ // If prec is already set, leave it alone; otherwise 4 is minimum.
+ p.fmt.prec = 4
+ p.fmt.precPresent = true
+ }
+ p.fmt.unicode = true // turn on U+
+ p.fmt.integer(int64(v), 16, unsigned, udigits)
+ p.fmt.unicode = false
+ p.fmt.prec = prec
+ p.fmt.precPresent = precPresent
+}
+
func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) {
switch verb {
case 'b':
diff --git a/src/pkg/fmt/scan.go b/src/pkg/fmt/scan.go
index 9b414cb9a7..dcc42bc92d 100644
--- a/src/pkg/fmt/scan.go
+++ b/src/pkg/fmt/scan.go
@@ -388,9 +388,9 @@ func (s *ss) typeError(field interface{}, expected string) {
var complexError = os.ErrorString("syntax error scanning complex number")
var boolError = os.ErrorString("syntax error scanning boolean")
-// accepts checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
-// buffer and returns true. Otherwise it return false.
-func (s *ss) accept(ok string) bool {
+// consume reads the next rune in the input and reports whether it is in the ok string.
+// If accept is true, it puts the character into the input token.
+func (s *ss) consume(ok string, accept bool) bool {
if s.wid >= s.maxWid {
return false
}
@@ -400,17 +400,25 @@ func (s *ss) accept(ok string) bool {
}
for i := 0; i < len(ok); i++ {
if int(ok[i]) == rune {
- s.buf.WriteRune(rune)
- s.wid++
+ if accept {
+ s.buf.WriteRune(rune)
+ s.wid++
+ }
return true
}
}
- if rune != EOF {
+ if rune != EOF && accept {
s.UngetRune()
}
return false
}
+// accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the
+// buffer and returns true. Otherwise it return false.
+func (s *ss) accept(ok string) bool {
+ return s.consume(ok, true)
+}
+
// okVerb verifies that the verb is present in the list, setting s.err appropriately if not.
func (s *ss) okVerb(verb int, okVerbs, typ string) bool {
for _, v := range okVerbs {
@@ -460,7 +468,7 @@ const (
// getBase returns the numeric base represented by the verb and its digit string.
func (s *ss) getBase(verb int) (base int, digits string) {
- s.okVerb(verb, "bdoxXv", "integer") // sets s.err
+ s.okVerb(verb, "bdoUxXv", "integer") // sets s.err
base = 10
digits = decimalDigits
switch verb {
@@ -470,7 +478,7 @@ func (s *ss) getBase(verb int) (base int, digits string) {
case 'o':
base = 8
digits = octalDigits
- case 'x', 'X':
+ case 'x', 'X', 'U':
base = 16
digits = hexadecimalDigits
}
@@ -506,7 +514,13 @@ func (s *ss) scanInt(verb int, bitSize int) int64 {
}
base, digits := s.getBase(verb)
s.skipSpace(false)
- s.accept(sign) // If there's a sign, it will be left in the token buffer.
+ if verb == 'U' {
+ if !s.consume("U", false) || !s.consume("+", false) {
+ s.errorString("bad unicode format ")
+ }
+ } else {
+ s.accept(sign) // If there's a sign, it will be left in the token buffer.
+ }
tok := s.scanNumber(digits)
i, err := strconv.Btoi64(tok, base)
if err != nil {
@@ -528,6 +542,11 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 {
}
base, digits := s.getBase(verb)
s.skipSpace(false)
+ if verb == 'U' {
+ if !s.consume("U", false) || !s.consume("+", false) {
+ s.errorString("bad unicode format ")
+ }
+ }
tok := s.scanNumber(digits)
i, err := strconv.Btoui64(tok, base)
if err != nil {
diff --git a/src/pkg/fmt/scan_test.go b/src/pkg/fmt/scan_test.go
index cf8a3a766f..7a0baae245 100644
--- a/src/pkg/fmt/scan_test.go
+++ b/src/pkg/fmt/scan_test.go
@@ -222,6 +222,8 @@ var scanfTests = []ScanfTest{
{"%o", "075\n", &uintVal, uint(075)},
{"%x", "a75\n", &uintVal, uint(0xa75)},
{"%x", "A75\n", &uintVal, uint(0xa75)},
+ {"%U", "U+1234\n", &intVal, int(0x1234)},
+ {"%U", "U+4567\n", &uintVal, uint(0x4567)},
// Strings
{"%s", "using-%s\n", &stringVal, "using-%s"},