aboutsummaryrefslogtreecommitdiff
path: root/src/archive
diff options
context:
space:
mode:
authorRuss Cox <rsc@golang.org>2017-11-15 11:47:31 -0500
committerRuss Cox <rsc@golang.org>2017-11-15 21:30:30 +0000
commit3a181dc7bc8fd0c61d6090a85f87c934f1874802 (patch)
tree716c5e505adec8bbcebf6ab2635cadf7ffb3c812 /src/archive
parent7de9e5e199fb9a39e5564ead221b3615d6f89341 (diff)
downloadgo-3a181dc7bc8fd0c61d6090a85f87c934f1874802.tar.gz
go-3a181dc7bc8fd0c61d6090a85f87c934f1874802.zip
archive/zip: fix handling of replacement rune in UTF8 check
The replacement rune is a valid rune and can appear as itself in valid UTF8 (it encodes as three bytes). To check for invalid UTF8 it is necessary to look for utf8.DecodeRune returning the replacement rune and size==1. Change-Id: I169be8d1fe61605c921ac13cc2fde94f80f3463c Reviewed-on: https://go-review.googlesource.com/78126 Run-TryBot: Russ Cox <rsc@golang.org> Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Diffstat (limited to 'src/archive')
-rw-r--r--src/archive/zip/writer.go6
-rw-r--r--src/archive/zip/writer_test.go26
2 files changed, 19 insertions, 13 deletions
diff --git a/src/archive/zip/writer.go b/src/archive/zip/writer.go
index 7b33968618..ad8457c95a 100644
--- a/src/archive/zip/writer.go
+++ b/src/archive/zip/writer.go
@@ -219,7 +219,9 @@ func (w *Writer) Create(name string) (io.Writer, error) {
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
// or any other common encoding).
func detectUTF8(s string) (valid, require bool) {
- for _, r := range s {
+ for i := 0; i < len(s); {
+ r, size := utf8.DecodeRuneInString(s[i:])
+ i += size
// Officially, ZIP uses CP-437, but many readers use the system's
// local character encoding. Most encoding are compatible with a large
// subset of CP-437, which itself is ASCII-like.
@@ -227,7 +229,7 @@ func detectUTF8(s string) (valid, require bool) {
// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
// characters with localized currency and overline characters.
if r < 0x20 || r > 0x7d || r == 0x5c {
- if !utf8.ValidRune(r) || r == utf8.RuneError {
+ if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
return false, false
}
require = true
diff --git a/src/archive/zip/writer_test.go b/src/archive/zip/writer_test.go
index ee5c866310..f217a42e74 100644
--- a/src/archive/zip/writer_test.go
+++ b/src/archive/zip/writer_test.go
@@ -136,40 +136,45 @@ func TestWriterUTF8(t *testing.T) {
var utf8Tests = []struct {
name string
comment string
- expect uint16
nonUTF8 bool
+ flags uint16
}{
{
name: "hi, hello",
comment: "in the world",
- expect: 0x8,
+ flags: 0x8,
},
{
name: "hi, こんにちわ",
comment: "in the world",
- expect: 0x808,
+ flags: 0x808,
},
{
name: "hi, こんにちわ",
comment: "in the world",
nonUTF8: true,
- expect: 0x8,
+ flags: 0x8,
},
{
name: "hi, hello",
comment: "in the 世界",
- expect: 0x808,
+ flags: 0x808,
},
{
name: "hi, こんにちわ",
comment: "in the 世界",
- expect: 0x808,
+ flags: 0x808,
+ },
+ {
+ name: "the replacement rune is �",
+ comment: "the replacement rune is �",
+ flags: 0x808,
},
{
// Name is Japanese encoded in Shift JIS.
name: "\x93\xfa\x96{\x8c\xea.txt",
comment: "in the 世界",
- expect: 0x008, // UTF-8 must not be set
+ flags: 0x008, // UTF-8 must not be set
},
}
@@ -201,10 +206,9 @@ func TestWriterUTF8(t *testing.T) {
t.Fatal(err)
}
for i, test := range utf8Tests {
- got := r.File[i].Flags
- t.Logf("name %v, comment %v", test.name, test.comment)
- if got != test.expect {
- t.Fatalf("Flags: got %v, want %v", got, test.expect)
+ flags := r.File[i].Flags
+ if flags != test.flags {
+ t.Errorf("CreateHeader(name=%q comment=%q nonUTF8=%v): flags=%#x, want %#x", test.name, test.comment, test.nonUTF8, flags, test.flags)
}
}
}