aboutsummaryrefslogtreecommitdiff
path: root/src/archive/zip/reader_test.go
diff options
context:
space:
mode:
authorJoe Tsai <joetsai@digital-static.net>2017-11-02 13:53:16 -0700
committerJoe Tsai <thebrokentoaster@gmail.com>2017-11-06 21:35:59 +0000
commit4fcc835971ad63cf913ebe074ef6191e35a44ab9 (patch)
tree21b132a256d3f8652416b9eb9d4a4fe5a25473d5 /src/archive/zip/reader_test.go
parent0c554957483aaf1f62c6251dbe62ab5fce3e219c (diff)
downloadgo-4fcc835971ad63cf913ebe074ef6191e35a44ab9.tar.gz
go-4fcc835971ad63cf913ebe074ef6191e35a44ab9.zip
archive/zip: add FileHeader.NonUTF8 field
The NonUTF8 field provides users with a way to explictly tell the ZIP writer to avoid setting the UTF-8 flag. This is necessary because many readers: 1) (Still) do not support UTF-8 2) And use the local system encoding instead Thus, even though character encodings other than CP-437 and UTF-8 are not officially supported by the ZIP specification, pragmatically the world has permitted use of them. When a non-standard encoding is used, it is the user's responsibility to ensure that the target system is expecting the encoding used (e.g., producing a ZIP file you know is used on a Chinese version of Windows). We adjust the detectUTF8 function to account for Shift-JIS and EUC-KR not being identical to ASCII for two characters. We don't need an API for users to explicitly specify that they are encoding with UTF-8 since all single byte characters are compatible with all other common encodings (Windows-1256, Windows-1252, Windows-1251, Windows-1250, IEC-8859, EUC-KR, KOI8-R, Latin-1, Shift-JIS, GB-2312, GBK) except for the non-printable characters and the backslash character (all of which are invalid characters in a path name anyways). Fixes #10741 Change-Id: I9004542d1d522c9137973f1b6e2b623fa54dfd66 Reviewed-on: https://go-review.googlesource.com/75592 Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com> Reviewed-by: Ian Lance Taylor <iant@golang.org>
Diffstat (limited to 'src/archive/zip/reader_test.go')
-rw-r--r--src/archive/zip/reader_test.go65
1 files changed, 64 insertions, 1 deletions
diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go
index d2d051b223..5fa2c80afa 100644
--- a/src/archive/zip/reader_test.go
+++ b/src/archive/zip/reader_test.go
@@ -29,7 +29,8 @@ type ZipTest struct {
type ZipTestFile struct {
Name string
Mode os.FileMode
- ModTime time.Time // optional, modified time in format "mm-dd-yy hh:mm:ss"
+ NonUTF8 bool
+ ModTime time.Time
// Information describing expected zip file content.
// First, reading the entire content should produce the error ContentErr.
@@ -320,6 +321,68 @@ var tests = []ZipTest{
},
},
{
+ Name: "utf8-7zip.zip",
+ File: []ZipTestFile{
+ {
+ Name: "世界",
+ Content: []byte{},
+ Mode: 0666,
+ ModTime: time.Date(2017, 11, 6, 13, 9, 27, 867862500, timeZone(-8*time.Hour)),
+ },
+ },
+ },
+ {
+ Name: "utf8-infozip.zip",
+ File: []ZipTestFile{
+ {
+ Name: "世界",
+ Content: []byte{},
+ Mode: 0644,
+ // Name is valid UTF-8, but format does not have UTF-8 flag set.
+ // We don't do UTF-8 detection for multi-byte runes due to
+ // false-positives with other encodings (e.g., Shift-JIS).
+ // Format says encoding is not UTF-8, so we trust it.
+ NonUTF8: true,
+ ModTime: time.Date(2017, 11, 6, 13, 9, 27, 0, timeZone(-8*time.Hour)),
+ },
+ },
+ },
+ {
+ Name: "utf8-osx.zip",
+ File: []ZipTestFile{
+ {
+ Name: "世界",
+ Content: []byte{},
+ Mode: 0644,
+ // Name is valid UTF-8, but format does not have UTF-8 set.
+ NonUTF8: true,
+ ModTime: time.Date(2017, 11, 6, 13, 9, 27, 0, timeZone(-8*time.Hour)),
+ },
+ },
+ },
+ {
+ Name: "utf8-winrar.zip",
+ File: []ZipTestFile{
+ {
+ Name: "世界",
+ Content: []byte{},
+ Mode: 0666,
+ ModTime: time.Date(2017, 11, 6, 13, 9, 27, 867862500, timeZone(-8*time.Hour)),
+ },
+ },
+ },
+ {
+ Name: "utf8-winzip.zip",
+ File: []ZipTestFile{
+ {
+ Name: "世界",
+ Content: []byte{},
+ Mode: 0666,
+ ModTime: time.Date(2017, 11, 6, 13, 9, 27, 867000000, timeZone(-8*time.Hour)),
+ },
+ },
+ },
+ {
Name: "time-7zip.zip",
File: []ZipTestFile{
{