aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Gerrand <adg@golang.org>2010-09-30 11:59:46 +1000
committerAndrew Gerrand <adg@golang.org>2010-09-30 11:59:46 +1000
commita00b98ec3f181c2e1940bdb312615024c17d75ef (patch)
treeaca6330c345797567022accb91e49e8eac7c5749
parentfdb9e68c4e0f79f46daf36cb2fb57841ef4b91f7 (diff)
downloadgo-a00b98ec3f181c2e1940bdb312615024c17d75ef.tar.gz
go-a00b98ec3f181c2e1940bdb312615024c17d75ef.zip
archive/zip: new package for reading ZIP files
R=rsc CC=golang-dev https://golang.org/cl/2125042
-rw-r--r--src/pkg/Makefile1
-rw-r--r--src/pkg/archive/zip/Makefile12
-rw-r--r--src/pkg/archive/zip/reader.go278
-rw-r--r--src/pkg/archive/zip/reader_test.go180
-rw-r--r--src/pkg/archive/zip/struct.go33
-rw-r--r--src/pkg/archive/zip/testdata/gophercolor16x16.pngbin0 -> 785 bytes
-rw-r--r--src/pkg/archive/zip/testdata/r.zipbin0 -> 440 bytes
-rw-r--r--src/pkg/archive/zip/testdata/readme.notzipbin0 -> 1905 bytes
-rw-r--r--src/pkg/archive/zip/testdata/readme.zipbin0 -> 1885 bytes
-rw-r--r--src/pkg/archive/zip/testdata/test.zipbin0 -> 1170 bytes
10 files changed, 504 insertions, 0 deletions
diff --git a/src/pkg/Makefile b/src/pkg/Makefile
index 33194918b8..d7351c5993 100644
--- a/src/pkg/Makefile
+++ b/src/pkg/Makefile
@@ -15,6 +15,7 @@ all: install
DIRS=\
archive/tar\
+ archive/zip\
asn1\
big\
bufio\
diff --git a/src/pkg/archive/zip/Makefile b/src/pkg/archive/zip/Makefile
new file mode 100644
index 0000000000..32a543133c
--- /dev/null
+++ b/src/pkg/archive/zip/Makefile
@@ -0,0 +1,12 @@
+# Copyright 2010 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../../Make.inc
+
+TARG=archive/zip
+GOFILES=\
+ reader.go\
+ struct.go\
+
+include ../../../Make.pkg
diff --git a/src/pkg/archive/zip/reader.go b/src/pkg/archive/zip/reader.go
new file mode 100644
index 0000000000..579ba16029
--- /dev/null
+++ b/src/pkg/archive/zip/reader.go
@@ -0,0 +1,278 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+The zip package provides support for reading ZIP archives.
+
+See: http://www.pkware.com/documents/casestudies/APPNOTE.TXT
+
+This package does not support ZIP64 or disk spanning.
+*/
+package zip
+
+import (
+ "bufio"
+ "bytes"
+ "compress/flate"
+ "hash"
+ "hash/crc32"
+ "encoding/binary"
+ "io"
+ "os"
+)
+
+var (
+ FormatError = os.NewError("not a valid zip file")
+ UnsupportedMethod = os.NewError("unsupported compression algorithm")
+ ChecksumError = os.NewError("checksum error")
+)
+
+type Reader struct {
+ r io.ReaderAt
+ File []*File
+ Comment string
+}
+
+type File struct {
+ FileHeader
+ zipr io.ReaderAt
+ zipsize int64
+ headerOffset uint32
+ bodyOffset int64
+}
+
+// OpenReader will open the Zip file specified by name and return a Reader.
+func OpenReader(name string) (*Reader, os.Error) {
+ f, err := os.Open(name, os.O_RDONLY, 0644)
+ if err != nil {
+ return nil, err
+ }
+ fi, err := f.Stat()
+ if err != nil {
+ return nil, err
+ }
+ return NewReader(f, fi.Size)
+}
+
+// NewReader returns a new Reader reading from r, which is assumed to
+// have the given size in bytes.
+func NewReader(r io.ReaderAt, size int64) (*Reader, os.Error) {
+ end, err := readDirectoryEnd(r, size)
+ if err != nil {
+ return nil, err
+ }
+ z := &Reader{
+ r: r,
+ File: make([]*File, end.directoryRecords),
+ Comment: end.comment,
+ }
+ rs := io.NewSectionReader(r, 0, size)
+ if _, err = rs.Seek(int64(end.directoryOffset), 0); err != nil {
+ return nil, err
+ }
+ buf := bufio.NewReader(rs)
+ for i := range z.File {
+ z.File[i] = &File{zipr: r, zipsize: size}
+ if err := readDirectoryHeader(z.File[i], buf); err != nil {
+ return nil, err
+ }
+ }
+ return z, nil
+}
+
+// Open returns a ReadCloser that provides access to the File's contents.
+func (f *File) Open() (rc io.ReadCloser, err os.Error) {
+ off := int64(f.headerOffset)
+ if f.bodyOffset == 0 {
+ r := io.NewSectionReader(f.zipr, off, f.zipsize-off)
+ if err = readFileHeader(f, r); err != nil {
+ return
+ }
+ if f.bodyOffset, err = r.Seek(0, 1); err != nil {
+ return
+ }
+ }
+ r := io.NewSectionReader(f.zipr, off+f.bodyOffset, int64(f.CompressedSize))
+ switch f.Method {
+ case 0: // store (no compression)
+ rc = nopCloser{r}
+ case 8: // DEFLATE
+ rc = flate.NewReader(r)
+ default:
+ err = UnsupportedMethod
+ }
+ if rc != nil {
+ rc = &checksumReader{rc, crc32.NewIEEE(), f.CRC32}
+ }
+ return
+}
+
+type checksumReader struct {
+ rc io.ReadCloser
+ hash hash.Hash32
+ sum uint32
+}
+
+func (r *checksumReader) Read(b []byte) (n int, err os.Error) {
+ n, err = r.rc.Read(b)
+ r.hash.Write(b[:n])
+ if err != os.EOF {
+ return
+ }
+ if r.hash.Sum32() != r.sum {
+ err = ChecksumError
+ }
+ return
+}
+
+func (r *checksumReader) Close() os.Error { return r.rc.Close() }
+
+type nopCloser struct {
+ io.Reader
+}
+
+func (f nopCloser) Close() os.Error { return nil }
+
+func readFileHeader(f *File, r io.Reader) (err os.Error) {
+ defer func() {
+ if rerr, ok := recover().(os.Error); ok {
+ err = rerr
+ }
+ }()
+ var (
+ signature uint32
+ filenameLength uint16
+ extraLength uint16
+ )
+ read(r, &signature)
+ if signature != fileHeaderSignature {
+ return FormatError
+ }
+ read(r, &f.ReaderVersion)
+ read(r, &f.Flags)
+ read(r, &f.Method)
+ read(r, &f.ModifiedTime)
+ read(r, &f.ModifiedDate)
+ read(r, &f.CRC32)
+ read(r, &f.CompressedSize)
+ read(r, &f.UncompressedSize)
+ read(r, &filenameLength)
+ read(r, &extraLength)
+ f.Name = string(readByteSlice(r, filenameLength))
+ f.Extra = readByteSlice(r, extraLength)
+ return
+}
+
+func readDirectoryHeader(f *File, r io.Reader) (err os.Error) {
+ defer func() {
+ if rerr, ok := recover().(os.Error); ok {
+ err = rerr
+ }
+ }()
+ var (
+ signature uint32
+ filenameLength uint16
+ extraLength uint16
+ commentLength uint16
+ startDiskNumber uint16 // unused
+ internalAttributes uint16 // unused
+ externalAttributes uint32 // unused
+ )
+ read(r, &signature)
+ if signature != directoryHeaderSignature {
+ return FormatError
+ }
+ read(r, &f.CreatorVersion)
+ read(r, &f.ReaderVersion)
+ read(r, &f.Flags)
+ read(r, &f.Method)
+ read(r, &f.ModifiedTime)
+ read(r, &f.ModifiedDate)
+ read(r, &f.CRC32)
+ read(r, &f.CompressedSize)
+ read(r, &f.UncompressedSize)
+ read(r, &filenameLength)
+ read(r, &extraLength)
+ read(r, &commentLength)
+ read(r, &startDiskNumber)
+ read(r, &internalAttributes)
+ read(r, &externalAttributes)
+ read(r, &f.headerOffset)
+ f.Name = string(readByteSlice(r, filenameLength))
+ f.Extra = readByteSlice(r, extraLength)
+ f.Comment = string(readByteSlice(r, commentLength))
+ return
+}
+
+func readDirectoryEnd(r io.ReaderAt, size int64) (d *directoryEnd, err os.Error) {
+ // look for directoryEndSignature in the last 1k, then in the last 65k
+ var b []byte
+ for i, bLen := range []int64{1024, 65 * 1024} {
+ if bLen > size {
+ bLen = size
+ }
+ b = make([]byte, int(bLen))
+ if _, err := r.ReadAt(b, size-bLen); err != nil && err != os.EOF {
+ return nil, err
+ }
+ if p := findSignatureInBlock(b); p >= 0 {
+ b = b[p:]
+ break
+ }
+ if i == 1 || bLen == size {
+ return nil, FormatError
+ }
+ }
+
+ // read header into struct
+ defer func() {
+ if rerr, ok := recover().(os.Error); ok {
+ err = rerr
+ d = nil
+ }
+ }()
+ br := bytes.NewBuffer(b[4:]) // skip over signature
+ d = new(directoryEnd)
+ read(br, &d.diskNbr)
+ read(br, &d.dirDiskNbr)
+ read(br, &d.dirRecordsThisDisk)
+ read(br, &d.directoryRecords)
+ read(br, &d.directorySize)
+ read(br, &d.directoryOffset)
+ read(br, &d.commentLen)
+ d.comment = string(readByteSlice(br, d.commentLen))
+ return d, nil
+}
+
+func findSignatureInBlock(b []byte) int {
+ const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header
+ for i := len(b) - minSize; i >= 0; i-- {
+ // defined from directoryEndSignature in struct.go
+ if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
+ // n is length of comment
+ n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8
+ if n+minSize+i == len(b) {
+ return i
+ }
+ }
+ }
+ return -1
+}
+
+func read(r io.Reader, data interface{}) {
+ if err := binary.Read(r, binary.LittleEndian, data); err != nil {
+ panic(err)
+ }
+}
+
+func readByteSlice(r io.Reader, l uint16) []byte {
+ b := make([]byte, l)
+ if l == 0 {
+ return b
+ }
+ if _, err := io.ReadFull(r, b); err != nil {
+ panic(err)
+ }
+ return b
+}
diff --git a/src/pkg/archive/zip/reader_test.go b/src/pkg/archive/zip/reader_test.go
new file mode 100644
index 0000000000..36b925c4fc
--- /dev/null
+++ b/src/pkg/archive/zip/reader_test.go
@@ -0,0 +1,180 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package zip
+
+import (
+ "bytes"
+ "encoding/binary"
+ "io"
+ "io/ioutil"
+ "os"
+ "testing"
+)
+
+type ZipTest struct {
+ Name string
+ Comment string
+ File []ZipTestFile
+ Error os.Error // the error that Opening this file should return
+}
+
+type ZipTestFile struct {
+ Name string
+ Content []byte // if blank, will attempt to compare against File
+ File string // name of file to compare to (relative to testdata/)
+}
+
+var tests = []ZipTest{
+ ZipTest{
+ Name: "test.zip",
+ Comment: "This is a zipfile comment.",
+ File: []ZipTestFile{
+ ZipTestFile{
+ Name: "test.txt",
+ Content: []byte("This is a test text file.\n"),
+ },
+ ZipTestFile{
+ Name: "gophercolor16x16.png",
+ File: "gophercolor16x16.png",
+ },
+ },
+ },
+ ZipTest{
+ Name: "r.zip",
+ File: []ZipTestFile{
+ ZipTestFile{
+ Name: "r/r.zip",
+ File: "r.zip",
+ },
+ },
+ },
+ ZipTest{Name: "readme.zip"},
+ ZipTest{Name: "readme.notzip", Error: FormatError},
+}
+
+func TestReader(t *testing.T) {
+ for _, zt := range tests {
+ readTestZip(t, zt)
+ }
+}
+
+func readTestZip(t *testing.T, zt ZipTest) {
+ z, err := OpenReader("testdata/" + zt.Name)
+ if err != zt.Error {
+ t.Errorf("error=%v, want %v", err, zt.Error)
+ return
+ }
+
+ // bail here if no Files expected to be tested
+ // (there may actually be files in the zip, but we don't care)
+ if zt.File == nil {
+ return
+ }
+
+ if z.Comment != zt.Comment {
+ t.Errorf("%s: comment=%q, want %q", zt.Name, z.Comment, zt.Comment)
+ }
+ if len(z.File) != len(zt.File) {
+ t.Errorf("%s: file count=%d, want %d", zt.Name, len(z.File), len(zt.File))
+ }
+
+ // test read of each file
+ for i, ft := range zt.File {
+ readTestFile(t, ft, z.File[i])
+ }
+
+ // test simultaneous reads
+ n := 0
+ done := make(chan bool)
+ for i := 0; i < 5; i++ {
+ for j, ft := range zt.File {
+ go func() {
+ readTestFile(t, ft, z.File[j])
+ done <- true
+ }()
+ n++
+ }
+ }
+ for ; n > 0; n-- {
+ <-done
+ }
+
+ // test invalid checksum
+ z.File[0].CRC32++ // invalidate
+ r, err := z.File[0].Open()
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ var b bytes.Buffer
+ _, err = io.Copy(&b, r)
+ if err != ChecksumError {
+ t.Errorf("%s: copy error=%v, want %v", err, ChecksumError)
+ }
+}
+
+func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
+ if f.Name != ft.Name {
+ t.Errorf("name=%q, want %q", f.Name, ft.Name)
+ }
+ var b bytes.Buffer
+ r, err := f.Open()
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ _, err = io.Copy(&b, r)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ r.Close()
+ var c []byte
+ if len(ft.Content) != 0 {
+ c = ft.Content
+ } else if c, err = ioutil.ReadFile("testdata/" + ft.File); err != nil {
+ t.Error(err)
+ return
+ }
+ if b.Len() != len(c) {
+ t.Errorf("%s: len=%d, want %d", f.Name, b.Len(), len(c))
+ return
+ }
+ for i, b := range b.Bytes() {
+ if b != c[i] {
+ t.Errorf("%s: content[%d]=%q want %q", i, b, c[i])
+ return
+ }
+ }
+}
+
+func TestInvalidFiles(t *testing.T) {
+ const size = 1024 * 70 // 70kb
+ b := make([]byte, size)
+
+ // zeroes
+ _, err := NewReader(sliceReaderAt(b), size)
+ if err != FormatError {
+ t.Errorf("zeroes: error=%v, want %v", err, FormatError)
+ }
+
+ // repeated directoryEndSignatures
+ sig := make([]byte, 4)
+ binary.LittleEndian.PutUint32(sig, directoryEndSignature)
+ for i := 0; i < size-4; i += 4 {
+ copy(b[i:i+4], sig)
+ }
+ _, err = NewReader(sliceReaderAt(b), size)
+ if err != FormatError {
+ t.Errorf("sigs: error=%v, want %v", err, FormatError)
+ }
+}
+
+type sliceReaderAt []byte
+
+func (r sliceReaderAt) ReadAt(b []byte, off int64) (int, os.Error) {
+ copy(b, r[int(off):int(off)+len(b)])
+ return len(b), nil
+}
diff --git a/src/pkg/archive/zip/struct.go b/src/pkg/archive/zip/struct.go
new file mode 100644
index 0000000000..8a8c727d47
--- /dev/null
+++ b/src/pkg/archive/zip/struct.go
@@ -0,0 +1,33 @@
+package zip
+
+const (
+ fileHeaderSignature = 0x04034b50
+ directoryHeaderSignature = 0x02014b50
+ directoryEndSignature = 0x06054b50
+)
+
+type FileHeader struct {
+ Name string
+ CreatorVersion uint16
+ ReaderVersion uint16
+ Flags uint16
+ Method uint16
+ ModifiedTime uint16
+ ModifiedDate uint16
+ CRC32 uint32
+ CompressedSize uint32
+ UncompressedSize uint32
+ Extra []byte
+ Comment string
+}
+
+type directoryEnd struct {
+ diskNbr uint16 // unused
+ dirDiskNbr uint16 // unused
+ dirRecordsThisDisk uint16 // unused
+ directoryRecords uint16
+ directorySize uint32
+ directoryOffset uint32 // relative to file
+ commentLen uint16
+ comment string
+}
diff --git a/src/pkg/archive/zip/testdata/gophercolor16x16.png b/src/pkg/archive/zip/testdata/gophercolor16x16.png
new file mode 100644
index 0000000000..48854ff3b7
--- /dev/null
+++ b/src/pkg/archive/zip/testdata/gophercolor16x16.png
Binary files differ
diff --git a/src/pkg/archive/zip/testdata/r.zip b/src/pkg/archive/zip/testdata/r.zip
new file mode 100644
index 0000000000..ea0fa2ffcc
--- /dev/null
+++ b/src/pkg/archive/zip/testdata/r.zip
Binary files differ
diff --git a/src/pkg/archive/zip/testdata/readme.notzip b/src/pkg/archive/zip/testdata/readme.notzip
new file mode 100644
index 0000000000..06668c4c1c
--- /dev/null
+++ b/src/pkg/archive/zip/testdata/readme.notzip
Binary files differ
diff --git a/src/pkg/archive/zip/testdata/readme.zip b/src/pkg/archive/zip/testdata/readme.zip
new file mode 100644
index 0000000000..db3bb900e4
--- /dev/null
+++ b/src/pkg/archive/zip/testdata/readme.zip
Binary files differ
diff --git a/src/pkg/archive/zip/testdata/test.zip b/src/pkg/archive/zip/testdata/test.zip
new file mode 100644
index 0000000000..03890c05d4
--- /dev/null
+++ b/src/pkg/archive/zip/testdata/test.zip
Binary files differ