about summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrew Gerrand <adg@golang.org> 2011-07-29 10:47:00 -0700
committer Andrew Gerrand <adg@golang.org> 2011-07-29 10:47:00 -0700
commit e0b6f4721fac04fbaaa669e3a52818ef6c2d5df1 (patch)
tree 7616483c88c3cba3aa502311c4661c3eb71d7203
parent 60dac9b3db4aa1771ba1d843fa94928b4ff515b8 (diff)
download go-e0b6f4721fac04fbaaa669e3a52818ef6c2d5df1.tar.gz
go-e0b6f4721fac04fbaaa669e3a52818ef6c2d5df1.zip
archive/zip: more efficient reader and bug fix
Fixes #2090.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/4815068
-rw-r--r-- src/pkg/archive/zip/reader.go 228
-rw-r--r-- src/pkg/archive/zip/reader_test.go 6
-rw-r--r-- src/pkg/archive/zip/struct.go 3
3 files changed, 121 insertions, 116 deletions
diff --git a/src/pkg/archive/zip/reader.go b/src/pkg/archive/zip/reader.go
index 98d4fb9943..f92f9297ad 100644
--- a/src/pkg/archive/zip/reader.go
+++ b/src/pkg/archive/zip/reader.go
@@ -6,7 +6,6 @@ package zip
import (
"bufio"
- "bytes"
"compress/flate"
"hash"
"hash/crc32"
@@ -37,8 +36,7 @@ type File struct {
FileHeader
zipr io.ReaderAt
zipsize int64
- headerOffset uint32
- bodyOffset int64
+ headerOffset int64
}
func (f *File) hasDataDescriptor() bool {
@@ -90,12 +88,12 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
// The count of files inside a zip is truncated to fit in a uint16.
// Gloss over this by reading headers until we encounter
- // a bad one, and then only report a FormatError if
+ // a bad one, and then only report a FormatError or UnexpectedEOF if
// the file count modulo 65536 is incorrect.
for {
f := &File{zipr: r, zipsize: size}
- err := readDirectoryHeader(f, buf)
- if err == FormatError {
+ err = readDirectoryHeader(f, buf)
+ if err == FormatError || err == io.ErrUnexpectedEOF {
break
}
if err != nil {
@@ -104,9 +102,10 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error {
z.File = append(z.File, f)
}
if uint16(len(z.File)) != end.directoryRecords {
- return FormatError
+ // Return the readDirectoryHeader error if we read
+ // the wrong number of directory entries.
+ return err
}
-
return nil
}
@@ -116,26 +115,18 @@ func (rc *ReadCloser) Close() os.Error {
}
// Open returns a ReadCloser that provides access to the File's contents.
+// It is safe to Open and Read from files concurrently.
func (f *File) Open() (rc io.ReadCloser, err os.Error) {
- off := int64(f.headerOffset)
- size := int64(f.CompressedSize)
- if f.bodyOffset == 0 {
- r := io.NewSectionReader(f.zipr, off, f.zipsize-off)
- if err = readFileHeader(f, r); err != nil {
- return
- }
- if f.bodyOffset, err = r.Seek(0, os.SEEK_CUR); err != nil {
- return
- }
- if size == 0 {
- size = int64(f.CompressedSize)
- }
+ bodyOffset, err := f.findBodyOffset()
+ if err != nil {
+ return
}
- if f.hasDataDescriptor() && size == 0 {
+ size := int64(f.CompressedSize)
+ if size == 0 && f.hasDataDescriptor() {
// permit SectionReader to see the rest of the file
- size = f.zipsize - (off + f.bodyOffset)
+ size = f.zipsize - (f.headerOffset + bodyOffset)
}
- r := io.NewSectionReader(f.zipr, off+f.bodyOffset, size)
+ r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
switch f.Method {
case Store: // (no compression)
rc = ioutil.NopCloser(r)
@@ -176,75 +167,99 @@ func (r *checksumReader) Read(b []byte) (n int, err os.Error) {
func (r *checksumReader) Close() os.Error { return r.rc.Close() }
-func readFileHeader(f *File, r io.Reader) (err os.Error) {
- defer recoverError(&err)
- var (
- signature uint32
- filenameLength uint16
- extraLength uint16
- )
- read(r, &signature)
- if signature != fileHeaderSignature {
+func readFileHeader(f *File, r io.Reader) os.Error {
+ var b [fileHeaderLen]byte
+ if _, err := io.ReadFull(r, b[:]); err != nil {
+ return err
+ }
+ c := binary.LittleEndian
+ if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
return FormatError
}
- read(r, &f.ReaderVersion)
- read(r, &f.Flags)
- read(r, &f.Method)
- read(r, &f.ModifiedTime)
- read(r, &f.ModifiedDate)
- read(r, &f.CRC32)
- read(r, &f.CompressedSize)
- read(r, &f.UncompressedSize)
- read(r, &filenameLength)
- read(r, &extraLength)
- f.Name = string(readByteSlice(r, filenameLength))
- f.Extra = readByteSlice(r, extraLength)
- return
+ f.ReaderVersion = c.Uint16(b[4:6])
+ f.Flags = c.Uint16(b[6:8])
+ f.Method = c.Uint16(b[8:10])
+ f.ModifiedTime = c.Uint16(b[10:12])
+ f.ModifiedDate = c.Uint16(b[12:14])
+ f.CRC32 = c.Uint32(b[14:18])
+ f.CompressedSize = c.Uint32(b[18:22])
+ f.UncompressedSize = c.Uint32(b[22:26])
+ filenameLen := int(c.Uint16(b[26:28]))
+ extraLen := int(c.Uint16(b[28:30]))
+ d := make([]byte, filenameLen+extraLen)
+ if _, err := io.ReadFull(r, d); err != nil {
+ return err
+ }
+ f.Name = string(d[:filenameLen])
+ f.Extra = d[filenameLen:]
+ return nil
}
-func readDirectoryHeader(f *File, r io.Reader) (err os.Error) {
- defer recoverError(&err)
- var (
- signature uint32
- filenameLength uint16
- extraLength uint16
- commentLength uint16
- startDiskNumber uint16 // unused
- internalAttributes uint16 // unused
- externalAttributes uint32 // unused
- )
- read(r, &signature)
- if signature != directoryHeaderSignature {
+// findBodyOffset does the minimum work to verify the file has a header
+// and returns the file body offset.
+func (f *File) findBodyOffset() (int64, os.Error) {
+ r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset)
+ var b [fileHeaderLen]byte
+ if _, err := io.ReadFull(r, b[:]); err != nil {
+ return 0, err
+ }
+ c := binary.LittleEndian
+ if sig := c.Uint32(b[:4]); sig != fileHeaderSignature {
+ return 0, FormatError
+ }
+ filenameLen := int(c.Uint16(b[26:28]))
+ extraLen := int(c.Uint16(b[28:30]))
+ return int64(fileHeaderLen + filenameLen + extraLen), nil
+}
+
+// readDirectoryHeader attempts to read a directory header from r.
+// It returns io.ErrUnexpectedEOF if it cannot read a complete header,
+// and FormatError if it doesn't find a valid header signature.
+func readDirectoryHeader(f *File, r io.Reader) os.Error {
+ var b [directoryHeaderLen]byte
+ if _, err := io.ReadFull(r, b[:]); err != nil {
+ return err
+ }
+ c := binary.LittleEndian
+ if sig := c.Uint32(b[:4]); sig != directoryHeaderSignature {
return FormatError
}
- read(r, &f.CreatorVersion)
- read(r, &f.ReaderVersion)
- read(r, &f.Flags)
- read(r, &f.Method)
- read(r, &f.ModifiedTime)
- read(r, &f.ModifiedDate)
- read(r, &f.CRC32)
- read(r, &f.CompressedSize)
- read(r, &f.UncompressedSize)
- read(r, &filenameLength)
- read(r, &extraLength)
- read(r, &commentLength)
- read(r, &startDiskNumber)
- read(r, &internalAttributes)
- read(r, &externalAttributes)
- read(r, &f.headerOffset)
- f.Name = string(readByteSlice(r, filenameLength))
- f.Extra = readByteSlice(r, extraLength)
- f.Comment = string(readByteSlice(r, commentLength))
- return
+ f.CreatorVersion = c.Uint16(b[4:6])
+ f.ReaderVersion = c.Uint16(b[6:8])
+ f.Flags = c.Uint16(b[8:10])
+ f.Method = c.Uint16(b[10:12])
+ f.ModifiedTime = c.Uint16(b[12:14])
+ f.ModifiedDate = c.Uint16(b[14:16])
+ f.CRC32 = c.Uint32(b[16:20])
+ f.CompressedSize = c.Uint32(b[20:24])
+ f.UncompressedSize = c.Uint32(b[24:28])
+ filenameLen := int(c.Uint16(b[28:30]))
+ extraLen := int(c.Uint16(b[30:32]))
+ commentLen := int(c.Uint16(b[32:34]))
+ // startDiskNumber := c.Uint16(b[34:36]) // Unused
+ // internalAttributes := c.Uint16(b[36:38]) // Unused
+ // externalAttributes := c.Uint32(b[38:42]) // Unused
+ f.headerOffset = int64(c.Uint32(b[42:46]))
+ d := make([]byte, filenameLen+extraLen+commentLen)
+ if _, err := io.ReadFull(r, d); err != nil {
+ return err
+ }
+ f.Name = string(d[:filenameLen])
+ f.Extra = d[filenameLen : filenameLen+extraLen]
+ f.Comment = string(d[filenameLen+extraLen:])
+ return nil
}
-func readDataDescriptor(r io.Reader, f *File) (err os.Error) {
- defer recoverError(&err)
- read(r, &f.CRC32)
- read(r, &f.CompressedSize)
- read(r, &f.UncompressedSize)
- return
+func readDataDescriptor(r io.Reader, f *File) os.Error {
+ var b [dataDescriptorLen]byte
+ if _, err := io.ReadFull(r, b[:]); err != nil {
+ return err
+ }
+ c := binary.LittleEndian
+ f.CRC32 = c.Uint32(b[:4])
+ f.CompressedSize = c.Uint32(b[4:8])
+ f.UncompressedSize = c.Uint32(b[8:12])
+ return nil
}
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) {
@@ -268,48 +283,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Erro
}
// read header into struct
- defer recoverError(&err)
- br := bytes.NewBuffer(b[4:]) // skip over signature
+ c := binary.LittleEndian
d := new(directoryEnd)
- read(br, &d.diskNbr)
- read(br, &d.dirDiskNbr)
- read(br, &d.dirRecordsThisDisk)
- read(br, &d.directoryRecords)
- read(br, &d.directorySize)
- read(br, &d.directoryOffset)
- read(br, &d.commentLen)
- d.comment = string(readByteSlice(br, d.commentLen))
+ d.diskNbr = c.Uint16(b[4:6])
+ d.dirDiskNbr = c.Uint16(b[6:8])
+ d.dirRecordsThisDisk = c.Uint16(b[8:10])
+ d.directoryRecords = c.Uint16(b[10:12])
+ d.directorySize = c.Uint32(b[12:16])
+ d.directoryOffset = c.Uint32(b[16:20])
+ d.commentLen = c.Uint16(b[20:22])
+ d.comment = string(b[22 : 22+int(d.commentLen)])
return d, nil
}
func findSignatureInBlock(b []byte) int {
- const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header
- for i := len(b) - minSize; i >= 0; i-- {
+ for i := len(b) - directoryEndLen; i >= 0; i-- {
// defined from directoryEndSignature in struct.go
if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
// n is length of comment
- n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8
- if n+minSize+i == len(b) {
+ n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
+ if n+directoryEndLen+i == len(b) {
return i
}
}
}
return -1
}
-
-func read(r io.Reader, data interface{}) {
- if err := binary.Read(r, binary.LittleEndian, data); err != nil {
- panic(err)
- }
-}
-
-func readByteSlice(r io.Reader, l uint16) []byte {
- b := make([]byte, l)
- if l == 0 {
- return b
- }
- if _, err := io.ReadFull(r, b); err != nil {
- panic(err)
- }
- return b
-}
diff --git a/src/pkg/archive/zip/reader_test.go b/src/pkg/archive/zip/reader_test.go
index 14603ce672..fd5fed2af0 100644
--- a/src/pkg/archive/zip/reader_test.go
+++ b/src/pkg/archive/zip/reader_test.go
@@ -162,6 +162,8 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want)
}
+ size0 := f.UncompressedSize
+
var b bytes.Buffer
r, err := f.Open()
if err != nil {
@@ -169,6 +171,10 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) {
return
}
+ if size1 := f.UncompressedSize; size0 != size1 {
+ t.Errorf("file %q changed f.UncompressedSize from %d to %d", f.Name, size0, size1)
+ }
+
_, err = io.Copy(&b, r)
if err != nil {
t.Error(err)
diff --git a/src/pkg/archive/zip/struct.go b/src/pkg/archive/zip/struct.go
index 8bcd6a5814..1d6e70f105 100644
--- a/src/pkg/archive/zip/struct.go
+++ b/src/pkg/archive/zip/struct.go
@@ -24,6 +24,9 @@ const (
fileHeaderSignature = 0x04034b50
directoryHeaderSignature = 0x02014b50
directoryEndSignature = 0x06054b50
+ fileHeaderLen = 30 // + filename + extra
+ directoryHeaderLen = 46 // + filename + extra + comment
+ directoryEndLen = 22 // + comment
dataDescriptorLen = 12
)