diff options
author | Andrew Gerrand <adg@golang.org> | 2011-07-29 10:47:00 -0700 |
---|---|---|
committer | Andrew Gerrand <adg@golang.org> | 2011-07-29 10:47:00 -0700 |
commit | e0b6f4721fac04fbaaa669e3a52818ef6c2d5df1 (patch) | |
tree | 7616483c88c3cba3aa502311c4661c3eb71d7203 | |
parent | 60dac9b3db4aa1771ba1d843fa94928b4ff515b8 (diff) | |
download | go-e0b6f4721fac04fbaaa669e3a52818ef6c2d5df1.tar.gz go-e0b6f4721fac04fbaaa669e3a52818ef6c2d5df1.zip |
archive/zip: more efficient reader and bug fix
Fixes #2090.
R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/4815068
-rw-r--r-- | src/pkg/archive/zip/reader.go | 228 | ||||
-rw-r--r-- | src/pkg/archive/zip/reader_test.go | 6 | ||||
-rw-r--r-- | src/pkg/archive/zip/struct.go | 3 |
3 files changed, 121 insertions, 116 deletions
diff --git a/src/pkg/archive/zip/reader.go b/src/pkg/archive/zip/reader.go index 98d4fb9943..f92f9297ad 100644 --- a/src/pkg/archive/zip/reader.go +++ b/src/pkg/archive/zip/reader.go @@ -6,7 +6,6 @@ package zip import ( "bufio" - "bytes" "compress/flate" "hash" "hash/crc32" @@ -37,8 +36,7 @@ type File struct { FileHeader zipr io.ReaderAt zipsize int64 - headerOffset uint32 - bodyOffset int64 + headerOffset int64 } func (f *File) hasDataDescriptor() bool { @@ -90,12 +88,12 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error { // The count of files inside a zip is truncated to fit in a uint16. // Gloss over this by reading headers until we encounter - // a bad one, and then only report a FormatError if + // a bad one, and then only report a FormatError or UnexpectedEOF if // the file count modulo 65536 is incorrect. for { f := &File{zipr: r, zipsize: size} - err := readDirectoryHeader(f, buf) - if err == FormatError { + err = readDirectoryHeader(f, buf) + if err == FormatError || err == io.ErrUnexpectedEOF { break } if err != nil { @@ -104,9 +102,10 @@ func (z *Reader) init(r io.ReaderAt, size int64) os.Error { z.File = append(z.File, f) } if uint16(len(z.File)) != end.directoryRecords { - return FormatError + // Return the readDirectoryHeader error if we read + // the wrong number of directory entries. + return err } - return nil } @@ -116,26 +115,18 @@ func (rc *ReadCloser) Close() os.Error { } // Open returns a ReadCloser that provides access to the File's contents. +// It is safe to Open and Read from files concurrently. func (f *File) Open() (rc io.ReadCloser, err os.Error) { - off := int64(f.headerOffset) - size := int64(f.CompressedSize) - if f.bodyOffset == 0 { - r := io.NewSectionReader(f.zipr, off, f.zipsize-off) - if err = readFileHeader(f, r); err != nil { - return - } - if f.bodyOffset, err = r.Seek(0, os.SEEK_CUR); err != nil { - return - } - if size == 0 { - size = int64(f.CompressedSize) - } + bodyOffset, err := f.findBodyOffset() + if err != nil { + return } - if f.hasDataDescriptor() && size == 0 { + size := int64(f.CompressedSize) + if size == 0 && f.hasDataDescriptor() { // permit SectionReader to see the rest of the file - size = f.zipsize - (off + f.bodyOffset) + size = f.zipsize - (f.headerOffset + bodyOffset) } - r := io.NewSectionReader(f.zipr, off+f.bodyOffset, size) + r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size) switch f.Method { case Store: // (no compression) rc = ioutil.NopCloser(r) @@ -176,75 +167,99 @@ func (r *checksumReader) Read(b []byte) (n int, err os.Error) { func (r *checksumReader) Close() os.Error { return r.rc.Close() } -func readFileHeader(f *File, r io.Reader) (err os.Error) { - defer recoverError(&err) - var ( - signature uint32 - filenameLength uint16 - extraLength uint16 - ) - read(r, &signature) - if signature != fileHeaderSignature { +func readFileHeader(f *File, r io.Reader) os.Error { + var b [fileHeaderLen]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return err + } + c := binary.LittleEndian + if sig := c.Uint32(b[:4]); sig != fileHeaderSignature { return FormatError } - read(r, &f.ReaderVersion) - read(r, &f.Flags) - read(r, &f.Method) - read(r, &f.ModifiedTime) - read(r, &f.ModifiedDate) - read(r, &f.CRC32) - read(r, &f.CompressedSize) - read(r, &f.UncompressedSize) - read(r, &filenameLength) - read(r, &extraLength) - f.Name = string(readByteSlice(r, filenameLength)) - f.Extra = readByteSlice(r, extraLength) - return + f.ReaderVersion = c.Uint16(b[4:6]) + f.Flags = c.Uint16(b[6:8]) + f.Method = c.Uint16(b[8:10]) + f.ModifiedTime = c.Uint16(b[10:12]) + f.ModifiedDate = c.Uint16(b[12:14]) + f.CRC32 = c.Uint32(b[14:18]) + f.CompressedSize = c.Uint32(b[18:22]) + f.UncompressedSize = c.Uint32(b[22:26]) + filenameLen := int(c.Uint16(b[26:28])) + extraLen := int(c.Uint16(b[28:30])) + d := make([]byte, filenameLen+extraLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen:] + return nil } -func readDirectoryHeader(f *File, r io.Reader) (err os.Error) { - defer recoverError(&err) - var ( - signature uint32 - filenameLength uint16 - extraLength uint16 - commentLength uint16 - startDiskNumber uint16 // unused - internalAttributes uint16 // unused - externalAttributes uint32 // unused - ) - read(r, &signature) - if signature != directoryHeaderSignature { +// findBodyOffset does the minimum work to verify the file has a header +// and returns the file body offset. +func (f *File) findBodyOffset() (int64, os.Error) { + r := io.NewSectionReader(f.zipr, f.headerOffset, f.zipsize-f.headerOffset) + var b [fileHeaderLen]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return 0, err + } + c := binary.LittleEndian + if sig := c.Uint32(b[:4]); sig != fileHeaderSignature { + return 0, FormatError + } + filenameLen := int(c.Uint16(b[26:28])) + extraLen := int(c.Uint16(b[28:30])) + return int64(fileHeaderLen + filenameLen + extraLen), nil +} + +// readDirectoryHeader attempts to read a directory header from r. +// It returns io.ErrUnexpectedEOF if it cannot read a complete header, +// and FormatError if it doesn't find a valid header signature. +func readDirectoryHeader(f *File, r io.Reader) os.Error { + var b [directoryHeaderLen]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return err + } + c := binary.LittleEndian + if sig := c.Uint32(b[:4]); sig != directoryHeaderSignature { return FormatError } - read(r, &f.CreatorVersion) - read(r, &f.ReaderVersion) - read(r, &f.Flags) - read(r, &f.Method) - read(r, &f.ModifiedTime) - read(r, &f.ModifiedDate) - read(r, &f.CRC32) - read(r, &f.CompressedSize) - read(r, &f.UncompressedSize) - read(r, &filenameLength) - read(r, &extraLength) - read(r, &commentLength) - read(r, &startDiskNumber) - read(r, &internalAttributes) - read(r, &externalAttributes) - read(r, &f.headerOffset) - f.Name = string(readByteSlice(r, filenameLength)) - f.Extra = readByteSlice(r, extraLength) - f.Comment = string(readByteSlice(r, commentLength)) - return + f.CreatorVersion = c.Uint16(b[4:6]) + f.ReaderVersion = c.Uint16(b[6:8]) + f.Flags = c.Uint16(b[8:10]) + f.Method = c.Uint16(b[10:12]) + f.ModifiedTime = c.Uint16(b[12:14]) + f.ModifiedDate = c.Uint16(b[14:16]) + f.CRC32 = c.Uint32(b[16:20]) + f.CompressedSize = c.Uint32(b[20:24]) + f.UncompressedSize = c.Uint32(b[24:28]) + filenameLen := int(c.Uint16(b[28:30])) + extraLen := int(c.Uint16(b[30:32])) + commentLen := int(c.Uint16(b[32:34])) + // startDiskNumber := c.Uint16(b[34:36]) // Unused + // internalAttributes := c.Uint16(b[36:38]) // Unused + // externalAttributes := c.Uint32(b[38:42]) // Unused + f.headerOffset = int64(c.Uint32(b[42:46])) + d := make([]byte, filenameLen+extraLen+commentLen) + if _, err := io.ReadFull(r, d); err != nil { + return err + } + f.Name = string(d[:filenameLen]) + f.Extra = d[filenameLen : filenameLen+extraLen] + f.Comment = string(d[filenameLen+extraLen:]) + return nil } -func readDataDescriptor(r io.Reader, f *File) (err os.Error) { - defer recoverError(&err) - read(r, &f.CRC32) - read(r, &f.CompressedSize) - read(r, &f.UncompressedSize) - return +func readDataDescriptor(r io.Reader, f *File) os.Error { + var b [dataDescriptorLen]byte + if _, err := io.ReadFull(r, b[:]); err != nil { + return err + } + c := binary.LittleEndian + f.CRC32 = c.Uint32(b[:4]) + f.CompressedSize = c.Uint32(b[4:8]) + f.UncompressedSize = c.Uint32(b[8:12]) + return nil } func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Error) { @@ -268,48 +283,29 @@ func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err os.Erro } // read header into struct - defer recoverError(&err) - br := bytes.NewBuffer(b[4:]) // skip over signature + c := binary.LittleEndian d := new(directoryEnd) - read(br, &d.diskNbr) - read(br, &d.dirDiskNbr) - read(br, &d.dirRecordsThisDisk) - read(br, &d.directoryRecords) - read(br, &d.directorySize) - read(br, &d.directoryOffset) - read(br, &d.commentLen) - d.comment = string(readByteSlice(br, d.commentLen)) + d.diskNbr = c.Uint16(b[4:6]) + d.dirDiskNbr = c.Uint16(b[6:8]) + d.dirRecordsThisDisk = c.Uint16(b[8:10]) + d.directoryRecords = c.Uint16(b[10:12]) + d.directorySize = c.Uint32(b[12:16]) + d.directoryOffset = c.Uint32(b[16:20]) + d.commentLen = c.Uint16(b[20:22]) + d.comment = string(b[22 : 22+int(d.commentLen)]) return d, nil } func findSignatureInBlock(b []byte) int { - const minSize = 4 + 2 + 2 + 2 + 2 + 4 + 4 + 2 // fixed part of header - for i := len(b) - minSize; i >= 0; i-- { + for i := len(b) - directoryEndLen; i >= 0; i-- { // defined from directoryEndSignature in struct.go if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 { // n is length of comment - n := int(b[i+minSize-2]) | int(b[i+minSize-1])<<8 - if n+minSize+i == len(b) { + n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8 + if n+directoryEndLen+i == len(b) { return i } } } return -1 } - -func read(r io.Reader, data interface{}) { - if err := binary.Read(r, binary.LittleEndian, data); err != nil { - panic(err) - } -} - -func readByteSlice(r io.Reader, l uint16) []byte { - b := make([]byte, l) - if l == 0 { - return b - } - if _, err := io.ReadFull(r, b); err != nil { - panic(err) - } - return b -} diff --git a/src/pkg/archive/zip/reader_test.go b/src/pkg/archive/zip/reader_test.go index 14603ce672..fd5fed2af0 100644 --- a/src/pkg/archive/zip/reader_test.go +++ b/src/pkg/archive/zip/reader_test.go @@ -162,6 +162,8 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) { t.Errorf("%s: mtime=%s (%d); want %s (%d)", f.Name, time.SecondsToUTC(got), got, mtime, want) } + size0 := f.UncompressedSize + var b bytes.Buffer r, err := f.Open() if err != nil { @@ -169,6 +171,10 @@ func readTestFile(t *testing.T, ft ZipTestFile, f *File) { return } + if size1 := f.UncompressedSize; size0 != size1 { + t.Errorf("file %q changed f.UncompressedSize from %d to %d", f.Name, size0, size1) + } + _, err = io.Copy(&b, r) if err != nil { t.Error(err) diff --git a/src/pkg/archive/zip/struct.go b/src/pkg/archive/zip/struct.go index 8bcd6a5814..1d6e70f105 100644 --- a/src/pkg/archive/zip/struct.go +++ b/src/pkg/archive/zip/struct.go @@ -24,6 +24,9 @@ const ( fileHeaderSignature = 0x04034b50 directoryHeaderSignature = 0x02014b50 directoryEndSignature = 0x06054b50 + fileHeaderLen = 30 // + filename + extra + directoryHeaderLen = 46 // + filename + extra + comment + directoryEndLen = 22 // + comment dataDescriptorLen = 12 ) |