image/gif: make blockReader a ByteReader, harden tests

golang.org/cl/37258 was committed to fix issue #16146. This patch seemed intent to allow at most one dangling byte. But, as implemented, many more bytes may actually slip through. This is because the LZW layer creates a bufio.Reader which will itself consume data beyond the end of the LZW stream, and this isn't accounted for anywhere. This change means to avoid the allocation of the bufio.Reader by making blockReader implement io.ByteReader. Further, it adds a close() method which detects extra data in the block sequence. To avoid any regressions with poorly encoded GIFs which may have worked accidentally, there are no restrictions on how many extra bytes may exist in the final full sub-block that contained LZW data. If the end of the LZW stream happened to align with the end of a sub-block, at most one more sub-block with a length of 1 byte may exist before the block terminator. This change aims to be at least as performant as the prior implementation. But the primary gain is avoiding the allocation of a bufio.Reader per frame: name old time/op new time/op delta Decode-8 276µs ± 0% 275µs ± 2% ~ (p=0.690 n=5+5) name old speed new speed delta Decode-8 55.9MB/s ± 0% 56.3MB/s ± 2% ~ (p=0.690 n=5+5) name old alloc/op new alloc/op delta Decode-8 49.2kB ± 0% 44.8kB ± 0% -9.10% (p=0.008 n=5+5) name old allocs/op new allocs/op delta Decode-8 269 ± 0% 267 ± 0% -0.74% (p=0.008 n=5+5) Change-Id: Iec4f9b895561ad52266313fbc73ec82c070c3349 Reviewed-on: https://go-review.googlesource.com/68350 Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Nigel Tao <nigeltao@golang.org>
author: Jed Denlea <jed@fastly.com> 2017-10-04 17:32:44 -0700
committer: Nigel Tao <nigeltao@golang.org> 2017-10-19 04:03:39 +0000
commit: 58d7231b2f503f70955fb050f02ea8a0277db0fa (patch)
tree: 9da8b3881f9699cd9858afc41e15973aa24a2bbc /src/image
parent: 717d37591d9d446007b830c70ff74d8f8dca13d9 (diff)
download: go-58d7231b2f503f70955fb050f02ea8a0277db0fa.tar.gz
go-58d7231b2f503f70955fb050f02ea8a0277db0fa.zip
2 files changed, 104 insertions, 39 deletions
diff --git a/src/image/gif/reader.go b/src/image/gif/reader.go
index 19f3c61e5d..89ef3c7fc3 100644
--- a/src/image/gif/reader.go
+++ b/src/image/gif/reader.go
@@ -109,46 +109,112 @@ type decoder struct {
 	tmp      [1024]byte // must be at least 768 so we can read color table
 }
 
-// blockReader parses the block structure of GIF image data, which
-// comprises (n, (n bytes)) blocks, with 1 <= n <= 255.  It is the
-// reader given to the LZW decoder, which is thus immune to the
-// blocking. After the LZW decoder completes, there will be a 0-byte
-// block remaining (0, ()), which is consumed when checking that the
-// blockReader is exhausted.
+// blockReader parses the block structure of GIF image data, which comprises
+// (n, (n bytes)) blocks, with 1 <= n <= 255. It is the reader given to the
+// LZW decoder, which is thus immune to the blocking. After the LZW decoder
+// completes, there will be a 0-byte block remaining (0, ()), which is
+// consumed when checking that the blockReader is exhausted.
+//
+// To avoid the allocation of a bufio.Reader for the lzw Reader, blockReader
+// implements io.ReadByte and buffers blocks into the decoder's "tmp" buffer.
 type blockReader struct {
-	r     reader
-	slice []byte
-	err   error
-	tmp   [256]byte
+	d    *decoder
+	i, j uint8 // d.tmp[i:j] contains the buffered bytes
+	err  error
 }
 
-func (b *blockReader) Read(p []byte) (int, error) {
+func (b *blockReader) fill() {
 	if b.err != nil {
-		return 0, b.err
+		return
+	}
+	b.j, b.err = readByte(b.d.r)
+	if b.j == 0 && b.err == nil {
+		b.err = io.EOF
+	}
+	if b.err != nil {
+		return
 	}
-	if len(p) == 0 {
-		return 0, nil
+
+	b.i = 0
+	b.err = readFull(b.d.r, b.d.tmp[:b.j])
+	if b.err != nil {
+		b.j = 0
 	}
-	if len(b.slice) == 0 {
-		var blockLen uint8
-		blockLen, b.err = b.r.ReadByte()
+}
+
+func (b *blockReader) ReadByte() (byte, error) {
+	if b.i == b.j {
+		b.fill()
 		if b.err != nil {
 			return 0, b.err
 		}
-		if blockLen == 0 {
-			b.err = io.EOF
-			return 0, b.err
-		}
-		b.slice = b.tmp[:blockLen]
-		if b.err = readFull(b.r, b.slice); b.err != nil {
+	}
+
+	c := b.d.tmp[b.i]
+	b.i++
+	return c, nil
+}
+
+// blockReader must implement io.Reader, but its Read shouldn't ever actually
+// be called in practice. The compress/lzw package will only call ReadByte.
+func (b *blockReader) Read(p []byte) (int, error) {
+	if len(p) == 0 || b.err != nil {
+		return 0, b.err
+	}
+	if b.i == b.j {
+		b.fill()
+		if b.err != nil {
 			return 0, b.err
 		}
 	}
-	n := copy(p, b.slice)
-	b.slice = b.slice[n:]
+
+	n := copy(p, b.d.tmp[b.i:b.j])
+	b.i += uint8(n)
 	return n, nil
 }
 
+// close primarily detects whether or not a block terminator was encountered
+// after reading a sequence of data sub-blocks. It allows at most one trailing
+// sub-block worth of data. I.e., if some number of bytes exist in one sub-block
+// following the end of LZW data, the very next sub-block must be the block
+// terminator. If the very end of LZW data happened to fill one sub-block, at
+// most one more sub-block of length 1 may exist before the block-terminator.
+// These accomodations allow us to support GIFs created by less strict encoders.
+// See https://golang.org/issue/16146.
+func (b *blockReader) close() error {
+	if b.err == io.EOF {
+		// A clean block-sequence terminator was encountered while reading.
+		return nil
+	} else if b.err != nil {
+		// Some other error was encountered while reading.
+		return b.err
+	}
+
+	if b.i == b.j {
+		// We reached the end of a sub block reading LZW data. We'll allow at
+		// most one more sub block of data with a length of 1 byte.
+		b.fill()
+		if b.err == io.EOF {
+			return nil
+		} else if b.err != nil {
+			return b.err
+		} else if b.j > 1 {
+			return errTooMuch
+		}
+	}
+
+	// Part of a sub-block remains buffered. We expect that the next attempt to
+	// buffer a sub-block will reach the block terminator.
+	b.fill()
+	if b.err == io.EOF {
+		return nil
+	} else if b.err != nil {
+		return b.err
+	}
+
+	return errTooMuch
+}
+
 // decode reads a GIF image from r and stores the result in d.
 func (d *decoder) decode(r io.Reader, configOnly, keepAllFrames bool) error {
 	// Add buffering if r does not provide ReadByte.
@@ -222,7 +288,7 @@ func (d *decoder) decode(r io.Reader, configOnly, keepAllFrames bool) error {
 				return fmt.Errorf("gif: pixel size in decode out of range: %d", litWidth)
 			}
 			// A wonderfully Go-like piece of magic.
-			br := &blockReader{r: d.r}
+			br := &blockReader{d: d}
 			lzwr := lzw.NewReader(br, lzw.LSB, int(litWidth))
 			defer lzwr.Close()
 			if err = readFull(lzwr, m.Pix); err != nil {
@@ -242,7 +308,7 @@ func (d *decoder) decode(r io.Reader, configOnly, keepAllFrames bool) error {
 			// before the LZW decoder saw an explicit end code), provided that
 			// the io.ReadFull call above successfully read len(m.Pix) bytes.
 			// See https://golang.org/issue/9856 for an example GIF.
-			if n, err := lzwr.Read(d.tmp[:1]); n != 0 || (err != io.EOF && err != io.ErrUnexpectedEOF) {
+			if n, err := lzwr.Read(d.tmp[256:257]); n != 0 || (err != io.EOF && err != io.ErrUnexpectedEOF) {
 				if err != nil {
 					return fmt.Errorf("gif: reading image data: %v", err)
 				}
@@ -251,18 +317,10 @@ func (d *decoder) decode(r io.Reader, configOnly, keepAllFrames bool) error {
 
 			// In practice, some GIFs have an extra byte in the data sub-block
 			// stream, which we ignore. See https://golang.org/issue/16146.
-			for nExtraBytes := 0; ; {
-				n, err := br.Read(d.tmp[:2])
-				nExtraBytes += n
-				if nExtraBytes > 1 {
-					return errTooMuch
-				}
-				if err == io.EOF {
-					break
-				}
-				if err != nil {
-					return fmt.Errorf("gif: reading image data: %v", err)
-				}
+			if err := br.close(); err == errTooMuch {
+				return errTooMuch
+			} else if err != nil {
+				return fmt.Errorf("gif: reading image data: %v", err)
 			}
 
 			// Check that the color indexes are inside the palette.
diff --git a/src/image/gif/reader_test.go b/src/image/gif/reader_test.go
index 4b83c9662a..261f59192f 100644
--- a/src/image/gif/reader_test.go
+++ b/src/image/gif/reader_test.go
@@ -9,6 +9,7 @@ import (
 	"compress/lzw"
 	"image"
 	"image/color"
+	"io"
 	"io/ioutil"
 	"reflect"
 	"strings"
@@ -24,6 +25,9 @@ const (
 	trailerStr = "\x3b"
 )
 
+// lzw.NewReader wants a io.ByteReader, this ensures we're compatible.
+var _ io.ByteReader = (*blockReader)(nil)
+
 // lzwEncode returns an LZW encoding (with 2-bit literals) of in.
 func lzwEncode(in []byte) []byte {
 	b := &bytes.Buffer{}
@@ -67,6 +71,9 @@ func TestDecode(t *testing.T) {
 		{2, 1, 0, nil},
 		// Two extra bytes after LZW data, but inside the same data sub-block.
 		{2, 2, 0, nil},
+		// Extra data exists in the final sub-block with LZW data, AND there is
+		// a bogus sub-block following.
+		{2, 1, 1, errTooMuch},
 	}
 	for _, tc := range testCases {
 		b := &bytes.Buffer{}
author	Jed Denlea <jed@fastly.com>	2017-10-04 17:32:44 -0700
committer	Nigel Tao <nigeltao@golang.org>	2017-10-19 04:03:39 +0000
commit	58d7231b2f503f70955fb050f02ea8a0277db0fa (patch)
tree	9da8b3881f9699cd9858afc41e15973aa24a2bbc /src/image
parent	717d37591d9d446007b830c70ff74d8f8dca13d9 (diff)
download	go-58d7231b2f503f70955fb050f02ea8a0277db0fa.tar.gz go-58d7231b2f503f70955fb050f02ea8a0277db0fa.zip