aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNigel Tao <nigeltao@golang.org>2024-04-22 23:36:32 +1000
committerNigel Tao <nigeltao@golang.org>2024-04-25 00:46:29 +0000
commite7aeeae0c89f5bae76ba263b1ab2b82c56ad32a3 (patch)
tree8323867b3672c9fb795a548d5a165192b44203ac
parent799968dfc3b9121019b752a9110489baa71a4355 (diff)
downloadgo-e7aeeae0c89f5bae76ba263b1ab2b82c56ad32a3.tar.gz
go-e7aeeae0c89f5bae76ba263b1ab2b82c56ad32a3.zip
image/jpeg: ignore garbage bytes before a RST marker
Well-formed JPEG images will not have garbage bytes. However, for corrupted JPEG images, the RST (restart) mechanism is specifically designed so that a decoder can re-synchronize to an upcoming restartable MCU (Minimum Coded Unit, e.g. 16x16 block of pixels) boundary and resume decoding. Even if the resultant image isn't perfect, a 98%-good image is better than a fatal error. Every JPEG marker is encoded in two bytes, the first of which is 0xFF. There are 8 possible RST markers, cycling as "0xFF 0xD0", "0xFF 0xD1", ..., "0xFF 0xD7". Suppose that, our decoder is expecting "0xFF 0xD1". Before this commit, Go's image/jpeg package would accept only two possible inputs: a well-formed "0xFF 0xD1" or one very specific pattern of spec non-compliance, "0xFF 0x00 0xFF 0xD1". After this commit, it is more lenient, similar to libjpeg's jdmarker.c's next_marker function. https://github.com/libjpeg-turbo/libjpeg-turbo/blob/2dfe6c0fe9e18671105e94f7cbf044d4a1d157e6/jdmarker.c#L892-L935 The new testdata file was created by: $ convert video-001.png a.ppm $ cjpeg -restart 2 a.ppm > video-001.restart2.jpeg $ rm a.ppm Fixes #40130 Change-Id: Ic598a5f489f110d6bd63e0735200fb6acac3aca3 Reviewed-on: https://go-review.googlesource.com/c/go/+/580755 Reviewed-by: Ian Lance Taylor <iant@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Joedian Reid <joedian@google.com>
-rw-r--r--src/image/jpeg/reader_test.go42
-rw-r--r--src/image/jpeg/scan.go71
-rw-r--r--src/image/testdata/video-001.restart2.jpegbin0 -> 4855 bytes
3 files changed, 91 insertions, 22 deletions
diff --git a/src/image/jpeg/reader_test.go b/src/image/jpeg/reader_test.go
index cdac2dd756..0872f5e91d 100644
--- a/src/image/jpeg/reader_test.go
+++ b/src/image/jpeg/reader_test.go
@@ -504,6 +504,48 @@ func TestIssue56724(t *testing.T) {
}
}
+func TestBadRestartMarker(t *testing.T) {
+ b, err := os.ReadFile("../testdata/video-001.restart2.jpeg")
+ if err != nil {
+ t.Fatal(err)
+ } else if len(b) != 4855 {
+ t.Fatal("test image had unexpected length")
+ } else if (b[2816] != 0xff) || (b[2817] != 0xd1) {
+ t.Fatal("test image did not have FF D1 restart marker at expected offset")
+ }
+ prefix, suffix := b[:2816], b[2816:]
+
+ testCases := []string{
+ "PASS:",
+ "PASS:\x00",
+ "PASS:\x61",
+ "PASS:\x61\x62\x63\xff\x00\x64",
+ "PASS:\xff",
+ "PASS:\xff\x00",
+ "PASS:\xff\xff\xff\x00\xff\x00\x00\xff\xff\xff",
+
+ "FAIL:\xff\x03",
+ "FAIL:\xff\xd5",
+ "FAIL:\xff\xff\xd5",
+ }
+
+ for _, tc := range testCases {
+ want := tc[:5] == "PASS:"
+ infix := tc[5:]
+
+ data := []byte(nil)
+ data = append(data, prefix...)
+ data = append(data, infix...)
+ data = append(data, suffix...)
+ _, err := Decode(bytes.NewReader(data))
+ got := err == nil
+
+ if got != want {
+ t.Errorf("%q: got %v, want %v", tc, got, want)
+ }
+ }
+}
+
func benchmarkDecode(b *testing.B, filename string) {
data, err := os.ReadFile(filename)
if err != nil {
diff --git a/src/image/jpeg/scan.go b/src/image/jpeg/scan.go
index 94f3d3a326..de82a29bff 100644
--- a/src/image/jpeg/scan.go
+++ b/src/image/jpeg/scan.go
@@ -305,33 +305,16 @@ func (d *decoder) processSOS(n int) error {
} // for i
mcu++
if d.ri > 0 && mcu%d.ri == 0 && mcu < mxx*myy {
- // A more sophisticated decoder could use RST[0-7] markers to resynchronize from corrupt input,
- // but this one assumes well-formed input, and hence the restart marker follows immediately.
+ // For well-formed input, the RST[0-7] restart marker follows
+ // immediately. For corrupt input, call findRST to try to
+ // resynchronize.
if err := d.readFull(d.tmp[:2]); err != nil {
return err
- }
-
- // Section F.1.2.3 says that "Byte alignment of markers is
- // achieved by padding incomplete bytes with 1-bits. If padding
- // with 1-bits creates a X’FF’ value, a zero byte is stuffed
- // before adding the marker."
- //
- // Seeing "\xff\x00" here is not spec compliant, as we are not
- // expecting an *incomplete* byte (that needed padding). Still,
- // some real world encoders (see golang.org/issue/28717) insert
- // it, so we accept it and re-try the 2 byte read.
- //
- // libjpeg issues a warning (but not an error) for this:
- // https://github.com/LuaDist/libjpeg/blob/6c0fcb8ddee365e7abc4d332662b06900612e923/jdmarker.c#L1041-L1046
- if d.tmp[0] == 0xff && d.tmp[1] == 0x00 {
- if err := d.readFull(d.tmp[:2]); err != nil {
+ } else if d.tmp[0] != 0xff || d.tmp[1] != expectedRST {
+ if err := d.findRST(expectedRST); err != nil {
return err
}
}
-
- if d.tmp[0] != 0xff || d.tmp[1] != expectedRST {
- return FormatError("bad RST marker")
- }
expectedRST++
if expectedRST == rst7Marker+1 {
expectedRST = rst0Marker
@@ -521,3 +504,47 @@ func (d *decoder) reconstructBlock(b *block, bx, by, compIndex int) error {
}
return nil
}
+
+// findRST advances past the next RST restart marker that matches expectedRST.
+// Other than I/O errors, it is also an error if we encounter an {0xFF, M}
+// two-byte marker sequence where M is not 0x00, 0xFF or the expectedRST.
+//
+// This is similar to libjpeg's jdmarker.c's next_marker function.
+// https://github.com/libjpeg-turbo/libjpeg-turbo/blob/2dfe6c0fe9e18671105e94f7cbf044d4a1d157e6/jdmarker.c#L892-L935
+//
+// Precondition: d.tmp[:2] holds the next two bytes of JPEG-encoded input
+// (input in the d.readFull sense).
+func (d *decoder) findRST(expectedRST uint8) error {
+ for {
+ // i is the index such that, at the bottom of the loop, we read 2-i
+ // bytes into d.tmp[i:2], maintaining the invariant that d.tmp[:2]
+ // holds the next two bytes of JPEG-encoded input. It is either 0 or 1,
+ // so that each iteration advances by 1 or 2 bytes (or returns).
+ i := 0
+
+ if d.tmp[0] == 0xff {
+ if d.tmp[1] == expectedRST {
+ return nil
+ } else if d.tmp[1] == 0xff {
+ i = 1
+ } else if d.tmp[1] != 0x00 {
+ // libjpeg's jdmarker.c's jpeg_resync_to_restart does something
+ // fancy here, treating RST markers within two (modulo 8) of
+ // expectedRST differently from RST markers that are 'more
+ // distant'. Until we see evidence that recovering from such
+ // cases is frequent enough to be worth the complexity, we take
+ // a simpler approach for now. Any marker that's not 0x00, 0xff
+ // or expectedRST is a fatal FormatError.
+ return FormatError("bad RST marker")
+ }
+
+ } else if d.tmp[1] == 0xff {
+ d.tmp[0] = 0xff
+ i = 1
+ }
+
+ if err := d.readFull(d.tmp[i:2]); err != nil {
+ return err
+ }
+ }
+}
diff --git a/src/image/testdata/video-001.restart2.jpeg b/src/image/testdata/video-001.restart2.jpeg
new file mode 100644
index 0000000000..707639eba1
--- /dev/null
+++ b/src/image/testdata/video-001.restart2.jpeg
Binary files differ