diff options
author | Joe Tsai <joetsai@digital-static.net> | 2017-01-11 23:03:15 -0800 |
---|---|---|
committer | Joe Tsai <thebrokentoaster@gmail.com> | 2017-01-12 19:15:57 +0000 |
commit | 9c3630f578db1d4331b367c3c7d284db299be3a6 (patch) | |
tree | 72de2a9968d080852f37f68c4aceb292a7e203d0 /src/compress | |
parent | 4f0aac52d926270255fa2b682aca15e8ff404c59 (diff) | |
download | go-9c3630f578db1d4331b367c3c7d284db299be3a6.tar.gz go-9c3630f578db1d4331b367c3c7d284db299be3a6.zip |
compress/flate: avoid large stack growth in fillDeflate
Ranging over an array causes the array to be copied over to the
stack, which cause large re-growths. Instead, we should iterate
over slices of the array.
Also, assigning a large struct literal uses the stack even
though the actual fields being populated are small in comparison
to the entirety of the struct (see #18636).
Fixing the stack growth does not alter CPU-time performance much
since the stack-growth and copying was such a tiny portion of the
compression work:
name old time/op new time/op delta
Encode/Digits/Default/1e4-8 332µs ± 1% 332µs ± 1% ~ (p=0.796 n=10+10)
Encode/Digits/Default/1e5-8 5.07ms ± 2% 5.05ms ± 1% ~ (p=0.815 n=9+8)
Encode/Digits/Default/1e6-8 53.7ms ± 1% 53.9ms ± 1% ~ (p=0.075 n=10+10)
Encode/Twain/Default/1e4-8 380µs ± 1% 380µs ± 1% ~ (p=0.684 n=10+10)
Encode/Twain/Default/1e5-8 5.79ms ± 2% 5.79ms ± 1% ~ (p=0.497 n=9+10)
Encode/Twain/Default/1e6-8 61.5ms ± 1% 61.8ms ± 1% ~ (p=0.247 n=10+10)
name old speed new speed delta
Encode/Digits/Default/1e4-8 30.1MB/s ± 1% 30.1MB/s ± 1% ~ (p=0.753 n=10+10)
Encode/Digits/Default/1e5-8 19.7MB/s ± 2% 19.8MB/s ± 1% ~ (p=0.795 n=9+8)
Encode/Digits/Default/1e6-8 18.6MB/s ± 1% 18.5MB/s ± 1% ~ (p=0.072 n=10+10)
Encode/Twain/Default/1e4-8 26.3MB/s ± 1% 26.3MB/s ± 1% ~ (p=0.616 n=10+10)
Encode/Twain/Default/1e5-8 17.3MB/s ± 2% 17.3MB/s ± 1% ~ (p=0.484 n=9+10)
Encode/Twain/Default/1e6-8 16.3MB/s ± 1% 16.2MB/s ± 1% ~ (p=0.238 n=10+10)
Updates #18636
Fixes #18625
Change-Id: I471b20339bf675f63dc56d38b3acdd824fe23328
Reviewed-on: https://go-review.googlesource.com/35122
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/compress')
-rw-r--r-- | src/compress/flate/deflate.go | 7 | ||||
-rw-r--r-- | src/compress/flate/deflate_test.go | 31 | ||||
-rw-r--r-- | src/compress/flate/deflatefast.go | 19 |
3 files changed, 53 insertions, 4 deletions
diff --git a/src/compress/flate/deflate.go b/src/compress/flate/deflate.go index 97265b3ca2..4d6a5357d8 100644 --- a/src/compress/flate/deflate.go +++ b/src/compress/flate/deflate.go @@ -136,14 +136,17 @@ func (d *compressor) fillDeflate(b []byte) int { delta := d.hashOffset - 1 d.hashOffset -= delta d.chainHead -= delta - for i, v := range d.hashPrev { + + // Iterate over slices instead of arrays to avoid copying + // the entire table onto the stack (Issue #18625). + for i, v := range d.hashPrev[:] { if int(v) > delta { d.hashPrev[i] = uint32(int(v) - delta) } else { d.hashPrev[i] = 0 } } - for i, v := range d.hashHead { + for i, v := range d.hashHead[:] { if int(v) > delta { d.hashHead[i] = uint32(int(v) - delta) } else { diff --git a/src/compress/flate/deflate_test.go b/src/compress/flate/deflate_test.go index 521a260365..fbea761721 100644 --- a/src/compress/flate/deflate_test.go +++ b/src/compress/flate/deflate_test.go @@ -12,6 +12,7 @@ import ( "io" "io/ioutil" "reflect" + "runtime/debug" "sync" "testing" ) @@ -864,3 +865,33 @@ func TestBestSpeedMaxMatchOffset(t *testing.T) { } } } + +func TestMaxStackSize(t *testing.T) { + // This test must not run in parallel with other tests as debug.SetMaxStack + // affects all goroutines. + n := debug.SetMaxStack(1 << 16) + defer debug.SetMaxStack(n) + + var wg sync.WaitGroup + defer wg.Wait() + + b := make([]byte, 1<<20) + for level := HuffmanOnly; level <= BestCompression; level++ { + // Run in separate goroutine to increase probability of stack regrowth. + wg.Add(1) + go func(level int) { + defer wg.Done() + zw, err := NewWriter(ioutil.Discard, level) + if err != nil { + t.Errorf("level %d, NewWriter() = %v, want nil", level, err) + } + if n, err := zw.Write(b); n != len(b) || err != nil { + t.Errorf("level %d, Write() = (%d, %v), want (%d, nil)", level, n, err, len(b)) + } + if err := zw.Close(); err != nil { + t.Errorf("level %d, Close() = %v, want nil", level, err) + } + zw.Reset(ioutil.Discard) + }(level) + } +} diff --git a/src/compress/flate/deflatefast.go b/src/compress/flate/deflatefast.go index a1636a37d6..08298b76bb 100644 --- a/src/compress/flate/deflatefast.go +++ b/src/compress/flate/deflatefast.go @@ -60,7 +60,7 @@ func newDeflateFast() *deflateFast { func (e *deflateFast) encode(dst []token, src []byte) []token { // Ensure that e.cur doesn't wrap. if e.cur > 1<<30 { - *e = deflateFast{cur: maxStoreBlockSize, prev: e.prev[:0]} + e.resetAll() } // This check isn't in the Snappy implementation, but there, the caller @@ -265,6 +265,21 @@ func (e *deflateFast) reset() { // Protect against e.cur wraparound. if e.cur > 1<<30 { - *e = deflateFast{cur: maxStoreBlockSize, prev: e.prev[:0]} + e.resetAll() + } +} + +// resetAll resets the deflateFast struct and is only called in rare +// situations to prevent integer overflow. It manually resets each field +// to avoid causing large stack growth. +// +// See https://golang.org/issue/18636. +func (e *deflateFast) resetAll() { + // This is equivalent to: + // *e = deflateFast{cur: maxStoreBlockSize, prev: e.prev[:0]} + e.cur = maxStoreBlockSize + e.prev = e.prev[:0] + for i := range e.table { + e.table[i] = tableEntry{} } } |