diff options
author | Russ Cox <rsc@golang.org> | 2011-05-03 01:40:19 -0400 |
---|---|---|
committer | Russ Cox <rsc@golang.org> | 2011-05-03 01:40:19 -0400 |
commit | 6a9e2c727917e10d2b4b6d71bb9d3f47043b6c32 (patch) | |
tree | db48e93f5ba7bbaa5806cb6b63e967f7c838825f | |
parent | 50e57603ab7b1346055316654bdf01b0565ee700 (diff) | |
download | go-6a9e2c727917e10d2b4b6d71bb9d3f47043b6c32.tar.gz go-6a9e2c727917e10d2b4b6d71bb9d3f47043b6c32.zip |
[release-branch.r57] png: speed up opaque RGBA encoding
««« CL 4432077 / 66eb68cbd5c2
png: speed up opaque RGBA encoding
With Linux/8g on a 2006 Mac Mini (1.66 GHz Intel Core Duo,
2KB L1, 2MB L2, 2G main memory), GOMAXPROCS unset:
start:
png.BenchmarkEncodePaletted 50 44772820 ns/op
png.BenchmarkEncodeRGBOpaque 10 208395900 ns/op
png.BenchmarkEncodeRGBA 5 331088000 ns/op
remove interface method calls:
png.BenchmarkEncodePaletted 50 44722880 ns/op
png.BenchmarkEncodeRGBOpaque 10 139042600 ns/op
png.BenchmarkEncodeRGBA 5 334033600 ns/op
flate inline min/max():
png.BenchmarkEncodePaletted 50 40631180 ns/op
png.BenchmarkEncodeRGBOpaque 10 124894900 ns/op
png.BenchmarkEncodeRGBA 5 312099000 ns/op
after adler change:
png.BenchmarkEncodePaletted 50 40181760 ns/op
png.BenchmarkEncodeRGBOpaque 20 121781950 ns/op
png.BenchmarkEncodeRGBA 5 313890800 ns/op
In comparison to 121 ms on this 2006 machine, on my
Core2 Duo 2.66 GHz laptop, the final BenchmarkEncodeRGBOpaque
runs in 27 ms. (these are all for 640x480 images)
R=nigeltao, rsc, r
CC=golang-dev
https://golang.org/cl/4432077
»»»
TBR=adg
CC=golang-dev
https://golang.org/cl/4430076
-rw-r--r-- | src/pkg/compress/flate/deflate.go | 29 | ||||
-rw-r--r-- | src/pkg/image/png/writer.go | 29 |
2 files changed, 47 insertions, 11 deletions
diff --git a/src/pkg/compress/flate/deflate.go b/src/pkg/compress/flate/deflate.go index e5b2beaefc..a02a5e8d94 100644 --- a/src/pkg/compress/flate/deflate.go +++ b/src/pkg/compress/flate/deflate.go @@ -143,10 +143,18 @@ func (d *compressor) fillWindow(index int) (int, os.Error) { d.blockStart = math.MaxInt32 } for i, h := range d.hashHead { - d.hashHead[i] = max(h-wSize, -1) + v := h - wSize + if v < -1 { + v = -1 + } + d.hashHead[i] = v } for i, h := range d.hashPrev { - d.hashPrev[i] = max(h-wSize, -1) + v := -h - wSize + if v < -1 { + v = -1 + } + d.hashPrev[i] = v } } count, err := d.r.Read(d.window[d.windowEnd:]) @@ -177,10 +185,18 @@ func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error { // Try to find a match starting at index whose length is greater than prevSize. // We only look at chainCount possibilities before giving up. func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { - win := d.window[0 : pos+min(maxMatchLength, lookahead)] + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] // We quit when we get a match that's at least nice long - nice := min(d.niceMatch, len(win)-pos) + nice := len(win) - pos + if d.niceMatch < nice { + nice = d.niceMatch + } // If we've got a match that's good enough, only look in 1/4 the chain. tries := d.maxChainLength @@ -344,9 +360,12 @@ Loop: } prevLength := length prevOffset := offset - minIndex := max(index-maxOffset, 0) length = minMatchLength - 1 offset = 0 + minIndex := index - maxOffset + if minIndex < 0 { + minIndex = 0 + } if chainHead >= minIndex && (isFastDeflate && lookahead > minMatchLength-1 || diff --git a/src/pkg/image/png/writer.go b/src/pkg/image/png/writer.go index 081d06bf57..2d593f6a7f 100644 --- a/src/pkg/image/png/writer.go +++ b/src/pkg/image/png/writer.go @@ -263,7 +263,12 @@ func writeImage(w io.Writer, m image.Image, cb int) os.Error { defer zw.Close() bpp := 0 // Bytes per pixel. + + // Used by fast paths for common image types var paletted *image.Paletted + var rgba *image.RGBA + rgba, _ = m.(*image.RGBA) + switch cb { case cbG8: bpp = 1 @@ -303,12 +308,24 @@ func writeImage(w io.Writer, m image.Image, cb int) os.Error { cr[0][x+1] = c.Y } case cbTC8: - for x := b.Min.X; x < b.Max.X; x++ { - // We have previously verified that the alpha value is fully opaque. - r, g, b, _ := m.At(x, y).RGBA() - cr[0][3*x+1] = uint8(r >> 8) - cr[0][3*x+2] = uint8(g >> 8) - cr[0][3*x+3] = uint8(b >> 8) + // We have previously verified that the alpha value is fully opaque. + cr0 := cr[0] + if rgba != nil { + yoff := y * rgba.Stride + xoff := 3*b.Min.X + 1 + for _, color := range rgba.Pix[yoff+b.Min.X : yoff+b.Max.X] { + cr0[xoff] = color.R + cr0[xoff+1] = color.G + cr0[xoff+2] = color.B + xoff += 3 + } + } else { + for x := b.Min.X; x < b.Max.X; x++ { + r, g, b, _ := m.At(x, y).RGBA() + cr0[3*x+1] = uint8(r >> 8) + cr0[3*x+2] = uint8(g >> 8) + cr0[3*x+3] = uint8(b >> 8) + } } case cbP8: rowOffset := y * paletted.Stride |