diff options
author | Robert Griesemer <gri@golang.org> | 2017-02-24 18:13:29 -0800 |
---|---|---|
committer | Robert Griesemer <gri@golang.org> | 2017-03-01 00:15:32 +0000 |
commit | bca0320641000d842341637f22f140c262adb360 (patch) | |
tree | 4b78b0f0a5a9cf5c2db4623f154a293916a3a2d7 /src/compress | |
parent | 32b41c8dc75a731e4053b59b19c542a79eb56c1f (diff) | |
download | go-bca0320641000d842341637f22f140c262adb360.tar.gz go-bca0320641000d842341637f22f140c262adb360.zip |
compress/flate: use math/bits.Reverse8/16 instead of local implementation
No measurable impact on performance (specifically, no degradation).
Reverse is used in Huffman en/de-coding. For completeness, here are
all the speed-related benchmark results:
name old time/op new time/op delta
Decode/Digits/Huffman/1e4-8 181µs ± 0% 178µs ± 1% ~ (p=0.100 n=3+3)
Decode/Digits/Huffman/1e5-8 1.60ms ± 3% 1.56ms ± 3% ~ (p=0.400 n=3+3)
Decode/Digits/Huffman/1e6-8 15.7ms ± 1% 15.3ms ± 3% ~ (p=0.700 n=3+3)
Decode/Digits/Speed/1e4-8 179µs ± 0% 180µs ± 0% ~ (p=0.200 n=3+3)
Decode/Digits/Speed/1e5-8 1.68ms ± 0% 1.66ms ± 3% ~ (p=0.700 n=3+3)
Decode/Digits/Speed/1e6-8 16.6ms ± 2% 16.6ms ± 5% ~ (p=0.700 n=3+3)
Decode/Digits/Default/1e4-8 179µs ± 1% 178µs ± 1% ~ (p=0.700 n=3+3)
Decode/Digits/Default/1e5-8 1.62ms ± 3% 1.62ms ± 4% ~ (p=1.000 n=3+3)
Decode/Digits/Default/1e6-8 16.0ms ± 2% 16.0ms ± 3% ~ (p=1.000 n=3+3)
Decode/Digits/Compression/1e4-8 179µs ± 1% 179µs ± 0% ~ (p=0.200 n=3+3)
Decode/Digits/Compression/1e5-8 1.62ms ± 2% 1.62ms ± 3% ~ (p=1.000 n=3+3)
Decode/Digits/Compression/1e6-8 16.1ms ± 3% 16.0ms ± 3% ~ (p=1.000 n=3+3)
Decode/Twain/Huffman/1e4-8 205µs ± 2% 207µs ± 1% ~ (p=1.000 n=3+3)
Decode/Twain/Huffman/1e5-8 1.77ms ± 2% 1.77ms ± 4% ~ (p=0.700 n=3+3)
Decode/Twain/Huffman/1e6-8 17.4ms ± 2% 17.4ms ± 3% ~ (p=1.000 n=3+3)
Decode/Twain/Speed/1e4-8 186µs ± 1% 186µs ± 1% ~ (p=0.400 n=3+3)
Decode/Twain/Speed/1e5-8 1.53ms ± 2% 1.52ms ± 0% ~ (p=0.700 n=3+3)
Decode/Twain/Speed/1e6-8 14.9ms ± 1% 14.8ms ± 1% ~ (p=1.000 n=3+3)
Decode/Twain/Default/1e4-8 176µs ± 1% 174µs ± 0% ~ (p=0.200 n=3+3)
Decode/Twain/Default/1e5-8 1.30ms ± 2% 1.31ms ± 1% ~ (p=0.700 n=3+3)
Decode/Twain/Default/1e6-8 12.6ms ± 3% 12.5ms ± 0% ~ (p=0.700 n=3+3)
Decode/Twain/Compression/1e4-8 177µs ± 0% 174µs ± 1% ~ (p=0.100 n=3+3)
Decode/Twain/Compression/1e5-8 1.30ms ± 1% 1.31ms ± 0% ~ (p=0.700 n=3+3)
Decode/Twain/Compression/1e6-8 12.5ms ± 1% 12.5ms ± 1% ~ (p=1.000 n=3+3)
Encode/Digits/Huffman/1e4-8 47.4µs ± 1% 46.5µs ± 0% ~ (p=0.100 n=3+3)
Encode/Digits/Huffman/1e5-8 453µs ± 2% 446µs ± 1% ~ (p=0.700 n=3+3)
Encode/Digits/Huffman/1e6-8 4.44ms ± 3% 4.39ms ± 0% ~ (p=1.000 n=3+3)
Encode/Digits/Speed/1e4-8 190µs ± 4% 185µs ± 0% ~ (p=0.100 n=3+3)
Encode/Digits/Speed/1e5-8 1.78ms ± 5% 1.75ms ± 1% ~ (p=1.000 n=3+3)
Encode/Digits/Speed/1e6-8 17.9ms ± 7% 17.3ms ± 1% ~ (p=0.400 n=3+3)
Encode/Digits/Default/1e4-8 366µs ± 1% 361µs ± 0% ~ (p=0.200 n=3+3)
Encode/Digits/Default/1e5-8 5.58ms ± 5% 5.44ms ± 1% ~ (p=0.400 n=3+3)
Encode/Digits/Default/1e6-8 59.0ms ± 3% 58.2ms ± 1% ~ (p=0.700 n=3+3)
Encode/Digits/Compression/1e4-8 369µs ± 3% 362µs ± 0% ~ (p=0.100 n=3+3)
Encode/Digits/Compression/1e5-8 5.50ms ± 2% 5.47ms ± 1% ~ (p=1.000 n=3+3)
Encode/Digits/Compression/1e6-8 59.4ms ± 2% 58.5ms ± 1% ~ (p=0.400 n=3+3)
Encode/Twain/Huffman/1e4-8 64.4µs ± 3% 64.7µs ± 1% ~ (p=0.700 n=3+3)
Encode/Twain/Huffman/1e5-8 526µs ± 1% 526µs ± 2% ~ (p=1.000 n=3+3)
Encode/Twain/Huffman/1e6-8 5.18ms ± 2% 5.17ms ± 1% ~ (p=0.700 n=3+3)
Encode/Twain/Speed/1e4-8 206µs ± 1% 204µs ± 0% ~ (p=0.100 n=3+3)
Encode/Twain/Speed/1e5-8 1.73ms ± 2% 1.70ms ± 0% ~ (p=0.100 n=3+3)
Encode/Twain/Speed/1e6-8 16.7ms ± 0% 16.7ms ± 1% ~ (p=1.000 n=3+3)
Encode/Twain/Default/1e4-8 423µs ± 3% 418µs ± 1% ~ (p=1.000 n=3+3)
Encode/Twain/Default/1e5-8 6.34ms ± 4% 6.23ms ± 0% ~ (p=1.000 n=3+3)
Encode/Twain/Default/1e6-8 68.0ms ± 3% 67.5ms ± 0% ~ (p=0.700 n=3+3)
Encode/Twain/Compression/1e4-8 435µs ± 3% 424µs ± 0% ~ (p=0.700 n=3+3)
Encode/Twain/Compression/1e5-8 7.01ms ± 1% 6.92ms ± 0% ~ (p=0.100 n=3+3)
Encode/Twain/Compression/1e6-8 77.1ms ± 4% 75.5ms ± 1% ~ (p=0.400 n=3+3)
name old speed new speed delta
Decode/Digits/Huffman/1e4-8 55.2MB/s ± 0% 56.2MB/s ± 1% ~ (p=0.100 n=3+3)
Decode/Digits/Huffman/1e5-8 62.4MB/s ± 3% 64.1MB/s ± 3% ~ (p=0.400 n=3+3)
Decode/Digits/Huffman/1e6-8 63.8MB/s ± 1% 65.3MB/s ± 3% ~ (p=0.700 n=3+3)
Decode/Digits/Speed/1e4-8 55.8MB/s ± 0% 55.4MB/s ± 0% ~ (p=0.200 n=3+3)
Decode/Digits/Speed/1e5-8 59.6MB/s ± 0% 60.3MB/s ± 3% ~ (p=0.700 n=3+3)
Decode/Digits/Speed/1e6-8 60.1MB/s ± 2% 60.3MB/s ± 4% ~ (p=0.700 n=3+3)
Decode/Digits/Default/1e4-8 55.8MB/s ± 1% 56.1MB/s ± 1% ~ (p=0.700 n=3+3)
Decode/Digits/Default/1e5-8 61.8MB/s ± 3% 61.7MB/s ± 4% ~ (p=1.000 n=3+3)
Decode/Digits/Default/1e6-8 62.4MB/s ± 2% 62.4MB/s ± 3% ~ (p=1.000 n=3+3)
Decode/Digits/Compression/1e4-8 55.7MB/s ± 1% 56.0MB/s ± 0% ~ (p=0.300 n=3+3)
Decode/Digits/Compression/1e5-8 61.7MB/s ± 2% 61.9MB/s ± 3% ~ (p=1.000 n=3+3)
Decode/Digits/Compression/1e6-8 62.2MB/s ± 3% 62.6MB/s ± 3% ~ (p=1.000 n=3+3)
Decode/Twain/Huffman/1e4-8 48.8MB/s ± 2% 48.4MB/s ± 1% ~ (p=1.000 n=3+3)
Decode/Twain/Huffman/1e5-8 56.4MB/s ± 2% 56.6MB/s ± 4% ~ (p=0.700 n=3+3)
Decode/Twain/Huffman/1e6-8 57.6MB/s ± 2% 57.5MB/s ± 3% ~ (p=1.000 n=3+3)
Decode/Twain/Speed/1e4-8 53.7MB/s ± 1% 53.9MB/s ± 1% ~ (p=0.400 n=3+3)
Decode/Twain/Speed/1e5-8 65.5MB/s ± 2% 65.6MB/s ± 0% ~ (p=0.700 n=3+3)
Decode/Twain/Speed/1e6-8 66.9MB/s ± 1% 67.4MB/s ± 1% ~ (p=1.000 n=3+3)
Decode/Twain/Default/1e4-8 56.9MB/s ± 1% 57.3MB/s ± 0% ~ (p=0.200 n=3+3)
Decode/Twain/Default/1e5-8 77.2MB/s ± 2% 76.6MB/s ± 1% ~ (p=0.700 n=3+3)
Decode/Twain/Default/1e6-8 79.3MB/s ± 3% 80.0MB/s ± 0% ~ (p=0.700 n=3+3)
Decode/Twain/Compression/1e4-8 56.4MB/s ± 0% 57.5MB/s ± 1% ~ (p=0.100 n=3+3)
Decode/Twain/Compression/1e5-8 76.8MB/s ± 1% 76.5MB/s ± 0% ~ (p=0.700 n=3+3)
Decode/Twain/Compression/1e6-8 80.1MB/s ± 1% 79.8MB/s ± 1% ~ (p=1.000 n=3+3)
Encode/Digits/Huffman/1e4-8 211MB/s ± 1% 215MB/s ± 0% ~ (p=0.100 n=3+3)
Encode/Digits/Huffman/1e5-8 221MB/s ± 2% 224MB/s ± 1% ~ (p=0.700 n=3+3)
Encode/Digits/Huffman/1e6-8 225MB/s ± 3% 228MB/s ± 0% ~ (p=1.000 n=3+3)
Encode/Digits/Speed/1e4-8 52.8MB/s ± 4% 54.1MB/s ± 0% ~ (p=0.100 n=3+3)
Encode/Digits/Speed/1e5-8 56.2MB/s ± 5% 57.0MB/s ± 1% ~ (p=1.000 n=3+3)
Encode/Digits/Speed/1e6-8 56.0MB/s ± 6% 57.7MB/s ± 1% ~ (p=0.400 n=3+3)
Encode/Digits/Default/1e4-8 27.3MB/s ± 1% 27.7MB/s ± 0% ~ (p=0.200 n=3+3)
Encode/Digits/Default/1e5-8 17.9MB/s ± 4% 18.4MB/s ± 1% ~ (p=0.400 n=3+3)
Encode/Digits/Default/1e6-8 17.0MB/s ± 3% 17.2MB/s ± 1% ~ (p=0.500 n=3+3)
Encode/Digits/Compression/1e4-8 27.1MB/s ± 3% 27.6MB/s ± 0% ~ (p=0.100 n=3+3)
Encode/Digits/Compression/1e5-8 18.2MB/s ± 2% 18.3MB/s ± 1% ~ (p=1.000 n=3+3)
Encode/Digits/Compression/1e6-8 16.9MB/s ± 2% 17.1MB/s ± 1% ~ (p=0.400 n=3+3)
Encode/Twain/Huffman/1e4-8 155MB/s ± 3% 155MB/s ± 1% ~ (p=0.700 n=3+3)
Encode/Twain/Huffman/1e5-8 190MB/s ± 1% 190MB/s ± 2% ~ (p=1.000 n=3+3)
Encode/Twain/Huffman/1e6-8 193MB/s ± 2% 193MB/s ± 1% ~ (p=0.700 n=3+3)
Encode/Twain/Speed/1e4-8 48.5MB/s ± 1% 49.1MB/s ± 0% ~ (p=0.100 n=3+3)
Encode/Twain/Speed/1e5-8 57.7MB/s ± 2% 59.0MB/s ± 0% ~ (p=0.100 n=3+3)
Encode/Twain/Speed/1e6-8 59.7MB/s ± 0% 59.7MB/s ± 1% ~ (p=1.000 n=3+3)
Encode/Twain/Default/1e4-8 23.6MB/s ± 3% 23.9MB/s ± 1% ~ (p=1.000 n=3+3)
Encode/Twain/Default/1e5-8 15.8MB/s ± 4% 16.1MB/s ± 0% ~ (p=1.000 n=3+3)
Encode/Twain/Default/1e6-8 14.7MB/s ± 3% 14.8MB/s ± 0% ~ (p=0.700 n=3+3)
Encode/Twain/Compression/1e4-8 23.0MB/s ± 3% 23.6MB/s ± 0% ~ (p=0.700 n=3+3)
Encode/Twain/Compression/1e5-8 14.3MB/s ± 1% 14.5MB/s ± 0% ~ (p=0.100 n=3+3)
Encode/Twain/Compression/1e6-8 13.0MB/s ± 4% 13.2MB/s ± 1% ~ (p=0.400 n=3+3)
Measured on a "quiet" (no browser running) 2.3 GHz Intel Core i7, running macOS 10.12.3.
See also #19279.
Change-Id: Ice759eb34eb37442b543957447c264e0aadc1fa9
Reviewed-on: https://go-review.googlesource.com/37460
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Diffstat (limited to 'src/compress')
-rw-r--r-- | src/compress/flate/huffman_code.go | 5 | ||||
-rw-r--r-- | src/compress/flate/inflate.go | 7 | ||||
-rw-r--r-- | src/compress/flate/reverse_bits.go | 48 |
3 files changed, 9 insertions, 51 deletions
```diff
diff --git a/src/compress/flate/huffman_code.go b/src/compress/flate/huffman_code.go
index bdcbd823b0..891537ed5e 100644
--- a/src/compress/flate/huffman_code.go
+++ b/src/compress/flate/huffman_code.go
@@ -6,6 +6,7 @@ package flate
 
 import (
 	"math"
+	"math/bits"
 	"sort"
 )
 
@@ -342,3 +343,7 @@ func (s byFreq) Less(i, j int) bool {
 }
 
 func (s byFreq) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+
+func reverseBits(number uint16, bitLength byte) uint16 {
+	return bits.Reverse16(number << (16 - bitLength))
+}
diff --git a/src/compress/flate/inflate.go b/src/compress/flate/inflate.go
index 9a8c4fc455..faa33cc6e9 100644
--- a/src/compress/flate/inflate.go
+++ b/src/compress/flate/inflate.go
@@ -10,6 +10,7 @@ package flate
 import (
 	"bufio"
 	"io"
+	mathbits "math/bits"
 	"strconv"
 	"sync"
 )
@@ -176,7 +177,7 @@ func (h *huffmanDecoder) init(bits []int) bool {
 		link := nextcode[huffmanChunkBits+1] >> 1
 		h.links = make([][]uint32, huffmanNumChunks-link)
 		for j := uint(link); j < huffmanNumChunks; j++ {
-			reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
+			reverse := int(mathbits.Reverse16(uint16(j)))
 			reverse >>= uint(16 - huffmanChunkBits)
 			off := j - uint(link)
 			if sanity && h.chunks[reverse] != 0 {
@@ -194,7 +195,7 @@ func (h *huffmanDecoder) init(bits []int) bool {
 		code := nextcode[n]
 		nextcode[n]++
 		chunk := uint32(i<<huffmanValueShift | n)
-		reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
+		reverse := int(mathbits.Reverse16(uint16(code)))
 		reverse >>= uint(16 - n)
 		if n <= huffmanChunkBits {
 			for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
@@ -556,7 +557,7 @@ readLiteral:
 					return
 				}
 			}
-			dist = int(reverseByte[(f.b&0x1F)<<3])
+			dist = int(mathbits.Reverse8(uint8(f.b & 0x1F << 3)))
 			f.b >>= 5
 			f.nb -= 5
 		} else {
diff --git a/src/compress/flate/reverse_bits.go b/src/compress/flate/reverse_bits.go
deleted file mode 100644
index 6b222900c1..0000000000
--- a/src/compress/flate/reverse_bits.go
+++ /dev/null
@@ -1,48 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package flate
-
-var reverseByte = [256]byte{
-	0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
-	0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
-	0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
-	0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
-	0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
-	0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
-	0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
-	0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
-	0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
-	0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
-	0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
-	0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
-	0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
-	0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
-	0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
-	0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
-	0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
-	0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
-	0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
-	0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
-	0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
-	0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
-	0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
-	0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
-	0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
-	0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
-	0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
-	0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
-	0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
-	0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
-	0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
-	0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
-}
-
-func reverseUint16(v uint16) uint16 {
-	return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8
-}
-
-func reverseBits(number uint16, bitLength byte) uint16 {
-	return reverseUint16(number << (16 - bitLength))
-}
```