aboutsummaryrefslogtreecommitdiff
path: root/src/encoding/base64
diff options
context:
space:
mode:
authorDaniel Martí <mvdan@mvdan.cc>2018-11-25 17:30:36 +0000
committerDaniel Martí <mvdan@mvdan.cc>2019-03-13 10:39:58 +0000
commit5f40351708cabe28f90500be87dbe316a2280f4a (patch)
treede5ed22cba18f52937b23b9d9fe15a3b04a5e8da /src/encoding/base64
parenta5fdd58c84b6b0a1ae5a53faebc0550024e3a066 (diff)
downloadgo-5f40351708cabe28f90500be87dbe316a2280f4a.tar.gz
go-5f40351708cabe28f90500be87dbe316a2280f4a.zip
encoding/base64: speed up the decoder
Most of the decoding time is spent in the first Decode loop, since the rest of the function only deals with the few remaining bytes. Any unnecessary work done in that loop body matters tremendously. One such unnecessary bottleneck was the use of the enc.decodeMap table. Since enc is a pointer receiver, and the field is used within the non-inlineable function decode64, the decoder must perform a nil check at every iteration. To fix that, move the enc.decodeMap uses to the parent function, where we can lift the nil check outside the loop. That gives roughly a 15% speed-up. The function no longer performs decoding per se, so rename it. While at it, remove the now unnecessary receivers. An unfortunate side effect of this change is that the loop now contains eight bounds checks on src instead of just one. However, not having to slice src plus the nil check removal well outweigh the added cost. The other piece that made decode64 slow was that it wasn't inlined, and had multiple branches. Use a simple bitwise-or trick suggested by Roger Peppe, and collapse the rest of the bitwise logic into a single expression. Inlinability and the reduced branching give a further 10% speed-up. Finally, add these two functions to TestIntendedInlining, since we want them to stay inlinable. Apply the same refactor to decode32 for consistency, and to let 32-bit architectures see a similar performance gain for large inputs. name old time/op new time/op delta DecodeString/2-8 47.3ns ± 1% 45.8ns ± 0% -3.28% (p=0.002 n=6+6) DecodeString/4-8 55.8ns ± 2% 51.5ns ± 0% -7.71% (p=0.004 n=5+6) DecodeString/8-8 64.9ns ± 0% 61.7ns ± 0% -4.99% (p=0.004 n=5+6) DecodeString/64-8 238ns ± 0% 198ns ± 0% -16.54% (p=0.002 n=6+6) DecodeString/8192-8 19.5µs ± 0% 14.6µs ± 0% -24.96% (p=0.004 n=6+5) name old speed new speed delta DecodeString/2-8 84.6MB/s ± 1% 87.4MB/s ± 0% +3.38% (p=0.002 n=6+6) DecodeString/4-8 143MB/s ± 2% 155MB/s ± 0% +8.41% (p=0.004 n=5+6) DecodeString/8-8 185MB/s ± 0% 195MB/s ± 0% +5.29% (p=0.004 n=5+6) DecodeString/64-8 369MB/s ± 0% 442MB/s ± 0% +19.78% (p=0.002 n=6+6) DecodeString/8192-8 560MB/s ± 0% 746MB/s ± 0% +33.27% (p=0.004 n=6+5) Updates #19636. Change-Id: Ib839577b0e3f5a2bb201f5cae580c61365d92894 Reviewed-on: https://go-review.googlesource.com/c/go/+/151177 Run-TryBot: Daniel Martí <mvdan@mvdan.cc> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org> Reviewed-by: roger peppe <rogpeppe@gmail.com>
Diffstat (limited to 'src/encoding/base64')
-rw-r--r--src/encoding/base64/base64.go112
1 files changed, 52 insertions, 60 deletions
diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go
index a7da7747ef..082210198f 100644
--- a/src/encoding/base64/base64.go
+++ b/src/encoding/base64/base64.go
@@ -284,6 +284,9 @@ func (enc *Encoding) decodeQuantum(dst, src []byte, si int) (nsi, n int, err err
var dbuf [4]byte
dlen := 4
+ // Lift the nil check outside of the loop.
+ _ = enc.decodeMap
+
for j := 0; j < len(dbuf); j++ {
if len(src) == si {
switch {
@@ -467,9 +470,23 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
return 0, nil
}
+ // Lift the nil check outside of the loop. enc.decodeMap is directly
+ // used later in this function, to let the compiler know that the
+ // receiver can't be nil.
+ _ = enc.decodeMap
+
si := 0
for strconv.IntSize >= 64 && len(src)-si >= 8 && len(dst)-n >= 8 {
- if dn, ok := enc.decode64(src[si:]); ok {
+ if dn, ok := assemble64(
+ enc.decodeMap[src[si+0]],
+ enc.decodeMap[src[si+1]],
+ enc.decodeMap[src[si+2]],
+ enc.decodeMap[src[si+3]],
+ enc.decodeMap[src[si+4]],
+ enc.decodeMap[src[si+5]],
+ enc.decodeMap[src[si+6]],
+ enc.decodeMap[src[si+7]],
+ ); ok {
binary.BigEndian.PutUint64(dst[n:], dn)
n += 6
si += 8
@@ -484,7 +501,12 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
}
for len(src)-si >= 4 && len(dst)-n >= 4 {
- if dn, ok := enc.decode32(src[si:]); ok {
+ if dn, ok := assemble32(
+ enc.decodeMap[src[si+0]],
+ enc.decodeMap[src[si+1]],
+ enc.decodeMap[src[si+2]],
+ enc.decodeMap[src[si+3]],
+ ); ok {
binary.BigEndian.PutUint32(dst[n:], dn)
n += 3
si += 4
@@ -509,70 +531,40 @@ func (enc *Encoding) Decode(dst, src []byte) (n int, err error) {
return n, err
}
-// decode32 tries to decode 4 base64 characters into 3 bytes, and returns those
-// bytes. len(src) must be >= 4.
-// Returns (0, false) if decoding failed.
-func (enc *Encoding) decode32(src []byte) (dn uint32, ok bool) {
- var n uint32
- _ = src[3]
- if n = uint32(enc.decodeMap[src[0]]); n == 0xff {
- return 0, false
- }
- dn |= n << 26
- if n = uint32(enc.decodeMap[src[1]]); n == 0xff {
- return 0, false
- }
- dn |= n << 20
- if n = uint32(enc.decodeMap[src[2]]); n == 0xff {
+// assemble32 assembles 4 base64 digits into 3 bytes.
+// Each digit comes from the decode map, and will be 0xff
+// if it came from an invalid character.
+func assemble32(n1, n2, n3, n4 byte) (dn uint32, ok bool) {
+ // Check that all the digits are valid. If any of them was 0xff, their
+ // bitwise OR will be 0xff.
+ if n1|n2|n3|n4 == 0xff {
return 0, false
}
- dn |= n << 14
- if n = uint32(enc.decodeMap[src[3]]); n == 0xff {
- return 0, false
- }
- dn |= n << 8
- return dn, true
+ return uint32(n1)<<26 |
+ uint32(n2)<<20 |
+ uint32(n3)<<14 |
+ uint32(n4)<<8,
+ true
}
-// decode64 tries to decode 8 base64 characters into 6 bytes, and returns those
-// bytes. len(src) must be >= 8.
-// Returns (0, false) if decoding failed.
-func (enc *Encoding) decode64(src []byte) (dn uint64, ok bool) {
- var n uint64
- _ = src[7]
- if n = uint64(enc.decodeMap[src[0]]); n == 0xff {
- return 0, false
- }
- dn |= n << 58
- if n = uint64(enc.decodeMap[src[1]]); n == 0xff {
- return 0, false
- }
- dn |= n << 52
- if n = uint64(enc.decodeMap[src[2]]); n == 0xff {
- return 0, false
- }
- dn |= n << 46
- if n = uint64(enc.decodeMap[src[3]]); n == 0xff {
- return 0, false
- }
- dn |= n << 40
- if n = uint64(enc.decodeMap[src[4]]); n == 0xff {
- return 0, false
- }
- dn |= n << 34
- if n = uint64(enc.decodeMap[src[5]]); n == 0xff {
- return 0, false
- }
- dn |= n << 28
- if n = uint64(enc.decodeMap[src[6]]); n == 0xff {
- return 0, false
- }
- dn |= n << 22
- if n = uint64(enc.decodeMap[src[7]]); n == 0xff {
+// assemble64 assembles 8 base64 digits into 6 bytes.
+// Each digit comes from the decode map, and will be 0xff
+// if it came from an invalid character.
+func assemble64(n1, n2, n3, n4, n5, n6, n7, n8 byte) (dn uint64, ok bool) {
+ // Check that all the digits are valid. If any of them was 0xff, their
+ // bitwise OR will be 0xff.
+ if n1|n2|n3|n4|n5|n6|n7|n8 == 0xff {
return 0, false
}
- dn |= n << 16
- return dn, true
+ return uint64(n1)<<58 |
+ uint64(n2)<<52 |
+ uint64(n3)<<46 |
+ uint64(n4)<<40 |
+ uint64(n5)<<34 |
+ uint64(n6)<<28 |
+ uint64(n7)<<22 |
+ uint64(n8)<<16,
+ true
}
type newlineFilteringReader struct {