author     Josh Bleecher Snyder <josharian@gmail.com>  2017-05-10 10:19:43 -0700
committer  Josh Bleecher Snyder <josharian@gmail.com>  2017-06-14 05:28:24 +0000
commit     aafd96408feef0785d32fd3e1c5a67d4159a98e7 (patch)
tree       38bc0efb8c9769aceeab83d7dfd1b67bd58740e0 /src/runtime/symtab.go
parent     2f7fbf8851e4c45f7e0d207836a3c7a97eaa1823 (diff)
runtime: speed up stack copying
I was surprised to see readvarint show up in a cpu profile.

Use a few simple optimizations to speed up stack copying:

* Avoid making a copy of the cache.entries array or any of its elements.
* Use a shift instead of a signed division in stackmapdata.
* Change readvarint to return the number of bytes consumed
  rather than an updated slice.
* Make some minor optimizations to readvarint to help the compiler.
* Avoid calling readvarint when the value fits in a single byte.

The first and last optimizations are the most significant,
although they all contribute a little.

Add a benchmark for stack copying that includes lots of
different functions in a recursive loop, to bust the cache.

This might speed up other runtime operations as well;
I only benchmarked stack copying.

name                old time/op  new time/op  delta
StackCopy-8         96.4ms ± 2%  82.7ms ± 1%  -14.24%  (p=0.000 n=20+19)
StackCopyNoCache-8   167ms ± 1%   131ms ± 1%  -21.58%  (p=0.000 n=20+20)

Change-Id: I13d5c455c65073c73b656acad86cf8e8e3c9807b
Reviewed-on: https://go-review.googlesource.com/43150
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
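For context on the last bullet: the pc/value tables use the standard base-128 varint encoding, where each byte carries seven payload bits and a set high bit (0x80) means another byte follows. Any value below 0x80 therefore occupies exactly one byte with the high bit clear, which is what the new fast path in step tests before falling back to readvarint. A minimal sketch of the encoding side (appendVarint is an illustrative helper, not part of this change):

package main

import "fmt"

// appendVarint encodes v in base-128, least-significant group first:
// seven payload bits per byte, high bit set on every byte but the last.
func appendVarint(p []byte, v uint32) []byte {
	for v >= 0x80 {
		p = append(p, byte(v)|0x80)
		v >>= 7
	}
	return append(p, byte(v))
}

func main() {
	fmt.Printf("%x\n", appendVarint(nil, 5))   // 05    (single byte, high bit clear)
	fmt.Printf("%x\n", appendVarint(nil, 300)) // ac02  (needs the full decode loop)
}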
Diffstat (limited to 'src/runtime/symtab.go')
-rw-r--r--  src/runtime/symtab.go  33
1 file changed, 23 insertions, 10 deletions
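The benchmark mentioned above is not part of this page (the diff is limited to symtab.go), but the mechanism it relies on is simple: a fresh goroutine starts on a small stack, deep recursion forces the runtime to grow it, and growth copies the whole stack, exercising the code patched below. A hedged sketch of what such a benchmark can look like; the helper name and recursion depth are illustrative, not the CL's actual test code:

package runtime_test

import "testing"

// count recurses deeply enough to force several stack growths;
// each growth copies the goroutine's entire stack.
func count(n int) int {
	if n == 0 {
		return 0
	}
	return 1 + count(n-1)
}

func BenchmarkStackCopy(b *testing.B) {
	done := make(chan bool)
	for i := 0; i < b.N; i++ {
		go func() {
			count(1000000) // fresh goroutine: starts on a small stack
			done <- true
		}()
		<-done
	}
}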
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index 8fb3d3ca94..029c2f15af 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -686,12 +686,13 @@ func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, stric
// cheaper than doing the hashing for a less associative
// cache.
if cache != nil {
- for _, ent := range cache.entries {
+ for i := range cache.entries {
// We check off first because we're more
// likely to have multiple entries with
// different offsets for the same targetpc
// than the other way around, so we'll usually
// fail in the first clause.
+ ent := &cache.entries[i]
if ent.off == off && ent.targetpc == targetpc {
return ent.val
}
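The hunk above swaps a by-value range loop for an index-plus-pointer one: for _, ent := range cache.entries copies every entry into ent on each iteration, while &cache.entries[i] reads the fields in place. A standalone illustration outside the runtime (the ent type is a hypothetical stand-in for pcvalueCacheEnt):

package cache

type ent struct {
	targetpc uintptr
	off      int32
	val      int32
}

var entries [16]ent

// lookupByValue copies each 16-byte entry into e before comparing.
func lookupByValue(off int32, pc uintptr) (int32, bool) {
	for _, e := range entries {
		if e.off == off && e.targetpc == pc {
			return e.val, true
		}
	}
	return 0, false
}

// lookupByPointer compares through a pointer; no per-iteration copy.
func lookupByPointer(off int32, pc uintptr) (int32, bool) {
	for i := range entries {
		e := &entries[i]
		if e.off == off && e.targetpc == pc {
			return e.val, true
		}
	}
	return 0, false
}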
@@ -836,35 +837,47 @@ func funcdata(f funcInfo, i int32) unsafe.Pointer {
// step advances to the next pc, value pair in the encoded table.
func step(p []byte, pc *uintptr, val *int32, first bool) (newp []byte, ok bool) {
- p, uvdelta := readvarint(p)
+ // For both uvdelta and pcdelta, the common case (~70%)
+ // is that they are a single byte. If so, avoid calling readvarint.
+ uvdelta := uint32(p[0])
if uvdelta == 0 && !first {
return nil, false
}
+ n := uint32(1)
+ if uvdelta&0x80 != 0 {
+ n, uvdelta = readvarint(p)
+ }
+ p = p[n:]
if uvdelta&1 != 0 {
uvdelta = ^(uvdelta >> 1)
} else {
uvdelta >>= 1
}
vdelta := int32(uvdelta)
- p, pcdelta := readvarint(p)
+ pcdelta := uint32(p[0])
+ n = 1
+ if pcdelta&0x80 != 0 {
+ n, pcdelta = readvarint(p)
+ }
+ p = p[n:]
*pc += uintptr(pcdelta * sys.PCQuantum)
*val += vdelta
return p, true
}
// readvarint reads a varint from p.
-func readvarint(p []byte) (newp []byte, val uint32) {
- var v, shift uint32
+func readvarint(p []byte) (read uint32, val uint32) {
+ var v, shift, n uint32
for {
- b := p[0]
- p = p[1:]
- v |= (uint32(b) & 0x7F) << shift
+ b := p[n]
+ n++
+ v |= uint32(b&0x7F) << (shift & 31)
if b&0x80 == 0 {
break
}
shift += 7
}
- return p, v
+ return n, v
}
type stackmap struct {
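Putting the two rewritten functions together: the sketch below decodes one (uvdelta, pcdelta) pair the way step now does, with a local copy of readvarint (renamed readvarintDemo so the snippet compiles on its own). It also shows the zig-zag convention behind the uvdelta&1 branch: bit 0 carries the sign and the remaining bits the magnitude, so small negative value deltas still encode as single-byte varints.

package main

import "fmt"

// readvarintDemo reproduces readvarint above; the caller now
// advances the slice itself using the returned byte count.
func readvarintDemo(p []byte) (read uint32, val uint32) {
	var v, shift, n uint32
	for {
		b := p[n]
		n++
		v |= uint32(b&0x7F) << (shift & 31)
		if b&0x80 == 0 {
			break
		}
		shift += 7
	}
	return n, v
}

func main() {
	// Encoded pair: uvdelta = zigzag(-3) = 5, pcdelta = 300 (0xac 0x02).
	p := []byte{0x05, 0xac, 0x02}

	n, uvdelta := readvarintDemo(p)
	p = p[n:]
	// Zig-zag decode, as in step: an odd value means a negative delta.
	var vdelta int32
	if uvdelta&1 != 0 {
		vdelta = int32(^(uvdelta >> 1))
	} else {
		vdelta = int32(uvdelta >> 1)
	}

	n, pcdelta := readvarintDemo(p)
	p = p[n:]

	fmt.Println(vdelta, pcdelta, len(p)) // -3 300 0
}

Note also the shift & 31 in the loop: masking the shift count to the register width lets the compiler drop its shift-overflow check, one of the "minor optimizations to help the compiler" from the commit message.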
@@ -878,7 +891,7 @@ func stackmapdata(stkmap *stackmap, n int32) bitvector {
if n < 0 || n >= stkmap.n {
throw("stackmapdata: index out of range")
}
- return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+7)/8))))}
+ return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+7)>>3))))}
}
// inlinedCall is the encoding of entries in the FUNCDATA_InlTree table.
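Finally, the stackmapdata hunk replaces (stkmap.nbit+7)/8 with >>3. Because nbit is an int32, /8 is a signed division, which the compiler must expand into several instructions so the result rounds toward zero for negative inputs; an arithmetic shift is a single instruction but rounds toward negative infinity, so the two agree only because nbit+7 is non-negative here. A small demonstration of where they diverge:

package main

import "fmt"

func main() {
	for _, n := range []int32{0, 1, 8, 9, -9} {
		div := (n + 7) / 8  // signed division rounds toward zero: (-9+7)/8 == 0
		shr := (n + 7) >> 3 // arithmetic shift rounds down: -2>>3 == -1
		fmt.Printf("n=%3d  div=%2d  shr=%2d\n", n, div, shr)
	}
}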