aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2020-02-28 14:16:41 -0500
committerMichael Pratt <mpratt@google.com>2020-03-05 21:51:02 +0000
commitfadbf7404d2b1aca63993e289448fcc3b6a23107 (patch)
tree7c182756a1dbfe6a2497f14e00c868bf4f64ab51
parent4a70ff425b3c16c19785b04bb89ca856749ed65b (diff)
downloadgo-fadbf7404d2b1aca63993e289448fcc3b6a23107.tar.gz
go-fadbf7404d2b1aca63993e289448fcc3b6a23107.zip
runtime/pprof: expand final stack frame to avoid truncation
When generating stacks, the runtime automatically expands inline functions to inline all inline frames in the stack. However, due to the stack size limit, the final frame may be truncated in the middle of several inline frames at the same location. As-is, we assume that the final frame is a normal function, and emit and cache a Location for it. If we later receive a complete stack frame, we will first use the cached Location for the inlined function and then generate a new Location for the "caller" frame, in violation of the pprof requirement to merge inlined functions into the same Location. As a result, we: 1. Nondeterministically may generate a profile with the different stacks combined or split, depending on which is encountered first. This is particularly problematic when performing a diff of profiles. 2. When split stacks are generated, we lose the inlining information. We avoid both of these problems by performing a second expansion of the last stack frame to recover additional inline frames that may have been lost. This expansion is a bit simpler than the one done by the runtime because we don't have to handle skipping, and we know that the last emitted frame is not an elided wrapper, since it by definition is already included in the stack. Fixes #37446 Change-Id: If3ca2af25b21d252cf457cc867dd932f107d4c61 Reviewed-on: https://go-review.googlesource.com/c/go/+/221577 Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Heschi Kreinick <heschi@google.com> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
-rw-r--r--src/runtime/pprof/map.go3
-rw-r--r--src/runtime/pprof/pprof_test.go47
-rw-r--r--src/runtime/pprof/proto.go40
-rw-r--r--src/runtime/pprof/runtime.go3
-rw-r--r--src/runtime/symtab.go53
5 files changed, 115 insertions, 31 deletions
diff --git a/src/runtime/pprof/map.go b/src/runtime/pprof/map.go
index a271ad022e..7c75872351 100644
--- a/src/runtime/pprof/map.go
+++ b/src/runtime/pprof/map.go
@@ -68,7 +68,8 @@ Search:
if len(m.freeStk) < len(stk) {
m.freeStk = make([]uintptr, 1024)
}
- e.stk = m.freeStk[:len(stk)]
+ // Limit cap to prevent append from clobbering freeStk.
+ e.stk = m.freeStk[:len(stk):len(stk)]
m.freeStk = m.freeStk[len(stk):]
for j := range stk {
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 20b44e1e01..5bfc3b6134 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -1172,16 +1172,25 @@ func TestTryAdd(t *testing.T) {
{Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
- name: "recursive_inlined_funcs",
+ // If a function is called recursively then it must not be
+ // inlined in the caller.
+ //
+ // N.B. We're generating an impossible profile here, with a
+ // recursive inlineCallee call. This is simulating a non-Go
+ // function that looks like an inlined Go function other than
+ // its recursive property. See pcDeck.tryAdd.
+ name: "recursive_func_is_not_inlined",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
5, 0, 30, inlinedCalleePtr, inlinedCalleePtr,
4, 0, 40, inlinedCalleePtr,
},
- wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}},
+ // inlinedCaller shows up here because
+ // runtime_expandFinalInlineFrame adds it to the stack frame.
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}, {"runtime/pprof.inlinedCaller"}},
wantSamples: []*profile.Sample{
- {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
- {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}},
+ {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
},
}, {
name: "truncated_stack_trace_later",
@@ -1202,12 +1211,36 @@ func TestTryAdd(t *testing.T) {
4, 0, 70, inlinedCalleePtr,
5, 0, 80, inlinedCalleePtr, inlinedCallerPtr,
},
- wantLocs: [][]string{ // the inline info is screwed up, but better than a crash.
- {"runtime/pprof.inlinedCallee"},
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ // We can recover the inlined caller from a truncated stack.
+ name: "truncated_stack_trace_only",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 4, 0, 70, inlinedCalleePtr,
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ // The same location is used for duplicated stacks.
+ name: "truncated_stack_trace_twice",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 4, 0, 70, inlinedCalleePtr,
+ 5, 0, 80, inlinedCallerPtr, inlinedCalleePtr,
+ },
+ wantLocs: [][]string{
+ {"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"},
{"runtime/pprof.inlinedCaller"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
- {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
+ {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}},
},
}}
diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go
index 8a30c7151d..416ace7ab2 100644
--- a/src/runtime/pprof/proto.go
+++ b/src/runtime/pprof/proto.go
@@ -384,6 +384,10 @@ func (b *profileBuilder) build() {
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
b.deck.reset()
+
+ // The last frame might be truncated. Recover lost inline frames.
+ stk = runtime_expandFinalInlineFrame(stk)
+
for len(stk) > 0 {
addr := stk[0]
if l, ok := b.locs[addr]; ok {
@@ -395,22 +399,12 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
// then, record the cached location.
locs = append(locs, l.id)
- // The stk may be truncated due to the stack depth limit
- // (e.g. See maxStack and maxCPUProfStack in runtime) or
- // bugs in runtime. Avoid the crash in either case.
- // TODO(hyangah): The correct fix may require using the exact
- // pcs as the key for b.locs cache management instead of just
- // relying on the very first pc. We are late in the go1.14 dev
- // cycle, so this is a workaround with little code change.
- if len(l.pcs) > len(stk) {
- stk = nil
- // TODO(hyangah): would be nice if we can enable
- // debug print out on demand and report the problematic
- // cached location entry and stack traces. Do we already
- // have such facility to utilize (e.g. GODEBUG)?
- } else {
- stk = stk[len(l.pcs):] // skip the matching pcs.
- }
+ // Skip the matching pcs.
+ //
+ // Even if stk was truncated due to the stack depth
+ // limit, expandFinalInlineFrame above has already
+ // fixed the truncation, ensuring it is long enough.
+ stk = stk[len(l.pcs):]
continue
}
@@ -427,9 +421,9 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
stk = stk[1:]
continue
}
- // add failed because this addr is not inlined with
- // the existing PCs in the deck. Flush the deck and retry to
- // handle this pc.
+ // add failed because this addr is not inlined with the
+ // existing PCs in the deck. Flush the deck and retry handling
+ // this pc.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
@@ -463,8 +457,8 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
// Frame's Func is nil (note: also true for non-Go functions), and
-// Frame's Entry matches its entry function frame's Entry. (note: could also be true for recursive calls and non-Go functions),
-// Frame's Name does not match its entry function frame's name.
+// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
+// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be recursive).
//
// As reading and processing the pcs in a stack trace one by one (from leaf to the root),
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
@@ -486,8 +480,8 @@ func (d *pcDeck) reset() {
// to the deck. If it fails the caller needs to flush the deck and retry.
func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
if existing := len(d.pcs); existing > 0 {
- // 'frames' are all expanded from one 'pc' and represent all inlined functions
- // so we check only the last one.
+ // 'd.frames' are all expanded from one 'pc' and represent all
+ // inlined functions so we check only the last one.
newFrame := frames[0]
last := d.frames[existing-1]
if last.Func != nil { // the last frame can't be inlined. Flush.
diff --git a/src/runtime/pprof/runtime.go b/src/runtime/pprof/runtime.go
index b71bbad9a6..dd2545b339 100644
--- a/src/runtime/pprof/runtime.go
+++ b/src/runtime/pprof/runtime.go
@@ -9,6 +9,9 @@ import (
"unsafe"
)
+// runtime_expandFinalInlineFrame is defined in runtime/symtab.go.
+func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr
+
// runtime_setProfLabel is defined in runtime/proflabel.go.
func runtime_setProfLabel(labels unsafe.Pointer)
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index a6e08d7214..997cfa3f7a 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -148,6 +148,59 @@ func (ci *Frames) Next() (frame Frame, more bool) {
return
}
+// runtime_expandFinalInlineFrame expands the final pc in stk to include all
+// "callers" if pc is inline.
+//
+//go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame
+func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr {
+ pc := stk[len(stk)-1]
+ tracepc := pc - 1
+
+ f := findfunc(tracepc)
+ if !f.valid() {
+ // Not a Go function.
+ return stk
+ }
+
+ inldata := funcdata(f, _FUNCDATA_InlTree)
+ if inldata == nil {
+ // Nothing inline in f.
+ return stk
+ }
+
+ // Treat the previous func as normal. We haven't actually checked, but
+ // since this pc was included in the stack, we know it shouldn't be
+ // elided.
+ lastFuncID := funcID_normal
+
+ // Remove pc from stk; we'll re-add it below.
+ stk = stk[:len(stk)-1]
+
+ // See inline expansion in gentraceback.
+ var cache pcvalueCache
+ inltree := (*[1 << 20]inlinedCall)(inldata)
+ for {
+ ix := pcdatavalue(f, _PCDATA_InlTreeIndex, tracepc, &cache)
+ if ix < 0 {
+ break
+ }
+ if inltree[ix].funcID == funcID_wrapper && elideWrapperCalling(lastFuncID) {
+ // ignore wrappers
+ } else {
+ stk = append(stk, pc)
+ }
+ lastFuncID = inltree[ix].funcID
+ // Back up to an instruction in the "caller".
+ tracepc = f.entry + uintptr(inltree[ix].parentPc)
+ pc = tracepc + 1
+ }
+
+ // N.B. we want to keep the last parentPC which is not inline.
+ stk = append(stk, pc)
+
+ return stk
+}
+
// expandCgoFrames expands frame information for pc, known to be
// a non-Go function, using the cgoSymbolizer hook. expandCgoFrames
// returns nil if pc could not be expanded.