aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2020-02-28 14:16:41 -0500
committerAlexander Rakoczy <alex@golang.org>2020-03-10 19:03:26 +0000
commit9c41c1d8dcb76d161ce0c633f325abe59f667d48 (patch)
tree3877d9e44d8c8923f6c1e77e440e25b5ef68b395
parent2e08d807329e008de13035879daf9f48d08dc48c (diff)
downloadgo-9c41c1d8dcb76d161ce0c633f325abe59f667d48.tar.gz
go-9c41c1d8dcb76d161ce0c633f325abe59f667d48.zip
[release-branch.go1.14] runtime/pprof: expand final stack frame to avoid truncation
When generating stacks, the runtime automatically expands inline functions to inline all inline frames in the stack. However, due to the stack size limit, the final frame may be truncated in the middle of several inline frames at the same location. As-is, we assume that the final frame is a normal function, and emit and cache a Location for it. If we later receive a complete stack frame, we will first use the cached Location for the inlined function and then generate a new Location for the "caller" frame, in violation of the pprof requirement to merge inlined functions into the same Location. As a result, we: 1. Nondeterministically may generate a profile with the different stacks combined or split, depending on which is encountered first. This is particularly problematic when performing a diff of profiles. 2. When split stacks are generated, we lose the inlining information. We avoid both of these problems by performing a second expansion of the last stack frame to recover additional inline frames that may have been lost. This expansion is a bit simpler than the one done by the runtime because we don't have to handle skipping, and we know that the last emitted frame is not an elided wrapper, since it by definition is already included in the stack. Fixes #37447 Change-Id: If3ca2af25b21d252cf457cc867dd932f107d4c61 Reviewed-on: https://go-review.googlesource.com/c/go/+/221577 Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Heschi Kreinick <heschi@google.com> Reviewed-by: Keith Randall <khr@golang.org> Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com> (cherry picked from commit fadbf7404d2b1aca63993e289448fcc3b6a23107) Reviewed-on: https://go-review.googlesource.com/c/go/+/222762
-rw-r--r--src/runtime/pprof/map.go3
-rw-r--r--src/runtime/pprof/pprof_test.go47
-rw-r--r--src/runtime/pprof/proto.go40
-rw-r--r--src/runtime/pprof/runtime.go3
-rw-r--r--src/runtime/symtab.go53
5 files changed, 115 insertions, 31 deletions
diff --git a/src/runtime/pprof/map.go b/src/runtime/pprof/map.go
index a271ad022e..7c75872351 100644
--- a/src/runtime/pprof/map.go
+++ b/src/runtime/pprof/map.go
@@ -68,7 +68,8 @@ Search:
if len(m.freeStk) < len(stk) {
m.freeStk = make([]uintptr, 1024)
}
- e.stk = m.freeStk[:len(stk)]
+ // Limit cap to prevent append from clobbering freeStk.
+ e.stk = m.freeStk[:len(stk):len(stk)]
m.freeStk = m.freeStk[len(stk):]
for j := range stk {
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 20b44e1e01..5bfc3b6134 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -1172,16 +1172,25 @@ func TestTryAdd(t *testing.T) {
{Value: []int64{20, 20 * period}, Location: []*profile.Location{{ID: 1}}},
},
}, {
- name: "recursive_inlined_funcs",
+ // If a function is called recursively then it must not be
+ // inlined in the caller.
+ //
+ // N.B. We're generating an impossible profile here, with a
+ // recursive inlineCallee call. This is simulating a non-Go
+ // function that looks like an inlined Go function other than
+ // its recursive property. See pcDeck.tryAdd.
+ name: "recursive_func_is_not_inlined",
input: []uint64{
3, 0, 500, // hz = 500. Must match the period.
5, 0, 30, inlinedCalleePtr, inlinedCalleePtr,
4, 0, 40, inlinedCalleePtr,
},
- wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}},
+ // inlinedCaller shows up here because
+ // runtime_expandFinalInlineFrame adds it to the stack frame.
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee"}, {"runtime/pprof.inlinedCaller"}},
wantSamples: []*profile.Sample{
- {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}}},
- {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{30, 30 * period}, Location: []*profile.Location{{ID: 1}, {ID: 1}, {ID: 2}}},
+ {Value: []int64{40, 40 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
},
}, {
name: "truncated_stack_trace_later",
@@ -1202,12 +1211,36 @@ func TestTryAdd(t *testing.T) {
4, 0, 70, inlinedCalleePtr,
5, 0, 80, inlinedCalleePtr, inlinedCallerPtr,
},
- wantLocs: [][]string{ // the inline info is screwed up, but better than a crash.
- {"runtime/pprof.inlinedCallee"},
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
+ {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ // We can recover the inlined caller from a truncated stack.
+ name: "truncated_stack_trace_only",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 4, 0, 70, inlinedCalleePtr,
+ },
+ wantLocs: [][]string{{"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"}},
+ wantSamples: []*profile.Sample{
+ {Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
+ },
+ }, {
+ // The same location is used for duplicated stacks.
+ name: "truncated_stack_trace_twice",
+ input: []uint64{
+ 3, 0, 500, // hz = 500. Must match the period.
+ 4, 0, 70, inlinedCalleePtr,
+ 5, 0, 80, inlinedCallerPtr, inlinedCalleePtr,
+ },
+ wantLocs: [][]string{
+ {"runtime/pprof.inlinedCallee", "runtime/pprof.inlinedCaller"},
{"runtime/pprof.inlinedCaller"}},
wantSamples: []*profile.Sample{
{Value: []int64{70, 70 * period}, Location: []*profile.Location{{ID: 1}}},
- {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 1}, {ID: 2}}},
+ {Value: []int64{80, 80 * period}, Location: []*profile.Location{{ID: 2}, {ID: 1}}},
},
}}
diff --git a/src/runtime/pprof/proto.go b/src/runtime/pprof/proto.go
index 8a30c7151d..416ace7ab2 100644
--- a/src/runtime/pprof/proto.go
+++ b/src/runtime/pprof/proto.go
@@ -384,6 +384,10 @@ func (b *profileBuilder) build() {
// It may emit to b.pb, so there must be no message encoding in progress.
func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLocs []uint64) {
b.deck.reset()
+
+ // The last frame might be truncated. Recover lost inline frames.
+ stk = runtime_expandFinalInlineFrame(stk)
+
for len(stk) > 0 {
addr := stk[0]
if l, ok := b.locs[addr]; ok {
@@ -395,22 +399,12 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
// then, record the cached location.
locs = append(locs, l.id)
- // The stk may be truncated due to the stack depth limit
- // (e.g. See maxStack and maxCPUProfStack in runtime) or
- // bugs in runtime. Avoid the crash in either case.
- // TODO(hyangah): The correct fix may require using the exact
- // pcs as the key for b.locs cache management instead of just
- // relying on the very first pc. We are late in the go1.14 dev
- // cycle, so this is a workaround with little code change.
- if len(l.pcs) > len(stk) {
- stk = nil
- // TODO(hyangah): would be nice if we can enable
- // debug print out on demand and report the problematic
- // cached location entry and stack traces. Do we already
- // have such facility to utilize (e.g. GODEBUG)?
- } else {
- stk = stk[len(l.pcs):] // skip the matching pcs.
- }
+ // Skip the matching pcs.
+ //
+ // Even if stk was truncated due to the stack depth
+ // limit, expandFinalInlineFrame above has already
+ // fixed the truncation, ensuring it is long enough.
+ stk = stk[len(l.pcs):]
continue
}
@@ -427,9 +421,9 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
stk = stk[1:]
continue
}
- // add failed because this addr is not inlined with
- // the existing PCs in the deck. Flush the deck and retry to
- // handle this pc.
+ // add failed because this addr is not inlined with the
+ // existing PCs in the deck. Flush the deck and retry handling
+ // this pc.
if id := b.emitLocation(); id > 0 {
locs = append(locs, id)
}
@@ -463,8 +457,8 @@ func (b *profileBuilder) appendLocsForStack(locs []uint64, stk []uintptr) (newLo
// the fake pcs and restore the inlined and entry functions. Inlined functions
// have the following properties:
// Frame's Func is nil (note: also true for non-Go functions), and
-// Frame's Entry matches its entry function frame's Entry. (note: could also be true for recursive calls and non-Go functions),
-// Frame's Name does not match its entry function frame's name.
+// Frame's Entry matches its entry function frame's Entry (note: could also be true for recursive calls and non-Go functions), and
+// Frame's Name does not match its entry function frame's name (note: inlined functions cannot be recursive).
//
// As reading and processing the pcs in a stack trace one by one (from leaf to the root),
// we use pcDeck to temporarily hold the observed pcs and their expanded frames
@@ -486,8 +480,8 @@ func (d *pcDeck) reset() {
// to the deck. If it fails the caller needs to flush the deck and retry.
func (d *pcDeck) tryAdd(pc uintptr, frames []runtime.Frame, symbolizeResult symbolizeFlag) (success bool) {
if existing := len(d.pcs); existing > 0 {
- // 'frames' are all expanded from one 'pc' and represent all inlined functions
- // so we check only the last one.
+ // 'd.frames' are all expanded from one 'pc' and represent all
+ // inlined functions so we check only the last one.
newFrame := frames[0]
last := d.frames[existing-1]
if last.Func != nil { // the last frame can't be inlined. Flush.
diff --git a/src/runtime/pprof/runtime.go b/src/runtime/pprof/runtime.go
index b71bbad9a6..dd2545b339 100644
--- a/src/runtime/pprof/runtime.go
+++ b/src/runtime/pprof/runtime.go
@@ -9,6 +9,9 @@ import (
"unsafe"
)
+// runtime_expandFinalInlineFrame is defined in runtime/symtab.go.
+func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr
+
// runtime_setProfLabel is defined in runtime/proflabel.go.
func runtime_setProfLabel(labels unsafe.Pointer)
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index a6e08d7214..997cfa3f7a 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -148,6 +148,59 @@ func (ci *Frames) Next() (frame Frame, more bool) {
return
}
+// runtime_expandFinalInlineFrame expands the final pc in stk to include all
+// "callers" if pc is inline.
+//
+//go:linkname runtime_expandFinalInlineFrame runtime/pprof.runtime_expandFinalInlineFrame
+func runtime_expandFinalInlineFrame(stk []uintptr) []uintptr {
+ pc := stk[len(stk)-1]
+ tracepc := pc - 1
+
+ f := findfunc(tracepc)
+ if !f.valid() {
+ // Not a Go function.
+ return stk
+ }
+
+ inldata := funcdata(f, _FUNCDATA_InlTree)
+ if inldata == nil {
+ // Nothing inline in f.
+ return stk
+ }
+
+ // Treat the previous func as normal. We haven't actually checked, but
+ // since this pc was included in the stack, we know it shouldn't be
+ // elided.
+ lastFuncID := funcID_normal
+
+ // Remove pc from stk; we'll re-add it below.
+ stk = stk[:len(stk)-1]
+
+ // See inline expansion in gentraceback.
+ var cache pcvalueCache
+ inltree := (*[1 << 20]inlinedCall)(inldata)
+ for {
+ ix := pcdatavalue(f, _PCDATA_InlTreeIndex, tracepc, &cache)
+ if ix < 0 {
+ break
+ }
+ if inltree[ix].funcID == funcID_wrapper && elideWrapperCalling(lastFuncID) {
+ // ignore wrappers
+ } else {
+ stk = append(stk, pc)
+ }
+ lastFuncID = inltree[ix].funcID
+ // Back up to an instruction in the "caller".
+ tracepc = f.entry + uintptr(inltree[ix].parentPc)
+ pc = tracepc + 1
+ }
+
+ // N.B. we want to keep the last parentPC which is not inline.
+ stk = append(stk, pc)
+
+ return stk
+}
+
// expandCgoFrames expands frame information for pc, known to be
// a non-Go function, using the cgoSymbolizer hook. expandCgoFrames
// returns nil if pc could not be expanded.