author     Felix Geisendörfer <felix.geisendoerfer@datadoghq.com>    2024-03-29 19:59:47 +0100
committer  Michael Pratt <mpratt@google.com>                         2024-05-08 17:48:38 +0000
commit     2141315251da47745c8f649c01e598a19bd68897 (patch)
tree       b6e34cc8dff2cd039e2d6e2fb14f5275e88fce39
parent     c3bd543cc304b405b00263e2d4e683f9b65214ec (diff)
runtime: move profiling pc buffers to m

Move profiling pc buffers from being stack allocated to an m field. This
is motivated by the next patch, which will increase the default stack
depth to 128, which might lead to undesirable stack growth for goroutines
that produce profiling events. Additionally, this change paves the way to
make the stack depth configurable via GODEBUG.

Change-Id: Ifa407f899188e2c7c0a81de92194fdb627cb4b36
Reviewed-on: https://go-review.googlesource.com/c/go/+/574699
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
-rw-r--r--  src/runtime/malloc.go    |  2
-rw-r--r--  src/runtime/mprof.go     | 37
-rw-r--r--  src/runtime/proc.go      | 10
-rw-r--r--  src/runtime/runtime2.go  |  1
4 files changed, 30 insertions, 20 deletions
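
The essence of the change shows up in the mprof.go hunks below: capture buffers that used to be declared as var stk [maxStack]uintptr on the goroutine stack now live in m.profStack. As a rough user-level analogy (the worker type, the capture helpers, and the maxStack constant here are hypothetical illustrations, not the runtime's API), a sketch of the before/after:

package main

import (
	"fmt"
	"runtime"
)

const maxStack = 128 // the follow-up patch raises the default depth to 128

// Before: each profiling event reserves maxStack pcs on the goroutine stack
// (about 1 KiB on 64-bit platforms once the depth grows to 128).
func captureOnStack(skip int) []uintptr {
	var stk [maxStack]uintptr
	n := runtime.Callers(skip+1, stk[:])
	return append([]uintptr(nil), stk[:n]...)
}

// After: the buffer lives in a long-lived owner (the runtime's m in the
// patch, a hypothetical worker here) and is reused for every event, so the
// hot path neither allocates nor grows the stack.
type worker struct {
	profStack []uintptr // allocated once, reused for every capture
}

func (w *worker) capture(skip int) []uintptr {
	n := runtime.Callers(skip+1, w.profStack)
	return append([]uintptr(nil), w.profStack[:n]...)
}

func main() {
	w := &worker{profStack: make([]uintptr, maxStack)}
	fmt.Println(len(captureOnStack(1)), len(w.capture(1)))
}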
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 96c4761bc8..1df9006011 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1404,7 +1404,7 @@ func profilealloc(mp *m, x unsafe.Pointer, size uintptr) {
throw("profilealloc called without a P or outside bootstrapping")
}
c.nextSample = nextSample()
- mProf_Malloc(x, size)
+ mProf_Malloc(mp, x, size)
}
// nextSample returns the next sampling point for heap profiling. The goal is
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index 26b7d78283..d4d5e285fd 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -422,15 +422,13 @@ func mProf_PostSweep() {
}
// Called by malloc to record a profiled block.
-func mProf_Malloc(p unsafe.Pointer, size uintptr) {
- var stk [maxStack]uintptr
- nstk := callers(4, stk[:])
-
+func mProf_Malloc(mp *m, p unsafe.Pointer, size uintptr) {
+ nstk := callers(4, mp.profStack)
index := (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
- b := stkbucket(memProfile, size, stk[:nstk], true)
- mp := b.mp()
- mpc := &mp.future[index]
+ b := stkbucket(memProfile, size, mp.profStack[:nstk], true)
+ mr := b.mp()
+ mpc := &mr.future[index]
lock(&profMemFutureLock[index])
mpc.allocs++
@@ -505,16 +503,17 @@ func blocksampled(cycles, rate int64) bool {
}
func saveblockevent(cycles, rate int64, skip int, which bucketType) {
- gp := getg()
var nstk int
- var stk [maxStack]uintptr
+ gp := getg()
+ mp := acquirem() // we must not be preempted while accessing profstack
if gp.m.curg == nil || gp.m.curg == gp {
- nstk = callers(skip, stk[:])
+ nstk = callers(skip, mp.profStack)
} else {
- nstk = gcallers(gp.m.curg, skip, stk[:])
+ nstk = gcallers(gp.m.curg, skip, mp.profStack)
}
- saveBlockEventStack(cycles, rate, stk[:nstk], which)
+ saveBlockEventStack(cycles, rate, mp.profStack[:nstk], which)
+ releasem(mp)
}
// lockTimer assists with profiling contention on runtime-internal locks.
@@ -613,12 +612,12 @@ func (lt *lockTimer) end() {
}
type mLockProfile struct {
- waitTime atomic.Int64 // total nanoseconds spent waiting in runtime.lockWithRank
- stack [maxStack]uintptr // stack that experienced contention in runtime.lockWithRank
- pending uintptr // *mutex that experienced contention (to be traceback-ed)
- cycles int64 // cycles attributable to "pending" (if set), otherwise to "stack"
- cyclesLost int64 // contention for which we weren't able to record a call stack
- disabled bool // attribute all time to "lost"
+ waitTime atomic.Int64 // total nanoseconds spent waiting in runtime.lockWithRank
+ stack []uintptr // stack that experienced contention in runtime.lockWithRank
+ pending uintptr // *mutex that experienced contention (to be traceback-ed)
+ cycles int64 // cycles attributable to "pending" (if set), otherwise to "stack"
+ cyclesLost int64 // contention for which we weren't able to record a call stack
+ disabled bool // attribute all time to "lost"
}
func (prof *mLockProfile) recordLock(cycles int64, l *mutex) {
@@ -703,7 +702,7 @@ func (prof *mLockProfile) captureStack() {
systemstack(func() {
var u unwinder
u.initAt(pc, sp, 0, gp, unwindSilentErrors|unwindJumpStack)
- nstk = tracebackPCs(&u, skip, prof.stack[:])
+ nstk = tracebackPCs(&u, skip, prof.stack)
})
if nstk < len(prof.stack) {
prof.stack[nstk] = 0
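
In saveblockevent above, the buffer is now state shared by everything running on that m, so the patch brackets its use with acquirem/releasem: while the m is held, the goroutine cannot be preempted or migrated, so nothing else can be writing into mp.profStack, and the stack is handed to saveBlockEventStack before the m is released. A rough user-level analogy of that discipline (a hypothetical worker type with a mutex standing in for the no-preemption guarantee; not the runtime's API):

package main

import (
	"fmt"
	"runtime"
	"sync"
)

const maxStack = 128 // assumed depth

// worker stands in for the runtime's m; the mutex stands in for the
// guarantee acquirem gives in the patch: while the m is held, no other
// goroutine can touch profStack.
type worker struct {
	mu        sync.Mutex
	profStack []uintptr
}

func newWorker() *worker { return &worker{profStack: make([]uintptr, maxStack)} }

// saveEvent captures a stack into the shared buffer and copies it out before
// releasing the worker, mirroring how saveblockevent passes the slice to
// saveBlockEventStack before calling releasem.
func (w *worker) saveEvent(skip int) []uintptr {
	w.mu.Lock()         // "acquirem": exclusive use of profStack
	defer w.mu.Unlock() // "releasem"
	n := runtime.Callers(skip+1, w.profStack)
	return append([]uintptr(nil), w.profStack[:n]...)
}

func main() {
	w := newWorker()
	fmt.Println("captured", len(w.saveEvent(1)), "pcs")
}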
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 161d14e562..7a1c957822 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -922,6 +922,16 @@ func mcommoninit(mp *m, id int64) {
if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" {
mp.cgoCallers = new(cgoCallers)
}
+ mProfStackInit(mp)
+}
+
+// mProfStackInit is used to eagerly initialize stack trace buffers for
+// profiling. Lazy allocation would have to deal with reentrancy issues in
+// malloc and runtime locks for mLockProfile.
+// TODO(mknyszek): Implement lazy allocation if this becomes a problem.
+func mProfStackInit(mp *m) {
+ mp.profStack = make([]uintptr, maxStack)
+ mp.mLockProfile.stack = make([]uintptr, maxStack)
}
func (mp *m) becomeSpinning() {
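
The mProfStackInit comment above gives the reason for allocating eagerly from mcommoninit: the consumers of these buffers run inside malloc (mProf_Malloc) and under contended runtime locks (mLockProfile), where allocating on first use would be unsafe. A small hypothetical sketch of that trade-off (worker, newWorker, and capture are illustrative names only, not runtime code):

package main

import "runtime"

const maxStack = 128 // assumed depth

type worker struct{ profStack []uintptr }

// Eager (what the patch does): allocate once when the worker is created,
// well away from any profiling hot path; mirrors mcommoninit calling
// mProfStackInit.
func newWorker() *worker { return &worker{profStack: make([]uintptr, maxStack)} }

// Lazy (what the patch avoids): in the real runtime mProf_Malloc runs inside
// malloc and mLockProfile records stacks for contended runtime locks, so a
// make() on first use would re-enter the allocator or allocate where that is
// not allowed. The nil check below marks where that hazard would sit.
func (w *worker) capture(skip int) int {
	if w.profStack == nil {
		w.profStack = make([]uintptr, maxStack) // the reentrancy hazard
	}
	return runtime.Callers(skip+1, w.profStack)
}

func main() {
	w := newWorker()
	_ = w.capture(1)
}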
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 0093a6ddb9..2ce9e8d0a2 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -599,6 +599,7 @@ type m struct {
nextwaitm muintptr // next m waiting for lock
mLockProfile mLockProfile // fields relating to runtime.lock contention
+ profStack []uintptr // used for memory/block/mutex stack traces
// wait* are used to carry arguments from gopark into park_m, because
// there's no stack to put them on. That is their sole purpose.