aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2021-05-10 16:50:32 -0400
committerMichael Pratt <mpratt@google.com>2021-05-11 14:21:06 +0000
commit2520e72d3bbfe6651cd6324077afdb4babb36b9a (patch)
treec379ba5d793d3cfe83892a418fe9e085881c1aee /src/runtime
parent326a7925179ea669aa9f947dda82e425673cb220 (diff)
downloadgo-2520e72d3bbfe6651cd6324077afdb4babb36b9a.tar.gz
go-2520e72d3bbfe6651cd6324077afdb4babb36b9a.zip
runtime: hold sched.lock across atomic pidleget/pidleput
As a cleanup, golang.org/cl/307914 unintentionally caused the idle GC work recheck to drop sched.lock between acquiring a P and committing to keep it (once a worker G was found). This is unsafe, as releasing a P requires extra checks once sched.lock is taken (such as for runSafePointFn). Since checkIdleGCNoP does not perform these extra checks, we can now race with other users. In the case of #45975, we may hang with this sequence: 1. M1: checkIdleGCNoP takes sched.lock, gets P1, releases sched.lock. 2. M2: forEachP takes sched.lock, iterates over sched.pidle without finding P1, releases sched.lock. 3. M1: checkIdleGCNoP puts P1 back in sched.pidle. 4. M2: forEachP waits forever for P1 to run the safePointFn. Change back to the old behavior of releasing sched.lock only after we are certain we will keep the P. Thus if we put it back its removal from sched.pidle was never visible. Fixes #45975 For #45916 For #45885 For #45884 Change-Id: I191a1800923b206ccaf96bdcdd0bfdad17b532e9 Reviewed-on: https://go-review.googlesource.com/c/go/+/318569 Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/proc.go32
1 files changed, 21 insertions, 11 deletions
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index ba02b14995..378d5e32f5 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -3132,24 +3132,33 @@ func checkIdleGCNoP() (*p, *g) {
return nil, nil
}
- // Work is available; we can start an idle GC worker only if
- // there is an available P and available worker G.
+ // Work is available; we can start an idle GC worker only if there is
+ // an available P and available worker G.
//
- // We can attempt to acquire these in either order. Workers are
- // almost always available (see comment in findRunnableGCWorker
- // for the one case there may be none). Since we're slightly
- // less likely to find a P, check for that first.
+ // We can attempt to acquire these in either order, though both have
+ // synchonization concerns (see below). Workers are almost always
+ // available (see comment in findRunnableGCWorker for the one case
+ // there may be none). Since we're slightly less likely to find a P,
+ // check for that first.
+ //
+ // Synchronization: note that we must hold sched.lock until we are
+ // committed to keeping it. Otherwise we cannot put the unnecessary P
+ // back in sched.pidle without performing the full set of idle
+ // transition checks.
+ //
+ // If we were to check gcBgMarkWorkerPool first, we must somehow handle
+ // the assumption in gcControllerState.findRunnableGCWorker that an
+ // empty gcBgMarkWorkerPool is only possible if gcMarkDone is running.
lock(&sched.lock)
pp := pidleget()
- unlock(&sched.lock)
if pp == nil {
+ unlock(&sched.lock)
return nil, nil
}
- // Now that we own a P, gcBlackenEnabled can't change
- // (as it requires STW).
+ // Now that we own a P, gcBlackenEnabled can't change (as it requires
+ // STW).
if gcBlackenEnabled == 0 {
- lock(&sched.lock)
pidleput(pp)
unlock(&sched.lock)
return nil, nil
@@ -3157,12 +3166,13 @@ func checkIdleGCNoP() (*p, *g) {
node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
if node == nil {
- lock(&sched.lock)
pidleput(pp)
unlock(&sched.lock)
return nil, nil
}
+ unlock(&sched.lock)
+
return pp, node.gp.ptr()
}