aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/proc.go
diff options
context:
space:
mode:
authorAndrew G. Morgan <agm@google.com>2021-03-26 19:27:22 -0700
committerMichael Pratt <mpratt@google.com>2021-04-21 21:25:26 +0000
commit7e97e4e8ccdba9677f31ab9380802cd7613f62c5 (patch)
tree74a0797fa08596e554f1f5f6070fb95ea961aba3 /src/runtime/proc.go
parent54af9fd9e69d5cc33b16b9a32d9f7dc71eef0d18 (diff)
downloadgo-7e97e4e8ccdba9677f31ab9380802cd7613f62c5.tar.gz
go-7e97e4e8ccdba9677f31ab9380802cd7613f62c5.zip
syscall: syscall.AllThreadsSyscall signal handling fixes
The runtime support for syscall.AllThreadsSyscall() functions had some corner case deadlock issues when signal handling was in use. This was observed in at least 3 build test failures on ppc64 and amd64 architecture CGO_ENABLED=0 builds over the last few months. The fixes involve more controlled handling of signals while the AllThreads mechanism is being executed. Further details are discussed in bug #44193. The all-threads syscall support is new in go1.16, so earlier releases are not affected by this bug. Fixes #44193 Change-Id: I01ba8508a6e1bb2d872751f50da86dd07911a41d Reviewed-on: https://go-review.googlesource.com/c/go/+/305149 Reviewed-by: Michael Pratt <mpratt@google.com> Trust: Michael Pratt <mpratt@google.com> Trust: Ian Lance Taylor <iant@golang.org> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Go Bot <gobot@golang.org>
Diffstat (limited to 'src/runtime/proc.go')
-rw-r--r--src/runtime/proc.go52
1 files changed, 48 insertions, 4 deletions
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index b3f113f6ab..5c7328aacc 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -1402,6 +1402,9 @@ func mPark() {
g := getg()
for {
notesleep(&g.m.park)
+ // Note, because of signal handling by this parked m,
+ // a preemptive mDoFixup() may actually occur via
+ // mDoFixupAndOSYield(). (See golang.org/issue/44193)
noteclear(&g.m.park)
if !mDoFixup() {
return
@@ -1635,6 +1638,22 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) {
for atomic.Load(&sched.sysmonStarting) != 0 {
osyield()
}
+
+ // We don't want this thread to handle signals for the
+ // duration of this critical section. The underlying issue
+ // being that this locked coordinating m is the one monitoring
+ // for fn() execution by all the other m's of the runtime,
+ // while no regular go code execution is permitted (the world
+ // is stopped). If this present m were to get distracted to
+ // run signal handling code, and find itself waiting for a
+ // second thread to execute go code before being able to
+ // return from that signal handling, a deadlock will result.
+ // (See golang.org/issue/44193.)
+ lockOSThread()
+ var sigmask sigset
+ sigsave(&sigmask)
+ sigblock(false)
+
stopTheWorldGC("doAllThreadsSyscall")
if atomic.Load(&newmHandoff.haveTemplateThread) != 0 {
// Ensure that there are no in-flight thread
@@ -1686,6 +1705,7 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) {
// the possibility of racing with mp.
lock(&mp.mFixup.lock)
mp.mFixup.fn = fn
+ atomic.Store(&mp.mFixup.used, 1)
if mp.doesPark {
// For non-service threads this will
// cause the wakeup to be short lived
@@ -1702,9 +1722,7 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) {
if mp.procid == tid {
continue
}
- lock(&mp.mFixup.lock)
- done = done && (mp.mFixup.fn == nil)
- unlock(&mp.mFixup.lock)
+ done = atomic.Load(&mp.mFixup.used) == 0
}
if done {
break
@@ -1731,6 +1749,8 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) {
unlock(&mFixupRace.lock)
}
startTheWorldGC()
+ msigrestore(sigmask)
+ unlockOSThread()
}
// runSafePointFn runs the safe point function, if any, for this P.
@@ -2225,9 +2245,21 @@ var mFixupRace struct {
// mDoFixup runs any outstanding fixup function for the running m.
// Returns true if a fixup was outstanding and actually executed.
//
+// Note: to avoid deadlocks, and the need for the fixup function
+// itself to be async safe, signals are blocked for the working m
+// while it holds the mFixup lock. (See golang.org/issue/44193)
+//
//go:nosplit
func mDoFixup() bool {
_g_ := getg()
+ if used := atomic.Load(&_g_.m.mFixup.used); used == 0 {
+ return false
+ }
+
+ // slow path - if fixup fn is used, block signals and lock.
+ var sigmask sigset
+ sigsave(&sigmask)
+ sigblock(false)
lock(&_g_.m.mFixup.lock)
fn := _g_.m.mFixup.fn
if fn != nil {
@@ -2244,7 +2276,6 @@ func mDoFixup() bool {
// is more obviously safe.
throw("GC must be disabled to protect validity of fn value")
}
- *(*uintptr)(unsafe.Pointer(&_g_.m.mFixup.fn)) = 0
if _g_.racectx != 0 || !raceenabled {
fn(false)
} else {
@@ -2259,11 +2290,24 @@ func mDoFixup() bool {
_g_.racectx = 0
unlock(&mFixupRace.lock)
}
+ *(*uintptr)(unsafe.Pointer(&_g_.m.mFixup.fn)) = 0
+ atomic.Store(&_g_.m.mFixup.used, 0)
}
unlock(&_g_.m.mFixup.lock)
+ msigrestore(sigmask)
return fn != nil
}
+// mDoFixupAndOSYield is called when an m is unable to send a signal
+// because the allThreadsSyscall mechanism is in progress. That is, an
+// mPark() has been interrupted with this signal handler so we need to
+// ensure the fixup is executed from this context.
+//go:nosplit
+func mDoFixupAndOSYield() {
+ mDoFixup()
+ osyield()
+}
+
// templateThread is a thread in a known-good state that exists solely
// to start new threads in known-good states when the calling thread
// may not be in a good state.