diff options
author | Andrew G. Morgan <agm@google.com> | 2021-03-26 19:27:22 -0700 |
---|---|---|
committer | Michael Pratt <mpratt@google.com> | 2021-04-21 21:25:26 +0000 |
commit | 7e97e4e8ccdba9677f31ab9380802cd7613f62c5 (patch) | |
tree | 74a0797fa08596e554f1f5f6070fb95ea961aba3 /src/runtime/proc.go | |
parent | 54af9fd9e69d5cc33b16b9a32d9f7dc71eef0d18 (diff) | |
download | go-7e97e4e8ccdba9677f31ab9380802cd7613f62c5.tar.gz go-7e97e4e8ccdba9677f31ab9380802cd7613f62c5.zip |
syscall: syscall.AllThreadsSyscall signal handling fixes
The runtime support for syscall.AllThreadsSyscall() functions had
some corner case deadlock issues when signal handling was in use.
This was observed in at least 3 build test failures on ppc64 and
amd64 architecture CGO_ENABLED=0 builds over the last few months.
The fixes involve more controlled handling of signals while the
AllThreads mechanism is being executed. Further details are
discussed in bug #44193.
The all-threads syscall support is new in go1.16, so earlier
releases are not affected by this bug.
Fixes #44193
Change-Id: I01ba8508a6e1bb2d872751f50da86dd07911a41d
Reviewed-on: https://go-review.googlesource.com/c/go/+/305149
Reviewed-by: Michael Pratt <mpratt@google.com>
Trust: Michael Pratt <mpratt@google.com>
Trust: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Diffstat (limited to 'src/runtime/proc.go')
-rw-r--r-- | src/runtime/proc.go | 52 |
1 files changed, 48 insertions, 4 deletions
diff --git a/src/runtime/proc.go b/src/runtime/proc.go index b3f113f6ab..5c7328aacc 100644 --- a/src/runtime/proc.go +++ b/src/runtime/proc.go @@ -1402,6 +1402,9 @@ func mPark() { g := getg() for { notesleep(&g.m.park) + // Note, because of signal handling by this parked m, + // a preemptive mDoFixup() may actually occur via + // mDoFixupAndOSYield(). (See golang.org/issue/44193) noteclear(&g.m.park) if !mDoFixup() { return @@ -1635,6 +1638,22 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) { for atomic.Load(&sched.sysmonStarting) != 0 { osyield() } + + // We don't want this thread to handle signals for the + // duration of this critical section. The underlying issue + // being that this locked coordinating m is the one monitoring + // for fn() execution by all the other m's of the runtime, + // while no regular go code execution is permitted (the world + // is stopped). If this present m were to get distracted to + // run signal handling code, and find itself waiting for a + // second thread to execute go code before being able to + // return from that signal handling, a deadlock will result. + // (See golang.org/issue/44193.) + lockOSThread() + var sigmask sigset + sigsave(&sigmask) + sigblock(false) + stopTheWorldGC("doAllThreadsSyscall") if atomic.Load(&newmHandoff.haveTemplateThread) != 0 { // Ensure that there are no in-flight thread @@ -1686,6 +1705,7 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) { // the possibility of racing with mp. lock(&mp.mFixup.lock) mp.mFixup.fn = fn + atomic.Store(&mp.mFixup.used, 1) if mp.doesPark { // For non-service threads this will // cause the wakeup to be short lived @@ -1702,9 +1722,7 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) { if mp.procid == tid { continue } - lock(&mp.mFixup.lock) - done = done && (mp.mFixup.fn == nil) - unlock(&mp.mFixup.lock) + done = atomic.Load(&mp.mFixup.used) == 0 } if done { break @@ -1731,6 +1749,8 @@ func syscall_runtime_doAllThreadsSyscall(fn func(bool) bool) { unlock(&mFixupRace.lock) } startTheWorldGC() + msigrestore(sigmask) + unlockOSThread() } // runSafePointFn runs the safe point function, if any, for this P. @@ -2225,9 +2245,21 @@ var mFixupRace struct { // mDoFixup runs any outstanding fixup function for the running m. // Returns true if a fixup was outstanding and actually executed. // +// Note: to avoid deadlocks, and the need for the fixup function +// itself to be async safe, signals are blocked for the working m +// while it holds the mFixup lock. (See golang.org/issue/44193) +// //go:nosplit func mDoFixup() bool { _g_ := getg() + if used := atomic.Load(&_g_.m.mFixup.used); used == 0 { + return false + } + + // slow path - if fixup fn is used, block signals and lock. + var sigmask sigset + sigsave(&sigmask) + sigblock(false) lock(&_g_.m.mFixup.lock) fn := _g_.m.mFixup.fn if fn != nil { @@ -2244,7 +2276,6 @@ func mDoFixup() bool { // is more obviously safe. throw("GC must be disabled to protect validity of fn value") } - *(*uintptr)(unsafe.Pointer(&_g_.m.mFixup.fn)) = 0 if _g_.racectx != 0 || !raceenabled { fn(false) } else { @@ -2259,11 +2290,24 @@ func mDoFixup() bool { _g_.racectx = 0 unlock(&mFixupRace.lock) } + *(*uintptr)(unsafe.Pointer(&_g_.m.mFixup.fn)) = 0 + atomic.Store(&_g_.m.mFixup.used, 0) } unlock(&_g_.m.mFixup.lock) + msigrestore(sigmask) return fn != nil } +// mDoFixupAndOSYield is called when an m is unable to send a signal +// because the allThreadsSyscall mechanism is in progress. That is, an +// mPark() has been interrupted with this signal handler so we need to +// ensure the fixup is executed from this context. +//go:nosplit +func mDoFixupAndOSYield() { + mDoFixup() + osyield() +} + // templateThread is a thread in a known-good state that exists solely // to start new threads in known-good states when the calling thread // may not be in a good state. |