path: root/src/runtime/proc.go
author	Michael Pratt <mpratt@google.com>	2022-02-04 17:15:28 -0500
committer	Michael Pratt <mpratt@google.com>	2022-02-15 15:40:35 +0000
commit	0a5fae2a0e965024f692b95f7e857904a274fcb6 (patch)
tree	393819d9f85f5be1f54bc480f7c6763859bc8997	/src/runtime/proc.go
parent	0b321c9a7c0055dfd3f875dea930a28690659211 (diff)
runtime, syscall: reimplement AllThreadsSyscall using only signals.
In issue 50113, we see that a thread blocked in a system call can result in a hang of AllThreadsSyscall. To resolve this, we must send a signal to these threads to knock them out of the system call long enough to run the per-thread syscall.

Stepping back, if we need to send signals anyway, it should be possible to implement this entire mechanism on top of signals. This CL does so, vastly simplifying the mechanism, both as a direct result of newly-unnecessary code as well as some ancillary simplifications to make things easier to follow.

Major changes:

* The rest of the mechanism is moved to os_linux.go, with fields in mOS instead of m itself.
* 'Fixup' fields and functions are renamed to 'perThreadSyscall' so they are more precise about their purpose.
* Rather than getting passed a closure, doAllThreadsSyscall takes the syscall number and arguments. This avoids a lot of hairy behavior:
  * The closure may potentially only be live in fields in the M, hidden from the GC. Not necessary with no closure.
  * The need to loan out the race context. A direct RawSyscall6 call does not require any race context.
  * The closure previously conditionally panicked in strange locations, like a signal handler. Now we simply throw.
* All manual fixup synchronization with mPark, sysmon, templateThread, sigqueue, etc. is gone. The core approach is much simpler: doAllThreadsSyscall sends a signal to every thread in allm, which executes the system call from the signal handler. We use (SIGRTMIN + 1), aka SIGSETXID, the same signal used by glibc for this purpose. As such, we are careful to only handle this signal in non-cgo binaries.

Synchronization with thread creation is a key part of this CL. The comment near the top of doAllThreadsSyscall describes the required synchronization semantics and how they are achieved.

Note that the current use of allocmLock protects the state mutations of allm that are also protected by sched.lock. allocmLock is used instead of sched.lock simply to avoid holding sched.lock for so long.

Fixes #50113

Change-Id: Ic7ea856dc66cf711731540a54996e08fc986ce84
Reviewed-on: https://go-review.googlesource.com/c/go/+/383434
Reviewed-by: Austin Clements <austin@google.com>
Trust: Michael Pratt <mpratt@google.com>
Run-TryBot: Michael Pratt <mpratt@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
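For context, a minimal user-level sketch of the API whose runtime half this patch reimplements. This is a hypothetical example, not part of the patch: it assumes Linux, a non-cgo binary, and PR_SET_NO_NEW_PRIVS = 38 from <linux/prctl.h>.

package main

import (
	"fmt"
	"syscall"
)

// prSetNoNewPrivs is PR_SET_NO_NEW_PRIVS from <linux/prctl.h>; it is
// chosen here only for illustration.
const prSetNoNewPrivs = 38

func main() {
	// AllThreadsSyscall issues the raw system call on every thread owned
	// by the Go runtime. After this patch, threads blocked in other
	// system calls are knocked out by SIGSETXID (SIGRTMIN + 1), so the
	// broadcast can no longer hang (issue 50113).
	_, _, errno := syscall.AllThreadsSyscall(syscall.SYS_PRCTL, prSetNoNewPrivs, 1, 0)
	if errno != 0 {
		fmt.Println("prctl failed:", errno)
		return
	}
	fmt.Println("no_new_privs set on every thread")
}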
Diffstat (limited to 'src/runtime/proc.go')
-rw-r--r--	src/runtime/proc.go	149
1 file changed, 41 insertions(+), 108 deletions(-)
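The locking contract visible in the hunks below (allocm read-locks allocmLock; doAllThreadsSyscall write-locks it to freeze the set of Ms) can be illustrated with an ordinary sync.RWMutex analogue. This is purely a sketch: the runtime uses its own rwmutex, and creationLock, workersMu, workers, addWorker, and broadcast are invented stand-ins for allocmLock, sched.lock, allm, allocm/newm, and doAllThreadsSyscall.

package main

import (
	"fmt"
	"sync"
)

var (
	creationLock sync.RWMutex // stand-in for allocmLock
	workersMu    sync.Mutex   // stand-in for sched.lock; guards workers
	workers      []int        // stand-in for allm
)

// addWorker mirrors allocm/newm: registration takes the lock for read,
// so creations may run concurrently with one another, but never while a
// broadcast holds the write lock.
func addWorker(id int) {
	creationLock.RLock()
	defer creationLock.RUnlock()
	workersMu.Lock()
	workers = append(workers, id)
	workersMu.Unlock()
}

// broadcast mirrors doAllThreadsSyscall: taking the lock for write
// blocks the creation of new workers, so the set visited here is
// complete and stable for the duration of the loop.
func broadcast() {
	creationLock.Lock()
	defer creationLock.Unlock()
	for _, id := range workers {
		fmt.Println("signaling worker", id)
	}
}

func main() {
	for i := 0; i < 3; i++ {
		addWorker(i)
	}
	broadcast()
}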
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 728b3bcd23..b997a467ba 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -167,10 +167,6 @@ func main() {
mainStarted = true
if GOARCH != "wasm" { // no threads on wasm yet, so no sysmon
- // For runtime_syscall_doAllThreadsSyscall, we
- // register sysmon is not ready for the world to be
- // stopped.
- atomic.Store(&sched.sysmonStarting, 1)
systemstack(func() {
newm(sysmon, nil, -1)
})
@@ -187,7 +183,6 @@ func main() {
if g.m != &m0 {
throw("runtime.main not on m0")
}
- m0.doesPark = true
// Record when the world started.
// Must be before doInit for tracing init.
@@ -1447,22 +1442,12 @@ func mstartm0() {
initsig(false)
}
-// mPark causes a thread to park itself - temporarily waking for
-// fixups but otherwise waiting to be fully woken. This is the
-// only way that m's should park themselves.
+// mPark causes a thread to park itself, returning once woken.
//go:nosplit
func mPark() {
- g := getg()
- for {
- notesleep(&g.m.park)
- // Note, because of signal handling by this parked m,
- // a preemptive mDoFixup() may actually occur via
- // mDoFixupAndOSYield(). (See golang.org/issue/44193)
- noteclear(&g.m.park)
- if !mDoFixup() {
- return
- }
- }
+ gp := getg()
+ notesleep(&gp.m.park)
+ noteclear(&gp.m.park)
}
// mexit tears down and exits the current thread.
@@ -1718,8 +1703,14 @@ type cgothreadstart struct {
//
//go:yeswritebarrierrec
func allocm(_p_ *p, fn func(), id int64) *m {
+ allocmLock.rlock()
+
+ // The caller owns _p_, but we may borrow (i.e., acquirep) it. We must
+ // disable preemption to ensure it is not stolen, which would make the
+ // caller lose ownership.
+ acquirem()
+
_g_ := getg()
- acquirem() // disable GC because it can be called from sysmon
if _g_.m.p == 0 {
acquirep(_p_) // temporarily borrow p for mallocs in this function
}
@@ -1765,8 +1756,9 @@ func allocm(_p_ *p, fn func(), id int64) *m {
if _p_ == _g_.m.p.ptr() {
releasep()
}
- releasem(_g_.m)
+ releasem(_g_.m)
+ allocmLock.runlock()
return mp
}
@@ -2043,9 +2035,17 @@ func unlockextra(mp *m) {
atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp)))
}
-// execLock serializes exec and clone to avoid bugs or unspecified behaviour
-// around exec'ing while creating/destroying threads. See issue #19546.
-var execLock rwmutex
+var (
+ // allocmLock is locked for read when creating new Ms in allocm and their
+ // addition to allm. Thus acquiring this lock for write blocks the
+ // creation of new Ms.
+ allocmLock rwmutex
+
+ // execLock serializes exec and clone to avoid bugs or unspecified
+ // behaviour around exec'ing while creating/destroying threads. See
+ // issue #19546.
+ execLock rwmutex
+)
// newmHandoff contains a list of m structures that need new OS threads.
// This is used by newm in situations where newm itself can't safely
@@ -2075,8 +2075,19 @@ var newmHandoff struct {
// id is optional pre-allocated m ID. Omit by passing -1.
//go:nowritebarrierrec
func newm(fn func(), _p_ *p, id int64) {
+ // allocm adds a new M to allm, but they do not start until created by
+ // the OS in newm1 or the template thread.
+ //
+ // doAllThreadsSyscall requires that every M in allm will eventually
+ // start and be signal-able, even with a STW.
+ //
+ // Disable preemption here until we start the thread to ensure that
+ // newm is not preempted between allocm and starting the new thread,
+ // ensuring that anything added to allm is guaranteed to eventually
+ // start.
+ acquirem()
+
mp := allocm(_p_, fn, id)
- mp.doesPark = (_p_ != nil)
mp.nextp.set(_p_)
mp.sigmask = initSigmask
if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
@@ -2102,9 +2113,14 @@ func newm(fn func(), _p_ *p, id int64) {
notewakeup(&newmHandoff.wake)
}
unlock(&newmHandoff.lock)
+ // The M has not started yet, but the template thread does not
+ // participate in STW, so it will always process queued Ms and
+ // it is safe to releasem.
+ releasem(getg().m)
return
}
newm1(mp)
+ releasem(getg().m)
}
func newm1(mp *m) {
@@ -2152,81 +2168,6 @@ func startTemplateThread() {
releasem(mp)
}
-// mFixupRace is used to temporarily borrow the race context from the
-// coordinating m during a syscall_runtime_doAllThreadsSyscall and
-// loan it out to each of the m's of the runtime so they can execute a
-// mFixup.fn in that context.
-var mFixupRace struct {
- lock mutex
- ctx uintptr
-}
-
-// mDoFixup runs any outstanding fixup function for the running m.
-// Returns true if a fixup was outstanding and actually executed.
-//
-// Note: to avoid deadlocks, and the need for the fixup function
-// itself to be async safe, signals are blocked for the working m
-// while it holds the mFixup lock. (See golang.org/issue/44193)
-//
-//go:nosplit
-func mDoFixup() bool {
- _g_ := getg()
- if used := atomic.Load(&_g_.m.mFixup.used); used == 0 {
- return false
- }
-
- // slow path - if fixup fn is used, block signals and lock.
- var sigmask sigset
- sigsave(&sigmask)
- sigblock(false)
- lock(&_g_.m.mFixup.lock)
- fn := _g_.m.mFixup.fn
- if fn != nil {
- if gcphase != _GCoff {
- // We can't have a write barrier in this
- // context since we may not have a P, but we
- // clear fn to signal that we've executed the
- // fixup. As long as fn is kept alive
- // elsewhere, technically we should have no
- // issues with the GC, but fn is likely
- // generated in a different package altogether
- // that may change independently. Just assert
- // the GC is off so this lack of write barrier
- // is more obviously safe.
- throw("GC must be disabled to protect validity of fn value")
- }
- if _g_.racectx != 0 || !raceenabled {
- fn(false)
- } else {
- // temporarily acquire the context of the
- // originator of the
- // syscall_runtime_doAllThreadsSyscall and
- // block others from using it for the duration
- // of the fixup call.
- lock(&mFixupRace.lock)
- _g_.racectx = mFixupRace.ctx
- fn(false)
- _g_.racectx = 0
- unlock(&mFixupRace.lock)
- }
- *(*uintptr)(unsafe.Pointer(&_g_.m.mFixup.fn)) = 0
- atomic.Store(&_g_.m.mFixup.used, 0)
- }
- unlock(&_g_.m.mFixup.lock)
- msigrestore(sigmask)
- return fn != nil
-}
-
-// mDoFixupAndOSYield is called when an m is unable to send a signal
-// because the allThreadsSyscall mechanism is in progress. That is, an
-// mPark() has been interrupted with this signal handler so we need to
-// ensure the fixup is executed from this context.
-//go:nosplit
-func mDoFixupAndOSYield() {
- mDoFixup()
- osyield()
-}
-
// templateThread is a thread in a known-good state that exists solely
// to start new threads in known-good states when the calling thread
// may not be in a good state.
@@ -2263,7 +2204,6 @@ func templateThread() {
noteclear(&newmHandoff.wake)
unlock(&newmHandoff.lock)
notesleep(&newmHandoff.wake)
- mDoFixup()
}
}
@@ -5110,10 +5050,6 @@ func sysmon() {
checkdead()
unlock(&sched.lock)
- // For syscall_runtime_doAllThreadsSyscall, sysmon is
- // sufficiently up to participate in fixups.
- atomic.Store(&sched.sysmonStarting, 0)
-
lasttrace := int64(0)
idle := 0 // how many cycles in succession we had not wokeup somebody
delay := uint32(0)
@@ -5128,7 +5064,6 @@ func sysmon() {
delay = 10 * 1000
}
usleep(delay)
- mDoFixup()
// sysmon should not enter deep sleep if schedtrace is enabled so that
// it can print that information at the right time.
@@ -5165,7 +5100,6 @@ func sysmon() {
osRelax(true)
}
syscallWake = notetsleep(&sched.sysmonnote, sleep)
- mDoFixup()
if shouldRelax {
osRelax(false)
}
@@ -5208,7 +5142,6 @@ func sysmon() {
incidlelocked(1)
}
}
- mDoFixup()
if GOOS == "netbsd" && needSysmonWorkaround {
// netpoll is responsible for waiting for timer
// expiration, so we typically don't have to worry