aboutsummaryrefslogtreecommitdiff
path: root/src/runtime
diff options
context:
space:
mode:
authorMichael Pratt <mpratt@google.com>2020-11-17 11:55:53 -0500
committerMichael Pratt <mpratt@google.com>2021-01-05 20:00:43 +0000
commit6b37b15d9520f9fa2b819e66a37fac4b2d08da78 (patch)
treee9870c709e116c1cc9fba45b3e702bb5ccd753fb /src/runtime
parent9eef49cfa6eb016e3b20df189e540c6c5a71f365 (diff)
downloadgo-6b37b15d9520f9fa2b819e66a37fac4b2d08da78.tar.gz
go-6b37b15d9520f9fa2b819e66a37fac4b2d08da78.zip
runtime: don't take allglock in tracebackothers
tracebackothers is called from fatal throw/panic. A fatal throw may be taken with allglock held (notably in the allocator when allglock is held), which would cause a deadlock in tracebackothers when we try to take allglock again. Locking allglock here is also often a lock order violation w.r.t. the locks held when throw was called. Avoid the deadlock and ordering issues by skipping locking altogether. It is OK to miss concurrently created Gs (which are generally avoided by freezetheworld(), and which were possible previously anyways if created after the loop). Fatal throw/panic freezetheworld(), which should freeze other threads that may be racing to modify allgs. However, freezetheworld() does _not_ guarantee that it stops all other threads, so we can't simply drop the lock. Fixes #42669 Updates #43175 Change-Id: I657aec46ed35fd5d1b3f1ba25b500128ab26b088 Reviewed-on: https://go-review.googlesource.com/c/go/+/270861 Reviewed-by: Michael Knyszek <mknyszek@google.com> Trust: Michael Pratt <mpratt@google.com>
Diffstat (limited to 'src/runtime')
-rw-r--r--src/runtime/mgcmark.go1
-rw-r--r--src/runtime/proc.go43
-rw-r--r--src/runtime/runtime2.go1
-rw-r--r--src/runtime/traceback.go27
4 files changed, 56 insertions, 16 deletions
diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go
index 52267e6fb0..46fae5de72 100644
--- a/src/runtime/mgcmark.go
+++ b/src/runtime/mgcmark.go
@@ -132,7 +132,6 @@ fail:
println("gp", gp, "goid", gp.goid,
"status", readgstatus(gp),
"gcscandone", gp.gcscandone)
- unlock(&allglock) // Avoid self-deadlock with traceback.
throw("scan missed a g")
}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index ca78587aad..5a942a6831 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -490,8 +490,29 @@ func lockedOSThread() bool {
}
var (
- allgs []*g
+ // allgs contains all Gs ever created (including dead Gs), and thus
+ // never shrinks.
+ //
+ // Access via the slice is protected by allglock or stop-the-world.
+ // Readers that cannot take the lock may (carefully!) use the atomic
+ // variables below.
allglock mutex
+ allgs []*g
+
+ // allglen and allgptr are atomic variables that contain len(allg) and
+ // &allg[0] respectively. Proper ordering depends on totally-ordered
+ // loads and stores. Writes are protected by allglock.
+ //
+ // allgptr is updated before allglen. Readers should read allglen
+ // before allgptr to ensure that allglen is always <= len(allgptr). New
+ // Gs appended during the race can be missed. For a consistent view of
+ // all Gs, allglock must be held.
+ //
+ // allgptr copies should always be stored as a concrete type or
+ // unsafe.Pointer, not uintptr, to ensure that GC can still reach it
+ // even if it points to a stale array.
+ allglen uintptr
+ allgptr **g
)
func allgadd(gp *g) {
@@ -501,10 +522,25 @@ func allgadd(gp *g) {
lock(&allglock)
allgs = append(allgs, gp)
- allglen = uintptr(len(allgs))
+ if &allgs[0] != allgptr {
+ atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0]))
+ }
+ atomic.Storeuintptr(&allglen, uintptr(len(allgs)))
unlock(&allglock)
}
+// atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex.
+func atomicAllG() (**g, uintptr) {
+ length := atomic.Loaduintptr(&allglen)
+ ptr := (**g)(atomic.Loadp(unsafe.Pointer(&allgptr)))
+ return ptr, length
+}
+
+// atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG.
+func atomicAllGIndex(ptr **g, i uintptr) *g {
+ return *(**g)(add(unsafe.Pointer(ptr), i*sys.PtrSize))
+}
+
const (
// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
@@ -4266,7 +4302,7 @@ func badunlockosthread() {
}
func gcount() int32 {
- n := int32(allglen) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
+ n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.n - int32(atomic.Load(&sched.ngsys))
for _, _p_ := range allp {
n -= _p_.gFree.n
}
@@ -4970,7 +5006,6 @@ func checkdead() {
case _Grunnable,
_Grunning,
_Gsyscall:
- unlock(&allglock)
print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
throw("checkdead: runnable g")
}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index c9376827da..109f0da131 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -1052,7 +1052,6 @@ func (w waitReason) String() string {
}
var (
- allglen uintptr
allm *m
gomaxprocs int32
ncpu int32
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 0825e9e707..2601cd697f 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -917,17 +917,25 @@ func tracebackothers(me *g) {
level, _, _ := gotraceback()
// Show the current goroutine first, if we haven't already.
- g := getg()
- gp := g.m.curg
- if gp != nil && gp != me {
+ curgp := getg().m.curg
+ if curgp != nil && curgp != me {
print("\n")
- goroutineheader(gp)
- traceback(^uintptr(0), ^uintptr(0), 0, gp)
+ goroutineheader(curgp)
+ traceback(^uintptr(0), ^uintptr(0), 0, curgp)
}
- lock(&allglock)
- for _, gp := range allgs {
- if gp == me || gp == g.m.curg || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 {
+ // We can't take allglock here because this may be during fatal
+ // throw/panic, where locking allglock could be out-of-order or a
+ // direct deadlock.
+ //
+ // Instead, use atomic access to allgs which requires no locking. We
+ // don't lock against concurrent creation of new Gs, but even with
+ // allglock we may miss Gs created after this loop.
+ ptr, length := atomicAllG()
+ for i := uintptr(0); i < length; i++ {
+ gp := atomicAllGIndex(ptr, i)
+
+ if gp == me || gp == curgp || readgstatus(gp) == _Gdead || isSystemGoroutine(gp, false) && level < 2 {
continue
}
print("\n")
@@ -936,14 +944,13 @@ func tracebackothers(me *g) {
// called from a signal handler initiated during a
// systemstack call. The original G is still in the
// running state, and we want to print its stack.
- if gp.m != g.m && readgstatus(gp)&^_Gscan == _Grunning {
+ if gp.m != getg().m && readgstatus(gp)&^_Gscan == _Grunning {
print("\tgoroutine running on other thread; stack unavailable\n")
printcreatedby(gp)
} else {
traceback(^uintptr(0), ^uintptr(0), 0, gp)
}
}
- unlock(&allglock)
}
// tracebackHexdump hexdumps part of stk around frame.sp and frame.fp