-rw-r--r--  src/runtime/chan.go       22
-rw-r--r--  src/runtime/chan_test.go  56
-rw-r--r--  src/runtime/proc_test.go  10
-rw-r--r--  src/runtime/runtime2.go    4
-rw-r--r--  src/runtime/select.go     19
-rw-r--r--  src/runtime/stack.go      13
6 files changed, 122 insertions, 2 deletions
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index c953b23add..17ec2e186c 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -233,6 +233,11 @@ func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool {
gp.waiting = mysg
gp.param = nil
c.sendq.enqueue(mysg)
+ // Signal to anyone trying to shrink our stack that we're about
+ // to park on a channel. The window between when this G's status
+ // changes and when we set gp.activeStackChans is not safe for
+ // stack shrinking.
+ atomic.Store8(&gp.parkingOnChan, 1)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanSend, traceEvGoBlockSend, 2)
// Ensure the value being sent is kept alive until the
// receiver copies it out. The sudog has a pointer to the
@@ -522,6 +527,11 @@ func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool)
mysg.c = c
gp.param = nil
c.recvq.enqueue(mysg)
+ // Signal to anyone trying to shrink our stack that we're about
+ // to park on a channel. The window between when this G's status
+ // changes and when we set gp.activeStackChans is not safe for
+ // stack shrinking.
+ atomic.Store8(&gp.parkingOnChan, 1)
gopark(chanparkcommit, unsafe.Pointer(&c.lock), waitReasonChanReceive, traceEvGoBlockRecv, 2)
// someone woke us up
@@ -599,7 +609,19 @@ func recv(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) {
func chanparkcommit(gp *g, chanLock unsafe.Pointer) bool {
// There are unlocked sudogs that point into gp's stack. Stack
// copying must lock the channels of those sudogs.
+ // Set activeStackChans here instead of before we try parking
+ // because we could self-deadlock in stack growth on the
+ // channel lock.
gp.activeStackChans = true
+ // Mark that it's safe for stack shrinking to occur now,
+ // because any thread acquiring this G's stack for shrinking
+ // is guaranteed to observe activeStackChans after this store.
+ atomic.Store8(&gp.parkingOnChan, 0)
+ // Make sure we unlock after setting activeStackChans and
+ // unsetting parkingOnChan. The moment we unlock chanLock
+ // we risk gp getting readied by a channel operation, and
+ // gp could then continue running before everything that
+ // precedes the unlock is visible (even to gp itself).
unlock((*mutex)(chanLock))
return true
}
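
For readers following the ordering that the new comments in chansend and chanparkcommit describe, here is a minimal, self-contained sketch of the protocol outside the runtime. It is an illustration only: sync/atomic and sync.Mutex stand in for runtime/internal/atomic and the channel lock, and the parkState type and its methods are hypothetical names, not part of this patch.

    // parkprotocol_sketch.go: models the chansend/chanparkcommit ordering above.
    package main

    import (
    	"fmt"
    	"sync"
    	"sync/atomic"
    )

    type parkState struct {
    	chanLock         sync.Mutex // stands in for the hchan lock
    	parkingOnChan    uint32     // mirrors g.parkingOnChan (accessed atomically)
    	activeStackChans bool       // mirrors g.activeStackChans
    }

    // enqueueAndPark mirrors chansend/chanrecv: announce the upcoming park
    // while the channel lock is still held, then run the park-commit step.
    func (s *parkState) enqueueAndPark() {
    	s.chanLock.Lock()
    	// ... the sudog would be enqueued on the channel's wait queue here ...
    	atomic.StoreUint32(&s.parkingOnChan, 1) // stack shrinkers must back off
    	s.parkCommit()                          // gopark runs this with the lock held
    }

    // parkCommit mirrors chanparkcommit: the order of these three steps is
    // exactly what the comments in the patch are about.
    func (s *parkState) parkCommit() {
    	s.activeStackChans = true               // 1. shrinkers must lock channels first
    	atomic.StoreUint32(&s.parkingOnChan, 0) // 2. shrinking is allowed again
    	s.chanLock.Unlock()                     // 3. only now can another G ready us
    }

    // shrinkSafe mirrors the new clause added to isShrinkStackSafe.
    func (s *parkState) shrinkSafe() bool {
    	return atomic.LoadUint32(&s.parkingOnChan) == 0
    }

    func main() {
    	var s parkState
    	fmt.Println("shrink safe before park:", s.shrinkSafe()) // true
    	s.enqueueAndPark()
    	fmt.Println("shrink safe after commit:", s.shrinkSafe()) // true again
    }

The select.go change further down follows the same ordering; the only difference is that selparkcommit has several channel locks to release rather than one.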
diff --git a/src/runtime/chan_test.go b/src/runtime/chan_test.go
index 1180e76fcd..cf596f9296 100644
--- a/src/runtime/chan_test.go
+++ b/src/runtime/chan_test.go
@@ -623,6 +623,62 @@ func TestShrinkStackDuringBlockedSend(t *testing.T) {
<-done
}
+func TestNoShrinkStackWhileParking(t *testing.T) {
+ // The goal of this test is to trigger a "racy sudog adjustment"
+ // throw. Basically, there's a window between when a goroutine
+ // becomes available for preemption for stack scanning (and thus,
+ // stack shrinking) and when the goroutine has fully parked on a
+ // channel. See issue 40641 for more details on the problem.
+ //
+ // The way we try to induce this failure is to set up two
+ // goroutines: a sender and a receiver that communicate across
+ // a channel. We try to set up a situation where the sender
+ // grows its stack temporarily then *fully* blocks on a channel
+ // often. Meanwhile a GC is triggered so that we try to get a
+ // mark worker to shrink the sender's stack and race with the
+ // sender parking.
+ //
+ // Unfortunately the race window here is so small that we
+ // either need a ridiculous number of iterations, or we add
+ // "usleep(1000)" to park_m, just before the unlockf call.
+ const n = 10
+ send := func(c chan<- int, done chan struct{}) {
+ for i := 0; i < n; i++ {
+ c <- i
+ // Use lots of stack briefly so that
+ // the GC is going to want to shrink us
+ // when it scans us. Make sure not to
+ // do any other function calls, in order
+ // to avoid shrinking ourselves
+ // when we're preempted.
+ stackGrowthRecursive(20)
+ }
+ done <- struct{}{}
+ }
+ recv := func(c <-chan int, done chan struct{}) {
+ for i := 0; i < n; i++ {
+ // Sleep here so that the sender always
+ // fully blocks.
+ time.Sleep(10 * time.Microsecond)
+ <-c
+ }
+ done <- struct{}{}
+ }
+ for i := 0; i < n*20; i++ {
+ c := make(chan int)
+ done := make(chan struct{})
+ go recv(c, done)
+ go send(c, done)
+ // Wait a little bit before triggering
+ // the GC to make sure the sender and
+ // receiver have gotten into their groove.
+ time.Sleep(50 * time.Microsecond)
+ runtime.GC()
+ <-done
+ <-done
+ }
+}
+
func TestSelectDuplicateChannel(t *testing.T) {
// This test makes sure we can queue a G on
// the same channel multiple times.
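
As a usage note, not part of the diff: the new regression test can be exercised on its own against the runtime package with something like

    go test -run=TestNoShrinkStackWhileParking -count=100 runtime

Because the race window is tiny, a high -count (or the temporary usleep in park_m that the test comment mentions) is what makes a pre-fix failure reproducible.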
diff --git a/src/runtime/proc_test.go b/src/runtime/proc_test.go
index 8c70c19d92..db3362d23f 100644
--- a/src/runtime/proc_test.go
+++ b/src/runtime/proc_test.go
@@ -517,9 +517,17 @@ func BenchmarkPingPongHog(b *testing.B) {
<-done
}
+var padData [128]uint64
+
func stackGrowthRecursive(i int) {
var pad [128]uint64
- if i != 0 && pad[0] == 0 {
+ pad = padData
+ for j := range pad {
+ if pad[j] != 0 {
+ return
+ }
+ }
+ if i != 0 {
stackGrowthRecursive(i - 1)
}
}
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 2a872bfba9..c4b27e90ce 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -436,6 +436,10 @@ type g struct {
// copying needs to acquire channel locks to protect these
// areas of the stack.
activeStackChans bool
+ // parkingOnChan indicates that the goroutine is about to
+ // park on a chansend or chanrecv. Used to signal an unsafe point
+ // for stack shrinking. It's a boolean value, but is updated atomically.
+ parkingOnChan uint8
raceignore int8 // ignore race detection events
sysblocktraced bool // StartTrace has emitted EvGoInSyscall about this goroutine
diff --git a/src/runtime/select.go b/src/runtime/select.go
index 8033b6512f..f8f768387f 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -7,6 +7,7 @@ package runtime
// This file contains the implementation of Go select statements.
import (
+ "runtime/internal/atomic"
"unsafe"
)
@@ -77,7 +78,20 @@ func selunlock(scases []scase, lockorder []uint16) {
func selparkcommit(gp *g, _ unsafe.Pointer) bool {
// There are unlocked sudogs that point into gp's stack. Stack
// copying must lock the channels of those sudogs.
+ // Set activeStackChans here instead of before we try parking
+ // because we could self-deadlock in stack growth on a
+ // channel lock.
gp.activeStackChans = true
+ // Mark that it's safe for stack shrinking to occur now,
+ // because any thread acquiring this G's stack for shrinking
+ // is guaranteed to observe activeStackChans after this store.
+ atomic.Store8(&gp.parkingOnChan, 0)
+ // Make sure we unlock after setting activeStackChans and
+ // unsetting parkingOnChan. The moment we unlock any of the
+ // channel locks we risk gp getting readied by a channel operation
+ // and gp could then continue running before everything that
+ // precedes the unlock is visible (even to gp itself).
+
// This must not access gp's stack (see gopark). In
// particular, it must not access the *hselect. That's okay,
// because by the time this is called, gp.waiting has all
@@ -313,6 +327,11 @@ loop:
// wait for someone to wake us up
gp.param = nil
+ // Signal to anyone trying to shrink our stack that we're about
+ // to park on a channel. The window between when this G's status
+ // changes and when we set gp.activeStackChans is not safe for
+ // stack shrinking.
+ atomic.Store8(&gp.parkingOnChan, 1)
gopark(selparkcommit, nil, waitReasonSelect, traceEvGoBlockSelect, 1)
gp.activeStackChans = false
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
index 0f5b16543d..e49c905f42 100644
--- a/src/runtime/stack.go
+++ b/src/runtime/stack.go
@@ -850,6 +850,13 @@ func copystack(gp *g, newsize uintptr) {
// Adjust sudogs, synchronizing with channel ops if necessary.
ncopy := used
if !gp.activeStackChans {
+ if newsize < old.hi-old.lo && atomic.Load8(&gp.parkingOnChan) != 0 {
+ // It's not safe for someone to shrink this stack while we're actively
+ // parking on a channel, but it is safe to grow since we do that
+ // ourselves and explicitly don't want to synchronize with channels
+ // since we could self-deadlock.
+ throw("racy sudog adjustment due to parking on channel")
+ }
adjustsudogs(gp, &adjinfo)
} else {
// sudogs may be pointing in to the stack and gp has
@@ -1078,7 +1085,11 @@ func isShrinkStackSafe(gp *g) bool {
// We also can't copy the stack if we're at an asynchronous
// safe-point because we don't have precise pointer maps for
// all frames.
- return gp.syscallsp == 0 && !gp.asyncSafePoint
+ //
+ // We also can't *shrink* the stack in the window between the
+ // goroutine calling gopark to park on a channel and
+ // gp.activeStackChans being set.
+ return gp.syscallsp == 0 && !gp.asyncSafePoint && atomic.Load8(&gp.parkingOnChan) == 0
}
// Maybe shrink the stack being used by gp.
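
As a closing illustration, here is a small, self-contained sketch of the decision copystack makes with this patch applied. The helper name sudogAdjustAction and its parameters are hypothetical; only parkingOnChan, activeStackChans, and the throw message come from the patch itself.

    // copystack_decision_sketch.go: models the new shrink-vs-grow check.
    package main

    import "fmt"

    // sudogAdjustAction reports how a stack copy would proceed, mirroring the
    // branch added to copystack above. newsize < oldsize means a shrink.
    func sudogAdjustAction(activeStackChans bool, parkingOnChan uint8, newsize, oldsize uintptr) string {
    	if !activeStackChans {
    		if newsize < oldsize && parkingOnChan != 0 {
    			// A shrink is initiated by another thread (e.g. a GC worker),
    			// so it can race with the goroutine that is mid-park; a grow
    			// is performed by the goroutine itself and cannot.
    			return "throw: racy sudog adjustment due to parking on channel"
    		}
    		return "adjustsudogs: no channel locks needed"
    	}
    	return "syncadjustsudogs: lock the sudogs' channels, then adjust"
    }

    func main() {
    	// Shrink attempted while the goroutine is mid-park: must throw.
    	fmt.Println(sudogAdjustAction(false, 1, 2048, 4096))
    	// Grow from the same window: fine, the goroutine grows its own stack.
    	fmt.Println(sudogAdjustAction(false, 1, 8192, 4096))
    	// Fully parked (activeStackChans set): channel locks protect the sudogs.
    	fmt.Println(sudogAdjustAction(true, 0, 2048, 4096))
    }

Read together with isShrinkStackSafe, the new clause there is the actual guard: a shrink is simply not attempted while parkingOnChan is set, so the throw added to copystack acts as a loud backstop if that window is ever entered anyway.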