diff options
author | Michael Anthony Knyszek <mknyszek@google.com> | 2023-03-27 15:47:18 +0000 |
---|---|---|
committer | Michael Knyszek <mknyszek@google.com> | 2023-07-21 00:28:09 +0000 |
commit | 4f4c23512e2d5cddfa7024a7e9c217ba357b9978 (patch) | |
tree | 82a945eacc4a4bee1e7dc7f73a7c7072e5be4e03 | |
parent | 16ec51471288f86d601efb9a2eb6b418cf634c16 (diff) | |
download | go-4f4c23512e2d5cddfa7024a7e9c217ba357b9978.tar.gz go-4f4c23512e2d5cddfa7024a7e9c217ba357b9978.zip |
runtime: fix debug non-concurrent sweep mode after activeSweep changes
Currently the GC creates a sweepLocker before restarting the world at
the end of the mark phase, so that it can safely flush mcaches without
the runtime incorrectly concluding that sweeping is done before that
happens.
However, with GODEBUG=gcstoptheworld=2, where sweeping happens during
that STW phase, creating that sweepLocker will fail, since the runtime
will conclude that sweeping is in fact complete (all the queues will be
drained). The problem however is that gcSweep, which does the
non-concurrent sweeping, doesn't actually flush mcaches.
In essence, this failure to create a sweepLocker is indicating a real
issue: sweeping is marked as complete, but we haven't flush the mcaches
yet!
The fix to this is to flush mcaches in gcSweep when in a non-concurrent
sweep. Now that gcSweep actually completes a full sweep, it's safe to
ignore a failure to create a sweepLocker (and in fact, it *must* fail).
While we're here, let's also remove _ConcurrentSweep, the debug flag.
There's already an alias for it called concurrentSweep, and there's only
one use of it in gcSweep.
Lastly, add a dist test for the GODEBUG=gcstoptheworld=2 mode.
Fixes #53885.
Change-Id: I8a1e5b8f362ed8abd03f76e4950d3211f145ab1f
Reviewed-on: https://go-review.googlesource.com/c/go/+/479517
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
-rw-r--r-- | src/cmd/dist/test.go | 18 | ||||
-rw-r--r-- | src/runtime/malloc.go | 2 | ||||
-rw-r--r-- | src/runtime/mgc.go | 49 |
3 files changed, 54 insertions, 15 deletions
diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go index f148fb97b5..bd02149c11 100644 --- a/src/cmd/dist/test.go +++ b/src/cmd/dist/test.go @@ -676,8 +676,22 @@ func (t *tester) registerTests() { }) } - // morestack tests. We only run these on in long-test mode - // (with GO_TEST_SHORT=false) because the runtime test is + // GODEBUG=gcstoptheworld=2 tests. We only run these in long-test + // mode (with GO_TEST_SHORT=0) because this is just testing a + // non-critical debug setting. + if !t.compileOnly && !t.short { + t.registerTest("GODEBUG=gcstoptheworld=2 archive/zip", + &goTest{ + variant: "runtime:gcstoptheworld2", + timeout: 300 * time.Second, + short: true, + env: []string{"GODEBUG=gcstoptheworld=2"}, + pkg: "archive/zip", + }) + } + + // morestack tests. We only run these in long-test mode + // (with GO_TEST_SHORT=0) because the runtime test is // already quite long and mayMoreStackMove makes it about // twice as slow. if !t.compileOnly && !t.short { diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go index 44479cc2be..ec2e547d3f 100644 --- a/src/runtime/malloc.go +++ b/src/runtime/malloc.go @@ -117,8 +117,6 @@ const ( pageShift = _PageShift pageSize = _PageSize - concurrentSweep = _ConcurrentSweep - _PageSize = 1 << _PageShift _PageMask = _PageSize - 1 diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index de5ae0ae00..8efee74243 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -135,9 +135,12 @@ import ( ) const ( - _DebugGC = 0 - _ConcurrentSweep = true - _FinBlockSize = 4 * 1024 + _DebugGC = 0 + _FinBlockSize = 4 * 1024 + + // concurrentSweep is a debug flag. Disabling this flag + // ensures all spans are swept while the world is stopped. + concurrentSweep = true // debugScanConservative enables debug logging for stack // frames that are scanned conservatively. @@ -969,6 +972,7 @@ func gcMarkTermination() { // before continuing. }) + var stwSwept bool systemstack(func() { work.heap2 = work.bytesMarked if debug.gccheckmark > 0 { @@ -987,7 +991,7 @@ func gcMarkTermination() { // marking is complete so we can turn the write barrier off setGCPhase(_GCoff) - gcSweep(work.mode) + stwSwept = gcSweep(work.mode) }) mp.traceback = 0 @@ -1079,9 +1083,19 @@ func gcMarkTermination() { // Those aren't tracked in any sweep lists, so we need to // count them against sweep completion until we ensure all // those spans have been forced out. + // + // If gcSweep fully swept the heap (for example if the sweep + // is not concurrent due to a GODEBUG setting), then we expect + // the sweepLocker to be invalid, since sweeping is done. + // + // N.B. Below we might duplicate some work from gcSweep; this is + // fine as all that work is idempotent within a GC cycle, and + // we're still holding worldsema so a new cycle can't start. sl := sweep.active.begin() - if !sl.valid { + if !stwSwept && !sl.valid { throw("failed to set sweep barrier") + } else if stwSwept && sl.valid { + throw("non-concurrent sweep failed to drain all sweep queues") } systemstack(func() { startTheWorldWithSema() }) @@ -1123,9 +1137,15 @@ func gcMarkTermination() { pp.pinnerCache = nil }) }) - // Now that we've swept stale spans in mcaches, they don't - // count against unswept spans. - sweep.active.end(sl) + if sl.valid { + // Now that we've swept stale spans in mcaches, they don't + // count against unswept spans. + // + // Note: this sweepLocker may not be valid if sweeping had + // already completed during the STW. See the corresponding + // begin() call that produced sl. + sweep.active.end(sl) + } // Print gctrace before dropping worldsema. As soon as we drop // worldsema another cycle could start and smash the stats @@ -1538,10 +1558,12 @@ func gcMark(startTime int64) { // gcSweep must be called on the system stack because it acquires the heap // lock. See mheap for details. // +// Returns true if the heap was fully swept by this function. +// // The world must be stopped. // //go:systemstack -func gcSweep(mode gcMode) { +func gcSweep(mode gcMode) bool { assertWorldStopped() if gcphase != _GCoff { @@ -1559,12 +1581,16 @@ func gcSweep(mode gcMode) { sweep.centralIndex.clear() - if !_ConcurrentSweep || mode == gcForceBlockMode { + if !concurrentSweep || mode == gcForceBlockMode { // Special case synchronous sweep. // Record that no proportional sweeping has to happen. lock(&mheap_.lock) mheap_.sweepPagesPerByte = 0 unlock(&mheap_.lock) + // Flush all mcaches. + for _, pp := range allp { + pp.mcache.prepareForSweep() + } // Sweep all spans eagerly. for sweepone() != ^uintptr(0) { sweep.npausesweep++ @@ -1578,7 +1604,7 @@ func gcSweep(mode gcMode) { // available immediately. mProf_NextCycle() mProf_Flush() - return + return true } // Background sweep. @@ -1588,6 +1614,7 @@ func gcSweep(mode gcMode) { ready(sweep.g, 0, true) } unlock(&sweep.lock) + return false } // gcResetMarkState resets global state prior to marking (concurrent |