diff options
author | Michael Anthony Knyszek <mknyszek@google.com> | 2022-02-14 22:36:25 +0000 |
---|---|---|
committer | Michael Knyszek <mknyszek@google.com> | 2022-03-31 20:02:10 +0000 |
commit | 3334afd7609185574ad179cb030064f0825c2101 (patch) | |
tree | 5ab20e6974a9343d7157cbdfc545c2e36dac6596 | |
parent | f1b5c048d7b7589a5b3ae423114baf27b3456021 (diff) | |
download | go-3334afd7609185574ad179cb030064f0825c2101.tar.gz go-3334afd7609185574ad179cb030064f0825c2101.zip |
runtime: remove old pacer and the PacerRedesign goexperiment
Now that Go 1.18 has been released, remove the old pacer.
Change-Id: Ie7a7596d67f3fc25d3f375a08fc75eafac2eb834
Reviewed-on: https://go-review.googlesource.com/c/go/+/393396
Trust: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
-rw-r--r-- | src/internal/buildcfg/exp.go | 1 | ||||
-rw-r--r-- | src/internal/goexperiment/exp_pacerredesign_off.go | 9 | ||||
-rw-r--r-- | src/internal/goexperiment/exp_pacerredesign_on.go | 9 | ||||
-rw-r--r-- | src/internal/goexperiment/flags.go | 6 | ||||
-rw-r--r-- | src/runtime/export_test.go | 4 | ||||
-rw-r--r-- | src/runtime/mgc.go | 8 | ||||
-rw-r--r-- | src/runtime/mgcmark.go | 30 | ||||
-rw-r--r-- | src/runtime/mgcpacer.go | 577 | ||||
-rw-r--r-- | src/runtime/mgcpacer_test.go | 52 |
9 files changed, 192 insertions, 504 deletions
diff --git a/src/internal/buildcfg/exp.go b/src/internal/buildcfg/exp.go index 6b770558fd..87b1686c20 100644 --- a/src/internal/buildcfg/exp.go +++ b/src/internal/buildcfg/exp.go @@ -70,7 +70,6 @@ func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) { baseline := goexperiment.Flags{ RegabiWrappers: regabiSupported, RegabiArgs: regabiSupported, - PacerRedesign: true, } // Start with the statically enabled set of experiments. diff --git a/src/internal/goexperiment/exp_pacerredesign_off.go b/src/internal/goexperiment/exp_pacerredesign_off.go deleted file mode 100644 index 62e1831437..0000000000 --- a/src/internal/goexperiment/exp_pacerredesign_off.go +++ /dev/null @@ -1,9 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. - -//go:build !goexperiment.pacerredesign -// +build !goexperiment.pacerredesign - -package goexperiment - -const PacerRedesign = false -const PacerRedesignInt = 0 diff --git a/src/internal/goexperiment/exp_pacerredesign_on.go b/src/internal/goexperiment/exp_pacerredesign_on.go deleted file mode 100644 index b22b031009..0000000000 --- a/src/internal/goexperiment/exp_pacerredesign_on.go +++ /dev/null @@ -1,9 +0,0 @@ -// Code generated by mkconsts.go. DO NOT EDIT. - -//go:build goexperiment.pacerredesign -// +build goexperiment.pacerredesign - -package goexperiment - -const PacerRedesign = true -const PacerRedesignInt = 1 diff --git a/src/internal/goexperiment/flags.go b/src/internal/goexperiment/flags.go index 9150493575..558b02607e 100644 --- a/src/internal/goexperiment/flags.go +++ b/src/internal/goexperiment/flags.go @@ -79,12 +79,6 @@ type Flags struct { // reflection calls use registers). RegabiArgs bool - // PacerRedesign enables the new GC pacer in the runtime. - // - // Details regarding the new pacer may be found at - // https://golang.org/design/44167-gc-pacer-redesign - PacerRedesign bool - // HeapMinimum512KiB reduces the minimum heap size to 512 KiB. // // This was originally reduced as part of PacerRedesign, but diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 0ac15ce82c..0156981524 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1308,9 +1308,9 @@ func (c *GCController) Revise(d GCControllerReviseDelta) { func (c *GCController) EndCycle(bytesMarked uint64, assistTime, elapsed int64, gomaxprocs int) { c.assistTime = assistTime - triggerRatio := c.endCycle(elapsed, gomaxprocs, false) + c.endCycle(elapsed, gomaxprocs, false) c.resetLive(bytesMarked) - c.commit(triggerRatio) + c.commit() } var escapeSink any diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go index 44b96154e7..ba679e0af5 100644 --- a/src/runtime/mgc.go +++ b/src/runtime/mgc.go @@ -898,15 +898,15 @@ top: // endCycle depends on all gcWork cache stats being flushed. // The termination algorithm above ensured that up to // allocations since the ragged barrier. - nextTriggerRatio := gcController.endCycle(now, int(gomaxprocs), work.userForced) + gcController.endCycle(now, int(gomaxprocs), work.userForced) // Perform mark termination. This will restart the world. - gcMarkTermination(nextTriggerRatio) + gcMarkTermination() } // World must be stopped and mark assists and background workers must be // disabled. -func gcMarkTermination(nextTriggerRatio float64) { +func gcMarkTermination() { // Start marktermination (write barrier remains enabled for now). setGCPhase(_GCmarktermination) @@ -976,7 +976,7 @@ func gcMarkTermination(nextTriggerRatio float64) { memstats.last_heap_inuse = memstats.heap_inuse // Update GC trigger and pacing for the next cycle. - gcController.commit(nextTriggerRatio) + gcController.commit() gcPaceSweeper(gcController.trigger) gcPaceScavenger(gcController.heapGoal, gcController.lastHeapGoal) diff --git a/src/runtime/mgcmark.go b/src/runtime/mgcmark.go index 0bf044e314..3e1a0b560a 100644 --- a/src/runtime/mgcmark.go +++ b/src/runtime/mgcmark.go @@ -8,7 +8,6 @@ package runtime import ( "internal/goarch" - "internal/goexperiment" "runtime/internal/atomic" "runtime/internal/sys" "unsafe" @@ -247,12 +246,10 @@ func markroot(gcw *gcWork, i uint32, flushBgCredit bool) int64 { } }) } - if goexperiment.PacerRedesign { - if workCounter != nil && workDone != 0 { - workCounter.Add(workDone) - if flushBgCredit { - gcFlushBgCredit(workDone) - } + if workCounter != nil && workDone != 0 { + workCounter.Add(workDone) + if flushBgCredit { + gcFlushBgCredit(workDone) } } return workDone @@ -701,7 +698,6 @@ func gcFlushBgCredit(scanWork int64) { // scanstack scans gp's stack, greying all pointers found on the stack. // -// For goexperiment.PacerRedesign: // Returns the amount of scan work performed, but doesn't update // gcController.stackScanWork or flush any credit. Any background credit produced // by this function should be flushed by its caller. scanstack itself can't @@ -1157,10 +1153,7 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 { if work.markrootNext < work.markrootJobs { job := atomic.Xadd(&work.markrootNext, +1) - 1 if job < work.markrootJobs { - work := markroot(gcw, job, false) - if goexperiment.PacerRedesign { - workFlushed += work - } + workFlushed += markroot(gcw, job, false) continue } } @@ -1558,19 +1551,6 @@ func gcmarknewobject(span *mspan, obj, size, scanSize uintptr) { gcw := &getg().m.p.ptr().gcw gcw.bytesMarked += uint64(size) - if !goexperiment.PacerRedesign { - // The old pacer counts newly allocated memory toward - // heapScanWork because heapScan is continuously updated - // throughout the GC cycle with newly allocated memory. However, - // these objects are never actually scanned, so we need - // to account for them in heapScanWork here, "faking" their work. - // Otherwise the pacer will think it's always behind, potentially - // by a large margin. - // - // The new pacer doesn't care about this because it ceases to updated - // heapScan once a GC cycle starts, effectively snapshotting it. - gcw.heapScanWork += int64(scanSize) - } } // gcMarkTinyAllocs greys all active tiny alloc blocks. diff --git a/src/runtime/mgcpacer.go b/src/runtime/mgcpacer.go index d54dbc26c2..7857ee7527 100644 --- a/src/runtime/mgcpacer.go +++ b/src/runtime/mgcpacer.go @@ -14,8 +14,11 @@ import ( const ( // gcGoalUtilization is the goal CPU utilization for // marking as a fraction of GOMAXPROCS. - gcGoalUtilization = goexperiment.PacerRedesignInt*gcBackgroundUtilization + - (1-goexperiment.PacerRedesignInt)*(gcBackgroundUtilization+0.05) + // + // Increasing the goal utilization will shorten GC cycles as the GC + // has more resources behind it, lessening costs from the write barrier, + // but comes at the cost of increasing mutator latency. + gcGoalUtilization = gcBackgroundUtilization // gcBackgroundUtilization is the fixed CPU utilization for background // marking. It must be <= gcGoalUtilization. The difference between @@ -23,16 +26,14 @@ const ( // mark assists. The scheduler will aim to use within 50% of this // goal. // - // Setting this to < gcGoalUtilization avoids saturating the trigger - // feedback controller when there are no assists, which allows it to - // better control CPU and heap growth. However, the larger the gap, - // the more mutator assists are expected to happen, which impact - // mutator latency. - // - // If goexperiment.PacerRedesign, the trigger feedback controller - // is replaced with an estimate of the mark/cons ratio that doesn't - // have the same saturation issues, so this is set equal to - // gcGoalUtilization. + // As a general rule, there's little reason to set gcBackgroundUtilization + // < gcGoalUtilization. One reason might be in mostly idle applications, + // where goroutines are unlikely to assist at all, so the actual + // utilization will be lower than the goal. But this is moot point + // because the idle mark workers already soak up idle CPU resources. + // These two values are still kept separate however because they are + // distinct conceptually, and in previous iterations of the pacer the + // distinction was more important. gcBackgroundUtilization = 0.25 // gcCreditSlack is the amount of scan work credit that can @@ -104,27 +105,14 @@ type gcControllerState struct { // debugging. heapMinimum uint64 - // triggerRatio is the heap growth ratio that triggers marking. - // - // E.g., if this is 0.6, then GC should start when the live - // heap has reached 1.6 times the heap size marked by the - // previous cycle. This should be ≤ GOGC/100 so the trigger - // heap size is less than the goal heap size. This is set - // during mark termination for the next cycle's trigger. - // - // Protected by mheap_.lock or a STW. - // - // Used if !goexperiment.PacerRedesign. - triggerRatio float64 - // trigger is the heap size that triggers marking. // // When heapLive ≥ trigger, the mark phase will start. // This is also the heap size by which proportional sweeping // must be complete. // - // This is computed from triggerRatio during mark termination - // for the next cycle's trigger. + // This is computed from consMark during mark termination for + // the next cycle's trigger. // // Protected by mheap_.lock or a STW. trigger uint64 @@ -141,8 +129,6 @@ type gcControllerState struct { // cycle, divided by the CPU time spent on each activity. // // Updated at the end of each GC cycle, in endCycle. - // - // For goexperiment.PacerRedesign. consMark float64 // consMarkController holds the state for the mark-cons ratio @@ -150,8 +136,6 @@ type gcControllerState struct { // // Its purpose is to smooth out noisiness in the computation of // consMark; see consMark for details. - // - // For goexperiment.PacerRedesign. consMarkController piController _ uint32 // Padding for atomics on 32-bit platforms. @@ -198,14 +182,9 @@ type gcControllerState struct { // is the live heap (as counted by heapLive), but omitting // no-scan objects and no-scan tails of objects. // - // For !goexperiment.PacerRedesign: Whenever this is updated, - // call this gcControllerState's revise() method. It is read - // and written atomically or with the world stopped. - // - // For goexperiment.PacerRedesign: This value is fixed at the - // start of a GC cycle, so during a GC cycle it is safe to - // read without atomics, and it represents the maximum scannable - // heap. + // This value is fixed at the start of a GC cycle, so during a + // GC cycle it is safe to read without atomics, and it represents + // the maximum scannable heap. heapScan uint64 // lastHeapScan is the number of bytes of heap that were scanned @@ -259,9 +238,6 @@ type gcControllerState struct { // // Note that stackScanWork includes all allocated space, not just the // size of the stack itself, mirroring stackSize. - // - // For !goexperiment.PacerRedesign, stackScanWork and globalsScanWork - // are always zero. heapScanWork atomic.Int64 stackScanWork atomic.Int64 globalsScanWork atomic.Int64 @@ -339,35 +315,25 @@ type gcControllerState struct { func (c *gcControllerState) init(gcPercent int32) { c.heapMinimum = defaultHeapMinimum - if goexperiment.PacerRedesign { - c.consMarkController = piController{ - // Tuned first via the Ziegler-Nichols process in simulation, - // then the integral time was manually tuned against real-world - // applications to deal with noisiness in the measured cons/mark - // ratio. - kp: 0.9, - ti: 4.0, - - // Set a high reset time in GC cycles. - // This is inversely proportional to the rate at which we - // accumulate error from clipping. By making this very high - // we make the accumulation slow. In general, clipping is - // OK in our situation, hence the choice. - // - // Tune this if we get unintended effects from clipping for - // a long time. - tt: 1000, - min: -1000, - max: 1000, - } - } else { - // Set a reasonable initial GC trigger. - c.triggerRatio = 7 / 8.0 - - // Fake a heapMarked value so it looks like a trigger at - // heapMinimum is the appropriate growth from heapMarked. - // This will go into computing the initial GC goal. - c.heapMarked = uint64(float64(c.heapMinimum) / (1 + c.triggerRatio)) + c.consMarkController = piController{ + // Tuned first via the Ziegler-Nichols process in simulation, + // then the integral time was manually tuned against real-world + // applications to deal with noisiness in the measured cons/mark + // ratio. + kp: 0.9, + ti: 4.0, + + // Set a high reset time in GC cycles. + // This is inversely proportional to the rate at which we + // accumulate error from clipping. By making this very high + // we make the accumulation slow. In general, clipping is + // OK in our situation, hence the choice. + // + // Tune this if we get unintended effects from clipping for + // a long time. + tt: 1000, + min: -1000, + max: 1000, } // This will also compute and set the GC trigger and goal. @@ -396,14 +362,8 @@ func (c *gcControllerState) startCycle(markStartTime int64, procs int) { // GOGC. Assist is proportional to this distance, so enforce a // minimum distance, even if it means going over the GOGC goal // by a tiny bit. - if goexperiment.PacerRedesign { - if c.heapGoal < c.heapLive+64<<10 { - c.heapGoal = c.heapLive + 64<<10 - } - } else { - if c.heapGoal < c.heapLive+1<<20 { - c.heapGoal = c.heapLive + 1<<20 - } + if c.heapGoal < c.heapLive+64<<10 { + c.heapGoal = c.heapLive + 64<<10 } // Compute the background mark utilization goal. In general, @@ -492,74 +452,50 @@ func (c *gcControllerState) revise() { // heapGoal assuming the heap is in steady-state. heapGoal := int64(atomic.Load64(&c.heapGoal)) - var scanWorkExpected int64 - if goexperiment.PacerRedesign { - // The expected scan work is computed as the amount of bytes scanned last - // GC cycle, plus our estimate of stacks and globals work for this cycle. - scanWorkExpected = int64(c.lastHeapScan + c.stackScan + c.globalsScan) - - // maxScanWork is a worst-case estimate of the amount of scan work that - // needs to be performed in this GC cycle. Specifically, it represents - // the case where *all* scannable memory turns out to be live. - maxScanWork := int64(scan + c.stackScan + c.globalsScan) - if work > scanWorkExpected { - // We've already done more scan work than expected. Because our expectation - // is based on a steady-state scannable heap size, we assume this means our - // heap is growing. Compute a new heap goal that takes our existing runway - // computed for scanWorkExpected and extrapolates it to maxScanWork, the worst-case - // scan work. This keeps our assist ratio stable if the heap continues to grow. - // - // The effect of this mechanism is that assists stay flat in the face of heap - // growths. It's OK to use more memory this cycle to scan all the live heap, - // because the next GC cycle is inevitably going to use *at least* that much - // memory anyway. - extHeapGoal := int64(float64(heapGoal-int64(c.trigger))/float64(scanWorkExpected)*float64(maxScanWork)) + int64(c.trigger) - scanWorkExpected = maxScanWork - - // hardGoal is a hard limit on the amount that we're willing to push back the - // heap goal, and that's twice the heap goal (i.e. if GOGC=100 and the heap and/or - // stacks and/or globals grow to twice their size, this limits the current GC cycle's - // growth to 4x the original live heap's size). - // - // This maintains the invariant that we use no more memory than the next GC cycle - // will anyway. - hardGoal := int64((1.0 + float64(gcPercent)/100.0) * float64(heapGoal)) - if extHeapGoal > hardGoal { - extHeapGoal = hardGoal - } - heapGoal = extHeapGoal - } - if int64(live) > heapGoal { - // We're already past our heap goal, even the extrapolated one. - // Leave ourselves some extra runway, so in the worst case we - // finish by that point. - const maxOvershoot = 1.1 - heapGoal = int64(float64(heapGoal) * maxOvershoot) - - // Compute the upper bound on the scan work remaining. - scanWorkExpected = maxScanWork - } - } else { - // Compute the expected scan work remaining. + // The expected scan work is computed as the amount of bytes scanned last + // GC cycle, plus our estimate of stacks and globals work for this cycle. + scanWorkExpected := int64(c.lastHeapScan + c.stackScan + c.globalsScan) + + // maxScanWork is a worst-case estimate of the amount of scan work that + // needs to be performed in this GC cycle. Specifically, it represents + // the case where *all* scannable memory turns out to be live. + maxScanWork := int64(scan + c.stackScan + c.globalsScan) + if work > scanWorkExpected { + // We've already done more scan work than expected. Because our expectation + // is based on a steady-state scannable heap size, we assume this means our + // heap is growing. Compute a new heap goal that takes our existing runway + // computed for scanWorkExpected and extrapolates it to maxScanWork, the worst-case + // scan work. This keeps our assist ratio stable if the heap continues to grow. // - // This is estimated based on the expected - // steady-state scannable heap. For example, with - // GOGC=100, only half of the scannable heap is - // expected to be live, so that's what we target. + // The effect of this mechanism is that assists stay flat in the face of heap + // growths. It's OK to use more memory this cycle to scan all the live heap, + // because the next GC cycle is inevitably going to use *at least* that much + // memory anyway. + extHeapGoal := int64(float64(heapGoal-int64(c.trigger))/float64(scanWorkExpected)*float64(maxScanWork)) + int64(c.trigger) + scanWorkExpected = maxScanWork + + // hardGoal is a hard limit on the amount that we're willing to push back the + // heap goal, and that's twice the heap goal (i.e. if GOGC=100 and the heap and/or + // stacks and/or globals grow to twice their size, this limits the current GC cycle's + // growth to 4x the original live heap's size). // - // (This is a float calculation to avoid overflowing on - // 100*heapScan.) - scanWorkExpected = int64(float64(scan) * 100 / float64(100+gcPercent)) - if int64(live) > heapGoal || work > scanWorkExpected { - // We're past the soft goal, or we've already done more scan - // work than we expected. Pace GC so that in the worst case it - // will complete by the hard goal. - const maxOvershoot = 1.1 - heapGoal = int64(float64(heapGoal) * maxOvershoot) - - // Compute the upper bound on the scan work remaining. - scanWorkExpected = int64(scan) + // This maintains the invariant that we use no more memory than the next GC cycle + // will anyway. + hardGoal := int64((1.0 + float64(gcPercent)/100.0) * float64(heapGoal)) + if extHeapGoal > hardGoal { + extHeapGoal = hardGoal } + heapGoal = extHeapGoal + } + if int64(live) > heapGoal { + // We're already past our heap goal, even the extrapolated one. + // Leave ourselves some extra runway, so in the worst case we + // finish by that point. + const maxOvershoot = 1.1 + heapGoal = int64(float64(heapGoal) * maxOvershoot) + + // Compute the upper bound on the scan work remaining. + scanWorkExpected = maxScanWork } // Compute the remaining scan work estimate. @@ -604,12 +540,10 @@ func (c *gcControllerState) revise() { c.assistBytesPerWork.Store(assistBytesPerWork) } -// endCycle computes the trigger ratio (!goexperiment.PacerRedesign) -// or the consMark estimate (goexperiment.PacerRedesign) for the next cycle. -// Returns the trigger ratio if application, or 0 (goexperiment.PacerRedesign). +// endCycle computes the consMark estimate for the next cycle. // userForced indicates whether the current GC cycle was forced // by the application. -func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) float64 { +func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) { // Record last heap goal for the scavenger. // We'll be updating the heap goal soon. gcController.lastHeapGoal = gcController.heapGoal @@ -624,155 +558,91 @@ func (c *gcControllerState) endCycle(now int64, procs int, userForced bool) floa utilization += float64(c.assistTime) / float64(assistDuration*int64(procs)) } - if goexperiment.PacerRedesign { - if c.heapLive <= c.trigger { - // Shouldn't happen, but let's be very safe about this in case the - // GC is somehow extremely short. - // - // In this case though, the only reasonable value for c.heapLive-c.trigger - // would be 0, which isn't really all that useful, i.e. the GC was so short - // that it didn't matter. - // - // Ignore this case and don't update anything. - return 0 - } - idleUtilization := 0.0 - if assistDuration > 0 { - idleUtilization = float64(c.idleMarkTime) / float64(assistDuration*int64(procs)) - } - // Determine the cons/mark ratio. - // - // The units we want for the numerator and denominator are both B / cpu-ns. - // We get this by taking the bytes allocated or scanned, and divide by the amount of - // CPU time it took for those operations. For allocations, that CPU time is - // - // assistDuration * procs * (1 - utilization) - // - // Where utilization includes just background GC workers and assists. It does *not* - // include idle GC work time, because in theory the mutator is free to take that at - // any point. + if c.heapLive <= c.trigger { + // Shouldn't happen, but let's be very safe about this in case the + // GC is somehow extremely short. // - // For scanning, that CPU time is + // In this case though, the only reasonable value for c.heapLive-c.trigger + // would be 0, which isn't really all that useful, i.e. the GC was so short + // that it didn't matter. // - // assistDuration * procs * (utilization + idleUtilization) - // - // In this case, we *include* idle utilization, because that is additional CPU time that the - // the GC had available to it. - // - // In effect, idle GC time is sort of double-counted here, but it's very weird compared - // to other kinds of GC work, because of how fluid it is. Namely, because the mutator is - // *always* free to take it. - // - // So this calculation is really: - // (heapLive-trigger) / (assistDuration * procs * (1-utilization)) / - // (scanWork) / (assistDuration * procs * (utilization+idleUtilization) - // - // Note that because we only care about the ratio, assistDuration and procs cancel out. - scanWork := c.heapScanWork.Load() + c.stackScanWork.Load() + c.globalsScanWork.Load() - currentConsMark := (float64(c.heapLive-c.trigger) * (utilization + idleUtilization)) / - (float64(scanWork) * (1 - utilization)) - - // Update cons/mark controller. The time period for this is 1 GC cycle. - // - // This use of a PI controller might seem strange. So, here's an explanation: - // - // currentConsMark represents the consMark we *should've* had to be perfectly - // on-target for this cycle. Given that we assume the next GC will be like this - // one in the steady-state, it stands to reason that we should just pick that - // as our next consMark. In practice, however, currentConsMark is too noisy: - // we're going to be wildly off-target in each GC cycle if we do that. - // - // What we do instead is make a long-term assumption: there is some steady-state - // consMark value, but it's obscured by noise. By constantly shooting for this - // noisy-but-perfect consMark value, the controller will bounce around a bit, - // but its average behavior, in aggregate, should be less noisy and closer to - // the true long-term consMark value, provided its tuned to be slightly overdamped. - var ok bool - oldConsMark := c.consMark - c.consMark, ok = c.consMarkController.next(c.consMark, currentConsMark, 1.0) - if !ok { - // The error spiraled out of control. This is incredibly unlikely seeing - // as this controller is essentially just a smoothing function, but it might - // mean that something went very wrong with how currentConsMark was calculated. - // Just reset consMark and keep going. - c.consMark = 0 - } - - if debug.gcpacertrace > 0 { - printlock() - goal := gcGoalUtilization * 100 - print("pacer: ", int(utilization*100), "% CPU (", int(goal), " exp.) for ") - print(c.heapScanWork.Load(), "+", c.stackScanWork.Load(), "+", c.globalsScanWork.Load(), " B work (", c.lastHeapScan+c.stackScan+c.globalsScan, " B exp.) ") - print("in ", c.trigger, " B -> ", c.heapLive, " B (∆goal ", int64(c.heapLive)-int64(c.heapGoal), ", cons/mark ", oldConsMark, ")") - if !ok { - print("[controller reset]") - } - println() - printunlock() - } - return 0 + // Ignore this case and don't update anything. + return } - - // !goexperiment.PacerRedesign below. - - if userForced { - // Forced GC means this cycle didn't start at the - // trigger, so where it finished isn't good - // information about how to adjust the trigger. - // Just leave it where it is. - return c.triggerRatio + idleUtilization := 0.0 + if assistDuration > 0 { + idleUtilization = float64(c.idleMarkTime) / float64(assistDuration*int64(procs)) } + // Determine the cons/mark ratio. + // + // The units we want for the numerator and denominator are both B / cpu-ns. + // We get this by taking the bytes allocated or scanned, and divide by the amount of + // CPU time it took for those operations. For allocations, that CPU time is + // + // assistDuration * procs * (1 - utilization) + // + // Where utilization includes just background GC workers and assists. It does *not* + // include idle GC work time, because in theory the mutator is free to take that at + // any point. + // + // For scanning, that CPU time is + // + // assistDuration * procs * (utilization + idleUtilization) + // + // In this case, we *include* idle utilization, because that is additional CPU time that the + // the GC had available to it. + // + // In effect, idle GC time is sort of double-counted here, but it's very weird compared + // to other kinds of GC work, because of how fluid it is. Namely, because the mutator is + // *always* free to take it. + // + // So this calculation is really: + // (heapLive-trigger) / (assistDuration * procs * (1-utilization)) / + // (scanWork) / (assistDuration * procs * (utilization+idleUtilization) + // + // Note that because we only care about the ratio, assistDuration and procs cancel out. + scanWork := c.heapScanWork.Load() + c.stackScanWork.Load() + c.globalsScanWork.Load() + currentConsMark := (float64(c.heapLive-c.trigger) * (utilization + idleUtilization)) / + (float64(scanWork) * (1 - utilization)) - // Proportional response gain for the trigger controller. Must - // be in [0, 1]. Lower values smooth out transient effects but - // take longer to respond to phase changes. Higher values - // react to phase changes quickly, but are more affected by - // transient changes. Values near 1 may be unstable. - const triggerGain = 0.5 - - // Compute next cycle trigger ratio. First, this computes the - // "error" for this cycle; that is, how far off the trigger - // was from what it should have been, accounting for both heap - // growth and GC CPU utilization. We compute the actual heap - // growth during this cycle and scale that by how far off from - // the goal CPU utilization we were (to estimate the heap - // growth if we had the desired CPU utilization). The - // difference between this estimate and the GOGC-based goal - // heap growth is the error. - goalGrowthRatio := c.effectiveGrowthRatio() - actualGrowthRatio := float64(c.heapLive)/float64(c.heapMarked) - 1 - triggerError := goalGrowthRatio - c.triggerRatio - utilization/gcGoalUtilization*(actualGrowthRatio-c.triggerRatio) - - // Finally, we adjust the trigger for next time by this error, - // damped by the proportional gain. - triggerRatio := c.triggerRatio + triggerGain*triggerError + // Update cons/mark controller. The time period for this is 1 GC cycle. + // + // This use of a PI controller might seem strange. So, here's an explanation: + // + // currentConsMark represents the consMark we *should've* had to be perfectly + // on-target for this cycle. Given that we assume the next GC will be like this + // one in the steady-state, it stands to reason that we should just pick that + // as our next consMark. In practice, however, currentConsMark is too noisy: + // we're going to be wildly off-target in each GC cycle if we do that. + // + // What we do instead is make a long-term assumption: there is some steady-state + // consMark value, but it's obscured by noise. By constantly shooting for this + // noisy-but-perfect consMark value, the controller will bounce around a bit, + // but its average behavior, in aggregate, should be less noisy and closer to + // the true long-term consMark value, provided its tuned to be slightly overdamped. + var ok bool + oldConsMark := c.consMark + c.consMark, ok = c.consMarkController.next(c.consMark, currentConsMark, 1.0) + if !ok { + // The error spiraled out of control. This is incredibly unlikely seeing + // as this controller is essentially just a smoothing function, but it might + // mean that something went very wrong with how currentConsMark was calculated. + // Just reset consMark and keep going. + c.consMark = 0 + } if debug.gcpacertrace > 0 { - // Print controller state in terms of the design - // document. - H_m_prev := c.heapMarked - h_t := c.triggerRatio - H_T := c.trigger - h_a := actualGrowthRatio - H_a := c.heapLive - h_g := goalGrowthRatio - H_g := int64(float64(H_m_prev) * (1 + h_g)) - u_a := utilization - u_g := gcGoalUtilization - W_a := c.heapScanWork.Load() - print("pacer: H_m_prev=", H_m_prev, - " h_t=", h_t, " H_T=", H_T, - " h_a=", h_a, " H_a=", H_a, - " h_g=", h_g, " H_g=", H_g, - " u_a=", u_a, " u_g=", u_g, - " W_a=", W_a, - " goalΔ=", goalGrowthRatio-h_t, - " actualΔ=", h_a-h_t, - " u_a/u_g=", u_a/u_g, - "\n") + printlock() + goal := gcGoalUtilization * 100 + print("pacer: ", int(utilization*100), "% CPU (", int(goal), " exp.) for ") + print(c.heapScanWork.Load(), "+", c.stackScanWork.Load(), "+", c.globalsScanWork.Load(), " B work (", c.lastHeapScan+c.stackScan+c.globalsScan, " B exp.) ") + print("in ", c.trigger, " B -> ", c.heapLive, " B (∆goal ", int64(c.heapLive)-int64(c.heapGoal), ", cons/mark ", oldConsMark, ")") + if !ok { + print("[controller reset]") + } + println() + printunlock() } - - return triggerRatio } // enlistWorker encourages another dedicated mark worker to start on @@ -938,15 +808,14 @@ func (c *gcControllerState) update(dHeapLive, dHeapScan int64) { traceHeapAlloc() } } - // Only update heapScan in the new pacer redesign if we're not - // currently in a GC. - if !goexperiment.PacerRedesign || gcBlackenEnabled == 0 { + if gcBlackenEnabled == 0 { + // Update heapScan when we're not in a current GC. It is fixed + // at the beginning of a cycle. if dHeapScan != 0 { atomic.Xadd64(&gcController.heapScan, dHeapScan) } - } - if gcBlackenEnabled != 0 { - // gcController.heapLive and heapScan changed. + } else { + // gcController.heapLive changed. c.revise() } } @@ -970,8 +839,6 @@ func (c *gcControllerState) addGlobals(amount int64) { // commit recomputes all pacing parameters from scratch, namely // absolute trigger, the heap goal, mark pacing, and sweep pacing. // -// If goexperiment.PacerRedesign is true, triggerRatio is ignored. -// // This can be called any time. If GC is the in the middle of a // concurrent phase, it will adjust the pacing of that phase. // @@ -979,16 +846,11 @@ func (c *gcControllerState) addGlobals(amount int64) { // gcController.heapLive. These must be up to date. // // mheap_.lock must be held or the world must be stopped. -func (c *gcControllerState) commit(triggerRatio float64) { +func (c *gcControllerState) commit() { if !c.test { assertWorldStoppedOrLockHeld(&mheap_.lock) } - if !goexperiment.PacerRedesign { - c.oldCommit(triggerRatio) - return - } - // Compute the next GC goal, which is when the allocated heap // has grown by GOGC/100 over where it started the last cycle, // plus additional runway for non-heap sources of GC work. @@ -1096,113 +958,6 @@ func (c *gcControllerState) commit(triggerRatio float64) { } } -// oldCommit sets the trigger ratio and updates everything -// derived from it: the absolute trigger, the heap goal, mark pacing, -// and sweep pacing. -// -// This can be called any time. If GC is the in the middle of a -// concurrent phase, it will adjust the pacing of that phase. -// -// This depends on gcPercent, gcController.heapMarked, and -// gcController.heapLive. These must be up to date. -// -// For !goexperiment.PacerRedesign. -func (c *gcControllerState) oldCommit(triggerRatio float64) { - gcPercent := c.gcPercent.Load() - - // Compute the next GC goal, which is when the allocated heap - // has grown by GOGC/100 over the heap marked by the last - // cycle. - goal := ^uint64(0) - if gcPercent >= 0 { - goal = c.heapMarked + c.heapMarked*uint64(gcPercent)/100 - } - - // Set the trigger ratio, capped to reasonable bounds. - if gcPercent >= 0 { - scalingFactor := float64(gcPercent) / 100 - // Ensure there's always a little margin so that the - // mutator assist ratio isn't infinity. - maxTriggerRatio := 0.95 * scalingFactor - if triggerRatio > maxTriggerRatio { - triggerRatio = maxTriggerRatio - } - - // If we let triggerRatio go too low, then if the application - // is allocating very rapidly we might end up in a situation - // where we're allocating black during a nearly always-on GC. - // The result of this is a growing heap and ultimately an - // increase in RSS. By capping us at a point >0, we're essentially - // saying that we're OK using more CPU during the GC to prevent - // this growth in RSS. - // - // The current constant was chosen empirically: given a sufficiently - // fast/scalable allocator with 48 Ps that could drive the trigger ratio - // to <0.05, this constant causes applications to retain the same peak - // RSS compared to not having this allocator. - minTriggerRatio := 0.6 * scalingFactor - if triggerRatio < minTriggerRatio { - triggerRatio = minTriggerRatio - } - } else if triggerRatio < 0 { - // gcPercent < 0, so just make sure we're not getting a negative - // triggerRatio. This case isn't expected to happen in practice, - // and doesn't really matter because if gcPercent < 0 then we won't - // ever consume triggerRatio further on in this function, but let's - // just be defensive here; the triggerRatio being negative is almost - // certainly undesirable. - triggerRatio = 0 - } - c.triggerRatio = triggerRatio - - // Compute the absolute GC trigger from the trigger ratio. - // - // We trigger the next GC cycle when the allocated heap has - // grown by the trigger ratio over the marked heap size. - trigger := ^uint64(0) - if gcPercent >= 0 { - trigger = uint64(float64(c.heapMarked) * (1 + triggerRatio)) - // Don't trigger below the minimum heap size. - minTrigger := c.heapMinimum - if !isSweepDone() { - // Concurrent sweep happens in the heap growth - // from gcController.heapLive to trigger, so ensure - // that concurrent sweep has some heap growth - // in which to perform sweeping before we - // start the next GC cycle. - sweepMin := atomic.Load64(&c.heapLive) + sweepMinHeapDistance - if sweepMin > minTrigger { - minTrigger = sweepMin - } - } - if trigger < minTrigger { - trigger = minTrigger - } - if int64(trigger) < 0 { - print("runtime: heapGoal=", c.heapGoal, " heapMarked=", c.heapMarked, " gcController.heapLive=", c.heapLive, " initialHeapLive=", work.initialHeapLive, "triggerRatio=", triggerRatio, " minTrigger=", minTrigger, "\n") - throw("trigger underflow") - } - if trigger > goal { - // The trigger ratio is always less than GOGC/100, but - // other bounds on the trigger may have raised it. - // Push up the goal, too. - goal = trigger - } - } - - // Commit to the trigger and goal. - c.trigger = trigger - atomic.Store64(&c.heapGoal, goal) - if trace.enabled { - traceHeapGoal() - } - - // Update mark pacing. - if gcphase != _GCoff { - c.revise() - } -} - // effectiveGrowthRatio returns the current effective heap growth // ratio (GOGC/100) based on heapMarked from the previous GC and // heapGoal for the current GC. @@ -1243,7 +998,7 @@ func (c *gcControllerState) setGCPercent(in int32) int32 { c.heapMinimum = defaultHeapMinimum * uint64(in) / 100 c.gcPercent.Store(in) // Update pacing in response to gcPercent change. - c.commit(c.triggerRatio) + c.commit() return out } diff --git a/src/runtime/mgcpacer_test.go b/src/runtime/mgcpacer_test.go index 10a8ca2520..b49e3a8d24 100644 --- a/src/runtime/mgcpacer_test.go +++ b/src/runtime/mgcpacer_test.go @@ -6,7 +6,6 @@ package runtime_test import ( "fmt" - "internal/goexperiment" "math" "math/rand" . "runtime" @@ -35,11 +34,9 @@ func TestGcPacer(t *testing.T) { checker: func(t *testing.T, c []gcCycleResult) { n := len(c) if n >= 25 { - if goexperiment.PacerRedesign { - // For the pacer redesign, assert something even stronger: at this alloc/scan rate, - // it should be extremely close to the goal utilization. - assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005) - } + // For the pacer redesign, assert something even stronger: at this alloc/scan rate, + // it should be extremely close to the goal utilization. + assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005) // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles. assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005) @@ -64,12 +61,10 @@ func TestGcPacer(t *testing.T) { // really handle this well, so don't check the goal ratio for it. n := len(c) if n >= 25 { - if goexperiment.PacerRedesign { - // For the pacer redesign, assert something even stronger: at this alloc/scan rate, - // it should be extremely close to the goal utilization. - assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005) - assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05) - } + // For the pacer redesign, assert something even stronger: at this alloc/scan rate, + // it should be extremely close to the goal utilization. + assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005) + assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05) // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles. assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005) @@ -93,12 +88,10 @@ func TestGcPacer(t *testing.T) { // really handle this well, so don't check the goal ratio for it. n := len(c) if n >= 25 { - if goexperiment.PacerRedesign { - // For the pacer redesign, assert something even stronger: at this alloc/scan rate, - // it should be extremely close to the goal utilization. - assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005) - assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05) - } + // For the pacer redesign, assert something even stronger: at this alloc/scan rate, + // it should be extremely close to the goal utilization. + assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005) + assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05) // Make sure the pacer settles into a non-degenerate state in at least 25 GC cycles. assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005) @@ -187,7 +180,7 @@ func TestGcPacer(t *testing.T) { length: 50, checker: func(t *testing.T, c []gcCycleResult) { n := len(c) - if goexperiment.PacerRedesign && n > 12 { + if n > 12 { if n == 26 { // In the 26th cycle there's a heap growth. Overshoot is expected to maintain // a stable utilization, but we should *never* overshoot more than GOGC of @@ -232,12 +225,7 @@ func TestGcPacer(t *testing.T) { // 1. Utilization isn't varying _too_ much, and // 2. The pacer is mostly keeping up with the goal. assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05) - if goexperiment.PacerRedesign { - assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3) - } else { - // The old pacer is messier here, and needs a lot more tolerance. - assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4) - } + assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3) } }, }, @@ -260,12 +248,7 @@ func TestGcPacer(t *testing.T) { // 1. Utilization isn't varying _too_ much, and // 2. The pacer is mostly keeping up with the goal. assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05) - if goexperiment.PacerRedesign { - assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3) - } else { - // The old pacer is messier here, and needs a lot more tolerance. - assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4) - } + assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3) } }, }, @@ -293,12 +276,7 @@ func TestGcPacer(t *testing.T) { // Unlike the other tests, GC utilization here will vary more and tend higher. // Just make sure it's not going too crazy. assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.05) - if goexperiment.PacerRedesign { - assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.05) - } else { - // The old pacer is messier here, and needs a little more tolerance. - assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.07) - } + assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.05) } }, }, |