diff options
Diffstat (limited to 'src/runtime/mgcscavenge.go')
-rw-r--r-- | src/runtime/mgcscavenge.go | 584 |
1 files changed, 360 insertions, 224 deletions
diff --git a/src/runtime/mgcscavenge.go b/src/runtime/mgcscavenge.go index 5f50378adf..1abdbf3a0d 100644 --- a/src/runtime/mgcscavenge.go +++ b/src/runtime/mgcscavenge.go @@ -163,53 +163,186 @@ func gcPaceScavenger(heapGoal, lastHeapGoal uint64) { atomic.Store64(&mheap_.scavengeGoal, retainedGoal) } +const ( + // It doesn't really matter what value we start at, but we can't be zero, because + // that'll cause divide-by-zero issues. Pick something conservative which we'll + // also use as a fallback. + startingScavSleepRatio = 0.001 + + // Spend at least 1 ms scavenging, otherwise the corresponding + // sleep time to maintain our desired utilization is too low to + // be reliable. + minScavWorkTime = 1e6 +) + // Sleep/wait state of the background scavenger. -var scavenge struct { - lock mutex - g *g - parked bool - timer *timer - sysmonWake uint32 // Set atomically. - printControllerReset bool // Whether the scavenger is in cooldown. +var scavenger scavengerState + +type scavengerState struct { + // lock protects all fields below. + lock mutex + + // g is the goroutine the scavenger is bound to. + g *g + + // parked is whether or not the scavenger is parked. + parked bool + + // timer is the timer used for the scavenger to sleep. + timer *timer + + // sysmonWake signals to sysmon that it should wake the scavenger. + sysmonWake atomic.Uint32 + + // targetCPUFraction is the target CPU overhead for the scavenger. + targetCPUFraction float64 + + // sleepRatio is the ratio of time spent doing scavenging work to + // time spent sleeping. This is used to decide how long the scavenger + // should sleep for in between batches of work. It is set by + // critSleepController in order to maintain a CPU overhead of + // targetCPUFraction. + // + // Lower means more sleep, higher means more aggressive scavenging. + sleepRatio float64 + + // sleepController controls sleepRatio. + // + // See sleepRatio for more details. + sleepController piController + + // cooldown is the time left in nanoseconds during which we avoid + // using the controller and we hold sleepRatio at a conservative + // value. Used if the controller's assumptions fail to hold. + controllerCooldown int64 + + // printControllerReset instructs printScavTrace to signal that + // the controller was reset. + printControllerReset bool + + // sleepStub is a stub used for testing to avoid actually having + // the scavenger sleep. + // + // Unlike the other stubs, this is not populated if left nil + // Instead, it is called when non-nil because any valid implementation + // of this function basically requires closing over this scavenger + // state, and allocating a closure is not allowed in the runtime as + // a matter of policy. + sleepStub func(n int64) int64 + + // scavenge is a function that scavenges n bytes of memory. + // Returns how many bytes of memory it actually scavenged, as + // well as the time it took in nanoseconds. Usually mheap.pages.scavenge + // with nanotime called around it, but stubbed out for testing. + // Like mheap.pages.scavenge, if it scavenges less than n bytes of + // memory, the caller may assume the heap is exhausted of scavengable + // memory for now. + // + // If this is nil, it is populated with the real thing in init. + scavenge func(n uintptr) (uintptr, int64) + + // shouldStop is a callback called in the work loop and provides a + // point that can force the scavenger to stop early, for example because + // the scavenge policy dictates too much has been scavenged already. + // + // If this is nil, it is populated with the real thing in init. + shouldStop func() bool + + // gomaxprocs returns the current value of gomaxprocs. Stub for testing. + // + // If this is nil, it is populated with the real thing in init. + gomaxprocs func() int32 } -// readyForScavenger signals sysmon to wake the scavenger because -// there may be new work to do. +// init initializes a scavenger state and wires to the current G. // -// There may be a significant delay between when this function runs -// and when the scavenger is kicked awake, but it may be safely invoked -// in contexts where wakeScavenger is unsafe to call directly. -func readyForScavenger() { - atomic.Store(&scavenge.sysmonWake, 1) +// Must be called from a regular goroutine that can allocate. +func (s *scavengerState) init() { + if s.g != nil { + throw("scavenger state is already wired") + } + lockInit(&s.lock, lockRankScavenge) + s.g = getg() + + s.timer = new(timer) + s.timer.arg = s + s.timer.f = func(s any, _ uintptr) { + s.(*scavengerState).wake() + } + + // input: fraction of CPU time actually used. + // setpoint: ideal CPU fraction. + // output: ratio of time worked to time slept (determines sleep time). + // + // The output of this controller is somewhat indirect to what we actually + // want to achieve: how much time to sleep for. The reason for this definition + // is to ensure that the controller's outputs have a direct relationship with + // its inputs (as opposed to an inverse relationship), making it somewhat + // easier to reason about for tuning purposes. + s.sleepController = piController{ + // Tuned loosely via Ziegler-Nichols process. + kp: 0.3375, + ti: 3.2e6, + tt: 1e9, // 1 second reset time. + + // These ranges seem wide, but we want to give the controller plenty of + // room to hunt for the optimal value. + min: 0.001, // 1:1000 + max: 1000.0, // 1000:1 + } + s.sleepRatio = startingScavSleepRatio + + // Install real functions if stubs aren't present. + if s.scavenge == nil { + s.scavenge = func(n uintptr) (uintptr, int64) { + start := nanotime() + r := mheap_.pages.scavenge(n) + end := nanotime() + if start >= end { + return r, 0 + } + return r, end - start + } + } + if s.shouldStop == nil { + s.shouldStop = func() bool { + // If background scavenging is disabled or if there's no work to do just stop. + return heapRetained() <= atomic.Load64(&mheap_.scavengeGoal) + } + } + if s.gomaxprocs == nil { + s.gomaxprocs = func() int32 { + return gomaxprocs + } + } +} + +// park parks the scavenger goroutine. +func (s *scavengerState) park() { + lock(&s.lock) + if getg() != s.g { + throw("tried to park scavenger from another goroutine") + } + s.parked = true + goparkunlock(&s.lock, waitReasonGCScavengeWait, traceEvGoBlock, 2) } -// wakeScavenger immediately unparks the scavenger if necessary. -// -// May run without a P, but it may allocate, so it must not be called -// on any allocation path. -// -// mheap_.lock, scavenge.lock, and sched.lock must not be held. -func wakeScavenger() { - lock(&scavenge.lock) - if scavenge.parked { - // Notify sysmon that it shouldn't bother waking up the scavenger. - atomic.Store(&scavenge.sysmonWake, 0) - - // Try to stop the timer but we don't really care if we succeed. - // It's possible that either a timer was never started, or that - // we're racing with it. - // In the case that we're racing with there's the low chance that - // we experience a spurious wake-up of the scavenger, but that's - // totally safe. - stopTimer(scavenge.timer) - - // Unpark the goroutine and tell it that there may have been a pacing - // change. Note that we skip the scheduler's runnext slot because we - // want to avoid having the scavenger interfere with the fair - // scheduling of user goroutines. In effect, this schedules the - // scavenger at a "lower priority" but that's OK because it'll - // catch up on the work it missed when it does get scheduled. - scavenge.parked = false +// ready signals to sysmon that the scavenger should be awoken. +func (s *scavengerState) ready() { + s.sysmonWake.Store(1) +} + +// wake immediately unparks the scavenger if necessary. +// +// Safe to run without a P. +func (s *scavengerState) wake() { + lock(&s.lock) + if s.parked { + // Unset sysmonWake, since the scavenger is now being awoken. + s.sysmonWake.Store(0) + + // s.parked is unset to prevent a double wake-up. + s.parked = false // Ready the goroutine by injecting it. We use injectglist instead // of ready or goready in order to allow us to run this function @@ -218,217 +351,217 @@ func wakeScavenger() { // the scavenger from interfering with user goroutine scheduling // too much. var list gList - list.push(scavenge.g) + list.push(s.g) injectglist(&list) } - unlock(&scavenge.lock) + unlock(&s.lock) } -// scavengeSleep attempts to put the scavenger to sleep for ns. +// sleep puts the scavenger to sleep based on the amount of time that it worked +// in nanoseconds. // // Note that this function should only be called by the scavenger. // // The scavenger may be woken up earlier by a pacing change, and it may not go // to sleep at all if there's a pending pacing change. -// -// Returns the amount of time actually slept. -func scavengeSleep(ns int64) int64 { - lock(&scavenge.lock) - - // Set the timer. - // - // This must happen here instead of inside gopark - // because we can't close over any variables without - // failing escape analysis. - start := nanotime() - resetTimer(scavenge.timer, start+ns) - - // Mark ourself as asleep and go to sleep. - scavenge.parked = true - goparkunlock(&scavenge.lock, waitReasonSleep, traceEvGoSleep, 2) - - // Return how long we actually slept for. - return nanotime() - start -} - -// Background scavenger. -// -// The background scavenger maintains the RSS of the application below -// the line described by the proportional scavenging statistics in -// the mheap struct. -func bgscavenge(c chan int) { - scavenge.g = getg() +func (s *scavengerState) sleep(worked float64) { + lock(&s.lock) + if getg() != s.g { + throw("tried to sleep scavenger from another goroutine") + } - lockInit(&scavenge.lock, lockRankScavenge) - lock(&scavenge.lock) - scavenge.parked = true + if worked < minScavWorkTime { + // This means there wasn't enough work to actually fill up minScavWorkTime. + // That's fine; we shouldn't try to do anything with this information + // because it's going result in a short enough sleep request that things + // will get messy. Just assume we did at least this much work. + // All this means is that we'll sleep longer than we otherwise would have. + worked = minScavWorkTime + } - scavenge.timer = new(timer) - scavenge.timer.f = func(_ any, _ uintptr) { - wakeScavenger() + // Multiply the critical time by 1 + the ratio of the costs of using + // scavenged memory vs. scavenging memory. This forces us to pay down + // the cost of reusing this memory eagerly by sleeping for a longer period + // of time and scavenging less frequently. More concretely, we avoid situations + // where we end up scavenging so often that we hurt allocation performance + // because of the additional overheads of using scavenged memory. + worked *= 1 + scavengeCostRatio + + // sleepTime is the amount of time we're going to sleep, based on the amount + // of time we worked, and the sleepRatio. + sleepTime := int64(worked / s.sleepRatio) + + var slept int64 + if s.sleepStub == nil { + // Set the timer. + // + // This must happen here instead of inside gopark + // because we can't close over any variables without + // failing escape analysis. + start := nanotime() + resetTimer(s.timer, start+sleepTime) + + // Mark ourselves as asleep and go to sleep. + s.parked = true + goparkunlock(&s.lock, waitReasonSleep, traceEvGoSleep, 2) + + // How long we actually slept for. + slept = nanotime() - start + + lock(&s.lock) + // Stop the timer here because s.wake is unable to do it for us. + // We don't really care if we succeed in stopping the timer. One + // reason we might fail is that we've already woken up, but the timer + // might be in the process of firing on some other P; essentially we're + // racing with it. That's totally OK. Double wake-ups are perfectly safe. + stopTimer(s.timer) + unlock(&s.lock) + } else { + unlock(&s.lock) + slept = s.sleepStub(sleepTime) } - c <- 1 - goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1) + // Stop here if we're cooling down from the controller. + if s.controllerCooldown > 0 { + // worked and slept aren't exact measures of time, but it's OK to be a bit + // sloppy here. We're just hoping we're avoiding some transient bad behavior. + t := slept + int64(worked) + if t > s.controllerCooldown { + s.controllerCooldown = 0 + } else { + s.controllerCooldown -= t + } + return + } // idealFraction is the ideal % of overall application CPU time that we // spend scavenging. idealFraction := float64(scavengePercent) / 100.0 - // Input: fraction of CPU time used. - // Setpoint: idealFraction. - // Output: ratio of critical time to sleep time (determines sleep time). + // Calculate the CPU time spent. // - // The output of this controller is somewhat indirect to what we actually - // want to achieve: how much time to sleep for. The reason for this definition - // is to ensure that the controller's outputs have a direct relationship with - // its inputs (as opposed to an inverse relationship), making it somewhat - // easier to reason about for tuning purposes. - critSleepController := piController{ - // Tuned loosely via Ziegler-Nichols process. - kp: 0.3375, - ti: 3.2e6, - tt: 1e9, // 1 second reset time. - - // These ranges seem wide, but we want to give the controller plenty of - // room to hunt for the optimal value. - min: 0.001, // 1:1000 - max: 1000.0, // 1000:1 + // This may be slightly inaccurate with respect to GOMAXPROCS, but we're + // recomputing this often enough relative to GOMAXPROCS changes in general + // (it only changes when the world is stopped, and not during a GC) that + // that small inaccuracy is in the noise. + cpuFraction := worked / ((float64(slept) + worked) * float64(s.gomaxprocs())) + + // Update the critSleepRatio, adjusting until we reach our ideal fraction. + var ok bool + s.sleepRatio, ok = s.sleepController.next(cpuFraction, idealFraction, float64(slept)+worked) + if !ok { + // The core assumption of the controller, that we can get a proportional + // response, broke down. This may be transient, so temporarily switch to + // sleeping a fixed, conservative amount. + s.sleepRatio = startingScavSleepRatio + s.controllerCooldown = 5e9 // 5 seconds. + + // Signal the scav trace printer to output this. + s.controllerFailed() } - // It doesn't really matter what value we start at, but we can't be zero, because - // that'll cause divide-by-zero issues. Pick something conservative which we'll - // also use as a fallback. - const startingCritSleepRatio = 0.001 - critSleepRatio := startingCritSleepRatio - // Duration left in nanoseconds during which we avoid using the controller and - // we hold critSleepRatio at a conservative value. Used if the controller's - // assumptions fail to hold. - controllerCooldown := int64(0) - for { - released := uintptr(0) - crit := float64(0) - - // Spend at least 1 ms scavenging, otherwise the corresponding - // sleep time to maintain our desired utilization is too low to - // be reliable. - const minCritTime = 1e6 - for crit < minCritTime { - // If background scavenging is disabled or if there's no work to do just park. - retained, goal := heapRetained(), atomic.Load64(&mheap_.scavengeGoal) - if retained <= goal { - break - } - - // scavengeQuantum is the amount of memory we try to scavenge - // in one go. A smaller value means the scavenger is more responsive - // to the scheduler in case of e.g. preemption. A larger value means - // that the overheads of scavenging are better amortized, so better - // scavenging throughput. - // - // The current value is chosen assuming a cost of ~10µs/physical page - // (this is somewhat pessimistic), which implies a worst-case latency of - // about 160µs for 4 KiB physical pages. The current value is biased - // toward latency over throughput. - const scavengeQuantum = 64 << 10 +} - // Accumulate the amount of time spent scavenging. - start := nanotime() - r := mheap_.pages.scavenge(scavengeQuantum) - atomic.Xadduintptr(&mheap_.pages.scav.released, r) - end := nanotime() +// controllerFailed indicates that the scavenger's scheduling +// controller failed. +func (s *scavengerState) controllerFailed() { + lock(&s.lock) + s.printControllerReset = true + unlock(&s.lock) +} - // On some platforms we may see end >= start if the time it takes to scavenge - // memory is less than the minimum granularity of its clock (e.g. Windows) or - // due to clock bugs. - // - // In this case, just assume scavenging takes 10 µs per regular physical page - // (determined empirically), and conservatively ignore the impact of huge pages - // on timing. - const approxCritNSPerPhysicalPage = 10e3 - if end <= start { - crit += approxCritNSPerPhysicalPage * float64(r/physPageSize) - } else { - crit += float64(end - start) - } - released += r +// run is the body of the main scavenging loop. +// +// Returns the number of bytes released and the estimated time spent +// releasing those bytes. +// +// Must be run on the scavenger goroutine. +func (s *scavengerState) run() (released uintptr, worked float64) { + lock(&s.lock) + if getg() != s.g { + throw("tried to run scavenger from another goroutine") + } + unlock(&s.lock) - // When using fake time just do one loop. - if faketime != 0 { - break - } + for worked < minScavWorkTime { + // If something from outside tells us to stop early, stop. + if s.shouldStop() { + break } - if released == 0 { - lock(&scavenge.lock) - scavenge.parked = true - goparkunlock(&scavenge.lock, waitReasonGCScavengeWait, traceEvGoBlock, 1) - continue + // scavengeQuantum is the amount of memory we try to scavenge + // in one go. A smaller value means the scavenger is more responsive + // to the scheduler in case of e.g. preemption. A larger value means + // that the overheads of scavenging are better amortized, so better + // scavenging throughput. + // + // The current value is chosen assuming a cost of ~10µs/physical page + // (this is somewhat pessimistic), which implies a worst-case latency of + // about 160µs for 4 KiB physical pages. The current value is biased + // toward latency over throughput. + const scavengeQuantum = 64 << 10 + + // Accumulate the amount of time spent scavenging. + r, duration := s.scavenge(scavengeQuantum) + + // On some platforms we may see end >= start if the time it takes to scavenge + // memory is less than the minimum granularity of its clock (e.g. Windows) or + // due to clock bugs. + // + // In this case, just assume scavenging takes 10 µs per regular physical page + // (determined empirically), and conservatively ignore the impact of huge pages + // on timing. + const approxWorkedNSPerPhysicalPage = 10e3 + if duration == 0 { + worked += approxWorkedNSPerPhysicalPage * float64(r/physPageSize) + } else { + // TODO(mknyszek): If duration is small compared to worked, it could be + // rounded down to zero. Probably not a problem in practice because the + // values are all within a few orders of magnitude of each other but maybe + // worth worrying about. + worked += float64(duration) } + released += r - if released < physPageSize { - // If this happens, it means that we may have attempted to release part - // of a physical page, but the likely effect of that is that it released - // the whole physical page, some of which may have still been in-use. - // This could lead to memory corruption. Throw. - throw("released less than one physical page of memory") + // scavenge does not return until it either finds the requisite amount of + // memory to scavenge, or exhausts the heap. If we haven't found enough + // to scavenge, then the heap must be exhausted. + if r < scavengeQuantum { + break } - - if crit < minCritTime { - // This means there wasn't enough work to actually fill up minCritTime. - // That's fine; we shouldn't try to do anything with this information - // because it's going result in a short enough sleep request that things - // will get messy. Just assume we did at least this much work. - // All this means is that we'll sleep longer than we otherwise would have. - crit = minCritTime + // When using fake time just do one loop. + if faketime != 0 { + break } + } + if released > 0 && released < physPageSize { + // If this happens, it means that we may have attempted to release part + // of a physical page, but the likely effect of that is that it released + // the whole physical page, some of which may have still been in-use. + // This could lead to memory corruption. Throw. + throw("released less than one physical page of memory") + } + return +} - // Multiply the critical time by 1 + the ratio of the costs of using - // scavenged memory vs. scavenging memory. This forces us to pay down - // the cost of reusing this memory eagerly by sleeping for a longer period - // of time and scavenging less frequently. More concretely, we avoid situations - // where we end up scavenging so often that we hurt allocation performance - // because of the additional overheads of using scavenged memory. - crit *= 1 + scavengeCostRatio - - // Go to sleep based on how much time we spent doing work. - slept := scavengeSleep(int64(crit / critSleepRatio)) - - // Stop here if we're cooling down from the controller. - if controllerCooldown > 0 { - // crit and slept aren't exact measures of time, but it's OK to be a bit - // sloppy here. We're just hoping we're avoiding some transient bad behavior. - t := slept + int64(crit) - if t > controllerCooldown { - controllerCooldown = 0 - } else { - controllerCooldown -= t - } - continue - } +// Background scavenger. +// +// The background scavenger maintains the RSS of the application below +// the line described by the proportional scavenging statistics in +// the mheap struct. +func bgscavenge(c chan int) { + scavenger.init() - // Calculate the CPU time spent. - // - // This may be slightly inaccurate with respect to GOMAXPROCS, but we're - // recomputing this often enough relative to GOMAXPROCS changes in general - // (it only changes when the world is stopped, and not during a GC) that - // that small inaccuracy is in the noise. - cpuFraction := float64(crit) / ((float64(slept) + crit) * float64(gomaxprocs)) - - // Update the critSleepRatio, adjusting until we reach our ideal fraction. - var ok bool - critSleepRatio, ok = critSleepController.next(cpuFraction, idealFraction, float64(slept)+crit) - if !ok { - // The core assumption of the controller, that we can get a proportional - // response, broke down. This may be transient, so temporarily switch to - // sleeping a fixed, conservative amount. - critSleepRatio = startingCritSleepRatio - controllerCooldown = 5e9 // 5 seconds. - - // Signal the scav trace printer to output this. - lock(&scavenge.lock) - scavenge.printControllerReset = true - unlock(&scavenge.lock) + c <- 1 + scavenger.park() + + for { + released, workTime := scavenger.run() + if released == 0 { + scavenger.park() + continue } + atomic.Xadduintptr(&mheap_.pages.scav.released, released) + scavenger.sleep(workTime) } } @@ -438,6 +571,9 @@ func bgscavenge(c chan int) { // back to the top of the heap. // // Returns the amount of memory scavenged in bytes. +// +// scavenge always tries to scavenge nbytes worth of memory, and will +// only fail to do so if the heap is exhausted for now. func (p *pageAlloc) scavenge(nbytes uintptr) uintptr { var ( addrs addrRange @@ -468,9 +604,9 @@ func (p *pageAlloc) scavenge(nbytes uintptr) uintptr { // was called, and forced indicates whether the scavenge was forced by the // application. // -// scavenge.lock must be held. +// scavenger.lock must be held. func printScavTrace(gen uint32, released uintptr, forced bool) { - assertLockHeld(&scavenge.lock) + assertLockHeld(&scavenger.lock) printlock() print("scav ", gen, " ", @@ -480,9 +616,9 @@ func printScavTrace(gen uint32, released uintptr, forced bool) { ) if forced { print(" (forced)") - } else if scavenge.printControllerReset { + } else if scavenger.printControllerReset { print(" [controller reset]") - scavenge.printControllerReset = false + scavenger.printControllerReset = false } println() printunlock() |