author    Michael Anthony Knyszek <mknyszek@google.com>  2021-04-13 03:07:27 +0000
committer Michael Knyszek <mknyszek@google.com>          2021-11-04 20:00:31 +0000
commit    a108b280bc724779ebaa6656d35f0fb307fb2a9b (patch)
tree      a3966a35f17d9a768c17ad140446d18d5d916870 /src/runtime/mgcpacer_test.go
parent    988efd58197205060ace508d29984fbab6eb3840 (diff)
runtime: implement GC pacer redesign
This change implements the GC pacer redesign outlined in #44167 and the
accompanying design document, behind a GOEXPERIMENT flag that is on by
default.

In addition to adding the new pacer, this CL also includes code to track
and account for stack and globals scan work in the pacer and in the
assist credit system.

The new pacer also deviates slightly from the document in that it
increases the bound on the minimum trigger ratio from 0.6 (scaled by
GOGC) to 0.7. The logic behind this change is that the new pacer much
more consistently hits the goal (good!), leading to slightly less
frequent GC cycles, but _longer_ ones (in this case, bad!). It turns out
that the cost of having the GC on hurts throughput significantly (per
byte of memory used), though tail latencies can improve by up to 10%! To
be conservative, this change moves the value to 0.7, where there is a
small improvement to both throughput and latency, given the memory use.

Because the new pacer accounts for the two most significant sources of
scan work after heap objects, it is now also safer to reduce the minimum
heap size without leading to very poor amortization. This change thus
decreases the minimum heap size to 512 KiB, which corresponds to the
fact that the runtime has around 200 KiB of scannable globals always
there, up-front, providing a baseline.

Benchmark results: https://perf.golang.org/search?q=upload:20211001.6

tile38's KNearest benchmark shows a memory increase, but throughput (and
latency) per byte of memory used is better.

gopher-lua showed an increase in both CPU time and memory usage, but
subsequent attempts to reproduce this behavior are inconsistent.
Sometimes the overall performance is better, sometimes it's worse. This
suggests that the benchmark is fairly noisy in a way not captured by the
benchmarking framework itself.

biogo-igor is the only benchmark to show a significant performance loss.
This benchmark exhibits a very high GC rate, with relatively little work
to do in each cycle. The idle mark workers are quite active. In the new
pacer, mark phases are longer, mark assists are fewer, and some of that
time in mark assists has shifted to idle workers. Linux perf indicates
that the difference in CPU time can be mostly attributed to
write-barrier slow path related calls, which in turn indicates that the
write barrier being on for longer is the primary culprit. This also
explains the memory increase, as a longer mark phase leads to more
memory allocated black, surviving an extra cycle and contributing to the
heap goal.

For #44167.

Change-Id: I8ac7cfef7d593e4a642c9b2be43fb3591a8ec9c4
Reviewed-on: https://go-review.googlesource.com/c/go/+/309869
Trust: Michael Knyszek <mknyszek@google.com>
Run-TryBot: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
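As a concrete illustration of the trigger-ratio bound described above, here is a hedged sketch in Go. It is not the runtime's implementation: the function and its signature are invented for illustration. Only the 0.6 and 0.7 lower bounds (scaled by GOGC) come from this change, and the 0.95 upper bound is the one mentioned in a TODO in the diff below.

package main

import "fmt"

// clampTriggerRatio sketches how a GC trigger ratio might be bounded.
// Illustrative only; not the runtime's actual code.
func clampTriggerRatio(triggerRatio float64, gcPercent int, redesign bool) float64 {
	scale := float64(gcPercent) / 100
	minRatio := 0.6 * scale // the old pacer's lower bound
	if redesign {
		// The redesign raises the bound to 0.7 so that GC cycles don't
		// run so long that the write barrier stays on and hurts throughput.
		minRatio = 0.7 * scale
	}
	maxRatio := 0.95 * scale // keep some margin between the trigger and the goal
	if triggerRatio < minRatio {
		return minRatio
	}
	if triggerRatio > maxRatio {
		return maxRatio
	}
	return triggerRatio
}

func main() {
	fmt.Println(clampTriggerRatio(0.5, 100, true)) // prints 0.7
}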
Diffstat (limited to 'src/runtime/mgcpacer_test.go')
-rw-r--r--  src/runtime/mgcpacer_test.go  127
1 file changed, 124 insertions(+), 3 deletions(-)
diff --git a/src/runtime/mgcpacer_test.go b/src/runtime/mgcpacer_test.go
index 5a9f46c6d1..d2707ca5a1 100644
--- a/src/runtime/mgcpacer_test.go
+++ b/src/runtime/mgcpacer_test.go
@@ -6,6 +6,7 @@ package runtime_test
import (
"fmt"
+ "internal/goexperiment"
"math"
"math/rand"
. "runtime"
@@ -34,6 +35,12 @@ func TestGcPacer(t *testing.T) {
checker: func(t *testing.T, c []gcCycleResult) {
n := len(c)
if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ }
+
// Make sure the pacer settles into a non-degenerate state within 25 GC cycles.
assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
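(The assertion helpers used throughout these checkers are small float-comparison wrappers defined elsewhere in mgcpacer_test.go. Below is a minimal sketch consistent with how they are called here, assuming the file's existing "testing" import; the exact bodies are assumptions.)

// assertInRange fails the test if a falls outside [min, max].
func assertInRange(t *testing.T, name string, a, min, max float64) {
	if a < min || a > max {
		t.Errorf("%s not in range (%f, %f): %f", name, min, max, a)
	}
}

// assertInEpsilon fails the test if a is not within epsilon of b.
func assertInEpsilon(t *testing.T, name string, a, b, epsilon float64) {
	assertInRange(t, name, a, b-epsilon, b+epsilon)
}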
@@ -41,6 +48,64 @@ func TestGcPacer(t *testing.T) {
},
},
{
+ // Same as the steady-state case, but lots of stacks to scan relative to the heap size.
+ name: "SteadyBigStacks",
+ gcPercent: 100,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: constant(132.0),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(2048).sum(ramp(128<<20, 8)),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ // Check the same conditions as the steady-state case, except the old pacer can't
+ // really handle this well, so don't check the goal ratio for it.
+ n := len(c)
+ if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+
+ // Make sure the pacer settles into a non-degenerate state within 25 GC cycles.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ }
+ },
+ },
+ {
+ // Same as the steady-state case, but lots of globals to scan relative to the heap size.
+ name: "SteadyBigGlobals",
+ gcPercent: 100,
+ globalsBytes: 128 << 20,
+ nCores: 8,
+ allocRate: constant(132.0),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ // Check the same conditions as the steady-state case, except the old pacer can't
+ // really handle this well, so don't check the goal ratio for it.
+ n := len(c)
+ if n >= 25 {
+ if goexperiment.PacerRedesign {
+ // For the pacer redesign, assert something even stronger: at this alloc/scan rate,
+ // it should be extremely close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, 0.005)
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
+ }
+
+ // Make sure the pacer settles into a non-degenerate state within 25 GC cycles.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.005)
+ }
+ },
+ },
+ {
// This tests the GC pacer's response to a small change in allocation rate.
name: "StepAlloc",
gcPercent: 100,
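The parameter fields in these test cases (allocRate, scanRate, growthRate, stackBytes, and so on) are driven by a small stream DSL (constant, ramp, sum, random, offset, unit, delay) defined later in mgcpacer_test.go. Here is a hedged sketch of the core idea; the bodies below are illustrative assumptions, not the file's exact implementations.

// float64Stream yields an endless sequence of float64s, one per call.
type float64Stream func() float64

// constant returns a stream that always yields c.
func constant(c float64) float64Stream {
	return func() float64 { return c }
}

// ramp returns a stream that moves linearly from 0 to height over
// length steps, then holds at height.
func ramp(height float64, length int) float64Stream {
	var step int
	return func() float64 {
		h := height * float64(step) / float64(length)
		if step < length {
			step++
		}
		return h
	}
}

// sum returns a stream yielding, at each step, f plus each stream in fs.
func (f float64Stream) sum(fs ...float64Stream) float64Stream {
	return func() float64 {
		s := f()
		for _, g := range fs {
			s += g()
		}
		return s
	}
}

Under this reading, constant(2.0).sum(ramp(-1.0, 12)) yields a heap growth rate that starts at 2.0 and eases down to 1.0 over the first 12 cycles.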
@@ -107,6 +172,47 @@ func TestGcPacer(t *testing.T) {
},
},
{
+ // Tests the pacer for a high GOGC value with a large heap growth happening
+ // in the middle. The purpose of the large heap growth is to check whether GC
+ // utilization ends up sensitive to it, or remains stable.
+ name: "HighGOGC",
+ gcPercent: 1500,
+ globalsBytes: 32 << 10,
+ nCores: 8,
+ allocRate: random(7, 0x53).offset(165),
+ scanRate: constant(1024.0),
+ growthRate: constant(2.0).sum(ramp(-1.0, 12), random(0.01, 0x1), unit(14).delay(25)),
+ scannableFrac: constant(1.0),
+ stackBytes: constant(8192),
+ length: 50,
+ checker: func(t *testing.T, c []gcCycleResult) {
+ n := len(c)
+ if goexperiment.PacerRedesign && n > 12 {
+ if n == 26 {
+ // In the 26th cycle there's a heap growth. Overshoot is expected to maintain
+ // a stable utilization, but we should *never* overshoot more than GOGC of
+ // the next cycle.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.90, 15)
+ } else {
+ // Give a wider goal range here. With such a high GOGC value we're going to be
+ // forced to undershoot.
+ //
+ // TODO(mknyszek): Instead of placing a 0.95 limit on the trigger, make the limit
+ // based on absolute bytes, similar to how the minimum heap size is determined.
+ assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.90, 1.05)
+ }
+
+ // Ensure utilization remains stable despite a growth in live heap size
+ // at GC #25. This test fails prior to the GC pacer redesign.
+ //
+ // Because GOGC is so large, we should also be really close to the goal utilization.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, GCGoalUtilization, GCGoalUtilization+0.03)
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.03)
+ }
+ },
+ },
+ {
// This test makes sure that in the face of a varying (in this case, oscillating) allocation
// rate, the pacer does a reasonably good job of staying abreast of the changes.
name: "OscAlloc",
@@ -126,7 +232,12 @@ func TestGcPacer(t *testing.T) {
// 1. Utilization isn't varying _too_ much, and
// 2. The pacer is mostly keeping up with the goal.
assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
- assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ if goexperiment.PacerRedesign {
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3)
+ } else {
+ // The old pacer is messier here, and needs a lot more tolerance.
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ }
}
},
},
@@ -149,7 +260,12 @@ func TestGcPacer(t *testing.T) {
// 1. Utilization isn't varying _too_ much, and
// 2. The pacer is mostly keeping up with the goal.
assertInRange(t, "goal ratio", c[n-1].goalRatio(), 0.95, 1.05)
- assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ if goexperiment.PacerRedesign {
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.3)
+ } else {
+ // The old pacer is messier here, and needs a lot more tolerance.
+ assertInRange(t, "GC utilization", c[n-1].gcUtilization, 0.25, 0.4)
+ }
}
},
},
@@ -177,7 +293,12 @@ func TestGcPacer(t *testing.T) {
// Unlike the other tests, GC utilization here will vary more and tend higher.
// Just make sure it's not going too crazy.
assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[n-2].gcUtilization, 0.05)
- assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.07)
+ if goexperiment.PacerRedesign {
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.05)
+ } else {
+ // The old pacer is messier here, and needs a little more tolerance.
+ assertInEpsilon(t, "GC utilization", c[n-1].gcUtilization, c[11].gcUtilization, 0.07)
+ }
}
},
},
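For context on the values compared throughout these checkers: each gcCycleResult records one simulated GC cycle, and goalRatio() measures how far the cycle's peak heap landed from its goal. A hedged sketch of the relevant shape follows; the field names are assumptions inferred from usage here, so see mgcpacer_test.go for the real type.

// gcCycleResult records the outcome of one simulated GC cycle.
type gcCycleResult struct {
	cycle         int
	heapPeak      uint64  // highest heap size reached during the cycle
	heapGoal      uint64  // the pacer's heap goal for that cycle
	gcUtilization float64 // fraction of CPU spent on GC during the cycle
}

// goalRatio reports peak heap relative to the goal: 1.0 means the cycle
// landed exactly on its goal, above 1.0 is overshoot, below is undershoot.
func (r *gcCycleResult) goalRatio() float64 {
	return float64(r.heapPeak) / float64(r.heapGoal)
}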