aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Anthony Knyszek <mknyszek@google.com>2023-11-27 22:27:32 +0000
committerGopher Robot <gobot@golang.org>2024-01-04 21:33:30 +0000
commit95afc744a7abd78dcaf06423583821ead5cc7c1e (patch)
treead99155bc106ab2c975ec008897ba53af2cc5565
parent8cb86b5f85ce695cba8cb54fd9780d39d9de924d (diff)
downloadgo-95afc744a7abd78dcaf06423583821ead5cc7c1e.tar.gz
go-95afc744a7abd78dcaf06423583821ead5cc7c1e.zip
[release-branch.go1.20] runtime: put ReadMemStats debug assertions behind a double-check mode
ReadMemStats has a few assertions it makes about the consistency of the stats it's about to produce. Specifically, how those stats line up with runtime-internal stats. These checks are generally useful, but crashing just because some stats are wrong is a heavy price to pay. For a long time this wasn't a problem, but very recently it became a real problem. It turns out that there's real benign skew that can happen wherein sysmon (which doesn't synchronize with a STW) generates a trace event when tracing is enabled, and may mutate some stats while ReadMemStats is running its checks. Fix this by synchronizing with both sysmon and the tracer. This is a bit heavy-handed, but better that than false positives. Also, put the checks behind a debug mode. We want to reduce the risk of backporting this change, and again, it's not great to crash just because user-facing stats are off. Still, enable this debug mode during the runtime tests so we don't lose quite as much coverage from disabling these checks by default. For #64401. Fixes #64409. Change-Id: I9adb3e5c7161d207648d07373a11da8a5f0fda9a Reviewed-on: https://go-review.googlesource.com/c/go/+/545277 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Felix Geisendörfer <felix.geisendoerfer@datadoghq.com> (cherry picked from commit b2efd1de97402ec4b8fb4e9e0ec29c8e49e8e200) Reviewed-on: https://go-review.googlesource.com/c/go/+/545556 Auto-Submit: Matthew Dempsky <mdempsky@google.com> TryBot-Bypass: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Matthew Dempsky <mdempsky@google.com>
-rw-r--r--src/runtime/export_test.go2
-rw-r--r--src/runtime/gc_test.go5
-rw-r--r--src/runtime/mstats.go114
3 files changed, 71 insertions, 50 deletions
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index e7476e606b..2290f7c728 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -336,6 +336,8 @@ func ReadMetricsSlow(memStats *MemStats, samplesp unsafe.Pointer, len, cap int)
startTheWorld()
}
+var DoubleCheckReadMemStats = &doubleCheckReadMemStats
+
// ReadMemStatsSlow returns both the runtime-computed MemStats and
// MemStats accumulated by scanning the heap.
func ReadMemStatsSlow() (base, slow MemStats) {
diff --git a/src/runtime/gc_test.go b/src/runtime/gc_test.go
index 0b2c972d3f..1a1655eae6 100644
--- a/src/runtime/gc_test.go
+++ b/src/runtime/gc_test.go
@@ -573,6 +573,11 @@ func TestPageAccounting(t *testing.T) {
}
}
+func init() {
+ // Enable ReadMemStats' double-check mode.
+ *runtime.DoubleCheckReadMemStats = true
+}
+
func TestReadMemStats(t *testing.T) {
base, slow := runtime.ReadMemStatsSlow()
if base != slow {
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index 3a5273f361..8424946e77 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -356,6 +356,11 @@ func ReadMemStats(m *MemStats) {
startTheWorld()
}
+// doubleCheckReadMemStats controls a double-check mode for ReadMemStats that
+// ensures consistency between the values that ReadMemStats is using and the
+// runtime-internal stats.
+var doubleCheckReadMemStats = false
+
// readmemstats_m populates stats for internal runtime values.
//
// The world must be stopped.
@@ -430,56 +435,65 @@ func readmemstats_m(stats *MemStats) {
heapGoal := gcController.heapGoal()
- // The world is stopped, so the consistent stats (after aggregation)
- // should be identical to some combination of memstats. In particular:
- //
- // * memstats.heapInUse == inHeap
- // * memstats.heapReleased == released
- // * memstats.heapInUse + memstats.heapFree == committed - inStacks - inWorkBufs - inPtrScalarBits
- // * memstats.totalAlloc == totalAlloc
- // * memstats.totalFree == totalFree
- //
- // Check if that's actually true.
- //
- // TODO(mknyszek): Maybe don't throw here. It would be bad if a
- // bug in otherwise benign accounting caused the whole application
- // to crash.
- if gcController.heapInUse.load() != uint64(consStats.inHeap) {
- print("runtime: heapInUse=", gcController.heapInUse.load(), "\n")
- print("runtime: consistent value=", consStats.inHeap, "\n")
- throw("heapInUse and consistent stats are not equal")
- }
- if gcController.heapReleased.load() != uint64(consStats.released) {
- print("runtime: heapReleased=", gcController.heapReleased.load(), "\n")
- print("runtime: consistent value=", consStats.released, "\n")
- throw("heapReleased and consistent stats are not equal")
- }
- heapRetained := gcController.heapInUse.load() + gcController.heapFree.load()
- consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
- if heapRetained != consRetained {
- print("runtime: global value=", heapRetained, "\n")
- print("runtime: consistent value=", consRetained, "\n")
- throw("measures of the retained heap are not equal")
- }
- if gcController.totalAlloc.Load() != totalAlloc {
- print("runtime: totalAlloc=", gcController.totalAlloc.Load(), "\n")
- print("runtime: consistent value=", totalAlloc, "\n")
- throw("totalAlloc and consistent stats are not equal")
- }
- if gcController.totalFree.Load() != totalFree {
- print("runtime: totalFree=", gcController.totalFree.Load(), "\n")
- print("runtime: consistent value=", totalFree, "\n")
- throw("totalFree and consistent stats are not equal")
- }
- // Also check that mappedReady lines up with totalMapped - released.
- // This isn't really the same type of "make sure consistent stats line up" situation,
- // but this is an opportune time to check.
- if gcController.mappedReady.Load() != totalMapped-uint64(consStats.released) {
- print("runtime: mappedReady=", gcController.mappedReady.Load(), "\n")
- print("runtime: totalMapped=", totalMapped, "\n")
- print("runtime: released=", uint64(consStats.released), "\n")
- print("runtime: totalMapped-released=", totalMapped-uint64(consStats.released), "\n")
- throw("mappedReady and other memstats are not equal")
+ if doubleCheckReadMemStats {
+ // Only check this if we're debugging. It would be bad to crash an application
+ // just because the debugging stats are wrong. We mostly rely on tests to catch
+ // these issues, and we enable the double check mode for tests.
+ //
+ // The world is stopped, so the consistent stats (after aggregation)
+ // should be identical to some combination of memstats. In particular:
+ //
+ // * memstats.heapInUse == inHeap
+ // * memstats.heapReleased == released
+ // * memstats.heapInUse + memstats.heapFree == committed - inStacks - inWorkBufs - inPtrScalarBits
+ // * memstats.totalAlloc == totalAlloc
+ // * memstats.totalFree == totalFree
+ //
+ // Check if that's actually true.
+ //
+ // Prevent sysmon and the tracer from skewing the stats since they can
+ // act without synchronizing with a STW. See #64401.
+ lock(&sched.sysmonlock)
+ lock(&trace.lock)
+ if gcController.heapInUse.load() != uint64(consStats.inHeap) {
+ print("runtime: heapInUse=", gcController.heapInUse.load(), "\n")
+ print("runtime: consistent value=", consStats.inHeap, "\n")
+ throw("heapInUse and consistent stats are not equal")
+ }
+ if gcController.heapReleased.load() != uint64(consStats.released) {
+ print("runtime: heapReleased=", gcController.heapReleased.load(), "\n")
+ print("runtime: consistent value=", consStats.released, "\n")
+ throw("heapReleased and consistent stats are not equal")
+ }
+ heapRetained := gcController.heapInUse.load() + gcController.heapFree.load()
+ consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
+ if heapRetained != consRetained {
+ print("runtime: global value=", heapRetained, "\n")
+ print("runtime: consistent value=", consRetained, "\n")
+ throw("measures of the retained heap are not equal")
+ }
+ if gcController.totalAlloc.Load() != totalAlloc {
+ print("runtime: totalAlloc=", gcController.totalAlloc.Load(), "\n")
+ print("runtime: consistent value=", totalAlloc, "\n")
+ throw("totalAlloc and consistent stats are not equal")
+ }
+ if gcController.totalFree.Load() != totalFree {
+ print("runtime: totalFree=", gcController.totalFree.Load(), "\n")
+ print("runtime: consistent value=", totalFree, "\n")
+ throw("totalFree and consistent stats are not equal")
+ }
+ // Also check that mappedReady lines up with totalMapped - released.
+ // This isn't really the same type of "make sure consistent stats line up" situation,
+ // but this is an opportune time to check.
+ if gcController.mappedReady.Load() != totalMapped-uint64(consStats.released) {
+ print("runtime: mappedReady=", gcController.mappedReady.Load(), "\n")
+ print("runtime: totalMapped=", totalMapped, "\n")
+ print("runtime: released=", uint64(consStats.released), "\n")
+ print("runtime: totalMapped-released=", totalMapped-uint64(consStats.released), "\n")
+ throw("mappedReady and other memstats are not equal")
+ }
+ unlock(&trace.lock)
+ unlock(&sched.sysmonlock)
}
// We've calculated all the values we need. Now, populate stats.