From b1be664d64750bccd5081d51b585036c931b5cf0 Mon Sep 17 00:00:00 2001
From: Michael Anthony Knyszek
Date: Thu, 9 Jun 2022 18:25:01 +0000
Subject: [release-branch.go1.17] runtime: store consistent total allocation
 stats as uint64

Currently the consistent total allocation stats are managed as
uintptrs, which means they can easily overflow on 32-bit systems. Fix
this by storing these stats as uint64s. This will cause some minor
performance degradation on 32-bit systems, but there really isn't a way
around this, and it affects the correctness of the metrics we export.

For #52680.
Fixes #52688.

Change-Id: I8b1926116e899ae9f03d58e0320bcb9264945b3e
Reviewed-on: https://go-review.googlesource.com/c/go/+/411496
Run-TryBot: Michael Knyszek
TryBot-Result: Gopher Robot
Reviewed-by: Michael Pratt
---
 src/runtime/mcache.go   | 16 ++++++++--------
 src/runtime/metrics.go  | 12 ++++++------
 src/runtime/mgcsweep.go |  6 +++---
 src/runtime/mstats.go   | 42 ++++++++++++++++++++++--------------------
 4 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
index a9e959109a..99303358be 100644
--- a/src/runtime/mcache.go
+++ b/src/runtime/mcache.go
@@ -175,11 +175,11 @@ func (c *mcache) refill(spc spanClass) {
 	// Assume all objects from this span will be allocated in the
 	// mcache. If it gets uncached, we'll adjust this.
 	stats := memstats.heapStats.acquire()
-	atomic.Xadduintptr(&stats.smallAllocCount[spc.sizeclass()], uintptr(s.nelems)-uintptr(s.allocCount))
+	atomic.Xadd64(&stats.smallAllocCount[spc.sizeclass()], int64(s.nelems)-int64(s.allocCount))
 
 	// Flush tinyAllocs.
 	if spc == tinySpanClass {
-		atomic.Xadduintptr(&stats.tinyAllocCount, c.tinyAllocs)
+		atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
 		c.tinyAllocs = 0
 	}
 	memstats.heapStats.release()
@@ -229,8 +229,8 @@ func (c *mcache) allocLarge(size uintptr, needzero bool, noscan bool) (*mspan, b
 		throw("out of memory")
 	}
 	stats := memstats.heapStats.acquire()
-	atomic.Xadduintptr(&stats.largeAlloc, npages*pageSize)
-	atomic.Xadduintptr(&stats.largeAllocCount, 1)
+	atomic.Xadd64(&stats.largeAlloc, int64(npages*pageSize))
+	atomic.Xadd64(&stats.largeAllocCount, 1)
 	memstats.heapStats.release()
 
 	// Update gcController.heapLive and revise pacing if needed.
@@ -261,9 +261,9 @@ func (c *mcache) releaseAll() {
 		s := c.alloc[i]
 		if s != &emptymspan {
 			// Adjust nsmallalloc in case the span wasn't fully allocated.
-			n := uintptr(s.nelems) - uintptr(s.allocCount)
+			n := int64(s.nelems) - int64(s.allocCount)
 			stats := memstats.heapStats.acquire()
-			atomic.Xadduintptr(&stats.smallAllocCount[spanClass(i).sizeclass()], -n)
+			atomic.Xadd64(&stats.smallAllocCount[spanClass(i).sizeclass()], -n)
 			memstats.heapStats.release()
 			if s.sweepgen != sg+1 {
 				// refill conservatively counted unallocated slots in gcController.heapLive.
@@ -273,7 +273,7 @@ func (c *mcache) releaseAll() {
 				// gcController.heapLive was totally recomputed since
 				// caching this span, so we don't do this for
 				// stale spans.
-				atomic.Xadd64(&gcController.heapLive, -int64(n)*int64(s.elemsize))
+				atomic.Xadd64(&gcController.heapLive, -n*int64(s.elemsize))
 			}
 			// Release the span to the mcentral.
 			mheap_.central[i].mcentral.uncacheSpan(s)
@@ -286,7 +286,7 @@ func (c *mcache) releaseAll() {
 
 	// Flush tinyAllocs.
 	stats := memstats.heapStats.acquire()
-	atomic.Xadduintptr(&stats.tinyAllocCount, c.tinyAllocs)
+	atomic.Xadd64(&stats.tinyAllocCount, int64(c.tinyAllocs))
 	c.tinyAllocs = 0
 	memstats.heapStats.release()
 
diff --git a/src/runtime/metrics.go b/src/runtime/metrics.go
index ba0a920a5d..4af2b0aef9 100644
--- a/src/runtime/metrics.go
+++ b/src/runtime/metrics.go
@@ -388,13 +388,13 @@ func (a *heapStatsAggregate) compute() {
 	memstats.heapStats.read(&a.heapStatsDelta)
 
 	// Calculate derived stats.
-	a.totalAllocs = uint64(a.largeAllocCount)
-	a.totalFrees = uint64(a.largeFreeCount)
-	a.totalAllocated = uint64(a.largeAlloc)
-	a.totalFreed = uint64(a.largeFree)
+	a.totalAllocs = a.largeAllocCount
+	a.totalFrees = a.largeFreeCount
+	a.totalAllocated = a.largeAlloc
+	a.totalFreed = a.largeFree
 	for i := range a.smallAllocCount {
-		na := uint64(a.smallAllocCount[i])
-		nf := uint64(a.smallFreeCount[i])
+		na := a.smallAllocCount[i]
+		nf := a.smallFreeCount[i]
 		a.totalAllocs += na
 		a.totalFrees += nf
 		a.totalAllocated += na * uint64(class_to_size[i])
diff --git a/src/runtime/mgcsweep.go b/src/runtime/mgcsweep.go
index 8fe3a65340..468df4d097 100644
--- a/src/runtime/mgcsweep.go
+++ b/src/runtime/mgcsweep.go
@@ -581,7 +581,7 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 		// free slots zeroed.
 		s.needzero = 1
 		stats := memstats.heapStats.acquire()
-		atomic.Xadduintptr(&stats.smallFreeCount[spc.sizeclass()], uintptr(nfreed))
+		atomic.Xadd64(&stats.smallFreeCount[spc.sizeclass()], int64(nfreed))
 		memstats.heapStats.release()
 	}
 	if !preserve {
@@ -628,8 +628,8 @@ func (sl *sweepLocked) sweep(preserve bool) bool {
 			mheap_.freeSpan(s)
 		}
 		stats := memstats.heapStats.acquire()
-		atomic.Xadduintptr(&stats.largeFreeCount, 1)
-		atomic.Xadduintptr(&stats.largeFree, size)
+		atomic.Xadd64(&stats.largeFreeCount, 1)
+		atomic.Xadd64(&stats.largeFree, int64(size))
 		memstats.heapStats.release()
 		return true
 	}
diff --git a/src/runtime/mstats.go b/src/runtime/mstats.go
index eeb2a7b4bc..94c507b45f 100644
--- a/src/runtime/mstats.go
+++ b/src/runtime/mstats.go
@@ -8,7 +8,6 @@ package runtime
 
 import (
 	"runtime/internal/atomic"
-	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -565,29 +564,29 @@ func updatememstats() {
 	memstats.heapStats.unsafeRead(&consStats)
 
 	// Collect large allocation stats.
-	totalAlloc := uint64(consStats.largeAlloc)
-	memstats.nmalloc += uint64(consStats.largeAllocCount)
-	totalFree := uint64(consStats.largeFree)
-	memstats.nfree += uint64(consStats.largeFreeCount)
+	totalAlloc := consStats.largeAlloc
+	memstats.nmalloc += consStats.largeAllocCount
+	totalFree := consStats.largeFree
+	memstats.nfree += consStats.largeFreeCount
 
 	// Collect per-sizeclass stats.
 	for i := 0; i < _NumSizeClasses; i++ {
 		// Malloc stats.
-		a := uint64(consStats.smallAllocCount[i])
+		a := consStats.smallAllocCount[i]
 		totalAlloc += a * uint64(class_to_size[i])
 		memstats.nmalloc += a
 		memstats.by_size[i].nmalloc = a
 
 		// Free stats.
-		f := uint64(consStats.smallFreeCount[i])
+		f := consStats.smallFreeCount[i]
 		totalFree += f * uint64(class_to_size[i])
 		memstats.nfree += f
 		memstats.by_size[i].nfree = f
 	}
 
 	// Account for tiny allocations.
-	memstats.nfree += uint64(consStats.tinyAllocCount)
-	memstats.nmalloc += uint64(consStats.tinyAllocCount)
+	memstats.nfree += consStats.tinyAllocCount
+	memstats.nmalloc += consStats.tinyAllocCount
 
 	// Calculate derived stats.
 	memstats.total_alloc = totalAlloc
@@ -703,17 +702,20 @@ type heapStatsDelta struct {
 	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits
 
 	// Allocator stats.
-	tinyAllocCount  uintptr                  // number of tiny allocations
-	largeAlloc      uintptr                  // bytes allocated for large objects
-	largeAllocCount uintptr                  // number of large object allocations
-	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
-	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
-	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
-	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)
-
-	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
-	// Only necessary on 32-bit platforms.
-	_ [(sys.PtrSize / 4) % 2]uint32
+	//
+	// These are all uint64 because they're cumulative, and could quickly wrap
+	// around otherwise.
+	tinyAllocCount  uint64                  // number of tiny allocations
+	largeAlloc      uint64                  // bytes allocated for large objects
+	largeAllocCount uint64                  // number of large object allocations
+	smallAllocCount [_NumSizeClasses]uint64 // number of allocs for small objects
+	largeFree       uint64                  // bytes freed for large objects (>maxSmallSize)
+	largeFreeCount  uint64                  // number of frees for large objects (>maxSmallSize)
+	smallFreeCount  [_NumSizeClasses]uint64 // number of frees for small objects (<=maxSmallSize)
+
+	// NOTE: This struct must be a multiple of 8 bytes in size because it
+	// is stored in an array. If it's not, atomic accesses to the above
+	// fields may be unaligned and fail on 32-bit platforms.
 }
 
 // merge adds in the deltas from b into a.
-- 
cgit v1.2.3-54-g00ecf
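
Background on the overflow the commit message describes: on a 32-bit platform uintptr is 32 bits wide, so a cumulative byte counter wraps once total allocation passes 4 GiB, while a uint64 counter keeps counting. The short, self-contained Go sketch below only illustrates that width difference; it is not runtime code. It uses sync/atomic in place of the runtime-internal runtime/internal/atomic package, and the stats32/stats64 types and allocation sizes are invented for illustration.

package main

import (
	"fmt"
	"sync/atomic"
)

// stats32 models the old layout: a cumulative counter the width of a
// 32-bit uintptr (written here explicitly as uint32).
type stats32 struct{ largeAlloc uint32 }

// stats64 models the new layout: the same counter widened to uint64.
type stats64 struct{ largeAlloc uint64 }

func main() {
	var before stats32
	var after stats64

	// Simulate 5000 large allocations of 1 MiB each (~4.9 GiB total),
	// bumping both counters atomically, as the runtime does.
	const allocBytes = 1 << 20
	for i := 0; i < 5000; i++ {
		atomic.AddUint32(&before.largeAlloc, allocBytes) // wraps past 4 GiB
		atomic.AddUint64(&after.largeAlloc, allocBytes)  // keeps counting
	}

	fmt.Printf("uintptr-width counter: %d bytes (wrapped)\n", before.largeAlloc)
	fmt.Printf("uint64 counter:        %d bytes\n", after.largeAlloc)
}

Totals such as MemStats.TotalAlloc and the cumulative runtime/metrics counters are monotonically increasing in exactly this way, which is why the patch widens the consistent stats to uint64 rather than leaving them uintptr-sized.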