| author | David Chase <drchase@google.com> | 2020-11-17 19:54:31 -0500 |
|---|---|---|
| committer | David Chase <drchase@google.com> | 2021-04-30 19:41:02 +0000 |
| commit | 0bbfc5c31eb4cb77f12e10c73d5462377e66b06c | |
| tree | 7641ae1ee24c4e0c425ef8e023844506398de440 /src/runtime/malloc.go | |
| parent | 41afd3af42bd8028a1740c30a2b745105b4063d2 | |
runtime: break up large calls to memclrNoHeapPointers to allow preemption
If something "huge" is allocated and the zeroing is trivial (no pointers
involved), then zero it in chunks in a loop so that preemption can occur,
rather than in a single non-preemptible call.
Benchmarking suggests that 256K is the best chunk size; a rough sketch of such a benchmark appears below.
Updates #42642.
Change-Id: I94015e467eaa098c59870e479d6d83bc88efbfb4
Reviewed-on: https://go-review.googlesource.com/c/go/+/270943
Trust: David Chase <drchase@google.com>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
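
For context, here is a minimal sketch (not part of this CL) of the kind of benchmark that exercises the affected path: a large pointer-free allocation goes through mallocgc's large-object branch, so its zeroing cost shows up here. The benchmark name and the 64 MiB size are illustrative choices, not taken from the change.

```go
package malloc_test

import "testing"

// sink keeps each allocation alive so the compiler cannot elide it.
var sink []byte

// BenchmarkHugeNoScanAlloc allocates a large pointer-free buffer, which
// takes mallocgc's large-object path; after this change, such buffers
// are zeroed in preemptible 256K chunks rather than one long call.
func BenchmarkHugeNoScanAlloc(b *testing.B) {
	for i := 0; i < b.N; i++ {
		sink = make([]byte, 64<<20) // 64 MiB: "huge" and pointer-free
	}
}
```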
Diffstat (limited to 'src/runtime/malloc.go')
-rw-r--r-- | src/runtime/malloc.go | 40 |
1 file changed, 39 insertions, 1 deletion
```diff
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 3db884f498..81e5225883 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -979,6 +979,9 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 	var span *mspan
 	var x unsafe.Pointer
 	noscan := typ == nil || typ.ptrdata == 0
+	// In some cases block zeroing can profitably (for latency reduction purposes)
+	// be delayed till preemption is possible; isZeroed tracks that state.
+	isZeroed := true
 	if size <= maxSmallSize {
 		if noscan && size < maxTinySize {
 			// Tiny allocator.
@@ -1074,7 +1077,9 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 		}
 	} else {
 		shouldhelpgc = true
-		span = c.allocLarge(size, needzero, noscan)
+		// For large allocations, keep track of zeroed state so that
+		// bulk zeroing can happen later in a preemptible context.
+		span, isZeroed = c.allocLarge(size, needzero && !noscan, noscan)
 		span.freeindex = 1
 		span.allocCount = 1
 		x = unsafe.Pointer(span.base())
@@ -1133,6 +1138,12 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 	mp.mallocing = 0
 	releasem(mp)
 
+	// Pointer-free data can be zeroed late, in a context where preemption can occur.
+	// x will keep the memory alive.
+	if !isZeroed && needzero {
+		memclrNoHeapPointersChunked(size, x)
+	}
+
 	if debug.malloc {
 		if debug.allocfreetrace != 0 {
 			tracealloc(x, size, typ)
@@ -1185,6 +1196,33 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
 	return x
 }
 
+// memclrNoHeapPointersChunked repeatedly calls memclrNoHeapPointers
+// on chunks of the buffer to be zeroed, with opportunities for preemption
+// along the way. memclrNoHeapPointers contains no safepoints and also
+// cannot be preemptively scheduled, so this provides a still-efficient
+// block clear that can also be preempted on a reasonable granularity.
+//
+// Use this with care; if the data being cleared is tagged to contain
+// pointers, this allows the GC to run before it is all cleared.
+func memclrNoHeapPointersChunked(size uintptr, x unsafe.Pointer) {
+	v := uintptr(x)
+	// Got this from benchmarking: 128k is too small, 512k is too large.
+	const chunkBytes = 256 * 1024
+	vsize := v + size
+	for voff := v; voff < vsize; voff = voff + chunkBytes {
+		if getg().preempt {
+			// May hold locks, e.g., profiling.
+			goschedguarded()
+		}
+		// Clear min(avail, chunkBytes) bytes.
+		n := vsize - voff
+		if n > chunkBytes {
+			n = chunkBytes
+		}
+		memclrNoHeapPointers(unsafe.Pointer(voff), n)
+	}
+}
+
 // implementation of new builtin
 // compiler (both frontend and SSA backend) knows the signature
 // of this function
```
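
To illustrate the chunking pattern outside the runtime (user code cannot call memclrNoHeapPointers or goschedguarded), a rough user-level analogue might look like the following. The clearChunked function and the 10 MiB buffer are invented for this example; ordinary Go code is already preemptible, so this only demonstrates the shape of the technique, with runtime.Gosched as a crude stand-in for the runtime's guarded yield.

```go
package main

import "runtime"

// chunkBytes mirrors the granularity the CL settled on after benchmarking.
const chunkBytes = 256 * 1024

// clearChunked zeroes buf one chunk at a time, yielding between chunks.
func clearChunked(buf []byte) {
	for off := 0; off < len(buf); off += chunkBytes {
		end := off + chunkBytes
		if end > len(buf) {
			end = len(buf)
		}
		chunk := buf[off:end]
		for i := range chunk {
			chunk[i] = 0 // the compiler lowers this loop to a bulk clear
		}
		runtime.Gosched() // yield between chunks, like goschedguarded in the CL
	}
}

func main() {
	clearChunked(make([]byte, 10<<20))
}
```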