aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAustin Clements <austin@google.com>2015-09-30 11:52:54 -0400
committerAustin Clements <austin@google.com>2015-11-17 02:23:29 +0000
commit244294f821c9256394d29218ee1f17ab4313551f (patch)
treeb1fae3253949e71fe04fee28f1a20570c8d84382
parent36fe6f2d5d242d5097b7a3f1a95e8b5457914840 (diff)
downloadgo-244294f821c9256394d29218ee1f17ab4313551f.tar.gz
go-244294f821c9256394d29218ee1f17ab4313551f.zip
[release-branch.go1.5] runtime: adjust huge page flags only on huge page granularity
This fixes an issue where the runtime panics with "out of memory" or "cannot allocate memory" even though there's ample memory by reducing the number of memory mappings created by the memory allocator. Commit 7e1b61c worked around issue #8832 where Linux's transparent huge page support could dramatically increase the RSS of a Go process by setting the MADV_NOHUGEPAGE flag on any regions of pages released to the OS with MADV_DONTNEED. This had the side effect of also increasing the number of VMAs (memory mappings) in a Go address space because a separate VMA is needed for every region of the virtual address space with different flags. Unfortunately, by default, Linux limits the number of VMAs in an address space to 65530, and a large heap can quickly reach this limit when the runtime starts scavenging memory. This commit dramatically reduces the number of VMAs. It does this primarily by only adjusting the huge page flag at huge page granularity. With this change, on amd64, even a pessimal heap that alternates between MADV_NOHUGEPAGE and MADV_HUGEPAGE must reach 128GB to reach the VMA limit. Because of this rounding to huge page granularity, this change is also careful to leave large used and unused regions huge page-enabled. This change reduces the maximum number of VMAs during the runtime benchmarks with GODEBUG=scavenge=1 from 692 to 49. Fixes #12233. Change-Id: Ic397776d042f20d53783a1cacf122e2e2db00584 Reviewed-on: https://go-review.googlesource.com/15191 Reviewed-by: Keith Randall <khr@golang.org> Reviewed-on: https://go-review.googlesource.com/16980 Run-TryBot: Austin Clements <austin@google.com> Reviewed-by: Ian Lance Taylor <iant@golang.org> Reviewed-by: Russ Cox <rsc@golang.org>
-rw-r--r--src/runtime/mem_linux.go94
1 files changed, 77 insertions, 17 deletions
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
index f988e75a17..e8c8999847 100644
--- a/src/runtime/mem_linux.go
+++ b/src/runtime/mem_linux.go
@@ -69,29 +69,89 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
}
func sysUnused(v unsafe.Pointer, n uintptr) {
- var s uintptr = hugePageSize // division by constant 0 is a compile-time error :(
- if s != 0 && (uintptr(v)%s != 0 || n%s != 0) {
- // See issue 8832
- // Linux kernel bug: https://bugzilla.kernel.org/show_bug.cgi?id=93111
- // Mark the region as NOHUGEPAGE so the kernel's khugepaged
- // doesn't undo our DONTNEED request. khugepaged likes to migrate
- // regions which are only partially mapped to huge pages, including
- // regions with some DONTNEED marks. That needlessly allocates physical
- // memory for our DONTNEED regions.
- madvise(v, n, _MADV_NOHUGEPAGE)
+ // By default, Linux's "transparent huge page" support will
+ // merge pages into a huge page if there's even a single
+ // present regular page, undoing the effects of the DONTNEED
+ // below. On amd64, that means khugepaged can turn a single
+ // 4KB page to 2MB, bloating the process's RSS by as much as
+ // 512X. (See issue #8832 and Linux kernel bug
+ // https://bugzilla.kernel.org/show_bug.cgi?id=93111)
+ //
+ // To work around this, we explicitly disable transparent huge
+ // pages when we release pages of the heap. However, we have
+ // to do this carefully because changing this flag tends to
+ // split the VMA (memory mapping) containing v in to three
+ // VMAs in order to track the different values of the
+ // MADV_NOHUGEPAGE flag in the different regions. There's a
+ // default limit of 65530 VMAs per address space (sysctl
+ // vm.max_map_count), so we must be careful not to create too
+ // many VMAs (see issue #12233).
+ //
+ // Since huge pages are huge, there's little use in adjusting
+ // the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
+ // exploding the number of VMAs by only adjusting the
+ // MADV_NOHUGEPAGE flag on a large granularity. This still
+ // gets most of the benefit of huge pages while keeping the
+ // number of VMAs under control. With hugePageSize = 2MB, even
+ // a pessimal heap can reach 128GB before running out of VMAs.
+ if hugePageSize != 0 {
+ var s uintptr = hugePageSize // division by constant 0 is a compile-time error :(
+
+ // If it's a large allocation, we want to leave huge
+ // pages enabled. Hence, we only adjust the huge page
+ // flag on the huge pages containing v and v+n-1, and
+ // only if those aren't aligned.
+ var head, tail uintptr
+ if uintptr(v)%s != 0 {
+ // Compute huge page containing v.
+ head = uintptr(v) &^ (s - 1)
+ }
+ if (uintptr(v)+n)%s != 0 {
+ // Compute huge page containing v+n-1.
+ tail = (uintptr(v) + n - 1) &^ (s - 1)
+ }
+
+ // Note that madvise will return EINVAL if the flag is
+ // already set, which is quite likely. We ignore
+ // errors.
+ if head != 0 && head+hugePageSize == tail {
+ // head and tail are different but adjacent,
+ // so do this in one call.
+ madvise(unsafe.Pointer(head), 2*hugePageSize, _MADV_NOHUGEPAGE)
+ } else {
+ // Advise the huge pages containing v and v+n-1.
+ if head != 0 {
+ madvise(unsafe.Pointer(head), hugePageSize, _MADV_NOHUGEPAGE)
+ }
+ if tail != 0 && tail != head {
+ madvise(unsafe.Pointer(tail), hugePageSize, _MADV_NOHUGEPAGE)
+ }
+ }
}
+
madvise(v, n, _MADV_DONTNEED)
}
func sysUsed(v unsafe.Pointer, n uintptr) {
if hugePageSize != 0 {
- // Undo the NOHUGEPAGE marks from sysUnused. There is no alignment check
- // around this call as spans may have been merged in the interim.
- // Note that this might enable huge pages for regions which were
- // previously disabled. Unfortunately there is no easy way to detect
- // what the previous state was, and in any case we probably want huge
- // pages to back our heap if the kernel can arrange that.
- madvise(v, n, _MADV_HUGEPAGE)
+ // Partially undo the NOHUGEPAGE marks from sysUnused
+ // for whole huge pages between v and v+n. This may
+ // leave huge pages off at the end points v and v+n
+ // even though allocations may cover these entire huge
+ // pages. We could detect this and undo NOHUGEPAGE on
+ // the end points as well, but it's probably not worth
+ // the cost because when neighboring allocations are
+ // freed sysUnused will just set NOHUGEPAGE again.
+ var s uintptr = hugePageSize
+
+ // Round v up to a huge page boundary.
+ beg := (uintptr(v) + (s - 1)) &^ (s - 1)
+ // Round v+n down to a huge page boundary.
+ end := (uintptr(v) + n) &^ (s - 1)
+
+ if beg < end {
+ madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
+ }
}
}