aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIan Lance Taylor <iant@golang.org>2020-03-11 21:51:09 -0700
committerIan Lance Taylor <iant@golang.org>2020-03-16 19:55:30 +0000
commitc5125098b23067a274a49aaa0c39a12abcc45704 (patch)
tree26e20504ca108609378bff8afdbfb7655c1c21d4
parentadba22a9ae10fa58704ee75bd6d7c8086df35350 (diff)
downloadgo-c5125098b23067a274a49aaa0c39a12abcc45704.tar.gz
go-c5125098b23067a274a49aaa0c39a12abcc45704.zip
[release-branch.go1.14] runtime: don't crash on mlock failure
Instead, note that mlock has failed, start trying the mitigation of touching the signal stack before sending a preemption signal, and, if the program crashes, mention the possible problem and a wiki page describing the issue (https://golang.org/wiki/LinuxKernelSignalVectorBug). Tested on a kernel in the buggy version range, but with the patch, by using `ulimit -l 0`. For #37436 Fixes #37807 Change-Id: I072aadb2101496dffd655e442fa5c367dad46ce8 Reviewed-on: https://go-review.googlesource.com/c/go/+/223121 Run-TryBot: Ian Lance Taylor <iant@golang.org> Reviewed-by: Austin Clements <austin@google.com> Reviewed-by: Keith Randall <khr@golang.org> (cherry picked from commit b851e51160bc8ed412e229152b430b75e7ce56f9) Reviewed-on: https://go-review.googlesource.com/c/go/+/223417 TryBot-Result: Gobot Gobot <gobot@golang.org>
-rw-r--r--src/runtime/os_linux.go15
-rw-r--r--src/runtime/os_linux_x86.go35
-rw-r--r--src/runtime/panic.go10
3 files changed, 52 insertions, 8 deletions
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index e0e3f4e341..d8c1827852 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -5,6 +5,7 @@
package runtime
import (
+ "runtime/internal/atomic"
"runtime/internal/sys"
"unsafe"
)
@@ -479,7 +480,21 @@ func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
func getpid() int
func tgkill(tgid, tid, sig int)
+// touchStackBeforeSignal stores an errno value. If non-zero, it means
+// that we should touch the signal stack before sending a signal.
+// This is used on systems that have a bug when the signal stack must
+// be faulted in. See #35777 and #37436.
+//
+// This is accessed atomically as it is set and read in different threads.
+//
+// TODO(austin): Remove this after Go 1.15 when we remove the
+// mlockGsignal workaround.
+var touchStackBeforeSignal uint32
+
// signalM sends a signal to mp.
func signalM(mp *m, sig int) {
+ if atomic.Load(&touchStackBeforeSignal) != 0 {
+ atomic.Cas((*uint32)(unsafe.Pointer(mp.gsignal.stack.hi-4)), 0, 0)
+ }
tgkill(getpid(), int(mp.procid), sig)
}
diff --git a/src/runtime/os_linux_x86.go b/src/runtime/os_linux_x86.go
index 0e1c9185b1..d001e6ee59 100644
--- a/src/runtime/os_linux_x86.go
+++ b/src/runtime/os_linux_x86.go
@@ -7,6 +7,8 @@
package runtime
+import "runtime/internal/atomic"
+
//go:noescape
func uname(utsname *new_utsname) int
@@ -58,17 +60,34 @@ func osArchInit() {
if m0.gsignal != nil {
throw("gsignal quirk too late")
}
+ throwReportQuirk = throwBadKernel
}
}
func mlockGsignal(gsignal *g) {
- if err := mlock(gsignal.stack.hi-physPageSize, physPageSize); err < 0 {
- printlock()
- println("runtime: mlock of signal stack failed:", -err)
- if err == -_ENOMEM {
- println("runtime: increase the mlock limit (ulimit -l) or")
- }
- println("runtime: update your kernel to 5.3.15+, 5.4.2+, or 5.5+")
- throw("mlock failed")
+ if atomic.Load(&touchStackBeforeSignal) != 0 {
+ // mlock has already failed, don't try again.
+ return
+ }
+
+ // This mlock call may fail, but we don't report the failure.
+ // Instead, if something goes badly wrong, we rely on prepareSignalM
+ // and throwBadKernel to do further mitigation and to report a problem
+ // to the user if mitigation fails. This is because many
+ // systems have a limit on the total mlock size, and many kernels
+ // that appear to have bad versions are actually patched to avoid the
+ // bug described above. We want Go 1.14 to run on those systems.
+ // See #37436.
+ if errno := mlock(gsignal.stack.hi-physPageSize, physPageSize); errno < 0 {
+ atomic.Store(&touchStackBeforeSignal, uint32(-errno))
+ }
+}
+
+// throwBadKernel is called, via throwReportQuirk, by throw.
+func throwBadKernel() {
+ if errno := atomic.Load(&touchStackBeforeSignal); errno != 0 {
+ println("runtime: note: your Linux kernel may be buggy")
+ println("runtime: note: see https://golang.org/wiki/LinuxKernelSignalVectorBug")
+ println("runtime: note: mlock workaround for kernel bug failed with errno", errno)
}
}
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index c6ab1bac3f..28b5cbefcc 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -1281,6 +1281,12 @@ func startpanic_m() bool {
}
}
+// throwReportQuirk, if non-nil, is called by throw after dumping the stacks.
+//
+// TODO(austin): Remove this after Go 1.15 when we remove the
+// mlockGsignal workaround.
+var throwReportQuirk func()
+
var didothers bool
var deadlock mutex
@@ -1327,6 +1333,10 @@ func dopanic_m(gp *g, pc, sp uintptr) bool {
printDebugLog()
+ if throwReportQuirk != nil {
+ throwReportQuirk()
+ }
+
return docrash
}