runtime: implement GOTRACEBACK=crash for linux/386

Change-Id: I401ce8d612160a4f4ee617bddca6827fa544763a Reviewed-on: https://go-review.googlesource.com/11087 Reviewed-by: Austin Clements <austin@google.com>
author: Russ Cox <rsc@golang.org> 2015-06-16 16:37:48 -0400
committer: Russ Cox <rsc@golang.org> 2015-06-16 20:47:47 +0000
commit: 142e434006e371440c8be4494d27d1774fe161e5 (patch)
tree: dadc0b5b2fa0f9456b7051d02ef8504d3741b016 /src/runtime/signal_386.go
parent: 0266bc84940657b1e09f72bfe3d932f0344bc6a3 (diff)
download: go-142e434006e371440c8be4494d27d1774fe161e5.tar.gz
go-142e434006e371440c8be4494d27d1774fe161e5.zip
1 files changed, 59 insertions, 5 deletions
diff --git a/src/runtime/signal_386.go b/src/runtime/signal_386.go
index f3c36cb07a..8fb197952e 100644
--- a/src/runtime/signal_386.go
+++ b/src/runtime/signal_386.go
@@ -24,6 +24,8 @@ func dumpregs(c *sigctxt) {
 	print("gs     ", hex(c.gs()), "\n")
 }
 
+var crashing int32
+
 // May run during STW, so write barriers are not allowed.
 //go:nowritebarrier
 func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
@@ -101,7 +103,10 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
 
 	_g_.m.throwing = 1
 	_g_.m.caughtsig.set(gp)
-	startpanic()
+
+	if crashing == 0 {
+		startpanic()
+	}
 
 	if sig < uint32(len(sigtable)) {
 		print(sigtable[sig].name, "\n")
@@ -109,7 +114,7 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
 		print("Signal ", sig, "\n")
 	}
 
-	print("PC=", hex(c.eip()), "\n")
+	print("PC=", hex(c.eip()), " m=", _g_.m.id, "\n")
 	if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
 		print("signal arrived during cgo execution\n")
 		gp = _g_.m.lockedg
@@ -119,13 +124,62 @@ func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
 	var docrash bool
 	if gotraceback(&docrash) > 0 {
 		goroutineheader(gp)
-		tracebacktrap(uintptr(c.eip()), uintptr(c.esp()), 0, gp)
-		tracebackothers(gp)
-		print("\n")
+
+		// On Linux/386, all system calls go through the vdso kernel_vsyscall routine.
+		// Normally we don't see those PCs, but during signals we can.
+		// If we see a PC in the vsyscall area (it moves around, but near the top of memory),
+		// assume we're blocked in the vsyscall routine, which has saved
+		// three words on the stack after the initial call saved the caller PC.
+		// Pop all four words off SP and use the saved PC.
+		// The check of the stack bounds here should suffice to avoid a fault
+		// during the actual PC pop.
+		// If we do load a bogus PC, not much harm done: we weren't going
+		// to get a decent traceback anyway.
+		// TODO(rsc): Make this more precise: we should do more checks on the PC,
+		// and we should find out whether different versions of the vdso page
+		// use different prologues that store different amounts on the stack.
+		pc := uintptr(c.eip())
+		sp := uintptr(c.esp())
+		if GOOS == "linux" && pc >= 0xf4000000 && gp.stack.lo <= sp && sp+16 <= gp.stack.hi {
+			// Assume in vsyscall page.
+			sp += 16
+			pc = *(*uintptr)(unsafe.Pointer(sp - 4))
+			print("runtime: unwind vdso kernel_vsyscall: pc=", hex(pc), " sp=", hex(sp), "\n")
+		}
+
+		tracebacktrap(pc, sp, 0, gp)
+		if crashing > 0 && gp != _g_.m.curg && _g_.m.curg != nil && readgstatus(_g_.m.curg)&^_Gscan == _Grunning {
+			// tracebackothers on original m skipped this one; trace it now.
+			goroutineheader(_g_.m.curg)
+			traceback(^uintptr(0), ^uintptr(0), 0, gp)
+		} else if crashing == 0 {
+			tracebackothers(gp)
+			print("\n")
+		}
 		dumpregs(c)
 	}
 
 	if docrash {
+		// TODO(rsc): Implement raiseproc on other systems
+		// and then add to this if condition.
+		if GOOS == "linux" {
+			crashing++
+			if crashing < sched.mcount {
+				// There are other m's that need to dump their stacks.
+				// Relay SIGQUIT to the next m by sending it to the current process.
+				// All m's that have already received SIGQUIT have signal masks blocking
+				// receipt of any signals, so the SIGQUIT will go to an m that hasn't seen it yet.
+				// When the last m receives the SIGQUIT, it will fall through to the call to
+				// crash below. Just in case the relaying gets botched, each m involved in
+				// the relay sleeps for 5 seconds and then does the crash/exit itself.
+				// In expected operation, the last m has received the SIGQUIT and run
+				// crash/exit and the process is gone, all long before any of the
+				// 5-second sleeps have finished.
+				print("\n-----\n\n")
+				raiseproc(_SIGQUIT)
+				usleep(5 * 1000 * 1000)
+			}
+		}
 		crash()
 	}
author	Russ Cox <rsc@golang.org>	2015-06-16 16:37:48 -0400
committer	Russ Cox <rsc@golang.org>	2015-06-16 20:47:47 +0000
commit	142e434006e371440c8be4494d27d1774fe161e5 (patch)
tree	dadc0b5b2fa0f9456b7051d02ef8504d3741b016 /src/runtime/signal_386.go
parent	0266bc84940657b1e09f72bfe3d932f0344bc6a3 (diff)
download	go-142e434006e371440c8be4494d27d1774fe161e5.tar.gz go-142e434006e371440c8be4494d27d1774fe161e5.zip