aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAustin Clements <austin@google.com>2017-06-20 15:12:12 -0400
committerAustin Clements <austin@google.com>2017-06-20 19:55:45 +0000
commit67e537541c043c701001f002bed0cda70ce72767 (patch)
treeabd25ba42c2cc7d69e8708bfcd7f0c99415b172f
parent5a5ac34133f31eedf39e0cc6bef06085190317d2 (diff)
downloadgo-67e537541c043c701001f002bed0cda70ce72767.tar.gz
go-67e537541c043c701001f002bed0cda70ce72767.zip
syscall: use CLONE_VFORK safely
Currently, CLONE_VFORK is used without much regard to the stack. This is dangerous, because anything the child does to the stack is visible to the parent. For example, if the compiler were to reuse named stack slots (which it currently doesn't do), it would be easy for the child running in the same stack frame as the parent to corrupt local variables that the parent then depended on. We're not sure of anything specific going wrong in this code right now, but it is at best a ticking time bomb. CLONE_VFORK can only safely be used if we ensure the child does not execute in any of the active stack frames of the parent. This commit implements this by arranging for the parent to return immediately from the frame the child will operate in, and for the child to never return to the frame the parent will operate in. Fixes #20732. Change-Id: Iad5b4ddc2b994c082bd278bfd52ef53bd38c037f Reviewed-on: https://go-review.googlesource.com/46173 Run-TryBot: Austin Clements <austin@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org>
-rw-r--r--src/syscall/exec_linux.go70
1 files changed, 45 insertions, 25 deletions
diff --git a/src/syscall/exec_linux.go b/src/syscall/exec_linux.go
index e631cd6470..db52360717 100644
--- a/src/syscall/exec_linux.go
+++ b/src/syscall/exec_linux.go
@@ -63,15 +63,46 @@ func runtime_AfterForkInChild()
// functions that do not grow the stack.
//go:norace
func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) {
+ // Set up and fork. This returns immediately in the parent or
+ // if there's an error.
+ r1, err1, p, locked := forkAndExecInChild1(argv0, argv, envv, chroot, dir, attr, sys, pipe)
+ if locked {
+ runtime_AfterFork()
+ }
+ if err1 != 0 {
+ return 0, err1
+ }
+
+ // parent; return PID
+ pid = int(r1)
+
+ if sys.UidMappings != nil || sys.GidMappings != nil {
+ Close(p[0])
+ err := writeUidGidMappings(pid, sys)
+ var err2 Errno
+ if err != nil {
+ err2 = err.(Errno)
+ }
+ RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
+ Close(p[1])
+ }
+
+ return pid, 0
+}
+
+//go:norace
+func forkAndExecInChild1(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (r1 uintptr, err1 Errno, p [2]int, locked bool) {
+ // vfork requires that the child not touch any of the parent's
+ // active stack frames. Hence, the child does all post-fork
+ // processing in this stack frame and never returns, while the
+ // parent returns immediately from this frame and does all
+ // post-fork processing in the outer frame.
// Declare all variables at top in case any
// declarations require heap allocation (e.g., err1).
var (
- r1 uintptr
- err1 Errno
err2 Errno
nextfd int
i int
- p [2]int
)
// Record parent PID so child can test if it has died.
@@ -94,13 +125,15 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
// synchronizing writing of User ID/Group ID mappings.
if sys.UidMappings != nil || sys.GidMappings != nil {
if err := forkExecPipe(p[:]); err != nil {
- return 0, err.(Errno)
+ err1 = err.(Errno)
+ return
}
}
// About to call fork.
// No more allocation or calls of non-assembly functions.
runtime_BeforeFork()
+ locked = true
switch {
case runtime.GOARCH == "amd64" && sys.Cloneflags&CLONE_NEWUSER == 0:
r1, err1 = rawVforkSyscall(SYS_CLONE, uintptr(SIGCHLD|CLONE_VFORK|CLONE_VM)|sys.Cloneflags)
@@ -109,27 +142,14 @@ func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr
default:
r1, _, err1 = RawSyscall6(SYS_CLONE, uintptr(SIGCHLD)|sys.Cloneflags, 0, 0, 0, 0, 0)
}
- if err1 != 0 {
- runtime_AfterFork()
- return 0, err1
- }
-
- if r1 != 0 {
- // parent; return PID
- runtime_AfterFork()
- pid = int(r1)
-
- if sys.UidMappings != nil || sys.GidMappings != nil {
- Close(p[0])
- err := writeUidGidMappings(pid, sys)
- if err != nil {
- err2 = err.(Errno)
- }
- RawSyscall(SYS_WRITE, uintptr(p[1]), uintptr(unsafe.Pointer(&err2)), unsafe.Sizeof(err2))
- Close(p[1])
- }
-
- return pid, 0
+ if err1 != 0 || r1 != 0 {
+ // If we're in the parent, we must return immediately
+ // so we're not in the same stack frame as the child.
+ // This can at most use the return PC, which the child
+ // will not modify, and the results of
+ // rawVforkSyscall, which must have been written after
+ // the child was replaced.
+ return
}
// Fork succeeded, now in child.