aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCherry Mui <cherryyz@google.com>2022-06-15 15:09:24 -0400
committerCherry Mui <cherryyz@google.com>2022-06-29 22:26:58 +0000
commit64ef16e77795957d47e3889bca9483d6f3099bbf (patch)
tree2bddc87d36076445f042b6935f4419a77265acd9
parent0750107074c39f7b846515de47c2857cbdb7e3d6 (diff)
downloadgo-64ef16e77795957d47e3889bca9483d6f3099bbf.tar.gz
go-64ef16e77795957d47e3889bca9483d6f3099bbf.zip
cmd/internal/obj/arm64: save LR and SP in one instruction for small frames
When we create a thread with signals blocked. But glibc's pthread_sigmask doesn't really allow us to block SIGSETXID. So we may get a signal early on before the signal stack is set. If we get a signal on the current stack, it will clobber anything below the SP. This CL makes it to save LR and decrement SP in a single MOVD.W instruction for small frames, so we don't write below the SP. We used to use a single MOVD.W instruction before CL 379075. CL 379075 changed to use an STP instruction to save the LR and FP, then decrementing the SP. This CL changes it back, just this part (epilogues and large frame prologues are unchanged). For small frames, it is the same number of instructions either way. This decreases the size of a "small" frame from 0x1f0 to 0xf0. For frame sizes in between, it could benefit from using an STP instruction instead of using the prologue for the "large" frame case. We don't bother it for now as this is a stop-gap solution anyway. This only addresses the issue with small frames. Luckily, all functions from thread entry to setting up the signal stack have samll frames. Other possible ideas: - Expand the unwind info metadata, separate SP delta and the location of the return address, so we can express "SP is decremented but the return address is in the LR register". Then we can always create the frame first then write the LR, without writing anything below the SP (except the frame pointer at SP-8, which is minor because it doesn't really affect program execution). - Set up the signal stack immediately in mstart in assembly. For Go 1.19 we do this simple fix. We plan to do the metadata fix in Go 1.20 ( #53609 ). Other LR architectures are addressed in CL 413428. Fix #53374. Change-Id: I9d6582ab14ccb06ac61ad43852943d9555e22ae5 Reviewed-on: https://go-review.googlesource.com/c/go/+/412474 Run-TryBot: Cherry Mui <cherryyz@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Austin Clements <austin@google.com> Reviewed-by: Eric Fang <eric.fang@arm.com>
-rw-r--r--misc/cgo/test/cgo_linux_test.go8
-rw-r--r--misc/cgo/test/setgid2_linux.go35
-rw-r--r--src/cmd/internal/obj/arm64/obj7.go65
3 files changed, 69 insertions, 39 deletions
diff --git a/misc/cgo/test/cgo_linux_test.go b/misc/cgo/test/cgo_linux_test.go
index a9746b552e..7c4628c493 100644
--- a/misc/cgo/test/cgo_linux_test.go
+++ b/misc/cgo/test/cgo_linux_test.go
@@ -15,6 +15,14 @@ func TestSetgid(t *testing.T) {
}
testSetgid(t)
}
+
+func TestSetgidStress(t *testing.T) {
+ if runtime.GOOS == "android" {
+ t.Skip("unsupported on Android")
+ }
+ testSetgidStress(t)
+}
+
func Test1435(t *testing.T) { test1435(t) }
func Test6997(t *testing.T) { test6997(t) }
func TestBuildID(t *testing.T) { testBuildID(t) }
diff --git a/misc/cgo/test/setgid2_linux.go b/misc/cgo/test/setgid2_linux.go
new file mode 100644
index 0000000000..d239893f43
--- /dev/null
+++ b/misc/cgo/test/setgid2_linux.go
@@ -0,0 +1,35 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Stress test setgid and thread creation. A thread
+// can get a SIGSETXID signal early on at thread
+// initialization, causing crash. See issue 53374.
+
+package cgotest
+
+/*
+#include <sys/types.h>
+#include <unistd.h>
+*/
+import "C"
+
+import (
+ "runtime"
+ "testing"
+)
+
+func testSetgidStress(t *testing.T) {
+ const N = 1000
+ ch := make(chan int, N)
+ for i := 0; i < N; i++ {
+ go func() {
+ C.setgid(0)
+ ch <- 1
+ runtime.LockOSThread() // so every goroutine uses a new thread
+ }()
+ }
+ for i := 0; i < N; i++ {
+ <-ch
+ }
+}
diff --git a/src/cmd/internal/obj/arm64/obj7.go b/src/cmd/internal/obj/arm64/obj7.go
index 1f2625d54f..83ae64a19b 100644
--- a/src/cmd/internal/obj/arm64/obj7.go
+++ b/src/cmd/internal/obj/arm64/obj7.go
@@ -609,17 +609,17 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
var prologueEnd *obj.Prog
aoffset := c.autosize
- if aoffset > 0x1f0 {
- // LDP offset variant range is -512 to 504, SP should be 16-byte aligned,
- // so the maximum aoffset value is 496.
- aoffset = 0x1f0
+ if aoffset > 0xf0 {
+ // MOVD.W offset variant range is -0x100 to 0xf8, SP should be 16-byte aligned.
+ // so the maximum aoffset value is 0xf0.
+ aoffset = 0xf0
}
// Frame is non-empty. Make sure to save link register, even if
// it is a leaf function, so that traceback works.
q = p
if c.autosize > aoffset {
- // Frame size is too large for a STP instruction. Store the frame pointer
+ // Frame size is too large for a MOVD.W instruction. Store the frame pointer
// register and link register before decrementing SP, so if a signal comes
// during the execution of the function prologue, the traceback code will
// not see a half-updated stack frame.
@@ -679,50 +679,37 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
q1.To.Offset = -8
}
} else {
- // small frame, save FP and LR with one STP instruction, then update SP.
- // Store first, so if a signal comes during the execution of the function
- // prologue, the traceback code will not see a half-updated stack frame.
- // STP (R29, R30), -aoffset-8(RSP)
+ // small frame, update SP and save LR in a single MOVD.W instruction.
+ // So if a signal comes during the execution of the function prologue,
+ // the traceback code will not see a half-updated stack frame.
+ // Also, on Linux, in a cgo binary we may get a SIGSETXID signal
+ // early on before the signal stack is set, as glibc doesn't allow
+ // us to block SIGSETXID. So it is important that we don't write below
+ // the SP until the signal stack is set.
+ // Luckily, all the functions from thread entry to setting the signal
+ // stack have small frames.
q1 = obj.Appendp(q, c.newprog)
- q1.As = ASTP
+ q1.As = AMOVD
q1.Pos = p.Pos
- q1.From.Type = obj.TYPE_REGREG
- q1.From.Reg = REGFP
- q1.From.Offset = REGLINK
+ q1.From.Type = obj.TYPE_REG
+ q1.From.Reg = REGLINK
q1.To.Type = obj.TYPE_MEM
- q1.To.Offset = int64(-aoffset - 8)
+ q1.Scond = C_XPRE
+ q1.To.Offset = int64(-aoffset)
q1.To.Reg = REGSP
+ q1.Spadj = aoffset
prologueEnd = q1
- q1 = c.ctxt.StartUnsafePoint(q1, c.newprog)
- // This instruction is not async preemptible, see the above comment.
- // SUB $aoffset, RSP, RSP
+ // Frame pointer.
q1 = obj.Appendp(q1, c.newprog)
q1.Pos = p.Pos
- q1.As = ASUB
- q1.From.Type = obj.TYPE_CONST
- q1.From.Offset = int64(aoffset)
- q1.Reg = REGSP
- q1.To.Type = obj.TYPE_REG
+ q1.As = AMOVD
+ q1.From.Type = obj.TYPE_REG
+ q1.From.Reg = REGFP
+ q1.To.Type = obj.TYPE_MEM
q1.To.Reg = REGSP
- q1.Spadj = aoffset
-
- q1 = c.ctxt.EndUnsafePoint(q1, c.newprog, -1)
-
- if buildcfg.GOOS == "ios" {
- // See the above comment.
- // STP (R29, R30), -8(RSP)
- q1 = obj.Appendp(q1, c.newprog)
- q1.As = ASTP
- q1.Pos = p.Pos
- q1.From.Type = obj.TYPE_REGREG
- q1.From.Reg = REGFP
- q1.From.Offset = REGLINK
- q1.To.Type = obj.TYPE_MEM
- q1.To.Offset = int64(-8)
- q1.To.Reg = REGSP
- }
+ q1.To.Offset = -8
}
prologueEnd.Pos = prologueEnd.Pos.WithXlogue(src.PosPrologueEnd)