author     Ian Lance Taylor <iant@golang.org>      2017-11-10 09:48:39 -0800
committer  Andrew Bonventre <andybons@golang.org>  2018-01-22 20:25:11 +0000
commit     e36f34fa55f1afaa3d5b731dadacb0145f2d928c (patch)
tree       ce2eefd623a140f7fb291af5af08648ac7469f88
parent     75c7c40d2999c1328f4cbf033c3f37f5ee8bb11e (diff)
[release-branch.go1.9] runtime: call amd64 VDSO entry points on large stack
NOTE: This elides changes to src/runtime/sys_linux_386.s since that
requires another change (golang.org/cl/69390) which we don't want to
backport.

If the Linux kernel was built with CONFIG_OPTIMIZE_INLINING=n and was
built with hardening options turned on, GCC will insert a stack probe
in the VDSO function that requires a full page of stack space. The
stack probe can corrupt memory if another thread is using it. Avoid
sporadic crashes by calling the VDSO on the g0 or gsignal stack.

While we're at it, align the stack as C code expects. We've been
getting away with a misaligned stack, but it's possible that the VDSO
code will change in the future to break that assumption.

Benchmarks show an 11% hit on time.Now, but it's only 6ns.

name                      old time/op  new time/op  delta
AfterFunc-12              1.66ms ± 0%  1.66ms ± 1%     ~     (p=0.905 n=9+10)
After-12                  1.90ms ± 6%  1.86ms ± 0%   -2.05%  (p=0.012 n=10+8)
Stop-12                    113µs ± 3%   115µs ± 2%   +1.60%  (p=0.017 n=9+10)
SimultaneousAfterFunc-12   145µs ± 1%   144µs ± 0%   -0.68%  (p=0.002 n=10+8)
StartStop-12              39.5µs ± 3%  40.4µs ± 5%   +2.19%  (p=0.023 n=10+10)
Reset-12                  10.2µs ± 0%  10.4µs ± 0%   +2.45%  (p=0.000 n=10+9)
Sleep-12                   190µs ± 1%   190µs ± 1%     ~     (p=0.971 n=10+10)
Ticker-12                 4.68ms ± 2%  4.64ms ± 2%   -0.83%  (p=0.043 n=9+10)
Now-12                    48.4ns ±11%  54.0ns ±11%  +11.42%  (p=0.017 n=10+10)
NowUnixNano-12            48.5ns ±13%  56.9ns ± 8%  +17.30%  (p=0.000 n=10+10)
Format-12                  489ns ±11%   504ns ± 6%     ~     (p=0.289 n=10+10)
FormatNow-12               436ns ±23%   480ns ±13%  +10.25%  (p=0.026 n=9+10)
MarshalJSON-12             656ns ±14%   587ns ±24%     ~     (p=0.063 n=10+10)
MarshalText-12             647ns ± 7%   638ns ± 9%     ~     (p=0.516 n=10+10)
Parse-12                   348ns ± 8%   328ns ± 9%   -5.66%  (p=0.030 n=10+10)
ParseDuration-12           136ns ± 9%   140ns ±11%     ~     (p=0.425 n=10+10)
Hour-12                   14.8ns ± 6%  15.6ns ±11%     ~     (p=0.085 n=10+10)
Second-12                 14.0ns ± 6%  14.3ns ±12%     ~     (p=0.443 n=10+10)
Year-12                   32.4ns ±11%  33.4ns ± 6%     ~     (p=0.492 n=10+10)
Day-12                    41.5ns ± 9%  42.3ns ±12%     ~     (p=0.239 n=10+10)

Fixes #20427

Change-Id: Ia395cbb863215f4499b8e7ef95f4b99f51090911
Reviewed-on: https://go-review.googlesource.com/76990
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-on: https://go-review.googlesource.com/88495
Run-TryBot: Andrew Bonventre <andybons@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
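[Editor's note: the Now-12 and NowUnixNano-12 rows above come from the time package's benchmarks. A minimal standalone sketch that measures the same VDSO-backed path is below; the file and package names are hypothetical, and only the standard testing and time packages are used. Run with `go test -bench=Now -count=10` and compare old/new binaries with benchstat.]

// vdso_bench_test.go — hypothetical sketch approximating the Now-12 and
// NowUnixNano-12 measurements in the table above.
package vdsobench

import (
	"testing"
	"time"
)

// BenchmarkNow times time.Now, which reaches runtime·walltime/nanotime
// and, on amd64 Linux, the VDSO clock_gettime entry point.
func BenchmarkNow(b *testing.B) {
	for i := 0; i < b.N; i++ {
		_ = time.Now()
	}
}

// BenchmarkNowUnixNano additionally converts the result to a Unix timestamp.
func BenchmarkNowUnixNano(b *testing.B) {
	for i := 0; i < b.N; i++ {
		_ = time.Now().UnixNano()
	}
}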
-rw-r--r--  src/runtime/sys_linux_amd64.s  57
1 file changed, 49 insertions(+), 8 deletions(-)
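[Editor's note: the corruption described in the commit message needs many threads hitting the VDSO at once. A hypothetical stress sketch along these lines — not taken from the original report for issue #20427 — could be used to provoke the sporadic crashes on an affected kernel (CONFIG_OPTIMIZE_INLINING=n plus hardening) with an unpatched runtime.]

// vdso_stress.go — hypothetical stress sketch: many OS threads calling
// the VDSO-backed time.Now concurrently.
package main

import (
	"runtime"
	"sync"
	"time"
)

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 4*runtime.NumCPU(); i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			runtime.LockOSThread() // pin each goroutine to its own OS thread
			for j := 0; j < 1e7; j++ {
				_ = time.Now() // hammer the VDSO clock_gettime path
			}
		}()
	}
	wg.Wait()
}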
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index e0dc3e1264..79e110f15a 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -139,11 +139,31 @@ TEXT runtime·mincore(SB),NOSPLIT,$0-28
RET
// func walltime() (sec int64, nsec int32)
-TEXT runtime·walltime(SB),NOSPLIT,$16
- // Be careful. We're calling a function with gcc calling convention here.
- // We're guaranteed 128 bytes on entry, and we've taken 16, and the
- // call uses another 8.
- // That leaves 104 for the gettime code to use. Hope that's enough!
+TEXT runtime·walltime(SB),NOSPLIT,$0-12
+ // We don't know how much stack space the VDSO code will need,
+ // so switch to g0.
+ // In particular, a kernel configured with CONFIG_OPTIMIZE_INLINING=n
+ // and hardening can use a full page of stack space in gettime_sym
+ // due to stack probes inserted to avoid stack/heap collisions.
+ // See issue #20427.
+
+ MOVQ SP, BP // Save old SP; BP unchanged by C code.
+
+ get_tls(CX)
+ MOVQ g(CX), AX
+ MOVQ g_m(AX), CX
+ MOVQ m_curg(CX), DX
+
+ CMPQ AX, DX // Only switch if on curg.
+ JNE noswitch
+
+ MOVQ m_g0(CX), DX
+ MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
+
+noswitch:
+ SUBQ $16, SP // Space for results
+ ANDQ $~15, SP // Align for C code
+
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0
JEQ fallback
@@ -152,6 +172,7 @@ TEXT runtime·walltime(SB),NOSPLIT,$16
CALL AX
MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec
+ MOVQ BP, SP // Restore real SP
MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP)
RET
@@ -163,13 +184,31 @@ fallback:
MOVQ 0(SP), AX // sec
MOVL 8(SP), DX // usec
IMULQ $1000, DX
+ MOVQ BP, SP // Restore real SP
MOVQ AX, sec+0(FP)
MOVL DX, nsec+8(FP)
RET
-TEXT runtime·nanotime(SB),NOSPLIT,$16
- // Duplicate time.now here to avoid using up precious stack space.
- // See comment above in time.now.
+TEXT runtime·nanotime(SB),NOSPLIT,$0-8
+ // Switch to g0 stack. See comment above in runtime·walltime.
+
+ MOVQ SP, BP // Save old SP; BP unchanged by C code.
+
+ get_tls(CX)
+ MOVQ g(CX), AX
+ MOVQ g_m(AX), CX
+ MOVQ m_curg(CX), DX
+
+ CMPQ AX, DX // Only switch if on curg.
+ JNE noswitch
+
+ MOVQ m_g0(CX), DX
+ MOVQ (g_sched+gobuf_sp)(DX), SP // Set SP to g0 stack
+
+noswitch:
+ SUBQ $16, SP // Space for results
+ ANDQ $~15, SP // Align for C code
+
MOVQ runtime·__vdso_clock_gettime_sym(SB), AX
CMPQ AX, $0
JEQ fallback
@@ -178,6 +217,7 @@ TEXT runtime·nanotime(SB),NOSPLIT,$16
CALL AX
MOVQ 0(SP), AX // sec
MOVQ 8(SP), DX // nsec
+ MOVQ BP, SP // Restore real SP
// sec is in AX, nsec in DX
// return nsec in AX
IMULQ $1000000000, AX
@@ -191,6 +231,7 @@ fallback:
CALL AX
MOVQ 0(SP), AX // sec
MOVL 8(SP), DX // usec
+ MOVQ BP, SP // Restore real SP
IMULQ $1000, DX
// sec is in AX, nsec in DX
// return nsec in AX
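
[Editor's note: the SUBQ/ANDQ pair in both functions reserves 16 bytes for the sec/nsec results and then rounds SP down to a 16-byte boundary before the CALL, which is the alignment the VDSO's C code expects. A small Go sketch of that arithmetic, using a made-up stack pointer value, is below.]

package main

import "fmt"

func main() {
	sp := uintptr(0x7ffdc3a1e3c8) // hypothetical stack pointer value
	sp -= 16                      // SUBQ $16, SP: space for sec and nsec results
	sp &^= 15                     // ANDQ $~15, SP: round down to a 16-byte boundary
	fmt.Printf("aligned SP = %#x, SP mod 16 = %d\n", sp, sp%16)
}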