aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/race_amd64.s
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2016-02-26 21:57:16 +0100
committerDmitry Vyukov <dvyukov@google.com>2016-05-03 11:00:43 +0000
commitcaa21475328999c1cd108b71ceb6efb7f4cf8fc4 (patch)
tree9555dae9965819297a5f490ca45c6c4c8cf2c1e8 /src/runtime/race_amd64.s
parentfcd7c02c70a110c6f6dbac30ad4ac3eb435ac3fd (diff)
downloadgo-caa21475328999c1cd108b71ceb6efb7f4cf8fc4.tar.gz
go-caa21475328999c1cd108b71ceb6efb7f4cf8fc4.zip
runtime: per-P contexts for race detector
Race runtime also needs local malloc caches and currently uses a mix of per-OS-thread and per-goroutine caches. This leads to increased memory consumption. But more importantly, the cache of synchronization objects is per-goroutine and we don't always have goroutine context when freeing memory in GC. As a result, synchronization object descriptors leak (more precisely, they can be reused if another synchronization object is recreated at the same address, but it does not always help). For example, the added BenchmarkSyncLeak has effectively runaway memory consumption (based on a real long-running server). This change updates race runtime with support for per-P contexts. BenchmarkSyncLeak now stabilizes at ~1GB memory consumption. Long term, this will allow us to remove race runtime dependency on glibc (as malloc is the main cornerstone). I've also implemented a different scheme to pass P context to race runtime: scheduler notified race runtime about association between G and P by calling procwire(g, p)/procunwire(g, p). But it turned out to be very messy as we have lots of places where the association changes (e.g. syscalls). So I dropped it in favor of the current scheme: race runtime asks scheduler about the current P. Fixes #14533 Change-Id: Iad10d2f816a44affae1b9fed446b3580eafd8c69 Reviewed-on: https://go-review.googlesource.com/19970 Reviewed-by: Ian Lance Taylor <iant@golang.org> Run-TryBot: Dmitry Vyukov <dvyukov@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/runtime/race_amd64.s')
-rw-r--r--src/runtime/race_amd64.s23
1 files changed, 21 insertions, 2 deletions
diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s
index 55c48fc5f2..94ca76da27 100644
--- a/src/runtime/race_amd64.s
+++ b/src/runtime/race_amd64.s
@@ -384,7 +384,24 @@ call:
// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
// Direct Go->C race call has only switched SP, finish g->g0 switch by setting correct g.
// The overall effect of Go->C->Go call chain is similar to that of mcall.
-TEXT runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
+// RARG0 contains command code. RARG1 contains command-specific context.
+// See racecallback for command codes.
+TEXT runtime·racecallbackthunk(SB), NOSPLIT, $56-8
+ // Handle command raceGetProcCmd (0) here.
+ // First, code below assumes that we are on curg, while raceGetProcCmd
+ // can be executed on g0. Second, it is called frequently, so will
+ // benefit from this fast path.
+ CMPQ RARG0, $0
+ JNE rest
+ get_tls(RARG0)
+ MOVQ g(RARG0), RARG0
+ MOVQ g_m(RARG0), RARG0
+ MOVQ m_p(RARG0), RARG0
+ MOVQ p_racectx(RARG0), RARG0
+ MOVQ RARG0, (RARG1)
+ RET
+
+rest:
// Save callee-saved registers (Go code won't respect that).
// This is superset of darwin/linux/windows registers.
PUSHQ BX
@@ -401,8 +418,10 @@ TEXT runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
MOVQ g_m(R13), R13
MOVQ m_g0(R13), R14
MOVQ R14, g(R12) // g = m->g0
+ PUSHQ RARG1 // func arg
PUSHQ RARG0 // func arg
- CALL runtime·racesymbolize(SB)
+ CALL runtime·racecallback(SB)
+ POPQ R12
POPQ R12
// All registers are smashed after Go code, reload.
get_tls(R12)