aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/amd64
diff options
context:
space:
mode:
authorThan McIntosh <thanm@google.com>2021-04-06 16:18:50 -0400
committerThan McIntosh <thanm@google.com>2021-04-12 10:30:24 +0000
commitc26f954a540a99eafac6ee3bb3b996c750aad8a4 (patch)
tree7d1f54018243071e6d165a1d961299b6d67a6cca /src/cmd/compile/internal/amd64
parent16cd770e0668a410a511680b2ac1412e554bd27b (diff)
downloadgo-c26f954a540a99eafac6ee3bb3b996c750aad8a4.tar.gz
go-c26f954a540a99eafac6ee3bb3b996c750aad8a4.zip
cmd/compile/internal/amd64: follow-on regabi fix for amd64 zerorange
This patch provides a better long-term fix for the compiler's zerorange() helper function to make it generate code friendly to the register ABI. CL 305829 did part of the work, but didn't properly handle the case where the compiler emits a REP.STOSQ sequence; this patch changes the REP code to make sure it doesn't clobber any incoming register parameter values. Also included is a test that is specifically written to trigger the REP emit code in the compiler (prior to this, this code was not being hit on linux/amd64 all.bash). Updates #45372. Updates #40724. Change-Id: Iaf1c4e709e98eda45cd6f3aeebda0fe9160f1f42 Reviewed-on: https://go-review.googlesource.com/c/go/+/307829 Trust: Than McIntosh <thanm@google.com> Run-TryBot: Than McIntosh <thanm@google.com> Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src/cmd/compile/internal/amd64')
-rw-r--r--src/cmd/compile/internal/amd64/ggen.go30
1 files changed, 22 insertions, 8 deletions
diff --git a/src/cmd/compile/internal/amd64/ggen.go b/src/cmd/compile/internal/amd64/ggen.go
index f065bb4dd4..b5847d48b9 100644
--- a/src/cmd/compile/internal/amd64/ggen.go
+++ b/src/cmd/compile/internal/amd64/ggen.go
@@ -58,7 +58,6 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
const (
r13 = 1 << iota // if R13 is already zeroed.
x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero.
- rax // if RAX is already zeroed.
)
if cnt == 0 {
@@ -117,18 +116,33 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
} else {
- // Note: here we have to use RAX since it is an implicit input
- // for the REPSTOSQ below. This is going to be problematic when
- // regabi is in effect; this will be fixed in a forthcoming CL.
- if *state&rax == 0 {
- p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
- *state |= rax
- }
+ // When the register ABI is in effect, at this point in the
+ // prolog we may have live values in all of RAX,RDI,RCX. Save
+ // them off to registers before the REPSTOSQ below, then
+ // restore. Note that R12 and R13 are always available as
+ // scratch regs; here we also use R15 (this is safe to do
+ // since there won't be any globals accessed in the prolog).
+ // See rewriteToUseGot() in obj6.go for more on r15 use.
+
+ // Save rax/rdi/rcx
+ p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
+ p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
+ p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)
+ // Set up the REPSTOSQ and kick it off.
+ p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
+
+ // Restore rax/rdi/rcx
+ p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
+ p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
+ p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)
+
+ // Record the fact that r13 is no longer zero.
+ *state &= ^uint32(r13)
}
return p