diff options
author | Than McIntosh <thanm@google.com> | 2021-04-06 16:18:50 -0400 |
---|---|---|
committer | Than McIntosh <thanm@google.com> | 2021-04-12 10:30:24 +0000 |
commit | c26f954a540a99eafac6ee3bb3b996c750aad8a4 (patch) | |
tree | 7d1f54018243071e6d165a1d961299b6d67a6cca /src/cmd/compile/internal/amd64 | |
parent | 16cd770e0668a410a511680b2ac1412e554bd27b (diff) | |
download | go-c26f954a540a99eafac6ee3bb3b996c750aad8a4.tar.gz go-c26f954a540a99eafac6ee3bb3b996c750aad8a4.zip |
cmd/compile/internal/amd64: follow-on regabi fix for amd64 zerorange
This patch provides a better long-term fix for the compiler's
zerorange() helper function to make it generate code friendly to the
register ABI.
CL 305829 did part of the work, but didn't properly handle the case
where the compiler emits a REP.STOSQ sequence; this patch changes the
REP code to make sure it doesn't clobber any incoming register
parameter values.
Also included is a test that is specifically written to trigger
the REP emit code in the compiler (prior to this, this code was
not being hit on linux/amd64 all.bash).
Updates #45372.
Updates #40724.
Change-Id: Iaf1c4e709e98eda45cd6f3aeebda0fe9160f1f42
Reviewed-on: https://go-review.googlesource.com/c/go/+/307829
Trust: Than McIntosh <thanm@google.com>
Run-TryBot: Than McIntosh <thanm@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'src/cmd/compile/internal/amd64')
-rw-r--r-- | src/cmd/compile/internal/amd64/ggen.go | 30 |
1 files changed, 22 insertions, 8 deletions
diff --git a/src/cmd/compile/internal/amd64/ggen.go b/src/cmd/compile/internal/amd64/ggen.go index f065bb4dd4..b5847d48b9 100644 --- a/src/cmd/compile/internal/amd64/ggen.go +++ b/src/cmd/compile/internal/amd64/ggen.go @@ -58,7 +58,6 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj. const ( r13 = 1 << iota // if R13 is already zeroed. x15 // if X15 is already zeroed. Note: in new ABI, X15 is always zero. - rax // if RAX is already zeroed. ) if cnt == 0 { @@ -117,18 +116,33 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj. p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) } else { - // Note: here we have to use RAX since it is an implicit input - // for the REPSTOSQ below. This is going to be problematic when - // regabi is in effect; this will be fixed in a forthcoming CL. - if *state&rax == 0 { - p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) - *state |= rax - } + // When the register ABI is in effect, at this point in the + // prolog we may have live values in all of RAX,RDI,RCX. Save + // them off to registers before the REPSTOSQ below, then + // restore. Note that R12 and R13 are always available as + // scratch regs; here we also use R15 (this is safe to do + // since there won't be any globals accessed in the prolog). + // See rewriteToUseGot() in obj6.go for more on r15 use. + + // Save rax/rdi/rcx + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0) + // Set up the REPSTOSQ and kick it off. + p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, x86.REG_AX, 0) p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0) p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0) p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0) + + // Restore rax/rdi/rcx + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0) + p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0) + + // Record the fact that r13 is no longer zero. + *state &= ^uint32(r13) } return p |