diff options
author | Cherry Zhang <cherryyz@google.com> | 2021-04-17 22:50:13 -0400 |
---|---|---|
committer | Cherry Zhang <cherryyz@google.com> | 2021-04-19 16:21:39 +0000 |
commit | 6b8e3e2d060b009fe92b39f7022445d96e225325 (patch) | |
tree | 9b5125fe5edb2613a59a099fc08630a674eb1a16 /src/cmd/compile/internal/ssa | |
parent | b21e739f871d9dc119dccfba82622bfb18227391 (diff) | |
download | go-6b8e3e2d060b009fe92b39f7022445d96e225325.tar.gz go-6b8e3e2d060b009fe92b39f7022445d96e225325.zip |
cmd/compile: reduce redundant register moves for regabi calls
Currently, if we have AX=a and BX=b, and we want to make a call
F(1, a, b), to move arguments into the desired registers it emits
MOVQ AX, CX
MOVL $1, AX // AX=1
MOVQ BX, DX
MOVQ CX, BX // BX=a
MOVQ DX, CX // CX=b
This has a few redundant moves.
This is because we process inputs in order. First, allocate 1 to
AX, which kicks out a (in AX) to CX (a free register at the
moment). Then, allocate a to BX, which kicks out b (in BX) to DX.
Finally, put b to CX.
Notice that if we start with allocating CX=b, then BX=a, AX=1,
we will not have redundant moves. This CL reduces redundant moves
by allocating them in different order: First, for inpouts that are
already in place, keep them there. Then allocate free registers.
Then everything else.
before after
cmd/compile binary size 23703888 23609680
text size 8565899 8533291
(with regabiargs enabled.)
Change-Id: I69e1bdf745f2c90bb791f6d7c45b37384af1e874
Reviewed-on: https://go-review.googlesource.com/c/go/+/311371
Trust: Cherry Zhang <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Than McIntosh <thanm@google.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa')
-rw-r--r-- | src/cmd/compile/internal/ssa/regalloc.go | 54 |
1 files changed, 50 insertions, 4 deletions
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 336cd3d737..8ddb3d045b 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -1377,12 +1377,58 @@ func (s *regAllocState) regalloc(f *Func) { } } - // Move arguments to registers. Process in an ordering defined + // Move arguments to registers. + // First, if an arg must be in a specific register and it is already + // in place, keep it. + args = append(args[:0], make([]*Value, len(v.Args))...) + for i, a := range v.Args { + if !s.values[a.ID].needReg { + args[i] = a + } + } + for _, i := range regspec.inputs { + mask := i.regs + if countRegs(mask) == 1 && mask&s.values[v.Args[i.idx].ID].regs != 0 { + args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos) + } + } + // Then, if an arg must be in a specific register and that + // register is free, allocate that one. Otherwise when processing + // another input we may kick a value into the free register, which + // then will be kicked out again. + // This is a common case for passing-in-register arguments for + // function calls. + for { + freed := false + for _, i := range regspec.inputs { + if args[i.idx] != nil { + continue // already allocated + } + mask := i.regs + if countRegs(mask) == 1 && mask&^s.used != 0 { + args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos) + // If the input is in other registers that will be clobbered by v, + // or the input is dead, free the registers. This may make room + // for other inputs. + oldregs := s.values[v.Args[i.idx].ID].regs + if oldregs&^regspec.clobbers == 0 || !s.liveAfterCurrentInstruction(v.Args[i.idx]) { + s.freeRegs(oldregs &^ mask &^ s.nospill) + freed = true + } + } + } + if !freed { + break + } + } + // Last, allocate remaining ones, in an ordering defined // by the register specification (most constrained first). - args = append(args[:0], v.Args...) for _, i := range regspec.inputs { + if args[i.idx] != nil { + continue // already allocated + } mask := i.regs - if mask&s.values[args[i.idx].ID].regs == 0 { + if mask&s.values[v.Args[i.idx].ID].regs == 0 { // Need a new register for the input. mask &= s.allocatable mask &^= s.nospill @@ -1401,7 +1447,7 @@ func (s *regAllocState) regalloc(f *Func) { mask &^= desired.avoid } } - args[i.idx] = s.allocValToReg(args[i.idx], mask, true, v.Pos) + args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos) } // If the output clobbers the input register, make sure we have |