diff options
author | Paul E. Murphy <murp@ibm.com> | 2020-08-17 16:14:48 -0500 |
---|---|---|
committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2020-08-18 21:09:30 +0000 |
commit | e7c7ce646f37b260fe5a5635bc52243d28125dd8 (patch) | |
tree | 78b97934e94c41896615e8c74d30ecf4f619a182 /src/cmd/compile/internal/ppc64 | |
parent | 216714e44f703470f102cf248f7e9097160093d4 (diff) | |
download | go-e7c7ce646f37b260fe5a5635bc52243d28125dd8.tar.gz go-e7c7ce646f37b260fe5a5635bc52243d28125dd8.zip |
cmd/compile: combine multiply/add into maddld on ppc64le/power9
Add a new lowering rule to match and replace such instances
with the MADDLD instruction available on power9 where
possible.
Likewise, this plumbs in a new ppc64 ssa opcode to house
the newly generated MADDLD instructions.
When testing ed25519, this reduced binary size by 936B.
Similarly, MADDLD combination occcurs in a few other less
obvious cases such as division by constant.
Testing of golang.org/x/crypto/ed25519 shows non-trivial
speedup during keygeneration:
name old time/op new time/op delta
KeyGeneration 65.2µs ± 0% 63.1µs ± 0% -3.19%
Signing 64.3µs ± 0% 64.4µs ± 0% +0.16%
Verification 147µs ± 0% 147µs ± 0% +0.11%
Similarly, this test binary has shrunk by 66488B.
Change-Id: I077aeda7943119b41f07e4e62e44a648f16e4ad0
Reviewed-on: https://go-review.googlesource.com/c/go/+/248723
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/compile/internal/ppc64')
-rw-r--r-- | src/cmd/compile/internal/ppc64/ssa.go | 14 |
1 files changed, 14 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 0efdd710fb..4d2ad48135 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -601,6 +601,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + case ssa.OpPPC64MADDLD: + r := v.Reg() + r1 := v.Args[0].Reg() + r2 := v.Args[1].Reg() + r3 := v.Args[2].Reg() + // r = r1*r2 ± r3 + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = r1 + p.Reg = r2 + p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: r3}) + p.To.Type = obj.TYPE_REG + p.To.Reg = r + case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS: r := v.Reg() r1 := v.Args[0].Reg() |