diff options
author | Martin Möhrmann <moehrmann@google.com> | 2018-01-27 11:55:34 +0100 |
---|---|---|
committer | Martin Möhrmann <moehrmann@google.com> | 2018-10-15 19:04:09 +0000 |
commit | a1ca4893ff755d6b0b3bf4b026196d55251ea846 (patch) | |
tree | 72a39af659aa88e0a62a5da6ef6d45a659fcfc51 /src/cmd/compile/internal/x86 | |
parent | 9f66b41beea82cc613cad9138c10a50f2b3ea137 (diff) | |
download | go-a1ca4893ff755d6b0b3bf4b026196d55251ea846.tar.gz go-a1ca4893ff755d6b0b3bf4b026196d55251ea846.zip |
cmd/compile: add intrinsics for runtime/internal/math on 386 and amd64
Add generic, 386 and amd64 specific ops and SSA rules for multiplication
with overflow and branching based on overflow flags. Use these to intrinsify
runtime/internal/math.MulUintptr.
On amd64
mul, overflow := math.MulUintptr(a, b)
if overflow {
is lowered to two instructions:
MULQ SI
JO 0x10ee35c
No codegen tests, as codegen cannot currently test unexported internal runtime
functions.
amd64:
name old time/op new time/op delta
MulUintptr/small 1.16ns ± 5% 0.88ns ± 6% -24.36% (p=0.000 n=19+20)
MulUintptr/large 10.7ns ± 1% 1.1ns ± 1% -89.28% (p=0.000 n=17+19)
Change-Id: If60739a86f820e5044d677276c21df90d3c7a86a
Reviewed-on: https://go-review.googlesource.com/c/141820
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/x86')
-rw-r--r-- | src/cmd/compile/internal/x86/ssa.go | 13 |
1 files changed, 12 insertions, 1 deletions
diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go index e0aebb449c..8a6f015854 100644 --- a/src/cmd/compile/internal/x86/ssa.go +++ b/src/cmd/compile/internal/x86/ssa.go @@ -278,6 +278,13 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { m.To.Reg = x86.REG_DX } + case ssa.Op386MULLU: + // Arg[0] is already in AX as it's the only register we allow + // results lo in AX + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() + case ssa.Op386MULLQU: // AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]). p := s.Prog(v.Op.Asm()) @@ -770,7 +777,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ssa.Op386SETGF, ssa.Op386SETGEF, ssa.Op386SETB, ssa.Op386SETBE, ssa.Op386SETORD, ssa.Op386SETNAN, - ssa.Op386SETA, ssa.Op386SETAE: + ssa.Op386SETA, ssa.Op386SETAE, + ssa.Op386SETO: p := s.Prog(v.Op.Asm()) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() @@ -842,6 +850,8 @@ var blockJump = [...]struct { ssa.Block386GE: {x86.AJGE, x86.AJLT}, ssa.Block386LE: {x86.AJLE, x86.AJGT}, ssa.Block386GT: {x86.AJGT, x86.AJLE}, + ssa.Block386OS: {x86.AJOS, x86.AJOC}, + ssa.Block386OC: {x86.AJOC, x86.AJOS}, ssa.Block386ULT: {x86.AJCS, x86.AJCC}, ssa.Block386UGE: {x86.AJCC, x86.AJCS}, ssa.Block386UGT: {x86.AJHI, x86.AJLS}, @@ -903,6 +913,7 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) { case ssa.Block386EQ, ssa.Block386NE, ssa.Block386LT, ssa.Block386GE, ssa.Block386LE, ssa.Block386GT, + ssa.Block386OS, ssa.Block386OC, ssa.Block386ULT, ssa.Block386UGT, ssa.Block386ULE, ssa.Block386UGE: jmp := blockJump[b.Kind] |