aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
diff options
context:
space:
mode:
authorerifan01 <eric.fang@arm.com>2019-03-20 12:46:20 +0000
committerCherry Zhang <cherryyz@google.com>2019-04-22 14:40:20 +0000
commitf8f265b9cfd57970b2bc8b3dd8531cedaf57ccc1 (patch)
tree4c6bd812e0cddace3c11512e5b27013f9102b7f7 /src/cmd/compile/internal/ssa/gen/ARM64Ops.go
parentdb42bb3b70fe82e9d33011fb36b4171897663ccf (diff)
downloadgo-f8f265b9cfd57970b2bc8b3dd8531cedaf57ccc1.tar.gz
go-f8f265b9cfd57970b2bc8b3dd8531cedaf57ccc1.zip
cmd/compile: intrinsify math/bits.Sub64 for arm64
This CL instrinsifies Sub64 with arm64 instruction sequence NEGS, SBCS, NGC and NEG, and optimzes the case of borrowing chains. Benchmarks: name old time/op new time/op delta Sub-64 2.500000ns +- 0% 2.048000ns +- 1% -18.08% (p=0.000 n=10+10) Sub32-64 2.500000ns +- 0% 2.500000ns +- 0% ~ (all equal) Sub64-64 2.500000ns +- 0% 2.080000ns +- 0% -16.80% (p=0.000 n=10+7) Sub64multiple-64 7.090000ns +- 0% 2.090000ns +- 0% -70.52% (p=0.000 n=10+10) Change-Id: I3d2664e009a9635e13b55d2c4567c7b34c2c0655 Reviewed-on: https://go-review.googlesource.com/c/go/+/159018 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/ARM64Ops.go')
-rw-r--r--src/cmd/compile/internal/ssa/gen/ARM64Ops.go34
1 files changed, 19 insertions, 15 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index a885a8f467..ece53eb750 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -183,6 +183,8 @@ func init() {
{name: "ADDSflags", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "ADDS", commutative: true}, // arg0+arg1, set flags.
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1
{name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int64"}, // arg0 - auxInt
+ {name: "SBCSflags", argLength: 3, reg: gp2flags1flags, typ: "(UInt64,Flags)", asm: "SBCS"}, // arg0-(arg1+borrowing), set flags.
+ {name: "SUBSflags", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "SUBS"}, // arg0 - arg1, set flags.
{name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true}, // arg0 * arg1
{name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true}, // arg0 * arg1, 32-bit
{name: "MNEG", argLength: 2, reg: gp21, asm: "MNEG", commutative: true}, // -arg0 * arg1
@@ -224,21 +226,23 @@ func init() {
{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)
// unary ops
- {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
- {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0
- {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"}, // abs(arg0), float64
- {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32
- {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64
- {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64
- {name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // byte reverse, 64-bit
- {name: "REVW", argLength: 1, reg: gp11, asm: "REVW"}, // byte reverse, 32-bit
- {name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"}, // byte reverse in each 16-bit halfword, 32-bit
- {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // bit reverse, 64-bit
- {name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"}, // bit reverse, 32-bit
- {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero, 64-bit
- {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zero, 32-bit
- {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit
- {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
+ {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
+ {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0
+ {name: "NEGSflags", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "NEGS"}, // -arg0, set flags.
+ {name: "NGCzerocarry", argLength: 1, reg: gp0flags1, typ: "UInt64", asm: "NGC"}, // -1 if borrowing, 0 otherwise.
+ {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"}, // abs(arg0), float64
+ {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32
+ {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64
+ {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64
+ {name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // byte reverse, 64-bit
+ {name: "REVW", argLength: 1, reg: gp11, asm: "REVW"}, // byte reverse, 32-bit
+ {name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"}, // byte reverse in each 16-bit halfword, 32-bit
+ {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // bit reverse, 64-bit
+ {name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"}, // bit reverse, 32-bit
+ {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero, 64-bit
+ {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zero, 32-bit
+ {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit
+ {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},