diff options
author | erifan01 <eric.fang@arm.com> | 2019-03-20 12:46:20 +0000 |
---|---|---|
committer | Cherry Zhang <cherryyz@google.com> | 2019-04-22 14:40:20 +0000 |
commit | f8f265b9cfd57970b2bc8b3dd8531cedaf57ccc1 (patch) | |
tree | 4c6bd812e0cddace3c11512e5b27013f9102b7f7 /src/cmd/compile/internal/ssa/gen/ARM64Ops.go | |
parent | db42bb3b70fe82e9d33011fb36b4171897663ccf (diff) | |
download | go-f8f265b9cfd57970b2bc8b3dd8531cedaf57ccc1.tar.gz go-f8f265b9cfd57970b2bc8b3dd8531cedaf57ccc1.zip |
cmd/compile: intrinsify math/bits.Sub64 for arm64
This CL instrinsifies Sub64 with arm64 instruction sequence NEGS, SBCS,
NGC and NEG, and optimzes the case of borrowing chains.
Benchmarks:
name old time/op new time/op delta
Sub-64 2.500000ns +- 0% 2.048000ns +- 1% -18.08% (p=0.000 n=10+10)
Sub32-64 2.500000ns +- 0% 2.500000ns +- 0% ~ (all equal)
Sub64-64 2.500000ns +- 0% 2.080000ns +- 0% -16.80% (p=0.000 n=10+7)
Sub64multiple-64 7.090000ns +- 0% 2.090000ns +- 0% -70.52% (p=0.000 n=10+10)
Change-Id: I3d2664e009a9635e13b55d2c4567c7b34c2c0655
Reviewed-on: https://go-review.googlesource.com/c/go/+/159018
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/ARM64Ops.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 34 |
1 files changed, 19 insertions, 15 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index a885a8f467..ece53eb750 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -183,6 +183,8 @@ func init() { {name: "ADDSflags", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "ADDS", commutative: true}, // arg0+arg1, set flags. {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1 {name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int64"}, // arg0 - auxInt + {name: "SBCSflags", argLength: 3, reg: gp2flags1flags, typ: "(UInt64,Flags)", asm: "SBCS"}, // arg0-(arg1+borrowing), set flags. + {name: "SUBSflags", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "SUBS"}, // arg0 - arg1, set flags. {name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true}, // arg0 * arg1 {name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true}, // arg0 * arg1, 32-bit {name: "MNEG", argLength: 2, reg: gp21, asm: "MNEG", commutative: true}, // -arg0 * arg1 @@ -224,21 +226,23 @@ func init() { {name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo) // unary ops - {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0 - {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 - {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"}, // abs(arg0), float64 - {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32 - {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64 - {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64 - {name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // byte reverse, 64-bit - {name: "REVW", argLength: 1, reg: gp11, asm: "REVW"}, // byte reverse, 32-bit - {name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"}, // byte reverse in each 16-bit halfword, 32-bit - {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // bit reverse, 64-bit - {name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"}, // bit reverse, 32-bit - {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero, 64-bit - {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zero, 32-bit - {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit - {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit. + {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0 + {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 + {name: "NEGSflags", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "NEGS"}, // -arg0, set flags. + {name: "NGCzerocarry", argLength: 1, reg: gp0flags1, typ: "UInt64", asm: "NGC"}, // -1 if borrowing, 0 otherwise. + {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"}, // abs(arg0), float64 + {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32 + {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64 + {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64 + {name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // byte reverse, 64-bit + {name: "REVW", argLength: 1, reg: gp11, asm: "REVW"}, // byte reverse, 32-bit + {name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"}, // byte reverse in each 16-bit halfword, 32-bit + {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // bit reverse, 64-bit + {name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"}, // bit reverse, 32-bit + {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero, 64-bit + {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zero, 32-bit + {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"}, // count set bits for each 8-bit unit and store the result in each 8-bit unit + {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit. {name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true}, {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true}, |