aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
diff options
context:
space:
mode:
authorGeoff Berry <gberry.qdt@qualcommdatacenter.com>2018-02-21 16:15:39 -0500
committerCherry Zhang <cherryyz@google.com>2018-03-15 14:10:41 +0000
commite244a7a7d340ed5fd88567ee587b574a7951fac8 (patch)
tree270bb74bedbe36dc54fa77c6553bc16900181143 /src/cmd/compile/internal/ssa/gen/ARM64Ops.go
parentded9a1b3723e2c16f2ac0373ef4593f09e65f54d (diff)
downloadgo-e244a7a7d340ed5fd88567ee587b574a7951fac8.tar.gz
go-e244a7a7d340ed5fd88567ee587b574a7951fac8.zip
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX, UBFIZ and UBFX opcodes. go1 benchmarks results on Amberwing: name old time/op new time/op delta FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10) Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10) FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9) FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10) FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8) FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9) GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10) JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10) RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10) RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10) RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9) TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9) RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10) GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10) RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10) RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9) BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10) HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10) RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10) FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal) GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10) Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9) Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9) RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9) Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10) FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10) JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10) Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10) TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10) There are some opportunities to improve this change further by adding patterns to match the "extended register" versions of ADD/SUB/CMP, but I think that should be evaluated on its own. The regressions in Template and TimeFormat would likely be recovered by this, as they seem to be due to generating: ubfiz x0, x0, #3, #8 add x1, x2, x0 instead of add x1, x2, x0, lsl #3 Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b Reviewed-on: https://go-review.googlesource.com/88355 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/ARM64Ops.go')
-rw-r--r--src/cmd/compile/internal/ssa/gen/ARM64Ops.go34
1 files changed, 26 insertions, 8 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 008be3c47e..b311359721 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -139,6 +139,7 @@ func init() {
gp1flags = regInfo{inputs: []regMask{gpg}}
gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
+ gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
@@ -231,14 +232,16 @@ func init() {
{name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, // -arg0 + (arg1 * arg2)
// shifts
- {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64
- {name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"}, // arg0 << auxInt
- {name: "SRL", argLength: 2, reg: gp21, asm: "LSR"}, // arg0 >> arg1, unsigned, shift amount is mod 64
- {name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"}, // arg0 >> auxInt, unsigned
- {name: "SRA", argLength: 2, reg: gp21, asm: "ASR"}, // arg0 >> arg1, signed, shift amount is mod 64
- {name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"}, // arg0 >> auxInt, signed
- {name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"}, // arg0 right rotate by auxInt bits
- {name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits
+ {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64
+ {name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"}, // arg0 << auxInt
+ {name: "SRL", argLength: 2, reg: gp21, asm: "LSR"}, // arg0 >> arg1, unsigned, shift amount is mod 64
+ {name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"}, // arg0 >> auxInt, unsigned
+ {name: "SRA", argLength: 2, reg: gp21, asm: "ASR"}, // arg0 >> arg1, signed, shift amount is mod 64
+ {name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"}, // arg0 >> auxInt, signed
+ {name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"}, // arg0 right rotate by auxInt bits
+ {name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits
+ {name: "EXTRconst", argLength: 2, reg: gp21, asm: "EXTR", aux: "Int64"}, // extract 64 bits from arg0:arg1 starting at lsb auxInt
+ {name: "EXTRWconst", argLength: 2, reg: gp21, asm: "EXTRW", aux: "Int64"}, // extract 32 bits from arg0[31:0]:arg1[31:0] starting at lsb auxInt and zero top 32 bits
// comparisons
{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1
@@ -281,6 +284,21 @@ func init() {
{name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
{name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift
+ // bitfield ops
+ // for all bitfield ops lsb is auxInt>>8, width is auxInt&0xff
+ // insert low width bits of arg1 into the result starting at bit lsb, copy other bits from arg0
+ {name: "BFI", argLength: 2, reg: gp21nog, asm: "BFI", aux: "Int64", resultInArg0: true},
+ // extract width bits of arg1 starting at bit lsb and insert at low end of result, copy other bits from arg0
+ {name: "BFXIL", argLength: 2, reg: gp21nog, asm: "BFXIL", aux: "Int64", resultInArg0: true},
+ // insert low width bits of arg0 into the result starting at bit lsb, bits to the left of the inserted bit field are set to the high/sign bit of the inserted bit field, bits to the right are zeroed
+ {name: "SBFIZ", argLength: 1, reg: gp11, asm: "SBFIZ", aux: "Int64"},
+ // extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are set to the high/sign bit of the extracted bitfield
+ {name: "SBFX", argLength: 1, reg: gp11, asm: "SBFX", aux: "Int64"},
+ // insert low width bits of arg0 into the result starting at bit lsb, bits to the left and right of the inserted bit field are zeroed
+ {name: "UBFIZ", argLength: 1, reg: gp11, asm: "UBFIZ", aux: "Int64"},
+ // extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are zeroed
+ {name: "UBFX", argLength: 1, reg: gp11, asm: "UBFX", aux: "Int64"},
+
// moves
{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true}, // 32 low bits of auxint
{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float