diff options
author | Geoff Berry <gberry.qdt@qualcommdatacenter.com> | 2018-02-21 16:15:39 -0500 |
---|---|---|
committer | Cherry Zhang <cherryyz@google.com> | 2018-03-15 14:10:41 +0000 |
commit | e244a7a7d340ed5fd88567ee587b574a7951fac8 (patch) | |
tree | 270bb74bedbe36dc54fa77c6553bc16900181143 /src/cmd/compile/internal/ssa/gen/ARM64Ops.go | |
parent | ded9a1b3723e2c16f2ac0373ef4593f09e65f54d (diff) | |
download | go-e244a7a7d340ed5fd88567ee587b574a7951fac8.tar.gz go-e244a7a7d340ed5fd88567ee587b574a7951fac8.zip |
cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.
go1 benchmarks results on Amberwing:
name old time/op new time/op delta
FmtManyArgs 786ns ± 2% 714ns ± 1% -9.20% (p=0.000 n=10+10)
Gzip 437ms ± 0% 402ms ± 0% -7.99% (p=0.000 n=10+10)
FmtFprintfIntInt 196ns ± 0% 182ns ± 0% -7.28% (p=0.000 n=10+9)
FmtFprintfPrefixedInt 207ns ± 0% 199ns ± 0% -3.86% (p=0.000 n=10+10)
FmtFprintfFloat 324ns ± 0% 316ns ± 0% -2.47% (p=0.000 n=10+8)
FmtFprintfInt 119ns ± 0% 117ns ± 0% -1.68% (p=0.000 n=10+9)
GobDecode 12.8ms ± 2% 12.6ms ± 1% -1.62% (p=0.002 n=10+10)
JSONDecode 94.4ms ± 1% 93.4ms ± 0% -1.10% (p=0.000 n=10+10)
RegexpMatchEasy0_32 247ns ± 0% 245ns ± 0% -0.65% (p=0.000 n=10+10)
RegexpMatchMedium_32 314ns ± 0% 312ns ± 0% -0.64% (p=0.000 n=10+10)
RegexpMatchEasy0_1K 541ns ± 0% 538ns ± 0% -0.55% (p=0.000 n=10+9)
TimeParse 450ns ± 1% 448ns ± 1% -0.42% (p=0.035 n=9+9)
RegexpMatchEasy1_32 244ns ± 0% 243ns ± 0% -0.41% (p=0.000 n=10+10)
GoParse 6.03ms ± 0% 6.00ms ± 0% -0.40% (p=0.002 n=10+10)
RegexpMatchEasy1_1K 779ns ± 0% 777ns ± 0% -0.26% (p=0.000 n=10+10)
RegexpMatchHard_32 2.75µs ± 0% 2.74µs ± 1% -0.06% (p=0.026 n=9+9)
BinaryTree17 11.7s ± 0% 11.6s ± 0% ~ (p=0.089 n=10+10)
HTTPClientServer 89.1µs ± 1% 89.5µs ± 2% ~ (p=0.436 n=10+10)
RegexpMatchHard_1K 78.9µs ± 0% 79.5µs ± 2% ~ (p=0.469 n=10+10)
FmtFprintfEmpty 58.5ns ± 0% 58.5ns ± 0% ~ (all equal)
GobEncode 12.0ms ± 1% 12.1ms ± 0% ~ (p=0.075 n=10+10)
Revcomp 669ms ± 0% 668ms ± 0% ~ (p=0.091 n=7+9)
Mandelbrot200 5.35ms ± 0% 5.36ms ± 0% +0.07% (p=0.000 n=9+9)
RegexpMatchMedium_1K 52.1µs ± 0% 52.1µs ± 0% +0.10% (p=0.000 n=9+9)
Fannkuch11 3.25s ± 0% 3.26s ± 0% +0.36% (p=0.000 n=9+10)
FmtFprintfString 114ns ± 1% 115ns ± 0% +0.52% (p=0.011 n=10+10)
JSONEncode 20.2ms ± 0% 20.3ms ± 0% +0.65% (p=0.000 n=10+10)
Template 91.3ms ± 0% 92.3ms ± 0% +1.08% (p=0.000 n=10+10)
TimeFormat 484ns ± 0% 495ns ± 1% +2.30% (p=0.000 n=9+10)
There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but I
think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:
ubfiz x0, x0, #3, #8
add x1, x2, x0
instead of
add x1, x2, x0, lsl #3
Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/ARM64Ops.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 34 |
1 files changed, 26 insertions, 8 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index 008be3c47e..b311359721 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -139,6 +139,7 @@ func init() { gp1flags = regInfo{inputs: []regMask{gpg}} gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} + gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp2flags = regInfo{inputs: []regMask{gpg, gpg}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} @@ -231,14 +232,16 @@ func init() { {name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, // -arg0 + (arg1 * arg2) // shifts - {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64 - {name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"}, // arg0 << auxInt - {name: "SRL", argLength: 2, reg: gp21, asm: "LSR"}, // arg0 >> arg1, unsigned, shift amount is mod 64 - {name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"}, // arg0 >> auxInt, unsigned - {name: "SRA", argLength: 2, reg: gp21, asm: "ASR"}, // arg0 >> arg1, signed, shift amount is mod 64 - {name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"}, // arg0 >> auxInt, signed - {name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"}, // arg0 right rotate by auxInt bits - {name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits + {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64 + {name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"}, // arg0 << auxInt + {name: "SRL", argLength: 2, reg: gp21, asm: "LSR"}, // arg0 >> arg1, unsigned, shift amount is mod 64 + {name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"}, // arg0 >> auxInt, unsigned + {name: "SRA", argLength: 2, reg: gp21, asm: "ASR"}, // arg0 >> arg1, signed, shift amount is mod 64 + {name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"}, // arg0 >> auxInt, signed + {name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"}, // arg0 right rotate by auxInt bits + {name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits + {name: "EXTRconst", argLength: 2, reg: gp21, asm: "EXTR", aux: "Int64"}, // extract 64 bits from arg0:arg1 starting at lsb auxInt + {name: "EXTRWconst", argLength: 2, reg: gp21, asm: "EXTRW", aux: "Int64"}, // extract 32 bits from arg0[31:0]:arg1[31:0] starting at lsb auxInt and zero top 32 bits // comparisons {name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1 @@ -281,6 +284,21 @@ func init() { {name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift {name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift + // bitfield ops + // for all bitfield ops lsb is auxInt>>8, width is auxInt&0xff + // insert low width bits of arg1 into the result starting at bit lsb, copy other bits from arg0 + {name: "BFI", argLength: 2, reg: gp21nog, asm: "BFI", aux: "Int64", resultInArg0: true}, + // extract width bits of arg1 starting at bit lsb and insert at low end of result, copy other bits from arg0 + {name: "BFXIL", argLength: 2, reg: gp21nog, asm: "BFXIL", aux: "Int64", resultInArg0: true}, + // insert low width bits of arg0 into the result starting at bit lsb, bits to the left of the inserted bit field are set to the high/sign bit of the inserted bit field, bits to the right are zeroed + {name: "SBFIZ", argLength: 1, reg: gp11, asm: "SBFIZ", aux: "Int64"}, + // extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are set to the high/sign bit of the extracted bitfield + {name: "SBFX", argLength: 1, reg: gp11, asm: "SBFX", aux: "Int64"}, + // insert low width bits of arg0 into the result starting at bit lsb, bits to the left and right of the inserted bit field are zeroed + {name: "UBFIZ", argLength: 1, reg: gp11, asm: "UBFIZ", aux: "Int64"}, + // extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are zeroed + {name: "UBFX", argLength: 1, reg: gp11, asm: "UBFX", aux: "Int64"}, + // moves {name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true}, // 32 low bits of auxint {name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float |