path: root/src/cmd/compile/internal/ssa/gen/RISCV64.rules
author    Joel Sing <joel@sing.id.au>  2020-10-25 01:34:17 +1100
committer Joel Sing <joel@sing.id.au>  2020-10-27 12:36:54 +0000
commit    320cc79002b5ce5f8d7f667f0aa78a1fdce59eb4 (patch)
tree      39090b91832550a6f15d3f553639d996798b02e2 /src/cmd/compile/internal/ssa/gen/RISCV64.rules
parent    333e90448a0e55f2e1161853caecf3d30ef3a74a (diff)
cmd/compile: eliminate unnecessary sign/zero extension for riscv64
Add additional rules to eliminate unnecessary sign/zero extension for
riscv64. Also where possible, replace an extension following a load with a
different typed load. This removes almost another 8,000 instructions from
the go binary.

Of particular note, change Eq16/Eq8/Neq16/Neq8 to zero extend each value
before subtraction, rather than zero extending after subtraction. While
this appears to double the number of zero extensions, it often lets us
completely eliminate them as the load can already be performed in a
properly typed manner.

As an example, prior to this change runtime.memequal16 was:

   0000000000013028 <runtime.memequal16>:
      13028:  00813183  ld    gp,8(sp)
      1302c:  00019183  lh    gp,0(gp)
      13030:  01013283  ld    t0,16(sp)
      13034:  00029283  lh    t0,0(t0)
      13038:  405181b3  sub   gp,gp,t0
      1303c:  03019193  slli  gp,gp,0x30
      13040:  0301d193  srli  gp,gp,0x30
      13044:  0011b193  seqz  gp,gp
      13048:  00310c23  sb    gp,24(sp)
      1304c:  00008067  ret

Whereas it now becomes:

   0000000000012fa8 <runtime.memequal16>:
      12fa8:  00813183  ld    gp,8(sp)
      12fac:  0001d183  lhu   gp,0(gp)
      12fb0:  01013283  ld    t0,16(sp)
      12fb4:  0002d283  lhu   t0,0(t0)
      12fb8:  405181b3  sub   gp,gp,t0
      12fbc:  0011b193  seqz  gp,gp
      12fc0:  00310c23  sb    gp,24(sp)
      12fc4:  00008067  ret

Change-Id: I16321feb18381241cab121c0097a126104c56c2c
Reviewed-on: https://go-review.googlesource.com/c/go/+/264659
Trust: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
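The key equivalence behind the new Eq16/Eq8/Neq16/Neq8 rules can be checked
at the value level outside the compiler. The following standalone Go sketch
(not part of the commit; the names oldEq16 and newEq16 are purely
illustrative) models both lowerings for Eq16 and confirms they agree:

package main

import "fmt"

// oldEq16 models the previous lowering, (SEQZ (ZeroExt16to64 (SUB x y))):
// subtract first, then zero extend the 16-bit difference before the zero
// test. Go's uint16 subtraction already wraps to 16 bits, so the conversion
// to uint64 is the zero extension.
func oldEq16(x, y uint16) bool {
	return uint64(x-y) == 0
}

// newEq16 models the new lowering,
// (SEQZ (SUB (ZeroExt16to64 x) (ZeroExt16to64 y))): zero extend each operand
// first, then subtract and test for zero.
func newEq16(x, y uint16) bool {
	return uint64(x)-uint64(y) == 0
}

func main() {
	// Spot-check that both lowerings agree, including wrap-around cases.
	vals := []uint16{0, 1, 2, 0x7fff, 0x8000, 0xfffe, 0xffff}
	for _, a := range vals {
		for _, b := range vals {
			if oldEq16(a, b) != newEq16(a, b) {
				fmt.Println("mismatch:", a, b)
				return
			}
		}
	}
	fmt.Println("old and new Eq16 lowerings agree on the sampled inputs")
}

Because the new form extends the operands rather than the 64-bit difference,
the extensions sit directly on the loaded values, where the new "Avoid
sign/zero extension after properly typed load" rules in the diff below can
absorb them, giving the lhu-based memequal16 shown above.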
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/RISCV64.rules')
-rw-r--r--   src/cmd/compile/internal/ssa/gen/RISCV64.rules | 79
1 file changed, 67 insertions, 12 deletions
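Changes of this kind are typically pinned down with asmcheck codegen tests
under test/codegen, where per-architecture comments assert which instructions
must or must not appear in the generated assembly. The sketch below is only
illustrative and is not taken from this commit; the function name equal16 and
the exact instruction regexes are assumptions:

// asmcheck

package codegen

// equal16 should compile to zero-extending loads (lhu, i.e. MOVHU in Go
// assembly) with no shift-based extension left over once the new rules apply.
func equal16(p, q *uint16) bool {
	// riscv64:"MOVHU",-"SLLI",-"SRLI"
	return *p == *q
}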
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
index b356247ff6..3bc2e8498a 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
@@ -3,11 +3,7 @@
// license that can be found in the LICENSE file.
// Optimizations TODO:
-// * Somehow track when values are already zero/signed-extended, avoid re-extending.
// * Use SLTI and SLTIU for comparisons to constants, instead of SLT/SLTU with constants in registers
-// * Find a more efficient way to do zero/sign extension than left+right shift.
-// There are many other options (store then load-extend, LUI+ANDI for zero extend, special case 32->64, ...),
-// but left+right shift is simple and uniform, and we don't have real hardware to do perf testing on anyway.
// * Use the zero register instead of moving 0 into a register.
// * Add rules to avoid generating a temp bool value for (If (SLT[U] ...) ...).
// * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants.
@@ -66,8 +62,8 @@
(Mod32u ...) => (REMUW ...)
(Mod16 x y [false]) => (REMW (SignExt16to32 x) (SignExt16to32 y))
(Mod16u x y) => (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
-(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
+(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
(And64 ...) => (AND ...)
(And32 ...) => (AND ...)
@@ -257,16 +253,16 @@
(EqPtr x y) => (SEQZ (SUB <x.Type> x y))
(Eq64 x y) => (SEQZ (SUB <x.Type> x y))
(Eq32 x y) => (SEQZ (SUBW <x.Type> x y))
-(Eq16 x y) => (SEQZ (ZeroExt16to64 (SUB <x.Type> x y)))
-(Eq8 x y) => (SEQZ (ZeroExt8to64 (SUB <x.Type> x y)))
+(Eq16 x y) => (SEQZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Eq8 x y) => (SEQZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Eq64F ...) => (FEQD ...)
(Eq32F ...) => (FEQS ...)
(NeqPtr x y) => (SNEZ (SUB <x.Type> x y))
(Neq64 x y) => (SNEZ (SUB <x.Type> x y))
(Neq32 x y) => (SNEZ (SUBW <x.Type> x y))
-(Neq16 x y) => (SNEZ (ZeroExt16to64 (SUB <x.Type> x y)))
-(Neq8 x y) => (SNEZ (ZeroExt8to64 (SUB <x.Type> x y)))
+(Neq16 x y) => (SNEZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Neq8 x y) => (SNEZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Neq64F ...) => (FNED ...)
(Neq32F ...) => (FNES ...)
@@ -287,8 +283,8 @@
(Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVDstore ptr val mem)
-(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
-(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
// We need to fold MOVaddr into the LD/MOVDstore ops so that the live variable analysis
// knows what variables are being read/written by the ops.
@@ -497,6 +493,65 @@
(MOVWstore [off] {sym} ptr (MOVWconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem)
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem)
+// Avoid sign/zero extension after properly typed load.
+(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
+
+// Fold double extensions.
+(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
+
+// Do not extend before store.
+(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
+
+// Replace extend after load with alternate load where possible.
+(MOVBreg <t> x:(MOVBUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <t> [off] {sym} ptr mem)
+(MOVHreg <t> x:(MOVHUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHload <t> [off] {sym} ptr mem)
+(MOVWreg <t> x:(MOVWUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <t> [off] {sym} ptr mem)
+(MOVBUreg <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBUload <t> [off] {sym} ptr mem)
+(MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem)
+(MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem)
+
+// If a register move has only 1 use, just use the same register without emitting instruction
+// MOVnop does not emit an instruction, only for ensuring the type.
+(MOVDreg x) && x.Uses == 1 => (MOVDnop x)
+
// Fold constant into immediate instructions where possible.
(ADD (MOVBconst [val]) x) => (ADDI [int64(val)] x)
(ADD (MOVHconst [val]) x) => (ADDI [int64(val)] x)