diff options
author | Cherry Zhang <cherryyz@google.com> | 2020-10-28 09:12:20 -0400 |
---|---|---|
committer | Cherry Zhang <cherryyz@google.com> | 2020-10-28 09:12:20 -0400 |
commit | a16e30d162c1c7408db7821e7b9513cefa09c6ca (patch) | |
tree | af752ba9ba44c547df39bb0af9bff79f610ba9d5 /src/cmd/compile/internal/ssa/gen/RISCV64.rules | |
parent | 91e4d2d57bc341dd82c98247117114c851380aef (diff) | |
parent | cf6cfba4d5358404dd890f6025e573a4b2156543 (diff) | |
download | go-dev.link.tar.gz go-dev.link.zip |
[dev.link] all: merge branch 'master' into dev.linkdev.link
Clean merge.
Change-Id: Ia7b2808bc649790198d34c226a61d9e569084dc5
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/RISCV64.rules')
-rw-r--r-- | src/cmd/compile/internal/ssa/gen/RISCV64.rules | 127 |
1 files changed, 96 insertions, 31 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules index 9437c8e9d4..325cbeb825 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules @@ -3,11 +3,7 @@ // license that can be found in the LICENSE file. // Optimizations TODO: -// * Somehow track when values are already zero/signed-extended, avoid re-extending. // * Use SLTI and SLTIU for comparisons to constants, instead of SLT/SLTU with constants in registers -// * Find a more efficient way to do zero/sign extension than left+right shift. -// There are many other options (store then load-extend, LUI+ANDI for zero extend, special case 32->64, ...), -// but left+right shift is simple and uniform, and we don't have real hardware to do perf testing on anyway. // * Use the zero register instead of moving 0 into a register. // * Add rules to avoid generating a temp bool value for (If (SLT[U] ...) ...). // * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants. @@ -66,8 +62,8 @@ (Mod32u ...) => (REMUW ...) (Mod16 x y [false]) => (REMW (SignExt16to32 x) (SignExt16to32 y)) (Mod16u x y) => (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y)) -(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y)) -(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y)) +(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y)) +(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y)) (And64 ...) => (AND ...) (And32 ...) => (AND ...) @@ -98,25 +94,21 @@ (Sqrt ...) => (FSQRTD ...) -// Zero and sign extension -// Shift left until the bits we want are at the top of the register. -// Then logical/arithmetic shift right for zero/sign extend. -// We always extend to 64 bits; there's no reason not to, -// and optimization rules can then collapse some extensions. - -(SignExt8to16 <t> x) => (SRAI [56] (SLLI <t> [56] x)) -(SignExt8to32 <t> x) => (SRAI [56] (SLLI <t> [56] x)) -(SignExt8to64 <t> x) => (SRAI [56] (SLLI <t> [56] x)) -(SignExt16to32 <t> x) => (SRAI [48] (SLLI <t> [48] x)) -(SignExt16to64 <t> x) => (SRAI [48] (SLLI <t> [48] x)) -(SignExt32to64 <t> x) => (ADDIW [0] x) - -(ZeroExt8to16 <t> x) => (SRLI [56] (SLLI <t> [56] x)) -(ZeroExt8to32 <t> x) => (SRLI [56] (SLLI <t> [56] x)) -(ZeroExt8to64 <t> x) => (SRLI [56] (SLLI <t> [56] x)) -(ZeroExt16to32 <t> x) => (SRLI [48] (SLLI <t> [48] x)) -(ZeroExt16to64 <t> x) => (SRLI [48] (SLLI <t> [48] x)) -(ZeroExt32to64 <t> x) => (SRLI [32] (SLLI <t> [32] x)) +// Sign and zero extension. + +(SignExt8to16 ...) => (MOVBreg ...) +(SignExt8to32 ...) => (MOVBreg ...) +(SignExt8to64 ...) => (MOVBreg ...) +(SignExt16to32 ...) => (MOVHreg ...) +(SignExt16to64 ...) => (MOVHreg ...) +(SignExt32to64 ...) => (MOVWreg ...) + +(ZeroExt8to16 ...) => (MOVBUreg ...) +(ZeroExt8to32 ...) => (MOVBUreg ...) +(ZeroExt8to64 ...) => (MOVBUreg ...) +(ZeroExt16to32 ...) => (MOVHUreg ...) +(ZeroExt16to64 ...) => (MOVHUreg ...) +(ZeroExt32to64 ...) => (MOVWUreg ...) (Cvt32to32F ...) => (FCVTSW ...) (Cvt32to64F ...) => (FCVTDW ...) @@ -261,16 +253,16 @@ (EqPtr x y) => (SEQZ (SUB <x.Type> x y)) (Eq64 x y) => (SEQZ (SUB <x.Type> x y)) (Eq32 x y) => (SEQZ (SUBW <x.Type> x y)) -(Eq16 x y) => (SEQZ (ZeroExt16to64 (SUB <x.Type> x y))) -(Eq8 x y) => (SEQZ (ZeroExt8to64 (SUB <x.Type> x y))) +(Eq16 x y) => (SEQZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y))) +(Eq8 x y) => (SEQZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y))) (Eq64F ...) => (FEQD ...) (Eq32F ...) => (FEQS ...) (NeqPtr x y) => (SNEZ (SUB <x.Type> x y)) (Neq64 x y) => (SNEZ (SUB <x.Type> x y)) (Neq32 x y) => (SNEZ (SUBW <x.Type> x y)) -(Neq16 x y) => (SNEZ (ZeroExt16to64 (SUB <x.Type> x y))) -(Neq8 x y) => (SNEZ (ZeroExt8to64 (SUB <x.Type> x y))) +(Neq16 x y) => (SNEZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y))) +(Neq8 x y) => (SNEZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y))) (Neq64F ...) => (FNED ...) (Neq32F ...) => (FNES ...) @@ -291,8 +283,8 @@ (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem) (Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVDstore ptr val mem) -(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem) -(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem) +(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem) +(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem) // We need to fold MOVaddr into the LD/MOVDstore ops so that the live variable analysis // knows what variables are being read/written by the ops. @@ -368,6 +360,13 @@ (Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem) (Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem) +// Medium zeroing uses a Duff's device +// 8 and 128 are magic constants, see runtime/mkduff.go +(Zero [s] {t} ptr mem) + && s%8 == 0 && s >= 16 && s <= 8*128 + && t.Alignment()%8 == 0 && !config.noDuffDevice => + (DUFFZERO [8 * (128 - s/8)] ptr mem) + // Generic zeroing uses a loop (Zero [s] {t} ptr mem) => (LoweredZero [t.Alignment()] @@ -403,6 +402,13 @@ (Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem) (Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem) +// Medium move uses a Duff's device +// 16 and 128 are magic constants, see runtime/mkduff.go +(Move [s] {t} dst src mem) + && s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 + && !config.noDuffDevice && logLargeCopy(v, s) => + (DUFFCOPY [16 * (128 - s/8)] dst src mem) + // Generic move uses a loop (Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) => (LoweredMove [t.Alignment()] @@ -501,6 +507,65 @@ (MOVWstore [off] {sym} ptr (MOVWconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem) (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem) +// Avoid sign/zero extension after properly typed load. +(MOVBreg x:(MOVBload _ _)) => (MOVDreg x) +(MOVHreg x:(MOVBload _ _)) => (MOVDreg x) +(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x) +(MOVHreg x:(MOVHload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVBload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVHload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x) +(MOVWreg x:(MOVWload _ _)) => (MOVDreg x) +(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x) +(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x) +(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x) +(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x) +(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x) +(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x) + +// Fold double extensions. +(MOVBreg x:(MOVBreg _)) => (MOVDreg x) +(MOVHreg x:(MOVBreg _)) => (MOVDreg x) +(MOVHreg x:(MOVBUreg _)) => (MOVDreg x) +(MOVHreg x:(MOVHreg _)) => (MOVDreg x) +(MOVWreg x:(MOVBreg _)) => (MOVDreg x) +(MOVWreg x:(MOVBUreg _)) => (MOVDreg x) +(MOVWreg x:(MOVHreg _)) => (MOVDreg x) +(MOVWreg x:(MOVWreg _)) => (MOVDreg x) +(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x) +(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x) +(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x) +(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x) +(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x) +(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x) + +// Do not extend before store. +(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem) +(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem) +(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem) +(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) +(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem) +(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem) +(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem) + +// Replace extend after load with alternate load where possible. +(MOVBreg <t> x:(MOVBUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <t> [off] {sym} ptr mem) +(MOVHreg <t> x:(MOVHUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHload <t> [off] {sym} ptr mem) +(MOVWreg <t> x:(MOVWUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <t> [off] {sym} ptr mem) +(MOVBUreg <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBUload <t> [off] {sym} ptr mem) +(MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem) +(MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem) + +// If a register move has only 1 use, just use the same register without emitting instruction +// MOVnop does not emit an instruction, only for ensuring the type. +(MOVDreg x) && x.Uses == 1 => (MOVDnop x) + // Fold constant into immediate instructions where possible. (ADD (MOVBconst [val]) x) => (ADDI [int64(val)] x) (ADD (MOVHconst [val]) x) => (ADDI [int64(val)] x) |