diff options
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/ARM64.rules')
-rw-r--r-- | src/cmd/compile/internal/ssa/gen/ARM64.rules | 244 |
1 files changed, 239 insertions, 5 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 9290b3c186..9b80094f86 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -19,12 +19,21 @@ (Sub64F x y) -> (FSUBD x y) (Mul64 x y) -> (MUL x y) -(Mul32 x y) -> (MUL x y) -(Mul16 x y) -> (MUL x y) -(Mul8 x y) -> (MUL x y) +(Mul32 x y) -> (MULW x y) +(Mul16 x y) -> (MULW x y) +(Mul8 x y) -> (MULW x y) (Mul32F x y) -> (FMULS x y) (Mul64F x y) -> (FMULD x y) +(Hmul64 x y) -> (MULH x y) +(Hmul64u x y) -> (UMULH x y) +(Hmul32 x y) -> (SRAconst (MULL <config.fe.TypeInt64()> x y) [32]) +(Hmul32u x y) -> (SRAconst (UMULL <config.fe.TypeUInt64()> x y) [32]) +(Hmul16 x y) -> (SRAconst (MULW <config.fe.TypeInt32()> (SignExt16to32 x) (SignExt16to32 y)) [16]) +(Hmul16u x y) -> (SRLconst (MUL <config.fe.TypeUInt32()> (ZeroExt16to32 x) (ZeroExt16to32 y)) [16]) +(Hmul8 x y) -> (SRAconst (MULW <config.fe.TypeInt16()> (SignExt8to32 x) (SignExt8to32 y)) [8]) +(Hmul8u x y) -> (SRLconst (MUL <config.fe.TypeUInt16()> (ZeroExt8to32 x) (ZeroExt8to32 y)) [8]) + (Div64 x y) -> (DIV x y) (Div64u x y) -> (UDIV x y) (Div32 x y) -> (DIVW x y) @@ -45,6 +54,8 @@ (Mod8 x y) -> (MODW (SignExt8to32 x) (SignExt8to32 y)) (Mod8u x y) -> (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y)) +(Avg64u <t> x y) -> (ADD (ADD <t> (SRLconst <t> x [1]) (SRLconst <t> y [1])) (AND <t> (AND <t> x y) (MOVDconst [1]))) + (And64 x y) -> (AND x y) (And32 x y) -> (AND x y) (And16 x y) -> (AND x y) @@ -73,12 +84,112 @@ (Com16 x) -> (MVN x) (Com8 x) -> (MVN x) +(Sqrt x) -> (FSQRTD x) + // boolean ops -- booleans are represented with 0=false, 1=true (AndB x y) -> (AND x y) (OrB x y) -> (OR x y) -(EqB x y) -> (XORconst [1] (XOR <config.fe.TypeBool()> x y)) +(EqB x y) -> (XOR (MOVDconst [1]) (XOR <config.fe.TypeBool()> x y)) (NeqB x y) -> (XOR x y) -(Not x) -> (XORconst [1] x) +(Not x) -> (XOR (MOVDconst [1]) x) + +// constant shifts +(Lsh64x64 x (MOVDconst [c])) && uint64(c) < 64 -> (SLLconst x [c]) +(Rsh64x64 x (MOVDconst [c])) && uint64(c) < 64 -> (SRAconst x [c]) +(Rsh64Ux64 x (MOVDconst [c])) && uint64(c) < 64 -> (SRLconst x [c]) +(Lsh32x64 x (MOVDconst [c])) && uint64(c) < 32 -> (SLLconst x [c]) +(Rsh32x64 x (MOVDconst [c])) && uint64(c) < 32 -> (SRAconst (SignExt32to64 x) [c]) +(Rsh32Ux64 x (MOVDconst [c])) && uint64(c) < 32 -> (SRLconst (ZeroExt32to64 x) [c]) +(Lsh16x64 x (MOVDconst [c])) && uint64(c) < 16 -> (SLLconst x [c]) +(Rsh16x64 x (MOVDconst [c])) && uint64(c) < 16 -> (SRAconst (SignExt16to64 x) [c]) +(Rsh16Ux64 x (MOVDconst [c])) && uint64(c) < 16 -> (SRLconst (ZeroExt16to64 x) [c]) +(Lsh8x64 x (MOVDconst [c])) && uint64(c) < 8 -> (SLLconst x [c]) +(Rsh8x64 x (MOVDconst [c])) && uint64(c) < 8 -> (SRAconst (SignExt8to64 x) [c]) +(Rsh8Ux64 x (MOVDconst [c])) && uint64(c) < 8 -> (SRLconst (ZeroExt8to64 x) [c]) + +// large constant shifts +(Lsh64x64 _ (MOVDconst [c])) && uint64(c) >= 64 -> (MOVDconst [0]) +(Rsh64Ux64 _ (MOVDconst [c])) && uint64(c) >= 64 -> (MOVDconst [0]) +(Lsh32x64 _ (MOVDconst [c])) && uint64(c) >= 32 -> (MOVDconst [0]) +(Rsh32Ux64 _ (MOVDconst [c])) && uint64(c) >= 32 -> (MOVDconst [0]) +(Lsh16x64 _ (MOVDconst [c])) && uint64(c) >= 16 -> (MOVDconst [0]) +(Rsh16Ux64 _ (MOVDconst [c])) && uint64(c) >= 16 -> (MOVDconst [0]) +(Lsh8x64 _ (MOVDconst [c])) && uint64(c) >= 8 -> (MOVDconst [0]) +(Rsh8Ux64 _ (MOVDconst [c])) && uint64(c) >= 8 -> (MOVDconst [0]) + +// large constant signed right shift, we leave the sign bit +(Rsh64x64 x (MOVDconst [c])) && uint64(c) >= 64 -> (SRAconst x [63]) +(Rsh32x64 x (MOVDconst [c])) && uint64(c) >= 32 -> (SRAconst (SignExt32to64 x) [63]) +(Rsh16x64 x (MOVDconst [c])) && uint64(c) >= 16 -> (SRAconst (SignExt16to64 x) [63]) +(Rsh8x64 x (MOVDconst [c])) && uint64(c) >= 8 -> (SRAconst (SignExt8to64 x) [63]) + +// shifts +// hardware instruction uses only the low 6 bits of the shift +// we compare to 64 to ensure Go semantics for large shifts +(Lsh64x64 <t> x y) -> (CSELULT (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) +(Lsh64x32 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y))) +(Lsh64x16 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y))) +(Lsh64x8 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y))) + +(Lsh32x64 <t> x y) -> (CSELULT (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) +(Lsh32x32 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y))) +(Lsh32x16 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y))) +(Lsh32x8 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y))) + +(Lsh16x64 <t> x y) -> (CSELULT (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) +(Lsh16x32 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y))) +(Lsh16x16 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y))) +(Lsh16x8 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y))) + +(Lsh8x64 <t> x y) -> (CSELULT (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) +(Lsh8x32 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y))) +(Lsh8x16 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y))) +(Lsh8x8 <t> x y) -> (CSELULT (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y))) + +(Rsh64Ux64 <t> x y) -> (CSELULT (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y)) +(Rsh64Ux32 <t> x y) -> (CSELULT (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y))) +(Rsh64Ux16 <t> x y) -> (CSELULT (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y))) +(Rsh64Ux8 <t> x y) -> (CSELULT (SRL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y))) + +(Rsh32Ux64 <t> x y) -> (CSELULT (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y)) +(Rsh32Ux32 <t> x y) -> (CSELULT (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y))) +(Rsh32Ux16 <t> x y) -> (CSELULT (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y))) +(Rsh32Ux8 <t> x y) -> (CSELULT (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y))) + +(Rsh16Ux64 <t> x y) -> (CSELULT (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y)) +(Rsh16Ux32 <t> x y) -> (CSELULT (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y))) +(Rsh16Ux16 <t> x y) -> (CSELULT (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y))) +(Rsh16Ux8 <t> x y) -> (CSELULT (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y))) + +(Rsh8Ux64 <t> x y) -> (CSELULT (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y)) +(Rsh8Ux32 <t> x y) -> (CSELULT (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y))) +(Rsh8Ux16 <t> x y) -> (CSELULT (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y))) +(Rsh8Ux8 <t> x y) -> (CSELULT (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y))) + +(Rsh64x64 x y) -> (SRA x (CSELULT <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) +(Rsh64x32 x y) -> (SRA x (CSELULT <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y)))) +(Rsh64x16 x y) -> (SRA x (CSELULT <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y)))) +(Rsh64x8 x y) -> (SRA x (CSELULT <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y)))) + +(Rsh32x64 x y) -> (SRA (SignExt32to64 x) (CSELULT <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) +(Rsh32x32 x y) -> (SRA (SignExt32to64 x) (CSELULT <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y)))) +(Rsh32x16 x y) -> (SRA (SignExt32to64 x) (CSELULT <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y)))) +(Rsh32x8 x y) -> (SRA (SignExt32to64 x) (CSELULT <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y)))) + +(Rsh16x64 x y) -> (SRA (SignExt16to64 x) (CSELULT <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) +(Rsh16x32 x y) -> (SRA (SignExt16to64 x) (CSELULT <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y)))) +(Rsh16x16 x y) -> (SRA (SignExt16to64 x) (CSELULT <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y)))) +(Rsh16x8 x y) -> (SRA (SignExt16to64 x) (CSELULT <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y)))) + +(Rsh8x64 x y) -> (SRA (SignExt8to64 x) (CSELULT <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y))) +(Rsh8x32 x y) -> (SRA (SignExt8to64 x) (CSELULT <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y)))) +(Rsh8x16 x y) -> (SRA (SignExt8to64 x) (CSELULT <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y)))) +(Rsh8x8 x y) -> (SRA (SignExt8to64 x) (CSELULT <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y)))) + +(Lrot64 x [c]) -> (RORconst x [64-c&63]) +(Lrot32 x [c]) -> (RORWconst x [32-c&31]) +(Lrot16 <t> x [c]) -> (OR (SLLconst <t> x [c&15]) (SRLconst <t> (ZeroExt16to64 x) [16-c&15])) +(Lrot8 <t> x [c]) -> (OR (SLLconst <t> x [c&7]) (SRLconst <t> (ZeroExt8to64 x) [8-c&7])) // constants (Const64 [val]) -> (MOVDconst [val]) @@ -224,6 +335,129 @@ (Store [4] ptr val mem) && is32BitFloat(val.Type) -> (FMOVSstore ptr val mem) (Store [8] ptr val mem) && is64BitFloat(val.Type) -> (FMOVDstore ptr val mem) +// zeroing +(Zero [s] _ mem) && SizeAndAlign(s).Size() == 0 -> mem +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstore ptr (MOVDconst [0]) mem) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 2 && SizeAndAlign(s).Align()%2 == 0 -> + (MOVHstore ptr (MOVDconst [0]) mem) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 2 -> + (MOVBstore [1] ptr (MOVDconst [0]) + (MOVBstore ptr (MOVDconst [0]) mem)) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%4 == 0 -> + (MOVWstore ptr (MOVDconst [0]) mem) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%2 == 0 -> + (MOVHstore [2] ptr (MOVDconst [0]) + (MOVHstore ptr (MOVDconst [0]) mem)) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 4 -> + (MOVBstore [3] ptr (MOVDconst [0]) + (MOVBstore [2] ptr (MOVDconst [0]) + (MOVBstore [1] ptr (MOVDconst [0]) + (MOVBstore ptr (MOVDconst [0]) mem)))) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%8 == 0 -> + (MOVDstore ptr (MOVDconst [0]) mem) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%4 == 0 -> + (MOVWstore [4] ptr (MOVDconst [0]) + (MOVWstore ptr (MOVDconst [0]) mem)) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%2 == 0 -> + (MOVHstore [6] ptr (MOVDconst [0]) + (MOVHstore [4] ptr (MOVDconst [0]) + (MOVHstore [2] ptr (MOVDconst [0]) + (MOVHstore ptr (MOVDconst [0]) mem)))) + +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 3 -> + (MOVBstore [2] ptr (MOVDconst [0]) + (MOVBstore [1] ptr (MOVDconst [0]) + (MOVBstore ptr (MOVDconst [0]) mem))) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 6 && SizeAndAlign(s).Align()%2 == 0 -> + (MOVHstore [4] ptr (MOVDconst [0]) + (MOVHstore [2] ptr (MOVDconst [0]) + (MOVHstore ptr (MOVDconst [0]) mem))) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 12 && SizeAndAlign(s).Align()%4 == 0 -> + (MOVWstore [8] ptr (MOVDconst [0]) + (MOVWstore [4] ptr (MOVDconst [0]) + (MOVWstore ptr (MOVDconst [0]) mem))) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 16 && SizeAndAlign(s).Align()%8 == 0 -> + (MOVDstore [8] ptr (MOVDconst [0]) + (MOVDstore ptr (MOVDconst [0]) mem)) +(Zero [s] ptr mem) && SizeAndAlign(s).Size() == 24 && SizeAndAlign(s).Align()%8 == 0 -> + (MOVDstore [16] ptr (MOVDconst [0]) + (MOVDstore [8] ptr (MOVDconst [0]) + (MOVDstore ptr (MOVDconst [0]) mem))) + +// medium zeroing uses a duff device +// 4, 8, and 128 are magic constants, see runtime/mkduff.go +(Zero [s] ptr mem) + && SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128 + && SizeAndAlign(s).Align()%8 == 0 && !config.noDuffDevice -> + (DUFFZERO [4 * (128 - int64(SizeAndAlign(s).Size()/8))] ptr mem) + +// large or unaligned zeroing uses a loop +(Zero [s] ptr mem) + && (SizeAndAlign(s).Size() > 8*128 || config.noDuffDevice) || SizeAndAlign(s).Align()%8 != 0 -> + (LoweredZero [SizeAndAlign(s).Align()] + ptr + (ADDconst <ptr.Type> [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)] ptr) + mem) + +// moves +(Move [s] _ _ mem) && SizeAndAlign(s).Size() == 0 -> mem +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 1 -> (MOVBstore dst (MOVBUload src mem) mem) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 && SizeAndAlign(s).Align()%2 == 0 -> + (MOVHstore dst (MOVHUload src mem) mem) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 2 -> + (MOVBstore [1] dst (MOVBUload [1] src mem) + (MOVBstore dst (MOVBUload src mem) mem)) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%4 == 0 -> + (MOVWstore dst (MOVWUload src mem) mem) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 && SizeAndAlign(s).Align()%2 == 0 -> + (MOVHstore [2] dst (MOVHUload [2] src mem) + (MOVHstore dst (MOVHUload src mem) mem)) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 4 -> + (MOVBstore [3] dst (MOVBUload [3] src mem) + (MOVBstore [2] dst (MOVBUload [2] src mem) + (MOVBstore [1] dst (MOVBUload [1] src mem) + (MOVBstore dst (MOVBUload src mem) mem)))) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%8 == 0 -> + (MOVDstore dst (MOVDload src mem) mem) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%4 == 0 -> + (MOVWstore [4] dst (MOVWUload [4] src mem) + (MOVWstore dst (MOVWUload src mem) mem)) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 8 && SizeAndAlign(s).Align()%2 == 0 -> + (MOVHstore [6] dst (MOVHUload [6] src mem) + (MOVHstore [4] dst (MOVHUload [4] src mem) + (MOVHstore [2] dst (MOVHUload [2] src mem) + (MOVHstore dst (MOVHUload src mem) mem)))) + +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 3 -> + (MOVBstore [2] dst (MOVBUload [2] src mem) + (MOVBstore [1] dst (MOVBUload [1] src mem) + (MOVBstore dst (MOVBUload src mem) mem))) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 6 && SizeAndAlign(s).Align()%2 == 0 -> + (MOVHstore [4] dst (MOVHUload [4] src mem) + (MOVHstore [2] dst (MOVHUload [2] src mem) + (MOVHstore dst (MOVHUload src mem) mem))) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 12 && SizeAndAlign(s).Align()%4 == 0 -> + (MOVWstore [8] dst (MOVWUload [8] src mem) + (MOVWstore [4] dst (MOVWUload [4] src mem) + (MOVWstore dst (MOVWUload src mem) mem))) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 16 && SizeAndAlign(s).Align()%8 == 0 -> + (MOVDstore [8] dst (MOVDload [8] src mem) + (MOVDstore dst (MOVDload src mem) mem)) +(Move [s] dst src mem) && SizeAndAlign(s).Size() == 24 && SizeAndAlign(s).Align()%8 == 0 -> + (MOVDstore [16] dst (MOVDload [16] src mem) + (MOVDstore [8] dst (MOVDload [8] src mem) + (MOVDstore dst (MOVDload src mem) mem))) + +// large or unaligned move uses a loop +// DUFFCOPY is not implemented on ARM64 (TODO) +(Move [s] dst src mem) + && SizeAndAlign(s).Size() > 24 || SizeAndAlign(s).Align()%8 != 0 -> + (LoweredMove [SizeAndAlign(s).Align()] + dst + src + (ADDconst <src.Type> src [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)]) + mem) + // calls (StaticCall [argwid] {target} mem) -> (CALLstatic [argwid] {target} mem) (ClosureCall [argwid] entry closure mem) -> (CALLclosure [argwid] entry closure mem) |