path: root/src/cmd/compile/internal/ssa/gen
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen')
-rw-r--r--  src/cmd/compile/internal/ssa/gen/AMD64.rules  835
-rw-r--r--  src/cmd/compile/internal/ssa/gen/ARM64.rules  162
-rw-r--r--  src/cmd/compile/internal/ssa/gen/ARM64Ops.go    4
-rw-r--r--  src/cmd/compile/internal/ssa/gen/PPC64.rules   40
-rw-r--r--  src/cmd/compile/internal/ssa/gen/PPC64Ops.go   13
-rw-r--r--  src/cmd/compile/internal/ssa/gen/S390X.rules  885
-rw-r--r--  src/cmd/compile/internal/ssa/gen/dec.rules      4
-rw-r--r--  src/cmd/compile/internal/ssa/gen/rulegen.go     7
8 files changed, 781 insertions, 1169 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 5111ef79d3..8898fe55eb 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -436,69 +436,69 @@
// Absorb InvertFlags
(CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
- -> (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+ => (CMOVQ(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
- -> (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+ => (CMOVL(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
(CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
- -> (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
+ => (CMOVW(EQ|NE|GT|LT|GE|LE|CS|HI|LS|CC) x y cond)
// Absorb constants generated during lower
-(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) -> x
-(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) -> y
-(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) -> x
-(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) -> y
-(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) -> x
-(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) -> y
-(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) -> x
-(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) -> y
-(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) -> x
-(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) -> y
+(CMOV(QEQ|QLE|QGE|QCC|QLS|LEQ|LLE|LGE|LCC|LLS|WEQ|WLE|WGE|WCC|WLS) _ x (FlagEQ)) => x
+(CMOV(QNE|QLT|QGT|QCS|QHI|LNE|LLT|LGT|LCS|LHI|WNE|WLT|WGT|WCS|WHI) y _ (FlagEQ)) => y
+(CMOV(QNE|QGT|QGE|QHI|QCC|LNE|LGT|LGE|LHI|LCC|WNE|WGT|WGE|WHI|WCC) _ x (FlagGT_UGT)) => x
+(CMOV(QEQ|QLE|QLT|QLS|QCS|LEQ|LLE|LLT|LLS|LCS|WEQ|WLE|WLT|WLS|WCS) y _ (FlagGT_UGT)) => y
+(CMOV(QNE|QGT|QGE|QLS|QCS|LNE|LGT|LGE|LLS|LCS|WNE|WGT|WGE|WLS|WCS) _ x (FlagGT_ULT)) => x
+(CMOV(QEQ|QLE|QLT|QHI|QCC|LEQ|LLE|LLT|LHI|LCC|WEQ|WLE|WLT|WHI|WCC) y _ (FlagGT_ULT)) => y
+(CMOV(QNE|QLT|QLE|QCS|QLS|LNE|LLT|LLE|LCS|LLS|WNE|WLT|WLE|WCS|WLS) _ x (FlagLT_ULT)) => x
+(CMOV(QEQ|QGT|QGE|QHI|QCC|LEQ|LGT|LGE|LHI|LCC|WEQ|WGT|WGE|WHI|WCC) y _ (FlagLT_ULT)) => y
+(CMOV(QNE|QLT|QLE|QHI|QCC|LNE|LLT|LLE|LHI|LCC|WNE|WLT|WLE|WHI|WCC) _ x (FlagLT_UGT)) => x
+(CMOV(QEQ|QGT|QGE|QCS|QLS|LEQ|LGT|LGE|LCS|LLS|WEQ|WGT|WGE|WCS|WLS) y _ (FlagLT_UGT)) => y
// Miscellaneous
-(IsNonNil p) -> (SETNE (TESTQ p p))
-(IsInBounds idx len) -> (SETB (CMPQ idx len))
-(IsSliceInBounds idx len) -> (SETBE (CMPQ idx len))
-(NilCheck ...) -> (LoweredNilCheck ...)
-(GetG ...) -> (LoweredGetG ...)
-(GetClosurePtr ...) -> (LoweredGetClosurePtr ...)
-(GetCallerPC ...) -> (LoweredGetCallerPC ...)
-(GetCallerSP ...) -> (LoweredGetCallerSP ...)
-
-(HasCPUFeature {s}) -> (SETNE (CMPQconst [0] (LoweredHasCPUFeature {s})))
+(IsNonNil p) => (SETNE (TESTQ p p))
+(IsInBounds idx len) => (SETB (CMPQ idx len))
+(IsSliceInBounds idx len) => (SETBE (CMPQ idx len))
+(NilCheck ...) => (LoweredNilCheck ...)
+(GetG ...) => (LoweredGetG ...)
+(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
+(GetCallerPC ...) => (LoweredGetCallerPC ...)
+(GetCallerSP ...) => (LoweredGetCallerSP ...)
+
+(HasCPUFeature {s}) => (SETNE (CMPQconst [0] (LoweredHasCPUFeature {s})))
(Addr ...) -> (LEAQ ...)
-(LocalAddr {sym} base _) -> (LEAQ {sym} base)
-
-(MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 -> (SETLstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 -> (SETLEstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 -> (SETGstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 -> (SETGEstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 -> (SETEQstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 -> (SETNEstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 -> (SETBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 -> (SETBEstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 -> (SETAstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 -> (SETAEstore [off] {sym} ptr x mem)
+(LocalAddr {sym} base _) => (LEAQ {sym} base)
+
+(MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 => (SETLstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETLE x) mem) && y.Uses == 1 => (SETLEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETG x) mem) && y.Uses == 1 => (SETGstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETGE x) mem) && y.Uses == 1 => (SETGEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETEQ x) mem) && y.Uses == 1 => (SETEQstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETNE x) mem) && y.Uses == 1 => (SETNEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETB x) mem) && y.Uses == 1 => (SETBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETBE x) mem) && y.Uses == 1 => (SETBEstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETA x) mem) && y.Uses == 1 => (SETAstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr y:(SETAE x) mem) && y.Uses == 1 => (SETAEstore [off] {sym} ptr x mem)
// block rewrites
-(If (SETL cmp) yes no) -> (LT cmp yes no)
-(If (SETLE cmp) yes no) -> (LE cmp yes no)
-(If (SETG cmp) yes no) -> (GT cmp yes no)
-(If (SETGE cmp) yes no) -> (GE cmp yes no)
-(If (SETEQ cmp) yes no) -> (EQ cmp yes no)
-(If (SETNE cmp) yes no) -> (NE cmp yes no)
-(If (SETB cmp) yes no) -> (ULT cmp yes no)
-(If (SETBE cmp) yes no) -> (ULE cmp yes no)
-(If (SETA cmp) yes no) -> (UGT cmp yes no)
-(If (SETAE cmp) yes no) -> (UGE cmp yes no)
-(If (SETO cmp) yes no) -> (OS cmp yes no)
+(If (SETL cmp) yes no) => (LT cmp yes no)
+(If (SETLE cmp) yes no) => (LE cmp yes no)
+(If (SETG cmp) yes no) => (GT cmp yes no)
+(If (SETGE cmp) yes no) => (GE cmp yes no)
+(If (SETEQ cmp) yes no) => (EQ cmp yes no)
+(If (SETNE cmp) yes no) => (NE cmp yes no)
+(If (SETB cmp) yes no) => (ULT cmp yes no)
+(If (SETBE cmp) yes no) => (ULE cmp yes no)
+(If (SETA cmp) yes no) => (UGT cmp yes no)
+(If (SETAE cmp) yes no) => (UGE cmp yes no)
+(If (SETO cmp) yes no) => (OS cmp yes no)
// Special case for floating point - LF/LEF not generated
-(If (SETGF cmp) yes no) -> (UGT cmp yes no)
-(If (SETGEF cmp) yes no) -> (UGE cmp yes no)
-(If (SETEQF cmp) yes no) -> (EQF cmp yes no)
-(If (SETNEF cmp) yes no) -> (NEF cmp yes no)
+(If (SETGF cmp) yes no) => (UGT cmp yes no)
+(If (SETGEF cmp) yes no) => (UGE cmp yes no)
+(If (SETEQF cmp) yes no) => (EQF cmp yes no)
+(If (SETNEF cmp) yes no) => (NEF cmp yes no)
-(If cond yes no) -> (NE (TESTB cond cond) yes no)
+(If cond yes no) => (NE (TESTB cond cond) yes no)
// Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad8 ...) -> (MOVBatomicload ...)
@@ -508,22 +508,22 @@
// Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load.
// TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those?
-(AtomicStore8 ptr val mem) -> (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
-(AtomicStore32 ptr val mem) -> (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
-(AtomicStore64 ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
-(AtomicStorePtrNoWB ptr val mem) -> (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
+(AtomicStore8 ptr val mem) => (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
+(AtomicStore32 ptr val mem) => (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
+(AtomicStore64 ptr val mem) => (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
+(AtomicStorePtrNoWB ptr val mem) => (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
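
For orientation, a minimal Go sketch of source code that reaches the AtomicStore64 lowering above (assuming the ordinary sync/atomic API); the resulting XCHGQ doubles as the barrier the comment describes.

    package example

    import "sync/atomic"

    var flag uint64

    // Publish writes flag atomically. On amd64 the AtomicStore64 rule above
    // selects XCHGQ, so a later load cannot be reordered ahead of the store.
    func Publish() {
            atomic.StoreUint64(&flag, 1)
    }
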
// Atomic exchanges.
-(AtomicExchange32 ptr val mem) -> (XCHGL val ptr mem)
-(AtomicExchange64 ptr val mem) -> (XCHGQ val ptr mem)
+(AtomicExchange32 ptr val mem) => (XCHGL val ptr mem)
+(AtomicExchange64 ptr val mem) => (XCHGQ val ptr mem)
// Atomic adds.
-(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (XADDLlock val ptr mem))
-(AtomicAdd64 ptr val mem) -> (AddTupleFirst64 val (XADDQlock val ptr mem))
-(Select0 <t> (AddTupleFirst32 val tuple)) -> (ADDL val (Select0 <t> tuple))
-(Select1 (AddTupleFirst32 _ tuple)) -> (Select1 tuple)
-(Select0 <t> (AddTupleFirst64 val tuple)) -> (ADDQ val (Select0 <t> tuple))
-(Select1 (AddTupleFirst64 _ tuple)) -> (Select1 tuple)
+(AtomicAdd32 ptr val mem) => (AddTupleFirst32 val (XADDLlock val ptr mem))
+(AtomicAdd64 ptr val mem) => (AddTupleFirst64 val (XADDQlock val ptr mem))
+(Select0 <t> (AddTupleFirst32 val tuple)) => (ADDL val (Select0 <t> tuple))
+(Select1 (AddTupleFirst32 _ tuple)) => (Select1 tuple)
+(Select0 <t> (AddTupleFirst64 val tuple)) => (ADDQ val (Select0 <t> tuple))
+(Select1 (AddTupleFirst64 _ tuple)) => (Select1 tuple)
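
The AddTupleFirst wrapper exists because XADD leaves the old value in the register while Go's atomic Add must return the new value. A small sketch of code that exercises AtomicAdd64 (exact instruction selection depends on the rest of the function):

    package example

    import "sync/atomic"

    var counter uint64

    // Next lowers to LOCK XADDQ; the AddTupleFirst64 wrapper above lets a
    // later rule rebuild the new value as old+1 with a plain ADDQ.
    func Next() uint64 {
            return atomic.AddUint64(&counter, 1)
    }
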
// Atomic compare and swap.
(AtomicCompareAndSwap32 ...) -> (CMPXCHGLlock ...)
@@ -536,9 +536,9 @@
// Write barrier.
(WB ...) -> (LoweredWB ...)
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
// ***************************
// Above: lowering rules
@@ -547,23 +547,23 @@
// TODO: Should the optimizations be a separate pass?
// Fold boolean tests into blocks
-(NE (TESTB (SETL cmp) (SETL cmp)) yes no) -> (LT cmp yes no)
-(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) -> (LE cmp yes no)
-(NE (TESTB (SETG cmp) (SETG cmp)) yes no) -> (GT cmp yes no)
-(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) -> (GE cmp yes no)
-(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) -> (EQ cmp yes no)
-(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) -> (NE cmp yes no)
-(NE (TESTB (SETB cmp) (SETB cmp)) yes no) -> (ULT cmp yes no)
-(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) -> (ULE cmp yes no)
-(NE (TESTB (SETA cmp) (SETA cmp)) yes no) -> (UGT cmp yes no)
-(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
-(NE (TESTB (SETO cmp) (SETO cmp)) yes no) -> (OS cmp yes no)
+(NE (TESTB (SETL cmp) (SETL cmp)) yes no) => (LT cmp yes no)
+(NE (TESTB (SETLE cmp) (SETLE cmp)) yes no) => (LE cmp yes no)
+(NE (TESTB (SETG cmp) (SETG cmp)) yes no) => (GT cmp yes no)
+(NE (TESTB (SETGE cmp) (SETGE cmp)) yes no) => (GE cmp yes no)
+(NE (TESTB (SETEQ cmp) (SETEQ cmp)) yes no) => (EQ cmp yes no)
+(NE (TESTB (SETNE cmp) (SETNE cmp)) yes no) => (NE cmp yes no)
+(NE (TESTB (SETB cmp) (SETB cmp)) yes no) => (ULT cmp yes no)
+(NE (TESTB (SETBE cmp) (SETBE cmp)) yes no) => (ULE cmp yes no)
+(NE (TESTB (SETA cmp) (SETA cmp)) yes no) => (UGT cmp yes no)
+(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) => (UGE cmp yes no)
+(NE (TESTB (SETO cmp) (SETO cmp)) yes no) => (OS cmp yes no)
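
These folds matter for ordinary comparisons used directly in an if: the generic lowering first materializes a boolean with SETcc and tests it with TESTB, and the rules above collapse that back into a branch on the original flags. A sketch:

    package example

    // Min branches straight on the flags from CMPQ: (If (SETL cmp)) becomes
    // (NE (TESTB (SETL cmp) (SETL cmp))) during lowering and is folded back
    // into an LT block here, so no byte register is ever written.
    func Min(a, b int64) int64 {
            if a < b {
                    return a
            }
            return b
    }
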
// Unsigned comparisons to 0/1
-(ULT (TEST(Q|L|W|B) x x) yes no) -> (First no yes)
-(UGE (TEST(Q|L|W|B) x x) yes no) -> (First yes no)
-(SETB (TEST(Q|L|W|B) x x)) -> (ConstBool [0])
-(SETAE (TEST(Q|L|W|B) x x)) -> (ConstBool [1])
+(ULT (TEST(Q|L|W|B) x x) yes no) => (First no yes)
+(UGE (TEST(Q|L|W|B) x x) yes no) => (First yes no)
+(SETB (TEST(Q|L|W|B) x x)) => (ConstBool [false])
+(SETAE (TEST(Q|L|W|B) x x)) => (ConstBool [true])
// x & 1 != 0 -> x & 1
(SETNE (TEST(B|W)const [1] x)) => (AND(L|L)const [1] x)
@@ -574,75 +574,75 @@
// into tests for carry flags.
// ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
// mutandis, for UGE and SETAE, and CC and SETCC.
-((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) -> ((ULT|UGE) (BTL x y))
-((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) -> ((ULT|UGE) (BTQ x y))
-((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c)
- -> ((ULT|UGE) (BTLconst [log2uint32(c)] x))
-((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c)
- -> ((ULT|UGE) (BTQconst [log2(c)] x))
+((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
+((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
+((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
+ => ((ULT|UGE) (BTLconst [int8(log32(c))] x))
+((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
+ => ((ULT|UGE) (BTQconst [int8(log32(c))] x))
((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
- -> ((ULT|UGE) (BTQconst [log2(c)] x))
-(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) -> (SET(B|AE) (BTL x y))
-(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) -> (SET(B|AE) (BTQ x y))
-(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c)
- -> (SET(B|AE) (BTLconst [log2uint32(c)] x))
-(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(c)
- -> (SET(B|AE) (BTQconst [log2(c)] x))
+ => ((ULT|UGE) (BTQconst [int8(log2(c))] x))
+(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
+(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
+(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
+ => (SET(B|AE) (BTLconst [int8(log32(c))] x))
+(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
+ => (SET(B|AE) (BTQconst [int8(log32(c))] x))
(SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
- -> (SET(B|AE) (BTQconst [log2(c)] x))
+ => (SET(B|AE) (BTQconst [int8(log2(c))] x))
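
The conditions now widen their arguments explicitly because the auxint constants are typed (int32 here, int8 on the BT*const result) rather than raw int64. Roughly, the helper predicates behave like the sketch below; the real definitions live in rewrite.go and may differ in detail.

    package example

    import "math/bits"

    // isUint64PowerOfTwo reports whether c, viewed as a uint64, has exactly
    // one bit set (a rough sketch of the rewrite.go helper).
    func isUint64PowerOfTwo(c int64) bool {
            x := uint64(c)
            return x != 0 && x&(x-1) == 0
    }

    // log32 returns the index of the single set bit of a power-of-two 32-bit
    // constant, i.e. the immediate operand that BTLconst/BTQconst expect.
    func log32(c int32) int64 {
            return int64(bits.Len32(uint32(c))) - 1
    }
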
// SET..store variant
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
- -> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
- -> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
-(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(c)
- -> (SET(B|AE)store [off] {sym} ptr (BTLconst [log2uint32(c)] x) mem)
-(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(c)
- -> (SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
+(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
+ => (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
+(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
+ => (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c)
- -> (SET(B|AE)store [off] {sym} ptr (BTQconst [log2(c)] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log2(c))] x) mem)
// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
-(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 -> (BTQconst [c+d] x)
-(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d -> (BT(Q|L)const [c-d] x)
-(BT(Q|L)const [0] s:(SHRQ x y)) -> (BTQ y x)
-(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 -> (BTLconst [c+d] x)
-(BTLconst [c] (SHLLconst [d] x)) && c>d -> (BTLconst [c-d] x)
-(BTLconst [0] s:(SHRL x y)) -> (BTL y x)
+(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
+(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
+(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
+(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
+(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
+(BTLconst [0] s:(SHRL x y)) => (BTL y x)
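
Typical Go source that benefits from this group (a hedged sketch; the exact SSA shape depends on how the test is spelled):

    package example

    // HasBit is the x&(1<<b) form matched by the TESTQ/SHLQ rules above;
    // Bit is the (x>>b)&1 form this comment refers to. Both are intended to
    // reduce to a BT-style test (BTQ plus SETB/SETAE) rather than a real shift.
    func HasBit(x uint64, b uint) bool { return x&(1<<b) != 0 }
    func Bit(x uint64, b uint) bool    { return (x>>b)&1 != 0 }
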
// Rewrite a & 1 != 1 into a & 1 == 0.
// Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
-(SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) -> (SET(EQ|NE) (CMPLconst [0] s))
-(SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
-(SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) -> (SET(EQ|NE) (CMPQconst [0] s))
-(SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) -> (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)
+(SET(NE|EQ) (CMPLconst [1] s:(ANDLconst [1] _))) => (SET(EQ|NE) (CMPLconst [0] s))
+(SET(NE|EQ)store [off] {sym} ptr (CMPLconst [1] s:(ANDLconst [1] _)) mem) => (SET(EQ|NE)store [off] {sym} ptr (CMPLconst [0] s) mem)
+(SET(NE|EQ) (CMPQconst [1] s:(ANDQconst [1] _))) => (SET(EQ|NE) (CMPQconst [0] s))
+(SET(NE|EQ)store [off] {sym} ptr (CMPQconst [1] s:(ANDQconst [1] _)) mem) => (SET(EQ|NE)store [off] {sym} ptr (CMPQconst [0] s) mem)
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
-(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) -> (BTS(Q|L) x y)
-(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) -> (BTC(Q|L) x y)
+(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
+(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
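
The corresponding Go-level idioms, as a sketch:

    package example

    // SetBit matches the ORQ-of-shifted-one rule (BTSQ); ToggleBit matches
    // the XORQ form (BTCQ). The bit index may be a variable.
    func SetBit(x uint64, b uint) uint64    { return x | 1<<b }
    func ToggleBit(x uint64, b uint) uint64 { return x ^ 1<<b }
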
// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
-((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
- -> (BT(S|C)Qconst [log2(c)] x)
-((ORL|XORL)const [c] x) && isUint32PowerOfTwo(c) && uint64(c) >= 128
- -> (BT(S|C)Lconst [log2uint32(c)] x)
+((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(int64(c)) && uint64(c) >= 128
+ => (BT(S|C)Qconst [int8(log32(c))] x)
+((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+ => (BT(S|C)Lconst [int8(log32(c))] x)
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
- -> (BT(S|C)Qconst [log2(c)] x)
-((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(c) && uint64(c) >= 128
- -> (BT(S|C)Lconst [log2uint32(c)] x)
+ => (BT(S|C)Qconst [int8(log2(c))] x)
+((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
+ => (BT(S|C)Lconst [int8(log32(c))] x)
// Recognize bit clearing: a &^= 1<<b
-(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) -> (BTR(Q|L) x y)
-(ANDQconst [c] x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
- -> (BTRQconst [log2(^c)] x)
-(ANDLconst [c] x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128
- -> (BTRLconst [log2uint32(^c)] x)
+(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
+(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+ => (BTRQconst [int8(log32(^c))] x)
+(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+ => (BTRLconst [int8(log32(^c))] x)
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
- -> (BTRQconst [log2(^c)] x)
-(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128
- -> (BTRLconst [log2uint32(^c)] x)
+ => (BTRQconst [int8(log2(^c))] x)
+(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
+ => (BTRLconst [int8(log32(^c))] x)
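
The Go spelling of the bit-clearing pattern, for reference (a sketch):

    package example

    // ClearBit is Go's x &^ (1 << b); the ANDQ (NOTQ (SHLQ ...)) rule above
    // turns it into a single BTRQ with a variable bit index.
    func ClearBit(x uint64, b uint) uint64 { return x &^ (1 << b) }
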
// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@@ -656,84 +656,84 @@
// Special case resetting first/last bit
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x))
- -> (BTR(L|Q)const [0] x)
+ => (BTR(L|Q)const [0] x)
(SHRLconst [1] (SHLLconst [1] x))
- -> (BTRLconst [31] x)
+ => (BTRLconst [31] x)
(SHRQconst [1] (SHLQconst [1] x))
- -> (BTRQconst [63] x)
+ => (BTRQconst [63] x)
// Special case testing first/last bit (with double-shift generated by generic.rules)
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2
- -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
+ => ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2
- -> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
+ => ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2
- -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2
- -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2
- -> ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
+ => ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2
- -> ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
+ => ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2
- -> (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTQconst [0] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2
- -> (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTLconst [0] x) mem)
// Special-case manually testing last bit with "a>>63 != 0" (without "&1")
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2
- -> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
+ => ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2
- -> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
+ => ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
(SET(NE|EQ)store [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2
- -> (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2
- -> (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTLconst [31] x) mem)
// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
-(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
-(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
-(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
-(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
+(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
+(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTS(Q|L)const [c] x)
+(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
+(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) => (BTR(Q|L)const [c] x)
// Fold boolean negation into SETcc.
-(XORLconst [1] (SETNE x)) -> (SETEQ x)
-(XORLconst [1] (SETEQ x)) -> (SETNE x)
-(XORLconst [1] (SETL x)) -> (SETGE x)
-(XORLconst [1] (SETGE x)) -> (SETL x)
-(XORLconst [1] (SETLE x)) -> (SETG x)
-(XORLconst [1] (SETG x)) -> (SETLE x)
-(XORLconst [1] (SETB x)) -> (SETAE x)
-(XORLconst [1] (SETAE x)) -> (SETB x)
-(XORLconst [1] (SETBE x)) -> (SETA x)
-(XORLconst [1] (SETA x)) -> (SETBE x)
+(XORLconst [1] (SETNE x)) => (SETEQ x)
+(XORLconst [1] (SETEQ x)) => (SETNE x)
+(XORLconst [1] (SETL x)) => (SETGE x)
+(XORLconst [1] (SETGE x)) => (SETL x)
+(XORLconst [1] (SETLE x)) => (SETG x)
+(XORLconst [1] (SETG x)) => (SETLE x)
+(XORLconst [1] (SETB x)) => (SETAE x)
+(XORLconst [1] (SETAE x)) => (SETB x)
+(XORLconst [1] (SETBE x)) => (SETA x)
+(XORLconst [1] (SETA x)) => (SETBE x)
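
A sketch of the code this helps: negating a comparison produces an XORL $1 of a SETcc result, which these rules replace with the opposite SETcc.

    package example

    // NotLess compiles to a single SETGE: the front end emits SETL followed
    // by XORLconst [1], and the rule above flips the condition instead.
    func NotLess(a, b int64) bool { return !(a < b) }
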
// Special case for floating point - LF/LEF not generated
-(NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) -> (UGT cmp yes no)
-(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) -> (UGE cmp yes no)
-(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) -> (EQF cmp yes no)
-(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) -> (NEF cmp yes no)
+(NE (TESTB (SETGF cmp) (SETGF cmp)) yes no) => (UGT cmp yes no)
+(NE (TESTB (SETGEF cmp) (SETGEF cmp)) yes no) => (UGE cmp yes no)
+(NE (TESTB (SETEQF cmp) (SETEQF cmp)) yes no) => (EQF cmp yes no)
+(NE (TESTB (SETNEF cmp) (SETNEF cmp)) yes no) => (NEF cmp yes no)
// Disabled because it interferes with the pattern match above and makes worse code.
-// (SETNEF x) -> (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
-// (SETEQF x) -> (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))
+// (SETNEF x) => (ORQ (SETNE <typ.Int8> x) (SETNAN <typ.Int8> x))
+// (SETEQF x) => (ANDQ (SETEQ <typ.Int8> x) (SETORD <typ.Int8> x))
// fold constants into instructions
-(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
-(ADDQ x (MOVLconst [c])) && is32Bit(c) -> (ADDQconst [int64(int32(c))] x)
-(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
+(ADDQ x (MOVQconst [c])) && is32Bit(c) => (ADDQconst [int32(c)] x)
+(ADDQ x (MOVLconst [c])) => (ADDQconst [c] x)
+(ADDL x (MOVLconst [c])) => (ADDLconst [c] x)
-(SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
-(SUBQ (MOVQconst [c]) x) && is32Bit(c) -> (NEGQ (SUBQconst <v.Type> x [c]))
-(SUBL x (MOVLconst [c])) -> (SUBLconst x [c])
-(SUBL (MOVLconst [c]) x) -> (NEGL (SUBLconst <v.Type> x [c]))
+(SUBQ x (MOVQconst [c])) && is32Bit(c) => (SUBQconst x [int32(c)])
+(SUBQ (MOVQconst [c]) x) && is32Bit(c) => (NEGQ (SUBQconst <v.Type> x [int32(c)]))
+(SUBL x (MOVLconst [c])) => (SUBLconst x [c])
+(SUBL (MOVLconst [c]) x) => (NEGL (SUBLconst <v.Type> x [c]))
-(MULQ x (MOVQconst [c])) && is32Bit(c) -> (MULQconst [c] x)
-(MULL x (MOVLconst [c])) -> (MULLconst [c] x)
+(MULQ x (MOVQconst [c])) && is32Bit(c) => (MULQconst [int32(c)] x)
+(MULL x (MOVLconst [c])) => (MULLconst [c] x)
-(ANDQ x (MOVQconst [c])) && is32Bit(c) -> (ANDQconst [c] x)
-(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
+(ANDQ x (MOVQconst [c])) && is32Bit(c) => (ANDQconst [int32(c)] x)
+(ANDL x (MOVLconst [c])) => (ANDLconst [c] x)
(AND(L|Q)const [c] (AND(L|Q)const [d] x)) => (AND(L|Q)const [c & d] x)
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) => (XOR(L|Q)const [c ^ d] x)
@@ -763,68 +763,70 @@
(ORQconst [c] (BTSQconst [d] x)) && is32Bit(int64(c) | 1<<uint32(d)) => (ORQconst [c | 1<<uint32(d)] x)
(BTSQconst [c] (BTSQconst [d] x)) && is32Bit(1<<uint32(c) | 1<<uint32(d)) => (ORQconst [1<<uint32(c) | 1<<uint32(d)] x)
-(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
-(MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
-(ORQ x (MOVQconst [c])) && is32Bit(c) -> (ORQconst [c] x)
-(ORQ x (MOVLconst [c])) -> (ORQconst [c] x)
-(ORL x (MOVLconst [c])) -> (ORLconst [c] x)
+(MULLconst [c] (MULLconst [d] x)) => (MULLconst [c * d] x)
+(MULQconst [c] (MULQconst [d] x)) && is32Bit(int64(c)*int64(d)) => (MULQconst [c * d] x)
-(XORQ x (MOVQconst [c])) && is32Bit(c) -> (XORQconst [c] x)
-(XORL x (MOVLconst [c])) -> (XORLconst [c] x)
+(ORQ x (MOVQconst [c])) && is32Bit(c) => (ORQconst [int32(c)] x)
+(ORQ x (MOVLconst [c])) => (ORQconst [c] x)
+(ORL x (MOVLconst [c])) => (ORLconst [c] x)
-(SHLQ x (MOV(Q|L)const [c])) -> (SHLQconst [c&63] x)
-(SHLL x (MOV(Q|L)const [c])) -> (SHLLconst [c&31] x)
+(XORQ x (MOVQconst [c])) && is32Bit(c) => (XORQconst [int32(c)] x)
+(XORL x (MOVLconst [c])) => (XORLconst [c] x)
-(SHRQ x (MOV(Q|L)const [c])) -> (SHRQconst [c&63] x)
-(SHRL x (MOV(Q|L)const [c])) -> (SHRLconst [c&31] x)
-(SHRW x (MOV(Q|L)const [c])) && c&31 < 16 -> (SHRWconst [c&31] x)
-(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 -> (MOVLconst [0])
-(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 -> (SHRBconst [c&31] x)
-(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 -> (MOVLconst [0])
+(SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
+(SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
+
+(SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
+(SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
+(SHRW x (MOV(Q|L)const [c])) && c&31 < 16 => (SHRWconst [int8(c&31)] x)
+(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
+(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
+(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
+
+(SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
+(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
+(SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
+(SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)
-(SARQ x (MOV(Q|L)const [c])) -> (SARQconst [c&63] x)
-(SARL x (MOV(Q|L)const [c])) -> (SARLconst [c&31] x)
-(SARW x (MOV(Q|L)const [c])) -> (SARWconst [min(c&31,15)] x)
-(SARB x (MOV(Q|L)const [c])) -> (SARBconst [min(c&31,7)] x)
// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
-((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
-((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
-
-((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))
-((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGQ <t> y))
-
-((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
-((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x y)
-((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 -> ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
-
-((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))
-((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x y)
-((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 -> ((SHLL|SHRL|SARL) x (NEGL <t> y))
+((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
+((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
+((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
+((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
+
+((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
+((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
+((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
+((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
+
+((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
+((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
+((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
+((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
+
+((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
+((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
+((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
+((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
// Constant rotate instructions
-((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c -> (ROLQconst x [c])
-((ADDL|ORL|XORL) (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c -> (ROLLconst x [c])
+((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])
+((ADDL|ORL|XORL) (SHLLconst x [c]) (SHRLconst x [d])) && d==32-c => (ROLLconst x [c])
-((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 -> (ROLWconst x [c])
-((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c && c < 8 && t.Size() == 1 -> (ROLBconst x [c])
+((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRWconst x [d])) && d==16-c && c < 16 && t.Size() == 2 => (ROLWconst x [c])
+((ADDL|ORL|XORL) <t> (SHLLconst x [c]) (SHRBconst x [d])) && d==8-c && c < 8 && t.Size() == 1 => (ROLBconst x [c])
-(ROLQconst [c] (ROLQconst [d] x)) -> (ROLQconst [(c+d)&63] x)
-(ROLLconst [c] (ROLLconst [d] x)) -> (ROLLconst [(c+d)&31] x)
-(ROLWconst [c] (ROLWconst [d] x)) -> (ROLWconst [(c+d)&15] x)
-(ROLBconst [c] (ROLBconst [d] x)) -> (ROLBconst [(c+d)& 7] x)
+(ROLQconst [c] (ROLQconst [d] x)) => (ROLQconst [(c+d)&63] x)
+(ROLLconst [c] (ROLLconst [d] x)) => (ROLLconst [(c+d)&31] x)
+(ROLWconst [c] (ROLWconst [d] x)) => (ROLWconst [(c+d)&15] x)
+(ROLBconst [c] (ROLBconst [d] x)) => (ROLBconst [(c+d)& 7] x)
-(RotateLeft8 ...) -> (ROLB ...)
-(RotateLeft16 ...) -> (ROLW ...)
-(RotateLeft32 ...) -> (ROLL ...)
-(RotateLeft64 ...) -> (ROLQ ...)
+(RotateLeft8 ...) => (ROLB ...)
+(RotateLeft16 ...) => (ROLW ...)
+(RotateLeft32 ...) => (ROLL ...)
+(RotateLeft64 ...) => (ROLQ ...)
// Non-constant rotates.
// We want to issue a rotate when the Go source contains code like
@@ -837,15 +839,15 @@
// But x >> 64 is 0, not x. So there's an additional mask that is ANDed in
// to force the second term to 0. We don't need that mask, but we must match
// it in order to strip it out.
-(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (ROLQ x y)
-(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) -> (RORQ x y)
+(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
+(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
-(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (ROLL x y)
-(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) -> (RORL x y)
+(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
+(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
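
For reference, the Go spelling the comment above describes, as a sketch; when s is 0 the right shift amount is 64, which Go defines as producing 0, and that is where the SBBQcarrymask term in the pattern comes from. math/bits reaches the same ROLQ more directly.

    package example

    import "math/bits"

    // RotByHand spells the rotate out with shifts; the possible shift by 64
    // forces the compiler to AND in the mask that the ROLQ rule strips out.
    func RotByHand(x uint64, s uint) uint64 {
            s &= 63
            return x<<s | x>>(64-s)
    }

    // RotIntrinsic uses the math/bits helper, which also lowers to ROLQ.
    func RotIntrinsic(x uint64, s int) uint64 {
            return bits.RotateLeft64(x, s)
    }
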
// Help with rotate detection
-(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) -> (FlagLT_ULT)
-(CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32]) -> (FlagLT_ULT)
+(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) => (FlagLT_ULT)
+(CMPQconst (NEGQ (ADDQconst [ -8] (ANDQconst [7] _))) [32]) => (FlagLT_ULT)
(ORL (SHLL x (AND(Q|L)const y [15]))
(ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
@@ -855,69 +857,74 @@
(ORL (SHRW x (AND(Q|L)const y [15]))
(SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
&& v.Type.Size() == 2
- -> (RORW x y)
+ => (RORW x y)
(ORL (SHLL x (AND(Q|L)const y [ 7]))
(ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
(SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
&& v.Type.Size() == 1
- -> (ROLB x y)
+ => (ROLB x y)
(ORL (SHRB x (AND(Q|L)const y [ 7]))
(SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
&& v.Type.Size() == 1
- -> (RORB x y)
+ => (RORB x y)
// rotate left negative = rotate right
-(ROLQ x (NEG(Q|L) y)) -> (RORQ x y)
-(ROLL x (NEG(Q|L) y)) -> (RORL x y)
-(ROLW x (NEG(Q|L) y)) -> (RORW x y)
-(ROLB x (NEG(Q|L) y)) -> (RORB x y)
+(ROLQ x (NEG(Q|L) y)) => (RORQ x y)
+(ROLL x (NEG(Q|L) y)) => (RORL x y)
+(ROLW x (NEG(Q|L) y)) => (RORW x y)
+(ROLB x (NEG(Q|L) y)) => (RORB x y)
// rotate right negative = rotate left
-(RORQ x (NEG(Q|L) y)) -> (ROLQ x y)
-(RORL x (NEG(Q|L) y)) -> (ROLL x y)
-(RORW x (NEG(Q|L) y)) -> (ROLW x y)
-(RORB x (NEG(Q|L) y)) -> (ROLB x y)
+(RORQ x (NEG(Q|L) y)) => (ROLQ x y)
+(RORL x (NEG(Q|L) y)) => (ROLL x y)
+(RORW x (NEG(Q|L) y)) => (ROLW x y)
+(RORB x (NEG(Q|L) y)) => (ROLB x y)
// rotate by constants
-(ROLQ x (MOV(Q|L)const [c])) -> (ROLQconst [c&63] x)
-(ROLL x (MOV(Q|L)const [c])) -> (ROLLconst [c&31] x)
-(ROLW x (MOV(Q|L)const [c])) -> (ROLWconst [c&15] x)
-(ROLB x (MOV(Q|L)const [c])) -> (ROLBconst [c&7 ] x)
+(ROLQ x (MOV(Q|L)const [c])) => (ROLQconst [int8(c&63)] x)
+(ROLL x (MOV(Q|L)const [c])) => (ROLLconst [int8(c&31)] x)
+(ROLW x (MOV(Q|L)const [c])) => (ROLWconst [int8(c&15)] x)
+(ROLB x (MOV(Q|L)const [c])) => (ROLBconst [int8(c&7) ] x)
-(RORQ x (MOV(Q|L)const [c])) -> (ROLQconst [(-c)&63] x)
-(RORL x (MOV(Q|L)const [c])) -> (ROLLconst [(-c)&31] x)
-(RORW x (MOV(Q|L)const [c])) -> (ROLWconst [(-c)&15] x)
-(RORB x (MOV(Q|L)const [c])) -> (ROLBconst [(-c)&7 ] x)
+(RORQ x (MOV(Q|L)const [c])) => (ROLQconst [int8((-c)&63)] x)
+(RORL x (MOV(Q|L)const [c])) => (ROLLconst [int8((-c)&31)] x)
+(RORW x (MOV(Q|L)const [c])) => (ROLWconst [int8((-c)&15)] x)
+(RORB x (MOV(Q|L)const [c])) => (ROLBconst [int8((-c)&7) ] x)
// Constant shift simplifications
-((SHLQ|SHRQ|SARQ)const x [0]) -> x
-((SHLL|SHRL|SARL)const x [0]) -> x
-((SHRW|SARW)const x [0]) -> x
-((SHRB|SARB)const x [0]) -> x
-((ROLQ|ROLL|ROLW|ROLB)const x [0]) -> x
+((SHLQ|SHRQ|SARQ)const x [0]) => x
+((SHLL|SHRL|SARL)const x [0]) => x
+((SHRW|SARW)const x [0]) => x
+((SHRB|SARB)const x [0]) => x
+((ROLQ|ROLL|ROLW|ROLB)const x [0]) => x
// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
// because the x86 instructions are defined to use all 5 bits of the shift even
// for the small shifts. I don't think we'll ever generate a weird shift (e.g.
// (SHRW x (MOVLconst [24]))), but just in case.
-(CMPQ x (MOVQconst [c])) && is32Bit(c) -> (CMPQconst x [c])
-(CMPQ (MOVQconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPQconst x [c]))
-(CMPL x (MOVLconst [c])) -> (CMPLconst x [c])
-(CMPL (MOVLconst [c]) x) -> (InvertFlags (CMPLconst x [c]))
-(CMPW x (MOVLconst [c])) -> (CMPWconst x [int64(int16(c))])
-(CMPW (MOVLconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int16(c))]))
-(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
-(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
+(CMPQ x (MOVQconst [c])) && is32Bit(c) => (CMPQconst x [int32(c)])
+(CMPQ (MOVQconst [c]) x) && is32Bit(c) => (InvertFlags (CMPQconst x [int32(c)]))
+(CMPL x (MOVLconst [c])) => (CMPLconst x [c])
+(CMPL (MOVLconst [c]) x) => (InvertFlags (CMPLconst x [c]))
+(CMPW x (MOVLconst [c])) => (CMPWconst x [int16(c)])
+(CMPW (MOVLconst [c]) x) => (InvertFlags (CMPWconst x [int16(c)]))
+(CMPB x (MOVLconst [c])) => (CMPBconst x [int8(c)])
+(CMPB (MOVLconst [c]) x) => (InvertFlags (CMPBconst x [int8(c)]))
// Canonicalize the order of arguments to comparisons - helps with CSE.
-(CMP(Q|L|W|B) x y) && x.ID > y.ID -> (InvertFlags (CMP(Q|L|W|B) y x))
+(CMP(Q|L|W|B) x y) && x.ID > y.ID => (InvertFlags (CMP(Q|L|W|B) y x))
// Using MOVZX instead of AND is cheaper.
-(AND(Q|L)const [ 0xFF] x) -> (MOVBQZX x)
-(AND(Q|L)const [0xFFFF] x) -> (MOVWQZX x)
-(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
+(AND(Q|L)const [ 0xFF] x) => (MOVBQZX x)
+(AND(Q|L)const [0xFFFF] x) => (MOVWQZX x)
+// This rule is currently invalid because 0xFFFFFFFF is not representable by a signed int32.
+// Commenting it out for now; it also cannot trigger anyway, because the is32Bit guard on the
+// ANDQconst lowering rule above prevents 0xFFFFFFFF from matching (for the same reason).
+// Using an alternate form of this rule segfaults some binaries because of
+// adverse interactions with other passes.
+// (ANDQconst [0xFFFFFFFF] x) => (MOVLQZX x)
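
A sketch of the kind of code affected:

    package example

    // LowByte keeps only the low 8 bits. ANDQconst [0xFF] is rewritten to
    // MOVBQZX, a zero-extending move with a shorter encoding and no immediate.
    func LowByte(x uint64) uint64 { return x & 0xFF }
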
// strength reduction
// Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
@@ -928,98 +935,98 @@
// which can require a register-register move
// to preserve the original value,
// so it must be used with care.
-(MUL(Q|L)const [-9] x) -> (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [-5] x) -> (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [-3] x) -> (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [-1] x) -> (NEG(Q|L) x)
-(MUL(Q|L)const [ 0] _) -> (MOV(Q|L)const [0])
-(MUL(Q|L)const [ 1] x) -> x
-(MUL(Q|L)const [ 3] x) -> (LEA(Q|L)2 x x)
-(MUL(Q|L)const [ 5] x) -> (LEA(Q|L)4 x x)
-(MUL(Q|L)const [ 7] x) -> (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [ 9] x) -> (LEA(Q|L)8 x x)
-(MUL(Q|L)const [11] x) -> (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [13] x) -> (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [19] x) -> (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [21] x) -> (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [25] x) -> (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [27] x) -> (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [37] x) -> (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [41] x) -> (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [45] x) -> (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [73] x) -> (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [81] x) -> (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
-
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB(Q|L) (SHL(Q|L)const <v.Type> [log2(c+1)] x) x)
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [log2(c-1)] x) x)
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [log2(c-2)] x) x)
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [log2(c-4)] x) x)
-(MUL(Q|L)const [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [log2(c-8)] x) x)
-(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHL(Q|L)const [log2(c/3)] (LEA(Q|L)2 <v.Type> x x))
-(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHL(Q|L)const [log2(c/5)] (LEA(Q|L)4 <v.Type> x x))
-(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHL(Q|L)const [log2(c/9)] (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [-9] x) => (NEG(Q|L) (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [-5] x) => (NEG(Q|L) (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [-3] x) => (NEG(Q|L) (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [-1] x) => (NEG(Q|L) x)
+(MUL(Q|L)const [ 0] _) => (MOV(Q|L)const [0])
+(MUL(Q|L)const [ 1] x) => x
+(MUL(Q|L)const [ 3] x) => (LEA(Q|L)2 x x)
+(MUL(Q|L)const [ 5] x) => (LEA(Q|L)4 x x)
+(MUL(Q|L)const [ 7] x) => (LEA(Q|L)2 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [ 9] x) => (LEA(Q|L)8 x x)
+(MUL(Q|L)const [11] x) => (LEA(Q|L)2 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [13] x) => (LEA(Q|L)4 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [19] x) => (LEA(Q|L)2 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [21] x) => (LEA(Q|L)4 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [25] x) => (LEA(Q|L)8 x (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [27] x) => (LEA(Q|L)8 (LEA(Q|L)2 <v.Type> x x) (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [37] x) => (LEA(Q|L)4 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [41] x) => (LEA(Q|L)8 x (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [45] x) => (LEA(Q|L)8 (LEA(Q|L)4 <v.Type> x x) (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [73] x) => (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
+(MUL(Q|L)const [81] x) => (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
+
+(MUL(Q|L)const [c] x) && isPowerOfTwo(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const <v.Type> [int8(log2(int64(c)+1))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-1) && c >= 17 => (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [int8(log32(c-1))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-2) && c >= 34 => (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [int8(log32(c-2))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-4) && c >= 68 => (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [int8(log32(c-4))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-8) && c >= 136 => (LEA(Q|L)8 (SHL(Q|L)const <v.Type> [int8(log32(c-8))] x) x)
+(MUL(Q|L)const [c] x) && c%3 == 0 && isPowerOfTwo32(c/3) => (SHL(Q|L)const [int8(log32(c/3))] (LEA(Q|L)2 <v.Type> x x))
+(MUL(Q|L)const [c] x) && c%5 == 0 && isPowerOfTwo32(c/5) => (SHL(Q|L)const [int8(log32(c/5))] (LEA(Q|L)4 <v.Type> x x))
+(MUL(Q|L)const [c] x) && c%9 == 0 && isPowerOfTwo32(c/9) => (SHL(Q|L)const [int8(log32(c/9))] (LEA(Q|L)8 <v.Type> x x))
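
Two worked examples of the strength reduction (sketches; the chosen form follows the costs cited above):

    package example

    // Times9 matches MULQconst [9] and becomes a single LEAQ (x)(x*8).
    func Times9(x int64) int64 { return x * 9 }

    // Times40 matches the c%5 == 0 && isPowerOfTwo32(c/5) rule: 40 = 5 * 8,
    // so it becomes LEAQ4 (giving 5*x) followed by a shift left by 3.
    func Times40(x int64) int64 { return x * 40 }
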
// combine add/shift into LEAQ/LEAL
-(ADD(L|Q) x (SHL(L|Q)const [3] y)) -> (LEA(L|Q)8 x y)
-(ADD(L|Q) x (SHL(L|Q)const [2] y)) -> (LEA(L|Q)4 x y)
-(ADD(L|Q) x (SHL(L|Q)const [1] y)) -> (LEA(L|Q)2 x y)
-(ADD(L|Q) x (ADD(L|Q) y y)) -> (LEA(L|Q)2 x y)
-(ADD(L|Q) x (ADD(L|Q) x y)) -> (LEA(L|Q)2 y x)
+(ADD(L|Q) x (SHL(L|Q)const [3] y)) => (LEA(L|Q)8 x y)
+(ADD(L|Q) x (SHL(L|Q)const [2] y)) => (LEA(L|Q)4 x y)
+(ADD(L|Q) x (SHL(L|Q)const [1] y)) => (LEA(L|Q)2 x y)
+(ADD(L|Q) x (ADD(L|Q) y y)) => (LEA(L|Q)2 x y)
+(ADD(L|Q) x (ADD(L|Q) x y)) => (LEA(L|Q)2 y x)
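
A sketch of addressing arithmetic that these rules turn into a single LEAQ:

    package example

    // Index matches the ADDQ-of-SHLQconst [3] rule and becomes LEAQ8,
    // computing p + 8*i in one instruction.
    func Index(p, i int64) int64 { return p + i*8 }
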
// combine ADDQ/ADDQconst into LEAQ1/LEAL1
-(ADD(Q|L)const [c] (ADD(Q|L) x y)) -> (LEA(Q|L)1 [c] x y)
-(ADD(Q|L) (ADD(Q|L)const [c] x) y) -> (LEA(Q|L)1 [c] x y)
-(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) -> (LEA(Q|L)1 [c] x x)
+(ADD(Q|L)const [c] (ADD(Q|L) x y)) => (LEA(Q|L)1 [c] x y)
+(ADD(Q|L) (ADD(Q|L)const [c] x) y) => (LEA(Q|L)1 [c] x y)
+(ADD(Q|L)const [c] (SHL(Q|L)const [1] x)) => (LEA(Q|L)1 [c] x x)
// fold ADDQ/ADDL into LEAQ/LEAL
-(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
-(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(c+d) -> (LEA(Q|L) [c+d] {s} x)
-(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
-(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB -> (LEA(Q|L)1 [c] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L) [d] {s} x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
+(LEA(Q|L) [c] {s} (ADD(Q|L)const [d] x)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L) [c+d] {s} x)
+(LEA(Q|L) [c] {s} (ADD(Q|L) x y)) && x.Op != OpSB && y.Op != OpSB => (LEA(Q|L)1 [c] {s} x y)
+(ADD(Q|L) x (LEA(Q|L) [c] {s} y)) && x.Op != OpSB && y.Op != OpSB => (LEA(Q|L)1 [c] {s} x y)
// fold ADDQconst/ADDLconst into LEAQx/LEALx
-(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)1 [c+d] {s} x y)
-(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)2 [c+d] {s} x y)
-(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)4 [c+d] {s} x y)
-(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(c+d) -> (LEA(Q|L)8 [c+d] {s} x y)
-(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)1 [c+d] {s} x y)
-(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)2 [c+d] {s} x y)
-(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+2*d) && y.Op != OpSB -> (LEA(Q|L)2 [c+2*d] {s} x y)
-(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)4 [c+d] {s} x y)
-(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+4*d) && y.Op != OpSB -> (LEA(Q|L)4 [c+4*d] {s} x y)
-(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(c+d) && x.Op != OpSB -> (LEA(Q|L)8 [c+d] {s} x y)
-(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(c+8*d) && y.Op != OpSB -> (LEA(Q|L)8 [c+8*d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)1 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)1 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)2 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)2 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)4 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)4 [c+d] {s} x y)
+(ADD(Q|L)const [c] (LEA(Q|L)8 [d] {s} x y)) && is32Bit(int64(c)+int64(d)) => (LEA(Q|L)8 [c+d] {s} x y)
+(LEA(Q|L)1 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)1 [c+d] {s} x y)
+(LEA(Q|L)2 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)2 [c+d] {s} x y)
+(LEA(Q|L)2 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+2*int64(d)) && y.Op != OpSB => (LEA(Q|L)2 [c+2*d] {s} x y)
+(LEA(Q|L)4 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)4 [c+d] {s} x y)
+(LEA(Q|L)4 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+4*int64(d)) && y.Op != OpSB => (LEA(Q|L)4 [c+4*d] {s} x y)
+(LEA(Q|L)8 [c] {s} (ADD(Q|L)const [d] x) y) && is32Bit(int64(c)+int64(d)) && x.Op != OpSB => (LEA(Q|L)8 [c+d] {s} x y)
+(LEA(Q|L)8 [c] {s} x (ADD(Q|L)const [d] y)) && is32Bit(int64(c)+8*int64(d)) && y.Op != OpSB => (LEA(Q|L)8 [c+8*d] {s} x y)
// fold shifts into LEAQx/LEALx
-(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)2 [c] {s} x y)
-(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)4 [c] {s} x y)
-(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) -> (LEA(Q|L)8 [c] {s} x y)
-(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)4 [c] {s} x y)
-(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) -> (LEA(Q|L)8 [c] {s} x y)
-(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) -> (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)2 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)4 [c] {s} x y)
+(LEA(Q|L)1 [c] {s} x (SHL(Q|L)const [3] y)) => (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)4 [c] {s} x y)
+(LEA(Q|L)2 [c] {s} x (SHL(Q|L)const [2] y)) => (LEA(Q|L)8 [c] {s} x y)
+(LEA(Q|L)4 [c] {s} x (SHL(Q|L)const [1] y)) => (LEA(Q|L)8 [c] {s} x y)
// reverse ordering of compare instruction
-(SETL (InvertFlags x)) -> (SETG x)
-(SETG (InvertFlags x)) -> (SETL x)
-(SETB (InvertFlags x)) -> (SETA x)
-(SETA (InvertFlags x)) -> (SETB x)
-(SETLE (InvertFlags x)) -> (SETGE x)
-(SETGE (InvertFlags x)) -> (SETLE x)
-(SETBE (InvertFlags x)) -> (SETAE x)
-(SETAE (InvertFlags x)) -> (SETBE x)
-(SETEQ (InvertFlags x)) -> (SETEQ x)
-(SETNE (InvertFlags x)) -> (SETNE x)
-
-(SETLstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGstore [off] {sym} ptr x mem)
-(SETGstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLstore [off] {sym} ptr x mem)
-(SETBstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAstore [off] {sym} ptr x mem)
-(SETAstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBstore [off] {sym} ptr x mem)
-(SETLEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETGEstore [off] {sym} ptr x mem)
-(SETGEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETLEstore [off] {sym} ptr x mem)
-(SETBEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETAEstore [off] {sym} ptr x mem)
-(SETAEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETBEstore [off] {sym} ptr x mem)
-(SETEQstore [off] {sym} ptr (InvertFlags x) mem) -> (SETEQstore [off] {sym} ptr x mem)
-(SETNEstore [off] {sym} ptr (InvertFlags x) mem) -> (SETNEstore [off] {sym} ptr x mem)
+(SETL (InvertFlags x)) => (SETG x)
+(SETG (InvertFlags x)) => (SETL x)
+(SETB (InvertFlags x)) => (SETA x)
+(SETA (InvertFlags x)) => (SETB x)
+(SETLE (InvertFlags x)) => (SETGE x)
+(SETGE (InvertFlags x)) => (SETLE x)
+(SETBE (InvertFlags x)) => (SETAE x)
+(SETAE (InvertFlags x)) => (SETBE x)
+(SETEQ (InvertFlags x)) => (SETEQ x)
+(SETNE (InvertFlags x)) => (SETNE x)
+
+(SETLstore [off] {sym} ptr (InvertFlags x) mem) => (SETGstore [off] {sym} ptr x mem)
+(SETGstore [off] {sym} ptr (InvertFlags x) mem) => (SETLstore [off] {sym} ptr x mem)
+(SETBstore [off] {sym} ptr (InvertFlags x) mem) => (SETAstore [off] {sym} ptr x mem)
+(SETAstore [off] {sym} ptr (InvertFlags x) mem) => (SETBstore [off] {sym} ptr x mem)
+(SETLEstore [off] {sym} ptr (InvertFlags x) mem) => (SETGEstore [off] {sym} ptr x mem)
+(SETGEstore [off] {sym} ptr (InvertFlags x) mem) => (SETLEstore [off] {sym} ptr x mem)
+(SETBEstore [off] {sym} ptr (InvertFlags x) mem) => (SETAEstore [off] {sym} ptr x mem)
+(SETAEstore [off] {sym} ptr (InvertFlags x) mem) => (SETBEstore [off] {sym} ptr x mem)
+(SETEQstore [off] {sym} ptr (InvertFlags x) mem) => (SETEQstore [off] {sym} ptr x mem)
+(SETNEstore [off] {sym} ptr (InvertFlags x) mem) => (SETNEstore [off] {sym} ptr x mem)
// sign extended loads
// Note: The combined instruction must end up in the same block
@@ -1029,100 +1036,100 @@
// Make sure we don't combine these ops if the load has another use.
// This prevents a single load from being split into multiple loads
// which then might return different values. See test/atomicload.go.
-(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
-(MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
-(MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
-(MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
-(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
-(MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
-(MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
-(MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
-(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-(MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-(MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
-(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-(MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-(MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
-(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
-(MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
-(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
-(MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
-
-(MOVLQZX x) && zeroUpper32Bits(x,3) -> x
-(MOVWQZX x) && zeroUpper48Bits(x,3) -> x
-(MOVBQZX x) && zeroUpper56Bits(x,3) -> x
+(MOVBQSX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBQSXload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+(MOVBQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <v.Type> [off] {sym} ptr mem)
+(MOVWQSX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+(MOVWQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+(MOVWQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWQSXload <v.Type> [off] {sym} ptr mem)
+(MOVWQZX x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+(MOVWQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+(MOVWQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <v.Type> [off] {sym} ptr mem)
+(MOVLQSX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+(MOVLQSX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLQSXload <v.Type> [off] {sym} ptr mem)
+(MOVLQZX x:(MOVLload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+(MOVLQZX x:(MOVQload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVLload <v.Type> [off] {sym} ptr mem)
+
+(MOVLQZX x) && zeroUpper32Bits(x,3) => x
+(MOVWQZX x) && zeroUpper48Bits(x,3) => x
+(MOVBQZX x) && zeroUpper56Bits(x,3) => x
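[Annotation] The three rules above drop an explicit zero extension when the operand provably already has zero upper bits. A rough Go-level sketch of the situation they target, assuming the usual amd64 behavior that 32-bit operations clear the upper half of the destination register:

func sum32(a, b uint32) uint64 {
	c := a + b       // a 32-bit ADDL already leaves the upper 32 bits zero
	return uint64(c) // so the MOVLQZX this conversion would normally need is redundant
}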
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
-(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x)
-(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x)
-(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQZX x)
-(MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
-(MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQSX x)
-(MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQSX x)
-(MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVLQSX x)
+(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQZX x)
+(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQZX x)
+(MOVLload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVLQZX x)
+(MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => x
+(MOVBQSXload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBQSX x)
+(MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVWQSX x)
+(MOVLQSXload [off] {sym} ptr (MOVLstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVLQSX x)
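[Annotation] These are store-to-load forwarding rules: a load that reads back what was just stored at the same address is replaced by an extension of the stored value (or a plain copy at full width), avoiding the memory round trip. A minimal Go sketch of the shape involved:

func roundTrip(p *uint8, x uint8) uint8 {
	*p = x    // MOVBstore
	return *p // MOVBload from the same location; folds to a zero extension of x
}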
// Fold extensions and ANDs together.
-(MOVBQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xff] x)
-(MOVWQZX (ANDLconst [c] x)) -> (ANDLconst [c & 0xffff] x)
-(MOVLQZX (ANDLconst [c] x)) -> (ANDLconst [c] x)
-(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 -> (ANDLconst [c & 0x7f] x)
-(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 -> (ANDLconst [c & 0x7fff] x)
-(MOVLQSX (ANDLconst [c] x)) && c & 0x80000000 == 0 -> (ANDLconst [c & 0x7fffffff] x)
+(MOVBQZX (ANDLconst [c] x)) => (ANDLconst [c & 0xff] x)
+(MOVWQZX (ANDLconst [c] x)) => (ANDLconst [c & 0xffff] x)
+(MOVLQZX (ANDLconst [c] x)) => (ANDLconst [c] x)
+(MOVBQSX (ANDLconst [c] x)) && c & 0x80 == 0 => (ANDLconst [c & 0x7f] x)
+(MOVWQSX (ANDLconst [c] x)) && c & 0x8000 == 0 => (ANDLconst [c & 0x7fff] x)
+(MOVLQSX (ANDLconst [c] x)) && uint32(c) & 0x80000000 == 0 => (ANDLconst [c & 0x7fffffff] x)
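[Annotation] The sign-extension variants only fire when the mask already clears the sign bit of the narrow width; in that case the masked value can never be negative, sign extension and zero extension agree, and everything collapses to a single ANDLconst. A small illustrative sketch (not compiler code):

func maskedByte(x uint32) int64 {
	m := x & 0x3f         // bit 7 is clear, so the low byte can never be negative
	return int64(int8(m)) // equal to int64(m); the MOVBQSX is a no-op here
}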
// Don't extend before storing
-(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
-(MOVWstore [off] {sym} ptr (MOVWQSX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVBQSX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
-(MOVLstore [off] {sym} ptr (MOVLQZX x) mem) -> (MOVLstore [off] {sym} ptr x mem)
-(MOVWstore [off] {sym} ptr (MOVWQZX x) mem) -> (MOVWstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVBQZX x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVLstore [off] {sym} ptr (MOVLQSX x) mem) => (MOVLstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWQSX x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBQSX x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVLstore [off] {sym} ptr (MOVLQZX x) mem) => (MOVLstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWQZX x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBQZX x) mem) => (MOVBstore [off] {sym} ptr x mem)
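[Annotation] Narrow stores only write the low bits of the value, so any widening feeding them is dead. Roughly, at the source level:

func storeByte(p *byte, x int8) {
	*p = byte(x) // only the low 8 bits are written; extending x beforehand changes nothing
}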
// fold constants into memory operations
// Note that this is not always a good idea because if not all the uses of
// the ADDQconst get eliminated, we still have to compute the ADDQconst and we now
// have potentially two live values (ptr and (ADDQconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
-(MOV(Q|L|W|B|SS|SD|O)load [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+(MOV(Q|L|W|B|SS|SD|O)load [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
(MOV(Q|L|W|B|SS|SD|O)load [off1+off2] {sym} ptr mem)
-(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(off1+off2) ->
+(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym} (ADDQconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) =>
(MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {sym} ptr val mem)
-(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+((ADD|SUB|AND|OR|XOR)Qload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {sym} val base mem)
-((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+((ADD|SUB|AND|OR|XOR)Lload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
-(CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+(CMP(Q|L|W|B)load [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
(CMP(Q|L|W|B)load [off1+off2] {sym} base val mem)
-(CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
- (CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {sym} base mem)
+(CMP(Q|L|W|B)constload [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
+ (CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
-((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
-((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
+((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
-((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
- ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
- ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)
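[Annotation] A note on the new guards: offsets are now typed 32-bit auxints, so the rules check the sum in 64-bit arithmetic before folding, since two in-range int32 offsets can overflow int32 when added. A sketch of the intent (not the actual helpers):

func canFoldOffsets(off1, off2 int32) bool {
	sum := int64(off1) + int64(off2) // add in 64 bits so the sum itself cannot overflow
	return sum == int64(int32(sum))  // the is32Bit(int64(off1)+int64(off2)) condition above
}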
// Fold constants into stores.
-(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
- (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
-(MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
- (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-(MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
- (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-(MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) && validOff(off) ->
- (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validVal(c) =>
+ (MOVQstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
+(MOVLstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
+ (MOVLstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
+(MOVWstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
+ (MOVWstoreconst [makeValAndOff32(int32(int16(c)),off)] {sym} ptr mem)
+(MOVBstore [off] {sym} ptr (MOV(L|Q)const [c]) mem) =>
+ (MOVBstoreconst [makeValAndOff32(int32(int8(c)),off)] {sym} ptr mem)
// Fold address offsets into constant stores.
-(MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
- (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {s} ptr mem)
+(MOV(Q|L|W|B)storeconst [sc] {s} (ADDQconst [off] ptr) mem) && ValAndOff(sc).canAdd32(off) =>
+ (MOV(Q|L|W|B)storeconst [ValAndOff(sc).addOffset32(off)] {s} ptr mem)
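[Annotation] The storeconst rules pack a constant value and an address offset into a single auxint via ValAndOff. A minimal sketch of that packing and of what canAdd32/addOffset32 have to guarantee; the exact field layout here is an assumption for illustration only:

type valAndOff int64 // assumed layout: value in the high 32 bits, offset in the low 32 bits

func makeValAndOff32Sketch(val, off int32) valAndOff {
	return valAndOff(int64(val)<<32 | int64(uint32(off)))
}

func (x valAndOff) off() int32 { return int32(int64(x)) }

func (x valAndOff) canAdd32(delta int32) bool {
	newOff := int64(x.off()) + int64(delta)
	return newOff == int64(int32(newOff)) // the folded offset must still fit in 32 bits
}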
// We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
// what variables are being read/written by the ops.
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index c29e7f7edf..311067e87a 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -132,65 +132,65 @@
// we compare to 64 to ensure Go semantics for large shifts
// Rules about rotates with non-const shift are based on the following rules,
// if the following rules change, please also modify the rules based on them.
-(Lsh64x64 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh64x32 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh64x16 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh64x8 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Lsh32x64 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh32x32 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh32x16 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh32x8 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Lsh16x64 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh16x32 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh16x16 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh16x8 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Lsh8x64 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh8x32 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh8x16 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh8x8 <t> x y) => (CSEL {OpARM64LessThanU} (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh64Ux64 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh64Ux32 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh64Ux16 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh64Ux8 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh32Ux64 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh32Ux32 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh32Ux16 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh32Ux8 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh16Ux64 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh16Ux32 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh16Ux16 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh16Ux8 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh8Ux64 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh8Ux32 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh8Ux16 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh8Ux8 <t> x y) => (CSEL {OpARM64LessThanU} (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh64x64 x y) => (SRA x (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh64x32 x y) => (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh64x16 x y) => (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh64x8 x y) => (SRA x (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
-
-(Rsh32x64 x y) => (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh32x32 x y) => (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh32x16 x y) => (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh32x8 x y) => (SRA (SignExt32to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
-
-(Rsh16x64 x y) => (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh16x32 x y) => (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh16x16 x y) => (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh16x8 x y) => (SRA (SignExt16to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
-
-(Rsh8x64 x y) => (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh8x32 x y) => (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh8x16 x y) => (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh8x8 x y) => (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+(Lsh64x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh64x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh64x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh64x8 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+
+(Lsh32x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh32x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh32x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh32x8 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+
+(Lsh16x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh16x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh16x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh16x8 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+
+(Lsh8x64 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh8x32 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Lsh8x16 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Lsh8x8 <t> x y) => (CSEL [OpARM64LessThanU] (SLL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+
+(Rsh64Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh64Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh64Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh64Ux8 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> x (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+
+(Rsh32Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh32Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh32Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh32Ux8 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+
+(Rsh16Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh16Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh16Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh16Ux8 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+
+(Rsh8Ux64 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh8Ux32 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt32to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
+(Rsh8Ux16 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt16to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
+(Rsh8Ux8 <t> x y) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) (ZeroExt8to64 y)) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
+
+(Rsh64x64 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh64x32 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh64x16 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh64x8 x y) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+
+(Rsh32x64 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh32x32 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh32x16 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh32x8 x y) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+
+(Rsh16x64 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh16x32 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh16x16 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh16x8 x y) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+
+(Rsh8x64 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh8x32 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt32to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
+(Rsh8x16 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt16to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
+(Rsh8x8 x y) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> (ZeroExt8to64 y) (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
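[Annotation] The CSEL/CMPconst [64] scaffolding in these rules exists because Go shift semantics differ from the hardware's: a shift count of 64 or more must yield 0 (or all sign bits for a signed right shift), while the machine shift only looks at the low bits of the count. In Go terms:

func shiftSemantics(x uint64, s uint) (uint64, int64) {
	lsh := x << s        // must be 0 whenever s >= 64
	rsh := int64(x) >> s // must be all sign bits whenever s >= 64, hence the clamp to 63
	return lsh, rsh
}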
// constants
(Const(64|32|16|8) [val]) => (MOVDconst [int64(val)])
@@ -315,8 +315,8 @@
(FCMPD (FMOVDconst [0]) x) => (InvertFlags (FCMPD0 x))
// CSEL needs a flag-generating argument. Synthesize a CMPW if necessary.
-(CondSelect x y boolval) && flagArg(boolval) != nil => (CSEL {boolval.Op} x y flagArg(boolval))
-(CondSelect x y boolval) && flagArg(boolval) == nil => (CSEL {OpARM64NotEqual} x y (CMPWconst [0] boolval))
+(CondSelect x y boolval) && flagArg(boolval) != nil => (CSEL [boolval.Op] x y flagArg(boolval))
+(CondSelect x y boolval) && flagArg(boolval) == nil => (CSEL [OpARM64NotEqual] x y (CMPWconst [0] boolval))
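[Annotation] The change in these two rules is purely representational: the condition code moves from the aux field ({...}) to the auxint field ([...]), matching the typed-aux form. The generic pattern being lowered is the familiar branchless select; when the condition is a plain boolean rather than flags, a CMPW against 0 is synthesized so CSEL has flags to consume:

func condSelect(c bool, a, b int64) int64 {
	if c { // a boolean condition gets a synthesized CMPWconst [0] before the CSEL
		return a
	}
	return b
}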
(OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVDaddr [int32(off)] ptr)
(OffPtr [off] ptr) => (ADDconst [off] ptr)
@@ -1324,8 +1324,8 @@
(XOR x (MVN y)) -> (EON x y)
(OR x (MVN y)) -> (ORN x y)
(MVN (XOR x y)) -> (EON x y)
-(CSEL {cc} x (MOVDconst [0]) flag) -> (CSEL0 {cc} x flag)
-(CSEL {cc} (MOVDconst [0]) y flag) -> (CSEL0 {arm64Negate(cc.(Op))} y flag)
+(CSEL [cc] x (MOVDconst [0]) flag) => (CSEL0 [cc] x flag)
+(CSEL [cc] (MOVDconst [0]) y flag) => (CSEL0 [arm64Negate(cc)] y flag)
(SUB x (SUB y z)) -> (SUB (ADD <v.Type> x z) y)
(SUB (SUB x y) z) -> (SUB x (ADD <y.Type> y z))
@@ -1481,8 +1481,8 @@
(GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)
// absorb InvertFlags into CSEL(0)
-(CSEL {cc} x y (InvertFlags cmp)) => (CSEL {arm64Invert(cc)} x y cmp)
-(CSEL0 {cc} x (InvertFlags cmp)) => (CSEL0 {arm64Invert(cc)} x cmp)
+(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
+(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp)
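[Annotation] arm64Invert answers "same outcome with the comparison operands swapped", which is what absorbing InvertFlags requires; it is distinct from arm64Negate ("opposite outcome on the same flags") used when one CSEL arm is zero or when absorbing an Equal/NotEqual test. A standalone sketch of the two mappings, with hypothetical helper names for illustration:

type cc int

const (
	eq cc = iota
	ne
	lt
	le
	gt
	ge
)

func negateSketch(c cc) cc { // opposite outcome, same operand order
	switch c {
	case eq:
		return ne
	case ne:
		return eq
	case lt:
		return ge
	case ge:
		return lt
	case gt:
		return le
	default: // le
		return gt
	}
}

func invertSketch(c cc) cc { // same outcome, operands swapped
	switch c {
	case lt:
		return gt
	case gt:
		return lt
	case le:
		return ge
	case ge:
		return le
	default: // eq and ne are symmetric
		return c
	}
}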
// absorb flag constants into boolean values
(Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])
@@ -1517,20 +1517,20 @@
(MOVBUreg x) && x.Type.IsBoolean() => (MOVDreg x)
// absorb flag constants into conditional instructions
-(CSEL {cc} x _ flag) && ccARM64Eval(cc, flag) > 0 => x
-(CSEL {cc} _ y flag) && ccARM64Eval(cc, flag) < 0 => y
-(CSEL0 {cc} x flag) && ccARM64Eval(cc, flag) > 0 => x
-(CSEL0 {cc} _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
+(CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
+(CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y
+(CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x
+(CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
// absorb flags back into boolean CSEL
-(CSEL {cc} x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
- (CSEL {boolval.Op} x y flagArg(boolval))
-(CSEL {cc} x y (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
- (CSEL {arm64Negate(boolval.Op)} x y flagArg(boolval))
-(CSEL0 {cc} x (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
- (CSEL0 {boolval.Op} x flagArg(boolval))
-(CSEL0 {cc} x (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
- (CSEL0 {arm64Negate(boolval.Op)} x flagArg(boolval))
+(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
+ (CSEL [boolval.Op] x y flagArg(boolval))
+(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
+ (CSEL [arm64Negate(boolval.Op)] x y flagArg(boolval))
+(CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
+ (CSEL0 [boolval.Op] x flagArg(boolval))
+(CSEL0 [cc] x (CMPWconst [0] boolval)) && cc == OpARM64Equal && flagArg(boolval) != nil =>
+ (CSEL0 [arm64Negate(boolval.Op)] x flagArg(boolval))
// absorb shifts into ops
(NEG x:(SLLconst [c] y)) && clobberIfDead(x) => (NEGshiftLL [c] y)
@@ -1691,11 +1691,11 @@
// "|" can also be "^" or "+".
// As arm64 does not have a ROL instruction, so ROL(x, y) is replaced by ROR(x, -y).
((ADD|OR|XOR) (SLL x (ANDconst <t> [63] y))
- (CSEL0 <typ.UInt64> {cc} (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
+ (CSEL0 <typ.UInt64> [cc] (SRL <typ.UInt64> x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc == OpARM64LessThanU
=> (ROR x (NEG <t> y))
((ADD|OR|XOR) (SRL <typ.UInt64> x (ANDconst <t> [63] y))
- (CSEL0 <typ.UInt64> {cc} (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
+ (CSEL0 <typ.UInt64> [cc] (SLL x (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y)))
(CMPconst [64] (SUB <t> (MOVDconst [64]) (ANDconst <t> [63] y))))) && cc == OpARM64LessThanU
=> (ROR x y)
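[Annotation] These patterns recover rotates written in the math/bits style. Because arm64 has no ROL, a variable left rotate is expressed as a right rotate by the negated count once the shift/mask/CSEL0 shape above is matched. At the source level this is simply:

import "math/bits"

func rotl(x uint64, k int) uint64 {
	return bits.RotateLeft64(x, k) // the rules above aim to turn this into ROR x, (NEG k)
}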
@@ -1705,11 +1705,11 @@
// "|" can also be "^" or "+".
// As arm64 does not have a ROLW instruction, so ROLW(x, y) is replaced by RORW(x, -y).
((ADD|OR|XOR) (SLL x (ANDconst <t> [31] y))
- (CSEL0 <typ.UInt32> {cc} (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
+ (CSEL0 <typ.UInt32> [cc] (SRL <typ.UInt32> (MOVWUreg x) (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
(CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc == OpARM64LessThanU
=> (RORW x (NEG <t> y))
((ADD|OR|XOR) (SRL <typ.UInt32> (MOVWUreg x) (ANDconst <t> [31] y))
- (CSEL0 <typ.UInt32> {cc} (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
+ (CSEL0 <typ.UInt32> [cc] (SLL x (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y)))
(CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc == OpARM64LessThanU
=> (RORW x y)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 2424e67e20..e9af261a6a 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -467,8 +467,8 @@ func init() {
// conditional instructions; auxint is
// one of the arm64 comparison pseudo-ops (LessThan, LessThanU, etc.)
- {name: "CSEL", argLength: 3, reg: gp2flags1, asm: "CSEL", aux: "CCop"}, // aux(flags) ? arg0 : arg1
- {name: "CSEL0", argLength: 2, reg: gp1flags1, asm: "CSEL", aux: "CCop"}, // aux(flags) ? arg0 : 0
+ {name: "CSEL", argLength: 3, reg: gp2flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : arg1
+ {name: "CSEL0", argLength: 2, reg: gp1flags1, asm: "CSEL", aux: "CCop"}, // auxint(flags) ? arg0 : 0
// function calls
{name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "SymOff", clobberFlags: true, call: true, symEffect: "None"}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 14942d50f9..e5fb1e98c2 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -110,13 +110,21 @@
// Rotate generation with non-const shift
// these match patterns from math/bits/RotateLeft[32|64], but there could be others
(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+
+(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
+( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
+(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
+
// Lowering rotates
(RotateLeft32 x y) => (ROTLW x y)
(RotateLeft64 x y) => (ROTL x y)
@@ -192,11 +200,15 @@
(Rsh64Ux64 x (AND y (MOVDconst [63]))) => (SRD x (ANDconst <typ.Int64> [63] y))
(Rsh64Ux64 x (ANDconst <typ.UInt> [63] y)) => (SRD x (ANDconst <typ.UInt> [63] y))
(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) => (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))) => (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
(Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) => (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63])))) => (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x (AND y (MOVDconst [63]))) => (SRAD x (ANDconst <typ.Int64> [63] y))
(Rsh64x64 x (ANDconst <typ.UInt> [63] y)) => (SRAD x (ANDconst <typ.UInt> [63] y))
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) => (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64x64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))) => (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
(Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) => (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64x64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63])))) => (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
(Lsh64x64 x y) => (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
(Rsh64x64 x y) => (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
@@ -208,12 +220,16 @@
(Rsh32Ux64 x (AND y (MOVDconst [31]))) => (SRW x (ANDconst <typ.Int32> [31] y))
(Rsh32Ux64 x (ANDconst <typ.UInt> [31] y)) => (SRW x (ANDconst <typ.UInt> [31] y))
(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) => (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))) => (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
(Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) => (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31])))) => (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x (AND y (MOVDconst [31]))) => (SRAW x (ANDconst <typ.Int32> [31] y))
(Rsh32x64 x (ANDconst <typ.UInt> [31] y)) => (SRAW x (ANDconst <typ.UInt> [31] y))
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) => (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32x64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))) => (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) => (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32x64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31])))) => (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
(Rsh32x64 x y) => (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
(Rsh32Ux64 x y) => (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
@@ -276,18 +292,11 @@
(Rsh8Ux8 x y) => (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
(Lsh8x8 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
-// Cleaning up shift ops when input is masked
-(MaskIfNotCarry (ADDconstForCarry [c] (ANDconst [d] _))) && c < 0 && d > 0 && int64(c) + d < 0 => (MOVDconst [-1])
+// Cleaning up shift ops
(ISEL [0] (ANDconst [d] y) (MOVDconst [-1]) (CMPU (ANDconst [d] y) (MOVDconst [c]))) && c >= d => (ANDconst [d] y)
(ISEL [0] (ANDconst [d] y) (MOVDconst [-1]) (CMPUconst [c] (ANDconst [d] y))) && c >= d => (ANDconst [d] y)
(ORN x (MOVDconst [-1])) => x
-(ADDconstForCarry [c] (MOVDconst [d])) && c < 0 && (c < 0 || int64(c) + d >= 0) => (FlagCarryClear)
-(ADDconstForCarry [c] (MOVDconst [d])) && c < 0 && c >= 0 && int64(c) + d < 0 => (FlagCarrySet)
-
-(MaskIfNotCarry (FlagCarrySet)) => (MOVDconst [0])
-(MaskIfNotCarry (FlagCarryClear)) => (MOVDconst [-1])
-
(S(RAD|RD|LD) x (MOVDconst [c])) => (S(RAD|RD|LD)const [c&63 | (c>>6&1*63)] x)
(S(RAW|RW|LW) x (MOVDconst [c])) => (S(RAW|RW|LW)const [c&31 | (c>>5&1*31)] x)
@@ -306,8 +315,8 @@
(Ctz16 x) => (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
(Ctz8 x) => (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
-(BitLen64 x) => (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
-(BitLen32 x) => (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
+(BitLen64 x) => (SUBFCconst [64] (CNTLZD <typ.Int> x))
+(BitLen32 x) => (SUBFCconst [32] (CNTLZW <typ.Int> x))
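[Annotation] BitLen is "width minus leading zeros"; with the new SUBFCconst op the subtraction from a constant no longer needs the constant 64 (or 32) materialized in a register first. In plain Go:

import "math/bits"

func bitLen64(x uint64) int {
	return 64 - bits.LeadingZeros64(x) // same as bits.Len64(x); the rule above lowers this to SUBFCconst [64] (CNTLZD x)
}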
(PopCount64 ...) => (POPCNTD ...)
(PopCount32 x) => (POPCNTW (MOVWZreg x))
@@ -777,10 +786,19 @@
(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) => (ADDconst [c+d] x)
(ADDconst [0] x) => x
(SUB x (MOVDconst [c])) && is32Bit(-c) => (ADDconst [-c] x)
-// TODO deal with subtract-from-const
(ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x)
+// Subtract from (with carry, but ignored) constant.
+// Note, these clobber the carry bit.
+(SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x)
+(SUBFCconst [c] (NEG x)) => (ADDconst [c] x)
+(SUBFCconst [c] (SUBFCconst [d] x)) && is32Bit(c-d) => (ADDconst [c-d] x)
+(SUBFCconst [0] x) => (NEG x)
+(ADDconst [c] (SUBFCconst [d] x)) && is32Bit(c+d) => (SUBFCconst [c+d] x)
+(NEG (ADDconst [c] x)) && is32Bit(-c) => (SUBFCconst [-c] x)
+(NEG (SUBFCconst [c] x)) && is32Bit(-c) => (ADDconst [-c] x)
+
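[Annotation] SUBFCconst [c] x computes c - x (see the op definition below), so the folds above are plain integer identities, which also hold under two's-complement wraparound. Spelled out as a quick sanity check:

func subFromConstIdentities(c, d, x int64) []bool {
	return []bool{
		c-(-x) == c+x,      // SUBFCconst [c] (NEG x)            => ADDconst [c] x
		c-(d-x) == (c-d)+x, // SUBFCconst [c] (SUBFCconst [d] x) => ADDconst [c-d] x
		0-x == -x,          // SUBFCconst [0] x                  => NEG x
		c+(d-x) == (c+d)-x, // ADDconst [c] (SUBFCconst [d] x)   => SUBFCconst [c+d] x
		-(c+x) == (-c)-x,   // NEG (ADDconst [c] x)              => SUBFCconst [-c] x
		-(c-x) == (-c)+x,   // NEG (SUBFCconst [c] x)            => ADDconst [-c] x
	}
}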
// Use register moves instead of stores and loads to move int<=>float values
// Common with math Float64bits, Float64frombits
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) => (MFVSRD x)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index 825d0faf34..44f6a74c63 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -175,6 +175,7 @@ func init() {
{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true}, // arg0+arg1
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0-arg1
+ {name: "SUBFCconst", argLength: 1, reg: gp11, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (with carry)
{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1
{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1
@@ -206,9 +207,7 @@ func init() {
{name: "ROTL", argLength: 2, reg: gp21, asm: "ROTL"}, // arg0 rotate left by arg1 mod 64
{name: "ROTLW", argLength: 2, reg: gp21, asm: "ROTLW"}, // uint32(arg0) rotate left by arg1 mod 32
- {name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true}, // arg0 + arg1 + carry, returns (sum, carry)
- {name: "ADDconstForCarry", argLength: 1, reg: regInfo{inputs: []regMask{gp | sp | sb}, clobbers: tmp}, aux: "Int16", asm: "ADDC", typ: "Flags"}, // _, carry := arg0 + auxint
- {name: "MaskIfNotCarry", argLength: 1, reg: crgp, asm: "ADDME", typ: "Int64"}, // carry - 1 (if carry then 0 else -1)
+ {name: "LoweredAdd64Carry", argLength: 3, reg: gp32, resultNotInArgs: true}, // arg0 + arg1 + carry, returns (sum, carry)
{name: "SRADconst", argLength: 1, reg: gp11, asm: "SRAD", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 64, 64 bit width
{name: "SRAWconst", argLength: 1, reg: gp11, asm: "SRAW", aux: "Int64"}, // signed arg0 >> auxInt, 0 <= auxInt < 32, 32 bit width
@@ -674,11 +673,9 @@ func init() {
// These ops are for temporary use by rewrite rules. They
// cannot appear in the generated assembly.
- {name: "FlagEQ"}, // equal
- {name: "FlagLT"}, // signed < or unsigned <
- {name: "FlagGT"}, // signed > or unsigned >
- {name: "FlagCarrySet"}, // carry flag set
- {name: "FlagCarryClear"}, // carry flag clear
+ {name: "FlagEQ"}, // equal
+ {name: "FlagLT"}, // signed < or unsigned <
+ {name: "FlagGT"}, // signed > or unsigned >
}
blocks := []blockData{
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index 5e4c436ca1..e564f638d3 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -498,27 +498,19 @@
// Remove zero extensions after zero extending load.
// Note: take care that if x is spilled it is restored correctly.
(MOV(B|H|W)Zreg x:(MOVBZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x
-(MOV(B|H|W)Zreg x:(MOVBZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x
(MOV(H|W)Zreg x:(MOVHZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x
-(MOV(H|W)Zreg x:(MOVHZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x
(MOVWZreg x:(MOVWZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 4) => x
-(MOVWZreg x:(MOVWZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 4) => x
// Remove sign extensions after sign extending load.
// Note: take care that if x is spilled it is restored correctly.
(MOV(B|H|W)reg x:(MOVBload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
-(MOV(B|H|W)reg x:(MOVBloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
(MOV(H|W)reg x:(MOVHload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
-(MOV(H|W)reg x:(MOVHloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
(MOVWreg x:(MOVWload _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
-(MOVWreg x:(MOVWloadidx _ _ _)) && (x.Type.IsSigned() || x.Type.Size() == 8) => x
// Remove sign extensions after zero extending load.
// These type checks are probably unnecessary but do them anyway just in case.
(MOV(H|W)reg x:(MOVBZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x
-(MOV(H|W)reg x:(MOVBZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 1) => x
(MOVWreg x:(MOVHZload _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x
-(MOVWreg x:(MOVHZloadidx _ _ _)) && (!x.Type.IsSigned() || x.Type.Size() > 2) => x
// Fold sign and zero extensions into loads.
//
@@ -538,14 +530,6 @@
&& x.Uses == 1
&& clobber(x)
=> @x.Block (MOV(B|H|W)load <t> [o] {s} p mem)
-(MOV(B|H|W)Zreg <t> x:(MOV(B|H|W)loadidx [o] {s} p i mem))
- && x.Uses == 1
- && clobber(x)
- => @x.Block (MOV(B|H|W)Zloadidx <t> [o] {s} p i mem)
-(MOV(B|H|W)reg <t> x:(MOV(B|H|W)Zloadidx [o] {s} p i mem))
- && x.Uses == 1
- && clobber(x)
- => @x.Block (MOV(B|H|W)loadidx <t> [o] {s} p i mem)
// Remove zero extensions after argument load.
(MOVBZreg x:(Arg <t>)) && !t.IsSigned() && t.Size() == 1 => x
@@ -641,41 +625,37 @@
(BRC {c} (CMPWUconst x [y]) yes no) && y == int32( int8(y)) && (c == s390x.Equal || c == s390x.LessOrGreater) => (CIJ {c} x [ int8(y)] yes no)
// Fold constants into instructions.
-(ADD x (MOVDconst [c])) && is32Bit(c) -> (ADDconst [c] x)
-(ADDW x (MOVDconst [c])) -> (ADDWconst [int64(int32(c))] x)
+(ADD x (MOVDconst [c])) && is32Bit(c) => (ADDconst [int32(c)] x)
+(ADDW x (MOVDconst [c])) => (ADDWconst [int32(c)] x)
-(SUB x (MOVDconst [c])) && is32Bit(c) -> (SUBconst x [c])
-(SUB (MOVDconst [c]) x) && is32Bit(c) -> (NEG (SUBconst <v.Type> x [c]))
-(SUBW x (MOVDconst [c])) -> (SUBWconst x [int64(int32(c))])
-(SUBW (MOVDconst [c]) x) -> (NEGW (SUBWconst <v.Type> x [int64(int32(c))]))
+(SUB x (MOVDconst [c])) && is32Bit(c) => (SUBconst x [int32(c)])
+(SUB (MOVDconst [c]) x) && is32Bit(c) => (NEG (SUBconst <v.Type> x [int32(c)]))
+(SUBW x (MOVDconst [c])) => (SUBWconst x [int32(c)])
+(SUBW (MOVDconst [c]) x) => (NEGW (SUBWconst <v.Type> x [int32(c)]))
-(MULLD x (MOVDconst [c])) && is32Bit(c) -> (MULLDconst [c] x)
-(MULLW x (MOVDconst [c])) -> (MULLWconst [int64(int32(c))] x)
+(MULLD x (MOVDconst [c])) && is32Bit(c) => (MULLDconst [int32(c)] x)
+(MULLW x (MOVDconst [c])) => (MULLWconst [int32(c)] x)
// NILF instructions leave the high 32 bits unchanged which is
// equivalent to the leftmost 32 bits being set.
// TODO(mundaym): modify the assembler to accept 64-bit values
// and use isU32Bit(^c).
(AND x (MOVDconst [c])) && is32Bit(c) && c < 0 => (ANDconst [c] x)
-(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(int32(c))] x))
-(ANDW x (MOVDconst [c])) -> (ANDWconst [int64(int32(c))] x)
+(AND x (MOVDconst [c])) && is32Bit(c) && c >= 0 => (MOVWZreg (ANDWconst <typ.UInt32> [int32(c)] x))
+(ANDW x (MOVDconst [c])) => (ANDWconst [int32(c)] x)
-(ANDWconst [c] (ANDWconst [d] x)) => (ANDWconst [c & d] x)
-(ANDconst [c] (ANDconst [d] x)) => (ANDconst [c & d] x)
+((AND|ANDW)const [c] ((AND|ANDW)const [d] x)) => ((AND|ANDW)const [c&d] x)
-(OR x (MOVDconst [c])) && isU32Bit(c) => (ORconst [c] x)
-(ORW x (MOVDconst [c])) -> (ORWconst [int64(int32(c))] x)
-
-(XOR x (MOVDconst [c])) && isU32Bit(c) => (XORconst [c] x)
-(XORW x (MOVDconst [c])) -> (XORWconst [int64(int32(c))] x)
+((OR|XOR) x (MOVDconst [c])) && isU32Bit(c) => ((OR|XOR)const [c] x)
+((OR|XOR)W x (MOVDconst [c])) => ((OR|XOR)Wconst [int32(c)] x)
// Constant shifts.
(S(LD|RD|RAD|LW|RW|RAW) x (MOVDconst [c]))
- -> (S(LD|RD|RAD|LW|RW|RAW)const x [c&63])
+ => (S(LD|RD|RAD|LW|RW|RAW)const x [int8(c&63)])
// Shifts only use the rightmost 6 bits of the shift value.
(S(LD|RD|RAD|LW|RW|RAW) x (AND (MOVDconst [c]) y))
- -> (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst <typ.UInt32> [c&63] y))
+ => (S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst <typ.UInt32> [int32(c&63)] y))
(S(LD|RD|RAD|LW|RW|RAW) x (ANDWconst [c] y)) && c&63 == 63
=> (S(LD|RD|RAD|LW|RW|RAW) x y)
(SLD x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SLD x y)
@@ -686,8 +666,8 @@
(SRAW x (MOV(W|H|B|WZ|HZ|BZ)reg y)) => (SRAW x y)
// Constant rotate generation
-(RLL x (MOVDconst [c])) -> (RLLconst x [c&31])
-(RLLG x (MOVDconst [c])) -> (RLLGconst x [c&63])
+(RLL x (MOVDconst [c])) => (RLLconst x [int8(c&31)])
+(RLLG x (MOVDconst [c])) => (RLLGconst x [int8(c&63)])
(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x)
( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c => (RLLGconst [c] x)
@@ -697,14 +677,17 @@
( ORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x)
(XORW (SLWconst x [c]) (SRWconst x [d])) && d == 32-c => (RLLconst [c] x)
-(CMP x (MOVDconst [c])) && is32Bit(c) -> (CMPconst x [c])
-(CMP (MOVDconst [c]) x) && is32Bit(c) -> (InvertFlags (CMPconst x [c]))
-(CMPW x (MOVDconst [c])) -> (CMPWconst x [int64(int32(c))])
-(CMPW (MOVDconst [c]) x) -> (InvertFlags (CMPWconst x [int64(int32(c))]))
-(CMPU x (MOVDconst [c])) && isU32Bit(c) -> (CMPUconst x [int64(int32(c))])
-(CMPU (MOVDconst [c]) x) && isU32Bit(c) -> (InvertFlags (CMPUconst x [int64(int32(c))]))
-(CMPWU x (MOVDconst [c])) -> (CMPWUconst x [int64(int32(c))])
-(CMPWU (MOVDconst [c]) x) -> (InvertFlags (CMPWUconst x [int64(int32(c))]))
+// Signed 64-bit comparison with immediate.
+(CMP x (MOVDconst [c])) && is32Bit(c) => (CMPconst x [int32(c)])
+(CMP (MOVDconst [c]) x) && is32Bit(c) => (InvertFlags (CMPconst x [int32(c)]))
+
+// Unsigned 64-bit comparison with immediate.
+(CMPU x (MOVDconst [c])) && isU32Bit(c) => (CMPUconst x [int32(c)])
+(CMPU (MOVDconst [c]) x) && isU32Bit(c) => (InvertFlags (CMPUconst x [int32(c)]))
+
+// Signed and unsigned 32-bit comparison with immediate.
+(CMP(W|WU) x (MOVDconst [c])) => (CMP(W|WU)const x [int32(c)])
+(CMP(W|WU) (MOVDconst [c]) x) => (InvertFlags (CMP(W|WU)const x [int32(c)]))
// Canonicalize the order of arguments to comparisons - helps with CSE.
((CMP|CMPW|CMPU|CMPWU) x y) && x.ID > y.ID => (InvertFlags ((CMP|CMPW|CMPU|CMPWU) y x))
@@ -752,14 +735,14 @@
(SL(D|W)const <t> x [int8(log32(-c+(-c&^(-c-1))))]))
// Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
-(ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
-(ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(c+d) -> (MOVDaddr [c+d] {s} x)
-(ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB && idx.Op != OpSB => (MOVDaddridx [c] {s} ptr idx)
+(ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(int64(c)+int64(d)) => (MOVDaddr [c+d] {s} x)
+(ADDconst [c] (MOVDaddr [d] {s} x)) && x.Op != OpSB && is20Bit(int64(c)+int64(d)) => (MOVDaddr [c+d] {s} x)
+(ADD idx (MOVDaddr [c] {s} ptr)) && ptr.Op != OpSB => (MOVDaddridx [c] {s} ptr idx)
// fold ADDconst into MOVDaddrx
-(ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(c+d) -> (MOVDaddridx [c+d] {s} x y)
-(MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(c+d) && x.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
-(MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(c+d) && y.Op != OpSB -> (MOVDaddridx [c+d] {s} x y)
+(ADDconst [c] (MOVDaddridx [d] {s} x y)) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y)
+(MOVDaddridx [c] {s} (ADDconst [d] x) y) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y)
+(MOVDaddridx [c] {s} x (ADDconst [d] y)) && is20Bit(int64(c)+int64(d)) => (MOVDaddridx [c+d] {s} x y)
// reverse ordering of compare instruction
(LOCGR {c} x y (InvertFlags cmp)) => (LOCGR {c.ReverseComparison()} x y cmp)
@@ -799,11 +782,11 @@
// detect copysign
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) => (LGDR (CPSDR <t> y x))
-(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
+(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & -1<<63 == 0 => (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [math.Float64frombits(uint64(c))]) x))
(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (LGDR (LPDFR <t> y))) => (LGDR (CPSDR <t> y x))
-(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
-(CPSDR y (FMOVDconst [c])) && c & -1<<63 == 0 -> (LPDFR y)
-(CPSDR y (FMOVDconst [c])) && c & -1<<63 != 0 -> (LNDFR y)
+(OR (AND (MOVDconst [-1<<63]) (LGDR x)) (MOVDconst [c])) && c & -1<<63 == 0 => (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [math.Float64frombits(uint64(c))]) x))
+(CPSDR y (FMOVDconst [c])) && !math.Signbit(c) => (LPDFR y)
+(CPSDR y (FMOVDconst [c])) && math.Signbit(c) => (LNDFR y)
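[Annotation] CPSDR copies the sign of one operand onto the other, i.e. a copysign; when the sign source is a constant the result degenerates to "force positive" (LPDFR) or "force negative" (LNDFR). Roughly, in Go:

import "math"

func withConstSign(y float64) (float64, float64) {
	pos := math.Copysign(y, 1)  // constant sign bit clear: LPDFR y, i.e. |y|
	neg := math.Copysign(y, -1) // constant sign bit set:   LNDFR y, i.e. -|y|
	return pos, neg
}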
// absorb negations into set/clear sign bit
(FNEG (LPDFR x)) => (LNDFR x)
@@ -832,216 +815,131 @@
// the ADDconst get eliminated, we still have to compute the ADDconst and we now
// have potentially two live values (ptr and (ADDconst [off] ptr)) instead of one.
// Nevertheless, let's do it!
-(MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVDload [off1+off2] {sym} ptr mem)
-(MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWload [off1+off2] {sym} ptr mem)
-(MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHload [off1+off2] {sym} ptr mem)
-(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem)
-(MOVWZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} ptr mem)
-(MOVHZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} ptr mem)
-(MOVBZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (MOVBZload [off1+off2] {sym} ptr mem)
-(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVSload [off1+off2] {sym} ptr mem)
-(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(off1+off2) -> (FMOVDload [off1+off2] {sym} ptr mem)
-
-(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} ptr val mem)
-(MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem)
-(MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVHstore [off1+off2] {sym} ptr val mem)
-(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem)
-(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
-(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
-
-(ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload [off1+off2] {sym} x ptr mem)
-(ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload [off1+off2] {sym} x ptr mem)
-(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem)
-(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem)
-(SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload [off1+off2] {sym} x ptr mem)
-(SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload [off1+off2] {sym} x ptr mem)
-
-(ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload [off1+off2] {sym} x ptr mem)
-(ANDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload [off1+off2] {sym} x ptr mem)
-(ORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload [off1+off2] {sym} x ptr mem)
-(ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload [off1+off2] {sym} x ptr mem)
-(XORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload [off1+off2] {sym} x ptr mem)
-(XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload [off1+off2] {sym} x ptr mem)
+(MOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVDload [off1+off2] {sym} ptr mem)
+(MOVWload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWload [off1+off2] {sym} ptr mem)
+(MOVHload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHload [off1+off2] {sym} ptr mem)
+(MOVBload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBload [off1+off2] {sym} ptr mem)
+(MOVWZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWZload [off1+off2] {sym} ptr mem)
+(MOVHZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHZload [off1+off2] {sym} ptr mem)
+(MOVBZload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBZload [off1+off2] {sym} ptr mem)
+(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVSload [off1+off2] {sym} ptr mem)
+(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVDload [off1+off2] {sym} ptr mem)
+
+(MOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVDstore [off1+off2] {sym} ptr val mem)
+(MOVWstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVWstore [off1+off2] {sym} ptr val mem)
+(MOVHstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVHstore [off1+off2] {sym} ptr val mem)
+(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (MOVBstore [off1+off2] {sym} ptr val mem)
+(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVSstore [off1+off2] {sym} ptr val mem)
+(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(int64(off1)+int64(off2)) => (FMOVDstore [off1+off2] {sym} ptr val mem)
+
+(ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ADDload [off1+off2] {sym} x ptr mem)
+(ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ADDWload [off1+off2] {sym} x ptr mem)
+(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (MULLDload [off1+off2] {sym} x ptr mem)
+(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (MULLWload [off1+off2] {sym} x ptr mem)
+(SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (SUBload [off1+off2] {sym} x ptr mem)
+(SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (SUBWload [off1+off2] {sym} x ptr mem)
+
+(ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ANDload [off1+off2] {sym} x ptr mem)
+(ANDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ANDWload [off1+off2] {sym} x ptr mem)
+(ORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ORload [off1+off2] {sym} x ptr mem)
+(ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (ORWload [off1+off2] {sym} x ptr mem)
+(XORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (XORload [off1+off2] {sym} x ptr mem)
+(XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(int64(off1)+int64(off2)) => (XORWload [off1+off2] {sym} x ptr mem)
// Fold constants into stores.
-(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
- (MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
-(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
- (MOVWstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem)
-(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(off) && ptr.Op != OpSB ->
- (MOVHstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem)
-(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(off) && ptr.Op != OpSB ->
- (MOVBstoreconst [makeValAndOff(int64(int8(c)),off)] {sym} ptr mem)
+(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(int64(off)) && ptr.Op != OpSB =>
+ (MOVDstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
+(MOVWstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(int64(off)) && ptr.Op != OpSB =>
+ (MOVWstoreconst [makeValAndOff32(int32(c),off)] {sym} ptr mem)
+(MOVHstore [off] {sym} ptr (MOVDconst [c]) mem) && isU12Bit(int64(off)) && ptr.Op != OpSB =>
+ (MOVHstoreconst [makeValAndOff32(int32(int16(c)),off)] {sym} ptr mem)
+(MOVBstore [off] {sym} ptr (MOVDconst [c]) mem) && is20Bit(int64(off)) && ptr.Op != OpSB =>
+ (MOVBstoreconst [makeValAndOff32(int32(int8(c)),off)] {sym} ptr mem)
// Fold address offsets into constant stores.
-(MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
- (MOVDstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
- (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(ValAndOff(sc).Off()+off) ->
- (MOVHstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(ValAndOff(sc).Off()+off) ->
- (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+(MOVDstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off()+int64(off)) =>
+ (MOVDstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVWstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off()+int64(off)) =>
+ (MOVWstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVHstoreconst [sc] {s} (ADDconst [off] ptr) mem) && isU12Bit(sc.Off()+int64(off)) =>
+ (MOVHstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVBstoreconst [sc] {s} (ADDconst [off] ptr) mem) && is20Bit(sc.Off()+int64(off)) =>
+ (MOVBstoreconst [sc.addOffset32(off)] {s} ptr mem)
// Merge address calculations into loads and stores.
// Offsets from SB must not be merged into unaligned memory accesses because
// loads/stores using PC-relative addressing directly must be aligned to the
// size of the target.
-(MOVDload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
- (MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
- (MOVWZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVHZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
- (MOVHZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVBZload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-
-(MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
- (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
- (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-
-(MOVDstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) ->
- (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVWstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) ->
- (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVHstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) ->
- (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-
-(ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-
-(ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(XORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
-(XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
+(MOVDload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) =>
+ (MOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVWZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) =>
+ (MOVWZload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVHZload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) =>
+ (MOVHZload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVBZload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (MOVBZload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (FMOVSload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (FMOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+
+(MOVWload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) =>
+ (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVHload [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) =>
+ (MOVHload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+
+(MOVDstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%8 == 0 && (off1+off2)%8 == 0)) =>
+ (MOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVWstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%4 == 0 && (off1+off2)%4 == 0)) =>
+ (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVHstore [off1] {sym1} (MOVDaddr <t> [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || (t.IsPtr() && t.Elem().Alignment()%2 == 0 && (off1+off2)%2 == 0)) =>
+ (MOVHstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (FMOVSstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (FMOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+
+(ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ADDload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ADDWload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (MULLDload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (MULLWload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (SUBload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (SUBWload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+
+(ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ANDload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ANDWload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ORload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (ORWload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(XORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (XORload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
+(XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(int64(o1)+int64(o2)) && canMergeSym(s1, s2) => (XORWload [o1+o2] {mergeSymTyped(s1, s2)} x ptr mem)
// Cannot store constant to SB directly (no 'move relative long immediate' instructions).
-(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
- (MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
- (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
- (MOVHstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
- (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-
-// generating indexed loads and stores
-(MOVBZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVBZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVBload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVBloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVHZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVHZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVHload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVHloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWZload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVWZloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVWload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVWloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(MOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(FMOVSload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (FMOVSloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-(FMOVDload [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (FMOVDloadidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem)
-
-(MOVBstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVBstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVHstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVHstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVWstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVWstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(MOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(FMOVSstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (FMOVSstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-(FMOVDstore [off1] {sym1} (MOVDaddridx [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (FMOVDstoreidx [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem)
-
-(MOVBZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVBZloadidx [off] {sym} ptr idx mem)
-(MOVBload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVBloadidx [off] {sym} ptr idx mem)
-(MOVHZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVHZloadidx [off] {sym} ptr idx mem)
-(MOVHload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVHloadidx [off] {sym} ptr idx mem)
-(MOVWZload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVWZloadidx [off] {sym} ptr idx mem)
-(MOVWload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVWloadidx [off] {sym} ptr idx mem)
-(MOVDload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (MOVDloadidx [off] {sym} ptr idx mem)
-(FMOVSload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (FMOVSloadidx [off] {sym} ptr idx mem)
-(FMOVDload [off] {sym} (ADD ptr idx) mem) && ptr.Op != OpSB => (FMOVDloadidx [off] {sym} ptr idx mem)
-
-(MOVBstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (MOVBstoreidx [off] {sym} ptr idx val mem)
-(MOVHstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (MOVHstoreidx [off] {sym} ptr idx val mem)
-(MOVWstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (MOVWstoreidx [off] {sym} ptr idx val mem)
-(MOVDstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (MOVDstoreidx [off] {sym} ptr idx val mem)
-(FMOVSstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (FMOVSstoreidx [off] {sym} ptr idx val mem)
-(FMOVDstore [off] {sym} (ADD ptr idx) val mem) && ptr.Op != OpSB => (FMOVDstoreidx [off] {sym} ptr idx val mem)
-
-// combine ADD into indexed loads and stores
-(MOVBZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
-(MOVBloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVBloadidx [c+d] {sym} ptr idx mem)
-(MOVHZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
-(MOVHloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVHloadidx [c+d] {sym} ptr idx mem)
-(MOVWZloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
-(MOVWloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVWloadidx [c+d] {sym} ptr idx mem)
-(MOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (MOVDloadidx [c+d] {sym} ptr idx mem)
-(FMOVSloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
-(FMOVDloadidx [c] {sym} (ADDconst [d] ptr) idx mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)
-
-(MOVBstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVBstoreidx [c+d] {sym} ptr idx val mem)
-(MOVHstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVHstoreidx [c+d] {sym} ptr idx val mem)
-(MOVWstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVWstoreidx [c+d] {sym} ptr idx val mem)
-(MOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (MOVDstoreidx [c+d] {sym} ptr idx val mem)
-(FMOVSstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
-(FMOVDstoreidx [c] {sym} (ADDconst [d] ptr) idx val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)
-
-(MOVBZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBZloadidx [c+d] {sym} ptr idx mem)
-(MOVBloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVBloadidx [c+d] {sym} ptr idx mem)
-(MOVHZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHZloadidx [c+d] {sym} ptr idx mem)
-(MOVHloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVHloadidx [c+d] {sym} ptr idx mem)
-(MOVWZloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWZloadidx [c+d] {sym} ptr idx mem)
-(MOVWloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVWloadidx [c+d] {sym} ptr idx mem)
-(MOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (MOVDloadidx [c+d] {sym} ptr idx mem)
-(FMOVSloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVSloadidx [c+d] {sym} ptr idx mem)
-(FMOVDloadidx [c] {sym} ptr (ADDconst [d] idx) mem) && is20Bit(c+d) -> (FMOVDloadidx [c+d] {sym} ptr idx mem)
-
-(MOVBstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVBstoreidx [c+d] {sym} ptr idx val mem)
-(MOVHstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVHstoreidx [c+d] {sym} ptr idx val mem)
-(MOVWstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVWstoreidx [c+d] {sym} ptr idx val mem)
-(MOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (MOVDstoreidx [c+d] {sym} ptr idx val mem)
-(FMOVSstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVSstoreidx [c+d] {sym} ptr idx val mem)
-(FMOVDstoreidx [c] {sym} ptr (ADDconst [d] idx) val mem) && is20Bit(c+d) -> (FMOVDstoreidx [c+d] {sym} ptr idx val mem)
+(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+ (MOVDstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVWstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+ (MOVWstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVHstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+ (MOVHstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVBstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+ (MOVBstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
// MOVDaddr into MOVDaddridx
-(MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
- (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
-(MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && y.Op != OpSB ->
- (MOVDaddridx [off1+off2] {mergeSym(sym1,sym2)} x y)
+(MOVDaddridx [off1] {sym1} (MOVDaddr [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
+ (MOVDaddridx [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+(MOVDaddridx [off1] {sym1} x (MOVDaddr [off2] {sym2} y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && y.Op != OpSB =>
+ (MOVDaddridx [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
// Absorb InvertFlags into branches.
(BRC {c} (InvertFlags cmp) yes no) => (BRC {c.ReverseComparison()} cmp yes no)
// Constant comparisons.
-(CMPconst (MOVDconst [x]) [y]) && x==y -> (FlagEQ)
-(CMPconst (MOVDconst [x]) [y]) && x<y -> (FlagLT)
-(CMPconst (MOVDconst [x]) [y]) && x>y -> (FlagGT)
+(CMPconst (MOVDconst [x]) [y]) && x==int64(y) => (FlagEQ)
+(CMPconst (MOVDconst [x]) [y]) && x<int64(y) => (FlagLT)
+(CMPconst (MOVDconst [x]) [y]) && x>int64(y) => (FlagGT)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)==uint64(y) => (FlagEQ)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)<uint64(y) => (FlagLT)
(CMPUconst (MOVDconst [x]) [y]) && uint64(x)>uint64(y) => (FlagGT)
@@ -1158,31 +1056,31 @@
// Convert constant subtracts to constant adds.
(SUBconst [c] x) && c != -(1<<31) => (ADDconst [-c] x)
-(SUBWconst [c] x) -> (ADDWconst [int64(int32(-c))] x)
+(SUBWconst [c] x) => (ADDWconst [-int32(c)] x)
// generic constant folding
// TODO: more of this
-(ADDconst [c] (MOVDconst [d])) -> (MOVDconst [c+d])
-(ADDWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c+d))])
-(ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) -> (ADDconst [c+d] x)
-(ADDWconst [c] (ADDWconst [d] x)) -> (ADDWconst [int64(int32(c+d))] x)
-(SUBconst (MOVDconst [d]) [c]) -> (MOVDconst [d-c])
-(SUBconst (SUBconst x [d]) [c]) && is32Bit(-c-d) -> (ADDconst [-c-d] x)
+(ADDconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)+d])
+(ADDWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)+d])
+(ADDconst [c] (ADDconst [d] x)) && is32Bit(int64(c)+int64(d)) => (ADDconst [c+d] x)
+(ADDWconst [c] (ADDWconst [d] x)) => (ADDWconst [int32(c+d)] x)
+(SUBconst (MOVDconst [d]) [c]) => (MOVDconst [d-int64(c)])
+(SUBconst (SUBconst x [d]) [c]) && is32Bit(-int64(c)-int64(d)) => (ADDconst [-c-d] x)
(SRADconst [c] (MOVDconst [d])) => (MOVDconst [d>>uint64(c)])
(SRAWconst [c] (MOVDconst [d])) => (MOVDconst [int64(int32(d))>>uint64(c)])
(NEG (MOVDconst [c])) => (MOVDconst [-c])
(NEGW (MOVDconst [c])) => (MOVDconst [int64(int32(-c))])
-(MULLDconst [c] (MOVDconst [d])) -> (MOVDconst [c*d])
-(MULLWconst [c] (MOVDconst [d])) -> (MOVDconst [int64(int32(c*d))])
+(MULLDconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)*d])
+(MULLWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c*int32(d))])
(AND (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c&d])
(ANDconst [c] (MOVDconst [d])) => (MOVDconst [c&d])
-(ANDWconst [c] (MOVDconst [d])) -> (MOVDconst [c&d])
+(ANDWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)&d])
(OR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c|d])
(ORconst [c] (MOVDconst [d])) => (MOVDconst [c|d])
-(ORWconst [c] (MOVDconst [d])) -> (MOVDconst [c|d])
+(ORWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)|d])
(XOR (MOVDconst [c]) (MOVDconst [d])) => (MOVDconst [c^d])
(XORconst [c] (MOVDconst [d])) => (MOVDconst [c^d])
-(XORWconst [c] (MOVDconst [d])) -> (MOVDconst [c^d])
+(XORWconst [c] (MOVDconst [d])) => (MOVDconst [int64(c)^d])
(LoweredRound32F x:(FMOVSconst)) => x
(LoweredRound64F x:(FMOVDconst)) => x
@@ -1199,19 +1097,19 @@
(XOR x x) => (MOVDconst [0])
(XORW x x) => (MOVDconst [0])
(NEG (ADDconst [c] (NEG x))) && c != -(1<<31) => (ADDconst [-c] x)
-(MOVBZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
-(MOVHZreg (ANDWconst [m] x)) -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
-(MOVBreg (ANDWconst [m] x)) && int8(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64( uint8(m))] x))
-(MOVHreg (ANDWconst [m] x)) && int16(m) >= 0 -> (MOVWZreg (ANDWconst <typ.UInt32> [int64(uint16(m))] x))
+(MOVBZreg (ANDWconst [m] x)) => (MOVWZreg (ANDWconst <typ.UInt32> [int32( uint8(m))] x))
+(MOVHZreg (ANDWconst [m] x)) => (MOVWZreg (ANDWconst <typ.UInt32> [int32(uint16(m))] x))
+(MOVBreg (ANDWconst [m] x)) && int8(m) >= 0 => (MOVWZreg (ANDWconst <typ.UInt32> [int32( uint8(m))] x))
+(MOVHreg (ANDWconst [m] x)) && int16(m) >= 0 => (MOVWZreg (ANDWconst <typ.UInt32> [int32(uint16(m))] x))
// carry flag generation
// (only constant fold carry of zero)
(Select1 (ADDCconst (MOVDconst [c]) [d]))
- && uint64(c+d) >= uint64(c) && c+d == 0
- -> (FlagEQ)
+ && uint64(c+int64(d)) >= uint64(c) && c+int64(d) == 0
+ => (FlagEQ)
(Select1 (ADDCconst (MOVDconst [c]) [d]))
- && uint64(c+d) >= uint64(c) && c+d != 0
- -> (FlagLT)
+ && uint64(c+int64(d)) >= uint64(c) && c+int64(d) != 0
+ => (FlagLT)
// borrow flag generation
// (only constant fold borrow of zero)
@@ -1225,8 +1123,8 @@
// add with carry
(ADDE x y (FlagEQ)) => (ADDC x y)
(ADDE x y (FlagLT)) => (ADDC x y)
-(ADDC x (MOVDconst [c])) && is16Bit(c) -> (ADDCconst x [c])
-(Select0 (ADDCconst (MOVDconst [c]) [d])) -> (MOVDconst [c+d])
+(ADDC x (MOVDconst [c])) && is16Bit(c) => (ADDCconst x [int16(c)])
+(Select0 (ADDCconst (MOVDconst [c]) [d])) => (MOVDconst [c+int64(d)])
// subtract with borrow
(SUBE x y (FlagGT)) => (SUBC x y)
@@ -1256,14 +1154,12 @@
(C(G|LG)IJ {s390x.Greater} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.Borrow} borrow)
// fused multiply-add
-(Select0 (F(ADD|SUB) (FMUL y z) x)) -> (FM(ADD|SUB) x y z)
-(Select0 (F(ADDS|SUBS) (FMULS y z) x)) -> (FM(ADDS|SUBS) x y z)
+(Select0 (F(ADD|SUB) (FMUL y z) x)) => (FM(ADD|SUB) x y z)
+(Select0 (F(ADDS|SUBS) (FMULS y z) x)) => (FM(ADDS|SUBS) x y z)
// Convert floating point comparisons against zero into 'load and test' instructions.
-(FCMP x (FMOVDconst [c])) && auxTo64F(c) == 0 -> (LTDBR x)
-(FCMPS x (FMOVSconst [c])) && auxTo32F(c) == 0 -> (LTEBR x)
-(FCMP (FMOVDconst [c]) x) && auxTo64F(c) == 0 -> (InvertFlags (LTDBR <v.Type> x))
-(FCMPS (FMOVSconst [c]) x) && auxTo32F(c) == 0 -> (InvertFlags (LTEBR <v.Type> x))
+(F(CMP|CMPS) x (FMOV(D|S)const [0.0])) => (LT(D|E)BR x)
+(F(CMP|CMPS) (FMOV(D|S)const [0.0]) x) => (InvertFlags (LT(D|E)BR <v.Type> x))
// FSUB, FSUBS, FADD, FADDS now produce a condition code representing the
// comparison of the result with 0.0. If a compare with zero instruction
@@ -1274,30 +1170,30 @@
// but moving the flag generating value to a different block seems to
// increase the likelihood that the flags value will have to be regenerated
// by flagalloc which is not what we want.
-(LTDBR (Select0 x:(F(ADD|SUB) _ _))) && b == x.Block -> (Select1 x)
-(LTEBR (Select0 x:(F(ADDS|SUBS) _ _))) && b == x.Block -> (Select1 x)
+(LTDBR (Select0 x:(F(ADD|SUB) _ _))) && b == x.Block => (Select1 x)
+(LTEBR (Select0 x:(F(ADDS|SUBS) _ _))) && b == x.Block => (Select1 x)
// Fold memory operations into operations.
// Exclude global data (SB) because these instructions cannot handle relative addresses.
// TODO(mundaym): indexed versions of these?
((ADD|SUB|MULLD|AND|OR|XOR) <t> x g:(MOVDload [off] {sym} ptr mem))
&& ptr.Op != OpSB
- && is20Bit(off)
+ && is20Bit(int64(off))
&& canMergeLoadClobber(v, g, x)
&& clobber(g)
- -> ((ADD|SUB|MULLD|AND|OR|XOR)load <t> [off] {sym} x ptr mem)
+ => ((ADD|SUB|MULLD|AND|OR|XOR)load <t> [off] {sym} x ptr mem)
((ADD|SUB|MULL|AND|OR|XOR)W <t> x g:(MOVWload [off] {sym} ptr mem))
&& ptr.Op != OpSB
- && is20Bit(off)
+ && is20Bit(int64(off))
&& canMergeLoadClobber(v, g, x)
&& clobber(g)
- -> ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
+ => ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
((ADD|SUB|MULL|AND|OR|XOR)W <t> x g:(MOVWZload [off] {sym} ptr mem))
&& ptr.Op != OpSB
- && is20Bit(off)
+ && is20Bit(int64(off))
&& canMergeLoadClobber(v, g, x)
&& clobber(g)
- -> ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
+ => ((ADD|SUB|MULL|AND|OR|XOR)Wload <t> [off] {sym} x ptr mem)
// Combine constant stores into larger (unaligned) stores.
// Avoid SB because constant stores to relative offsets are
@@ -1305,21 +1201,21 @@
(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
&& p.Op != OpSB
&& x.Uses == 1
- && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+ && a.Off() + 1 == c.Off()
&& clobber(x)
- -> (MOVHstoreconst [makeValAndOff(ValAndOff(c).Val()&0xff | ValAndOff(a).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+ => (MOVHstoreconst [makeValAndOff32(c.Val32()&0xff | a.Val32()<<8, a.Off32())] {s} p mem)
(MOVHstoreconst [c] {s} p x:(MOVHstoreconst [a] {s} p mem))
&& p.Op != OpSB
&& x.Uses == 1
- && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+ && a.Off() + 2 == c.Off()
&& clobber(x)
- -> (MOVWstore [ValAndOff(a).Off()] {s} p (MOVDconst [int64(int32(ValAndOff(c).Val()&0xffff | ValAndOff(a).Val()<<16))]) mem)
+ => (MOVWstore [a.Off32()] {s} p (MOVDconst [int64(c.Val32()&0xffff | a.Val32()<<16)]) mem)
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
&& p.Op != OpSB
&& x.Uses == 1
- && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
+ && a.Off() + 4 == c.Off()
&& clobber(x)
- -> (MOVDstore [ValAndOff(a).Off()] {s} p (MOVDconst [ValAndOff(c).Val()&0xffffffff | ValAndOff(a).Val()<<32]) mem)
+ => (MOVDstore [a.Off32()] {s} p (MOVDconst [c.Val()&0xffffffff | a.Val()<<32]) mem)
// Combine stores into larger (unaligned) stores.
// It doesn't work on global data (based on SB) because stores with relative addressing
@@ -1328,93 +1224,52 @@
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVHstore [i-1] {s} p w mem)
+ => (MOVHstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w0:(SRDconst [j] w) x:(MOVBstore [i-1] {s} p (SRDconst [j+8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVHstore [i-1] {s} p w0 mem)
+ => (MOVHstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p w x:(MOVBstore [i-1] {s} p (SRWconst [8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVHstore [i-1] {s} p w mem)
+ => (MOVHstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w0:(SRWconst [j] w) x:(MOVBstore [i-1] {s} p (SRWconst [j+8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVHstore [i-1] {s} p w0 mem)
+ => (MOVHstore [i-1] {s} p w0 mem)
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRDconst [16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVWstore [i-2] {s} p w mem)
+ => (MOVWstore [i-2] {s} p w mem)
(MOVHstore [i] {s} p w0:(SRDconst [j] w) x:(MOVHstore [i-2] {s} p (SRDconst [j+16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVWstore [i-2] {s} p w0 mem)
+ => (MOVWstore [i-2] {s} p w0 mem)
(MOVHstore [i] {s} p w x:(MOVHstore [i-2] {s} p (SRWconst [16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVWstore [i-2] {s} p w mem)
+ => (MOVWstore [i-2] {s} p w mem)
(MOVHstore [i] {s} p w0:(SRWconst [j] w) x:(MOVHstore [i-2] {s} p (SRWconst [j+16] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVWstore [i-2] {s} p w0 mem)
+ => (MOVWstore [i-2] {s} p w0 mem)
(MOVWstore [i] {s} p (SRDconst [32] w) x:(MOVWstore [i-4] {s} p w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVDstore [i-4] {s} p w mem)
+ => (MOVDstore [i-4] {s} p w mem)
(MOVWstore [i] {s} p w0:(SRDconst [j] w) x:(MOVWstore [i-4] {s} p (SRDconst [j+32] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVDstore [i-4] {s} p w0 mem)
-
-(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [8] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVHstoreidx [i-1] {s} p idx w mem)
-(MOVBstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRDconst [j+8] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
-(MOVBstoreidx [i] {s} p idx w x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [8] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVHstoreidx [i-1] {s} p idx w mem)
-(MOVBstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx (SRWconst [j+8] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVHstoreidx [i-1] {s} p idx w0 mem)
-(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [16] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVWstoreidx [i-2] {s} p idx w mem)
-(MOVHstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRDconst [j+16] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
-(MOVHstoreidx [i] {s} p idx w x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [16] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVWstoreidx [i-2] {s} p idx w mem)
-(MOVHstoreidx [i] {s} p idx w0:(SRWconst [j] w) x:(MOVHstoreidx [i-2] {s} p idx (SRWconst [j+16] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVWstoreidx [i-2] {s} p idx w0 mem)
-(MOVWstoreidx [i] {s} p idx w x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [32] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVDstoreidx [i-4] {s} p idx w mem)
-(MOVWstoreidx [i] {s} p idx w0:(SRDconst [j] w) x:(MOVWstoreidx [i-4] {s} p idx (SRDconst [j+32] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVDstoreidx [i-4] {s} p idx w0 mem)
+ => (MOVDstore [i-4] {s} p w0 mem)
// Combine stores into larger (unaligned) stores with the bytes reversed (little endian).
// Store-with-bytes-reversed instructions do not support relative memory addresses,
@@ -1423,87 +1278,46 @@
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVHBRstore [i-1] {s} p w mem)
+ => (MOVHBRstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SRDconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRDconst [j-8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVHBRstore [i-1] {s} p w0 mem)
+ => (MOVHBRstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p (SRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVHBRstore [i-1] {s} p w mem)
+ => (MOVHBRstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p (SRWconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SRWconst [j-8] w) mem))
&& p.Op != OpSB
&& x.Uses == 1
&& clobber(x)
- -> (MOVHBRstore [i-1] {s} p w0 mem)
+ => (MOVHBRstore [i-1] {s} p w0 mem)
(MOVHBRstore [i] {s} p (SRDconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
&& x.Uses == 1
&& clobber(x)
- -> (MOVWBRstore [i-2] {s} p w mem)
+ => (MOVWBRstore [i-2] {s} p w mem)
(MOVHBRstore [i] {s} p (SRDconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRDconst [j-16] w) mem))
&& x.Uses == 1
&& clobber(x)
- -> (MOVWBRstore [i-2] {s} p w0 mem)
+ => (MOVWBRstore [i-2] {s} p w0 mem)
(MOVHBRstore [i] {s} p (SRWconst [16] w) x:(MOVHBRstore [i-2] {s} p w mem))
&& x.Uses == 1
&& clobber(x)
- -> (MOVWBRstore [i-2] {s} p w mem)
+ => (MOVWBRstore [i-2] {s} p w mem)
(MOVHBRstore [i] {s} p (SRWconst [j] w) x:(MOVHBRstore [i-2] {s} p w0:(SRWconst [j-16] w) mem))
&& x.Uses == 1
&& clobber(x)
- -> (MOVWBRstore [i-2] {s} p w0 mem)
+ => (MOVWBRstore [i-2] {s} p w0 mem)
(MOVWBRstore [i] {s} p (SRDconst [32] w) x:(MOVWBRstore [i-4] {s} p w mem))
&& x.Uses == 1
&& clobber(x)
- -> (MOVDBRstore [i-4] {s} p w mem)
+ => (MOVDBRstore [i-4] {s} p w mem)
(MOVWBRstore [i] {s} p (SRDconst [j] w) x:(MOVWBRstore [i-4] {s} p w0:(SRDconst [j-32] w) mem))
&& x.Uses == 1
&& clobber(x)
- -> (MOVDBRstore [i-4] {s} p w0 mem)
-
-(MOVBstoreidx [i] {s} p idx (SRDconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
-(MOVBstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRDconst [j-8] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
-(MOVBstoreidx [i] {s} p idx (SRWconst [8] w) x:(MOVBstoreidx [i-1] {s} p idx w mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVHBRstoreidx [i-1] {s} p idx w mem)
-(MOVBstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVBstoreidx [i-1] {s} p idx w0:(SRWconst [j-8] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVHBRstoreidx [i-1] {s} p idx w0 mem)
-(MOVHBRstoreidx [i] {s} p idx (SRDconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
-(MOVHBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRDconst [j-16] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
-(MOVHBRstoreidx [i] {s} p idx (SRWconst [16] w) x:(MOVHBRstoreidx [i-2] {s} p idx w mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVWBRstoreidx [i-2] {s} p idx w mem)
-(MOVHBRstoreidx [i] {s} p idx (SRWconst [j] w) x:(MOVHBRstoreidx [i-2] {s} p idx w0:(SRWconst [j-16] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVWBRstoreidx [i-2] {s} p idx w0 mem)
-(MOVWBRstoreidx [i] {s} p idx (SRDconst [32] w) x:(MOVWBRstoreidx [i-4] {s} p idx w mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVDBRstoreidx [i-4] {s} p idx w mem)
-(MOVWBRstoreidx [i] {s} p idx (SRDconst [j] w) x:(MOVWBRstoreidx [i-4] {s} p idx w0:(SRDconst [j-32] w) mem))
- && x.Uses == 1
- && clobber(x)
- -> (MOVDBRstoreidx [i-4] {s} p idx w0 mem)
+ => (MOVDBRstore [i-4] {s} p w0 mem)
// Combining byte loads into larger (unaligned) loads.
@@ -1518,7 +1332,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
(OR x1:(MOVBZload [i1] {s} p mem)
sh:(SLDconst [8] x0:(MOVBZload [i0] {s} p mem)))
@@ -1529,7 +1343,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVHZload [i0] {s} p mem)
(ORW x1:(MOVHZload [i1] {s} p mem)
sh:(SLWconst [16] x0:(MOVHZload [i0] {s} p mem)))
@@ -1540,7 +1354,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
(OR x1:(MOVHZload [i1] {s} p mem)
sh:(SLDconst [16] x0:(MOVHZload [i0] {s} p mem)))
@@ -1551,7 +1365,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVWZload [i0] {s} p mem)
(OR x1:(MOVWZload [i1] {s} p mem)
sh:(SLDconst [32] x0:(MOVWZload [i0] {s} p mem)))
@@ -1562,7 +1376,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVDload [i0] {s} p mem)
(ORW
s0:(SLWconst [j0] x0:(MOVBZload [i0] {s} p mem))
@@ -1579,7 +1393,7 @@
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+ => @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
(OR
s0:(SLDconst [j0] x0:(MOVBZload [i0] {s} p mem))
@@ -1596,7 +1410,7 @@
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
+ => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZload [i0] {s} p mem)) y)
(OR
s0:(SLDconst [j0] x0:(MOVHZload [i0] {s} p mem))
@@ -1613,115 +1427,7 @@
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
-
-// Big-endian indexed loads
-
-(ORW x1:(MOVBZloadidx [i1] {s} p idx mem)
- sh:(SLWconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
- && i1 == i0+1
- && p.Op != OpSB
- && x0.Uses == 1
- && x1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
-
-(OR x1:(MOVBZloadidx [i1] {s} p idx mem)
- sh:(SLDconst [8] x0:(MOVBZloadidx [i0] {s} p idx mem)))
- && i1 == i0+1
- && p.Op != OpSB
- && x0.Uses == 1
- && x1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVHZloadidx [i0] {s} p idx mem)
-
-(ORW x1:(MOVHZloadidx [i1] {s} p idx mem)
- sh:(SLWconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
- && i1 == i0+2
- && p.Op != OpSB
- && x0.Uses == 1
- && x1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
-
-(OR x1:(MOVHZloadidx [i1] {s} p idx mem)
- sh:(SLDconst [16] x0:(MOVHZloadidx [i0] {s} p idx mem)))
- && i1 == i0+2
- && p.Op != OpSB
- && x0.Uses == 1
- && x1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVWZloadidx [i0] {s} p idx mem)
-
-(OR x1:(MOVWZloadidx [i1] {s} p idx mem)
- sh:(SLDconst [32] x0:(MOVWZloadidx [i0] {s} p idx mem)))
- && i1 == i0+4
- && p.Op != OpSB
- && x0.Uses == 1
- && x1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVDloadidx [i0] {s} p idx mem)
-
-(ORW
- s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
- or:(ORW
- s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
- y))
- && i1 == i0+1
- && j1 == j0-8
- && j1 % 16 == 0
- && x0.Uses == 1
- && x1.Uses == 1
- && s0.Uses == 1
- && s1.Uses == 1
- && or.Uses == 1
- && mergePoint(b,x0,x1,y) != nil
- && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
-
-(OR
- s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
- or:(OR
- s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
- y))
- && i1 == i0+1
- && j1 == j0-8
- && j1 % 16 == 0
- && x0.Uses == 1
- && x1.Uses == 1
- && s0.Uses == 1
- && s1.Uses == 1
- && or.Uses == 1
- && mergePoint(b,x0,x1,y) != nil
- && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVHZloadidx [i0] {s} p idx mem)) y)
-
-(OR
- s0:(SLDconst [j0] x0:(MOVHZloadidx [i0] {s} p idx mem))
- or:(OR
- s1:(SLDconst [j1] x1:(MOVHZloadidx [i1] {s} p idx mem))
- y))
- && i1 == i0+2
- && j1 == j0-16
- && j1 % 32 == 0
- && x0.Uses == 1
- && x1.Uses == 1
- && s0.Uses == 1
- && s1.Uses == 1
- && or.Uses == 1
- && mergePoint(b,x0,x1,y) != nil
- && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZloadidx [i0] {s} p idx mem)) y)
+ => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j1] (MOVWZload [i0] {s} p mem)) y)
// Little-endian loads
@@ -1734,7 +1440,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+ => @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
(OR x0:(MOVBZload [i0] {s} p mem)
sh:(SLDconst [8] x1:(MOVBZload [i1] {s} p mem)))
@@ -1745,7 +1451,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
+ => @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRload [i0] {s} p mem))
(ORW r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
@@ -1757,7 +1463,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVWBRload [i0] {s} p mem)
(OR r0:(MOVHZreg x0:(MOVHBRload [i0] {s} p mem))
sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem))))
@@ -1769,7 +1475,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
+ => @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRload [i0] {s} p mem))
(OR r0:(MOVWZreg x0:(MOVWBRload [i0] {s} p mem))
sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRload [i1] {s} p mem))))
@@ -1781,7 +1487,7 @@
&& sh.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVDBRload [i0] {s} p mem)
(ORW
s1:(SLWconst [j1] x1:(MOVBZload [i1] {s} p mem))
@@ -1799,7 +1505,7 @@
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+ => @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
(OR
s1:(SLDconst [j1] x1:(MOVBZload [i1] {s} p mem))
@@ -1817,7 +1523,7 @@
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
+ => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRload [i0] {s} p mem))) y)
(OR
s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRload [i1] {s} p mem)))
@@ -1836,168 +1542,53 @@
&& or.Uses == 1
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, r0, r1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
-
-// Little-endian indexed loads
-
-(ORW x0:(MOVBZloadidx [i0] {s} p idx mem)
- sh:(SLWconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
- && p.Op != OpSB
- && i1 == i0+1
- && x0.Uses == 1
- && x1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
-
-(OR x0:(MOVBZloadidx [i0] {s} p idx mem)
- sh:(SLDconst [8] x1:(MOVBZloadidx [i1] {s} p idx mem)))
- && p.Op != OpSB
- && i1 == i0+1
- && x0.Uses == 1
- && x1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))
-
-(ORW r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
- sh:(SLWconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
- && i1 == i0+2
- && x0.Uses == 1
- && x1.Uses == 1
- && r0.Uses == 1
- && r1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (MOVWBRloadidx [i0] {s} p idx mem)
-
-(OR r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem))
- sh:(SLDconst [16] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem))))
- && i1 == i0+2
- && x0.Uses == 1
- && x1.Uses == 1
- && r0.Uses == 1
- && r1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))
-
-(OR r0:(MOVWZreg x0:(MOVWBRloadidx [i0] {s} p idx mem))
- sh:(SLDconst [32] r1:(MOVWZreg x1:(MOVWBRloadidx [i1] {s} p idx mem))))
- && i1 == i0+4
- && x0.Uses == 1
- && x1.Uses == 1
- && r0.Uses == 1
- && r1.Uses == 1
- && sh.Uses == 1
- && mergePoint(b,x0,x1) != nil
- && clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (MOVDBRloadidx [i0] {s} p idx mem)
-
-(ORW
- s1:(SLWconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
- or:(ORW
- s0:(SLWconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
- y))
- && p.Op != OpSB
- && i1 == i0+1
- && j1 == j0+8
- && j0 % 16 == 0
- && x0.Uses == 1
- && x1.Uses == 1
- && s0.Uses == 1
- && s1.Uses == 1
- && or.Uses == 1
- && mergePoint(b,x0,x1,y) != nil
- && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORW <v.Type> (SLWconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
-
-(OR
- s1:(SLDconst [j1] x1:(MOVBZloadidx [i1] {s} p idx mem))
- or:(OR
- s0:(SLDconst [j0] x0:(MOVBZloadidx [i0] {s} p idx mem))
- y))
- && p.Op != OpSB
- && i1 == i0+1
- && j1 == j0+8
- && j0 % 16 == 0
- && x0.Uses == 1
- && x1.Uses == 1
- && s0.Uses == 1
- && s1.Uses == 1
- && or.Uses == 1
- && mergePoint(b,x0,x1,y) != nil
- && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVHZreg (MOVHBRloadidx [i0] {s} p idx mem))) y)
-
-(OR
- s1:(SLDconst [j1] r1:(MOVHZreg x1:(MOVHBRloadidx [i1] {s} p idx mem)))
- or:(OR
- s0:(SLDconst [j0] r0:(MOVHZreg x0:(MOVHBRloadidx [i0] {s} p idx mem)))
- y))
- && i1 == i0+2
- && j1 == j0+16
- && j0 % 32 == 0
- && x0.Uses == 1
- && x1.Uses == 1
- && r0.Uses == 1
- && r1.Uses == 1
- && s0.Uses == 1
- && s1.Uses == 1
- && or.Uses == 1
- && mergePoint(b,x0,x1,y) != nil
- && clobber(x0, x1, r0, r1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRloadidx [i0] {s} p idx mem))) y)
+ => @mergePoint(b,x0,x1,y) (OR <v.Type> (SLDconst <v.Type> [j0] (MOVWZreg (MOVWBRload [i0] {s} p mem))) y)
// Combine stores into store multiples.
// 32-bit
(MOVWstore [i] {s} p w1 x:(MOVWstore [i-4] {s} p w0 mem))
&& p.Op != OpSB
&& x.Uses == 1
- && is20Bit(i-4)
+ && is20Bit(int64(i)-4)
&& clobber(x)
- -> (STM2 [i-4] {s} p w0 w1 mem)
+ => (STM2 [i-4] {s} p w0 w1 mem)
(MOVWstore [i] {s} p w2 x:(STM2 [i-8] {s} p w0 w1 mem))
&& x.Uses == 1
- && is20Bit(i-8)
+ && is20Bit(int64(i)-8)
&& clobber(x)
- -> (STM3 [i-8] {s} p w0 w1 w2 mem)
+ => (STM3 [i-8] {s} p w0 w1 w2 mem)
(MOVWstore [i] {s} p w3 x:(STM3 [i-12] {s} p w0 w1 w2 mem))
&& x.Uses == 1
- && is20Bit(i-12)
+ && is20Bit(int64(i)-12)
&& clobber(x)
- -> (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
+ => (STM4 [i-12] {s} p w0 w1 w2 w3 mem)
(STM2 [i] {s} p w2 w3 x:(STM2 [i-8] {s} p w0 w1 mem))
&& x.Uses == 1
- && is20Bit(i-8)
+ && is20Bit(int64(i)-8)
&& clobber(x)
- -> (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
+ => (STM4 [i-8] {s} p w0 w1 w2 w3 mem)
// 64-bit
(MOVDstore [i] {s} p w1 x:(MOVDstore [i-8] {s} p w0 mem))
&& p.Op != OpSB
&& x.Uses == 1
- && is20Bit(i-8)
+ && is20Bit(int64(i)-8)
&& clobber(x)
- -> (STMG2 [i-8] {s} p w0 w1 mem)
+ => (STMG2 [i-8] {s} p w0 w1 mem)
(MOVDstore [i] {s} p w2 x:(STMG2 [i-16] {s} p w0 w1 mem))
&& x.Uses == 1
- && is20Bit(i-16)
+ && is20Bit(int64(i)-16)
&& clobber(x)
- -> (STMG3 [i-16] {s} p w0 w1 w2 mem)
+ => (STMG3 [i-16] {s} p w0 w1 w2 mem)
(MOVDstore [i] {s} p w3 x:(STMG3 [i-24] {s} p w0 w1 w2 mem))
&& x.Uses == 1
- && is20Bit(i-24)
+ && is20Bit(int64(i)-24)
&& clobber(x)
- -> (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
+ => (STMG4 [i-24] {s} p w0 w1 w2 w3 mem)
(STMG2 [i] {s} p w2 w3 x:(STMG2 [i-16] {s} p w0 w1 mem))
&& x.Uses == 1
- && is20Bit(i-16)
+ && is20Bit(int64(i)-16)
&& clobber(x)
- -> (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
+ => (STMG4 [i-16] {s} p w0 w1 w2 w3 mem)
// Convert 32-bit store multiples into 64-bit stores.
-(STM2 [i] {s} p (SRDconst [32] x) x mem) -> (MOVDstore [i] {s} p x mem)
+(STM2 [i] {s} p (SRDconst [32] x) x mem) => (MOVDstore [i] {s} p x mem)
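
The S390X rules above are being converted from the untyped "->" form to the typed "=>" form: rule offsets become int32, so guards widen them to int64 before summing (is20Bit(int64(off1)+int64(off2))), ValAndOff auxints are read and updated through methods (Val32, Off32, addOffset32, canAdd32) instead of raw ValAndOff(sc) casts, and merged symbols go through mergeSymTyped. A minimal sketch of what such helpers look like, reconstructed from how the guards use them rather than copied from the compiler:

package sketch

// is20Bit reports whether n fits in a signed 20-bit displacement, the range
// checked by the is20Bit(int64(off1)+int64(off2)) guards in the rules above.
func is20Bit(n int64) bool { return -(1 << 19) <= n && n < (1 << 19) }

// ValAndOff packs a 32-bit value and a 32-bit offset into a single int64
// auxint; the rules' Val32/Off32/addOffset32/canAdd32 calls are accessors
// over the two halves.
type ValAndOff int64

func (x ValAndOff) Val() int64 { return int64(x) >> 32 }
func (x ValAndOff) Off() int64 { return int64(int32(x)) }

// canAdd reports whether the offset half can grow by off without
// overflowing 32 bits, mirroring the canAdd32 guards above.
func (x ValAndOff) canAdd(off int64) bool {
	newoff := x.Off() + off
	return newoff == int64(int32(newoff))
}
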
diff --git a/src/cmd/compile/internal/ssa/gen/dec.rules b/src/cmd/compile/internal/ssa/gen/dec.rules
index 3fd2be409f..4c677f8418 100644
--- a/src/cmd/compile/internal/ssa/gen/dec.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec.rules
@@ -66,14 +66,14 @@
(Load <typ.Int>
(OffPtr <typ.IntPtr> [2*config.PtrSize] ptr)
mem))
-(Store dst (SliceMake ptr len cap) mem) =>
+(Store {t} dst (SliceMake ptr len cap) mem) =>
(Store {typ.Int}
(OffPtr <typ.IntPtr> [2*config.PtrSize] dst)
cap
(Store {typ.Int}
(OffPtr <typ.IntPtr> [config.PtrSize] dst)
len
- (Store {typ.BytePtr} dst ptr mem)))
+ (Store {t.Elem().PtrTo()} dst ptr mem)))
// interface ops
(ITab (IMake itab _)) => itab
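
In the dec.rules hunk above, the slice store now receives the store's type {t}, so the data-pointer word is written with the element's pointer type, t.Elem().PtrTo(), instead of the generic typ.BytePtr, presumably so later passes see a precisely typed pointer store. Illustrative layout only (not compiler code), showing the three words the SliceMake store decomposes into:

package sketch

import "unsafe"

// (Store {t} dst (SliceMake ptr len cap) mem) becomes three word stores at
// these offsets; with {t} threaded through, the first store can carry
// pointer-to-element rather than *byte.
type sliceHeader struct {
	ptr      unsafe.Pointer // offset 0
	len, cap int            // offsets PtrSize and 2*PtrSize
}
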
diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go
index e520503ab1..9e2e112cd7 100644
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -1423,7 +1423,8 @@ func parseValue(val string, arch arch, loc string) (op opData, oparch, typ, auxi
func opHasAuxInt(op opData) bool {
switch op.aux {
- case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64", "SymOff", "SymValAndOff", "TypSize", "ARM64BitField", "FlagConstant":
+ case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "Float32", "Float64",
+ "SymOff", "SymValAndOff", "TypSize", "ARM64BitField", "FlagConstant", "CCop":
return true
}
return false
@@ -1431,7 +1432,7 @@ func opHasAuxInt(op opData) bool {
func opHasAux(op opData) bool {
switch op.aux {
- case "String", "Sym", "SymOff", "SymValAndOff", "Typ", "TypSize", "CCop",
+ case "String", "Sym", "SymOff", "SymValAndOff", "Typ", "TypSize",
"S390XCCMask", "S390XRotateParams":
return true
}
@@ -1784,8 +1785,6 @@ func (op opData) auxType() string {
return "s390x.CCMask"
case "S390XRotateParams":
return "s390x.RotateParams"
- case "CCop":
- return "CCop"
default:
return "invalid"
}
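
The rulegen.go hunks reclassify the CCop aux kind: it now reports true from opHasAuxInt and no longer from opHasAux, and its auxType case is dropped, so rules carry condition-code ops in the value's integer slot. A sketch of the two side channels this toggles, with field names recalled from the ssa package rather than taken from this patch:

package sketch

// Each SSA value has two payload channels; rulegen uses the aux kind to
// decide whether a rule's [auxint] and {aux} brackets refer to AuxInt or Aux.
// After this change, CCop is matched and rewritten through AuxInt like the
// other integer-encodable kinds (offsets, immediates, flag constants).
type Value struct {
	AuxInt int64       // integer-encodable payload
	Aux    interface{} // reference payload: symbols, types, strings, rotate params
}
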