author    Jeremy Faller <jeremy@golang.org>  2020-09-30 17:57:14 -0400
committer Jeremy Faller <jeremy@golang.org>  2020-09-30 18:00:58 -0400
commit    91e4d2d57bc341dd82c98247117114c851380aef (patch)
tree      15a2d023cdc63543cf8a6e99f8a561c0a0459000 /src/cmd/compile/internal/ssa/gen/AMD64.rules
parent    c863e14a6c15e174ac0979ddd7f9530d6a4ec9cc (diff)
parent    846dce9d05f19a1f53465e62a304dea21b99f910 (diff)
[dev.link] Merge branch 'master' into dev.link
2 conflicts, which make sense:
src/cmd/internal/obj/objfile.go
src/cmd/link/internal/loader/loader.go
Change-Id: Ib224e2d248cb568fa1e888af79dd908b2f5e05ff
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen/AMD64.rules')
-rw-r--r--  src/cmd/compile/internal/ssa/gen/AMD64.rules  992
1 file changed, 497 insertions(+), 495 deletions(-)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 8898fe55eb..408678f054 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -404,35 +404,35 @@
 (ConstBool [c]) => (MOVLconst [int32(b2i(c))])

 // Lowering calls
-(StaticCall ...) -> (CALLstatic ...)
-(ClosureCall ...) -> (CALLclosure ...)
-(InterCall ...) -> (CALLinter ...)
+(StaticCall ...) => (CALLstatic ...)
+(ClosureCall ...) => (CALLclosure ...)
+(InterCall ...) => (CALLinter ...)

 // Lowering conditional moves
 // If the condition is a SETxx, we can just run a CMOV from the comparison that was
 // setting the flags.
 // Legend: HI=unsigned ABOVE, CS=unsigned BELOW, CC=unsigned ABOVE EQUAL, LS=unsigned BELOW EQUAL
 (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond))
  && (is64BitInt(t) || isPtr(t))
- -> (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+ => (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
 (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is32BitInt(t)
- -> (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+ => (CMOVL(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
 (CondSelect <t> x y (SET(EQ|NE|L|G|LE|GE|A|B|AE|BE|EQF|NEF|GF|GEF) cond)) && is16BitInt(t)
- -> (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)
+ => (CMOVW(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS|EQF|NEF|GTF|GEF) y x cond)

 // If the condition does not set the flags, we need to generate a comparison.
 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 1
- -> (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
+ => (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 2
- -> (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
+ => (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 4
- -> (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
+ => (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))

 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
- -> (CMOVQNE y x (CMPQconst [0] check))
+ => (CMOVQNE y x (CMPQconst [0] check))
 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
- -> (CMOVLNE y x (CMPQconst [0] check))
+ => (CMOVLNE y x (CMPQconst [0] check))
 (CondSelect <t> x y check) && !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
- -> (CMOVWNE y x (CMPQconst [0] check))
+ => (CMOVWNE y x (CMPQconst [0] check))

 // Absorb InvertFlags
 (CMOVQ(EQ|NE|LT|GT|LE|GE|HI|CS|CC|LS) x y (InvertFlags cond))
@@ -465,7 +465,7 @@
 (GetCallerSP ...) => (LoweredGetCallerSP ...)

 (HasCPUFeature {s}) => (SETNE (CMPQconst [0] (LoweredHasCPUFeature {s})))
-(Addr ...) -> (LEAQ ...)
+(Addr {sym} base) => (LEAQ {sym} base)
 (LocalAddr {sym} base _) => (LEAQ {sym} base)

 (MOVBstore [off] {sym} ptr y:(SETL x) mem) && y.Uses == 1 => (SETLstore [off] {sym} ptr x mem)
@@ -501,10 +501,10 @@
 (If cond yes no) => (NE (TESTB cond cond) yes no)

 // Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
-(AtomicLoad8 ...) -> (MOVBatomicload ...)
-(AtomicLoad32 ...) -> (MOVLatomicload ...)
-(AtomicLoad64 ...) -> (MOVQatomicload ...)
-(AtomicLoadPtr ...) -> (MOVQatomicload ...)
+(AtomicLoad8 ptr mem) => (MOVBatomicload ptr mem)
+(AtomicLoad32 ptr mem) => (MOVLatomicload ptr mem)
+(AtomicLoad64 ptr mem) => (MOVQatomicload ptr mem)
+(AtomicLoadPtr ptr mem) => (MOVQatomicload ptr mem)

 // Atomic stores. We use XCHG to prevent the hardware reordering a subsequent load.
 // TODO: most runtime uses of atomic stores don't need that property. Use normal stores for those?
@@ -526,15 +526,15 @@
 (Select1 (AddTupleFirst64 _ tuple)) => (Select1 tuple)

 // Atomic compare and swap.
-(AtomicCompareAndSwap32 ...) -> (CMPXCHGLlock ...)
-(AtomicCompareAndSwap64 ...) -> (CMPXCHGQlock ...)
+(AtomicCompareAndSwap32 ptr old new_ mem) => (CMPXCHGLlock ptr old new_ mem)
+(AtomicCompareAndSwap64 ptr old new_ mem) => (CMPXCHGQlock ptr old new_ mem)

 // Atomic memory updates.
-(AtomicAnd8 ...) -> (ANDBlock ...)
-(AtomicOr8 ...) -> (ORBlock ...)
+(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
+(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)

 // Write barrier.
-(WB ...) -> (LoweredWB ...)
+(WB ...) => (LoweredWB ...)

 (PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
 (PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
@@ -853,7 +853,7 @@
 (ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
 (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16]))))
  && v.Type.Size() == 2
- -> (ROLW x y)
+ => (ROLW x y)
 (ORL (SHRW x (AND(Q|L)const y [15]))
 (SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
  && v.Type.Size() == 2
@@ -1134,268 +1134,270 @@
 // We need to fold LEAQ into the MOVx ops so that the live variable analysis knows
 // what variables are being read/written by the ops.
 (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
 (MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
- (MOV(Q|L|W|B)storeconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) =>
+ (MOV(Q|L|W|B)storeconst [ValAndOff(sc).addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
 (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
 ((ADD|SUB|AND|OR|XOR)Qload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
 ((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
 (CMP(Q|L|W|B)load [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (CMP(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (CMP(Q|L|W|B)load [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
 (CMP(Q|L|W|B)constload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
- && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
- (CMP(Q|L|W|B)constload [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
+ (CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {mergeSymTyped(sym1,sym2)} base mem)
 ((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
 ((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
- && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
- ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSymTyped(sym1,sym2)} base mem)
 ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
- && ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
- ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSymTyped(sym1,sym2)} base mem)
 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
 ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
- && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)

 // fold LEAQs together
-(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
+(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (LEAQ [off1+off2] {mergeSymTyped(sym1,sym2)} x)

 // LEAQ into LEAQ1
-(LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
- (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+(LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
+ (LEAQ1 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)

 // LEAQ1 into LEAQ
-(LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
+(LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (LEAQ1 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)

 // LEAQ into LEAQ[248]
-(LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
- (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
-(LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
- (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
-(LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && x.Op != OpSB ->
- (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+(LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
+ (LEAQ2 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+(LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
+ (LEAQ4 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+(LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
+ (LEAQ8 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)

 // LEAQ[248] into LEAQ
-(LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
-(LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
-(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
+(LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (LEAQ2 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+(LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (LEAQ4 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (LEAQ8 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)

 // LEAQ[1248] into LEAQ[1248]. Only some such merges are possible.
-(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y)
-(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x)
-(LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil ->
+(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (LEAQ2 [off1+off2] {mergeSymTyped(sym1, sym2)} x y)
+(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+ (LEAQ2 [off1+off2] {mergeSymTyped(sym1, sym2)} y x)
+(LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+2*int64(off2)) && sym2 == nil =>
 (LEAQ4 [off1+2*off2] {sym1} x y)
-(LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil ->
+(LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+4*int64(off2)) && sym2 == nil =>
 (LEAQ8 [off1+4*off2] {sym1} x y)
 // TODO: more?

 // Lower LEAQ2/4/8 when the offset is a constant
-(LEAQ2 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(off+scale*2) ->
- (LEAQ [off+scale*2] {sym} x)
-(LEAQ4 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(off+scale*4) ->
- (LEAQ [off+scale*4] {sym} x)
-(LEAQ8 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(off+scale*8) ->
- (LEAQ [off+scale*8] {sym} x)
+(LEAQ2 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*2) =>
+ (LEAQ [off+int32(scale)*2] {sym} x)
+(LEAQ4 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*4) =>
+ (LEAQ [off+int32(scale)*4] {sym} x)
+(LEAQ8 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(int64(off)+int64(scale)*8) =>
+ (LEAQ [off+int32(scale)*8] {sym} x)

 // Absorb InvertFlags into branches.
-(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
-(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
-(LE (InvertFlags cmp) yes no) -> (GE cmp yes no)
-(GE (InvertFlags cmp) yes no) -> (LE cmp yes no)
-(ULT (InvertFlags cmp) yes no) -> (UGT cmp yes no)
-(UGT (InvertFlags cmp) yes no) -> (ULT cmp yes no)
-(ULE (InvertFlags cmp) yes no) -> (UGE cmp yes no)
-(UGE (InvertFlags cmp) yes no) -> (ULE cmp yes no)
-(EQ (InvertFlags cmp) yes no) -> (EQ cmp yes no)
-(NE (InvertFlags cmp) yes no) -> (NE cmp yes no)
+(LT (InvertFlags cmp) yes no) => (GT cmp yes no)
+(GT (InvertFlags cmp) yes no) => (LT cmp yes no)
+(LE (InvertFlags cmp) yes no) => (GE cmp yes no)
+(GE (InvertFlags cmp) yes no) => (LE cmp yes no)
+(ULT (InvertFlags cmp) yes no) => (UGT cmp yes no)
+(UGT (InvertFlags cmp) yes no) => (ULT cmp yes no)
+(ULE (InvertFlags cmp) yes no) => (UGE cmp yes no)
+(UGE (InvertFlags cmp) yes no) => (ULE cmp yes no)
+(EQ (InvertFlags cmp) yes no) => (EQ cmp yes no)
+(NE (InvertFlags cmp) yes no) => (NE cmp yes no)

 // Constant comparisons.
-(CMPQconst (MOVQconst [x]) [y]) && x==y -> (FlagEQ)
-(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
-(CMPQconst (MOVQconst [x]) [y]) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
-(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
-(CMPQconst (MOVQconst [x]) [y]) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
-(CMPLconst (MOVLconst [x]) [y]) && int32(x)==int32(y) -> (FlagEQ)
-(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)<uint32(y) -> (FlagLT_ULT)
-(CMPLconst (MOVLconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
-(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
-(CMPLconst (MOVLconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
-(CMPWconst (MOVLconst [x]) [y]) && int16(x)==int16(y) -> (FlagEQ)
-(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)<uint16(y) -> (FlagLT_ULT)
-(CMPWconst (MOVLconst [x]) [y]) && int16(x)<int16(y) && uint16(x)>uint16(y) -> (FlagLT_UGT)
-(CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)<uint16(y) -> (FlagGT_ULT)
-(CMPWconst (MOVLconst [x]) [y]) && int16(x)>int16(y) && uint16(x)>uint16(y) -> (FlagGT_UGT)
-(CMPBconst (MOVLconst [x]) [y]) && int8(x)==int8(y) -> (FlagEQ)
-(CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)<uint8(y) -> (FlagLT_ULT)
-(CMPBconst (MOVLconst [x]) [y]) && int8(x)<int8(y) && uint8(x)>uint8(y) -> (FlagLT_UGT)
-(CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)<uint8(y) -> (FlagGT_ULT)
-(CMPBconst (MOVLconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT)
+(CMPQconst (MOVQconst [x]) [y]) && x==int64(y) => (FlagEQ)
+(CMPQconst (MOVQconst [x]) [y]) && x<int64(y) && uint64(x)<uint64(int64(y)) => (FlagLT_ULT)
+(CMPQconst (MOVQconst [x]) [y]) && x<int64(y) && uint64(x)>uint64(int64(y)) => (FlagLT_UGT)
+(CMPQconst (MOVQconst [x]) [y]) && x>int64(y) && uint64(x)<uint64(int64(y)) => (FlagGT_ULT)
+(CMPQconst (MOVQconst [x]) [y]) && x>int64(y) && uint64(x)>uint64(int64(y)) => (FlagGT_UGT)
+(CMPLconst (MOVLconst [x]) [y]) && x==y => (FlagEQ)
+(CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)<uint32(y) => (FlagLT_ULT)
+(CMPLconst (MOVLconst [x]) [y]) && x<y && uint32(x)>uint32(y) => (FlagLT_UGT)
+(CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)<uint32(y) => (FlagGT_ULT)
+(CMPLconst (MOVLconst [x]) [y]) && x>y && uint32(x)>uint32(y) => (FlagGT_UGT)
+(CMPWconst (MOVLconst [x]) [y]) && int16(x)==y => (FlagEQ)
+(CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)<uint16(y) => (FlagLT_ULT)
+(CMPWconst (MOVLconst [x]) [y]) && int16(x)<y && uint16(x)>uint16(y) => (FlagLT_UGT)
+(CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)<uint16(y) => (FlagGT_ULT)
+(CMPWconst (MOVLconst [x]) [y]) && int16(x)>y && uint16(x)>uint16(y) => (FlagGT_UGT)
+(CMPBconst (MOVLconst [x]) [y]) && int8(x)==y => (FlagEQ)
+(CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)<uint8(y) => (FlagLT_ULT)
+(CMPBconst (MOVLconst [x]) [y]) && int8(x)<y && uint8(x)>uint8(y) => (FlagLT_UGT)
+(CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)<uint8(y) => (FlagGT_ULT)
+(CMPBconst (MOVLconst [x]) [y]) && int8(x)>y && uint8(x)>uint8(y) => (FlagGT_UGT)

 // CMPQconst requires a 32 bit const, but we can still constant-fold 64 bit consts.
 // In theory this applies to any of the simplifications above,
 // but CMPQ is the only one I've actually seen occur.
-(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x==y -> (FlagEQ)
-(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x<y && uint64(x)<uint64(y) -> (FlagLT_ULT)
-(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x<y && uint64(x)>uint64(y) -> (FlagLT_UGT)
-(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x>y && uint64(x)<uint64(y) -> (FlagGT_ULT)
-(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x>y && uint64(x)>uint64(y) -> (FlagGT_UGT)
+(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x==y => (FlagEQ)
+(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x<y && uint64(x)<uint64(y) => (FlagLT_ULT)
+(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x<y && uint64(x)>uint64(y) => (FlagLT_UGT)
+(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x>y && uint64(x)<uint64(y) => (FlagGT_ULT)
+(CMPQ (MOVQconst [x]) (MOVQconst [y])) && x>y && uint64(x)>uint64(y) => (FlagGT_UGT)

 // Other known comparisons.
-(CMPQconst (MOVBQZX _) [c]) && 0xFF < c -> (FlagLT_ULT)
-(CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c -> (FlagLT_ULT)
-(CMPQconst (MOVLQZX _) [c]) && 0xFFFFFFFF < c -> (FlagLT_ULT)
-(CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) -> (FlagLT_ULT)
-(CMPQconst (SHRQconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) -> (FlagLT_ULT)
-(CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
-(CMPQconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
-(CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT)
-(CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT)
-(CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < int8(n) -> (FlagLT_ULT)
+(CMPQconst (MOVBQZX _) [c]) && 0xFF < c => (FlagLT_ULT)
+(CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c => (FlagLT_ULT)
+(CMPLconst (SHRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 32 && (1<<uint64(32-c)) <= uint64(n) => (FlagLT_ULT)
+(CMPQconst (SHRQconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 64 && (1<<uint64(64-c)) <= uint64(n) => (FlagLT_ULT)
+(CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
+(CMPQconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
+(CMPLconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
+(CMPWconst (ANDLconst _ [m]) [n]) && 0 <= m && int16(m) < n => (FlagLT_ULT)
+(CMPBconst (ANDLconst _ [m]) [n]) && 0 <= m && int8(m) < n => (FlagLT_ULT)

 // TESTQ c c sets flags like CMPQ c 0.
-(TEST(Q|L)const [c] (MOV(Q|L)const [c])) && c == 0 -> (FlagEQ)
-(TEST(Q|L)const [c] (MOV(Q|L)const [c])) && c < 0 -> (FlagLT_UGT)
-(TEST(Q|L)const [c] (MOV(Q|L)const [c])) && c > 0 -> (FlagGT_UGT)
+(TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c == 0 => (FlagEQ)
+(TESTLconst [c] (MOVLconst [c])) && c == 0 => (FlagEQ)
+(TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c < 0 => (FlagLT_UGT)
+(TESTLconst [c] (MOVLconst [c])) && c < 0 => (FlagLT_UGT)
+(TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c > 0 => (FlagGT_UGT)
+(TESTLconst [c] (MOVLconst [c])) && c > 0 => (FlagGT_UGT)

 // TODO: DIVxU also.

 // Absorb flag constants into SBB ops.
-(SBBQcarrymask (FlagEQ)) -> (MOVQconst [0])
-(SBBQcarrymask (FlagLT_ULT)) -> (MOVQconst [-1])
-(SBBQcarrymask (FlagLT_UGT)) -> (MOVQconst [0])
-(SBBQcarrymask (FlagGT_ULT)) -> (MOVQconst [-1])
-(SBBQcarrymask (FlagGT_UGT)) -> (MOVQconst [0])
-(SBBLcarrymask (FlagEQ)) -> (MOVLconst [0])
-(SBBLcarrymask (FlagLT_ULT)) -> (MOVLconst [-1])
-(SBBLcarrymask (FlagLT_UGT)) -> (MOVLconst [0])
-(SBBLcarrymask (FlagGT_ULT)) -> (MOVLconst [-1])
-(SBBLcarrymask (FlagGT_UGT)) -> (MOVLconst [0])
+(SBBQcarrymask (FlagEQ)) => (MOVQconst [0])
+(SBBQcarrymask (FlagLT_ULT)) => (MOVQconst [-1])
+(SBBQcarrymask (FlagLT_UGT)) => (MOVQconst [0])
+(SBBQcarrymask (FlagGT_ULT)) => (MOVQconst [-1])
+(SBBQcarrymask (FlagGT_UGT)) => (MOVQconst [0])
+(SBBLcarrymask (FlagEQ)) => (MOVLconst [0])
+(SBBLcarrymask (FlagLT_ULT)) => (MOVLconst [-1])
+(SBBLcarrymask (FlagLT_UGT)) => (MOVLconst [0])
+(SBBLcarrymask (FlagGT_ULT)) => (MOVLconst [-1])
+(SBBLcarrymask (FlagGT_UGT)) => (MOVLconst [0])

 // Absorb flag constants into branches.
-((EQ|LE|GE|ULE|UGE) (FlagEQ) yes no) -> (First yes no)
-((NE|LT|GT|ULT|UGT) (FlagEQ) yes no) -> (First no yes)
-((NE|LT|LE|ULT|ULE) (FlagLT_ULT) yes no) -> (First yes no)
-((EQ|GT|GE|UGT|UGE) (FlagLT_ULT) yes no) -> (First no yes)
-((NE|LT|LE|UGT|UGE) (FlagLT_UGT) yes no) -> (First yes no)
-((EQ|GT|GE|ULT|ULE) (FlagLT_UGT) yes no) -> (First no yes)
-((NE|GT|GE|ULT|ULE) (FlagGT_ULT) yes no) -> (First yes no)
-((EQ|LT|LE|UGT|UGE) (FlagGT_ULT) yes no) -> (First no yes)
-((NE|GT|GE|UGT|UGE) (FlagGT_UGT) yes no) -> (First yes no)
-((EQ|LT|LE|ULT|ULE) (FlagGT_UGT) yes no) -> (First no yes)
+((EQ|LE|GE|ULE|UGE) (FlagEQ) yes no) => (First yes no)
+((NE|LT|GT|ULT|UGT) (FlagEQ) yes no) => (First no yes)
+((NE|LT|LE|ULT|ULE) (FlagLT_ULT) yes no) => (First yes no)
+((EQ|GT|GE|UGT|UGE) (FlagLT_ULT) yes no) => (First no yes)
+((NE|LT|LE|UGT|UGE) (FlagLT_UGT) yes no) => (First yes no)
+((EQ|GT|GE|ULT|ULE) (FlagLT_UGT) yes no) => (First no yes)
+((NE|GT|GE|ULT|ULE) (FlagGT_ULT) yes no) => (First yes no)
+((EQ|LT|LE|UGT|UGE) (FlagGT_ULT) yes no) => (First no yes)
+((NE|GT|GE|UGT|UGE) (FlagGT_UGT) yes no) => (First yes no)
+((EQ|LT|LE|ULT|ULE) (FlagGT_UGT) yes no) => (First no yes)

 // Absorb flag constants into SETxx ops.
-((SETEQ|SETLE|SETGE|SETBE|SETAE) (FlagEQ)) -> (MOVLconst [1])
-((SETNE|SETL|SETG|SETB|SETA) (FlagEQ)) -> (MOVLconst [0])
-((SETNE|SETL|SETLE|SETB|SETBE) (FlagLT_ULT)) -> (MOVLconst [1])
-((SETEQ|SETG|SETGE|SETA|SETAE) (FlagLT_ULT)) -> (MOVLconst [0])
-((SETNE|SETL|SETLE|SETA|SETAE) (FlagLT_UGT)) -> (MOVLconst [1])
-((SETEQ|SETG|SETGE|SETB|SETBE) (FlagLT_UGT)) -> (MOVLconst [0])
-((SETNE|SETG|SETGE|SETB|SETBE) (FlagGT_ULT)) -> (MOVLconst [1])
-((SETEQ|SETL|SETLE|SETA|SETAE) (FlagGT_ULT)) -> (MOVLconst [0])
-((SETNE|SETG|SETGE|SETA|SETAE) (FlagGT_UGT)) -> (MOVLconst [1])
-((SETEQ|SETL|SETLE|SETB|SETBE) (FlagGT_UGT)) -> (MOVLconst [0])
-
-(SETEQstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETEQstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETEQstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETEQstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETEQstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-
-(SETNEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETNEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETNEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETNEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETNEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-
-(SETLstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETLstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETLstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETLstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETLstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-
-(SETLEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETLEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETLEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETLEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETLEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-
-(SETGstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETGstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETGstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETGstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETGstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-
-(SETGEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETGEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETGEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETGEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETGEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-
-(SETBstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETBstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETBstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETBstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETBstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-
-(SETBEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETBEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETBEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETBEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETBEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-
-(SETAstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETAstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETAstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETAstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETAstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-
-(SETAEstore [off] {sym} ptr (FlagEQ) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETAEstore [off] {sym} ptr (FlagLT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETAEstore [off] {sym} ptr (FlagLT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
-(SETAEstore [off] {sym} ptr (FlagGT_ULT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
-(SETAEstore [off] {sym} ptr (FlagGT_UGT) mem) -> (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+((SETEQ|SETLE|SETGE|SETBE|SETAE) (FlagEQ)) => (MOVLconst [1])
+((SETNE|SETL|SETG|SETB|SETA) (FlagEQ)) => (MOVLconst [0])
+((SETNE|SETL|SETLE|SETB|SETBE) (FlagLT_ULT)) => (MOVLconst [1])
+((SETEQ|SETG|SETGE|SETA|SETAE) (FlagLT_ULT)) => (MOVLconst [0])
+((SETNE|SETL|SETLE|SETA|SETAE) (FlagLT_UGT)) => (MOVLconst [1])
+((SETEQ|SETG|SETGE|SETB|SETBE) (FlagLT_UGT)) => (MOVLconst [0])
+((SETNE|SETG|SETGE|SETB|SETBE) (FlagGT_ULT)) => (MOVLconst [1])
+((SETEQ|SETL|SETLE|SETA|SETAE) (FlagGT_ULT)) => (MOVLconst [0])
+((SETNE|SETG|SETGE|SETA|SETAE) (FlagGT_UGT)) => (MOVLconst [1])
+((SETEQ|SETL|SETLE|SETB|SETBE) (FlagGT_UGT)) => (MOVLconst [0])
+
+(SETEQstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETEQstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETEQstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETEQstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETEQstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+
+(SETNEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETNEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETNEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETNEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETNEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+
+(SETLstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETLstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETLstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETLstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETLstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+
+(SETLEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETLEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETLEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETLEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETLEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+
+(SETGstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETGstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETGstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETGstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETGstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+
+(SETGEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETGEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETGEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETGEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETGEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+
+(SETBstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETBstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETBstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETBstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETBstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+
+(SETBEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETBEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETBEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETBEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETBEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+
+(SETAstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETAstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETAstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETAstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETAstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+
+(SETAEstore [off] {sym} ptr (FlagEQ) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETAEstore [off] {sym} ptr (FlagLT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETAEstore [off] {sym} ptr (FlagLT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)
+(SETAEstore [off] {sym} ptr (FlagGT_ULT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [0]) mem)
+(SETAEstore [off] {sym} ptr (FlagGT_UGT) mem) => (MOVBstore [off] {sym} ptr (MOVLconst <typ.UInt8> [1]) mem)

 // Remove redundant *const ops
-(ADDQconst [0] x) -> x
-(ADDLconst [c] x) && int32(c)==0 -> x
-(SUBQconst [0] x) -> x
-(SUBLconst [c] x) && int32(c) == 0 -> x
-(ANDQconst [0] _) -> (MOVQconst [0])
-(ANDLconst [c] _) && int32(c)==0 -> (MOVLconst [0])
-(ANDQconst [-1] x) -> x
-(ANDLconst [c] x) && int32(c)==-1 -> x
-(ORQconst [0] x) -> x
-(ORLconst [c] x) && int32(c)==0 -> x
-(ORQconst [-1] _) -> (MOVQconst [-1])
-(ORLconst [c] _) && int32(c)==-1 -> (MOVLconst [-1])
-(XORQconst [0] x) -> x
-(XORLconst [c] x) && int32(c)==0 -> x
+(ADDQconst [0] x) => x
+(ADDLconst [c] x) && c==0 => x
+(SUBQconst [0] x) => x
+(SUBLconst [c] x) && c==0 => x
+(ANDQconst [0] _) => (MOVQconst [0])
+(ANDLconst [c] _) && c==0 => (MOVLconst [0])
+(ANDQconst [-1] x) => x
+(ANDLconst [c] x) && c==-1 => x
+(ORQconst [0] x) => x
+(ORLconst [c] x) && c==0 => x
+(ORQconst [-1] _) => (MOVQconst [-1])
+(ORLconst [c] _) && c==-1 => (MOVLconst [-1])
+(XORQconst [0] x) => x
+(XORLconst [c] x) && c==0 => x
 // TODO: since we got rid of the W/B versions, we might miss
 // things like (ANDLconst [0x100] x) which were formerly
 // (ANDBconst [0] x). Probably doesn't happen very often.
@@ -1404,99 +1406,99 @@

 // Remove redundant ops
 // Not in generic rules, because they may appear after lowering e. g. Slicemask
-(NEG(Q|L) (NEG(Q|L) x)) -> x
-(NEG(Q|L) s:(SUB(Q|L) x y)) && s.Uses == 1 -> (SUB(Q|L) y x)
+(NEG(Q|L) (NEG(Q|L) x)) => x
+(NEG(Q|L) s:(SUB(Q|L) x y)) && s.Uses == 1 => (SUB(Q|L) y x)

 // Convert constant subtracts to constant adds
-(SUBQconst [c] x) && c != -(1<<31) -> (ADDQconst [-c] x)
-(SUBLconst [c] x) -> (ADDLconst [int64(int32(-c))] x)
+(SUBQconst [c] x) && c != -(1<<31) => (ADDQconst [-c] x)
+(SUBLconst [c] x) => (ADDLconst [-c] x)

 // generic constant folding
 // TODO: more of this
-(ADDQconst [c] (MOVQconst [d])) -> (MOVQconst [c+d])
-(ADDLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c+d))])
-(ADDQconst [c] (ADDQconst [d] x)) && is32Bit(c+d) -> (ADDQconst [c+d] x)
-(ADDLconst [c] (ADDLconst [d] x)) -> (ADDLconst [int64(int32(c+d))] x)
-(SUBQconst (MOVQconst [d]) [c]) -> (MOVQconst [d-c])
-(SUBQconst (SUBQconst x [d]) [c]) && is32Bit(-c-d) -> (ADDQconst [-c-d] x)
-(SARQconst [c] (MOVQconst [d])) -> (MOVQconst [d>>uint64(c)])
-(SARLconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int32(d))>>uint64(c)])
-(SARWconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int16(d))>>uint64(c)])
-(SARBconst [c] (MOVQconst [d])) -> (MOVQconst [int64(int8(d))>>uint64(c)])
-(NEGQ (MOVQconst [c])) -> (MOVQconst [-c])
-(NEGL (MOVLconst [c])) -> (MOVLconst [int64(int32(-c))])
-(MULQconst [c] (MOVQconst [d])) -> (MOVQconst [c*d])
-(MULLconst [c] (MOVLconst [d])) -> (MOVLconst [int64(int32(c*d))])
-(ANDQconst [c] (MOVQconst [d])) -> (MOVQconst [c&d])
-(ANDLconst [c] (MOVLconst [d])) -> (MOVLconst [c&d])
-(ORQconst [c] (MOVQconst [d])) -> (MOVQconst [c|d])
-(ORLconst [c] (MOVLconst [d])) -> (MOVLconst [c|d])
-(XORQconst [c] (MOVQconst [d])) -> (MOVQconst [c^d])
-(XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
-(NOTQ (MOVQconst [c])) -> (MOVQconst [^c])
-(NOTL (MOVLconst [c])) -> (MOVLconst [^c])
-(BTSQconst [c] (MOVQconst [d])) -> (MOVQconst [d|(1<<uint32(c))])
-(BTSLconst [c] (MOVLconst [d])) -> (MOVLconst [d|(1<<uint32(c))])
-(BTRQconst [c] (MOVQconst [d])) -> (MOVQconst [d&^(1<<uint32(c))])
-(BTRLconst [c] (MOVLconst [d])) -> (MOVLconst [d&^(1<<uint32(c))])
-(BTCQconst [c] (MOVQconst [d])) -> (MOVQconst [d^(1<<uint32(c))])
-(BTCLconst [c] (MOVLconst [d])) -> (MOVLconst [d^(1<<uint32(c))])
+(ADDQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)+d])
+(ADDLconst [c] (MOVLconst [d])) => (MOVLconst [c+d])
+(ADDQconst [c] (ADDQconst [d] x)) && is32Bit(int64(c)+int64(d)) => (ADDQconst [c+d] x)
+(ADDLconst [c] (ADDLconst [d] x)) => (ADDLconst [c+d] x)
+(SUBQconst (MOVQconst [d]) [c]) => (MOVQconst [d-int64(c)])
+(SUBQconst (SUBQconst x [d]) [c]) && is32Bit(int64(-c)-int64(d)) => (ADDQconst [-c-d] x)
+(SARQconst [c] (MOVQconst [d])) => (MOVQconst [d>>uint64(c)])
+(SARLconst [c] (MOVQconst [d])) => (MOVQconst [int64(int32(d))>>uint64(c)])
+(SARWconst [c] (MOVQconst [d])) => (MOVQconst [int64(int16(d))>>uint64(c)])
+(SARBconst [c] (MOVQconst [d])) => (MOVQconst [int64(int8(d))>>uint64(c)])
+(NEGQ (MOVQconst [c])) => (MOVQconst [-c])
+(NEGL (MOVLconst [c])) => (MOVLconst [-c])
+(MULQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)*d])
+(MULLconst [c] (MOVLconst [d])) => (MOVLconst [c*d])
+(ANDQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)&d])
+(ANDLconst [c] (MOVLconst [d])) => (MOVLconst [c&d])
+(ORQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)|d])
+(ORLconst [c] (MOVLconst [d])) => (MOVLconst [c|d])
+(XORQconst [c] (MOVQconst [d])) => (MOVQconst [int64(c)^d])
+(XORLconst [c] (MOVLconst [d])) => (MOVLconst [c^d])
+(NOTQ (MOVQconst [c])) => (MOVQconst [^c])
+(NOTL (MOVLconst [c])) => (MOVLconst [^c])
+(BTSQconst [c] (MOVQconst [d])) => (MOVQconst [d|(1<<uint32(c))])
+(BTSLconst [c] (MOVLconst [d])) => (MOVLconst [d|(1<<uint32(c))])
+(BTRQconst [c] (MOVQconst [d])) => (MOVQconst [d&^(1<<uint32(c))])
+(BTRLconst [c] (MOVLconst [d])) => (MOVLconst [d&^(1<<uint32(c))])
+(BTCQconst [c] (MOVQconst [d])) => (MOVQconst [d^(1<<uint32(c))])
+(BTCLconst [c] (MOVLconst [d])) => (MOVLconst [d^(1<<uint32(c))])

 // If c or d doesn't fit into 32 bits, then we can't construct ORQconst,
 // but we can still constant-fold.
 // In theory this applies to any of the simplifications above,
 // but ORQ is the only one I've actually seen occur.
-(ORQ (MOVQconst [c]) (MOVQconst [d])) -> (MOVQconst [c|d])
+(ORQ (MOVQconst [c]) (MOVQconst [d])) => (MOVQconst [c|d])

 // generic simplifications
 // TODO: more of this
-(ADDQ x (NEGQ y)) -> (SUBQ x y)
-(ADDL x (NEGL y)) -> (SUBL x y)
-(SUBQ x x) -> (MOVQconst [0])
-(SUBL x x) -> (MOVLconst [0])
-(ANDQ x x) -> x
-(ANDL x x) -> x
-(ORQ x x) -> x
-(ORL x x) -> x
-(XORQ x x) -> (MOVQconst [0])
-(XORL x x) -> (MOVLconst [0])
-
-(SHLLconst [d] (MOVLconst [c])) -> (MOVLconst [int64(int32(c)) << uint64(d)])
-(SHLQconst [d] (MOVQconst [c])) -> (MOVQconst [c << uint64(d)])
-(SHLQconst [d] (MOVLconst [c])) -> (MOVQconst [int64(int32(c)) << uint64(d)])
+(ADDQ x (NEGQ y)) => (SUBQ x y)
+(ADDL x (NEGL y)) => (SUBL x y)
+(SUBQ x x) => (MOVQconst [0])
+(SUBL x x) => (MOVLconst [0])
+(ANDQ x x) => x
+(ANDL x x) => x
+(ORQ x x) => x
+(ORL x x) => x
+(XORQ x x) => (MOVQconst [0])
+(XORL x x) => (MOVLconst [0])
+
+(SHLLconst [d] (MOVLconst [c])) => (MOVLconst [c << uint64(d)])
+(SHLQconst [d] (MOVQconst [c])) => (MOVQconst [c << uint64(d)])
+(SHLQconst [d] (MOVLconst [c])) => (MOVQconst [int64(c) << uint64(d)])

 // Fold NEG into ADDconst/MULconst. Take care to keep c in 32 bit range.
-(NEGQ (ADDQconst [c] (NEGQ x))) && c != -(1<<31) -> (ADDQconst [-c] x)
-(MULQconst [c] (NEGQ x)) && c != -(1<<31) -> (MULQconst [-c] x)
+(NEGQ (ADDQconst [c] (NEGQ x))) && c != -(1<<31) => (ADDQconst [-c] x)
+(MULQconst [c] (NEGQ x)) && c != -(1<<31) => (MULQconst [-c] x)

 // checking AND against 0.
-(CMPQconst a:(ANDQ x y) [0]) && a.Uses == 1 -> (TESTQ x y)
-(CMPLconst a:(ANDL x y) [0]) && a.Uses == 1 -> (TESTL x y)
-(CMPWconst a:(ANDL x y) [0]) && a.Uses == 1 -> (TESTW x y)
-(CMPBconst a:(ANDL x y) [0]) && a.Uses == 1 -> (TESTB x y)
-(CMPQconst a:(ANDQconst [c] x) [0]) && a.Uses == 1 -> (TESTQconst [c] x)
-(CMPLconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 -> (TESTLconst [c] x)
-(CMPWconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 -> (TESTWconst [int64(int16(c))] x)
-(CMPBconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 -> (TESTBconst [int64(int8(c))] x)
+(CMPQconst a:(ANDQ x y) [0]) && a.Uses == 1 => (TESTQ x y)
+(CMPLconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTL x y)
+(CMPWconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTW x y)
+(CMPBconst a:(ANDL x y) [0]) && a.Uses == 1 => (TESTB x y)
+(CMPQconst a:(ANDQconst [c] x) [0]) && a.Uses == 1 => (TESTQconst [c] x)
+(CMPLconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTLconst [c] x)
+(CMPWconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTWconst [int16(c)] x)
+(CMPBconst a:(ANDLconst [c] x) [0]) && a.Uses == 1 => (TESTBconst [int8(c)] x)

 // Convert TESTx to TESTxconst if possible.
-(TESTQ (MOVQconst [c]) x) && is32Bit(c) -> (TESTQconst [c] x)
-(TESTL (MOVLconst [c]) x) -> (TESTLconst [c] x)
-(TESTW (MOVLconst [c]) x) -> (TESTWconst [c] x)
-(TESTB (MOVLconst [c]) x) -> (TESTBconst [c] x)
+(TESTQ (MOVQconst [c]) x) && is32Bit(c) => (TESTQconst [int32(c)] x)
+(TESTL (MOVLconst [c]) x) => (TESTLconst [c] x)
+(TESTW (MOVLconst [c]) x) => (TESTWconst [int16(c)] x)
+(TESTB (MOVLconst [c]) x) => (TESTBconst [int8(c)] x)

 // TEST %reg,%reg is shorter than CMP
-(CMPQconst x [0]) -> (TESTQ x x)
-(CMPLconst x [0]) -> (TESTL x x)
-(CMPWconst x [0]) -> (TESTW x x)
-(CMPBconst x [0]) -> (TESTB x x)
-(TESTQconst [-1] x) && x.Op != OpAMD64MOVQconst -> (TESTQ x x)
-(TESTLconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTL x x)
-(TESTWconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTW x x)
-(TESTBconst [-1] x) && x.Op != OpAMD64MOVLconst -> (TESTB x x)
+(CMPQconst x [0]) => (TESTQ x x)
+(CMPLconst x [0]) => (TESTL x x)
+(CMPWconst x [0]) => (TESTW x x)
+(CMPBconst x [0]) => (TESTB x x)
+(TESTQconst [-1] x) && x.Op != OpAMD64MOVQconst => (TESTQ x x)
+(TESTLconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTL x x)
+(TESTWconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTW x x)
+(TESTBconst [-1] x) && x.Op != OpAMD64MOVLconst => (TESTB x x)

 // Convert LEAQ1 back to ADDQ if we can
-(LEAQ1 [0] x y) && v.Aux == nil -> (ADDQ x y)
+(LEAQ1 [0] x y) && v.Aux == nil => (ADDQ x y)

 // Combining byte loads into larger (unaligned) loads.
 // There are many ways these combinations could occur. This is
@@ -1512,7 +1514,7 @@
 && sh.Uses == 1
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
 (OR(L|Q) x0:(MOVBload [i] {s} p0 mem)
 sh:(SHL(L|Q)const [8] x1:(MOVBload [i] {s} p1 mem)))
@@ -1522,7 +1524,7 @@
 && sequentialAddresses(p0, p1, 1)
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
+ => @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
 (OR(L|Q) x0:(MOVWload [i0] {s} p mem)
 sh:(SHL(L|Q)const [16] x1:(MOVWload [i1] {s} p mem)))
@@ -1532,7 +1534,7 @@
 && sh.Uses == 1
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
 (OR(L|Q) x0:(MOVWload [i] {s} p0 mem)
 sh:(SHL(L|Q)const [16] x1:(MOVWload [i] {s} p1 mem)))
@@ -1542,7 +1544,7 @@
 && sequentialAddresses(p0, p1, 2)
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)
+ => @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)
 (ORQ x0:(MOVLload [i0] {s} p mem)
 sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
@@ -1552,7 +1554,7 @@
 && sh.Uses == 1
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
+ => @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
 (ORQ x0:(MOVLload [i] {s} p0 mem)
 sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem)))
@@ -1562,7 +1564,7 @@
 && sequentialAddresses(p0, p1, 4)
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem)
+ => @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem)
 (OR(L|Q)
 s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem))
@@ -1579,7 +1581,7 @@
 && or.Uses == 1
 && mergePoint(b,x0,x1,y) != nil
 && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
+ => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
 (OR(L|Q)
 s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem))
@@ -1596,7 +1598,7 @@
 && sequentialAddresses(p0, p1, 1)
 && mergePoint(b,x0,x1,y) != nil
 && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)
+ => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)
 (ORQ
 s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
@@ -1613,7 +1615,7 @@
 && or.Uses == 1
 && mergePoint(b,x0,x1,y) != nil
 && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+ => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
 (ORQ
 s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem))
@@ -1630,7 +1632,7 @@
 && sequentialAddresses(p0, p1, 2)
 && mergePoint(b,x0,x1,y) != nil
 && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y)
+ => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y)

 // Big-endian loads
@@ -1643,7 +1645,7 @@
 && sh.Uses == 1
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
+ => @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
 (OR(L|Q)
 x1:(MOVBload [i] {s} p1 mem)
@@ -1654,7 +1656,7 @@
 && sequentialAddresses(p0, p1, 1)
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))
+ => @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))
 (OR(L|Q)
 r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
@@ -1667,7 +1669,7 @@
 && sh.Uses == 1
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
+ => @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
 (OR(L|Q)
 r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))
@@ -1680,7 +1682,7 @@
 && sequentialAddresses(p0, p1, 2)
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))
+ => @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))
 (ORQ
 r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
@@ -1693,7 +1695,7 @@
 && sh.Uses == 1
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
+ => @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
 (ORQ
 r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem))
@@ -1706,7 +1708,7 @@
 && sequentialAddresses(p0, p1, 4)
 && mergePoint(b,x0,x1) != nil
 && clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem))
+ => @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem))
 (OR(L|Q)
 s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem))
@@ -1723,7 +1725,7 @@
 && or.Uses == 1
 && mergePoint(b,x0,x1,y) != nil
 && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
+ => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
 (OR(L|Q)
 s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem))
@@ -1740,7 +1742,7 @@
 && sequentialAddresses(p0, p1, 1)
 && mergePoint(b,x0,x1,y) != nil
 && clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)
+ => @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)
 (ORQ
 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
@@ -1759,7 +1761,7 @@
 && or.Uses == 1
 && mergePoint(b,x0,x1,y) != nil
 && clobber(x0, x1, r0, r1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
+ => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
 (ORQ
 s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))
@@ -1778,20 +1780,20 @@
 && sequentialAddresses(p0, p1, 2)
 && mergePoint(b,x0,x1,y) != nil
 && clobber(x0, x1, r0, r1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y)
+ => @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y)

 // Combine 2 byte stores + shift into rolw 8 + word store
 (MOVBstore [i] {s} p w
 x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
 && x0.Uses == 1
 && clobber(x0)
- -> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
+ => (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
 (MOVBstore [i] {s} p1 w
 x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem))
 && x0.Uses == 1
 && sequentialAddresses(p0, p1, 1)
 && clobber(x0)
- -> (MOVWstore [i] {s} p0 (ROLWconst <w.Type> [8] w) mem)
+ => (MOVWstore [i] {s} p0 (ROLWconst <w.Type> [8] w) mem)

 // Combine stores + shifts into bswap and larger (unaligned) stores
 (MOVBstore [i] {s} p w
@@ -1802,7 +1804,7 @@
 && x1.Uses == 1
 && x2.Uses == 1
 && clobber(x0, x1, x2)
- -> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
+ => (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
 (MOVBstore [i] {s} p3 w
 x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w)
 x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w)
@@ -1814,7 +1816,7 @@
 && sequentialAddresses(p1, p2, 1)
 && sequentialAddresses(p2, p3, 1)
 && clobber(x0, x1, x2)
- -> (MOVLstore [i] {s} p0 (BSWAPL <w.Type> w) mem)
+ => (MOVLstore [i] {s} p0 (BSWAPL <w.Type> w) mem)
 (MOVBstore [i] {s} p w
 x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
@@ -1832,7 +1834,7 @@
 && x5.Uses == 1
 && x6.Uses == 1
 && clobber(x0, x1, x2, x3, x4, x5, x6)
- -> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
+ => (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
 (MOVBstore [i] {s} p7 w
 x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w)
 x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w)
@@ -1856,114 +1858,114 @@
 && sequentialAddresses(p5, p6, 1)
 && sequentialAddresses(p6, p7, 1)
 && clobber(x0, x1, x2, x3, x4, x5, x6)
- -> (MOVQstore [i] {s} p0 (BSWAPQ <w.Type> w) mem)
+ => (MOVQstore [i] {s} p0 (BSWAPQ <w.Type> w) mem)

 // Combine constant stores into larger (unaligned) stores.
 (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
 && x.Uses == 1
- && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+ && a.Off() + 1 == c.Off()
 && clobber(x)
- -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+ => (MOVWstoreconst [makeValAndOff64(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
 (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
 && x.Uses == 1
- && ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
+ && a.Off() + 1 == c.Off()
 && clobber(x)
- -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+ => (MOVWstoreconst [makeValAndOff64(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
 (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
 && x.Uses == 1
- && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+ && a.Off() + 2 == c.Off()
 && clobber(x)
- -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+ => (MOVLstoreconst [makeValAndOff64(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
 (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
 && x.Uses == 1
- && ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
+ && a.Off() + 2 == c.Off()
 && clobber(x)
- -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+ => (MOVLstoreconst [makeValAndOff64(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
 (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
 && x.Uses == 1
- && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
+ && a.Off() + 4 == c.Off()
 && clobber(x)
- -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+ => (MOVQstore [a.Off32()] {s} p (MOVQconst [a.Val()&0xffffffff | c.Val()<<32]) mem)
 (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
 && x.Uses == 1
- && ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
+ && a.Off() + 4 == c.Off()
 && clobber(x)
- -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+ => (MOVQstore [a.Off32()] {s} p (MOVQconst [a.Val()&0xffffffff | c.Val()<<32]) mem)
 (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
 && config.useSSE
 && x.Uses == 1
- && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
- && ValAndOff(c).Val() == 0
- && ValAndOff(c2).Val() == 0
+ && c2.Off() + 8 == c.Off()
+ && c.Val() == 0
+ && c2.Val() == 0
 && clobber(x)
- -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
+ => (MOVOstore [c2.Off32()] {s} p (MOVOconst [0]) mem)

 // Combine stores into larger (unaligned) stores. Little endian.
// Combine stores into larger (unaligned) stores. Little endian.
(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w)
  x:(MOVBstore [i-1] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
-  -> (MOVWstore [i-1] {s} p w mem)
+  => (MOVWstore [i-1] {s} p w mem)
(MOVBstore [i] {s} p w
  x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
  && x.Uses == 1
  && clobber(x)
-  -> (MOVWstore [i] {s} p w mem)
+  => (MOVWstore [i] {s} p w mem)
(MOVBstore [i] {s} p (SHR(L|Q)const [j] w)
  x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
  && x.Uses == 1
  && clobber(x)
-  -> (MOVWstore [i-1] {s} p w0 mem)
+  => (MOVWstore [i-1] {s} p w0 mem)
(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w)
  x:(MOVBstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
-  -> (MOVWstore [i] {s} p0 w mem)
+  => (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p0 w
  x:(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
-  -> (MOVWstore [i] {s} p0 w mem)
+  => (MOVWstore [i] {s} p0 w mem)
(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w)
  x:(MOVBstore [i] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 1)
  && clobber(x)
-  -> (MOVWstore [i] {s} p0 w0 mem)
+  => (MOVWstore [i] {s} p0 w0 mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [16] w)
  x:(MOVWstore [i-2] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
-  -> (MOVLstore [i-2] {s} p w mem)
+  => (MOVLstore [i-2] {s} p w mem)
(MOVWstore [i] {s} p (SHR(L|Q)const [j] w)
  x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
  && x.Uses == 1
  && clobber(x)
-  -> (MOVLstore [i-2] {s} p w0 mem)
+  => (MOVLstore [i-2] {s} p w0 mem)
(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w)
  x:(MOVWstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
-  -> (MOVLstore [i] {s} p0 w mem)
+  => (MOVLstore [i] {s} p0 w mem)
(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w)
  x:(MOVWstore [i] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 2)
  && clobber(x)
-  -> (MOVLstore [i] {s} p0 w0 mem)
+  => (MOVLstore [i] {s} p0 w0 mem)
(MOVLstore [i] {s} p (SHRQconst [32] w)
  x:(MOVLstore [i-4] {s} p w mem))
  && x.Uses == 1
  && clobber(x)
-  -> (MOVQstore [i-4] {s} p w mem)
+  => (MOVQstore [i-4] {s} p w mem)
(MOVLstore [i] {s} p (SHRQconst [j] w)
  x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && clobber(x)
-  -> (MOVQstore [i-4] {s} p w0 mem)
+  => (MOVQstore [i-4] {s} p w0 mem)
(MOVLstore [i] {s} p1 (SHRQconst [32] w)
  x:(MOVLstore [i] {s} p0 w mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 4)
  && clobber(x)
-  -> (MOVQstore [i] {s} p0 w mem)
+  => (MOVQstore [i] {s} p0 w mem)
(MOVLstore [i] {s} p1 (SHRQconst [j] w)
  x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem))
  && x.Uses == 1
  && sequentialAddresses(p0, p1, 4)
  && clobber(x)
-  -> (MOVQstore [i] {s} p0 w0 mem)
+  => (MOVQstore [i] {s} p0 w0 mem)
(MOVBstore [i] {s} p
  x1:(MOVBload [j] {s2} p2 mem)
@@ -1973,7 +1975,7 @@
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1, x2, mem2)
-  -> (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
+  => (MOVWstore [i-1] {s} p (MOVWload [j-1] {s2} p2 mem) mem)
(MOVWstore [i] {s} p
  x1:(MOVWload [j] {s2} p2 mem)
@@ -1983,7 +1985,7 @@
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1, x2, mem2)
-  -> (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
+  => (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem)
(MOVLstore [i] {s} p
  x1:(MOVLload [j] {s2} p2 mem)
@@ -1993,178 +1995,178 @@
  && x2.Uses == 1
  && mem2.Uses == 1
  && clobber(x1, x2, mem2)
-  -> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
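The little-endian merges above are the same idea without the byte swap; a sketch of the kind of source they typically come from (function name hypothetical):

    func putLE32(b []byte, w uint32) {
        _ = b[3] // single bounds check
        b[0] = byte(w)
        b[1] = byte(w >> 8)
        b[2] = byte(w >> 16)
        b[3] = byte(w >> 24) // candidates for one unaligned MOVL store
    }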
-
-(MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
-  (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
-  (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
-  (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-(MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
-  (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
-
-(MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
-  (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
-  (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
-  (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-(MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(off1+off2) ->
-  (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-
-(MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-  (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-  (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-  (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-(MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
-  (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
-
-(MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVQload [off1+off2] {sym} ptr mem)
-(MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVLload [off1+off2] {sym} ptr mem)
-(MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVWload [off1+off2] {sym} ptr mem)
-(MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem)
-(MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVQstore [off1+off2] {sym} ptr val mem)
-(MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore [off1+off2] {sym} ptr val mem)
-(MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem)
-(MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem)
-(MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-  (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-  (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-  (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
-(MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) ->
-  (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem)
+  => (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
+
+(MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
+  (MOVQload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
+  (MOVLload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
+  (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+(MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
+  (MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+
+(MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
+  (MOVQstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
+  (MOVLstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
+  (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+(MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
+  (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+
+(MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+  (MOVQstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+  (MOVLstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+  (MOVWstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+(MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
+  (MOVBstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+
+(MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOVQload [off1+off2] {sym} ptr mem)
+(MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOVLload [off1+off2] {sym} ptr mem)
+(MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOVWload [off1+off2] {sym} ptr mem)
+(MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOVBload [off1+off2] {sym} ptr mem)
+(MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) => (MOVQstore [off1+off2] {sym} ptr val mem)
+(MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) => (MOVLstore [off1+off2] {sym} ptr val mem)
+(MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) => (MOVWstore [off1+off2] {sym} ptr val mem)
+(MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(int64(off1)+int64(off2)) => (MOVBstore [off1+off2] {sym} ptr val mem)
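These offset folds are, loosely, what lets a field access through a pointer compile to a single load with the displacement in the addressing mode (this block covers the 32-bit LEAL/ADDLconst forms; the LEAQ forms live elsewhere in the file). An illustrative sketch, names hypothetical:

    type pair struct{ a, b int64 }

    func second(p *pair) int64 {
        return p.b // offset 8 can fold into the load, e.g. MOVQ 8(ptr), reg
    }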
+(MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && sc.canAdd32(off) =>
+  (MOVQstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && sc.canAdd32(off) =>
+  (MOVLstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && sc.canAdd32(off) =>
+  (MOVWstoreconst [sc.addOffset32(off)] {s} ptr mem)
+(MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && sc.canAdd32(off) =>
+  (MOVBstoreconst [sc.addOffset32(off)] {s} ptr mem)

// Merge load and op
// TODO: add indexed variants?
-((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem)
-((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
-((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
-((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
-(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
-(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
+((ADD|SUB|AND|OR|XOR)Q x l:(MOVQload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|AND|OR|XOR)Qload x [off] {sym} ptr mem)
+((ADD|SUB|AND|OR|XOR)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|AND|OR|XOR)Lload x [off] {sym} ptr mem)
+((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
+((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
+(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
  ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
-(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
-(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) ->
+(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
  ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)
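A hedged sketch of the source shape the *modify rules target, a read-modify-write through a pointer folded into one memory-operand instruction (function name hypothetical):

    func bump(p *int64, x int64) {
        *p += x // load, add and store can become a single ADDQ x, (p)
    }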
// Merge ADDQconst and LEAQ into atomic loads.
-(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
  (MOV(Q|L|B)atomicload [off1+off2] {sym} ptr mem)
-(MOV(Q|L|B)atomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
-  (MOV(Q|L|B)atomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
+(MOV(Q|L|B)atomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
+  (MOV(Q|L|B)atomicload [off1+off2] {mergeSymTyped(sym1, sym2)} ptr mem)

// Merge ADDQconst and LEAQ into atomic stores.
-(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
  (XCHGQ [off1+off2] {sym} val ptr mem)
-(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
-  (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
-(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB =>
+  (XCHGQ [off1+off2] {mergeSymTyped(sym1,sym2)} val ptr mem)
+(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
  (XCHGL [off1+off2] {sym} val ptr mem)
-(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) && ptr.Op != OpSB ->
-  (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB =>
+  (XCHGL [off1+off2] {mergeSymTyped(sym1,sym2)} val ptr mem)

// Merge ADDQconst into atomic adds.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
-(XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+(XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
  (XADDQlock [off1+off2] {sym} val ptr mem)
-(XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
+(XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
  (XADDLlock [off1+off2] {sym} val ptr mem)

// Merge ADDQconst into atomic compare and swaps.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
-(CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) ->
+(CMPXCHGQlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(int64(off1)+int64(off2)) =>
  (CMPXCHGQlock [off1+off2] {sym} ptr old new_ mem)
-(CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(off1+off2) ->
+(CMPXCHGLlock [off1] {sym} (ADDQconst [off2] ptr) old new_ mem) && is32Bit(int64(off1)+int64(off2)) =>
  (CMPXCHGLlock [off1+off2] {sym} ptr old new_ mem)

// We don't need the conditional move if we know the arg of BSF is not zero.
-(CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) && c != 0 -> x
+(CMOVQEQ x _ (Select1 (BSFQ (ORQconst [c] _)))) && c != 0 => x
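For context on the BSF rule above: Go's math/bits intrinsics OR a guard bit above narrow inputs, so the BSF argument is provably nonzero and the usual CMOV guard can be dropped. A sketch, assuming the typical lowering of this intrinsic:

    import "math/bits"

    func tz(x uint16) int {
        return bits.TrailingZeros16(x) // lowers to BSFQ(x | 1<<16); no conditional move needed
    }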
// Extension is unnecessary for trailing zeros.
-(BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst <t> [1<<8] x))
-(BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst <t> [1<<16] x))
+(BSFQ (ORQconst <t> [1<<8] (MOVBQZX x))) => (BSFQ (ORQconst <t> [1<<8] x))
+(BSFQ (ORQconst <t> [1<<16] (MOVWQZX x))) => (BSFQ (ORQconst <t> [1<<16] x))

// Redundant sign/zero extensions
// Note: see issue 21963. We have to make sure we use the right type on
// the resulting extension (the outer type, not the inner type).
-(MOVLQSX (MOVLQSX x)) -> (MOVLQSX x)
-(MOVLQSX (MOVWQSX x)) -> (MOVWQSX x)
-(MOVLQSX (MOVBQSX x)) -> (MOVBQSX x)
-(MOVWQSX (MOVWQSX x)) -> (MOVWQSX x)
-(MOVWQSX (MOVBQSX x)) -> (MOVBQSX x)
-(MOVBQSX (MOVBQSX x)) -> (MOVBQSX x)
-(MOVLQZX (MOVLQZX x)) -> (MOVLQZX x)
-(MOVLQZX (MOVWQZX x)) -> (MOVWQZX x)
-(MOVLQZX (MOVBQZX x)) -> (MOVBQZX x)
-(MOVWQZX (MOVWQZX x)) -> (MOVWQZX x)
-(MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
-(MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
+(MOVLQSX (MOVLQSX x)) => (MOVLQSX x)
+(MOVLQSX (MOVWQSX x)) => (MOVWQSX x)
+(MOVLQSX (MOVBQSX x)) => (MOVBQSX x)
+(MOVWQSX (MOVWQSX x)) => (MOVWQSX x)
+(MOVWQSX (MOVBQSX x)) => (MOVBQSX x)
+(MOVBQSX (MOVBQSX x)) => (MOVBQSX x)
+(MOVLQZX (MOVLQZX x)) => (MOVLQZX x)
+(MOVLQZX (MOVWQZX x)) => (MOVWQZX x)
+(MOVLQZX (MOVBQZX x)) => (MOVBQZX x)
+(MOVWQZX (MOVWQZX x)) => (MOVWQZX x)
+(MOVWQZX (MOVBQZX x)) => (MOVBQZX x)
+(MOVBQZX (MOVBQZX x)) => (MOVBQZX x)

(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
-  && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) ->
-  ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+  && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(int64(c),int64(off)) && clobber(l, a) =>
+  ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff32(int32(c),off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
-  && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) ->
-  ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+  && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(int64(c),int64(off)) && clobber(l, a) =>
+  ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff32(int32(c),off)] ptr mem)

// float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
-(MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) -> (MOVQf2i val)
-(MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) -> (MOVLf2i val)
-(MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) -> (MOVQi2f val)
-(MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) -> (MOVLi2f val)
+(MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) => (MOVQf2i val)
+(MOVLload [off] {sym} ptr (MOVSSstore [off] {sym} ptr val _)) => (MOVLf2i val)
+(MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) => (MOVQi2f val)
+(MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) => (MOVLi2f val)
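The comment above names the motivating functions; a hedged example of code that benefits from keeping the bit pattern in registers (MOVQf2i/MOVQi2f) rather than bouncing through the stack:

    import "math"

    func flipSign(f float64) float64 {
        return math.Float64frombits(math.Float64bits(f) ^ (1 << 63))
    }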
// Other load-like ops.
-(ADDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ADDQ x (MOVQf2i y))
-(ADDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ADDL x (MOVLf2i y))
-(SUBQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (SUBQ x (MOVQf2i y))
-(SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (SUBL x (MOVLf2i y))
-(ANDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (ANDQ x (MOVQf2i y))
-(ANDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (ANDL x (MOVLf2i y))
-( ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> ( ORQ x (MOVQf2i y))
-( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> ( ORL x (MOVLf2i y))
-(XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) -> (XORQ x (MOVQf2i y))
-(XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) -> (XORL x (MOVLf2i y))
-
-(ADDSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (ADDSD x (MOVQi2f y))
-(ADDSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (ADDSS x (MOVLi2f y))
-(SUBSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (SUBSD x (MOVQi2f y))
-(SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (SUBSS x (MOVLi2f y))
-(MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) -> (MULSD x (MOVQi2f y))
-(MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) -> (MULSS x (MOVLi2f y))
+(ADDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (ADDQ x (MOVQf2i y))
+(ADDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (ADDL x (MOVLf2i y))
+(SUBQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (SUBQ x (MOVQf2i y))
+(SUBLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (SUBL x (MOVLf2i y))
+(ANDQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (ANDQ x (MOVQf2i y))
+(ANDLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (ANDL x (MOVLf2i y))
+( ORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => ( ORQ x (MOVQf2i y))
+( ORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => ( ORL x (MOVLf2i y))
+(XORQload x [off] {sym} ptr (MOVSDstore [off] {sym} ptr y _)) => (XORQ x (MOVQf2i y))
+(XORLload x [off] {sym} ptr (MOVSSstore [off] {sym} ptr y _)) => (XORL x (MOVLf2i y))
+
+(ADDSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (ADDSD x (MOVQi2f y))
+(ADDSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (ADDSS x (MOVLi2f y))
+(SUBSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (SUBSD x (MOVQi2f y))
+(SUBSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (SUBSS x (MOVLi2f y))
+(MULSDload x [off] {sym} ptr (MOVQstore [off] {sym} ptr y _)) => (MULSD x (MOVQi2f y))
+(MULSSload x [off] {sym} ptr (MOVLstore [off] {sym} ptr y _)) => (MULSS x (MOVLi2f y))
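A sketch of where such load-like ops arise: once math.Float64bits is inlined as a store and reload, the reload can merge into the integer op, and the rules above can then substitute a direct register move (assumes the typical inlining; not taken from this CL):

    import "math"

    func addBits(x uint64, f float64) uint64 {
        return x + math.Float64bits(f) // (ADDQload ... (MOVSDstore ...)) can become (ADDQ x (MOVQf2i y))
    }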
// Redirect stores to use the other register set.
-(MOVQstore [off] {sym} ptr (MOVQf2i val) mem) -> (MOVSDstore [off] {sym} ptr val mem)
-(MOVLstore [off] {sym} ptr (MOVLf2i val) mem) -> (MOVSSstore [off] {sym} ptr val mem)
-(MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) -> (MOVQstore [off] {sym} ptr val mem)
-(MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) -> (MOVLstore [off] {sym} ptr val mem)
+(MOVQstore [off] {sym} ptr (MOVQf2i val) mem) => (MOVSDstore [off] {sym} ptr val mem)
+(MOVLstore [off] {sym} ptr (MOVLf2i val) mem) => (MOVSSstore [off] {sym} ptr val mem)
+(MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) => (MOVQstore [off] {sym} ptr val mem)
+(MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) => (MOVLstore [off] {sym} ptr val mem)

// Load args directly into the register class where they will be used.
// We do this by just modifying the type of the Arg.
-(MOVQf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
-(MOVLf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
-(MOVQi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
-(MOVLi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() -> @b.Func.Entry (Arg <t> [off] {sym})
+(MOVQf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym})
+(MOVLf2i <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym})
+(MOVQi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym})
+(MOVLi2f <t> (Arg <u> [off] {sym})) && t.Size() == u.Size() => @b.Func.Entry (Arg <t> [off] {sym})

// LEAQ is rematerializeable, so this helps to avoid register spill.
// See issue 22947 for details.
-(ADD(Q|L)const [off] x:(SP)) -> (LEA(Q|L) [off] x)
+(ADD(Q|L)const [off] x:(SP)) => (LEA(Q|L) [off] x)

// HMULx is commutative, but its first argument must go in AX.
// If possible, put a rematerializeable value in the first argument slot,
// to reduce the odds that another value will have to be spilled
// specifically to free up AX.
-(HMUL(Q|L) x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L) y x)
-(HMUL(Q|L)U x y) && !x.rematerializeable() && y.rematerializeable() -> (HMUL(Q|L)U y x)
+(HMUL(Q|L) x y) && !x.rematerializeable() && y.rematerializeable() => (HMUL(Q|L) y x)
+(HMUL(Q|L)U x y) && !x.rematerializeable() && y.rematerializeable() => (HMUL(Q|L)U y x)
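HMUL-style ops mostly come from strength-reduced division; an illustrative case where the commutativity swap above can apply (name hypothetical):

    func div10(x uint64) uint64 {
        return x / 10 // lowered to a magic-number multiply that keeps the high half of the product
    }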
// Fold loads into compares
// Note: these may be undone by the flagalloc pass.
-(CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l) && clobber(l) -> (CMP(Q|L|W|B)load {sym} [off] ptr x mem)
-(CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) -> (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem))
+(CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (CMP(Q|L|W|B)load {sym} [off] ptr x mem)
+(CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem))

(CMP(Q|L)const l:(MOV(Q|L)load {sym} [off] ptr mem) [c])
  && l.Uses == 1
@@ -2175,22 +2177,22 @@
  && clobber(l)
  => @l.Block (CMP(W|B)constload {sym} [makeValAndOff32(int32(c),off)] ptr mem)

-(CMPQload {sym} [off] ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> (CMPQconstload {sym} [makeValAndOff(c,off)] ptr mem)
-(CMPLload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(c,off) -> (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem)
-(CMPWload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),off) -> (CMPWconstload {sym} [makeValAndOff(int64(int16(c)),off)] ptr mem)
-(CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
+(CMPQload {sym} [off] ptr (MOVQconst [c]) mem) && validValAndOff(c,int64(off)) => (CMPQconstload {sym} [makeValAndOff64(c,int64(off))] ptr mem)
+(CMPLload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(c),int64(off)) => (CMPLconstload {sym} [makeValAndOff32(c,off)] ptr mem)
+(CMPWload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)),int64(off)) => (CMPWconstload {sym} [makeValAndOff32(int32(int16(c)),off)] ptr mem)
+(CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),int64(off)) => (CMPBconstload {sym} [makeValAndOff32(int32(int8(c)),off)] ptr mem)

(TEST(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) l2)
  && l == l2
  && l.Uses == 2
-  && validValAndOff(0,off)
-  && clobber(l) ->
-  @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(0,off)] ptr mem)
-
-(MOVBload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read8(sym, off))])
-(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
-(MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read32(sym, off, config.ctxt.Arch.ByteOrder))])
-(MOVQload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))])
-(MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem) && symIsRO(srcSym) ->
-  (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff+8, config.ctxt.Arch.ByteOrder))])
-    (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder))]) mem))
+  && validValAndOff(0, int64(off))
+  && clobber(l) =>
+  @l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff64(0, int64(off))] ptr mem)
+
+(MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read8(sym, int64(off)))])
+(MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+(MOVLload [off] {sym} (SB) _) && symIsRO(sym) => (MOVQconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+(MOVQload [off] {sym} (SB) _) && symIsRO(sym) => (MOVQconst [int64(read64(sym, int64(off), config.ctxt.Arch.ByteOrder))])
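One plausible source of such read-only loads, offered as a hedged sketch rather than a claim about this CL: an inlined comparison against a short string literal reads the literal's bytes from a read-only symbol at a fixed offset, which the symIsRO rules can fold to an immediate:

    func eqAB(s string) bool {
        return s == "ab" // the literal's bytes live in read-only data
    }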
+(MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem) && symIsRO(srcSym) =>
+  (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff)+8, config.ctxt.Arch.ByteOrder))])
+    (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, int64(srcOff), config.ctxt.Arch.ByteOrder))]) mem))
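Finally, an illustrative shape for the MOVOstore rule that closes this hunk: a constant 16-byte composite literal is typically materialized from a read-only temporary, and the rewrite can replace the SSE load/store pair with two 8-byte constant stores (sketch only; name hypothetical):

    func fill(dst *[16]byte) {
        *dst = [16]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
    }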