author     Keith Randall <khr@golang.org>  2020-03-27 22:03:33 -0700
committer  Keith Randall <khr@golang.org>  2020-03-30 17:25:29 +0000
commit     33b648c0e9428c8775043db75fdff5864a64219a
tree       8eea75f3d8068e06f4ae6c8b06a1be9274df025a
parent     5a312288799c0a433e2061550ff92689b627e080
cmd/compile: fix ephemeral pointer problem on amd64
Make sure we don't use the rewrite ptr + (c + x) -> c + (ptr + x), as
that may create an ephemeral out-of-bounds pointer. I have not seen an
actual bug caused by this yet, but we've seen them in the 386 port so
I'm fixing this issue for amd64 as well.

The load-combining rules needed to be reworked somewhat to still work
without the above broken rule.

Update #37881

Change-Id: I8046d170e89e2035195f261535e34ca7d8aca68a
Reviewed-on: https://go-review.googlesource.com/c/go/+/226437
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
-rw-r--r--  src/cmd/compile/internal/ssa/gen/AMD64.rules   420
-rw-r--r--  src/cmd/compile/internal/ssa/rewrite.go         45
-rw-r--r--  src/cmd/compile/internal/ssa/rewriteAMD64.go  2618
-rw-r--r--  test/codegen/memcombine.go                       8
4 files changed, 1764 insertions, 1327 deletions
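
To make the change concrete, here is the kind of Go source the little-endian
load-combining rules below target, together with a sketch of how the removed
reassociation could manufacture an out-of-bounds intermediate address. This is
a minimal, hypothetical example; the function names are illustrative and not
taken from the CL or its codegen tests.

package demo

// load16le reads a little-endian uint16 from the start of b. The
// load-combining rules in AMD64.rules merge the two byte loads into a
// single MOVWload once the two addresses are known to be adjacent.
func load16le(b []byte) uint16 {
	return uint16(b[0]) | uint16(b[1])<<8
}

// offByOne shows one way the ephemeral-pointer hazard can arise. With
// i == -1 this returns b[0], which is legal Go. The address is
// ptr + (i + 1); if the compiler reassociated it to (ptr + i) + 1, the
// intermediate value ptr + i would momentarily point just outside b's
// backing array -- the ephemeral out-of-bounds pointer the commit
// message refers to.
func offByOne(b []byte, i int) byte {
	return b[i+1]
}
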
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index ca5962f249..b5133d6c14 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1491,65 +1491,70 @@
// Little-endian loads
-(ORL x0:(MOVBload [i0] {s} p0 mem)
- sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
+(OR(L|Q) x0:(MOVBload [i0] {s} p mem)
+ sh:(SHL(L|Q)const [8] x1:(MOVBload [i1] {s} p mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
+ -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
-(ORQ x0:(MOVBload [i0] {s} p0 mem)
- sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem)))
- && i1 == i0+1
+(OR(L|Q) x0:(MOVBload [i] {s} p0 mem)
+ sh:(SHL(L|Q)const [8] x1:(MOVBload [i] {s} p1 mem)))
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
+ -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
-(ORL x0:(MOVWload [i0] {s} p0 mem)
- sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem)))
+(OR(L|Q) x0:(MOVWload [i0] {s} p mem)
+ sh:(SHL(L|Q)const [16] x1:(MOVWload [i1] {s} p mem)))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
+ -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
-(ORQ x0:(MOVWload [i0] {s} p0 mem)
- sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem)))
- && i1 == i0+2
+(OR(L|Q) x0:(MOVWload [i] {s} p0 mem)
+ sh:(SHL(L|Q)const [16] x1:(MOVWload [i] {s} p1 mem)))
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 2)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
+ -> @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)
-(ORQ x0:(MOVLload [i0] {s} p0 mem)
- sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem)))
+(ORQ x0:(MOVLload [i0] {s} p mem)
+ sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
&& i1 == i0+4
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem)
+ -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
-(ORL
- s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem))
- or:(ORL
- s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem))
+(ORQ x0:(MOVLload [i] {s} p0 mem)
+ sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem)))
+ && x0.Uses == 1
+ && x1.Uses == 1
+ && sh.Uses == 1
+ && sequentialAddresses(p0, p1, 4)
+ && mergePoint(b,x0,x1) != nil
+ && clobber(x0, x1, sh)
+ -> @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem)
+
+(OR(L|Q)
+ s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem))
+ or:(OR(L|Q)
+ s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem))
y))
&& i1 == i0+1
&& j1 == j0+8
@@ -1559,17 +1564,15 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
+ -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
-(ORQ
- s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem))
- or:(ORQ
- s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem))
+(OR(L|Q)
+ s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem))
+ or:(OR(L|Q)
+ s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem))
y))
- && i1 == i0+1
&& j1 == j0+8
&& j0 % 16 == 0
&& x0.Uses == 1
@@ -1577,15 +1580,15 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
+ -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)
(ORQ
- s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem))
+ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem))
or:(ORQ
- s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem))
+ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem))
y))
&& i1 == i0+2
&& j1 == j0+16
@@ -1595,106 +1598,107 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p0 mem)) y)
-
-// Little-endian indexed loads
-
-// Move constants offsets from LEAQx up into load. This lets the above combining
-// rules discover indexed load-combining instances.
-//TODO:remove! These rules are bad.
-(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
--> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
-(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
--> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
-(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
--> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
-(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1)
--> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
-
-(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
--> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
-(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
--> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
-(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
--> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
-(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1)
--> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
+ -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
+
+(ORQ
+ s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem))
+ or:(ORQ
+ s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem))
+ y))
+ && j1 == j0+16
+ && j0 % 32 == 0
+ && x0.Uses == 1
+ && x1.Uses == 1
+ && s0.Uses == 1
+ && s1.Uses == 1
+ && or.Uses == 1
+ && sequentialAddresses(p0, p1, 2)
+ && mergePoint(b,x0,x1,y) != nil
+ && clobber(x0, x1, s0, s1, or)
+ -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y)
// Big-endian loads
-(ORL
- x1:(MOVBload [i1] {s} p0 mem)
- sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem)))
+(OR(L|Q)
+ x1:(MOVBload [i1] {s} p mem)
+ sh:(SHL(L|Q)const [8] x0:(MOVBload [i0] {s} p mem)))
&& i1 == i0+1
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
+ -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
-(ORQ
- x1:(MOVBload [i1] {s} p0 mem)
- sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem)))
- && i1 == i0+1
+(OR(L|Q)
+ x1:(MOVBload [i] {s} p1 mem)
+ sh:(SHL(L|Q)const [8] x0:(MOVBload [i] {s} p0 mem)))
&& x0.Uses == 1
&& x1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, sh)
- -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
+ -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))
-(ORL
- r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem))
- sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
+(OR(L|Q)
+ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))
+ sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
&& i1 == i0+2
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
+ -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
-(ORQ
- r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem))
- sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
- && i1 == i0+2
+(OR(L|Q)
+ r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))
+ sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 2)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
+ -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))
(ORQ
- r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem))
- sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem))))
+ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem))
+ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
&& i1 == i0+4
&& x0.Uses == 1
&& x1.Uses == 1
&& r0.Uses == 1
&& r1.Uses == 1
&& sh.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1) != nil
&& clobber(x0, x1, r0, r1, sh)
- -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p0 mem))
+ -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
+
+(ORQ
+ r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem))
+ sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem))))
+ && x0.Uses == 1
+ && x1.Uses == 1
+ && r0.Uses == 1
+ && r1.Uses == 1
+ && sh.Uses == 1
+ && sequentialAddresses(p0, p1, 4)
+ && mergePoint(b,x0,x1) != nil
+ && clobber(x0, x1, r0, r1, sh)
+ -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem))
-(ORL
- s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem))
- or:(ORL
- s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem))
+(OR(L|Q)
+ s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem))
+ or:(OR(L|Q)
+ s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem))
y))
&& i1 == i0+1
&& j1 == j0-8
@@ -1704,17 +1708,15 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
+ -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
-(ORQ
- s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem))
- or:(ORQ
- s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem))
+(OR(L|Q)
+ s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem))
+ or:(OR(L|Q)
+ s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem))
y))
- && i1 == i0+1
&& j1 == j0-8
&& j1 % 16 == 0
&& x0.Uses == 1
@@ -1722,15 +1724,15 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
+ -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)
(ORQ
- s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem)))
+ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))
or:(ORQ
- s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem)))
+ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)))
y))
&& i1 == i0+2
&& j1 == j0-16
@@ -1742,41 +1744,90 @@
&& s0.Uses == 1
&& s1.Uses == 1
&& or.Uses == 1
- && same(p0, p1, 1)
&& mergePoint(b,x0,x1,y) != nil
&& clobber(x0, x1, r0, r1, s0, s1, or)
- -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p0 mem))) y)
+ -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
+
+(ORQ
+ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))
+ or:(ORQ
+ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)))
+ y))
+ && j1 == j0-16
+ && j1 % 32 == 0
+ && x0.Uses == 1
+ && x1.Uses == 1
+ && r0.Uses == 1
+ && r1.Uses == 1
+ && s0.Uses == 1
+ && s1.Uses == 1
+ && or.Uses == 1
+ && sequentialAddresses(p0, p1, 2)
+ && mergePoint(b,x0,x1,y) != nil
+ && clobber(x0, x1, r0, r1, s0, s1, or)
+ -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y)
// Combine 2 byte stores + shift into rolw 8 + word store
+(MOVBstore [i] {s} p w
+ x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
+ && x0.Uses == 1
+ && clobber(x0)
+ -> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
(MOVBstore [i] {s} p1 w
- x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem))
+ x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem))
&& x0.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 1)
&& clobber(x0)
- -> (MOVWstore [i-1] {s} p0 (ROLWconst <w.Type> [8] w) mem)
+ -> (MOVWstore [i] {s} p0 (ROLWconst <w.Type> [8] w) mem)
// Combine stores + shifts into bswap and larger (unaligned) stores
+(MOVBstore [i] {s} p w
+ x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w)
+ x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w)
+ x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
+ && x0.Uses == 1
+ && x1.Uses == 1
+ && x2.Uses == 1
+ && clobber(x0, x1, x2)
+ -> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
(MOVBstore [i] {s} p3 w
- x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w)
- x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w)
- x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem))))
+ x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w)
+ x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w)
+ x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
- && same(p0, p1, 1)
- && same(p1, p2, 1)
- && same(p2, p3, 1)
+ && sequentialAddresses(p0, p1, 1)
+ && sequentialAddresses(p1, p2, 1)
+ && sequentialAddresses(p2, p3, 1)
&& clobber(x0, x1, x2)
- -> (MOVLstore [i-3] {s} p0 (BSWAPL <w.Type> w) mem)
-
+ -> (MOVLstore [i] {s} p0 (BSWAPL <w.Type> w) mem)
+
+(MOVBstore [i] {s} p w
+ x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w)
+ x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w)
+ x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w)
+ x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w)
+ x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w)
+ x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w)
+ x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
+ && x0.Uses == 1
+ && x1.Uses == 1
+ && x2.Uses == 1
+ && x3.Uses == 1
+ && x4.Uses == 1
+ && x5.Uses == 1
+ && x6.Uses == 1
+ && clobber(x0, x1, x2, x3, x4, x5, x6)
+ -> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
(MOVBstore [i] {s} p7 w
- x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w)
- x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w)
- x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w)
- x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w)
- x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w)
- x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w)
- x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem))))))))
+ x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w)
+ x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w)
+ x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w)
+ x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w)
+ x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w)
+ x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w)
+ x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem))))))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
@@ -1784,99 +1835,122 @@
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
- && same(p0, p1, 1)
- && same(p1, p2, 1)
- && same(p2, p3, 1)
- && same(p3, p4, 1)
- && same(p4, p5, 1)
- && same(p5, p6, 1)
- && same(p6, p7, 1)
+ && sequentialAddresses(p0, p1, 1)
+ && sequentialAddresses(p1, p2, 1)
+ && sequentialAddresses(p2, p3, 1)
+ && sequentialAddresses(p3, p4, 1)
+ && sequentialAddresses(p4, p5, 1)
+ && sequentialAddresses(p5, p6, 1)
+ && sequentialAddresses(p6, p7, 1)
&& clobber(x0, x1, x2, x3, x4, x5, x6)
- -> (MOVQstore [i-7] {s} p0 (BSWAPQ <w.Type> w) mem)
+ -> (MOVQstore [i] {s} p0 (BSWAPQ <w.Type> w) mem)
// Combine constant stores into larger (unaligned) stores.
-(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
+(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
&& clobber(x)
- -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
-(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
+ -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& ValAndOff(a).Off() + 1 == ValAndOff(c).Off()
&& clobber(x)
- -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
-(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
+ -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
+(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
- -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
-(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
+ -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
- -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
-(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
+ -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
+(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
&& clobber(x)
- -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-(MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem))
+ -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off()
&& clobber(x)
- -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
-(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem))
+ -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
&& config.useSSE
&& x.Uses == 1
- && same(p0, p1, 1)
&& ValAndOff(c2).Off() + 8 == ValAndOff(c).Off()
&& ValAndOff(c).Val() == 0
&& ValAndOff(c2).Val() == 0
&& clobber(x)
- -> (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem)
+ -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
-// Combine stores into larger (unaligned) stores.
-(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
+// Combine stores into larger (unaligned) stores. Little endian.
+(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
+ && x.Uses == 1
+ && clobber(x)
+ -> (MOVWstore [i-1] {s} p w mem)
+(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& clobber(x)
- -> (MOVWstore [i-1] {s} p0 w mem)
-(MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHR(W|L|Q)const [8] w) mem))
+ -> (MOVWstore [i] {s} p w mem)
+(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem))
+ && x.Uses == 1
+ && clobber(x)
+ -> (MOVWstore [i-1] {s} p w0 mem)
+(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i] {s} p0 w mem))
+ && x.Uses == 1
+ && sequentialAddresses(p0, p1, 1)
+ && clobber(x)
+ -> (MOVWstore [i] {s} p0 w mem)
+(MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) mem))
&& x.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 1)
&& clobber(x)
-> (MOVWstore [i] {s} p0 w mem)
-(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
+(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem))
&& x.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 1)
&& clobber(x)
- -> (MOVWstore [i-1] {s} p0 w0 mem)
-(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
+ -> (MOVWstore [i] {s} p0 w0 mem)
+
+(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem))
+ && x.Uses == 1
+ && clobber(x)
+ -> (MOVLstore [i-2] {s} p w mem)
+(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem))
+ && x.Uses == 1
+ && clobber(x)
+ -> (MOVLstore [i-2] {s} p w0 mem)
+(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i] {s} p0 w mem))
+ && x.Uses == 1
+ && sequentialAddresses(p0, p1, 2)
+ && clobber(x)
+ -> (MOVLstore [i] {s} p0 w mem)
+(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
+ && x.Uses == 1
+ && sequentialAddresses(p0, p1, 2)
+ && clobber(x)
+ -> (MOVLstore [i] {s} p0 w0 mem)
+
+(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& clobber(x)
- -> (MOVLstore [i-2] {s} p0 w mem)
-(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem))
+ -> (MOVQstore [i-4] {s} p w mem)
+(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
&& x.Uses == 1
- && same(p0, p1, 1)
&& clobber(x)
- -> (MOVLstore [i-2] {s} p0 w0 mem)
-(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem))
+ -> (MOVQstore [i-4] {s} p w0 mem)
+(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem))
&& x.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 4)
&& clobber(x)
- -> (MOVQstore [i-4] {s} p0 w mem)
-(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem))
+ -> (MOVQstore [i] {s} p0 w mem)
+(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem))
&& x.Uses == 1
- && same(p0, p1, 1)
+ && sequentialAddresses(p0, p1, 4)
&& clobber(x)
- -> (MOVQstore [i-4] {s} p0 w0 mem)
+ -> (MOVQstore [i] {s} p0 w0 mem)
(MOVBstore [i] {s} p
x1:(MOVBload [j] {s2} p2 mem)
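
The rewritten store-combining rules above fire on byte-store sequences such as
the following minimal sketch (function names are illustrative, not taken from
the CL's codegen tests):

package demo

// store16le writes v to b[0] and b[1] in little-endian order. The
// "combine stores into larger (unaligned) stores" rules merge the two
// MOVBstores into a single MOVWstore.
func store16le(b []byte, v uint16) {
	b[0] = byte(v)
	b[1] = byte(v >> 8)
}

// store32be writes v to b[0..3] in big-endian order. The bswap rules
// recognize the shifted byte stores and emit one BSWAPL followed by a
// single MOVLstore.
func store32be(b []byte, v uint32) {
	b[0] = byte(v >> 24)
	b[1] = byte(v >> 16)
	b[2] = byte(v >> 8)
	b[3] = byte(v)
}
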
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index fc03f0d72c..878b15eeee 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -1260,46 +1260,15 @@ func sequentialAddresses(x, y *Value, n int64) bool {
x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
return true
}
- return false
-}
-
-// same reports whether x and y are the same value.
-// It checks to a maximum depth of d, so it may report
-// a false negative.
-// TODO: remove when amd64 port is switched to using sequentialAddresses
-func same(x, y *Value, depth int) bool {
- if x == y {
+ if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil &&
+ (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
+ x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
return true
}
- if depth <= 0 {
- return false
- }
- if x.Op != y.Op || x.Aux != y.Aux || x.AuxInt != y.AuxInt {
- return false
- }
- if len(x.Args) != len(y.Args) {
- return false
- }
- if opcodeTable[x.Op].commutative {
- // Check exchanged ordering first.
- for i, a := range x.Args {
- j := i
- if j < 2 {
- j ^= 1
- }
- b := y.Args[j]
- if !same(a, b, depth-1) {
- goto checkNormalOrder
- }
- }
+ if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux &&
+ (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] ||
+ x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) {
return true
- checkNormalOrder:
}
- for i, a := range x.Args {
- b := y.Args[i]
- if !same(a, b, depth-1) {
- return false
- }
- }
- return true
+ return false
}
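
The predicate that replaces same throughout the rules is
sequentialAddresses(p0, p1, n): it reports whether p1 computes an address
exactly n bytes past p0, by matching a handful of SSA address shapes (e.g.
ADDQ, LEAQ1, and constant-offset forms) that share operands. A rough
standalone sketch of the same idea, using a toy address type rather than the
compiler's *ssa.Value:

package demo

// addr is a toy model of the address forms sequentialAddresses matches:
// a base operand, an optional index operand, and a constant offset.
type addr struct {
	base, index string
	off         int64
}

// sequential reports whether y is exactly n bytes past x, given that the
// two addresses are built from the same base and index operands. The
// real sequentialAddresses performs the equivalent check on SSA values,
// treating the operands of ADDQ and LEAQ1 as commutative.
func sequential(x, y addr, n int64) bool {
	sameOperands := (x.base == y.base && x.index == y.index) ||
		(x.base == y.index && x.index == y.base)
	return sameOperands && y.off == x.off+n
}

With this predicate, two byte stores that carry the same AuxInt offset but
write through p0 and p1 one byte apart can be merged into a single word store,
which is what the rewritten rules in AMD64.rules do.
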
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index b9a401cca9..e4d86485d4 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -10140,7 +10140,6 @@ func rewriteValueAMD64_OpAMD64MOVBatomicload(v *Value) bool {
func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
- b := v.Block
// match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _))
// cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)
// result: (MOVBQZX x)
@@ -10205,118 +10204,6 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool {
v.AddArg2(base, mem)
return true
}
- // match: (MOVBload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVBload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ1 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVBload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVBload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVBload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ2 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVBload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVBload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVBload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ4 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVBload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVBload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVBload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ8 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVBload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
// match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
// cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
// result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
@@ -10722,159 +10609,124 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
v.AddArg3(base, val, mem)
return true
}
- // match: (MOVBstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVBstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem))
+ // cond: x0.Uses == 1 && clobber(x0)
+ // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ1 {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ w := v_1
+ x0 := v_2
+ if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ mem := x0.Args[2]
+ if p != x0.Args[0] {
break
}
- v.reset(OpAMD64MOVBstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
+ x0_1 := x0.Args[1]
+ if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && clobber(x0)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i - 1
+ v.Aux = s
+ v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type)
+ v0.AuxInt = 8
+ v0.AddArg(w)
+ v.AddArg3(p, v0, mem)
return true
}
- // match: (MOVBstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVBstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem))
+ // cond: x0.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x0)
+ // result: (MOVWstore [i] {s} p0 (ROLWconst <w.Type> [8] w) mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ2 {
+ i := v.AuxInt
+ s := v.Aux
+ p1 := v_0
+ w := v_1
+ x0 := v_2
+ if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ mem := x0.Args[2]
+ p0 := x0.Args[0]
+ x0_1 := x0.Args[1]
+ if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x0)) {
break
}
- v.reset(OpAMD64MOVBstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i
+ v.Aux = s
+ v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type)
+ v0.AuxInt = 8
+ v0.AddArg(w)
+ v.AddArg3(p0, v0, mem)
return true
}
- // match: (MOVBstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVBstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem))))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)
+ // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ4 {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ w := v_1
+ x2 := v_2
+ if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i-1 || x2.Aux != s {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ _ = x2.Args[2]
+ if p != x2.Args[0] {
break
}
- v.reset(OpAMD64MOVBstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
- return true
- }
- // match: (MOVBstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVBstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ8 {
+ x2_1 := x2.Args[1]
+ if x2_1.Op != OpAMD64SHRLconst || x2_1.AuxInt != 8 || w != x2_1.Args[0] {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ x1 := x2.Args[2]
+ if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i-2 || x1.Aux != s {
break
}
- v.reset(OpAMD64MOVBstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
- return true
- }
- // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem))
- // cond: x0.Uses == 1 && same(p0, p1, 1) && clobber(x0)
- // result: (MOVWstore [i-1] {s} p0 (ROLWconst <w.Type> [8] w) mem)
- for {
- i := v.AuxInt
- s := v.Aux
- p1 := v_0
- w := v_1
- x0 := v_2
- if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s {
+ _ = x1.Args[2]
+ if p != x1.Args[0] {
+ break
+ }
+ x1_1 := x1.Args[1]
+ if x1_1.Op != OpAMD64SHRLconst || x1_1.AuxInt != 16 || w != x1_1.Args[0] {
+ break
+ }
+ x0 := x1.Args[2]
+ if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-3 || x0.Aux != s {
break
}
mem := x0.Args[2]
- p0 := x0.Args[0]
+ if p != x0.Args[0] {
+ break
+ }
x0_1 := x0.Args[1]
- if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && same(p0, p1, 1) && clobber(x0)) {
+ if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) {
break
}
- v.reset(OpAMD64MOVWstore)
- v.AuxInt = i - 1
+ v.reset(OpAMD64MOVLstore)
+ v.AuxInt = i - 3
v.Aux = s
- v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type)
- v0.AuxInt = 8
+ v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, w.Type)
v0.AddArg(w)
- v.AddArg3(p0, v0, mem)
+ v.AddArg3(p, v0, mem)
return true
}
- // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem))))
- // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2)
- // result: (MOVLstore [i-3] {s} p0 (BSWAPL <w.Type> w) mem)
+ // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w) x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem))))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2)
+ // result: (MOVLstore [i] {s} p0 (BSWAPL <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
p3 := v_0
w := v_1
x2 := v_2
- if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i-1 || x2.Aux != s {
+ if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i || x2.Aux != s {
break
}
_ = x2.Args[2]
@@ -10884,7 +10736,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
x1 := x2.Args[2]
- if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i-2 || x1.Aux != s {
+ if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i || x1.Aux != s {
break
}
_ = x1.Args[2]
@@ -10894,37 +10746,39 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
x0 := x1.Args[2]
- if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-3 || x0.Aux != s {
+ if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s {
break
}
mem := x0.Args[2]
p0 := x0.Args[0]
x0_1 := x0.Args[1]
- if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2)) {
+ if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2)) {
break
}
v.reset(OpAMD64MOVLstore)
- v.AuxInt = i - 3
+ v.AuxInt = i
v.Aux = s
v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, w.Type)
v0.AddArg(w)
v.AddArg3(p0, v0, mem)
return true
}
- // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem))))))))
- // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)
- // result: (MOVQstore [i-7] {s} p0 (BSWAPQ <w.Type> w) mem)
+ // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem))))))))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)
+ // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem)
for {
i := v.AuxInt
s := v.Aux
- p7 := v_0
+ p := v_0
w := v_1
x6 := v_2
if x6.Op != OpAMD64MOVBstore || x6.AuxInt != i-1 || x6.Aux != s {
break
}
_ = x6.Args[2]
- p6 := x6.Args[0]
+ if p != x6.Args[0] {
+ break
+ }
x6_1 := x6.Args[1]
if x6_1.Op != OpAMD64SHRQconst || x6_1.AuxInt != 8 || w != x6_1.Args[0] {
break
@@ -10934,7 +10788,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
_ = x5.Args[2]
- p5 := x5.Args[0]
+ if p != x5.Args[0] {
+ break
+ }
x5_1 := x5.Args[1]
if x5_1.Op != OpAMD64SHRQconst || x5_1.AuxInt != 16 || w != x5_1.Args[0] {
break
@@ -10944,7 +10800,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
_ = x4.Args[2]
- p4 := x4.Args[0]
+ if p != x4.Args[0] {
+ break
+ }
x4_1 := x4.Args[1]
if x4_1.Op != OpAMD64SHRQconst || x4_1.AuxInt != 24 || w != x4_1.Args[0] {
break
@@ -10954,7 +10812,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
_ = x3.Args[2]
- p3 := x3.Args[0]
+ if p != x3.Args[0] {
+ break
+ }
x3_1 := x3.Args[1]
if x3_1.Op != OpAMD64SHRQconst || x3_1.AuxInt != 32 || w != x3_1.Args[0] {
break
@@ -10964,7 +10824,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
_ = x2.Args[2]
- p2 := x2.Args[0]
+ if p != x2.Args[0] {
+ break
+ }
x2_1 := x2.Args[1]
if x2_1.Op != OpAMD64SHRQconst || x2_1.AuxInt != 40 || w != x2_1.Args[0] {
break
@@ -10974,7 +10836,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
_ = x1.Args[2]
- p1 := x1.Args[0]
+ if p != x1.Args[0] {
+ break
+ }
x1_1 := x1.Args[1]
if x1_1.Op != OpAMD64SHRQconst || x1_1.AuxInt != 48 || w != x1_1.Args[0] {
break
@@ -10984,9 +10848,11 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
mem := x0.Args[2]
- p0 := x0.Args[0]
+ if p != x0.Args[0] {
+ break
+ }
x0_1 := x0.Args[1]
- if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) {
+ if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) {
break
}
v.reset(OpAMD64MOVQstore)
@@ -10994,16 +10860,102 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
v.Aux = s
v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, w.Type)
v0.AddArg(w)
+ v.AddArg3(p, v0, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem))))))))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && sequentialAddresses(p3, p4, 1) && sequentialAddresses(p4, p5, 1) && sequentialAddresses(p5, p6, 1) && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)
+ // result: (MOVQstore [i] {s} p0 (BSWAPQ <w.Type> w) mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p7 := v_0
+ w := v_1
+ x6 := v_2
+ if x6.Op != OpAMD64MOVBstore || x6.AuxInt != i || x6.Aux != s {
+ break
+ }
+ _ = x6.Args[2]
+ p6 := x6.Args[0]
+ x6_1 := x6.Args[1]
+ if x6_1.Op != OpAMD64SHRQconst || x6_1.AuxInt != 8 || w != x6_1.Args[0] {
+ break
+ }
+ x5 := x6.Args[2]
+ if x5.Op != OpAMD64MOVBstore || x5.AuxInt != i || x5.Aux != s {
+ break
+ }
+ _ = x5.Args[2]
+ p5 := x5.Args[0]
+ x5_1 := x5.Args[1]
+ if x5_1.Op != OpAMD64SHRQconst || x5_1.AuxInt != 16 || w != x5_1.Args[0] {
+ break
+ }
+ x4 := x5.Args[2]
+ if x4.Op != OpAMD64MOVBstore || x4.AuxInt != i || x4.Aux != s {
+ break
+ }
+ _ = x4.Args[2]
+ p4 := x4.Args[0]
+ x4_1 := x4.Args[1]
+ if x4_1.Op != OpAMD64SHRQconst || x4_1.AuxInt != 24 || w != x4_1.Args[0] {
+ break
+ }
+ x3 := x4.Args[2]
+ if x3.Op != OpAMD64MOVBstore || x3.AuxInt != i || x3.Aux != s {
+ break
+ }
+ _ = x3.Args[2]
+ p3 := x3.Args[0]
+ x3_1 := x3.Args[1]
+ if x3_1.Op != OpAMD64SHRQconst || x3_1.AuxInt != 32 || w != x3_1.Args[0] {
+ break
+ }
+ x2 := x3.Args[2]
+ if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i || x2.Aux != s {
+ break
+ }
+ _ = x2.Args[2]
+ p2 := x2.Args[0]
+ x2_1 := x2.Args[1]
+ if x2_1.Op != OpAMD64SHRQconst || x2_1.AuxInt != 40 || w != x2_1.Args[0] {
+ break
+ }
+ x1 := x2.Args[2]
+ if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i || x1.Aux != s {
+ break
+ }
+ _ = x1.Args[2]
+ p1 := x1.Args[0]
+ x1_1 := x1.Args[1]
+ if x1_1.Op != OpAMD64SHRQconst || x1_1.AuxInt != 48 || w != x1_1.Args[0] {
+ break
+ }
+ x0 := x1.Args[2]
+ if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s {
+ break
+ }
+ mem := x0.Args[2]
+ p0 := x0.Args[0]
+ x0_1 := x0.Args[1]
+ if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && sequentialAddresses(p3, p4, 1) && sequentialAddresses(p4, p5, 1) && sequentialAddresses(p5, p6, 1) && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) {
+ break
+ }
+ v.reset(OpAMD64MOVQstore)
+ v.AuxInt = i
+ v.Aux = s
+ v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, w.Type)
+ v0.AddArg(w)
v.AddArg3(p0, v0, mem)
return true
}
- // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVWstore [i-1] {s} p0 w mem)
+ // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVWstore [i-1] {s} p w mem)
for {
i := v.AuxInt
s := v.Aux
- p1 := v_0
+ p := v_0
if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 {
break
}
@@ -11013,19 +10965,232 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
break
}
mem := x.Args[2]
- p0 := x.Args[0]
- if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i - 1
+ v.Aux = s
+ v.AddArg3(p, w, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVWstore [i-1] {s} p w mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 8 {
+ break
+ }
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s {
+ break
+ }
+ mem := x.Args[2]
+ if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i - 1
+ v.Aux = s
+ v.AddArg3(p, w, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVWstore [i-1] {s} p w mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 8 {
+ break
+ }
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s {
+ break
+ }
+ mem := x.Args[2]
+ if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i - 1
+ v.Aux = s
+ v.AddArg3(p, w, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRWconst [8] w) mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVWstore [i] {s} p w mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ w := v_1
+ x := v_2
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
+ break
+ }
+ mem := x.Args[2]
+ if p != x.Args[0] {
+ break
+ }
+ x_1 := x.Args[1]
+ if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i
+ v.Aux = s
+ v.AddArg3(p, w, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRLconst [8] w) mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVWstore [i] {s} p w mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ w := v_1
+ x := v_2
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
+ break
+ }
+ mem := x.Args[2]
+ if p != x.Args[0] {
+ break
+ }
+ x_1 := x.Args[1]
+ if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i
+ v.Aux = s
+ v.AddArg3(p, w, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRQconst [8] w) mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVWstore [i] {s} p w mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ w := v_1
+ x := v_2
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
+ break
+ }
+ mem := x.Args[2]
+ if p != x.Args[0] {
+ break
+ }
+ x_1 := x.Args[1]
+ if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i
+ v.Aux = s
+ v.AddArg3(p, w, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVWstore [i-1] {s} p w0 mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRLconst {
+ break
+ }
+ j := v_1.AuxInt
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s {
+ break
+ }
+ mem := x.Args[2]
+ if p != x.Args[0] {
+ break
+ }
+ w0 := x.Args[1]
+ if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
v.AuxInt = i - 1
v.Aux = s
+ v.AddArg3(p, w0, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVWstore [i-1] {s} p w0 mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRQconst {
+ break
+ }
+ j := v_1.AuxInt
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s {
+ break
+ }
+ mem := x.Args[2]
+ if p != x.Args[0] {
+ break
+ }
+ w0 := x.Args[1]
+ if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i - 1
+ v.Aux = s
+ v.AddArg3(p, w0, mem)
+ return true
+ }
+ // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+ // result: (MOVWstore [i] {s} p0 w mem)
+ for {
+ i := v.AuxInt
+ s := v.Aux
+ p1 := v_0
+ if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 {
+ break
+ }
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s {
+ break
+ }
+ mem := x.Args[2]
+ p0 := x.Args[0]
+ if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVWstore)
+ v.AuxInt = i
+ v.Aux = s
v.AddArg3(p0, w, mem)
return true
}
- // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVWstore [i-1] {s} p0 w mem)
+ // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+ // result: (MOVWstore [i] {s} p0 w mem)
for {
i := v.AuxInt
s := v.Aux
@@ -11035,23 +11200,23 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
}
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s {
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
- if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
- v.AuxInt = i - 1
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w, mem)
return true
}
- // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVWstore [i-1] {s} p0 w mem)
+ // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i] {s} p0 w mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+ // result: (MOVWstore [i] {s} p0 w mem)
for {
i := v.AuxInt
s := v.Aux
@@ -11061,36 +11226,36 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
}
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s {
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
- if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
- v.AuxInt = i - 1
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w, mem)
return true
}
- // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRWconst [8] w) mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+ // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRWconst [8] w) mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
// result: (MOVWstore [i] {s} p0 w mem)
for {
i := v.AuxInt
s := v.Aux
- p1 := v_0
+ p0 := v_0
w := v_1
x := v_2
- if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
- p0 := x.Args[0]
+ p1 := x.Args[0]
x_1 := x.Args[1]
- if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
@@ -11099,22 +11264,22 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
v.AddArg3(p0, w, mem)
return true
}
- // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRLconst [8] w) mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+ // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRLconst [8] w) mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
// result: (MOVWstore [i] {s} p0 w mem)
for {
i := v.AuxInt
s := v.Aux
- p1 := v_0
+ p0 := v_0
w := v_1
x := v_2
- if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
- p0 := x.Args[0]
+ p1 := x.Args[0]
x_1 := x.Args[1]
- if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
@@ -11123,22 +11288,22 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
v.AddArg3(p0, w, mem)
return true
}
- // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRQconst [8] w) mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
+ // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRQconst [8] w) mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
// result: (MOVWstore [i] {s} p0 w mem)
for {
i := v.AuxInt
s := v.Aux
- p1 := v_0
+ p0 := v_0
w := v_1
x := v_2
- if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s {
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
- p0 := x.Args[0]
+ p1 := x.Args[0]
x_1 := x.Args[1]
- if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
@@ -11147,9 +11312,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
v.AddArg3(p0, w, mem)
return true
}
- // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVWstore [i-1] {s} p0 w0 mem)
+ // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+ // result: (MOVWstore [i] {s} p0 w0 mem)
for {
i := v.AuxInt
s := v.Aux
@@ -11160,24 +11325,24 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
j := v_1.AuxInt
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s {
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
w0 := x.Args[1]
- if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
- v.AuxInt = i - 1
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w0, mem)
return true
}
- // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRQconst [j-8] w) mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVWstore [i-1] {s} p0 w0 mem)
+ // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRQconst [j-8] w) mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)
+ // result: (MOVWstore [i] {s} p0 w0 mem)
for {
i := v.AuxInt
s := v.Aux
@@ -11188,17 +11353,17 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool {
j := v_1.AuxInt
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s {
+ if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
w0 := x.Args[1]
- if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstore)
- v.AuxInt = i - 1
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w0, mem)
return true
@@ -11337,13 +11502,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
v.AddArg2(ptr, mem)
return true
}
- // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
- // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+ // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
+ // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
+ // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
for {
c := v.AuxInt
s := v.Aux
- p1 := v_0
+ p := v_0
x := v_1
if x.Op != OpAMD64MOVBstoreconst {
break
@@ -11353,23 +11518,22 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
break
}
mem := x.Args[1]
- p0 := x.Args[0]
- if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+ if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstoreconst)
v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
v.Aux = s
- v.AddArg2(p0, mem)
+ v.AddArg2(p, mem)
return true
}
- // match: (MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
- // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem)
+ // match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
+ // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x)
+ // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem)
for {
a := v.AuxInt
s := v.Aux
- p1 := v_0
+ p := v_0
x := v_1
if x.Op != OpAMD64MOVBstoreconst {
break
@@ -11379,14 +11543,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
break
}
mem := x.Args[1]
- p0 := x.Args[0]
- if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
+ if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) {
break
}
v.reset(OpAMD64MOVWstoreconst)
v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off())
v.Aux = s
- v.AddArg2(p0, mem)
+ v.AddArg2(p, mem)
return true
}
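// Illustrative sketch, not part of this patch (hypothetical function name):
// two constant byte stores through the same base pointer at offsets 0 and 1.
// The MOVBstoreconst rules above are meant to fold such a pair into one
// MOVWstoreconst whose value is the makeValAndOff combination, here 0x1234
// (low-offset byte in the low bits).
func store_const16(b []byte) {
	b[0] = 0x34
	b[1] = 0x12
}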
// match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
@@ -11860,118 +12023,6 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool {
v.AddArg2(base, mem)
return true
}
- // match: (MOVLload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVLload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ1 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVLload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVLload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVLload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ2 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVLload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVLload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVLload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ4 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVLload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVLload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVLload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ8 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVLload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
// match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
// cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
// result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
@@ -12174,125 +12225,64 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
v.AddArg3(base, val, mem)
return true
}
- // match: (MOVLstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVLstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVQstore [i-4] {s} p w mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ1 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 32 {
break
}
- v.reset(OpAMD64MOVLstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
- return true
- }
- // match: (MOVLstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVLstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ2 {
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ mem := x.Args[2]
+ if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
break
}
- v.reset(OpAMD64MOVLstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
+ v.reset(OpAMD64MOVQstore)
+ v.AuxInt = i - 4
+ v.Aux = s
+ v.AddArg3(p, w, mem)
return true
}
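// Illustrative sketch, not part of this patch (hypothetical function name;
// assumes import "encoding/binary"): each PutUint32 call already lowers to a
// single MOVLstore, and one of the MOVLstore-pair rules in this function —
// either the same-pointer form at offsets i-4/i shown above, or the
// sequentialAddresses(p0, p1, 4) form further down — is meant to merge the low
// half and the high half (v shifted right by 32) into one MOVQstore.
func store_le64_halves(b []byte, v uint64) {
	binary.LittleEndian.PutUint32(b, uint32(v))
	binary.LittleEndian.PutUint32(b[4:], uint32(v>>32))
}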
- // match: (MOVLstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVLstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVQstore [i-4] {s} p w0 mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ4 {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRQconst {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ j := v_1.AuxInt
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s {
break
}
- v.reset(OpAMD64MOVLstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
- return true
- }
- // match: (MOVLstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVLstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ8 {
+ mem := x.Args[2]
+ if p != x.Args[0] {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ w0 := x.Args[1]
+ if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
break
}
- v.reset(OpAMD64MOVLstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
+ v.reset(OpAMD64MOVQstore)
+ v.AuxInt = i - 4
+ v.Aux = s
+ v.AddArg3(p, w0, mem)
return true
}
- // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVQstore [i-4] {s} p0 w mem)
+ // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)
+ // result: (MOVQstore [i] {s} p0 w mem)
for {
i := v.AuxInt
s := v.Aux
@@ -12302,23 +12292,23 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
}
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s {
+ if x.Op != OpAMD64MOVLstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
- if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)) {
break
}
v.reset(OpAMD64MOVQstore)
- v.AuxInt = i - 4
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w, mem)
return true
}
- // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVQstore [i-4] {s} p0 w0 mem)
+ // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)
+ // result: (MOVQstore [i] {s} p0 w0 mem)
for {
i := v.AuxInt
s := v.Aux
@@ -12329,17 +12319,17 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool {
j := v_1.AuxInt
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s {
+ if x.Op != OpAMD64MOVLstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
w0 := x.Args[1]
- if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)) {
break
}
v.reset(OpAMD64MOVQstore)
- v.AuxInt = i - 4
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w0, mem)
return true
@@ -13018,13 +13008,13 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
v.AddArg2(ptr, mem)
return true
}
- // match: (MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
- // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+ // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
+ // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
+ // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
for {
c := v.AuxInt
s := v.Aux
- p1 := v_0
+ p := v_0
x := v_1
if x.Op != OpAMD64MOVLstoreconst {
break
@@ -13034,8 +13024,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
break
}
mem := x.Args[1]
- p0 := x.Args[0]
- if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+ if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
break
}
v.reset(OpAMD64MOVQstore)
@@ -13043,16 +13032,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
v.Aux = s
v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
- v.AddArg3(p0, v0, mem)
+ v.AddArg3(p, v0, mem)
return true
}
- // match: (MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
- // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
+ // match: (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
+ // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x)
+ // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem)
for {
a := v.AuxInt
s := v.Aux
- p1 := v_0
+ p := v_0
x := v_1
if x.Op != OpAMD64MOVLstoreconst {
break
@@ -13062,8 +13051,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
break
}
mem := x.Args[1]
- p0 := x.Args[0]
- if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
+ if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) {
break
}
v.reset(OpAMD64MOVQstore)
@@ -13071,7 +13059,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
v.Aux = s
v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32
- v.AddArg3(p0, v0, mem)
+ v.AddArg3(p, v0, mem)
return true
}
// match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
@@ -13423,118 +13411,6 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool {
v.AddArg2(base, mem)
return true
}
- // match: (MOVQload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVQload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ1 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVQload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVQload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVQload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ2 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVQload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVQload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVQload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ4 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVQload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVQload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVQload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ8 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVQload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
// match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
// cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
// result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
@@ -13614,7 +13490,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
- b := v.Block
// match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem)
// cond: is32Bit(off1+off2)
// result: (MOVQstore [off1+off2] {sym} ptr val mem)
@@ -13681,122 +13556,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool {
v.AddArg3(base, val, mem)
return true
}
- // match: (MOVQstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVQstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ1 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVQstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
- return true
- }
- // match: (MOVQstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVQstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ2 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVQstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
- return true
- }
- // match: (MOVQstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVQstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ4 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVQstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
- return true
- }
- // match: (MOVQstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVQstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ8 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVQstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
- return true
- }
// match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
// cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
// result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
@@ -14430,13 +14189,13 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
v.AddArg2(ptr, mem)
return true
}
- // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem))
- // cond: config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
- // result: (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem)
+ // match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem))
+ // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)
+ // result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem)
for {
c := v.AuxInt
s := v.Aux
- p1 := v_0
+ p := v_0
x := v_1
if x.Op != OpAMD64MOVQstoreconst {
break
@@ -14446,8 +14205,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
break
}
mem := x.Args[1]
- p0 := x.Args[0]
- if !(config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
+ if p != x.Args[0] || !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) {
break
}
v.reset(OpAMD64MOVOstore)
@@ -14455,7 +14213,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
v.Aux = s
v0 := b.NewValue0(x.Pos, OpAMD64MOVOconst, types.TypeInt128)
v0.AuxInt = 0
- v.AddArg3(p0, v0, mem)
+ v.AddArg3(p, v0, mem)
return true
}
// match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
@@ -15119,118 +14877,6 @@ func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool {
v.AddArg2(base, mem)
return true
}
- // match: (MOVWload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVWload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ1 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVWload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVWload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVWload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ2 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVWload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVWload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVWload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ4 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVWload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
- // match: (MOVWload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVWload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem)
- for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ8 {
- break
- }
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- mem := v_1
- if !(i1 != 0 && is32Bit(i0+i1)) {
- break
- }
- v.reset(OpAMD64MOVWload)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg2(v0, mem)
- return true
- }
// match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem)
// cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2)
// result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
@@ -15416,125 +15062,119 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
v.AddArg3(base, val, mem)
return true
}
- // match: (MOVWstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVWstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVLstore [i-2] {s} p w mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ1 {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 16 {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
break
}
- v.reset(OpAMD64MOVWstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
+ mem := x.Args[2]
+ if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVLstore)
+ v.AuxInt = i - 2
+ v.Aux = s
+ v.AddArg3(p, w, mem)
return true
}
- // match: (MOVWstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVWstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVLstore [i-2] {s} p w mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ2 {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 16 {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
break
}
- v.reset(OpAMD64MOVWstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
+ mem := x.Args[2]
+ if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVLstore)
+ v.AuxInt = i - 2
+ v.Aux = s
+ v.AddArg3(p, w, mem)
return true
}
- // match: (MOVWstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVWstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVLstore [i-2] {s} p w0 mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ4 {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRLconst {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ j := v_1.AuxInt
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
break
}
- v.reset(OpAMD64MOVWstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
+ mem := x.Args[2]
+ if p != x.Args[0] {
+ break
+ }
+ w0 := x.Args[1]
+ if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVLstore)
+ v.AuxInt = i - 2
+ v.Aux = s
+ v.AddArg3(p, w0, mem)
return true
}
- // match: (MOVWstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem)
- // cond: i1 != 0 && is32Bit(i0+i1)
- // result: (MOVWstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem)
+ // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem))
+ // cond: x.Uses == 1 && clobber(x)
+ // result: (MOVLstore [i-2] {s} p w0 mem)
for {
- i0 := v.AuxInt
- s0 := v.Aux
- l := v_0
- if l.Op != OpAMD64LEAQ8 {
+ i := v.AuxInt
+ s := v.Aux
+ p := v_0
+ if v_1.Op != OpAMD64SHRQconst {
break
}
- i1 := l.AuxInt
- s1 := l.Aux
- y := l.Args[1]
- x := l.Args[0]
- val := v_1
- mem := v_2
- if !(i1 != 0 && is32Bit(i0+i1)) {
+ j := v_1.AuxInt
+ w := v_1.Args[0]
+ x := v_2
+ if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
break
}
- v.reset(OpAMD64MOVWstore)
- v.AuxInt = i0 + i1
- v.Aux = s0
- v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type)
- v0.AuxInt = 0
- v0.Aux = s1
- v0.AddArg2(x, y)
- v.AddArg3(v0, val, mem)
+ mem := x.Args[2]
+ if p != x.Args[0] {
+ break
+ }
+ w0 := x.Args[1]
+ if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) {
+ break
+ }
+ v.reset(OpAMD64MOVLstore)
+ v.AuxInt = i - 2
+ v.Aux = s
+ v.AddArg3(p, w0, mem)
return true
}
- // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVLstore [i-2] {s} p0 w mem)
+ // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
+ // result: (MOVLstore [i] {s} p0 w mem)
for {
i := v.AuxInt
s := v.Aux
@@ -15544,23 +15184,23 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
}
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+ if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
- if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstore)
- v.AuxInt = i - 2
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w, mem)
return true
}
- // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVLstore [i-2] {s} p0 w mem)
+ // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i] {s} p0 w mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
+ // result: (MOVLstore [i] {s} p0 w mem)
for {
i := v.AuxInt
s := v.Aux
@@ -15570,23 +15210,23 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
}
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+ if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
- if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstore)
- v.AuxInt = i - 2
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w, mem)
return true
}
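// Illustrative sketch, not part of this patch (hypothetical function name):
// four byte stores at consecutive variable-indexed addresses. The byte pairs
// should first merge into two MOVWstores via the MOVBstore rules, and those
// two 16-bit stores, two bytes apart, are what the sequentialAddresses(p0, p1, 2)
// MOVWstore rules above are meant to collapse into a single MOVLstore.
func store_le32_idx(b []byte, i int, v uint32) {
	b[i] = byte(v)
	b[i+1] = byte(v >> 8)
	b[i+2] = byte(v >> 16)
	b[i+3] = byte(v >> 24)
}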
- // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVLstore [i-2] {s} p0 w0 mem)
+ // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
+ // result: (MOVLstore [i] {s} p0 w0 mem)
for {
i := v.AuxInt
s := v.Aux
@@ -15597,24 +15237,24 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
j := v_1.AuxInt
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+ if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
w0 := x.Args[1]
- if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstore)
- v.AuxInt = i - 2
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w0, mem)
return true
}
- // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRQconst [j-16] w) mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x)
- // result: (MOVLstore [i-2] {s} p0 w0 mem)
+ // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRQconst [j-16] w) mem))
+ // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)
+ // result: (MOVLstore [i] {s} p0 w0 mem)
for {
i := v.AuxInt
s := v.Aux
@@ -15625,17 +15265,17 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool {
j := v_1.AuxInt
w := v_1.Args[0]
x := v_2
- if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s {
+ if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s {
break
}
mem := x.Args[2]
p0 := x.Args[0]
w0 := x.Args[1]
- if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) {
+ if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstore)
- v.AuxInt = i - 2
+ v.AuxInt = i
v.Aux = s
v.AddArg3(p0, w0, mem)
return true
@@ -15774,13 +15414,13 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
v.AddArg2(ptr, mem)
return true
}
- // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
- // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
+ // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+ // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+ // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
for {
c := v.AuxInt
s := v.Aux
- p1 := v_0
+ p := v_0
x := v_1
if x.Op != OpAMD64MOVWstoreconst {
break
@@ -15790,23 +15430,22 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
break
}
mem := x.Args[1]
- p0 := x.Args[0]
- if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+ if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstoreconst)
v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
v.Aux = s
- v.AddArg2(p0, mem)
+ v.AddArg2(p, mem)
return true
}
- // match: (MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem))
- // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
- // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem)
+ // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
+ // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x)
+ // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem)
for {
a := v.AuxInt
s := v.Aux
- p1 := v_0
+ p := v_0
x := v_1
if x.Op != OpAMD64MOVWstoreconst {
break
@@ -15816,14 +15455,13 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
break
}
mem := x.Args[1]
- p0 := x.Args[0]
- if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
+ if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) {
break
}
v.reset(OpAMD64MOVLstoreconst)
v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off())
v.Aux = s
- v.AddArg2(p0, mem)
+ v.AddArg2(p, mem)
return true
}
// match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem)
@@ -17887,9 +17525,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
v.copyOf(x)
return true
}
- // match: (ORL x0:(MOVBload [i0] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem)))
- // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
- // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
+ // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem)))
+ // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x0 := v_0
@@ -17899,7 +17537,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
i0 := x0.AuxInt
s := x0.Aux
mem := x0.Args[1]
- p0 := x0.Args[0]
+ p := x0.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 {
continue
@@ -17913,8 +17551,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
continue
}
_ = x1.Args[1]
- p1 := x1.Args[0]
- if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -17922,14 +17559,50 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
v.copyOf(v0)
v0.AuxInt = i0
v0.Aux = s
+ v0.AddArg2(p, mem)
+ return true
+ }
+ break
+ }
+ // match: (ORL x0:(MOVBload [i] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem)))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x0 := v_0
+ if x0.Op != OpAMD64MOVBload {
+ continue
+ }
+ i := x0.AuxInt
+ s := x0.Aux
+ mem := x0.Args[1]
+ p0 := x0.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 {
+ continue
+ }
+ x1 := sh.Args[0]
+ if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s {
+ continue
+ }
+ _ = x1.Args[1]
+ p1 := x1.Args[0]
+ if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
+ v.copyOf(v0)
+ v0.AuxInt = i
+ v0.Aux = s
v0.AddArg2(p0, mem)
return true
}
break
}
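// Illustrative sketch, not part of this patch (hypothetical function name): a
// little-endian 16-bit read assembled from two byte loads. With a variable
// index the loads use two pointer values one byte apart, so the
// sequentialAddresses(p0, p1, 1) variant of the ORL rule above is the one
// meant to turn them into a single MOVWload.
func load_le16_idx(b []byte, i int) uint16 {
	return uint16(b[i]) | uint16(b[i+1])<<8
}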
- // match: (ORL x0:(MOVWload [i0] {s} p0 mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem)))
- // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
- // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
+ // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem)))
+ // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x0 := v_0
@@ -17939,7 +17612,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
i0 := x0.AuxInt
s := x0.Aux
mem := x0.Args[1]
- p0 := x0.Args[0]
+ p := x0.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 {
continue
@@ -17953,8 +17626,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
continue
}
_ = x1.Args[1]
- p1 := x1.Args[0]
- if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -17962,14 +17634,50 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
v.copyOf(v0)
v0.AuxInt = i0
v0.Aux = s
+ v0.AddArg2(p, mem)
+ return true
+ }
+ break
+ }
+ // match: (ORL x0:(MOVWload [i] {s} p0 mem) sh:(SHLLconst [16] x1:(MOVWload [i] {s} p1 mem)))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x0 := v_0
+ if x0.Op != OpAMD64MOVWload {
+ continue
+ }
+ i := x0.AuxInt
+ s := x0.Aux
+ mem := x0.Args[1]
+ p0 := x0.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 {
+ continue
+ }
+ x1 := sh.Args[0]
+ if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s {
+ continue
+ }
+ _ = x1.Args[1]
+ p1 := x1.Args[0]
+ if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
+ v.copyOf(v0)
+ v0.AuxInt = i
+ v0.Aux = s
v0.AddArg2(p0, mem)
return true
}
break
}
- // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y))
- // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
- // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
+ // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+ // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
s1 := v_0
@@ -17984,7 +17692,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
i1 := x1.AuxInt
s := x1.Aux
mem := x1.Args[1]
- p0 := x1.Args[0]
+ p := x1.Args[0]
or := v_1
if or.Op != OpAMD64ORL {
continue
@@ -18007,12 +17715,11 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
continue
}
_ = x0.Args[1]
- p1 := x0.Args[0]
- if mem != x0.Args[1] {
+ if p != x0.Args[0] || mem != x0.Args[1] {
continue
}
y := or_1
- if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
continue
}
b = mergePoint(b, x0, x1, y)
@@ -18023,6 +17730,66 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
v2.AuxInt = i0
v2.Aux = s
+ v2.AddArg2(p, mem)
+ v1.AddArg(v2)
+ v0.AddArg2(v1, y)
+ return true
+ }
+ }
+ break
+ }
+ // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i] {s} p1 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i] {s} p0 mem)) y))
+ // cond: j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ s1 := v_0
+ if s1.Op != OpAMD64SHLLconst {
+ continue
+ }
+ j1 := s1.AuxInt
+ x1 := s1.Args[0]
+ if x1.Op != OpAMD64MOVBload {
+ continue
+ }
+ i := x1.AuxInt
+ s := x1.Aux
+ mem := x1.Args[1]
+ p1 := x1.Args[0]
+ or := v_1
+ if or.Op != OpAMD64ORL {
+ continue
+ }
+ _ = or.Args[1]
+ or_0 := or.Args[0]
+ or_1 := or.Args[1]
+ for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
+ s0 := or_0
+ if s0.Op != OpAMD64SHLLconst {
+ continue
+ }
+ j0 := s0.AuxInt
+ x0 := s0.Args[0]
+ if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s {
+ continue
+ }
+ _ = x0.Args[1]
+ p0 := x0.Args[0]
+ if mem != x0.Args[1] {
+ continue
+ }
+ y := or_1
+ if !(j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1, y)
+ v0 := b.NewValue0(x0.Pos, OpAMD64ORL, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x0.Pos, OpAMD64SHLLconst, v.Type)
+ v1.AuxInt = j0
+ v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
+ v2.AuxInt = i
+ v2.Aux = s
v2.AddArg2(p0, mem)
v1.AddArg(v2)
v0.AddArg2(v1, y)
@@ -18031,9 +17798,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
}
break
}
- // match: (ORL x1:(MOVBload [i1] {s} p0 mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem)))
- // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
- // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
+ // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem)))
+ // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x1 := v_0
@@ -18043,7 +17810,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
i1 := x1.AuxInt
s := x1.Aux
mem := x1.Args[1]
- p0 := x1.Args[0]
+ p := x1.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 {
continue
@@ -18057,8 +17824,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
continue
}
_ = x0.Args[1]
- p1 := x0.Args[0]
- if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -18068,15 +17834,54 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
v1.AuxInt = i0
v1.Aux = s
+ v1.AddArg2(p, mem)
+ v0.AddArg(v1)
+ return true
+ }
+ break
+ }
+ // match: (ORL x1:(MOVBload [i] {s} p1 mem) sh:(SHLLconst [8] x0:(MOVBload [i] {s} p0 mem)))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x1 := v_0
+ if x1.Op != OpAMD64MOVBload {
+ continue
+ }
+ i := x1.AuxInt
+ s := x1.Aux
+ mem := x1.Args[1]
+ p1 := x1.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 {
+ continue
+ }
+ x0 := sh.Args[0]
+ if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s {
+ continue
+ }
+ _ = x0.Args[1]
+ p0 := x0.Args[0]
+ if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type)
+ v.copyOf(v0)
+ v0.AuxInt = 8
+ v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
+ v1.AuxInt = i
+ v1.Aux = s
v1.AddArg2(p0, mem)
v0.AddArg(v1)
return true
}
break
}
- // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
- // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
- // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
+ // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+ // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+ // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
r1 := v_0
@@ -18090,7 +17895,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
i1 := x1.AuxInt
s := x1.Aux
mem := x1.Args[1]
- p0 := x1.Args[0]
+ p := x1.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 {
continue
@@ -18108,8 +17913,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
continue
}
_ = x0.Args[1]
- p1 := x0.Args[0]
- if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+ if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -18118,15 +17922,61 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
v1.AuxInt = i0
v1.Aux = s
+ v1.AddArg2(p, mem)
+ v0.AddArg(v1)
+ return true
+ }
+ break
+ }
+ // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+ // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ r1 := v_0
+ if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 {
+ continue
+ }
+ x1 := r1.Args[0]
+ if x1.Op != OpAMD64MOVWload {
+ continue
+ }
+ i := x1.AuxInt
+ s := x1.Aux
+ mem := x1.Args[1]
+ p1 := x1.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 {
+ continue
+ }
+ r0 := sh.Args[0]
+ if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 {
+ continue
+ }
+ x0 := r0.Args[0]
+ if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s {
+ continue
+ }
+ _ = x0.Args[1]
+ p0 := x0.Args[0]
+ if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
+ v1.AuxInt = i
+ v1.Aux = s
v1.AddArg2(p0, mem)
v0.AddArg(v1)
return true
}
break
}
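// Illustrative sketch, not part of this patch (hypothetical function name): a
// big-endian 32-bit read built from byte loads. The ROLWconst/SHLLconst
// combinations handled by the ORL rules above are meant to fold this pattern
// into a single MOVLload followed by a BSWAPL.
func load_be32(b []byte) uint32 {
	return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
}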
- // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y))
- // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
- // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
+ // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
+ // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
s0 := v_0
@@ -18141,7 +17991,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
i0 := x0.AuxInt
s := x0.Aux
mem := x0.Args[1]
- p0 := x0.Args[0]
+ p := x0.Args[0]
or := v_1
if or.Op != OpAMD64ORL {
continue
@@ -18164,12 +18014,74 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
continue
}
_ = x1.Args[1]
+ if p != x1.Args[0] || mem != x1.Args[1] {
+ continue
+ }
+ y := or_1
+ if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1, y)
+ v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type)
+ v1.AuxInt = j1
+ v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
+ v2.AuxInt = 8
+ v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
+ v3.AuxInt = i0
+ v3.Aux = s
+ v3.AddArg2(p, mem)
+ v2.AddArg(v3)
+ v1.AddArg(v2)
+ v0.AddArg2(v1, y)
+ return true
+ }
+ }
+ break
+ }
+ // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i] {s} p1 mem)) y))
+ // cond: j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ s0 := v_0
+ if s0.Op != OpAMD64SHLLconst {
+ continue
+ }
+ j0 := s0.AuxInt
+ x0 := s0.Args[0]
+ if x0.Op != OpAMD64MOVBload {
+ continue
+ }
+ i := x0.AuxInt
+ s := x0.Aux
+ mem := x0.Args[1]
+ p0 := x0.Args[0]
+ or := v_1
+ if or.Op != OpAMD64ORL {
+ continue
+ }
+ _ = or.Args[1]
+ or_0 := or.Args[0]
+ or_1 := or.Args[1]
+ for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
+ s1 := or_0
+ if s1.Op != OpAMD64SHLLconst {
+ continue
+ }
+ j1 := s1.AuxInt
+ x1 := s1.Args[0]
+ if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s {
+ continue
+ }
+ _ = x1.Args[1]
p1 := x1.Args[0]
if mem != x1.Args[1] {
continue
}
y := or_1
- if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ if !(j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
continue
}
b = mergePoint(b, x0, x1, y)
@@ -18180,7 +18092,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool {
v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
v2.AuxInt = 8
v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
- v3.AuxInt = i0
+ v3.AuxInt = i
v3.Aux = s
v3.AddArg2(p0, mem)
v2.AddArg(v3)
@@ -18804,9 +18716,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v.copyOf(x)
return true
}
- // match: (ORQ x0:(MOVBload [i0] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem)))
- // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
- // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem)
+ // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem)))
+ // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x0 := v_0
@@ -18816,7 +18728,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i0 := x0.AuxInt
s := x0.Aux
mem := x0.Args[1]
- p0 := x0.Args[0]
+ p := x0.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
continue
@@ -18830,8 +18742,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x1.Args[1]
- p1 := x1.Args[0]
- if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -18839,14 +18750,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v.copyOf(v0)
v0.AuxInt = i0
v0.Aux = s
+ v0.AddArg2(p, mem)
+ return true
+ }
+ break
+ }
+ // match: (ORQ x0:(MOVBload [i] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i] {s} p1 mem)))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x0 := v_0
+ if x0.Op != OpAMD64MOVBload {
+ continue
+ }
+ i := x0.AuxInt
+ s := x0.Aux
+ mem := x0.Args[1]
+ p0 := x0.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
+ continue
+ }
+ x1 := sh.Args[0]
+ if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s {
+ continue
+ }
+ _ = x1.Args[1]
+ p1 := x1.Args[0]
+ if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
+ v.copyOf(v0)
+ v0.AuxInt = i
+ v0.Aux = s
v0.AddArg2(p0, mem)
return true
}
break
}
- // match: (ORQ x0:(MOVWload [i0] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem)))
- // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
- // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem)
+ // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem)))
+ // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x0 := v_0
@@ -18856,7 +18803,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i0 := x0.AuxInt
s := x0.Aux
mem := x0.Args[1]
- p0 := x0.Args[0]
+ p := x0.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
continue
@@ -18870,8 +18817,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x1.Args[1]
- p1 := x1.Args[0]
- if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -18879,14 +18825,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v.copyOf(v0)
v0.AuxInt = i0
v0.Aux = s
+ v0.AddArg2(p, mem)
+ return true
+ }
+ break
+ }
+ // match: (ORQ x0:(MOVWload [i] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i] {s} p1 mem)))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x0 := v_0
+ if x0.Op != OpAMD64MOVWload {
+ continue
+ }
+ i := x0.AuxInt
+ s := x0.Aux
+ mem := x0.Args[1]
+ p0 := x0.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
+ continue
+ }
+ x1 := sh.Args[0]
+ if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s {
+ continue
+ }
+ _ = x1.Args[1]
+ p1 := x1.Args[0]
+ if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
+ v.copyOf(v0)
+ v0.AuxInt = i
+ v0.Aux = s
v0.AddArg2(p0, mem)
return true
}
break
}
- // match: (ORQ x0:(MOVLload [i0] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem)))
- // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
- // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem)
+ // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem)))
+ // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x0 := v_0
@@ -18896,7 +18878,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i0 := x0.AuxInt
s := x0.Aux
mem := x0.Args[1]
- p0 := x0.Args[0]
+ p := x0.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
continue
@@ -18910,8 +18892,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x1.Args[1]
- p1 := x1.Args[0]
- if mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -18919,14 +18900,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v.copyOf(v0)
v0.AuxInt = i0
v0.Aux = s
+ v0.AddArg2(p, mem)
+ return true
+ }
+ break
+ }
+ // match: (ORQ x0:(MOVLload [i] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem)))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x0 := v_0
+ if x0.Op != OpAMD64MOVLload {
+ continue
+ }
+ i := x0.AuxInt
+ s := x0.Aux
+ mem := x0.Args[1]
+ p0 := x0.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
+ continue
+ }
+ x1 := sh.Args[0]
+ if x1.Op != OpAMD64MOVLload || x1.AuxInt != i || x1.Aux != s {
+ continue
+ }
+ _ = x1.Args[1]
+ p1 := x1.Args[0]
+ if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x1.Pos, OpAMD64MOVQload, typ.UInt64)
+ v.copyOf(v0)
+ v0.AuxInt = i
+ v0.Aux = s
v0.AddArg2(p0, mem)
return true
}
break
}
- // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y))
- // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
- // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y)
+ // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y))
+ // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
s1 := v_0
@@ -18941,7 +18958,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i1 := x1.AuxInt
s := x1.Aux
mem := x1.Args[1]
- p0 := x1.Args[0]
+ p := x1.Args[0]
or := v_1
if or.Op != OpAMD64ORQ {
continue
@@ -18964,12 +18981,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x0.Args[1]
- p1 := x0.Args[0]
- if mem != x0.Args[1] {
+ if p != x0.Args[0] || mem != x0.Args[1] {
continue
}
y := or_1
- if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
continue
}
b = mergePoint(b, x0, x1, y)
@@ -18980,6 +18996,66 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
v2.AuxInt = i0
v2.Aux = s
+ v2.AddArg2(p, mem)
+ v1.AddArg(v2)
+ v0.AddArg2(v1, y)
+ return true
+ }
+ }
+ break
+ }
+ // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i] {s} p1 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i] {s} p0 mem)) y))
+ // cond: j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ s1 := v_0
+ if s1.Op != OpAMD64SHLQconst {
+ continue
+ }
+ j1 := s1.AuxInt
+ x1 := s1.Args[0]
+ if x1.Op != OpAMD64MOVBload {
+ continue
+ }
+ i := x1.AuxInt
+ s := x1.Aux
+ mem := x1.Args[1]
+ p1 := x1.Args[0]
+ or := v_1
+ if or.Op != OpAMD64ORQ {
+ continue
+ }
+ _ = or.Args[1]
+ or_0 := or.Args[0]
+ or_1 := or.Args[1]
+ for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
+ s0 := or_0
+ if s0.Op != OpAMD64SHLQconst {
+ continue
+ }
+ j0 := s0.AuxInt
+ x0 := s0.Args[0]
+ if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s {
+ continue
+ }
+ _ = x0.Args[1]
+ p0 := x0.Args[0]
+ if mem != x0.Args[1] {
+ continue
+ }
+ y := or_1
+ if !(j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1, y)
+ v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type)
+ v1.AuxInt = j0
+ v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
+ v2.AuxInt = i
+ v2.Aux = s
v2.AddArg2(p0, mem)
v1.AddArg(v2)
v0.AddArg2(v1, y)
@@ -18988,9 +19064,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
}
break
}
- // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem)) y))
- // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
- // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p0 mem)) y)
+ // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y))
+ // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
s1 := v_0
@@ -19005,7 +19081,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i1 := x1.AuxInt
s := x1.Aux
mem := x1.Args[1]
- p0 := x1.Args[0]
+ p := x1.Args[0]
or := v_1
if or.Op != OpAMD64ORQ {
continue
@@ -19028,12 +19104,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x0.Args[1]
- p1 := x0.Args[0]
- if mem != x0.Args[1] {
+ if p != x0.Args[0] || mem != x0.Args[1] {
continue
}
y := or_1
- if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
continue
}
b = mergePoint(b, x0, x1, y)
@@ -19044,6 +19119,66 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
v2.AuxInt = i0
v2.Aux = s
+ v2.AddArg2(p, mem)
+ v1.AddArg(v2)
+ v0.AddArg2(v1, y)
+ return true
+ }
+ }
+ break
+ }
+ // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem)) y))
+ // cond: j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ s1 := v_0
+ if s1.Op != OpAMD64SHLQconst {
+ continue
+ }
+ j1 := s1.AuxInt
+ x1 := s1.Args[0]
+ if x1.Op != OpAMD64MOVWload {
+ continue
+ }
+ i := x1.AuxInt
+ s := x1.Aux
+ mem := x1.Args[1]
+ p1 := x1.Args[0]
+ or := v_1
+ if or.Op != OpAMD64ORQ {
+ continue
+ }
+ _ = or.Args[1]
+ or_0 := or.Args[0]
+ or_1 := or.Args[1]
+ for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
+ s0 := or_0
+ if s0.Op != OpAMD64SHLQconst {
+ continue
+ }
+ j0 := s0.AuxInt
+ x0 := s0.Args[0]
+ if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s {
+ continue
+ }
+ _ = x0.Args[1]
+ p0 := x0.Args[0]
+ if mem != x0.Args[1] {
+ continue
+ }
+ y := or_1
+ if !(j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1, y)
+ v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type)
+ v1.AuxInt = j0
+ v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
+ v2.AuxInt = i
+ v2.Aux = s
v2.AddArg2(p0, mem)
v1.AddArg(v2)
v0.AddArg2(v1, y)
@@ -19052,9 +19187,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
}
break
}
- // match: (ORQ x1:(MOVBload [i1] {s} p0 mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem)))
- // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
- // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem))
+ // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem)))
+ // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x1 := v_0
@@ -19064,7 +19199,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i1 := x1.AuxInt
s := x1.Aux
mem := x1.Args[1]
- p0 := x1.Args[0]
+ p := x1.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
continue
@@ -19078,8 +19213,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x0.Args[1]
- p1 := x0.Args[0]
- if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -19089,15 +19223,54 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
v1.AuxInt = i0
v1.Aux = s
+ v1.AddArg2(p, mem)
+ v0.AddArg(v1)
+ return true
+ }
+ break
+ }
+ // match: (ORQ x1:(MOVBload [i] {s} p1 mem) sh:(SHLQconst [8] x0:(MOVBload [i] {s} p0 mem)))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
+ // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem))
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x1 := v_0
+ if x1.Op != OpAMD64MOVBload {
+ continue
+ }
+ i := x1.AuxInt
+ s := x1.Aux
+ mem := x1.Args[1]
+ p1 := x1.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
+ continue
+ }
+ x0 := sh.Args[0]
+ if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s {
+ continue
+ }
+ _ = x0.Args[1]
+ p0 := x0.Args[0]
+ if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type)
+ v.copyOf(v0)
+ v0.AuxInt = 8
+ v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16)
+ v1.AuxInt = i
+ v1.Aux = s
v1.AddArg2(p0, mem)
v0.AddArg(v1)
return true
}
break
}
- // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem))))
- // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
- // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem))
+ // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))))
+ // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+ // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
r1 := v_0
@@ -19111,7 +19284,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i1 := x1.AuxInt
s := x1.Aux
mem := x1.Args[1]
- p0 := x1.Args[0]
+ p := x1.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
continue
@@ -19129,8 +19302,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x0.Args[1]
- p1 := x0.Args[0]
- if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+ if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -19139,15 +19311,61 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
v1.AuxInt = i0
v1.Aux = s
+ v1.AddArg2(p, mem)
+ v0.AddArg(v1)
+ return true
+ }
+ break
+ }
+ // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+ // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem))
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ r1 := v_0
+ if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 {
+ continue
+ }
+ x1 := r1.Args[0]
+ if x1.Op != OpAMD64MOVWload {
+ continue
+ }
+ i := x1.AuxInt
+ s := x1.Aux
+ mem := x1.Args[1]
+ p1 := x1.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
+ continue
+ }
+ r0 := sh.Args[0]
+ if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 {
+ continue
+ }
+ x0 := r0.Args[0]
+ if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s {
+ continue
+ }
+ _ = x0.Args[1]
+ p0 := x0.Args[0]
+ if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32)
+ v1.AuxInt = i
+ v1.Aux = s
v1.AddArg2(p0, mem)
v0.AddArg(v1)
return true
}
break
}
- // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem))))
- // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
- // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p0 mem))
+ // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem))))
+ // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+ // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
r1 := v_0
@@ -19161,7 +19379,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i1 := x1.AuxInt
s := x1.Aux
mem := x1.Args[1]
- p0 := x1.Args[0]
+ p := x1.Args[0]
sh := v_1
if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
continue
@@ -19179,8 +19397,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x0.Args[1]
- p1 := x0.Args[0]
- if mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+ if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
continue
}
b = mergePoint(b, x0, x1)
@@ -19189,15 +19406,61 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64)
v1.AuxInt = i0
v1.Aux = s
+ v1.AddArg2(p, mem)
+ v0.AddArg(v1)
+ return true
+ }
+ break
+ }
+ // match: (ORQ r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem))))
+ // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
+ // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem))
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ r1 := v_0
+ if r1.Op != OpAMD64BSWAPL {
+ continue
+ }
+ x1 := r1.Args[0]
+ if x1.Op != OpAMD64MOVLload {
+ continue
+ }
+ i := x1.AuxInt
+ s := x1.Aux
+ mem := x1.Args[1]
+ p1 := x1.Args[0]
+ sh := v_1
+ if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
+ continue
+ }
+ r0 := sh.Args[0]
+ if r0.Op != OpAMD64BSWAPL {
+ continue
+ }
+ x0 := r0.Args[0]
+ if x0.Op != OpAMD64MOVLload || x0.AuxInt != i || x0.Aux != s {
+ continue
+ }
+ _ = x0.Args[1]
+ p0 := x0.Args[0]
+ if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1)
+ v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64)
+ v1.AuxInt = i
+ v1.Aux = s
v1.AddArg2(p0, mem)
v0.AddArg(v1)
return true
}
break
}
- // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y))
- // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
- // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y)
+ // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y))
+ // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
s0 := v_0
@@ -19212,7 +19475,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i0 := x0.AuxInt
s := x0.Aux
mem := x0.Args[1]
- p0 := x0.Args[0]
+ p := x0.Args[0]
or := v_1
if or.Op != OpAMD64ORQ {
continue
@@ -19235,12 +19498,74 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x1.Args[1]
+ if p != x1.Args[0] || mem != x1.Args[1] {
+ continue
+ }
+ y := or_1
+ if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1, y)
+ v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type)
+ v1.AuxInt = j1
+ v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
+ v2.AuxInt = 8
+ v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
+ v3.AuxInt = i0
+ v3.Aux = s
+ v3.AddArg2(p, mem)
+ v2.AddArg(v3)
+ v1.AddArg(v2)
+ v0.AddArg2(v1, y)
+ return true
+ }
+ }
+ break
+ }
+ // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i] {s} p1 mem)) y))
+ // cond: j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ s0 := v_0
+ if s0.Op != OpAMD64SHLQconst {
+ continue
+ }
+ j0 := s0.AuxInt
+ x0 := s0.Args[0]
+ if x0.Op != OpAMD64MOVBload {
+ continue
+ }
+ i := x0.AuxInt
+ s := x0.Aux
+ mem := x0.Args[1]
+ p0 := x0.Args[0]
+ or := v_1
+ if or.Op != OpAMD64ORQ {
+ continue
+ }
+ _ = or.Args[1]
+ or_0 := or.Args[0]
+ or_1 := or.Args[1]
+ for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
+ s1 := or_0
+ if s1.Op != OpAMD64SHLQconst {
+ continue
+ }
+ j1 := s1.AuxInt
+ x1 := s1.Args[0]
+ if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s {
+ continue
+ }
+ _ = x1.Args[1]
p1 := x1.Args[0]
if mem != x1.Args[1] {
continue
}
y := or_1
- if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+ if !(j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
continue
}
b = mergePoint(b, x0, x1, y)
@@ -19251,7 +19576,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16)
v2.AuxInt = 8
v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
- v3.AuxInt = i0
+ v3.AuxInt = i
v3.Aux = s
v3.AddArg2(p0, mem)
v2.AddArg(v3)
@@ -19262,9 +19587,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
}
break
}
- // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem))) y))
- // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
- // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p0 mem))) y)
+ // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y))
+ // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
s0 := v_0
@@ -19283,7 +19608,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
i0 := x0.AuxInt
s := x0.Aux
mem := x0.Args[1]
- p0 := x0.Args[0]
+ p := x0.Args[0]
or := v_1
if or.Op != OpAMD64ORQ {
continue
@@ -19310,12 +19635,81 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
continue
}
_ = x1.Args[1]
+ if p != x1.Args[0] || mem != x1.Args[1] {
+ continue
+ }
+ y := or_1
+ if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
+ continue
+ }
+ b = mergePoint(b, x0, x1, y)
+ v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type)
+ v.copyOf(v0)
+ v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type)
+ v1.AuxInt = j1
+ v2 := b.NewValue0(x1.Pos, OpAMD64BSWAPL, typ.UInt32)
+ v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
+ v3.AuxInt = i0
+ v3.Aux = s
+ v3.AddArg2(p, mem)
+ v2.AddArg(v3)
+ v1.AddArg(v2)
+ v0.AddArg2(v1, y)
+ return true
+ }
+ }
+ break
+ }
+ // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))) y))
+ // cond: j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
+ // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ s0 := v_0
+ if s0.Op != OpAMD64SHLQconst {
+ continue
+ }
+ j0 := s0.AuxInt
+ r0 := s0.Args[0]
+ if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 {
+ continue
+ }
+ x0 := r0.Args[0]
+ if x0.Op != OpAMD64MOVWload {
+ continue
+ }
+ i := x0.AuxInt
+ s := x0.Aux
+ mem := x0.Args[1]
+ p0 := x0.Args[0]
+ or := v_1
+ if or.Op != OpAMD64ORQ {
+ continue
+ }
+ _ = or.Args[1]
+ or_0 := or.Args[0]
+ or_1 := or.Args[1]
+ for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 {
+ s1 := or_0
+ if s1.Op != OpAMD64SHLQconst {
+ continue
+ }
+ j1 := s1.AuxInt
+ r1 := s1.Args[0]
+ if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 {
+ continue
+ }
+ x1 := r1.Args[0]
+ if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s {
+ continue
+ }
+ _ = x1.Args[1]
p1 := x1.Args[0]
if mem != x1.Args[1] {
continue
}
y := or_1
- if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
+ if !(j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
continue
}
b = mergePoint(b, x0, x1, y)
@@ -19325,7 +19719,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
v1.AuxInt = j1
v2 := b.NewValue0(x1.Pos, OpAMD64BSWAPL, typ.UInt32)
v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
- v3.AuxInt = i0
+ v3.AuxInt = i
v3.Aux = s
v3.AddArg2(p0, mem)
v2.AddArg(v3)
diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go
index e2d703cb0c..6ad9514557 100644
--- a/test/codegen/memcombine.go
+++ b/test/codegen/memcombine.go
@@ -160,14 +160,14 @@ func load_le_byte8_uint64_inv(s []byte) uint64 {

func load_be_byte2_uint16(s []byte) uint16 {
// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
- // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+ // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
return uint16(s[0])<<8 | uint16(s[1])
}

func load_be_byte2_uint16_inv(s []byte) uint16 {
// arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB`
- // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR`
+ // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR`
// ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ`
return uint16(s[1]) | uint16(s[0])<<8
}
@@ -179,7 +179,7 @@ func load_be_byte4_uint32(s []byte) uint32 {

func load_be_byte4_uint32_inv(s []byte) uint32 {
// arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]`
- // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR`
+ // amd64:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR`
return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24
}

@@ -191,7 +191,7 @@ func load_be_byte8_uint64(s []byte) uint64 {

func load_be_byte8_uint64_inv(s []byte) uint64 {
// arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]`
- // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR`
+ // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR`
// ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z`
return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56
}
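
The updated codegen assertions above expect the merged big-endian loads to surface as one wide load plus a byte-swapping instruction (ROLW, BSWAPL, or BSWAPQ) instead of per-byte MOVB loads joined by OR. A minimal sketch for inspecting that locally, outside the test harness: the file name main.go and the function name load16be below are made up for illustration and are not part of this change.

package main

import "fmt"

// load16be mirrors the shape of load_be_byte2_uint16 above; per the updated
// amd64 assertions, this should compile to a single MOVWLZX memory load plus
// a ROLW rather than two MOVB loads combined with OR.
func load16be(s []byte) uint16 {
	return uint16(s[0])<<8 | uint16(s[1])
}

func main() {
	fmt.Println(load16be([]byte{0x12, 0x34})) // 0x1234 == 4660
}

Running something like `go tool compile -S main.go` (or `go build -gcflags=-S`) and reading the assembly emitted for load16be is one way to check the pattern by hand; the codegen test above does the same check automatically via its regexp annotations.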