diff options
author | Keith Randall <khr@golang.org> | 2020-03-27 22:03:33 -0700 |
---|---|---|
committer | Keith Randall <khr@golang.org> | 2020-03-30 17:25:29 +0000 |
commit | 33b648c0e9428c8775043db75fdff5864a64219a (patch) | |
tree | 8eea75f3d8068e06f4ae6c8b06a1be9274df025a | |
parent | 5a312288799c0a433e2061550ff92689b627e080 (diff) | |
download | go-33b648c0e9428c8775043db75fdff5864a64219a.tar.gz go-33b648c0e9428c8775043db75fdff5864a64219a.zip |
cmd/compile: fix ephemeral pointer problem on amd64
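Make sure we don't use the rewrite ptr + (c + x) -> c + (ptr + x), as
that may create an ephemeral out-of-bounds pointer: the intermediate
value (ptr + x) can point outside the object even when the final
address is in bounds.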
I have not seen an actual bug caused by this yet, but we've seen such
bugs in the 386 port, so I'm fixing this issue for amd64 as well.
The load-combining rules needed to be reworked somewhat to still
work without the above broken rule.
Update #37881
Change-Id: I8046d170e89e2035195f261535e34ca7d8aca68a
Reviewed-on: https://go-review.googlesource.com/c/go/+/226437
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
-rw-r--r-- | src/cmd/compile/internal/ssa/gen/AMD64.rules | 420 |
-rw-r--r-- | src/cmd/compile/internal/ssa/rewrite.go | 45 |
-rw-r--r-- | src/cmd/compile/internal/ssa/rewriteAMD64.go | 2618 |
-rw-r--r-- | test/codegen/memcombine.go | 8 |
4 files changed, 1764 insertions, 1327 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index ca5962f249..b5133d6c14 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -1491,65 +1491,70 @@ // Little-endian loads -(ORL x0:(MOVBload [i0] {s} p0 mem) - sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem))) +(OR(L|Q) x0:(MOVBload [i0] {s} p mem) + sh:(SHL(L|Q)const [8] x1:(MOVBload [i1] {s} p mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) -(ORQ x0:(MOVBload [i0] {s} p0 mem) - sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem))) - && i1 == i0+1 +(OR(L|Q) x0:(MOVBload [i] {s} p0 mem) + sh:(SHL(L|Q)const [8] x1:(MOVBload [i] {s} p1 mem))) && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) -(ORL x0:(MOVWload [i0] {s} p0 mem) - sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem))) +(OR(L|Q) x0:(MOVWload [i0] {s} p mem) + sh:(SHL(L|Q)const [16] x1:(MOVWload [i1] {s} p mem))) && i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) -(ORQ x0:(MOVWload [i0] {s} p0 mem) - sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem))) - && i1 == i0+2 +(OR(L|Q) x0:(MOVWload [i] {s} p0 mem) + sh:(SHL(L|Q)const [16] x1:(MOVWload [i] {s} p1 mem))) && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVLload [i0] 
{s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem) -(ORQ x0:(MOVLload [i0] {s} p0 mem) - sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem))) +(ORQ x0:(MOVLload [i0] {s} p mem) + sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) && i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem) + -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) -(ORL - s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem)) - or:(ORL - s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem)) +(ORQ x0:(MOVLload [i] {s} p0 mem) + sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem))) + && x0.Uses == 1 + && x1.Uses == 1 + && sh.Uses == 1 + && sequentialAddresses(p0, p1, 4) + && mergePoint(b,x0,x1) != nil + && clobber(x0, x1, sh) + -> @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem) + +(OR(L|Q) + s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem)) + or:(OR(L|Q) + s0:(SHL(L|Q)const [j0] x0:(MOVBload [i0] {s} p mem)) y)) && i1 == i0+1 && j1 == j0+8 @@ -1559,17 +1564,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y) + -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i0] {s} p mem)) y) -(ORQ - s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem)) - or:(ORQ - s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem)) +(OR(L|Q) + s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem)) + or:(OR(L|Q) + s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem)) y)) - && i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 @@ -1577,15 +1580,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] 
(MOVWload [i0] {s} p0 mem)) y) + -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y) (ORQ - s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem)) + s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ - s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem)) + s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y)) && i1 == i0+2 && j1 == j0+16 @@ -1595,106 +1598,107 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p0 mem)) y) - -// Little-endian indexed loads - -// Move constants offsets from LEAQx up into load. This lets the above combining -// rules discover indexed load-combining instances. -//TODO:remove! These rules are bad. -(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem) -(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem) -(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem) -(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem) - -(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem) -(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem) -(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem) 
-(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) --> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem) + -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y) + +(ORQ + s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem)) + or:(ORQ + s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem)) + y)) + && j1 == j0+16 + && j0 % 32 == 0 + && x0.Uses == 1 + && x1.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && or.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && mergePoint(b,x0,x1,y) != nil + && clobber(x0, x1, s0, s1, or) + -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y) // Big-endian loads -(ORL - x1:(MOVBload [i1] {s} p0 mem) - sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem))) +(OR(L|Q) + x1:(MOVBload [i1] {s} p mem) + sh:(SHL(L|Q)const [8] x0:(MOVBload [i0] {s} p mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem)) -(ORQ - x1:(MOVBload [i1] {s} p0 mem) - sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem))) - && i1 == i0+1 +(OR(L|Q) + x1:(MOVBload [i] {s} p1 mem) + sh:(SHL(L|Q)const [8] x0:(MOVBload [i] {s} p0 mem))) && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem)) -(ORL - r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) - sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) +(OR(L|Q) + r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) + sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) && i1 == 
i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem)) -(ORQ - r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) - sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) - && i1 == i0+2 +(OR(L|Q) + r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) + sh:(SHL(L|Q)const [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))) && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem)) (ORQ - r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem)) - sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem)))) + r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) + sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) && i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p0 mem)) + -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem)) + +(ORQ + r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem)) + sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem)))) + && x0.Uses == 1 + && x1.Uses == 1 + && r0.Uses == 1 + && r1.Uses == 1 + && sh.Uses == 1 + && sequentialAddresses(p0, p1, 4) + && mergePoint(b,x0,x1) != nil + && clobber(x0, x1, r0, r1, sh) + -> @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem)) -(ORL - s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem)) - or:(ORL - s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem)) +(OR(L|Q) + s0:(SHL(L|Q)const [j0] 
x0:(MOVBload [i0] {s} p mem)) + or:(OR(L|Q) + s1:(SHL(L|Q)const [j1] x1:(MOVBload [i1] {s} p mem)) y)) && i1 == i0+1 && j1 == j0-8 @@ -1704,17 +1708,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y) + -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y) -(ORQ - s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem)) - or:(ORQ - s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem)) +(OR(L|Q) + s0:(SHL(L|Q)const [j0] x0:(MOVBload [i] {s} p0 mem)) + or:(OR(L|Q) + s1:(SHL(L|Q)const [j1] x1:(MOVBload [i] {s} p1 mem)) y)) - && i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 @@ -1722,15 +1724,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y) + -> @mergePoint(b,x0,x1,y) (OR(L|Q) <v.Type> (SHL(L|Q)const <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y) (ORQ - s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem))) + s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ - s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem))) + s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y)) && i1 == i0+2 && j1 == j0-16 @@ -1742,41 +1744,90 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 - && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p0 mem))) y) + -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] 
(BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y) + +(ORQ + s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))) + or:(ORQ + s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))) + y)) + && j1 == j0-16 + && j1 % 32 == 0 + && x0.Uses == 1 + && x1.Uses == 1 + && r0.Uses == 1 + && r1.Uses == 1 + && s0.Uses == 1 + && s1.Uses == 1 + && or.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && mergePoint(b,x0,x1,y) != nil + && clobber(x0, x1, r0, r1, s0, s1, or) + -> @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y) // Combine 2 byte stores + shift into rolw 8 + word store +(MOVBstore [i] {s} p w + x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) + && x0.Uses == 1 + && clobber(x0) + -> (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem) (MOVBstore [i] {s} p1 w - x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem)) + x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem)) && x0.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && clobber(x0) - -> (MOVWstore [i-1] {s} p0 (ROLWconst <w.Type> [8] w) mem) + -> (MOVWstore [i] {s} p0 (ROLWconst <w.Type> [8] w) mem) // Combine stores + shifts into bswap and larger (unaligned) stores +(MOVBstore [i] {s} p w + x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) + x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) + x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && clobber(x0, x1, x2) + -> (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem) (MOVBstore [i] {s} p3 w - x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w) - x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w) - x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem)))) + x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w) + x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w) + x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem)))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 - && same(p0, p1, 1) - && same(p1, p2, 1) - && same(p2, p3, 1) + && 
sequentialAddresses(p0, p1, 1) + && sequentialAddresses(p1, p2, 1) + && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2) - -> (MOVLstore [i-3] {s} p0 (BSWAPL <w.Type> w) mem) - + -> (MOVLstore [i] {s} p0 (BSWAPL <w.Type> w) mem) + +(MOVBstore [i] {s} p w + x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) + x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) + x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) + x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) + x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) + x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) + x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem)))))))) + && x0.Uses == 1 + && x1.Uses == 1 + && x2.Uses == 1 + && x3.Uses == 1 + && x4.Uses == 1 + && x5.Uses == 1 + && x6.Uses == 1 + && clobber(x0, x1, x2, x3, x4, x5, x6) + -> (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem) (MOVBstore [i] {s} p7 w - x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w) - x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w) - x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w) - x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w) - x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w) - x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w) - x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem)))))))) + x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w) + x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w) + x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w) + x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w) + x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w) + x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w) + x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem)))))))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 @@ -1784,99 +1835,122 @@ && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 - && same(p0, p1, 1) - && same(p1, p2, 1) - && same(p2, p3, 1) - && same(p3, p4, 1) - && same(p4, p5, 1) - && same(p5, p6, 1) - && same(p6, p7, 1) + && sequentialAddresses(p0, p1, 1) + && sequentialAddresses(p1, p2, 1) + && sequentialAddresses(p2, p3, 1) + && sequentialAddresses(p3, p4, 1) + && sequentialAddresses(p4, p5, 1) + && 
sequentialAddresses(p5, p6, 1) + && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6) - -> (MOVQstore [i-7] {s} p0 (BSWAPQ <w.Type> w) mem) + -> (MOVQstore [i] {s} p0 (BSWAPQ <w.Type> w) mem) // Combine constant stores into larger (unaligned) stores. -(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) +(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) -(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem)) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) +(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) -(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) +(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) -(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem)) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) +(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) 
-(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem)) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) +(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem)) + -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) +(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem)) && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem)) + -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) +(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) && config.useSSE && x.Uses == 1 - && same(p0, p1, 1) && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) - -> (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem) + -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) -// Combine stores into larger (unaligned) stores. -(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) +// Combine stores into larger (unaligned) stores. Little endian. 
+(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i-1] {s} p w mem) +(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVWstore [i-1] {s} p0 w mem) -(MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHR(W|L|Q)const [8] w) mem)) + -> (MOVWstore [i] {s} p w mem) +(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVWstore [i-1] {s} p w0 mem) +(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i] {s} p0 w mem)) + && x.Uses == 1 + && sequentialAddresses(p0, p1, 1) + && clobber(x) + -> (MOVWstore [i] {s} p0 w mem) +(MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && clobber(x) -> (MOVWstore [i] {s} p0 w mem) -(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem)) +(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 1) && clobber(x) - -> (MOVWstore [i-1] {s} p0 w0 mem) -(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) + -> (MOVWstore [i] {s} p0 w0 mem) + +(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstore [i-2] {s} p w mem) +(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem)) + && x.Uses == 1 + && clobber(x) + -> (MOVLstore [i-2] {s} p w0 mem) +(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i] {s} p0 w mem)) + && x.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && clobber(x) + -> (MOVLstore [i] {s} p0 w mem) +(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i] {s} p0 
w0:(SHR(L|Q)const [j-16] w) mem)) + && x.Uses == 1 + && sequentialAddresses(p0, p1, 2) + && clobber(x) + -> (MOVLstore [i] {s} p0 w0 mem) + +(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVLstore [i-2] {s} p0 w mem) -(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem)) + -> (MOVQstore [i-4] {s} p w mem) +(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) && clobber(x) - -> (MOVLstore [i-2] {s} p0 w0 mem) -(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem)) + -> (MOVQstore [i-4] {s} p w0 mem) +(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem)) && x.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 4) && clobber(x) - -> (MOVQstore [i-4] {s} p0 w mem) -(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem)) + -> (MOVQstore [i] {s} p0 w mem) +(MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem)) && x.Uses == 1 - && same(p0, p1, 1) + && sequentialAddresses(p0, p1, 4) && clobber(x) - -> (MOVQstore [i-4] {s} p0 w0 mem) + -> (MOVQstore [i] {s} p0 w0 mem) (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index fc03f0d72c..878b15eeee 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1260,46 +1260,15 @@ func sequentialAddresses(x, y *Value, n int64) bool { x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { return true } - return false -} - -// same reports whether x and y are the same value. -// It checks to a maximum depth of d, so it may report -// a false negative. 
-// TODO: remove when amd64 port is switched to using sequentialAddresses -func same(x, y *Value, depth int) bool { - if x == y { + if x.Op == OpAMD64ADDQ && y.Op == OpAMD64LEAQ1 && y.AuxInt == n && y.Aux == nil && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { return true } - if depth <= 0 { - return false - } - if x.Op != y.Op || x.Aux != y.Aux || x.AuxInt != y.AuxInt { - return false - } - if len(x.Args) != len(y.Args) { - return false - } - if opcodeTable[x.Op].commutative { - // Check exchanged ordering first. - for i, a := range x.Args { - j := i - if j < 2 { - j ^= 1 - } - b := y.Args[j] - if !same(a, b, depth-1) { - goto checkNormalOrder - } - } + if x.Op == OpAMD64LEAQ1 && y.Op == OpAMD64LEAQ1 && y.AuxInt == x.AuxInt+n && x.Aux == y.Aux && + (x.Args[0] == y.Args[0] && x.Args[1] == y.Args[1] || + x.Args[0] == y.Args[1] && x.Args[1] == y.Args[0]) { return true - checkNormalOrder: } - for i, a := range x.Args { - b := y.Args[i] - if !same(a, b, depth-1) { - return false - } - } - return true + return false } diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index b9a401cca9..e4d86485d4 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -10140,7 +10140,6 @@ func rewriteValueAMD64_OpAMD64MOVBatomicload(v *Value) bool { func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) // result: (MOVBQZX x) @@ -10205,118 +10204,6 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVBload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := 
v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVBload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVBload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVBload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 
:= b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) @@ -10722,159 +10609,124 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVBstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem) + // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) + // cond: x0.Uses == 1 && clobber(x0) + // result: (MOVWstore [i-1] {s} p (ROLWconst <w.Type> [8] w) mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x0 := v_2 + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + mem := x0.Args[2] + if p != x0.Args[0] { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + x0_1 := x0.Args[1] + if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && clobber(x0)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type) + v0.AuxInt = 8 + v0.AddArg(w) + v.AddArg3(p, v0, mem) return true } - // match: (MOVBstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem) + // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i] {s} p0 (SHRWconst [8] w) mem)) + // cond: x0.Uses 
== 1 && sequentialAddresses(p0, p1, 1) && clobber(x0) + // result: (MOVWstore [i] {s} p0 (ROLWconst <w.Type> [8] w) mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { + i := v.AuxInt + s := v.Aux + p1 := v_0 + w := v_1 + x0 := v_2 + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + mem := x0.Args[2] + p0 := x0.Args[0] + x0_1 := x0.Args[1] + if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x0)) { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type) + v0.AuxInt = 8 + v0.AddArg(w) + v.AddArg3(p0, v0, mem) return true } - // match: (MOVBstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem) + // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2) + // result: (MOVLstore [i-3] {s} p (BSWAPL <w.Type> w) mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x2 := v_2 + if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i-1 || x2.Aux != s { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + _ = x2.Args[2] + if p != x2.Args[0] { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := 
b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVBstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVBstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { + x2_1 := x2.Args[1] + if x2_1.Op != OpAMD64SHRLconst || x2_1.AuxInt != 8 || w != x2_1.Args[0] { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + x1 := x2.Args[2] + if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i-2 || x1.Aux != s { break } - v.reset(OpAMD64MOVBstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem)) - // cond: x0.Uses == 1 && same(p0, p1, 1) && clobber(x0) - // result: (MOVWstore [i-1] {s} p0 (ROLWconst <w.Type> [8] w) mem) - for { - i := v.AuxInt - s := v.Aux - p1 := v_0 - w := v_1 - x0 := v_2 - if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s { + _ = x1.Args[2] + if p != x1.Args[0] { + break + } + x1_1 := x1.Args[1] + if x1_1.Op != OpAMD64SHRLconst || x1_1.AuxInt != 16 || w != x1_1.Args[0] { + break + } + x0 := x1.Args[2] + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-3 || x0.Aux != s { break } mem := x0.Args[2] - p0 := x0.Args[0] + if p != x0.Args[0] { + break + } x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && same(p0, p1, 1) && clobber(x0)) { + if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.reset(OpAMD64MOVLstore) + v.AuxInt = 
i - 3 v.Aux = s - v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type) - v0.AuxInt = 8 + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, w.Type) v0.AddArg(w) - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } - // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem)))) - // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2) - // result: (MOVLstore [i-3] {s} p0 (BSWAPL <w.Type> w) mem) + // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i] {s} p2 (SHRLconst [8] w) x1:(MOVBstore [i] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i] {s} p0 (SHRLconst [24] w) mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2) + // result: (MOVLstore [i] {s} p0 (BSWAPL <w.Type> w) mem) for { i := v.AuxInt s := v.Aux p3 := v_0 w := v_1 x2 := v_2 - if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i-1 || x2.Aux != s { + if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i || x2.Aux != s { break } _ = x2.Args[2] @@ -10884,7 +10736,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } x1 := x2.Args[2] - if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i-2 || x1.Aux != s { + if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i || x1.Aux != s { break } _ = x1.Args[2] @@ -10894,37 +10746,39 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } x0 := x1.Args[2] - if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-3 || x0.Aux != s { + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s { break } mem := x0.Args[2] p0 := x0.Args[0] x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2)) { 
+ if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && clobber(x0, x1, x2)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 3 + v.AuxInt = i v.Aux = s v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, w.Type) v0.AddArg(w) v.AddArg3(p0, v0, mem) return true } - // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem)))))))) - // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6) - // result: (MOVQstore [i-7] {s} p0 (BSWAPQ <w.Type> w) mem) + // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem)))))))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6) + // result: (MOVQstore [i-7] {s} p (BSWAPQ <w.Type> w) mem) for { i := v.AuxInt s := v.Aux - p7 := v_0 + p := v_0 w := v_1 x6 := v_2 if x6.Op != OpAMD64MOVBstore || x6.AuxInt != i-1 || x6.Aux != s { break } _ = x6.Args[2] - p6 := x6.Args[0] + if p != x6.Args[0] { + break + } x6_1 := x6.Args[1] if x6_1.Op != OpAMD64SHRQconst 
|| x6_1.AuxInt != 8 || w != x6_1.Args[0] { break @@ -10934,7 +10788,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x5.Args[2] - p5 := x5.Args[0] + if p != x5.Args[0] { + break + } x5_1 := x5.Args[1] if x5_1.Op != OpAMD64SHRQconst || x5_1.AuxInt != 16 || w != x5_1.Args[0] { break @@ -10944,7 +10800,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x4.Args[2] - p4 := x4.Args[0] + if p != x4.Args[0] { + break + } x4_1 := x4.Args[1] if x4_1.Op != OpAMD64SHRQconst || x4_1.AuxInt != 24 || w != x4_1.Args[0] { break @@ -10954,7 +10812,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x3.Args[2] - p3 := x3.Args[0] + if p != x3.Args[0] { + break + } x3_1 := x3.Args[1] if x3_1.Op != OpAMD64SHRQconst || x3_1.AuxInt != 32 || w != x3_1.Args[0] { break @@ -10964,7 +10824,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x2.Args[2] - p2 := x2.Args[0] + if p != x2.Args[0] { + break + } x2_1 := x2.Args[1] if x2_1.Op != OpAMD64SHRQconst || x2_1.AuxInt != 40 || w != x2_1.Args[0] { break @@ -10974,7 +10836,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x1.Args[2] - p1 := x1.Args[0] + if p != x1.Args[0] { + break + } x1_1 := x1.Args[1] if x1_1.Op != OpAMD64SHRQconst || x1_1.AuxInt != 48 || w != x1_1.Args[0] { break @@ -10984,9 +10848,11 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x0.Args[2] - p0 := x0.Args[0] + if p != x0.Args[0] { + break + } x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) { + if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && 
x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) { break } v.reset(OpAMD64MOVQstore) @@ -10994,16 +10860,102 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.Aux = s v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, w.Type) v0.AddArg(w) + v.AddArg3(p, v0, mem) + return true + } + // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i] {s} p0 (SHRQconst [56] w) mem)))))))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && sequentialAddresses(p3, p4, 1) && sequentialAddresses(p4, p5, 1) && sequentialAddresses(p5, p6, 1) && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6) + // result: (MOVQstore [i] {s} p0 (BSWAPQ <w.Type> w) mem) + for { + i := v.AuxInt + s := v.Aux + p7 := v_0 + w := v_1 + x6 := v_2 + if x6.Op != OpAMD64MOVBstore || x6.AuxInt != i || x6.Aux != s { + break + } + _ = x6.Args[2] + p6 := x6.Args[0] + x6_1 := x6.Args[1] + if x6_1.Op != OpAMD64SHRQconst || x6_1.AuxInt != 8 || w != x6_1.Args[0] { + break + } + x5 := x6.Args[2] + if x5.Op != OpAMD64MOVBstore || x5.AuxInt != i || x5.Aux != s { + break + } + _ = x5.Args[2] + p5 := x5.Args[0] + x5_1 := x5.Args[1] + if x5_1.Op != OpAMD64SHRQconst || x5_1.AuxInt != 16 || w != x5_1.Args[0] { + break + } + x4 := x5.Args[2] + if x4.Op != OpAMD64MOVBstore || x4.AuxInt != i || x4.Aux != s { + break + } + _ = x4.Args[2] + p4 := x4.Args[0] + x4_1 := x4.Args[1] + if x4_1.Op != OpAMD64SHRQconst || x4_1.AuxInt != 24 || w != x4_1.Args[0] { + break + } + x3 := x4.Args[2] + if x3.Op != OpAMD64MOVBstore || 
x3.AuxInt != i || x3.Aux != s { + break + } + _ = x3.Args[2] + p3 := x3.Args[0] + x3_1 := x3.Args[1] + if x3_1.Op != OpAMD64SHRQconst || x3_1.AuxInt != 32 || w != x3_1.Args[0] { + break + } + x2 := x3.Args[2] + if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i || x2.Aux != s { + break + } + _ = x2.Args[2] + p2 := x2.Args[0] + x2_1 := x2.Args[1] + if x2_1.Op != OpAMD64SHRQconst || x2_1.AuxInt != 40 || w != x2_1.Args[0] { + break + } + x1 := x2.Args[2] + if x1.Op != OpAMD64MOVBstore || x1.AuxInt != i || x1.Aux != s { + break + } + _ = x1.Args[2] + p1 := x1.Args[0] + x1_1 := x1.Args[1] + if x1_1.Op != OpAMD64SHRQconst || x1_1.AuxInt != 48 || w != x1_1.Args[0] { + break + } + x0 := x1.Args[2] + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i || x0.Aux != s { + break + } + mem := x0.Args[2] + p0 := x0.Args[0] + x0_1 := x0.Args[1] + if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && sequentialAddresses(p0, p1, 1) && sequentialAddresses(p1, p2, 1) && sequentialAddresses(p2, p3, 1) && sequentialAddresses(p3, p4, 1) && sequentialAddresses(p4, p5, 1) && sequentialAddresses(p5, p6, 1) && sequentialAddresses(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = i + v.Aux = s + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, w.Type) + v0.AddArg(w) v.AddArg3(p0, v0, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w mem) + // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 { break } @@ -11013,19 +10965,232 @@ func 
rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x.Args[2] - p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + x_1 := x.Args[1] + if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && 
clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + x_1 := x.Args[1] + if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRQconst [8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i] {s} p w mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + w := v_1 + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + x_1 := x.Args[1] + if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s + v.AddArg3(p, w, mem) + return true + } + // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRLconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i - 1 v.Aux = s + v.AddArg3(p, w0, mem) + return true + } + // 
match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVWstore [i-1] {s} p w0 mem) + for { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst { + break + } + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + break + } + mem := x.Args[2] + if p != x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 + v.Aux = s + v.AddArg3(p, w0, mem) + return true + } + // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) + for { + i := v.AuxInt + s := v.Aux + p1 := v_0 + if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 { + break + } + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i + v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w mem) + // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux @@ -11035,23 +11200,23 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { } w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := 
x.Args[2] p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w mem) + // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux @@ -11061,36 +11226,36 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { } w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRWconst [8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p0 := v_0 w := v_1 x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] - p0 := x.Args[0] + p1 := x.Args[0] x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && 
clobber(x)) { + if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) @@ -11099,22 +11264,22 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRLconst [8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p0 := v_0 w := v_1 x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] - p0 := x.Args[0] + p1 := x.Args[0] x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) @@ -11123,22 +11288,22 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRQconst [8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // match: (MOVBstore [i] {s} p0 w x:(MOVBstore [i] {s} p1 (SHRQconst [8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p1 := v_0 + p0 := v_0 w := v_1 x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] - p0 := x.Args[0] + p1 := x.Args[0] x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt 
!= 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) @@ -11147,9 +11312,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w0 mem) + // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -11160,24 +11325,24 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true } - // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRQconst [j-8] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVWstore [i-1] {s} p0 w0 mem) + // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i] {s} p0 w0:(SHRQconst [j-8] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -11188,17 +11353,17 @@ 
func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVBstore || x.AuxInt != i-1 || x.Aux != s { + if x.Op != OpAMD64MOVBstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true @@ -11337,13 +11502,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVBstoreconst { break @@ -11353,23 +11518,22 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } - 
// match: (MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) for { a := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVBstoreconst { break @@ -11379,14 +11543,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } // match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -11860,118 +12023,6 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVLload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: 
(MOVLload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVLload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVLload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem) @@ -12174,125 +12225,64 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) 
bool { v.AddArg3(base, val, mem) return true } - // match: (MOVLstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem) + // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstore [i-4] {s} p w mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 32 { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVLstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + v.reset(OpAMD64MOVQstore) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg3(p, w, mem) return true } - // match: (MOVLstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem) + // match: (MOVLstore [i] {s} p 
(SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVQstore [i-4] {s} p w0 mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVLstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVLstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { + mem := x.Args[2] + if p != x.Args[0] { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + v.reset(OpAMD64MOVQstore) + v.AuxInt = i - 4 + v.Aux = s + v.AddArg3(p, w0, mem) return true } - // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVQstore [i-4] {s} p0 w mem) + // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x) + // result: (MOVQstore [i] {s} p0 w mem) for 
{ i := v.AuxInt s := v.Aux @@ -12302,23 +12292,23 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { } w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s { + if x.Op != OpAMD64MOVLstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)) { break } v.reset(OpAMD64MOVQstore) - v.AuxInt = i - 4 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVQstore [i-4] {s} p0 w0 mem) + // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i] {s} p0 w0:(SHRQconst [j-32] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x) + // result: (MOVQstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -12329,17 +12319,17 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVLstore || x.AuxInt != i-4 || x.Aux != s { + if x.Op != OpAMD64MOVLstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 4) && clobber(x)) { break } v.reset(OpAMD64MOVQstore) - v.AuxInt = i - 4 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true @@ -13018,13 +13008,13 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == 
ValAndOff(c).Off() && clobber(x) - // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVLstoreconst { break @@ -13034,8 +13024,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVQstore) @@ -13043,16 +13032,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { v.Aux = s v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64) v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } - // match: (MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + // match: (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) for { a := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVLstoreconst { break @@ -13062,8 +13051,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 
:= x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVQstore) @@ -13071,7 +13059,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { v.Aux = s v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64) v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -13423,118 +13411,6 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVQload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVQload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVQload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQload 
[i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVQload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) @@ -13614,7 +13490,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) // cond: is32Bit(off1+off2) // result: (MOVQstore [off1+off2] {sym} ptr val mem) @@ -13681,122 +13556,6 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVQstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem 
:= v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVQstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQstore [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVQstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } - // match: (MOVQstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVQstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, 
l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) - return true - } // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) @@ -14430,13 +14189,13 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem)) - // cond: config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) - // result: (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem) + // match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) + // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) + // result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVQstoreconst { break @@ -14446,8 +14205,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) { + if p != x.Args[0] || !(config.useSSE && x.Uses == 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) { break } v.reset(OpAMD64MOVOstore) @@ -14455,7 +14213,7 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { v.Aux = s v0 := b.NewValue0(x.Pos, OpAMD64MOVOconst, types.TypeInt128) v0.AuxInt = 0 - v.AddArg3(p0, v0, mem) + v.AddArg3(p, v0, mem) return true } // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -15119,118 +14877,6 @@ func 
rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVWload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVWload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVWload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } - // match: (MOVWload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWload [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) 
mem) - for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { - break - } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - mem := v_1 - if !(i1 != 0 && is32Bit(i0+i1)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg2(v0, mem) - return true - } // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) @@ -15416,125 +15062,119 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVWstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAQ1 <l.Type> [0] {s1} x y) val mem) + // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ1 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 16 { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w, mem) return true } - // match: (MOVWstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore 
[i0+i1] {s0} (LEAQ2 <l.Type> [0] {s1} x y) val mem) + // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ2 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 16 { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + mem := x.Args[2] + if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w, mem) return true } - // match: (MOVWstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAQ4 <l.Type> [0] {s1} x y) val mem) + // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w0 mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ4 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRLconst { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + mem := x.Args[2] + if p != 
x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w0, mem) return true } - // match: (MOVWstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) - // cond: i1 != 0 && is32Bit(i0+i1) - // result: (MOVWstore [i0+i1] {s0} (LEAQ8 <l.Type> [0] {s1} x y) val mem) + // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem)) + // cond: x.Uses == 1 && clobber(x) + // result: (MOVLstore [i-2] {s} p w0 mem) for { - i0 := v.AuxInt - s0 := v.Aux - l := v_0 - if l.Op != OpAMD64LEAQ8 { + i := v.AuxInt + s := v.Aux + p := v_0 + if v_1.Op != OpAMD64SHRQconst { break } - i1 := l.AuxInt - s1 := l.Aux - y := l.Args[1] - x := l.Args[0] - val := v_1 - mem := v_2 - if !(i1 != 0 && is32Bit(i0+i1)) { + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i0 + i1 - v.Aux = s0 - v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) - v0.AuxInt = 0 - v0.Aux = s1 - v0.AddArg2(x, y) - v.AddArg3(v0, val, mem) + mem := x.Args[2] + if p != x.Args[0] { + break + } + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p, w0, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w mem) + // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux @@ -15544,23 +15184,23 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { } 
w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w mem) + // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i] {s} p0 w mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux @@ -15570,23 +15210,23 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { } w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] - if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w != x.Args[1] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w0 mem) + // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -15597,24 +15237,24 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { j := v_1.AuxInt 
w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true } - // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRQconst [j-16] w) mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) - // result: (MOVLstore [i-2] {s} p0 w0 mem) + // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i] {s} p0 w0:(SHRQconst [j-16] w) mem)) + // cond: x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x) + // result: (MOVLstore [i] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux @@ -15625,17 +15265,17 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { j := v_1.AuxInt w := v_1.Args[0] x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + if x.Op != OpAMD64MOVWstore || x.AuxInt != i || x.Aux != s { break } mem := x.Args[2] p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && sequentialAddresses(p0, p1, 2) && clobber(x)) { break } v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 + v.AuxInt = i v.Aux = s v.AddArg3(p0, w0, mem) return true @@ -15774,13 +15414,13 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 
== ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) for { c := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVWstoreconst { break @@ -15790,23 +15430,22 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVLstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } - // match: (MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem)) - // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) + // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) + // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) for { a := v.AuxInt s := v.Aux - p1 := v_0 + p := v_0 x := v_1 if x.Op != OpAMD64MOVWstoreconst { break @@ -15816,14 +15455,13 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { break } mem := x.Args[1] - p0 := x.Args[0] - if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && 
clobber(x)) { + if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVLstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p0, mem) + v.AddArg2(p, mem) return true } // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -17887,9 +17525,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(x) return true } - // match: (ORL x0:(MOVBload [i0] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) + // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -17899,7 +17537,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { continue @@ -17913,8 +17551,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -17922,14 +17559,50 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + v0.AddArg2(p, mem) + return true + } 
+ break + } + // match: (ORL x0:(MOVBload [i] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORL x0:(MOVWload [i0] {s} p0 mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) + // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -17939,7 +17612,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { continue @@ -17953,8 +17626,7 @@ func 
rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -17962,14 +17634,50 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORL x0:(MOVWload [i] {s} p0 mem) sh:(SHLLconst [16] x1:(MOVWload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVWload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y)) - // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && 
clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y) + // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y)) + // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -17984,7 +17692,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] or := v_1 if or.Op != OpAMD64ORL { continue @@ -18007,12 +17715,11 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] { + if p != x0.Args[0] || mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -18023,6 +17730,66 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v2.AuxInt = i0 v2.Aux = s + v2.AddArg2(p, mem) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i] {s} p1 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i] {s} p0 mem)) y)) + // cond: j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && 
sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j0] (MOVWload [i] {s} p0 mem)) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s1 := v_0 + if s1.Op != OpAMD64SHLLconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + or := v_1 + if or.Op != OpAMD64ORL { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s0 := or_0 + if s0.Op != OpAMD64SHLLconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] { + continue + } + y := or_1 + if !(j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x0.Pos, OpAMD64ORL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64SHLLconst, v.Type) + v1.AuxInt = j0 + v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v2.AuxInt = i + v2.Aux = s v2.AddArg2(p0, mem) v1.AddArg(v2) v0.AddArg2(v1, y) @@ -18031,9 +17798,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } break } - // match: (ORL x1:(MOVBload [i1] {s} p0 mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem)) + // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 
&& x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x1 := v_0 @@ -18043,7 +17810,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { continue @@ -18057,8 +17824,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -18068,15 +17834,54 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORL x1:(MOVBload [i] {s} p1 mem) sh:(SHLLconst [8] x0:(MOVBload [i] {s} p0 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x1 := v_0 + if x1.Op != OpAMD64MOVBload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { + continue + } + x0 := sh.Args[0] + if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && 
sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type) + v.copyOf(v0) + v0.AuxInt = 8 + v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v1.AuxInt = i + v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem)) + // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -18090,7 +17895,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { continue @@ -18108,8 +17913,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) @@ 
-18118,15 +17922,61 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r1 := v_0 + if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 { + continue + } + x1 := r1.Args[0] + if x1.Op != OpAMD64MOVWload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { + continue + } + r0 := sh.Args[0] + if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 { + continue + } + x0 := r0.Args[0] + if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) + v1.AuxInt = i + v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y)) - // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - 
// result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y) + // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y)) + // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s0 := v_0 @@ -18141,7 +17991,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] or := v_1 if or.Op != OpAMD64ORL { continue @@ -18164,12 +18014,74 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x1.Args[1] + if p != x1.Args[0] || mem != x1.Args[1] { + continue + } + y := or_1 + if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type) + v1.AuxInt = j1 + v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) + v2.AuxInt = 8 + v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v3.AuxInt = i0 + v3.Aux = s + v3.AddArg2(p, mem) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i] {s} p1 mem)) y)) + // cond: j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && 
clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL <v.Type> (SHLLconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s0 := v_0 + if s0.Op != OpAMD64SHLLconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + or := v_1 + if or.Op != OpAMD64ORL { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s1 := or_0 + if s1.Op != OpAMD64SHLLconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] p1 := x1.Args[0] if mem != x1.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -18180,7 +18092,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) v2.AuxInt = 8 v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) - v3.AuxInt = i0 + v3.AuxInt = i v3.Aux = s v3.AddArg2(p0, mem) v2.AddArg(v3) @@ -18804,9 +18716,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(x) return true } - // match: (ORQ x0:(MOVBload [i0] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) 
(MOVWload [i0] {s} p0 mem) + // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -18816,7 +18728,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { continue @@ -18830,8 +18742,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -18839,14 +18750,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORQ x0:(MOVBload [i] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || 
!(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORQ x0:(MOVWload [i0] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) + // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -18856,7 +18803,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { continue @@ -18870,8 +18817,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -18879,14 +18825,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORQ x0:(MOVWload [i] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && 
sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVWload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORQ x0:(MOVLload [i0] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem))) - // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem) + // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) + // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -18896,7 +18878,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { continue @@ -18910,8 +18892,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] - p1 := x1.Args[0] - if mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && 
sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -18919,14 +18900,50 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s + v0.AddArg2(p, mem) + return true + } + break + } + // match: (ORQ x0:(MOVLload [i] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i] {s} p1 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVQload [i] {s} p0 mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x0 := v_0 + if x0.Op != OpAMD64MOVLload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { + continue + } + x1 := sh.Args[0] + if x1.Op != OpAMD64MOVLload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x1.Pos, OpAMD64MOVQload, typ.UInt64) + v.copyOf(v0) + v0.AuxInt = i + v0.Aux = s v0.AddArg2(p0, mem) return true } break } - // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y)) - // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p0 mem)) y) + // match: (ORQ s1:(SHLQconst 
[j1] x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y)) + // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVWload [i0] {s} p mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -18941,7 +18958,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -18964,12 +18981,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] { + if p != x0.Args[0] || mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -18980,6 +18996,66 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v2.AuxInt = i0 v2.Aux = s + v2.AddArg2(p, mem) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i] {s} p1 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i] {s} p0 mem)) y)) + // cond: j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] 
(MOVWload [i] {s} p0 mem)) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s1 := v_0 + if s1.Op != OpAMD64SHLQconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + or := v_1 + if or.Op != OpAMD64ORQ { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s0 := or_0 + if s0.Op != OpAMD64SHLQconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] { + continue + } + y := or_1 + if !(j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type) + v1.AuxInt = j0 + v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v2.AuxInt = i + v2.Aux = s v2.AddArg2(p0, mem) v1.AddArg(v2) v0.AddArg2(v1, y) @@ -18988,9 +19064,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } - // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem)) y)) - // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p0 mem)) y) + // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y)) + // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 
== 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i0] {s} p mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -19005,7 +19081,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -19028,12 +19104,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] { + if p != x0.Args[0] || mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -19044,6 +19119,66 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) v2.AuxInt = i0 v2.Aux = s + v2.AddArg2(p, mem) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i] {s} p1 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i] {s} p0 mem)) y)) + // cond: j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j0] (MOVLload [i] {s} p0 mem)) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s1 := v_0 + if s1.Op != OpAMD64SHLQconst 
{ + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVWload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + or := v_1 + if or.Op != OpAMD64ORQ { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s0 := or_0 + if s0.Op != OpAMD64SHLQconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] { + continue + } + y := or_1 + if !(j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x0.Pos, OpAMD64ORQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64SHLQconst, v.Type) + v1.AuxInt = j0 + v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) + v2.AuxInt = i + v2.Aux = s v2.AddArg2(p0, mem) v1.AddArg(v2) v0.AddArg2(v1, y) @@ -19052,9 +19187,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } - // match: (ORQ x1:(MOVBload [i1] {s} p0 mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p0 mem)) + // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x1 := v_0 @@ -19064,7 +19199,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v 
*Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { continue @@ -19078,8 +19213,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -19089,15 +19223,54 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORQ x1:(MOVBload [i] {s} p1 mem) sh:(SHLQconst [8] x0:(MOVBload [i] {s} p0 mem))) + // cond: x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst <v.Type> [8] (MOVWload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x1 := v_0 + if x1.Op != OpAMD64MOVBload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { + continue + } + x0 := sh.Args[0] + if x0.Op != OpAMD64MOVBload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, v.Type) + v.copyOf(v0) + v0.AuxInt = 8 + v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) + v1.AuxInt = i + v1.Aux = s 
v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p0 mem)) + // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -19111,7 +19284,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { continue @@ -19129,8 +19302,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -19139,15 +19311,61 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem)) sh:(SHLQconst 
[16] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL <v.Type> (MOVLload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r1 := v_0 + if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 { + continue + } + x1 := r1.Args[0] + if x1.Op != OpAMD64MOVWload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { + continue + } + r0 := sh.Args[0] + if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 { + continue + } + x0 := r0.Args[0] + if x0.Op != OpAMD64MOVWload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) + v1.AuxInt = i + v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem)))) - // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p0 mem)) + // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) + // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && 
clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i0] {s} p mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -19161,7 +19379,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p0 := x1.Args[0] + p := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { continue @@ -19179,8 +19397,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - p1 := x0.Args[0] - if mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -19189,15 +19406,61 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64) v1.AuxInt = i0 v1.Aux = s + v1.AddArg2(p, mem) + v0.AddArg(v1) + return true + } + break + } + // match: (ORQ r1:(BSWAPL x1:(MOVLload [i] {s} p1 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i] {s} p0 mem)))) + // cond: x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPQ <v.Type> (MOVQload [i] {s} p0 mem)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + r1 := v_0 + if r1.Op != OpAMD64BSWAPL { + continue + } + x1 := r1.Args[0] + if x1.Op != OpAMD64MOVLload { + continue + } + i := x1.AuxInt + s := x1.Aux + mem := x1.Args[1] + p1 := x1.Args[0] + sh := v_1 + if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { + continue + } + r0 := sh.Args[0] + if r0.Op != OpAMD64BSWAPL { + continue + } + x0 := 
r0.Args[0] + if x0.Op != OpAMD64MOVLload || x0.AuxInt != i || x0.Aux != s { + continue + } + _ = x0.Args[1] + p0 := x0.Args[0] + if mem != x0.Args[1] || !(x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && sequentialAddresses(p0, p1, 4) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + continue + } + b = mergePoint(b, x0, x1) + v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64) + v1.AuxInt = i + v1.Aux = s v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y)) - // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p0 mem))) y) + // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y)) + // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i0] {s} p mem))) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s0 := v_0 @@ -19212,7 +19475,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -19235,12 +19498,74 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] + if p != x1.Args[0] || mem != x1.Args[1] { + continue + } + y := or_1 + if !(i1 == i0+1 && j1 == j0-8 
&& j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type) + v1.AuxInt = j1 + v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) + v2.AuxInt = 8 + v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v3.AuxInt = i0 + v3.Aux = s + v3.AddArg2(p, mem) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i] {s} p1 mem)) y)) + // cond: j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (ROLWconst <typ.UInt16> [8] (MOVWload [i] {s} p0 mem))) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s0 := v_0 + if s0.Op != OpAMD64SHLQconst { + continue + } + j0 := s0.AuxInt + x0 := s0.Args[0] + if x0.Op != OpAMD64MOVBload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + or := v_1 + if or.Op != OpAMD64ORQ { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s1 := or_0 + if s1.Op != OpAMD64SHLQconst { + continue + } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] p1 := x1.Args[0] if mem != x1.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, 
x1, s0, s1, or)) { + if !(j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -19251,7 +19576,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) v2.AuxInt = 8 v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) - v3.AuxInt = i0 + v3.AuxInt = i v3.Aux = s v3.AddArg2(p0, mem) v2.AddArg(v3) @@ -19262,9 +19587,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } - // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem))) y)) - // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p0 mem))) y) + // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y)) + // cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i0] {s} p mem))) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s0 := v_0 @@ -19283,7 +19608,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p0 := x0.Args[0] + p := x0.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ 
-19310,12 +19635,81 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] + if p != x1.Args[0] || mem != x1.Args[1] { + continue + } + y := or_1 + if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x1.Pos, OpAMD64ORQ, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x1.Pos, OpAMD64SHLQconst, v.Type) + v1.AuxInt = j1 + v2 := b.NewValue0(x1.Pos, OpAMD64BSWAPL, typ.UInt32) + v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32) + v3.AuxInt = i0 + v3.Aux = s + v3.AddArg2(p, mem) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true + } + } + break + } + // match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i] {s} p1 mem))) y)) + // cond: j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ <v.Type> (SHLQconst <v.Type> [j1] (BSWAPL <typ.UInt32> (MOVLload [i] {s} p0 mem))) y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + s0 := v_0 + if s0.Op != OpAMD64SHLQconst { + continue + } + j0 := s0.AuxInt + r0 := s0.Args[0] + if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 { + continue + } + x0 := r0.Args[0] + if x0.Op != OpAMD64MOVWload { + continue + } + i := x0.AuxInt + s := x0.Aux + mem := x0.Args[1] + p0 := x0.Args[0] + or := v_1 + if or.Op != OpAMD64ORQ { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s1 := or_0 + if s1.Op != OpAMD64SHLQconst { + continue + } + j1 := 
s1.AuxInt + r1 := s1.Args[0] + if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 { + continue + } + x1 := r1.Args[0] + if x1.Op != OpAMD64MOVWload || x1.AuxInt != i || x1.Aux != s { + continue + } + _ = x1.Args[1] p1 := x1.Args[0] if mem != x1.Args[1] { continue } y := or_1 - if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) { + if !(j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && sequentialAddresses(p0, p1, 2) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -19325,7 +19719,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1.AuxInt = j1 v2 := b.NewValue0(x1.Pos, OpAMD64BSWAPL, typ.UInt32) v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32) - v3.AuxInt = i0 + v3.AuxInt = i v3.Aux = s v3.AddArg2(p0, mem) v2.AddArg(v3) diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go index e2d703cb0c..6ad9514557 100644 --- a/test/codegen/memcombine.go +++ b/test/codegen/memcombine.go @@ -160,14 +160,14 @@ func load_le_byte8_uint64_inv(s []byte) uint64 { func load_be_byte2_uint16(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR` // ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ` return uint16(s[0])<<8 | uint16(s[1]) } func load_be_byte2_uint16_inv(s []byte) uint16 { // arm64:`MOVHU\t\(R[0-9]+\)`,`REV16W`,-`ORR`,-`MOVB` - // amd64:`MOVWLZX\s\([A-Z]+\)`,-`MOVB`,-`OR` + // amd64:`MOVWLZX\s\([A-Z]+\)`,`ROLW`,-`MOVB`,-`OR` // ppc64le:`MOVHBR\t\(R[0-9]+\)`,-`MOVBZ` return uint16(s[1]) | uint16(s[0])<<8 } @@ -179,7 +179,7 @@ func load_be_byte4_uint32(s []byte) uint32 { func 
load_be_byte4_uint32_inv(s []byte) uint32 { // arm64:`MOVWU\t\(R[0-9]+\)`,`REVW`,-`ORR`,-`REV16W`,-`MOV[BH]` - // amd64:`MOVL\s\([A-Z]+\)`,-`MOV[BW]`,-`OR` + // amd64:`MOVL\s\([A-Z]+\)`,`BSWAPL`,-`MOV[BW]`,-`OR` return uint32(s[3]) | uint32(s[2])<<8 | uint32(s[1])<<16 | uint32(s[0])<<24 } @@ -191,7 +191,7 @@ func load_be_byte8_uint64(s []byte) uint64 { func load_be_byte8_uint64_inv(s []byte) uint64 { // arm64:`MOVD\t\(R[0-9]+\)`,`REV`,-`ORR`,-`REVW`,-`REV16W`,-`MOV[BHW]` - // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,-`MOV[BWL]\t[^$]`,-`OR` + // amd64:`MOVQ\s\([A-Z]+\),\s[A-Z]+`,`BSWAPQ`,-`MOV[BWL]\t[^$]`,-`OR` // ppc64le:`MOVDBR\t\(R[0-9]+\)`,-`MOV[BHW]Z` return uint64(s[7]) | uint64(s[6])<<8 | uint64(s[5])<<16 | uint64(s[4])<<24 | uint64(s[3])<<32 | uint64(s[2])<<40 | uint64(s[1])<<48 | uint64(s[0])<<56 } |