author     Lynn Boger <laboger@linux.vnet.ibm.com>  2020-08-31 09:43:40 -0400
committer  Lynn Boger <laboger@linux.vnet.ibm.com>  2020-09-17 12:37:40 +0000
commit     967465da2975fe4322080703ce5a77ea90752829 (patch)
tree       1f3a8d2b15aaa382b3551c3d5a1b68b6808f8592 /src/cmd/compile/internal/ssa/rewritePPC64.go
parent     0dde60a5fefcb1447c97efa5c7bb4dbcf3575736 (diff)
cmd/compile: use combined shifts to improve array addressing on ppc64x
This change adds rules to find pairs of instructions that can be
combined into a single shift. These instruction sequences are common
in array addressing within loops. Improvements can be seen in many
crypto packages and the hash packages. These are based on the
extended mnemonics found in the ISA sections C.8.1 and C.8.2.

Some rules in PPC64.rules were moved because the ordering prevented
some matching. The following results were generated on power9.

hash/crc32:
CRC32/poly=Koopman/size=40/align=0     195ns ± 0%   163ns ± 0%   -16.41%
CRC32/poly=Koopman/size=40/align=1     200ns ± 0%   163ns ± 0%   -18.50%
CRC32/poly=Koopman/size=512/align=0    1.98µs ± 0%  1.67µs ± 0%  -15.46%
CRC32/poly=Koopman/size=512/align=1    1.98µs ± 0%  1.69µs ± 0%  -14.80%
CRC32/poly=Koopman/size=1kB/align=0    3.90µs ± 0%  3.31µs ± 0%  -15.27%
CRC32/poly=Koopman/size=1kB/align=1    3.85µs ± 0%  3.31µs ± 0%  -14.15%
CRC32/poly=Koopman/size=4kB/align=0    15.3µs ± 0%  13.1µs ± 0%  -14.22%
CRC32/poly=Koopman/size=4kB/align=1    15.4µs ± 0%  13.1µs ± 0%  -14.79%
CRC32/poly=Koopman/size=32kB/align=0   137µs ± 0%   105µs ± 0%   -23.56%
CRC32/poly=Koopman/size=32kB/align=1   137µs ± 0%   105µs ± 0%   -23.53%

crypto/rc4:
RC4_128   733ns ± 0%   650ns ± 0%   -11.32%  (p=1.000 n=1+1)
RC4_1K    5.80µs ± 0%  5.17µs ± 0%  -10.89%  (p=1.000 n=1+1)
RC4_8K    45.7µs ± 0%  40.8µs ± 0%  -10.73%  (p=1.000 n=1+1)

crypto/sha1:
Hash8Bytes     635ns ± 0%   613ns ± 0%   -3.46%  (p=1.000 n=1+1)
Hash320Bytes   2.30µs ± 0%  2.18µs ± 0%  -5.38%  (p=1.000 n=1+1)
Hash1K         5.88µs ± 0%  5.38µs ± 0%  -8.62%  (p=1.000 n=1+1)
Hash8K         42.0µs ± 0%  37.9µs ± 0%  -9.75%  (p=1.000 n=1+1)

There are other improvements found in golang.org/x/crypto which are
all in the range of 5-15%.

Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c
Reviewed-on: https://go-review.googlesource.com/c/go/+/252097
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
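[Editor's illustration, not part of the commit; the function and table
values below are hypothetical. This minimal sketch shows the kind of
table-driven loop the new rules target: the index expression masks a
value, and indexing a [256]uint32 table scales it by 4, i.e. a left
shift by 2. Before this change that mask+shift pair lowered to two
instructions on ppc64x (an ANDconst or MOVBZreg followed by an
SLDconst); the rules in the diff below fold it into a single
CLRLSLDI, and the 32-bit forms fold SLWconst into CLRLSLWI the same
way.]

package main

import "fmt"

// step sketches the array-addressing shape the new rules match
// (hypothetical code, not from the Go tree). The masked index
// (h^uint32(b))&0xff is shifted left by 2 when it is scaled to a
// byte offset into tab, so the compiler sees an AND feeding a shift.
func step(tab *[256]uint32, data []byte, h uint32) uint32 {
	for _, b := range data {
		h = tab[(h^uint32(b))&0xff] ^ (h >> 8)
	}
	return h
}

func main() {
	var tab [256]uint32
	for i := range tab {
		tab[i] = uint32(i) * 0x9e3779b9 // arbitrary filler values
	}
	fmt.Printf("%#x\n", step(&tab, []byte("hello"), 0xffffffff))
}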
Diffstat (limited to 'src/cmd/compile/internal/ssa/rewritePPC64.go')
-rw-r--r--  src/cmd/compile/internal/ssa/rewritePPC64.go  787
1 file changed, 787 insertions(+), 0 deletions(-)
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 152cdfdf4d..12b08824b5 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -586,8 +586,12 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpPPC64ROTLW(v)
case OpPPC64SLD:
return rewriteValuePPC64_OpPPC64SLD(v)
+ case OpPPC64SLDconst:
+ return rewriteValuePPC64_OpPPC64SLDconst(v)
case OpPPC64SLW:
return rewriteValuePPC64_OpPPC64SLW(v)
+ case OpPPC64SLWconst:
+ return rewriteValuePPC64_OpPPC64SLWconst(v)
case OpPPC64SRAD:
return rewriteValuePPC64_OpPPC64SRAD(v)
case OpPPC64SRAW:
@@ -6565,6 +6569,255 @@ func rewriteValuePPC64_OpPPC64MOVBZreg(v *Value) bool {
v.AddArg(x)
return true
}
+ // match: (MOVBZreg (OR <t> x (MOVWZreg y)))
+ // result: (MOVBZreg (OR <t> x y))
+ for {
+ if v_0.Op != OpPPC64OR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64OR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg (XOR <t> x (MOVWZreg y)))
+ // result: (MOVBZreg (XOR <t> x y))
+ for {
+ if v_0.Op != OpPPC64XOR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64XOR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg (AND <t> x (MOVWZreg y)))
+ // result: (MOVBZreg (AND <t> x y))
+ for {
+ if v_0.Op != OpPPC64AND {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64AND, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg (OR <t> x (MOVHZreg y)))
+ // result: (MOVBZreg (OR <t> x y))
+ for {
+ if v_0.Op != OpPPC64OR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVHZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64OR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg (XOR <t> x (MOVHZreg y)))
+ // result: (MOVBZreg (XOR <t> x y))
+ for {
+ if v_0.Op != OpPPC64XOR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVHZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64XOR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg (AND <t> x (MOVHZreg y)))
+ // result: (MOVBZreg (AND <t> x y))
+ for {
+ if v_0.Op != OpPPC64AND {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVHZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64AND, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg (OR <t> x (MOVBZreg y)))
+ // result: (MOVBZreg (OR <t> x y))
+ for {
+ if v_0.Op != OpPPC64OR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVBZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64OR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg (XOR <t> x (MOVBZreg y)))
+ // result: (MOVBZreg (XOR <t> x y))
+ for {
+ if v_0.Op != OpPPC64XOR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVBZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64XOR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg (AND <t> x (MOVBZreg y)))
+ // result: (MOVBZreg (AND <t> x y))
+ for {
+ if v_0.Op != OpPPC64AND {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVBZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVBZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64AND, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVBZreg z:(ANDconst [c] (MOVBZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64ANDconst {
+ break
+ }
+ z_0 := z.Args[0]
+ if z_0.Op != OpPPC64MOVBZload {
+ break
+ }
+ v.copyOf(z)
+ return true
+ }
+ // match: (MOVBZreg z:(AND y (MOVBZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64AND {
+ break
+ }
+ _ = z.Args[1]
+ z_0 := z.Args[0]
+ z_1 := z.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 {
+ if z_1.Op != OpPPC64MOVBZload {
+ continue
+ }
+ v.copyOf(z)
+ return true
+ }
+ break
+ }
// match: (MOVBZreg x:(MOVBZload _ _))
// result: x
for {
@@ -8507,6 +8760,197 @@ func rewriteValuePPC64_OpPPC64MOVHZreg(v *Value) bool {
v.AddArg(x)
return true
}
+ // match: (MOVHZreg (OR <t> x (MOVWZreg y)))
+ // result: (MOVHZreg (OR <t> x y))
+ for {
+ if v_0.Op != OpPPC64OR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVHZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64OR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVHZreg (XOR <t> x (MOVWZreg y)))
+ // result: (MOVHZreg (XOR <t> x y))
+ for {
+ if v_0.Op != OpPPC64XOR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVHZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64XOR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVHZreg (AND <t> x (MOVWZreg y)))
+ // result: (MOVHZreg (AND <t> x y))
+ for {
+ if v_0.Op != OpPPC64AND {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVHZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64AND, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVHZreg (OR <t> x (MOVHZreg y)))
+ // result: (MOVHZreg (OR <t> x y))
+ for {
+ if v_0.Op != OpPPC64OR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVHZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVHZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64OR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVHZreg (XOR <t> x (MOVHZreg y)))
+ // result: (MOVHZreg (XOR <t> x y))
+ for {
+ if v_0.Op != OpPPC64XOR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVHZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVHZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64XOR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVHZreg (AND <t> x (MOVHZreg y)))
+ // result: (MOVHZreg (AND <t> x y))
+ for {
+ if v_0.Op != OpPPC64AND {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVHZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVHZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64AND, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVHZreg z:(ANDconst [c] (MOVBZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64ANDconst {
+ break
+ }
+ z_0 := z.Args[0]
+ if z_0.Op != OpPPC64MOVBZload {
+ break
+ }
+ v.copyOf(z)
+ return true
+ }
+ // match: (MOVHZreg z:(ANDconst [c] (MOVHZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64ANDconst {
+ break
+ }
+ z_0 := z.Args[0]
+ if z_0.Op != OpPPC64MOVHZload {
+ break
+ }
+ v.copyOf(z)
+ return true
+ }
+ // match: (MOVHZreg z:(AND y (MOVHZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64AND {
+ break
+ }
+ _ = z.Args[1]
+ z_0 := z.Args[0]
+ z_1 := z.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 {
+ if z_1.Op != OpPPC64MOVHZload {
+ continue
+ }
+ v.copyOf(z)
+ return true
+ }
+ break
+ }
// match: (MOVHZreg x:(MOVBZload _ _))
// result: x
for {
@@ -9657,6 +10101,139 @@ func rewriteValuePPC64_OpPPC64MOVWZreg(v *Value) bool {
v.AddArg(x)
return true
}
+ // match: (MOVWZreg (OR <t> x (MOVWZreg y)))
+ // result: (MOVWZreg (OR <t> x y))
+ for {
+ if v_0.Op != OpPPC64OR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVWZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64OR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVWZreg (XOR <t> x (MOVWZreg y)))
+ // result: (MOVWZreg (XOR <t> x y))
+ for {
+ if v_0.Op != OpPPC64XOR {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVWZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64XOR, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVWZreg (AND <t> x (MOVWZreg y)))
+ // result: (MOVWZreg (AND <t> x y))
+ for {
+ if v_0.Op != OpPPC64AND {
+ break
+ }
+ t := v_0.Type
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ if v_0_1.Op != OpPPC64MOVWZreg {
+ continue
+ }
+ y := v_0_1.Args[0]
+ v.reset(OpPPC64MOVWZreg)
+ v0 := b.NewValue0(v.Pos, OpPPC64AND, t)
+ v0.AddArg2(x, y)
+ v.AddArg(v0)
+ return true
+ }
+ break
+ }
+ // match: (MOVWZreg z:(ANDconst [c] (MOVBZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64ANDconst {
+ break
+ }
+ z_0 := z.Args[0]
+ if z_0.Op != OpPPC64MOVBZload {
+ break
+ }
+ v.copyOf(z)
+ return true
+ }
+ // match: (MOVWZreg z:(ANDconst [c] (MOVHZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64ANDconst {
+ break
+ }
+ z_0 := z.Args[0]
+ if z_0.Op != OpPPC64MOVHZload {
+ break
+ }
+ v.copyOf(z)
+ return true
+ }
+ // match: (MOVWZreg z:(ANDconst [c] (MOVWZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64ANDconst {
+ break
+ }
+ z_0 := z.Args[0]
+ if z_0.Op != OpPPC64MOVWZload {
+ break
+ }
+ v.copyOf(z)
+ return true
+ }
+ // match: (MOVWZreg z:(AND y (MOVWZload ptr x)))
+ // result: z
+ for {
+ z := v_0
+ if z.Op != OpPPC64AND {
+ break
+ }
+ _ = z.Args[1]
+ z_0 := z.Args[0]
+ z_1 := z.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 {
+ if z_1.Op != OpPPC64MOVWZload {
+ continue
+ }
+ v.copyOf(z)
+ return true
+ }
+ break
+ }
// match: (MOVWZreg x:(MOVBZload _ _))
// result: x
for {
@@ -12197,6 +12774,111 @@ func rewriteValuePPC64_OpPPC64SLD(v *Value) bool {
}
return false
}
+func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (SLDconst [c] z:(MOVBZreg x))
+ // cond: c < 8 && z.Uses == 1
+ // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64MOVBZreg {
+ break
+ }
+ x := z.Args[0]
+ if !(c < 8 && z.Uses == 1) {
+ break
+ }
+ v.reset(OpPPC64CLRLSLDI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 56, 63, 64))
+ v.AddArg(x)
+ return true
+ }
+ // match: (SLDconst [c] z:(MOVHZreg x))
+ // cond: c < 16 && z.Uses == 1
+ // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64MOVHZreg {
+ break
+ }
+ x := z.Args[0]
+ if !(c < 16 && z.Uses == 1) {
+ break
+ }
+ v.reset(OpPPC64CLRLSLDI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 48, 63, 64))
+ v.AddArg(x)
+ return true
+ }
+ // match: (SLDconst [c] z:(MOVWZreg x))
+ // cond: c < 32 && z.Uses == 1
+ // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64MOVWZreg {
+ break
+ }
+ x := z.Args[0]
+ if !(c < 32 && z.Uses == 1) {
+ break
+ }
+ v.reset(OpPPC64CLRLSLDI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32, 63, 64))
+ v.AddArg(x)
+ return true
+ }
+ // match: (SLDconst [c] z:(ANDconst [d] x))
+ // cond: z.Uses == 1 && isPPC64ValidShiftMask(d)
+ // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64ANDconst {
+ break
+ }
+ d := auxIntToInt64(z.AuxInt)
+ x := z.Args[0]
+ if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) {
+ break
+ }
+ v.reset(OpPPC64CLRLSLDI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 64-getPPC64ShiftMaskLength(d), 63, 64))
+ v.AddArg(x)
+ return true
+ }
+ // match: (SLDconst [c] z:(AND (MOVDconst [d]) x))
+ // cond: z.Uses == 1 && isPPC64ValidShiftMask(d)
+ // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64AND {
+ break
+ }
+ _ = z.Args[1]
+ z_0 := z.Args[0]
+ z_1 := z.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 {
+ if z_0.Op != OpPPC64MOVDconst {
+ continue
+ }
+ d := auxIntToInt64(z_0.AuxInt)
+ x := z_1
+ if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) {
+ continue
+ }
+ v.reset(OpPPC64CLRLSLDI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 64-getPPC64ShiftMaskLength(d), 63, 64))
+ v.AddArg(x)
+ return true
+ }
+ break
+ }
+ return false
+}
func rewriteValuePPC64_OpPPC64SLW(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -12215,6 +12897,111 @@ func rewriteValuePPC64_OpPPC64SLW(v *Value) bool {
}
return false
}
+func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool {
+ v_0 := v.Args[0]
+ // match: (SLWconst [c] z:(MOVBZreg x))
+ // cond: z.Uses == 1 && c < 8
+ // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64MOVBZreg {
+ break
+ }
+ x := z.Args[0]
+ if !(z.Uses == 1 && c < 8) {
+ break
+ }
+ v.reset(OpPPC64CLRLSLWI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 24, 31, 32))
+ v.AddArg(x)
+ return true
+ }
+ // match: (SLWconst [c] z:(MOVHZreg x))
+ // cond: z.Uses == 1 && c < 16
+ // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64MOVHZreg {
+ break
+ }
+ x := z.Args[0]
+ if !(z.Uses == 1 && c < 16) {
+ break
+ }
+ v.reset(OpPPC64CLRLSLWI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 16, 31, 32))
+ v.AddArg(x)
+ return true
+ }
+ // match: (SLWconst [c] z:(MOVWZreg x))
+ // cond: z.Uses == 1 && c < 24
+ // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64MOVWZreg {
+ break
+ }
+ x := z.Args[0]
+ if !(z.Uses == 1 && c < 24) {
+ break
+ }
+ v.reset(OpPPC64CLRLSLWI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 8, 31, 32))
+ v.AddArg(x)
+ return true
+ }
+ // match: (SLWconst [c] z:(ANDconst [d] x))
+ // cond: z.Uses == 1 && isPPC64ValidShiftMask(d)
+ // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64ANDconst {
+ break
+ }
+ d := auxIntToInt64(z.AuxInt)
+ x := z.Args[0]
+ if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) {
+ break
+ }
+ v.reset(OpPPC64CLRLSLWI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32-getPPC64ShiftMaskLength(d), 31, 32))
+ v.AddArg(x)
+ return true
+ }
+ // match: (SLWconst [c] z:(AND (MOVDconst [d]) x))
+ // cond: z.Uses == 1 && isPPC64ValidShiftMask(d)
+ // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
+ for {
+ c := auxIntToInt64(v.AuxInt)
+ z := v_0
+ if z.Op != OpPPC64AND {
+ break
+ }
+ _ = z.Args[1]
+ z_0 := z.Args[0]
+ z_1 := z.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 {
+ if z_0.Op != OpPPC64MOVDconst {
+ continue
+ }
+ d := auxIntToInt64(z_0.AuxInt)
+ x := z_1
+ if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) {
+ continue
+ }
+ v.reset(OpPPC64CLRLSLWI)
+ v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32-getPPC64ShiftMaskLength(d), 31, 32))
+ v.AddArg(x)
+ return true
+ }
+ break
+ }
+ return false
+}
func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]