diff options
author | Lynn Boger <laboger@linux.vnet.ibm.com> | 2020-08-31 09:43:40 -0400 |
---|---|---|
committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2020-09-17 12:37:40 +0000 |
commit | 967465da2975fe4322080703ce5a77ea90752829 (patch) | |
tree | 1f3a8d2b15aaa382b3551c3d5a1b68b6808f8592 /src/cmd/compile/internal/ssa/rewritePPC64.go | |
parent | 0dde60a5fefcb1447c97efa5c7bb4dbcf3575736 (diff) | |
download | go-967465da2975fe4322080703ce5a77ea90752829.tar.gz go-967465da2975fe4322080703ce5a77ea90752829.zip |
cmd/compile: use combined shifts to improve array addressing on ppc64x
This change adds rules to find pairs of instructions that can
be combined into a single shift. These instruction sequences
are common in array addressing within loops. Improvements can
be seen in many crypto packages and the hash packages.
These are based on the extended mnemonics found in the ISA
sections C.8.1 and C.8.2.
Some rules in PPC64.rules were moved because the ordering prevented
some matching.
The following results were generated on power9.
hash/crc32:
CRC32/poly=Koopman/size=40/align=0 195ns ± 0% 163ns ± 0% -16.41%
CRC32/poly=Koopman/size=40/align=1 200ns ± 0% 163ns ± 0% -18.50%
CRC32/poly=Koopman/size=512/align=0 1.98µs ± 0% 1.67µs ± 0% -15.46%
CRC32/poly=Koopman/size=512/align=1 1.98µs ± 0% 1.69µs ± 0% -14.80%
CRC32/poly=Koopman/size=1kB/align=0 3.90µs ± 0% 3.31µs ± 0% -15.27%
CRC32/poly=Koopman/size=1kB/align=1 3.85µs ± 0% 3.31µs ± 0% -14.15%
CRC32/poly=Koopman/size=4kB/align=0 15.3µs ± 0% 13.1µs ± 0% -14.22%
CRC32/poly=Koopman/size=4kB/align=1 15.4µs ± 0% 13.1µs ± 0% -14.79%
CRC32/poly=Koopman/size=32kB/align=0 137µs ± 0% 105µs ± 0% -23.56%
CRC32/poly=Koopman/size=32kB/align=1 137µs ± 0% 105µs ± 0% -23.53%
crypto/rc4:
RC4_128 733ns ± 0% 650ns ± 0% -11.32% (p=1.000 n=1+1)
RC4_1K 5.80µs ± 0% 5.17µs ± 0% -10.89% (p=1.000 n=1+1)
RC4_8K 45.7µs ± 0% 40.8µs ± 0% -10.73% (p=1.000 n=1+1)
crypto/sha1:
Hash8Bytes 635ns ± 0% 613ns ± 0% -3.46% (p=1.000 n=1+1)
Hash320Bytes 2.30µs ± 0% 2.18µs ± 0% -5.38% (p=1.000 n=1+1)
Hash1K 5.88µs ± 0% 5.38µs ± 0% -8.62% (p=1.000 n=1+1)
Hash8K 42.0µs ± 0% 37.9µs ± 0% -9.75% (p=1.000 n=1+1)
There are other improvements found in golang.org/x/crypto which are all in the
range of 5-15%.
Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c
Reviewed-on: https://go-review.googlesource.com/c/go/+/252097
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/rewritePPC64.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/rewritePPC64.go | 787 |
1 files changed, 787 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 152cdfdf4d..12b08824b5 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -586,8 +586,12 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64ROTLW(v) case OpPPC64SLD: return rewriteValuePPC64_OpPPC64SLD(v) + case OpPPC64SLDconst: + return rewriteValuePPC64_OpPPC64SLDconst(v) case OpPPC64SLW: return rewriteValuePPC64_OpPPC64SLW(v) + case OpPPC64SLWconst: + return rewriteValuePPC64_OpPPC64SLWconst(v) case OpPPC64SRAD: return rewriteValuePPC64_OpPPC64SRAD(v) case OpPPC64SRAW: @@ -6565,6 +6569,255 @@ func rewriteValuePPC64_OpPPC64MOVBZreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVBZreg (OR <t> x (MOVWZreg y))) + // result: (MOVBZreg (OR <t> x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR <t> x (MOVWZreg y))) + // result: (MOVBZreg (XOR <t> x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND <t> x (MOVWZreg y))) + // result: (MOVBZreg (AND <t> x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 
0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (OR <t> x (MOVHZreg y))) + // result: (MOVBZreg (OR <t> x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR <t> x (MOVHZreg y))) + // result: (MOVBZreg (XOR <t> x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND <t> x (MOVHZreg y))) + // result: (MOVBZreg (AND <t> x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (OR <t> x (MOVBZreg y))) + // result: (MOVBZreg (OR <t> x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 
1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (XOR <t> x (MOVBZreg y))) + // result: (MOVBZreg (XOR <t> x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg (AND <t> x (MOVBZreg y))) + // result: (MOVBZreg (AND <t> x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVBZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVBZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVBZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVBZreg z:(AND y (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVBZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVBZreg x:(MOVBZload _ _)) // result: x for { @@ -8507,6 +8760,197 @@ func rewriteValuePPC64_OpPPC64MOVHZreg(v *Value) bool { v.AddArg(x) return true } + // match: 
(MOVHZreg (OR <t> x (MOVWZreg y))) + // result: (MOVHZreg (OR <t> x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (XOR <t> x (MOVWZreg y))) + // result: (MOVHZreg (XOR <t> x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (AND <t> x (MOVWZreg y))) + // result: (MOVHZreg (AND <t> x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (OR <t> x (MOVHZreg y))) + // result: (MOVHZreg (OR <t> x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg 
(XOR <t> x (MOVHZreg y))) + // result: (MOVHZreg (XOR <t> x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg (AND <t> x (MOVHZreg y))) + // result: (MOVHZreg (AND <t> x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVHZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVHZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVHZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVHZreg z:(ANDconst [c] (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVHZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVHZreg z:(AND y (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVHZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVHZreg x:(MOVBZload _ _)) // result: x for { @@ -9657,6 +10101,139 @@ func rewriteValuePPC64_OpPPC64MOVWZreg(v *Value) bool { v.AddArg(x) return true } + // match: (MOVWZreg (OR <t> x (MOVWZreg y))) + // result: 
(MOVWZreg (OR <t> x y)) + for { + if v_0.Op != OpPPC64OR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64OR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg (XOR <t> x (MOVWZreg y))) + // result: (MOVWZreg (XOR <t> x y)) + for { + if v_0.Op != OpPPC64XOR { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64XOR, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg (AND <t> x (MOVWZreg y))) + // result: (MOVWZreg (AND <t> x y)) + for { + if v_0.Op != OpPPC64AND { + break + } + t := v_0.Type + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + if v_0_1.Op != OpPPC64MOVWZreg { + continue + } + y := v_0_1.Args[0] + v.reset(OpPPC64MOVWZreg) + v0 := b.NewValue0(v.Pos, OpPPC64AND, t) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } + break + } + // match: (MOVWZreg z:(ANDconst [c] (MOVBZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVBZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(ANDconst [c] (MOVHZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVHZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(ANDconst [c] (MOVWZload ptr x))) + // result: z + for { + z := v_0 + if z.Op 
!= OpPPC64ANDconst { + break + } + z_0 := z.Args[0] + if z_0.Op != OpPPC64MOVWZload { + break + } + v.copyOf(z) + return true + } + // match: (MOVWZreg z:(AND y (MOVWZload ptr x))) + // result: z + for { + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_1.Op != OpPPC64MOVWZload { + continue + } + v.copyOf(z) + return true + } + break + } // match: (MOVWZreg x:(MOVBZload _ _)) // result: x for { @@ -12197,6 +12774,111 @@ func rewriteValuePPC64_OpPPC64SLD(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SLDconst [c] z:(MOVBZreg x)) + // cond: c < 8 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVBZreg { + break + } + x := z.Args[0] + if !(c < 8 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 56, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(MOVHZreg x)) + // cond: c < 16 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVHZreg { + break + } + x := z.Args[0] + if !(c < 16 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 48, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(MOVWZreg x)) + // cond: c < 32 && z.Uses == 1 + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWZreg { + break + } + x := z.Args[0] + if !(c < 32 && z.Uses == 1) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32, 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(ANDconst [d] x)) + // cond: z.Uses == 1 && 
isPPC64ValidShiftMask(d) + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + d := auxIntToInt64(z.AuxInt) + x := z.Args[0] + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + break + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 64-getPPC64ShiftMaskLength(d), 63, 64)) + v.AddArg(x) + return true + } + // match: (SLDconst [c] z:(AND (MOVDconst [d]) x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_0.Op != OpPPC64MOVDconst { + continue + } + d := auxIntToInt64(z_0.AuxInt) + x := z_1 + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + continue + } + v.reset(OpPPC64CLRLSLDI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 64-getPPC64ShiftMaskLength(d), 63, 64)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -12215,6 +12897,111 @@ func rewriteValuePPC64_OpPPC64SLW(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64SLWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SLWconst [c] z:(MOVBZreg x)) + // cond: z.Uses == 1 && c < 8 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVBZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 8) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 24, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(MOVHZreg x)) + // cond: z.Uses == 1 && c < 16 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x) + for 
{ + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVHZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 16) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 16, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(MOVWZreg x)) + // cond: z.Uses == 1 && c < 24 + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64MOVWZreg { + break + } + x := z.Args[0] + if !(z.Uses == 1 && c < 24) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 8, 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(ANDconst [d] x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64ANDconst { + break + } + d := auxIntToInt64(z.AuxInt) + x := z.Args[0] + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + break + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32-getPPC64ShiftMaskLength(d), 31, 32)) + v.AddArg(x) + return true + } + // match: (SLWconst [c] z:(AND (MOVDconst [d]) x)) + // cond: z.Uses == 1 && isPPC64ValidShiftMask(d) + // result: (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x) + for { + c := auxIntToInt64(v.AuxInt) + z := v_0 + if z.Op != OpPPC64AND { + break + } + _ = z.Args[1] + z_0 := z.Args[0] + z_1 := z.Args[1] + for _i0 := 0; _i0 <= 1; _i0, z_0, z_1 = _i0+1, z_1, z_0 { + if z_0.Op != OpPPC64MOVDconst { + continue + } + d := auxIntToInt64(z_0.AuxInt) + x := z_1 + if !(z.Uses == 1 && isPPC64ValidShiftMask(d)) { + continue + } + v.reset(OpPPC64CLRLSLWI) + v.AuxInt = int32ToAuxInt(newPPC64ShiftAuxInt(c, 32-getPPC64ShiftMaskLength(d), 31, 32)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64SRAD(v *Value) bool { 
v_1 := v.Args[1] v_0 := v.Args[0] |