diff options
author | Paul E. Murphy <murp@ibm.com> | 2020-10-23 12:12:34 -0500 |
---|---|---|
committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2020-10-27 18:33:20 +0000 |
commit | c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24 (patch) | |
tree | 67a121a1ecd4bec56887f14e5894231017e73bb9 /src/cmd/compile/internal/ssa/rewritePPC64.go | |
parent | e3bb53a7683eb9c3d04c09f28abb4cf9aa89a7c1 (diff) | |
download | go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.tar.gz go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.zip |
cmd/compile: combine more 32 bit shift and mask operations on ppc64
Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when mask m is
and the shift value produce constant which can be encoded into an
RLWINM instruction.
Combine (CLRLSLDI (SRWconst x)) if the combining of the underling rotate
masks produces a constant which can be encoded into RLWINM.
Likewise for (SLDconst (SRWconst x)) and (CLRLSDI (RLWINM x)).
Combine rotate word + and operations which can be encoded as a single
RLWINM/RLWNM instruction.
The most notable performance improvements arise from the crypto
benchmarks below (GOARCH=power8 on a ppc64le/linux):
pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le
ExpandKeyWithSalt 52.2µs ± 0% 47.5µs ± 0% -8.88%
ExpandKey 44.4µs ± 0% 40.3µs ± 0% -9.15%
pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le
Key 57.6ms ± 0% 52.3ms ± 0% -9.13%
pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le
Equal 90.9ms ± 0% 82.6ms ± 0% -9.13%
DefaultCost 91.0ms ± 0% 82.7ms ± 0% -9.12%
Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/260798
Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/rewritePPC64.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/rewritePPC64.go | 368 |
1 files changed, 368 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 84938fe27a..e5a23e8625 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -444,6 +444,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64ANDN(v) case OpPPC64ANDconst: return rewriteValuePPC64_OpPPC64ANDconst(v) + case OpPPC64CLRLSLDI: + return rewriteValuePPC64_OpPPC64CLRLSLDI(v) case OpPPC64CMP: return rewriteValuePPC64_OpPPC64CMP(v) case OpPPC64CMPU: @@ -598,6 +600,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64ROTL(v) case OpPPC64ROTLW: return rewriteValuePPC64_OpPPC64ROTLW(v) + case OpPPC64ROTLWconst: + return rewriteValuePPC64_OpPPC64ROTLWconst(v) case OpPPC64SLD: return rewriteValuePPC64_OpPPC64SLD(v) case OpPPC64SLDconst: @@ -614,6 +618,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64SRD(v) case OpPPC64SRW: return rewriteValuePPC64_OpPPC64SRW(v) + case OpPPC64SRWconst: + return rewriteValuePPC64_OpPPC64SRWconst(v) case OpPPC64SUB: return rewriteValuePPC64_OpPPC64SUB(v) case OpPPC64SUBFCconst: @@ -4212,6 +4218,100 @@ func rewriteValuePPC64_OpPPC64ADDconst(v *Value) bool { func rewriteValuePPC64_OpPPC64AND(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + // match: (AND (MOVDconst [m]) (ROTLWconst [r] x)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWINM [encodePPC64RotateMask(r,m,32)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpPPC64ROTLWconst { + continue + } + r := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] + if !(isPPC64WordRotateMask(m)) { + continue + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, m, 32)) + v.AddArg(x) + return true + } + break + } + // match: (AND (MOVDconst [m]) (ROTLW x r)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWNM [encodePPC64RotateMask(0,m,32)] x r) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpPPC64ROTLW { + continue + } + r := v_1.Args[1] + x := v_1.Args[0] + if !(isPPC64WordRotateMask(m)) { + continue + } + v.reset(OpPPC64RLWNM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32)) + v.AddArg2(x, r) + return true + } + break + } + // match: (AND (MOVDconst [m]) (SRWconst x [s])) + // cond: mergePPC64RShiftMask(m,s,32) == 0 + // result: (MOVDconst [0]) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpPPC64SRWconst { + continue + } + s := auxIntToInt64(v_1.AuxInt) + if !(mergePPC64RShiftMask(m, s, 32) == 0) { + continue + } + v.reset(OpPPC64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + break + } + // match: (AND (MOVDconst [m]) (SRWconst x [s])) + // cond: mergePPC64AndSrwi(m,s) != 0 + // result: (RLWINM [mergePPC64AndSrwi(m,s)] x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0.AuxInt) + if v_1.Op != OpPPC64SRWconst { + continue + } + s := auxIntToInt64(v_1.AuxInt) + x := v_1.Args[0] + if !(mergePPC64AndSrwi(m, s) != 0) { + continue + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m, s)) + v.AddArg(x) + return true + } + break + } // match: (AND x (NOR y y)) // result: (ANDN x y) for { @@ -4347,6 +4447,76 @@ func rewriteValuePPC64_OpPPC64ANDN(v *Value) bool { } func rewriteValuePPC64_OpPPC64ANDconst(v *Value) bool { v_0 := v.Args[0] + // match: (ANDconst [m] (ROTLWconst [r] x)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWINM [encodePPC64RotateMask(r,m,32)] x) + for { + m := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ROTLWconst { + break + } + r := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(isPPC64WordRotateMask(m)) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, m, 32)) + v.AddArg(x) + return true + } + // match: (ANDconst [m] (ROTLW x r)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWNM [encodePPC64RotateMask(0,m,32)] x r) + for { + m := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ROTLW { + break + } + r := v_0.Args[1] + x := v_0.Args[0] + if !(isPPC64WordRotateMask(m)) { + break + } + v.reset(OpPPC64RLWNM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32)) + v.AddArg2(x, r) + return true + } + // match: (ANDconst [m] (SRWconst x [s])) + // cond: mergePPC64RShiftMask(m,s,32) == 0 + // result: (MOVDconst [0]) + for { + m := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64SRWconst { + break + } + s := auxIntToInt64(v_0.AuxInt) + if !(mergePPC64RShiftMask(m, s, 32) == 0) { + break + } + v.reset(OpPPC64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (ANDconst [m] (SRWconst x [s])) + // cond: mergePPC64AndSrwi(m,s) != 0 + // result: (RLWINM [mergePPC64AndSrwi(m,s)] x) + for { + m := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64SRWconst { + break + } + s := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(mergePPC64AndSrwi(m, s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m, s)) + v.AddArg(x) + return true + } // match: (ANDconst [c] (ANDconst [d] x)) // result: (ANDconst [c&d] x) for { @@ -4511,6 +4681,47 @@ func rewriteValuePPC64_OpPPC64ANDconst(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64CLRLSLDI(v *Value) bool { + v_0 := v.Args[0] + // match: (CLRLSLDI [c] (SRWconst [s] x)) + // cond: mergePPC64ClrlsldiSrw(int64(c),s) != 0 + // result: (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpPPC64SRWconst { + break + } + s := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(mergePPC64ClrlsldiSrw(int64(c), s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiSrw(int64(c), s)) + v.AddArg(x) + return true + } + // match: (CLRLSLDI [c] i:(RLWINM [s] x)) + // cond: mergePPC64ClrlsldiRlwinm(c,s) != 0 + // result: (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x) + for { + c := auxIntToInt32(v.AuxInt) + i := v_0 + if i.Op != OpPPC64RLWINM { + break + } + s := auxIntToInt64(i.AuxInt) + x := i.Args[0] + if !(mergePPC64ClrlsldiRlwinm(c, s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiRlwinm(c, s)) + v.AddArg(x) + return true + } + return false +} func rewriteValuePPC64_OpPPC64CMP(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -12850,6 +13061,55 @@ func rewriteValuePPC64_OpPPC64ROTLW(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64ROTLWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (ROTLWconst [r] (AND (MOVDconst [m]) x)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x) + for { + r := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64AND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0_0.AuxInt) + x := v_0_1 + if !(isPPC64WordRotateMask(m)) { + continue + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, rotateLeft32(m, r), 32)) + v.AddArg(x) + return true + } + break + } + // match: (ROTLWconst [r] (ANDconst [m] x)) + // cond: isPPC64WordRotateMask(m) + // result: (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x) + for { + r := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ANDconst { + break + } + m := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(isPPC64WordRotateMask(m)) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, rotateLeft32(m, r), 32)) + v.AddArg(x) + return true + } + return false +} func rewriteValuePPC64_OpPPC64SLD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -12870,6 +13130,24 @@ func rewriteValuePPC64_OpPPC64SLD(v *Value) bool { } func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool { v_0 := v.Args[0] + // match: (SLDconst [l] (SRWconst [r] x)) + // cond: mergePPC64SldiSrw(l,r) != 0 + // result: (RLWINM [mergePPC64SldiSrw(l,r)] x) + for { + l := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64SRWconst { + break + } + r := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(mergePPC64SldiSrw(l, r) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64SldiSrw(l, r)) + v.AddArg(x) + return true + } // match: (SLDconst [c] z:(MOVBZreg x)) // cond: c < 8 && z.Uses == 1 // result: (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x) @@ -13186,6 +13464,96 @@ func rewriteValuePPC64_OpPPC64SRW(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64SRWconst(v *Value) bool { + v_0 := v.Args[0] + // match: (SRWconst (ANDconst [m] x) [s]) + // cond: mergePPC64RShiftMask(m>>uint(s),s,32) == 0 + // result: (MOVDconst [0]) + for { + s := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ANDconst { + break + } + m := auxIntToInt64(v_0.AuxInt) + if !(mergePPC64RShiftMask(m>>uint(s), s, 32) == 0) { + break + } + v.reset(OpPPC64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (SRWconst (ANDconst [m] x) [s]) + // cond: mergePPC64AndSrwi(m>>uint(s),s) != 0 + // result: (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x) + for { + s := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64ANDconst { + break + } + m := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(mergePPC64AndSrwi(m>>uint(s), s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m>>uint(s), s)) + v.AddArg(x) + return true + } + // match: (SRWconst (AND (MOVDconst [m]) x) [s]) + // cond: mergePPC64RShiftMask(m>>uint(s),s,32) == 0 + // result: (MOVDconst [0]) + for { + s := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64AND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0_0.AuxInt) + if !(mergePPC64RShiftMask(m>>uint(s), s, 32) == 0) { + continue + } + v.reset(OpPPC64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + break + } + // match: (SRWconst (AND (MOVDconst [m]) x) [s]) + // cond: mergePPC64AndSrwi(m>>uint(s),s) != 0 + // result: (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x) + for { + s := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64AND { + break + } + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + if v_0_0.Op != OpPPC64MOVDconst { + continue + } + m := auxIntToInt64(v_0_0.AuxInt) + x := v_0_1 + if !(mergePPC64AndSrwi(m>>uint(s), s) != 0) { + continue + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m>>uint(s), s)) + v.AddArg(x) + return true + } + break + } + return false +} func rewriteValuePPC64_OpPPC64SUB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] |