aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPaul E. Murphy <murp@ibm.com>2024-04-26 09:26:52 -0500
committerPaul Murphy <murp@ibm.com>2024-05-03 21:12:29 +0000
commit7994da4cc143fa16803bf80188b849457b7d55c3 (patch)
treefcef6b85c42e8f727c94bc4c633764931c7bac6c /src
parent99fc3179487b8014f5b301e83fce8ccd618afa09 (diff)
downloadgo-7994da4cc143fa16803bf80188b849457b7d55c3.tar.gz
go-7994da4cc143fa16803bf80188b849457b7d55c3.zip
cmd/compile/internal/ssa: on PPC64, try combining CLRLSLDI and SRDconst into RLWINM
This provides a small performance bump to crc64 as measured on ppc64le/power10: name old time/op new time/op delta Crc64/ISO64KB 49.6µs ± 0% 46.6µs ± 0% -6.18% Crc64/ISO4KB 3.16µs ± 0% 2.97µs ± 0% -5.83% Crc64/ISO1KB 840ns ± 0% 794ns ± 0% -5.46% Crc64/ECMA64KB 49.6µs ± 0% 46.5µs ± 0% -6.20% Crc64/Random64KB 53.1µs ± 0% 49.9µs ± 0% -6.04% Crc64/Random16KB 15.9µs ± 1% 15.0µs ± 0% -5.73% Change-Id: I302b5431c7dc46dfd2d211545c483bdcdfe011f1 Cq-Include-Trybots: luci.golang.try:gotip-linux-ppc64_power10,gotip-linux-ppc64_power8,gotip-linux-ppc64le_power8,gotip-linux-ppc64le_power9,gotip-linux-ppc64le_power10 Reviewed-on: https://go-review.googlesource.com/c/go/+/581937 Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Reviewed-by: Eli Bendersky <eliben@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src')
-rw-r--r--src/cmd/compile/internal/ssa/_gen/PPC64.rules1
-rw-r--r--src/cmd/compile/internal/ssa/rewrite.go27
-rw-r--r--src/cmd/compile/internal/ssa/rewritePPC64.go18
3 files changed, 45 insertions, 1 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
index 7518119147..f0cb23ba9f 100644
--- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
@@ -158,6 +158,7 @@
// Merge shift right + shift left and clear left (e.g for a table lookup)
(CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x)
+(CLRLSLDI [c] (SRDconst [s] x)) && mergePPC64ClrlsldiSrd(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrd(int64(c),s)] x)
(SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x)
// The following reduction shows up frequently too. e.g b[(x>>14)&0xFF]
(CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 9961b540b7..4e4d99af0b 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -1589,7 +1589,7 @@ func mergePPC64AndSrwi(m, s int64) int64 {
return encodePPC64RotateMask((32-s)&31, mask, 32)
}
-// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
+// Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
mask_1 := uint64(0xFFFFFFFF >> uint(srw))
@@ -1609,6 +1609,31 @@ func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}
+// Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
+// Return the encoded RLWINM constant, or 0 if they cannot be merged.
+func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
+ mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
+ // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left.
+ mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
+
+ // Rewrite mask to apply after the final left shift.
+ mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
+
+ r_1 := 64 - srd
+ r_2 := GetPPC64Shiftsh(sld)
+ r_3 := (r_1 + r_2) & 63 // This can wrap.
+
+ if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
+ return 0
+ }
+ // This combine only works when selecting and shifting the lower 32 bits.
+ v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
+ if v1&mask_3 != 0 {
+ return 0
+ }
+ return encodePPC64RotateMask(int64(r_3-32), int64(mask_3), 32)
+}
+
// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 4ac5eec073..266ac14c38 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -4628,6 +4628,24 @@ func rewriteValuePPC64_OpPPC64CLRLSLDI(v *Value) bool {
v.AddArg(x)
return true
}
+ // match: (CLRLSLDI [c] (SRDconst [s] x))
+ // cond: mergePPC64ClrlsldiSrd(int64(c),s) != 0
+ // result: (RLWINM [mergePPC64ClrlsldiSrd(int64(c),s)] x)
+ for {
+ c := auxIntToInt32(v.AuxInt)
+ if v_0.Op != OpPPC64SRDconst {
+ break
+ }
+ s := auxIntToInt64(v_0.AuxInt)
+ x := v_0.Args[0]
+ if !(mergePPC64ClrlsldiSrd(int64(c), s) != 0) {
+ break
+ }
+ v.reset(OpPPC64RLWINM)
+ v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiSrd(int64(c), s))
+ v.AddArg(x)
+ return true
+ }
// match: (CLRLSLDI [c] i:(RLWINM [s] x))
// cond: mergePPC64ClrlsldiRlwinm(c,s) != 0
// result: (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)