diff options
author | Paul E. Murphy <murp@ibm.com> | 2024-04-26 09:26:52 -0500 |
---|---|---|
committer | Paul Murphy <murp@ibm.com> | 2024-05-03 21:12:29 +0000 |
commit | 7994da4cc143fa16803bf80188b849457b7d55c3 (patch) | |
tree | fcef6b85c42e8f727c94bc4c633764931c7bac6c /src | |
parent | 99fc3179487b8014f5b301e83fce8ccd618afa09 (diff) | |
download | go-7994da4cc143fa16803bf80188b849457b7d55c3.tar.gz go-7994da4cc143fa16803bf80188b849457b7d55c3.zip |
cmd/compile/internal/ssa: on PPC64, try combining CLRLSLDI and SRDconst into RLWINM
This provides a small performance bump to crc64 as measured on ppc64le/power10:
name old time/op new time/op delta
Crc64/ISO64KB 49.6µs ± 0% 46.6µs ± 0% -6.18%
Crc64/ISO4KB 3.16µs ± 0% 2.97µs ± 0% -5.83%
Crc64/ISO1KB 840ns ± 0% 794ns ± 0% -5.46%
Crc64/ECMA64KB 49.6µs ± 0% 46.5µs ± 0% -6.20%
Crc64/Random64KB 53.1µs ± 0% 49.9µs ± 0% -6.04%
Crc64/Random16KB 15.9µs ± 1% 15.0µs ± 0% -5.73%
Change-Id: I302b5431c7dc46dfd2d211545c483bdcdfe011f1
Cq-Include-Trybots: luci.golang.try:gotip-linux-ppc64_power10,gotip-linux-ppc64_power8,gotip-linux-ppc64le_power8,gotip-linux-ppc64le_power9,gotip-linux-ppc64le_power10
Reviewed-on: https://go-review.googlesource.com/c/go/+/581937
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Eli Bendersky <eliben@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/cmd/compile/internal/ssa/_gen/PPC64.rules | 1 | ||||
-rw-r--r-- | src/cmd/compile/internal/ssa/rewrite.go | 27 | ||||
-rw-r--r-- | src/cmd/compile/internal/ssa/rewritePPC64.go | 18 |
3 files changed, 45 insertions, 1 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules index 7518119147..f0cb23ba9f 100644 --- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules @@ -158,6 +158,7 @@ // Merge shift right + shift left and clear left (e.g for a table lookup) (CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x) +(CLRLSLDI [c] (SRDconst [s] x)) && mergePPC64ClrlsldiSrd(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrd(int64(c),s)] x) (SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x) // The following reduction shows up frequently too. e.g b[(x>>14)&0xFF] (CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x) diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go index 9961b540b7..4e4d99af0b 100644 --- a/src/cmd/compile/internal/ssa/rewrite.go +++ b/src/cmd/compile/internal/ssa/rewrite.go @@ -1589,7 +1589,7 @@ func mergePPC64AndSrwi(m, s int64) int64 { return encodePPC64RotateMask((32-s)&31, mask, 32) } -// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM. +// Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM. // Return the encoded RLWINM constant, or 0 if they cannot be merged. func mergePPC64ClrlsldiSrw(sld, srw int64) int64 { mask_1 := uint64(0xFFFFFFFF >> uint(srw)) @@ -1609,6 +1609,31 @@ func mergePPC64ClrlsldiSrw(sld, srw int64) int64 { return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32) } +// Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM. +// Return the encoded RLWINM constant, or 0 if they cannot be merged. +func mergePPC64ClrlsldiSrd(sld, srd int64) int64 { + mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd) + // for CLRLSLDI, it's more convenient to think of it as a mask left bits then rotate left. + mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld))) + + // Rewrite mask to apply after the final left shift. + mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld)) + + r_1 := 64 - srd + r_2 := GetPPC64Shiftsh(sld) + r_3 := (r_1 + r_2) & 63 // This can wrap. + + if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 { + return 0 + } + // This combine only works when selecting and shifting the lower 32 bits. + v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3)) + if v1&mask_3 != 0 { + return 0 + } + return encodePPC64RotateMask(int64(r_3-32), int64(mask_3), 32) +} + // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return // the encoded RLWINM constant, or 0 if they cannot be merged. func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 { diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 4ac5eec073..266ac14c38 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -4628,6 +4628,24 @@ func rewriteValuePPC64_OpPPC64CLRLSLDI(v *Value) bool { v.AddArg(x) return true } + // match: (CLRLSLDI [c] (SRDconst [s] x)) + // cond: mergePPC64ClrlsldiSrd(int64(c),s) != 0 + // result: (RLWINM [mergePPC64ClrlsldiSrd(int64(c),s)] x) + for { + c := auxIntToInt32(v.AuxInt) + if v_0.Op != OpPPC64SRDconst { + break + } + s := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(mergePPC64ClrlsldiSrd(int64(c), s) != 0) { + break + } + v.reset(OpPPC64RLWINM) + v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiSrd(int64(c), s)) + v.AddArg(x) + return true + } // match: (CLRLSLDI [c] i:(RLWINM [s] x)) // cond: mergePPC64ClrlsldiRlwinm(c,s) != 0 // result: (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x) |