From 7994da4cc143fa16803bf80188b849457b7d55c3 Mon Sep 17 00:00:00 2001
From: "Paul E. Murphy"
Date: Fri, 26 Apr 2024 09:26:52 -0500
Subject: cmd/compile/internal/ssa: on PPC64, try combining CLRLSLDI and SRDconst into RLWINM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This provides a small performance bump to crc64 as measured on ppc64le/power10:

name              old time/op  new time/op  delta
Crc64/ISO64KB     49.6µs ± 0%  46.6µs ± 0%  -6.18%
Crc64/ISO4KB      3.16µs ± 0%  2.97µs ± 0%  -5.83%
Crc64/ISO1KB       840ns ± 0%   794ns ± 0%  -5.46%
Crc64/ECMA64KB    49.6µs ± 0%  46.5µs ± 0%  -6.20%
Crc64/Random64KB  53.1µs ± 0%  49.9µs ± 0%  -6.04%
Crc64/Random16KB  15.9µs ± 1%  15.0µs ± 0%  -5.73%

Change-Id: I302b5431c7dc46dfd2d211545c483bdcdfe011f1
Cq-Include-Trybots: luci.golang.try:gotip-linux-ppc64_power10,gotip-linux-ppc64_power8,gotip-linux-ppc64le_power8,gotip-linux-ppc64le_power9,gotip-linux-ppc64le_power10
Reviewed-on: https://go-review.googlesource.com/c/go/+/581937
Reviewed-by: Lynn Boger
Reviewed-by: Eli Bendersky
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
---
 src/cmd/compile/internal/ssa/_gen/PPC64.rules |  1 +
 src/cmd/compile/internal/ssa/rewrite.go       | 27 ++++++++++++++++++++++++++-
 src/cmd/compile/internal/ssa/rewritePPC64.go  | 18 ++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
index 7518119147..f0cb23ba9f 100644
--- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
@@ -158,6 +158,7 @@
 
 // Merge shift right + shift left and clear left (e.g for a table lookup)
 (CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x)
+(CLRLSLDI [c] (SRDconst [s] x)) && mergePPC64ClrlsldiSrd(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrd(int64(c),s)] x)
 (SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x)
 // The following reduction shows up frequently too. e.g b[(x>>14)&0xFF]
 (CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 9961b540b7..4e4d99af0b 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -1589,7 +1589,7 @@ func mergePPC64AndSrwi(m, s int64) int64 {
 	return encodePPC64RotateMask((32-s)&31, mask, 32)
 }
 
-// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
+// Test if a word shift right feeding into a CLRLSLDI can be merged into RLWINM.
 // Return the encoded RLWINM constant, or 0 if they cannot be merged.
 func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
 	mask_1 := uint64(0xFFFFFFFF >> uint(srw))
@@ -1609,6 +1609,31 @@ func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
 	return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
 }
 
+// Test if a doubleword shift right feeding into a CLRLSLDI can be merged into RLWINM.
+// Return the encoded RLWINM constant, or 0 if they cannot be merged.
+func mergePPC64ClrlsldiSrd(sld, srd int64) int64 {
+	mask_1 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(srd)
+	// For CLRLSLDI, it is more convenient to think of it as masking off the left bits, then rotating left.
+	mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
+
+	// Rewrite mask to apply after the final left shift.
+	mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
+
+	r_1 := 64 - srd
+	r_2 := GetPPC64Shiftsh(sld)
+	r_3 := (r_1 + r_2) & 63 // This can wrap.
+
+	if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
+		return 0
+	}
+	// This combine only works when every selected bit comes from the lower 32 bits of the source.
+	v1 := bits.RotateLeft64(0xFFFFFFFF00000000, int(r_3))
+	if v1&mask_3 != 0 {
+		return 0
+	}
+	return encodePPC64RotateMask(int64(r_3&31), int64(mask_3), 32) // r_3 may be below 32 (e.g. 0 when the two shifts are equal), so reduce it mod 32 instead of subtracting 32.
+}
+
 // Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
 // the encoded RLWINM constant, or 0 if they cannot be merged.
 func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 4ac5eec073..266ac14c38 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -4628,6 +4628,24 @@ func rewriteValuePPC64_OpPPC64CLRLSLDI(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (CLRLSLDI [c] (SRDconst [s] x))
+	// cond: mergePPC64ClrlsldiSrd(int64(c),s) != 0
+	// result: (RLWINM [mergePPC64ClrlsldiSrd(int64(c),s)] x)
+	for {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		s := auxIntToInt64(v_0.AuxInt)
+		x := v_0.Args[0]
+		if !(mergePPC64ClrlsldiSrd(int64(c), s) != 0) {
+			break
+		}
+		v.reset(OpPPC64RLWINM)
+		v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiSrd(int64(c), s))
+		v.AddArg(x)
+		return true
+	}
 	// match: (CLRLSLDI [c] i:(RLWINM [s] x))
 	// cond: mergePPC64ClrlsldiRlwinm(c,s) != 0
 	// result: (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)
-- 
cgit v1.2.3-54-g00ecf