aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/opGen.go
diff options
context:
space:
mode:
authorPaul E. Murphy <murp@ibm.com>2020-10-23 12:12:34 -0500
committerLynn Boger <laboger@linux.vnet.ibm.com>2020-10-27 18:33:20 +0000
commitc3c6fbf31419d37b0ae7d99b5378f6f8e9080b24 (patch)
tree67a121a1ecd4bec56887f14e5894231017e73bb9 /src/cmd/compile/internal/ssa/opGen.go
parente3bb53a7683eb9c3d04c09f28abb4cf9aa89a7c1 (diff)
downloadgo-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.tar.gz
go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.zip
cmd/compile: combine more 32 bit shift and mask operations on ppc64
Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when mask m is and the shift value produce constant which can be encoded into an RLWINM instruction. Combine (CLRLSLDI (SRWconst x)) if the combining of the underling rotate masks produces a constant which can be encoded into RLWINM. Likewise for (SLDconst (SRWconst x)) and (CLRLSDI (RLWINM x)). Combine rotate word + and operations which can be encoded as a single RLWINM/RLWNM instruction. The most notable performance improvements arise from the crypto benchmarks below (GOARCH=power8 on a ppc64le/linux): pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le ExpandKeyWithSalt 52.2µs ± 0% 47.5µs ± 0% -8.88% ExpandKey 44.4µs ± 0% 40.3µs ± 0% -9.15% pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le Key 57.6ms ± 0% 52.3ms ± 0% -9.13% pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le Equal 90.9ms ± 0% 82.6ms ± 0% -9.13% DefaultCost 91.0ms ± 0% 82.7ms ± 0% -9.12% Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce Reviewed-on: https://go-review.googlesource.com/c/go/+/260798 Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com> Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/opGen.go')
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go48
1 files changed, 48 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 779c19f72d..bb1cbc0baa 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1871,6 +1871,9 @@ const (
OpPPC64ROTLconst
OpPPC64ROTLWconst
OpPPC64EXTSWSLconst
+ OpPPC64RLWINM
+ OpPPC64RLWNM
+ OpPPC64RLWMI
OpPPC64CNTLZD
OpPPC64CNTLZW
OpPPC64CNTTZD
@@ -24972,6 +24975,51 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "RLWINM",
+ auxType: auxInt64,
+ argLen: 1,
+ asm: ppc64.ARLWNM,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "RLWNM",
+ auxType: auxInt64,
+ argLen: 2,
+ asm: ppc64.ARLWNM,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "RLWMI",
+ auxType: auxInt64,
+ argLen: 2,
+ resultInArg0: true,
+ asm: ppc64.ARLWMI,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
name: "CNTLZD",
argLen: 1,
clobberFlags: true,