diff options
author | Paul E. Murphy <murp@ibm.com> | 2020-10-23 12:12:34 -0500 |
---|---|---|
committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2020-10-27 18:33:20 +0000 |
commit | c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24 (patch) | |
tree | 67a121a1ecd4bec56887f14e5894231017e73bb9 /src/cmd/compile/internal/ssa/opGen.go | |
parent | e3bb53a7683eb9c3d04c09f28abb4cf9aa89a7c1 (diff) | |
download | go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.tar.gz go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.zip |
cmd/compile: combine more 32 bit shift and mask operations on ppc64
Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when mask m is
and the shift value produce constant which can be encoded into an
RLWINM instruction.
Combine (CLRLSLDI (SRWconst x)) if the combining of the underling rotate
masks produces a constant which can be encoded into RLWINM.
Likewise for (SLDconst (SRWconst x)) and (CLRLSDI (RLWINM x)).
Combine rotate word + and operations which can be encoded as a single
RLWINM/RLWNM instruction.
The most notable performance improvements arise from the crypto
benchmarks below (GOARCH=power8 on a ppc64le/linux):
pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le
ExpandKeyWithSalt 52.2µs ± 0% 47.5µs ± 0% -8.88%
ExpandKey 44.4µs ± 0% 40.3µs ± 0% -9.15%
pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le
Key 57.6ms ± 0% 52.3ms ± 0% -9.13%
pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le
Equal 90.9ms ± 0% 82.6ms ± 0% -9.13%
DefaultCost 91.0ms ± 0% 82.7ms ± 0% -9.12%
Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/260798
Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/opGen.go')
-rw-r--r-- | src/cmd/compile/internal/ssa/opGen.go | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 779c19f72d..bb1cbc0baa 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1871,6 +1871,9 @@ const ( OpPPC64ROTLconst OpPPC64ROTLWconst OpPPC64EXTSWSLconst + OpPPC64RLWINM + OpPPC64RLWNM + OpPPC64RLWMI OpPPC64CNTLZD OpPPC64CNTLZW OpPPC64CNTTZD @@ -24972,6 +24975,51 @@ var opcodeTable = [...]opInfo{ }, }, { + name: "RLWINM", + auxType: auxInt64, + argLen: 1, + asm: ppc64.ARLWNM, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "RLWNM", + auxType: auxInt64, + argLen: 2, + asm: ppc64.ARLWNM, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { + name: "RLWMI", + auxType: auxInt64, + argLen: 2, + resultInArg0: true, + asm: ppc64.ARLWMI, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, + { name: "CNTLZD", argLen: 1, clobberFlags: true, |