aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile/internal/ssa/opGen.go
diff options
context:
space:
mode:
authorLynn Boger <laboger@linux.vnet.ibm.com>2020-08-31 09:43:40 -0400
committerLynn Boger <laboger@linux.vnet.ibm.com>2020-09-17 12:37:40 +0000
commit967465da2975fe4322080703ce5a77ea90752829 (patch)
tree1f3a8d2b15aaa382b3551c3d5a1b68b6808f8592 /src/cmd/compile/internal/ssa/opGen.go
parent0dde60a5fefcb1447c97efa5c7bb4dbcf3575736 (diff)
downloadgo-967465da2975fe4322080703ce5a77ea90752829.tar.gz
go-967465da2975fe4322080703ce5a77ea90752829.zip
cmd/compile: use combined shifts to improve array addressing on ppc64x
This change adds rules to find pairs of instructions that can be combined into a single shifts. These instruction sequences are common in array addressing within loops. Improvements can be seen in many crypto packages and the hash packages. These are based on the extended mnemonics found in the ISA sections C.8.1 and C.8.2. Some rules in PPC64.rules were moved because the ordering prevented some matching. The following results were generated on power9. hash/crc32: CRC32/poly=Koopman/size=40/align=0 195ns ± 0% 163ns ± 0% -16.41% CRC32/poly=Koopman/size=40/align=1 200ns ± 0% 163ns ± 0% -18.50% CRC32/poly=Koopman/size=512/align=0 1.98µs ± 0% 1.67µs ± 0% -15.46% CRC32/poly=Koopman/size=512/align=1 1.98µs ± 0% 1.69µs ± 0% -14.80% CRC32/poly=Koopman/size=1kB/align=0 3.90µs ± 0% 3.31µs ± 0% -15.27% CRC32/poly=Koopman/size=1kB/align=1 3.85µs ± 0% 3.31µs ± 0% -14.15% CRC32/poly=Koopman/size=4kB/align=0 15.3µs ± 0% 13.1µs ± 0% -14.22% CRC32/poly=Koopman/size=4kB/align=1 15.4µs ± 0% 13.1µs ± 0% -14.79% CRC32/poly=Koopman/size=32kB/align=0 137µs ± 0% 105µs ± 0% -23.56% CRC32/poly=Koopman/size=32kB/align=1 137µs ± 0% 105µs ± 0% -23.53% crypto/rc4: RC4_128 733ns ± 0% 650ns ± 0% -11.32% (p=1.000 n=1+1) RC4_1K 5.80µs ± 0% 5.17µs ± 0% -10.89% (p=1.000 n=1+1) RC4_8K 45.7µs ± 0% 40.8µs ± 0% -10.73% (p=1.000 n=1+1) crypto/sha1: Hash8Bytes 635ns ± 0% 613ns ± 0% -3.46% (p=1.000 n=1+1) Hash320Bytes 2.30µs ± 0% 2.18µs ± 0% -5.38% (p=1.000 n=1+1) Hash1K 5.88µs ± 0% 5.38µs ± 0% -8.62% (p=1.000 n=1+1) Hash8K 42.0µs ± 0% 37.9µs ± 0% -9.75% (p=1.000 n=1+1) There are other improvements found in golang.org/x/crypto which are all in the range of 5-15%. Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c Reviewed-on: https://go-review.googlesource.com/c/go/+/252097 Trust: Lynn Boger <laboger@linux.vnet.ibm.com> Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Diffstat (limited to 'src/cmd/compile/internal/ssa/opGen.go')
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go45
1 files changed, 45 insertions, 0 deletions
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index d95943231a..f00dc3f7f5 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1853,6 +1853,9 @@ const (
OpPPC64SLW
OpPPC64ROTL
OpPPC64ROTLW
+ OpPPC64RLDICL
+ OpPPC64CLRLSLWI
+ OpPPC64CLRLSLDI
OpPPC64LoweredAdd64Carry
OpPPC64SRADconst
OpPPC64SRAWconst
@@ -24673,6 +24676,48 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "RLDICL",
+ auxType: auxInt32,
+ argLen: 1,
+ asm: ppc64.ARLDICL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "CLRLSLWI",
+ auxType: auxInt32,
+ argLen: 1,
+ asm: ppc64.ACLRLSLWI,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
+ name: "CLRLSLDI",
+ auxType: auxInt32,
+ argLen: 1,
+ asm: ppc64.ACLRLSLDI,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ outputs: []outputInfo{
+ {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+ },
+ },
+ },
+ {
name: "LoweredAdd64Carry",
argLen: 3,
resultNotInArgs: true,