aboutsummaryrefslogtreecommitdiff
path: root/test/codegen
diff options
context:
space:
mode:
authorLynn Boger <laboger@linux.vnet.ibm.com>2020-08-31 09:43:40 -0400
committerLynn Boger <laboger@linux.vnet.ibm.com>2020-09-17 12:37:40 +0000
commit967465da2975fe4322080703ce5a77ea90752829 (patch)
tree1f3a8d2b15aaa382b3551c3d5a1b68b6808f8592 /test/codegen
parent0dde60a5fefcb1447c97efa5c7bb4dbcf3575736 (diff)
downloadgo-967465da2975fe4322080703ce5a77ea90752829.tar.gz
go-967465da2975fe4322080703ce5a77ea90752829.zip
cmd/compile: use combined shifts to improve array addressing on ppc64x
This change adds rules to find pairs of instructions that can be combined into a single shifts. These instruction sequences are common in array addressing within loops. Improvements can be seen in many crypto packages and the hash packages. These are based on the extended mnemonics found in the ISA sections C.8.1 and C.8.2. Some rules in PPC64.rules were moved because the ordering prevented some matching. The following results were generated on power9. hash/crc32: CRC32/poly=Koopman/size=40/align=0 195ns ± 0% 163ns ± 0% -16.41% CRC32/poly=Koopman/size=40/align=1 200ns ± 0% 163ns ± 0% -18.50% CRC32/poly=Koopman/size=512/align=0 1.98µs ± 0% 1.67µs ± 0% -15.46% CRC32/poly=Koopman/size=512/align=1 1.98µs ± 0% 1.69µs ± 0% -14.80% CRC32/poly=Koopman/size=1kB/align=0 3.90µs ± 0% 3.31µs ± 0% -15.27% CRC32/poly=Koopman/size=1kB/align=1 3.85µs ± 0% 3.31µs ± 0% -14.15% CRC32/poly=Koopman/size=4kB/align=0 15.3µs ± 0% 13.1µs ± 0% -14.22% CRC32/poly=Koopman/size=4kB/align=1 15.4µs ± 0% 13.1µs ± 0% -14.79% CRC32/poly=Koopman/size=32kB/align=0 137µs ± 0% 105µs ± 0% -23.56% CRC32/poly=Koopman/size=32kB/align=1 137µs ± 0% 105µs ± 0% -23.53% crypto/rc4: RC4_128 733ns ± 0% 650ns ± 0% -11.32% (p=1.000 n=1+1) RC4_1K 5.80µs ± 0% 5.17µs ± 0% -10.89% (p=1.000 n=1+1) RC4_8K 45.7µs ± 0% 40.8µs ± 0% -10.73% (p=1.000 n=1+1) crypto/sha1: Hash8Bytes 635ns ± 0% 613ns ± 0% -3.46% (p=1.000 n=1+1) Hash320Bytes 2.30µs ± 0% 2.18µs ± 0% -5.38% (p=1.000 n=1+1) Hash1K 5.88µs ± 0% 5.38µs ± 0% -8.62% (p=1.000 n=1+1) Hash8K 42.0µs ± 0% 37.9µs ± 0% -9.75% (p=1.000 n=1+1) There are other improvements found in golang.org/x/crypto which are all in the range of 5-15%. Change-Id: I193471fbcf674151ffe2edab212799d9b08dfb8c Reviewed-on: https://go-review.googlesource.com/c/go/+/252097 Trust: Lynn Boger <laboger@linux.vnet.ibm.com> Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Diffstat (limited to 'test/codegen')
-rw-r--r--test/codegen/shift.go55
1 files changed, 55 insertions, 0 deletions
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index 5e50ea6bff..32214851b5 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -150,6 +150,61 @@ func lshGuarded64(v int64, s uint) int64 {
panic("shift too large")
}
+func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {
+
+ // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f := tab[byte(v)^b]
+ // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[byte(v)&b]
+ // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[byte(v)|b]
+ // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[uint16(v)&h]
+ // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[uint16(v)^h]
+ // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ f += tab[uint16(v)|h]
+ // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
+ // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
+ f += tab[v&0xff]
+ // ppc64le:-".*AND",".*CLRLSLWI"
+ // ppc64:-".*AND",".*CLRLSLWI"
+ f += 2*uint32(uint16(d))
+ // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
+ // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
+ g := 2*uint64(uint32(d))
+ return f, g
+}
+
+func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, v64 uint64) (uint8, uint16, uint32, uint64) {
+
+ // ppc64le:-"AND","CLRLSLWI"
+ // ppc64:-"AND","CLRLSLWI"
+ f := (v8 &0xF) << 2
+ // ppc64le:-"AND","CLRLSLWI"
+ // ppc64:-"AND","CLRLSLWI"
+ f += byte(v16)<<3
+ // ppc64le:-"AND","CLRLSLWI"
+ // ppc64:-"AND","CLRLSLWI"
+ g := (v16 & 0xFF) << 3
+ // ppc64le:-"AND","CLRLSLWI"
+ // ppc64:-"AND","CLRLSLWI"
+ h := (v32 & 0xFFFFF) << 2
+ // ppc64le:-"AND","CLRLSLWI"
+ // ppc64:-"AND","CLRLSLWI"
+ h += uint32(v64)<<4
+ // ppc64le:-"AND","CLRLSLDI"
+ // ppc64:-"AND","CLRLSLDI"
+ i := (v64 & 0xFFFFFFFF) << 5
+ return f, g, h, i
+}
+
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
// ppc64le:-".*MOVW"