aboutsummaryrefslogtreecommitdiff
path: root/test/codegen
diff options
context:
space:
mode:
authorPaul E. Murphy <murp@ibm.com>2020-10-23 12:12:34 -0500
committerLynn Boger <laboger@linux.vnet.ibm.com>2020-10-27 18:33:20 +0000
commitc3c6fbf31419d37b0ae7d99b5378f6f8e9080b24 (patch)
tree67a121a1ecd4bec56887f14e5894231017e73bb9 /test/codegen
parente3bb53a7683eb9c3d04c09f28abb4cf9aa89a7c1 (diff)
downloadgo-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.tar.gz
go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.zip
cmd/compile: combine more 32 bit shift and mask operations on ppc64
Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when the mask m and the shift value produce a constant which can be encoded into an RLWINM instruction. Combine (CLRLSLDI (SRWconst x)) if combining the underlying rotate masks produces a constant which can be encoded into RLWINM. Likewise for (SLDconst (SRWconst x)) and (CLRLSLDI (RLWINM x)). Combine rotate word + AND operations which can be encoded as a single RLWINM/RLWNM instruction. The most notable performance improvements arise from the crypto benchmarks below (GOPPC64=power8 on ppc64le/linux): pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le ExpandKeyWithSalt 52.2µs ± 0% 47.5µs ± 0% -8.88% ExpandKey 44.4µs ± 0% 40.3µs ± 0% -9.15% pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le Key 57.6ms ± 0% 52.3ms ± 0% -9.13% pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le Equal 90.9ms ± 0% 82.6ms ± 0% -9.13% DefaultCost 91.0ms ± 0% 82.7ms ± 0% -9.12% Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce Reviewed-on: https://go-review.googlesource.com/c/go/+/260798 Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com> Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'test/codegen')
-rw-r--r--test/codegen/rotate.go45
-rw-r--r--test/codegen/shift.go94
2 files changed, 118 insertions, 21 deletions
diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go
index ce24b57877..0c8b030970 100644
--- a/test/codegen/rotate.go
+++ b/test/codegen/rotate.go
@@ -6,6 +6,8 @@
package codegen
+import "math/bits"
+
// ------------------- //
// const rotates //
// ------------------- //
@@ -166,3 +168,46 @@ func f32(x uint32) uint32 {
// amd64:"ROLL\t[$]7"
return rot32nc(x, 7)
}
+
+// --------------------------------------- //
+// Combined Rotate + Masking operations //
+// --------------------------------------- //
+
+func checkMaskedRotate32(a []uint32, r int) { // Exercises 32-bit rotate-left combined with constant masks; i advances after every case.
+ i := 0
+ // Constant rotate amounts, with the mask applied either after or before the rotate.
+ // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
+ // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000
+ i++
+ // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
+ // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i]&0xFF, 16)
+ i++
+ // ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+"
+ // ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0
+ i++
+ // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+"
+ // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16)
+ i++
+ // Variable rotate amount r followed by a contiguous constant mask.
+ // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+"
+ // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000
+ i++
+ // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+"
+ // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[3], r) & 0xFF00 // NOTE(review) from here on the source operand is a[3] rather than a[i] - confirm this is intentional
+ i++
+ // Mask 0xFFF00FFF clears only bits 12-19, tried with a variable and then a constant rotate amount.
+ // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+"
+ // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[3], r) & 0xFFF00FFF
+ i++
+ // ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+"
+ // ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+"
+ a[i] = bits.RotateLeft32(a[3], 4) & 0xFFF00FFF
+ i++
+}
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index bbfc85ffbb..a45f27c9cf 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -156,29 +156,29 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt
// ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f := tab[byte(v)^b]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
- // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[byte(v)&b]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
- // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[byte(v)|b]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
- // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)&h]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
- // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)^h]
// ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
- // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
+ // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
f += tab[uint16(v)|h]
// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
f += tab[v&0xff]
// ppc64le:-".*AND",".*CLRLSLWI"
- // ppc64:-".*AND",".*CLRLSLWI"
- f += 2*uint32(uint16(d))
+ // ppc64:-".*AND",".*CLRLSLWI"
+ f += 2 * uint32(uint16(d))
// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
- g := 2*uint64(uint32(d))
+ g := 2 * uint64(uint32(d))
return f, g
}
@@ -186,10 +186,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
- f := (v8 &0xF) << 2
+ f := (v8 & 0xF) << 2
// ppc64le:"CLRLSLWI"
// ppc64:"CLRLSLWI"
- f += byte(v16)<<3
+ f += byte(v16) << 3
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
g := (v16 & 0xFF) << 3
@@ -207,29 +207,81 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64
i += (v64 & 0xFFFF00) << 10
// ppc64le/power9:-"SLD","EXTSWSLI"
// ppc64/power9:-"SLD","EXTSWSLI"
- j := int64(x32+32)*8
+ j := int64(x32+32) * 8
return f, g, h, i, j
}
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
// ppc64le:-".*MOVW"
- f := int32(v>>32)
+ f := int32(v >> 32)
// ppc64le:".*MOVW"
- f += int32(v>>31)
+ f += int32(v >> 31)
// ppc64le:-".*MOVH"
- g := int16(v>>48)
+ g := int16(v >> 48)
// ppc64le:".*MOVH"
- g += int16(v>>30)
+ g += int16(v >> 30)
// ppc64le:-".*MOVH"
- g += int16(f>>16)
+ g += int16(f >> 16)
// ppc64le:-".*MOVB"
- h := int8(v>>56)
+ h := int8(v >> 56)
// ppc64le:".*MOVB"
- h += int8(v>>28)
+ h += int8(v >> 28)
// ppc64le:-".*MOVB"
- h += int8(f>>24)
+ h += int8(f >> 24)
// ppc64le:".*MOVB"
- h += int8(f>>16)
- return int64(h),uint64(g)
+ h += int8(f >> 16)
+ return int64(h), uint64(g)
+}
+
+func checkShiftAndMask32(v []uint32) { // Exercises constant 32-bit right shifts combined with constant masks, in both orders; i advances after every case.
+ i := 0
+ // Cases that expect MOVW R0 are the ones where the shift and the mask leave no bits set, so the stored value is a constant zero.
+ // ppc64le: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+"
+ // ppc64: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+"
+ v[i] = (v[i] & 0xFF00000) >> 8
+ i++
+ // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+"
+ // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+"
+ v[i] = (v[i] & 0xFF00) >> 6
+ i++
+ // ppc64le: "MOVW\tR0"
+ // ppc64: "MOVW\tR0"
+ v[i] = (v[i] & 0xFF) >> 8
+ i++
+ // ppc64le: "MOVW\tR0"
+ // ppc64: "MOVW\tR0"
+ v[i] = (v[i] & 0xF000000) >> 28
+ i++
+ // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+"
+ // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+"
+ v[i] = (v[i] >> 6) & 0xFF
+ i++
+ // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+"
+ // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+"
+ v[i] = (v[i] >> 6) & 0xFF000
+ i++
+ // ppc64le: "MOVW\tR0"
+ // ppc64: "MOVW\tR0"
+ v[i] = (v[i] >> 20) & 0xFF000
+ i++
+ // ppc64le: "MOVW\tR0"
+ // ppc64: "MOVW\tR0"
+ v[i] = (v[i] >> 24) & 0xFF00
+ i++
+}
+
+func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) { // Indexes a and b with shifted or masked bits of v; each case expects one RLWNM and no separate CLRLSLDI or SLD. u is unused.
+ //ppc64le: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+"
+ //ppc64: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+"
+ a[0] = a[uint8(v>>24)]
+ //ppc64le: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+"
+ //ppc64: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+"
+ b[0] = b[uint8(v>>24)]
+ //ppc64le: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+"
+ //ppc64: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+"
+ b[1] = b[(v>>20)&0xFF]
+ //ppc64le: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+"
+ //ppc64: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+"
+ b[2] = b[v>>25]
}