diff options
author | Paul E. Murphy <murp@ibm.com> | 2020-10-23 12:12:34 -0500 |
---|---|---|
committer | Lynn Boger <laboger@linux.vnet.ibm.com> | 2020-10-27 18:33:20 +0000 |
commit | c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24 (patch) | |
tree | 67a121a1ecd4bec56887f14e5894231017e73bb9 /test/codegen | |
parent | e3bb53a7683eb9c3d04c09f28abb4cf9aa89a7c1 (diff) | |
download | go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.tar.gz go-c3c6fbf31419d37b0ae7d99b5378f6f8e9080b24.zip |
cmd/compile: combine more 32 bit shift and mask operations on ppc64
Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when mask m and
the shift value produce a constant which can be encoded into an
RLWINM instruction.
Combine (CLRLSLDI (SRWconst x)) if combining the underlying rotate
masks produces a constant which can be encoded into RLWINM.
Likewise for (SLDconst (SRWconst x)) and (CLRLSLDI (RLWINM x)).
Combine rotate word and AND operations which can be encoded as a single
RLWINM/RLWNM instruction.
The most notable performance improvements arise from the crypto
benchmarks below (GOPPC64=power8 on ppc64le/linux):
pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le
ExpandKeyWithSalt 52.2µs ± 0% 47.5µs ± 0% -8.88%
ExpandKey 44.4µs ± 0% 40.3µs ± 0% -9.15%
pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le
Key 57.6ms ± 0% 52.3ms ± 0% -9.13%
pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le
Equal 90.9ms ± 0% 82.6ms ± 0% -9.13%
DefaultCost 91.0ms ± 0% 82.7ms ± 0% -9.12%
Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/260798
Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Diffstat (limited to 'test/codegen')
-rw-r--r-- | test/codegen/rotate.go | 45 | ||||
-rw-r--r-- | test/codegen/shift.go | 94 |
2 files changed, 118 insertions, 21 deletions
diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go index ce24b57877..0c8b030970 100644 --- a/test/codegen/rotate.go +++ b/test/codegen/rotate.go @@ -6,6 +6,8 @@ package codegen +import "math/bits" + // ------------------- // // const rotates // // ------------------- // @@ -166,3 +168,46 @@ func f32(x uint32) uint32 { // amd64:"ROLL\t[$]7" return rot32nc(x, 7) } + +// --------------------------------------- // +// Combined Rotate + Masking operations // +// --------------------------------------- // + +func checkMaskedRotate32(a []uint32, r int) { + i := 0 + + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000 + i++ + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+" + a[i] = bits.RotateLeft32(a[i]&0xFF, 16) + i++ + // ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+" + // ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0 + i++ + // ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+" + // ppc64: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+" + a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16) + i++ + + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+" + a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000 + i++ + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+" + a[i] = bits.RotateLeft32(a[3], r) & 0xFF00 + i++ + + // ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+" + // ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+" + a[i] = bits.RotateLeft32(a[3], r) & 0xFFF00FFF + i++ + // ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+" + // ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+" + a[i] = bits.RotateLeft32(a[3], 4) & 0xFFF00FFF + i++ +} diff --git a/test/codegen/shift.go b/test/codegen/shift.go 
index bbfc85ffbb..a45f27c9cf 100644 --- a/test/codegen/shift.go +++ b/test/codegen/shift.go @@ -156,29 +156,29 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f := tab[byte(v)^b] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[byte(v)&b] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[byte(v)|b] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[uint16(v)&h] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[uint16(v)^h] // ppc64le:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" - // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" + // ppc64:-".*RLWINM",-".*RLDICR",".*CLRLSLDI" f += tab[uint16(v)|h] // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" f += tab[v&0xff] // ppc64le:-".*AND",".*CLRLSLWI" - // ppc64:-".*AND",".*CLRLSLWI" - f += 2*uint32(uint16(d)) + // ppc64:-".*AND",".*CLRLSLWI" + f += 2 * uint32(uint16(d)) // ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI" // ppc64:-".*AND",-"RLDICR",".*CLRLSLDI" - g := 2*uint64(uint32(d)) + g := 2 * uint64(uint32(d)) return f, g } @@ -186,10 +186,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" - f := (v8 &0xF) << 2 + f := (v8 & 0xF) << 2 // ppc64le:"CLRLSLWI" // ppc64:"CLRLSLWI" - f += byte(v16)<<3 + f += byte(v16) << 3 // ppc64le:-"AND","CLRLSLWI" // ppc64:-"AND","CLRLSLWI" g := (v16 & 0xFF) << 3 @@ -207,29 +207,81 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64 i += (v64 & 0xFFFF00) << 10 // 
ppc64le/power9:-"SLD","EXTSWSLI" // ppc64/power9:-"SLD","EXTSWSLI" - j := int64(x32+32)*8 + j := int64(x32+32) * 8 return f, g, h, i, j } func checkWidenAfterShift(v int64, u uint64) (int64, uint64) { // ppc64le:-".*MOVW" - f := int32(v>>32) + f := int32(v >> 32) // ppc64le:".*MOVW" - f += int32(v>>31) + f += int32(v >> 31) // ppc64le:-".*MOVH" - g := int16(v>>48) + g := int16(v >> 48) // ppc64le:".*MOVH" - g += int16(v>>30) + g += int16(v >> 30) // ppc64le:-".*MOVH" - g += int16(f>>16) + g += int16(f >> 16) // ppc64le:-".*MOVB" - h := int8(v>>56) + h := int8(v >> 56) // ppc64le:".*MOVB" - h += int8(v>>28) + h += int8(v >> 28) // ppc64le:-".*MOVB" - h += int8(f>>24) + h += int8(f >> 24) // ppc64le:".*MOVB" - h += int8(f>>16) - return int64(h),uint64(g) + h += int8(f >> 16) + return int64(h), uint64(g) +} + +func checkShiftAndMask32(v []uint32) { + i := 0 + + // ppc64le: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+" + // ppc64: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+" + v[i] = (v[i] & 0xFF00000) >> 8 + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+" + v[i] = (v[i] & 0xFF00) >> 6 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] & 0xFF) >> 8 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] & 0xF000000) >> 28 + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+" + v[i] = (v[i] >> 6) & 0xFF + i++ + // ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+" + // ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+" + v[i] = (v[i] >> 6) & 0xFF000 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] >> 20) & 0xFF000 + i++ + // ppc64le: "MOVW\tR0" + // ppc64: "MOVW\tR0" + v[i] = (v[i] >> 24) & 0xFF00 + i++ +} + +func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) { + //ppc64le: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+" + //ppc64: -"CLRLSLDI", "RLWNM\t[$]10, 
R[0-9]+, [$]1020, R[0-9]+" + a[0] = a[uint8(v>>24)] + //ppc64le: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+" + //ppc64: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+" + b[0] = b[uint8(v>>24)] + //ppc64le: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+" + //ppc64: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+" + b[1] = b[(v>>20)&0xFF] + //ppc64le: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+" + //ppc64: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+" + b[2] = b[v>>25] } |