aboutsummaryrefslogtreecommitdiff
path: root/test/codegen
diff options
context:
space:
mode:
authorMichael Munday <mike.munday@ibm.com>2020-05-11 09:44:48 -0700
committerMichael Munday <mike.munday@ibm.com>2020-11-06 10:45:31 +0000
commit854e892ce17e2555c59fce5b92f64bc505ba5d8c (patch)
tree210c8573fcdc53bc23f36a7ecfd9cb153dcc05f6 /test/codegen
parentb7e0adfee28b516a0dcceb5ecf0e5a695b38c61f (diff)
downloadgo-854e892ce17e2555c59fce5b92f64bc505ba5d8c.tar.gz
go-854e892ce17e2555c59fce5b92f64bc505ba5d8c.zip
cmd/compile: optimize shift pairs and masks on s390x
Optimize combinations of left and right shifts by a constant value into a 'rotate then insert selected bits [into zero]' instruction. Use the same instruction for contiguous masks since it has some benefits over 'and immediate' (not restricted to 32-bits, does not overwrite source register). To keep the complexity of this change under control I've only implemented 64 bit operations for now. There are a lot more optimizations that can be done with this instruction family. However, since their function overlaps with other instructions we need to be somewhat careful not to break existing optimization rules by creating optimization dead ends. This is particularly true of the load/store merging rules which contain lots of zero extensions and shifts. This CL does interfere with the store merging rules when an operand is shifted left before it is stored: binary.BigEndian.PutUint64(b, x << 1) This is unfortunate but it's not critical and somewhat complex so I plan to fix that in a follow up CL. file before after Δ % addr2line 4117446 4117282 -164 -0.004% api 4945184 4942752 -2432 -0.049% asm 4998079 4991891 -6188 -0.124% buildid 2685158 2684074 -1084 -0.040% cgo 4553732 4553394 -338 -0.007% compile 19294446 19245070 -49376 -0.256% cover 4897105 4891319 -5786 -0.118% dist 3544389 3542785 -1604 -0.045% doc 3926795 3927617 +822 +0.021% fix 3302958 3293868 -9090 -0.275% link 6546274 6543456 -2818 -0.043% nm 4102021 4100825 -1196 -0.029% objdump 4542431 4548483 +6052 +0.133% pack 2482465 2416389 -66076 -2.662% pprof 13366541 13363915 -2626 -0.020% test2json 2829007 2761515 -67492 -2.386% trace 10216164 10219684 +3520 +0.034% vet 6773956 6773572 -384 -0.006% total 107124151 106917891 -206260 -0.193% Change-Id: I7591cce41e06867ba10a745daae9333513062746 Reviewed-on: https://go-review.googlesource.com/c/go/+/233317 Run-TryBot: Michael Munday <mike.munday@ibm.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org> Trust: Michael Munday <mike.munday@ibm.com>
Diffstat (limited to 'test/codegen')
-rw-r--r--test/codegen/bitfield.go18
-rw-r--r--test/codegen/bits.go12
-rw-r--r--test/codegen/mathbits.go2
-rw-r--r--test/codegen/rotate.go6
-rw-r--r--test/codegen/shift.go33
5 files changed, 50 insertions, 21 deletions
diff --git a/test/codegen/bitfield.go b/test/codegen/bitfield.go
index 08788f1447..7abc1c2783 100644
--- a/test/codegen/bitfield.go
+++ b/test/codegen/bitfield.go
@@ -127,11 +127,13 @@ func sbfx6(x int32) int32 {
// ubfiz
func ubfiz1(x uint64) uint64 {
// arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND"
+ // s390x:"RISBGZ\t[$]49, [$]60, [$]3,",-"SLD",-"AND"
return (x & 0xfff) << 3
}
func ubfiz2(x uint64) uint64 {
// arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND"
+ // s390x:"RISBGZ\t[$]48, [$]59, [$]4,",-"SLD",-"AND"
return (x << 4) & 0xfff0
}
@@ -149,6 +151,7 @@ func ubfiz5(x uint8) uint64 {
func ubfiz6(x uint64) uint64 {
// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]3, [$]62, [$]1, ",-"SLD",-"SRD"
return (x << 4) >> 3
}
@@ -159,6 +162,7 @@ func ubfiz7(x uint32) uint32 {
func ubfiz8(x uint64) uint64 {
// arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]43, [$]62, [$]1, ",-"SLD",-"SRD",-"AND"
return ((x & 0xfffff) << 4) >> 3
}
@@ -169,17 +173,20 @@ func ubfiz9(x uint64) uint64 {
func ubfiz10(x uint64) uint64 {
// arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]45, [$]56, [$]7, ",-"SLD",-"SRD",-"AND"
return ((x << 5) & (0xfff << 5)) << 2
}
// ubfx
func ubfx1(x uint64) uint64 {
// arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]54, [$]63, [$]39, ",-"SRD",-"AND"
return (x >> 25) & 1023
}
func ubfx2(x uint64) uint64 {
// arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]56, [$]63, [$]60, ",-"SRD",-"AND"
return (x & 0x0ff0) >> 4
}
@@ -196,30 +203,37 @@ func ubfx5(x uint8) uint64 {
}
func ubfx6(x uint64) uint64 {
- return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR"
+ // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]2, [$]63, [$]63,",-"SLD",-"SRD"
+ return (x << 1) >> 2
}
func ubfx7(x uint32) uint32 {
- return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR"
+ // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR"
+ return (x << 1) >> 2
}
func ubfx8(x uint64) uint64 {
// arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]52, [$]63, [$]63,",-"SLD",-"SRD",-"AND"
return ((x << 1) >> 2) & 0xfff
}
func ubfx9(x uint64) uint64 {
// arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND"
+ // s390x:"RISBGZ\t[$]53, [$]63, [$]60, ",-"SLD",-"SRD",-"AND"
return ((x >> 3) & 0xfff) >> 1
}
func ubfx10(x uint64) uint64 {
// arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]8, [$]63, [$]59, ",-"SLD",-"SRD"
return ((x >> 2) << 5) >> 8
}
func ubfx11(x uint64) uint64 {
// arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR"
+ // s390x:"RISBGZ\t[$]45, [$]63, [$]63, ",-"SLD",-"SRD",-"AND"
return ((x & 0xfffff) << 3) >> 4
}
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index 398dd84e9e..56e0f3474e 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -340,3 +340,15 @@ func bitSetTest(x int) bool {
// amd64:"CMPQ\tAX, [$]9"
return x&9 == 9
}
+
+// mask contiguous one bits
+func cont1Mask64U(x uint64) uint64 {
+ // s390x:"RISBGZ\t[$]16, [$]47, [$]0,"
+ return x&0x0000ffffffff0000
+}
+
+// mask contiguous zero bits
+func cont0Mask64U(x uint64) uint64 {
+ // s390x:"RISBGZ\t[$]48, [$]15, [$]0,"
+ return x&0xffff00000000ffff
+}
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 4c35f26997..fff6639546 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -213,7 +213,7 @@ func RotateLeft64(n uint64) uint64 {
// arm64:"ROR"
// ppc64:"ROTL"
// ppc64le:"ROTL"
- // s390x:"RLLG"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]37, "
// wasm:"I64Rotl"
return bits.RotateLeft64(n, 37)
}
diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go
index 0c8b030970..e0bcd0abbc 100644
--- a/test/codegen/rotate.go
+++ b/test/codegen/rotate.go
@@ -17,21 +17,21 @@ func rot64(x uint64) uint64 {
// amd64:"ROLQ\t[$]7"
// arm64:"ROR\t[$]57"
- // s390x:"RLLG\t[$]7"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]7, "
// ppc64:"ROTL\t[$]7"
// ppc64le:"ROTL\t[$]7"
a += x<<7 | x>>57
// amd64:"ROLQ\t[$]8"
// arm64:"ROR\t[$]56"
- // s390x:"RLLG\t[$]8"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]8, "
// ppc64:"ROTL\t[$]8"
// ppc64le:"ROTL\t[$]8"
a += x<<8 + x>>56
// amd64:"ROLQ\t[$]9"
// arm64:"ROR\t[$]55"
- // s390x:"RLLG\t[$]9"
+ // s390x:"RISBGZ\t[$]0, [$]63, [$]9, "
// ppc64:"ROTL\t[$]9"
// ppc64le:"ROTL\t[$]9"
a += x<<9 ^ x>>55
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index a45f27c9cf..d19a1984c1 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -11,84 +11,84 @@ package codegen
// ------------------ //
func lshMask64x64(v int64, s uint64) int64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v << (s & 63)
}
func rshMask64Ux64(v uint64, s uint64) uint64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func rshMask64x64(v int64, s uint64) int64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func lshMask32x64(v int32, s uint64) int32 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v << (s & 63)
}
func rshMask32Ux64(v uint32, s uint64) uint32 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v >> (s & 63)
}
func rshMask32x64(v int32, s uint64) int32 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ISEL",-"ORN"
// ppc64:"ISEL",-"ORN"
return v >> (s & 63)
}
func lshMask64x32(v int64, s uint32) int64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN"
// ppc64:"ANDCC",-"ORN"
return v << (s & 63)
}
func rshMask64Ux32(v uint64, s uint32) uint64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN"
// ppc64:"ANDCC",-"ORN"
return v >> (s & 63)
}
func rshMask64x32(v int64, s uint32) int64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> (s & 63)
}
func lshMask64x32Ext(v int64, s int32) int64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v << uint(s&63)
}
func rshMask64Ux32Ext(v uint64, s int32) uint64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> uint(s&63)
}
func rshMask64x32Ext(v int64, s int32) int64 {
- // s390x:-".*AND",-".*MOVDGE"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
// ppc64le:"ANDCC",-"ORN",-"ISEL"
// ppc64:"ANDCC",-"ORN",-"ISEL"
return v >> uint(s&63)
@@ -128,7 +128,8 @@ func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
func rshGuarded64(v int64, s uint) int64 {
if s < 64 {
- // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // wasm:-"Select",-".*LtU"
return v >> s
}
panic("shift too large")
@@ -136,7 +137,8 @@ func rshGuarded64(v int64, s uint) int64 {
func rshGuarded64U(v uint64, s uint) uint64 {
if s < 64 {
- // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // wasm:-"Select",-".*LtU"
return v >> s
}
panic("shift too large")
@@ -144,7 +146,8 @@ func rshGuarded64U(v uint64, s uint) uint64 {
func lshGuarded64(v int64, s uint) int64 {
if s < 64 {
- // s390x:-".*AND",-".*MOVDGE" wasm:-"Select",-".*LtU"
+ // s390x:-"RISBGZ",-"AND",-"LOCGR"
+ // wasm:-"Select",-".*LtU"
return v << s
}
panic("shift too large")