about | summary | refs | log | tree | commit | diff
path: root/test/codegen
diff options
context:
space:
mode:
author: Lynn Boger <laboger@linux.vnet.ibm.com> 2020-03-30 15:23:19 -0400
committer: Lynn Boger <laboger@linux.vnet.ibm.com> 2020-04-06 12:09:39 +0000
commit: 815509ae31fc7eaf753def9deb9cafee968f92b3 (patch)
tree: bd27aa71683f030619f08c9441616cbdcf3d62fb /test/codegen
parent: 5f3354d1bf2e6a61e4b9e1e31ee04b99dfe7de35 (diff)
download: go-815509ae31fc7eaf753def9deb9cafee968f92b3.tar.gz
download: go-815509ae31fc7eaf753def9deb9cafee968f92b3.zip
cmd/compile: improve lowered moves and zeros for ppc64le

This change includes the following:
- Generate LXV/STXV sequences instead of LXVD2X/STXVD2X on power9.
  These instructions do not require an index register, which
  allows more loads and stores within a loop without initializing
  multiple index registers. The LoweredQuadXXX generate LXV/STXV.
- Create LoweredMoveXXXShort and LoweredZeroXXXShort for short
  moves that don't generate loops, and therefore don't clobber the
  address registers or flags.
- Use registers other than R3 and R4 to avoid conflicting with
  registers that have already been allocated to avoid unnecessary
  register moves.
- Eliminate the use of R14 as scratch register and use R31
  instead.
- Add PCALIGN when the LoweredMoveXXX or LoweredZeroXXX generates a
  loop with more than 3 iterations.

This performance opportunity was noticed in github.com/golang/snappy
benchmarks. Results on power9:

WordsDecode1e1    54.1ns ± 0%    53.8ns ± 0%   -0.51%  (p=0.029 n=4+4)
WordsDecode1e2     287ns ± 0%     282ns ± 1%   -1.83%  (p=0.029 n=4+4)
WordsDecode1e3    3.98µs ± 0%    3.64µs ± 0%   -8.52%  (p=0.029 n=4+4)
WordsDecode1e4    66.9µs ± 0%    67.0µs ± 0%   +0.20%  (p=0.029 n=4+4)
WordsDecode1e5     723µs ± 0%     723µs ± 0%   -0.01%  (p=0.200 n=4+4)
WordsDecode1e6    7.21ms ± 0%    7.21ms ± 0%   -0.02%  (p=1.000 n=4+4)
WordsEncode1e1    29.9ns ± 0%    29.4ns ± 0%   -1.51%  (p=0.029 n=4+4)
WordsEncode1e2    2.12µs ± 0%    1.75µs ± 0%  -17.70%  (p=0.029 n=4+4)
WordsEncode1e3    11.7µs ± 0%    11.2µs ± 0%   -4.61%  (p=0.029 n=4+4)
WordsEncode1e4     119µs ± 0%     120µs ± 0%   +0.36%  (p=0.029 n=4+4)
WordsEncode1e5    1.21ms ± 0%    1.22ms ± 0%   +0.41%  (p=0.029 n=4+4)
WordsEncode1e6    12.0ms ± 0%    12.0ms ± 0%   +0.57%  (p=0.029 n=4+4)
RandomEncode       286µs ± 0%     203µs ± 0%  -28.82%  (p=0.029 n=4+4)
ExtendMatch       47.4µs ± 0%    47.0µs ± 0%   -0.85%  (p=0.029 n=4+4)

Change-Id: Iecad3a39ae55280286e42760a5c9d5c1168f5858
Reviewed-on: https://go-review.googlesource.com/c/go/+/226539
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Diffstat (limited to 'test/codegen')
-rw-r--r-- test/codegen/copy.go 34
1 files changed, 34 insertions, 0 deletions
diff --git a/test/codegen/copy.go b/test/codegen/copy.go
index 46c2bde9ab..db75cde1c6 100644
--- a/test/codegen/copy.go
+++ b/test/codegen/copy.go
@@ -34,6 +34,8 @@ func movesmall7() {
func movesmall16() {
x := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}
// amd64:-".*memmove"
+ // ppc64:".*memmove"
+ // ppc64le:".*memmove"
copy(x[1:], x[:])
}
@@ -41,10 +43,34 @@ var x [256]byte
// Check that large disjoint copies are replaced with moves.
+func moveDisjointStack32() {
+ var s [32]byte
+ // ppc64:-".*memmove"
+ // ppc64le:-".*memmove"
+ // ppc64le/power8:"LXVD2X",-"ADD",-"BC"
+ // ppc64le/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
+ copy(s[:], x[:32])
+ runtime.KeepAlive(&s)
+}
+
+func moveDisjointStack64() {
+ var s [96]byte
+ // ppc64:-".*memmove"
+ // ppc64le:-".*memmove"
+ // ppc64le/power8:"LXVD2X","ADD","BC"
+ // ppc64le/power9:"LXV",-"LXVD2X",-"ADD",-"BC"
+ copy(s[:], x[:96])
+ runtime.KeepAlive(&s)
+}
+
func moveDisjointStack() {
var s [256]byte
// s390x:-".*memmove"
// amd64:-".*memmove"
+ // ppc64:-".*memmove"
+ // ppc64le:-".*memmove"
+ // ppc64le/power8:"LXVD2X"
+ // ppc64le/power9:"LXV",-"LXVD2X"
copy(s[:], x[:])
runtime.KeepAlive(&s)
}
@@ -53,6 +79,10 @@ func moveDisjointArg(b *[256]byte) {
var s [256]byte
// s390x:-".*memmove"
// amd64:-".*memmove"
+ // ppc64:-".*memmove"
+ // ppc64le:-".*memmove"
+ // ppc64le/power8:"LXVD2X"
+ // ppc64le/power9:"LXV",-"LXVD2X"
copy(s[:], b[:])
runtime.KeepAlive(&s)
}
@@ -60,6 +90,10 @@ func moveDisjointArg(b *[256]byte) {
func moveDisjointNoOverlap(a *[256]byte) {
// s390x:-".*memmove"
// amd64:-".*memmove"
+ // ppc64:-".*memmove"
+ // ppc64le:-".*memmove"
+ // ppc64le/power8:"LXVD2X"
+ // ppc64le/power9:"LXV",-"LXVD2X"
copy(a[:], a[128:])
}