aboutsummaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
authorJoel Sing <joel@sing.id.au>2024-02-01 23:58:54 +1100
committerJoel Sing <joel@sing.id.au>2024-02-16 11:59:07 +0000
commitdaa58db486c3806b6767a8d87ee275ed4d7c2713 (patch)
tree3b8bc5f35c34bf5917c416cef6780bace6c35550 /test
parentb634f6fdcbebee23b7da709a243f3db217b64776 (diff)
downloadgo-daa58db486c3806b6767a8d87ee275ed4d7c2713.tar.gz
go-daa58db486c3806b6767a8d87ee275ed4d7c2713.zip
cmd/compile: improve rotations for riscv64
Enable canRotate for riscv64, enable rotation intrinsics and provide better rewrite implementations for rotations. By avoiding Lsh*x64 and Rsh*Ux64 we can produce better code, especially for 32 and 64 bit rotations. By enabling canRotate we also benefit from the generic rotation rewrite rules.

Benchmark on a StarFive VisionFive 2:

                 │  rotate.1   │              rotate.2               │
                 │   sec/op    │   sec/op     vs base                │
  RotateLeft-4     14.700n ± 0%   8.016n ± 0%  -45.47% (p=0.000 n=10)
  RotateLeft8-4     14.70n ± 0%   10.69n ± 0%  -27.28% (p=0.000 n=10)
  RotateLeft16-4    14.70n ± 0%   12.02n ± 0%  -18.23% (p=0.000 n=10)
  RotateLeft32-4   13.360n ± 0%   8.016n ± 0%  -40.00% (p=0.000 n=10)
  RotateLeft64-4   13.360n ± 0%   8.016n ± 0%  -40.00% (p=0.000 n=10)
  geomean           14.15n        9.208n       -34.92%

Change-Id: I1a2036fdc57cf88ebb6617eb8d92e1d187e183b2
Reviewed-on: https://go-review.googlesource.com/c/go/+/560315
Reviewed-by: M Zhuo <mengzhuo1203@gmail.com>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'test')
-rw-r--r--test/codegen/rotate.go22
1 file changed, 22 insertions, 0 deletions
diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go
index 5495f86b79..109e55763c 100644
--- a/test/codegen/rotate.go
+++ b/test/codegen/rotate.go
@@ -18,6 +18,7 @@ func rot64(x uint64) uint64 {
// amd64:"ROLQ\t[$]7"
// ppc64x:"ROTL\t[$]7"
// loong64: "ROTRV\t[$]57"
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<7 | x>>57
// amd64:"ROLQ\t[$]8"
@@ -25,6 +26,7 @@ func rot64(x uint64) uint64 {
// s390x:"RISBGZ\t[$]0, [$]63, [$]8, "
// ppc64x:"ROTL\t[$]8"
// loong64: "ROTRV\t[$]56"
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<8 + x>>56
// amd64:"ROLQ\t[$]9"
@@ -32,6 +34,7 @@ func rot64(x uint64) uint64 {
// s390x:"RISBGZ\t[$]0, [$]63, [$]9, "
// ppc64x:"ROTL\t[$]9"
// loong64: "ROTRV\t[$]55"
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<9 ^ x>>55
// amd64:"ROLQ\t[$]10"
@@ -41,6 +44,7 @@ func rot64(x uint64) uint64 {
// arm64:"ROR\t[$]54"
// s390x:"RISBGZ\t[$]0, [$]63, [$]10, "
// loong64: "ROTRV\t[$]54"
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += bits.RotateLeft64(x, 10)
return a
@@ -53,6 +57,7 @@ func rot32(x uint32) uint32 {
// arm:"MOVW\tR\\d+@>25"
// ppc64x:"ROTLW\t[$]7"
// loong64: "ROTR\t[$]25"
+ // riscv64: "OR","SLLIW","SRLIW",-"AND"
a += x<<7 | x>>25
// amd64:`ROLL\t[$]8`
@@ -61,6 +66,7 @@ func rot32(x uint32) uint32 {
// s390x:"RLL\t[$]8"
// ppc64x:"ROTLW\t[$]8"
// loong64: "ROTR\t[$]24"
+ // riscv64: "OR","SLLIW","SRLIW",-"AND"
a += x<<8 + x>>24
// amd64:"ROLL\t[$]9"
@@ -69,6 +75,7 @@ func rot32(x uint32) uint32 {
// s390x:"RLL\t[$]9"
// ppc64x:"ROTLW\t[$]9"
// loong64: "ROTR\t[$]23"
+ // riscv64: "OR","SLLIW","SRLIW",-"AND"
a += x<<9 ^ x>>23
// amd64:"ROLL\t[$]10"
@@ -79,6 +86,7 @@ func rot32(x uint32) uint32 {
// arm64:"RORW\t[$]22"
// s390x:"RLL\t[$]10"
// loong64: "ROTR\t[$]22"
+ // riscv64: "OR","SLLIW","SRLIW",-"AND"
a += bits.RotateLeft32(x, 10)
return a
@@ -88,12 +96,15 @@ func rot16(x uint16) uint16 {
var a uint16
// amd64:"ROLW\t[$]7"
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<7 | x>>9
// amd64:`ROLW\t[$]8`
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<8 + x>>8
// amd64:"ROLW\t[$]9"
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<9 ^ x>>7
return a
@@ -103,12 +114,15 @@ func rot8(x uint8) uint8 {
var a uint8
// amd64:"ROLB\t[$]5"
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<5 | x>>3
// amd64:`ROLB\t[$]6`
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<6 + x>>2
// amd64:"ROLB\t[$]7"
+ // riscv64: "OR","SLLI","SRLI",-"AND"
a += x<<7 ^ x>>1
return a
@@ -127,12 +141,14 @@ func rot64nc(x uint64, z uint) uint64 {
// arm64:"ROR","NEG",-"AND"
// ppc64x:"ROTL",-"NEG",-"AND"
// loong64: "ROTRV", -"AND"
+ // riscv64: "OR","SLL","SRL",-"AND"
a += x<<z | x>>(64-z)
// amd64:"RORQ",-"AND"
// arm64:"ROR",-"NEG",-"AND"
// ppc64x:"ROTL","NEG",-"AND"
// loong64: "ROTRV", -"AND"
+ // riscv64: "OR","SLL","SRL",-"AND"
a += x>>z | x<<(64-z)
return a
@@ -147,12 +163,14 @@ func rot32nc(x uint32, z uint) uint32 {
// arm64:"ROR","NEG",-"AND"
// ppc64x:"ROTLW",-"NEG",-"AND"
// loong64: "ROTR", -"AND"
+ // riscv64: "OR","SLLW","SRLW",-"AND"
a += x<<z | x>>(32-z)
// amd64:"RORL",-"AND"
// arm64:"ROR",-"NEG",-"AND"
// ppc64x:"ROTLW","NEG",-"AND"
// loong64: "ROTR", -"AND"
+ // riscv64: "OR","SLLW","SRLW",-"AND"
a += x>>z | x<<(32-z)
return a
@@ -164,9 +182,11 @@ func rot16nc(x uint16, z uint) uint16 {
z &= 15
// amd64:"ROLW",-"ANDQ"
+ // riscv64: "OR","SLL","SRL",-"AND\t"
a += x<<z | x>>(16-z)
// amd64:"RORW",-"ANDQ"
+ // riscv64: "OR","SLL","SRL",-"AND\t"
a += x>>z | x<<(16-z)
return a
@@ -178,9 +198,11 @@ func rot8nc(x uint8, z uint) uint8 {
z &= 7
// amd64:"ROLB",-"ANDQ"
+ // riscv64: "OR","SLL","SRL",-"AND\t"
a += x<<z | x>>(8-z)
// amd64:"RORB",-"ANDQ"
+ // riscv64: "OR","SLL","SRL",-"AND\t"
a += x>>z | x<<(8-z)
return a