diff options
author | Joel Sing <joel@sing.id.au> | 2024-02-01 23:58:54 +1100 |
---|---|---|
committer | Joel Sing <joel@sing.id.au> | 2024-02-16 11:59:07 +0000 |
commit | daa58db486c3806b6767a8d87ee275ed4d7c2713 (patch) | |
tree | 3b8bc5f35c34bf5917c416cef6780bace6c35550 /test | |
parent | b634f6fdcbebee23b7da709a243f3db217b64776 (diff) | |
download | go-daa58db486c3806b6767a8d87ee275ed4d7c2713.tar.gz go-daa58db486c3806b6767a8d87ee275ed4d7c2713.zip |
cmd/compile: improve rotations for riscv64
Enable canRotate for riscv64, enable rotation intrinsics and provide
better rewrite implementations for rotations. By avoiding Lsh*x64
and Rsh*Ux64 we can produce better code, especially for 32 and 64
bit rotations. By enabling canRotate we also benefit from the generic
rotation rewrite rules.
Benchmark on a StarFive VisionFive 2:
│ rotate.1 │ rotate.2 │
│ sec/op │ sec/op vs base │
RotateLeft-4 14.700n ± 0% 8.016n ± 0% -45.47% (p=0.000 n=10)
RotateLeft8-4 14.70n ± 0% 10.69n ± 0% -27.28% (p=0.000 n=10)
RotateLeft16-4 14.70n ± 0% 12.02n ± 0% -18.23% (p=0.000 n=10)
RotateLeft32-4 13.360n ± 0% 8.016n ± 0% -40.00% (p=0.000 n=10)
RotateLeft64-4 13.360n ± 0% 8.016n ± 0% -40.00% (p=0.000 n=10)
geomean 14.15n 9.208n -34.92%
Change-Id: I1a2036fdc57cf88ebb6617eb8d92e1d187e183b2
Reviewed-on: https://go-review.googlesource.com/c/go/+/560315
Reviewed-by: M Zhuo <mengzhuo1203@gmail.com>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
Diffstat (limited to 'test')
-rw-r--r-- | test/codegen/rotate.go | 22 |
1 file changed, 22 insertions, 0 deletions
diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go index 5495f86b79..109e55763c 100644 --- a/test/codegen/rotate.go +++ b/test/codegen/rotate.go @@ -18,6 +18,7 @@ func rot64(x uint64) uint64 { // amd64:"ROLQ\t[$]7" // ppc64x:"ROTL\t[$]7" // loong64: "ROTRV\t[$]57" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<7 | x>>57 // amd64:"ROLQ\t[$]8" @@ -25,6 +26,7 @@ func rot64(x uint64) uint64 { // s390x:"RISBGZ\t[$]0, [$]63, [$]8, " // ppc64x:"ROTL\t[$]8" // loong64: "ROTRV\t[$]56" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<8 + x>>56 // amd64:"ROLQ\t[$]9" @@ -32,6 +34,7 @@ func rot64(x uint64) uint64 { // s390x:"RISBGZ\t[$]0, [$]63, [$]9, " // ppc64x:"ROTL\t[$]9" // loong64: "ROTRV\t[$]55" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<9 ^ x>>55 // amd64:"ROLQ\t[$]10" @@ -41,6 +44,7 @@ func rot64(x uint64) uint64 { // arm64:"ROR\t[$]54" // s390x:"RISBGZ\t[$]0, [$]63, [$]10, " // loong64: "ROTRV\t[$]54" + // riscv64: "OR","SLLI","SRLI",-"AND" a += bits.RotateLeft64(x, 10) return a @@ -53,6 +57,7 @@ func rot32(x uint32) uint32 { // arm:"MOVW\tR\\d+@>25" // ppc64x:"ROTLW\t[$]7" // loong64: "ROTR\t[$]25" + // riscv64: "OR","SLLIW","SRLIW",-"AND" a += x<<7 | x>>25 // amd64:`ROLL\t[$]8` @@ -61,6 +66,7 @@ func rot32(x uint32) uint32 { // s390x:"RLL\t[$]8" // ppc64x:"ROTLW\t[$]8" // loong64: "ROTR\t[$]24" + // riscv64: "OR","SLLIW","SRLIW",-"AND" a += x<<8 + x>>24 // amd64:"ROLL\t[$]9" @@ -69,6 +75,7 @@ func rot32(x uint32) uint32 { // s390x:"RLL\t[$]9" // ppc64x:"ROTLW\t[$]9" // loong64: "ROTR\t[$]23" + // riscv64: "OR","SLLIW","SRLIW",-"AND" a += x<<9 ^ x>>23 // amd64:"ROLL\t[$]10" @@ -79,6 +86,7 @@ func rot32(x uint32) uint32 { // arm64:"RORW\t[$]22" // s390x:"RLL\t[$]10" // loong64: "ROTR\t[$]22" + // riscv64: "OR","SLLIW","SRLIW",-"AND" a += bits.RotateLeft32(x, 10) return a @@ -88,12 +96,15 @@ func rot16(x uint16) uint16 { var a uint16 // amd64:"ROLW\t[$]7" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<7 | x>>9 // amd64:`ROLW\t[$]8` + // riscv64: 
"OR","SLLI","SRLI",-"AND" a += x<<8 + x>>8 // amd64:"ROLW\t[$]9" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<9 ^ x>>7 return a @@ -103,12 +114,15 @@ func rot8(x uint8) uint8 { var a uint8 // amd64:"ROLB\t[$]5" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<5 | x>>3 // amd64:`ROLB\t[$]6` + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<6 + x>>2 // amd64:"ROLB\t[$]7" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<7 ^ x>>1 return a @@ -127,12 +141,14 @@ func rot64nc(x uint64, z uint) uint64 { // arm64:"ROR","NEG",-"AND" // ppc64x:"ROTL",-"NEG",-"AND" // loong64: "ROTRV", -"AND" + // riscv64: "OR","SLL","SRL",-"AND" a += x<<z | x>>(64-z) // amd64:"RORQ",-"AND" // arm64:"ROR",-"NEG",-"AND" // ppc64x:"ROTL","NEG",-"AND" // loong64: "ROTRV", -"AND" + // riscv64: "OR","SLL","SRL",-"AND" a += x>>z | x<<(64-z) return a @@ -147,12 +163,14 @@ func rot32nc(x uint32, z uint) uint32 { // arm64:"ROR","NEG",-"AND" // ppc64x:"ROTLW",-"NEG",-"AND" // loong64: "ROTR", -"AND" + // riscv64: "OR","SLLW","SRLW",-"AND" a += x<<z | x>>(32-z) // amd64:"RORL",-"AND" // arm64:"ROR",-"NEG",-"AND" // ppc64x:"ROTLW","NEG",-"AND" // loong64: "ROTR", -"AND" + // riscv64: "OR","SLLW","SRLW",-"AND" a += x>>z | x<<(32-z) return a @@ -164,9 +182,11 @@ func rot16nc(x uint16, z uint) uint16 { z &= 15 // amd64:"ROLW",-"ANDQ" + // riscv64: "OR","SLL","SRL",-"AND\t" a += x<<z | x>>(16-z) // amd64:"RORW",-"ANDQ" + // riscv64: "OR","SLL","SRL",-"AND\t" a += x>>z | x<<(16-z) return a @@ -178,9 +198,11 @@ func rot8nc(x uint8, z uint) uint8 { z &= 7 // amd64:"ROLB",-"ANDQ" + // riscv64: "OR","SLL","SRL",-"AND\t" a += x<<z | x>>(8-z) // amd64:"RORB",-"ANDQ" + // riscv64: "OR","SLL","SRL",-"AND\t" a += x>>z | x<<(8-z) return a |