From daa58db486c3806b6767a8d87ee275ed4d7c2713 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Thu, 1 Feb 2024 23:58:54 +1100 Subject: cmd/compile: improve rotations for riscv64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable canRotate for riscv64, enable rotation intrinsics and provide better rewrite implementations for rotations. By avoiding Lsh*x64 and Rsh*Ux64 we can produce better code, especially for 32 and 64 bit rotations. By enabling canRotate we also benefit from the generic rotation rewrite rules. Benchmark on a StarFive VisionFive 2: │ rotate.1 │ rotate.2 │ │ sec/op │ sec/op vs base │ RotateLeft-4 14.700n ± 0% 8.016n ± 0% -45.47% (p=0.000 n=10) RotateLeft8-4 14.70n ± 0% 10.69n ± 0% -27.28% (p=0.000 n=10) RotateLeft16-4 14.70n ± 0% 12.02n ± 0% -18.23% (p=0.000 n=10) RotateLeft32-4 13.360n ± 0% 8.016n ± 0% -40.00% (p=0.000 n=10) RotateLeft64-4 13.360n ± 0% 8.016n ± 0% -40.00% (p=0.000 n=10) geomean 14.15n 9.208n -34.92% Change-Id: I1a2036fdc57cf88ebb6617eb8d92e1d187e183b2 Reviewed-on: https://go-review.googlesource.com/c/go/+/560315 Reviewed-by: M Zhuo Run-TryBot: Joel Sing TryBot-Result: Gopher Robot Reviewed-by: Mark Ryan Reviewed-by: Cherry Mui Reviewed-by: David Chase --- test/codegen/rotate.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'test') diff --git a/test/codegen/rotate.go b/test/codegen/rotate.go index 5495f86b79..109e55763c 100644 --- a/test/codegen/rotate.go +++ b/test/codegen/rotate.go @@ -18,6 +18,7 @@ func rot64(x uint64) uint64 { // amd64:"ROLQ\t[$]7" // ppc64x:"ROTL\t[$]7" // loong64: "ROTRV\t[$]57" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<7 | x>>57 // amd64:"ROLQ\t[$]8" @@ -25,6 +26,7 @@ func rot64(x uint64) uint64 { // s390x:"RISBGZ\t[$]0, [$]63, [$]8, " // ppc64x:"ROTL\t[$]8" // loong64: "ROTRV\t[$]56" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<8 + x>>56 // amd64:"ROLQ\t[$]9" @@ -32,6 +34,7 @@ func rot64(x uint64) uint64 { // s390x:"RISBGZ\t[$]0, 
[$]63, [$]9, " // ppc64x:"ROTL\t[$]9" // loong64: "ROTRV\t[$]55" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<9 ^ x>>55 // amd64:"ROLQ\t[$]10" @@ -41,6 +44,7 @@ func rot64(x uint64) uint64 { // arm64:"ROR\t[$]54" // s390x:"RISBGZ\t[$]0, [$]63, [$]10, " // loong64: "ROTRV\t[$]54" + // riscv64: "OR","SLLI","SRLI",-"AND" a += bits.RotateLeft64(x, 10) return a @@ -53,6 +57,7 @@ func rot32(x uint32) uint32 { // arm:"MOVW\tR\\d+@>25" // ppc64x:"ROTLW\t[$]7" // loong64: "ROTR\t[$]25" + // riscv64: "OR","SLLIW","SRLIW",-"AND" a += x<<7 | x>>25 // amd64:`ROLL\t[$]8` @@ -61,6 +66,7 @@ func rot32(x uint32) uint32 { // s390x:"RLL\t[$]8" // ppc64x:"ROTLW\t[$]8" // loong64: "ROTR\t[$]24" + // riscv64: "OR","SLLIW","SRLIW",-"AND" a += x<<8 + x>>24 // amd64:"ROLL\t[$]9" @@ -69,6 +75,7 @@ func rot32(x uint32) uint32 { // s390x:"RLL\t[$]9" // ppc64x:"ROTLW\t[$]9" // loong64: "ROTR\t[$]23" + // riscv64: "OR","SLLIW","SRLIW",-"AND" a += x<<9 ^ x>>23 // amd64:"ROLL\t[$]10" @@ -79,6 +86,7 @@ func rot32(x uint32) uint32 { // arm64:"RORW\t[$]22" // s390x:"RLL\t[$]10" // loong64: "ROTR\t[$]22" + // riscv64: "OR","SLLIW","SRLIW",-"AND" a += bits.RotateLeft32(x, 10) return a @@ -88,12 +96,15 @@ func rot16(x uint16) uint16 { var a uint16 // amd64:"ROLW\t[$]7" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<7 | x>>9 // amd64:`ROLW\t[$]8` + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<8 + x>>8 // amd64:"ROLW\t[$]9" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<9 ^ x>>7 return a @@ -103,12 +114,15 @@ func rot8(x uint8) uint8 { var a uint8 // amd64:"ROLB\t[$]5" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<5 | x>>3 // amd64:`ROLB\t[$]6` + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<6 + x>>2 // amd64:"ROLB\t[$]7" + // riscv64: "OR","SLLI","SRLI",-"AND" a += x<<7 ^ x>>1 return a @@ -127,12 +141,14 @@ func rot64nc(x uint64, z uint) uint64 { // arm64:"ROR","NEG",-"AND" // ppc64x:"ROTL",-"NEG",-"AND" // loong64: "ROTRV", -"AND" + // riscv64: "OR","SLL","SRL",-"AND" a += x<<z | x>>(64-z) // 
amd64:"RORQ",-"AND" // arm64:"ROR",-"NEG",-"AND" // ppc64x:"ROTL","NEG",-"AND" // loong64: "ROTRV", -"AND" + // riscv64: "OR","SLL","SRL",-"AND" a += x>>z | x<<(64-z) return a @@ -147,12 +163,14 @@ func rot32nc(x uint32, z uint) uint32 { // arm64:"ROR","NEG",-"AND" // ppc64x:"ROTLW",-"NEG",-"AND" // loong64: "ROTR", -"AND" + // riscv64: "OR","SLLW","SRLW",-"AND" a += x<<z | x>>(32-z) // amd64:"RORL",-"AND" // arm64:"ROR",-"NEG",-"AND" // ppc64x:"ROTLW","NEG",-"AND" // loong64: "ROTR", -"AND" + // riscv64: "OR","SLLW","SRLW",-"AND" a += x>>z | x<<(32-z) return a @@ -164,9 +182,11 @@ func rot16nc(x uint16, z uint) uint16 { z &= 15 // amd64:"ROLW",-"ANDQ" + // riscv64: "OR","SLL","SRL",-"AND\t" a += x<<z | x>>(16-z) // amd64:"RORW",-"ANDQ" + // riscv64: "OR","SLL","SRL",-"AND\t" a += x>>z | x<<(16-z) return a @@ -178,9 +198,11 @@ func rot8nc(x uint8, z uint) uint8 { z &= 7 // amd64:"ROLB",-"ANDQ" + // riscv64: "OR","SLL","SRL",-"AND\t" a += x<<z | x>>(8-z) // amd64:"RORB",-"ANDQ" + // riscv64: "OR","SLL","SRL",-"AND\t" a += x>>z | x<<(8-z) return a -- cgit v1.2.3-54-g00ecf