From be1d7388b349e86bc2fc1b8769902875e732918f Mon Sep 17 00:00:00 2001 From: Meng Zhuo Date: Wed, 30 Mar 2022 18:19:48 +0800 Subject: internal/bytealg: port bytealg functions to reg ABI on riscv64 This CL adds support for the reg ABI to the bytes functions for riscv64. These are initially under control of the GOEXPERIMENT macro until all changes are in. Change-Id: I026295ae38e2aba055f7a51c77f92c1921e5ec97 Reviewed-on: https://go-review.googlesource.com/c/go/+/361916 Run-TryBot: mzh TryBot-Result: Gopher Robot Reviewed-by: Cherry Mui Reviewed-by: Ian Lance Taylor --- src/internal/bytealg/compare_riscv64.s | 228 +++++++++++++++++-------------- src/internal/bytealg/count_riscv64.s | 69 ++++++---- src/internal/bytealg/equal_riscv64.s | 177 +++++++++++++----------- src/internal/bytealg/indexbyte_riscv64.s | 79 +++++++---- 4 files changed, 315 insertions(+), 238 deletions(-) diff --git a/src/internal/bytealg/compare_riscv64.s b/src/internal/bytealg/compare_riscv64.s index 0dc62515a17..7d2f8d6d0b8 100644 --- a/src/internal/bytealg/compare_riscv64.s +++ b/src/internal/bytealg/compare_riscv64.s @@ -5,161 +5,179 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 - MOV a_base+0(FP), X5 - MOV a_len+8(FP), X6 - MOV b_base+24(FP), X7 - MOV b_len+32(FP), X8 - MOV $ret+48(FP), X9 +TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 +#ifndef GOEXPERIMENT_regabiargs + MOV a_base+0(FP), X10 + MOV a_len+8(FP), X11 + MOV b_base+24(FP), X12 + MOV b_len+32(FP), X13 + MOV $ret+48(FP), X14 +#else + // X10 = a_base + // X11 = a_len + // X12 = a_cap (unused) + // X13 = b_base (want in X12) + // X14 = b_len (want in X13) + // X15 = b_cap (unused) + MOV X13, X12 + MOV X14, X13 +#endif JMP compare<>(SB) -TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 - MOV a_base+0(FP), X5 - MOV a_len+8(FP), X6 - MOV b_base+16(FP), X7 - MOV b_len+24(FP), X8 - MOV $ret+32(FP), X9 +TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOV a_base+0(FP), X10 + MOV a_len+8(FP), X11 + MOV b_base+16(FP), X12 + MOV b_len+24(FP), X13 + MOV $ret+32(FP), X14 +#endif + // X10 = a_base + // X11 = a_len + // X12 = b_base + // X13 = b_len JMP compare<>(SB) // On entry: -// X5 points to start of a -// X6 length of a -// X7 points to start of b -// X8 length of b -// X9 points to the address to store the return value (-1/0/1) +// X10 points to start of a +// X11 length of a +// X12 points to start of b +// X13 length of b +// for non-regabi X14 points to the address to store the return value (-1/0/1) +// for regabi the return value in X10 TEXT compare<>(SB),NOSPLIT|NOFRAME,$0 - BEQ X5, X7, cmp_len + BEQ X10, X12, cmp_len - MOV X6, X10 - BGE X8, X10, use_a_len // X10 = min(len(a), len(b)) - MOV X8, X10 + MOV X11, X5 + BGE X13, X5, use_a_len // X5 = min(len(a), len(b)) + MOV X13, X5 use_a_len: - BEQZ X10, cmp_len + BEQZ X5, cmp_len - MOV $32, X11 - BLT X10, X11, loop4_check + MOV $32, X6 + BLT X5, X6, loop4_check // Check alignment - if alignment differs we have to do one byte at a time. - AND $3, X5, X12 - AND $3, X7, X13 - BNE X12, X13, loop4_check - BEQZ X12, loop32_check + AND $3, X10, X7 + AND $3, X12, X8 + BNE X7, X8, loop4_check + BEQZ X7, loop32_check // Check one byte at a time until we reach 8 byte alignment. - SUB X12, X10, X10 + SUB X7, X5, X5 align: - ADD $-1, X12 - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - BNE X13, X14, cmp - ADD $1, X5 - ADD $1, X7 - BNEZ X12, align + ADD $-1, X7 + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + BNE X8, X9, cmp + ADD $1, X10 + ADD $1, X12 + BNEZ X7, align loop32_check: - MOV $32, X12 - BLT X10, X12, loop16_check + MOV $32, X7 + BLT X5, X7, loop16_check loop32: - MOV 0(X5), X15 - MOV 0(X7), X16 - MOV 8(X5), X17 - MOV 8(X7), X18 + MOV 0(X10), X15 + MOV 0(X12), X16 + MOV 8(X10), X17 + MOV 8(X12), X18 BEQ X15, X16, loop32a JMP cmp8a loop32a: BEQ X17, X18, loop32b JMP cmp8b loop32b: - MOV 16(X5), X15 - MOV 16(X7), X16 - MOV 24(X5), X17 - MOV 24(X7), X18 + MOV 16(X10), X15 + MOV 16(X12), X16 + MOV 24(X10), X17 + MOV 24(X12), X18 BEQ X15, X16, loop32c JMP cmp8a loop32c: BEQ X17, X18, loop32d JMP cmp8b loop32d: - ADD $32, X5 - ADD $32, X7 - ADD $-32, X10 - BGE X10, X12, loop32 - BEQZ X10, cmp_len + ADD $32, X10 + ADD $32, X12 + ADD $-32, X5 + BGE X5, X7, loop32 + BEQZ X5, cmp_len loop16_check: - MOV $16, X11 - BLT X10, X11, loop4_check + MOV $16, X6 + BLT X5, X6, loop4_check loop16: - MOV 0(X5), X15 - MOV 0(X7), X16 - MOV 8(X5), X17 - MOV 8(X7), X18 + MOV 0(X10), X15 + MOV 0(X12), X16 + MOV 8(X10), X17 + MOV 8(X12), X18 BEQ X15, X16, loop16a JMP cmp8a loop16a: BEQ X17, X18, loop16b JMP cmp8b loop16b: - ADD $16, X5 - ADD $16, X7 - ADD $-16, X10 - BGE X10, X11, loop16 - BEQZ X10, cmp_len + ADD $16, X10 + ADD $16, X12 + ADD $-16, X5 + BGE X5, X6, loop16 + BEQZ X5, cmp_len loop4_check: - MOV $4, X11 - BLT X10, X11, loop1 + MOV $4, X6 + BLT X5, X6, loop1 loop4: - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - MOVBU 1(X5), X15 - MOVBU 1(X7), X16 - BEQ X13, X14, loop4a - SLTU X14, X13, X10 - SLTU X13, X14, X11 + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + MOVBU 1(X10), X15 + MOVBU 1(X12), X16 + BEQ X8, X9, loop4a + SLTU X9, X8, X5 + SLTU X8, X9, X6 JMP cmp_ret loop4a: BEQ X15, X16, loop4b - SLTU X16, X15, X10 - SLTU X15, X16, X11 + SLTU X16, X15, X5 + SLTU X15, X16, X6 JMP cmp_ret loop4b: - MOVBU 2(X5), X21 - MOVBU 2(X7), X22 - MOVBU 3(X5), X23 - MOVBU 3(X7), X24 + MOVBU 2(X10), X21 + MOVBU 2(X12), X22 + MOVBU 3(X10), X23 + MOVBU 3(X12), X24 BEQ X21, X22, loop4c - SLTU X22, X21, X10 - SLTU X21, X22, X11 + SLTU X22, X21, X5 + SLTU X21, X22, X6 JMP cmp_ret loop4c: BEQ X23, X24, loop4d - SLTU X24, X23, X10 - SLTU X23, X24, X11 + SLTU X24, X23, X5 + SLTU X23, X24, X6 JMP cmp_ret loop4d: - ADD $4, X5 - ADD $4, X7 - ADD $-4, X10 - BGE X10, X11, loop4 + ADD $4, X10 + ADD $4, X12 + ADD $-4, X5 + BGE X5, X6, loop4 loop1: - BEQZ X10, cmp_len - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - BNE X13, X14, cmp - ADD $1, X5 - ADD $1, X7 - ADD $-1, X10 + BEQZ X5, cmp_len + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + BNE X8, X9, cmp + ADD $1, X10 + ADD $1, X12 + ADD $-1, X5 JMP loop1 // Compare 8 bytes of memory in X15/X16 that are known to differ. cmp8a: MOV $0xff, X19 cmp8a_loop: - AND X15, X19, X13 - AND X16, X19, X14 - BNE X13, X14, cmp + AND X15, X19, X8 + AND X16, X19, X9 + BNE X8, X9, cmp SLLI $8, X19 JMP cmp8a_loop @@ -167,19 +185,21 @@ cmp8a_loop: cmp8b: MOV $0xff, X19 cmp8b_loop: - AND X17, X19, X13 - AND X18, X19, X14 - BNE X13, X14, cmp + AND X17, X19, X8 + AND X18, X19, X9 + BNE X8, X9, cmp SLLI $8, X19 JMP cmp8b_loop cmp_len: - MOV X6, X13 - MOV X8, X14 + MOV X11, X8 + MOV X13, X9 cmp: - SLTU X14, X13, X10 - SLTU X13, X14, X11 + SLTU X9, X8, X5 + SLTU X8, X9, X6 cmp_ret: - SUB X10, X11, X12 - MOV X12, (X9) + SUB X5, X6, X10 +#ifndef GOEXPERIMENT_regabiargs + MOV X10, (X14) +#endif RET diff --git a/src/internal/bytealg/count_riscv64.s b/src/internal/bytealg/count_riscv64.s index 3f4eb23286e..1e081e2c8c5 100644 --- a/src/internal/bytealg/count_riscv64.s +++ b/src/internal/bytealg/count_riscv64.s @@ -5,40 +5,61 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Count(SB),NOSPLIT,$0-40 - MOV b_base+0(FP), A1 - MOV b_len+8(FP), A2 - MOVBU c+24(FP), A3 // byte to count - MOV ZERO, A4 // count - ADD A1, A2 // end +TEXT ·Count(SB),NOSPLIT,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOV b_base+0(FP), X10 + MOV b_len+8(FP), X11 + MOVBU c+24(FP), X12 // byte to count +#else + // X10 = b_base + // X11 = b_len + // X12 = b_cap (unused) + // X13 = byte to count (want in X12) + MOV X13, X12 +#endif + MOV ZERO, X14 // count + ADD X10, X11 // end loop: - BEQ A1, A2, done - MOVBU (A1), A5 - ADD $1, A1 - BNE A3, A5, loop - ADD $1, A4 + BEQ X10, X11, done + MOVBU (X10), X15 + ADD $1, X10 + BNE X12, X15, loop + ADD $1, X14 JMP loop done: - MOV A4, ret+32(FP) +#ifndef GOEXPERIMENT_regabiargs + MOV X14, ret+32(FP) +#else + MOV X14, X10 +#endif RET -TEXT ·CountString(SB),NOSPLIT,$0-32 - MOV s_base+0(FP), A1 - MOV s_len+8(FP), A2 - MOVBU c+16(FP), A3 // byte to count - MOV ZERO, A4 // count - ADD A1, A2 // end +TEXT ·CountString(SB),NOSPLIT,$0-32 +#ifndef GOEXPERIMENT_regabiargs + MOV s_base+0(FP), X10 + MOV s_len+8(FP), X11 + MOVBU c+16(FP), X12 // byte to count +#endif + // X10 = s_base + // X11 = s_len + // X12 = byte to count + MOV ZERO, X14 // count + ADD X10, X11 // end loop: - BEQ A1, A2, done - MOVBU (A1), A5 - ADD $1, A1 - BNE A3, A5, loop - ADD $1, A4 + BEQ X10, X11, done + MOVBU (X10), X15 + ADD $1, X10 + BNE X12, X15, loop + ADD $1, X14 JMP loop done: - MOV A4, ret+24(FP) +#ifndef GOEXPERIMENT_regabiargs + MOV X14, ret+24(FP) +#else + MOV X14, X10 +#endif RET diff --git a/src/internal/bytealg/equal_riscv64.s b/src/internal/bytealg/equal_riscv64.s index 5dd13beb555..77202d60759 100644 --- a/src/internal/bytealg/equal_riscv64.s +++ b/src/internal/bytealg/equal_riscv64.s @@ -8,120 +8,137 @@ #define CTXT S10 // func memequal(a, b unsafe.Pointer, size uintptr) bool -TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 - MOV a+0(FP), X5 - MOV b+8(FP), X6 - MOV size+16(FP), X7 - MOV $ret+24(FP), X19 +TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 +#ifndef GOEXPERIMENT_regabiargs + MOV a+0(FP), X10 + MOV b+8(FP), X11 + MOV size+16(FP), X12 + MOV $ret+24(FP), X13 +#endif + // X10 = a_base + // X11 = b_base + // X12 = size JMP memequal<>(SB) // func memequal_varlen(a, b unsafe.Pointer) bool -TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 - MOV a+0(FP), X5 - MOV b+8(FP), X6 - MOV 8(CTXT), X7 // compiler stores size at offset 8 in the closure - MOV $ret+16(FP), X19 +TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 + MOV 8(CTXT), X12 // compiler stores size at offset 8 in the closure +#ifndef GOEXPERIMENT_regabiargs + MOV a+0(FP), X10 + MOV b+8(FP), X11 + MOV $ret+16(FP), X13 +#endif + // X10 = a_base + // X11 = b_base JMP memequal<>(SB) -// On entry X5 and X6 contain pointers, X7 contains length. -// X19 contains address for return value. +// On entry X10 and X11 contain pointers, X12 contains length. +// For non-regabi X13 contains address for return value. +// For regabi return value in X10. TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0 - BEQ X5, X6, eq + BEQ X10, X11, eq - MOV $32, X8 - BLT X7, X8, loop4_check + MOV $32, X23 + BLT X12, X23, loop4_check // Check alignment - if alignment differs we have to do one byte at a time. - AND $3, X5, X9 - AND $3, X6, X10 - BNE X9, X10, loop4_check + AND $3, X10, X9 + AND $3, X11, X19 + BNE X9, X19, loop4_check BEQZ X9, loop32_check // Check one byte at a time until we reach 8 byte alignment. - SUB X9, X7, X7 + SUB X9, X12, X12 align: ADD $-1, X9 - MOVBU 0(X5), X10 - MOVBU 0(X6), X11 - BNE X10, X11, not_eq - ADD $1, X5 - ADD $1, X6 + MOVBU 0(X10), X19 + MOVBU 0(X11), X20 + BNE X19, X20, not_eq + ADD $1, X10 + ADD $1, X11 BNEZ X9, align loop32_check: MOV $32, X9 - BLT X7, X9, loop16_check + BLT X12, X9, loop16_check loop32: - MOV 0(X5), X10 - MOV 0(X6), X11 - MOV 8(X5), X12 - MOV 8(X6), X13 - BNE X10, X11, not_eq - BNE X12, X13, not_eq - MOV 16(X5), X14 - MOV 16(X6), X15 - MOV 24(X5), X16 - MOV 24(X6), X17 + MOV 0(X10), X19 + MOV 0(X11), X20 + MOV 8(X10), X21 + MOV 8(X11), X22 + BNE X19, X20, not_eq + BNE X21, X22, not_eq + MOV 16(X10), X14 + MOV 16(X11), X15 + MOV 24(X10), X16 + MOV 24(X11), X17 BNE X14, X15, not_eq BNE X16, X17, not_eq - ADD $32, X5 - ADD $32, X6 - ADD $-32, X7 - BGE X7, X9, loop32 - BEQZ X7, eq + ADD $32, X10 + ADD $32, X11 + ADD $-32, X12 + BGE X12, X9, loop32 + BEQZ X12, eq loop16_check: - MOV $16, X8 - BLT X7, X8, loop4_check + MOV $16, X23 + BLT X12, X23, loop4_check loop16: - MOV 0(X5), X10 - MOV 0(X6), X11 - MOV 8(X5), X12 - MOV 8(X6), X13 - BNE X10, X11, not_eq - BNE X12, X13, not_eq - ADD $16, X5 - ADD $16, X6 - ADD $-16, X7 - BGE X7, X8, loop16 - BEQZ X7, eq + MOV 0(X10), X19 + MOV 0(X11), X20 + MOV 8(X10), X21 + MOV 8(X11), X22 + BNE X19, X20, not_eq + BNE X21, X22, not_eq + ADD $16, X10 + ADD $16, X11 + ADD $-16, X12 + BGE X12, X23, loop16 + BEQZ X12, eq loop4_check: - MOV $4, X8 - BLT X7, X8, loop1 + MOV $4, X23 + BLT X12, X23, loop1 loop4: - MOVBU 0(X5), X10 - MOVBU 0(X6), X11 - MOVBU 1(X5), X12 - MOVBU 1(X6), X13 - BNE X10, X11, not_eq - BNE X12, X13, not_eq - MOVBU 2(X5), X14 - MOVBU 2(X6), X15 - MOVBU 3(X5), X16 - MOVBU 3(X6), X17 + MOVBU 0(X10), X19 + MOVBU 0(X11), X20 + MOVBU 1(X10), X21 + MOVBU 1(X11), X22 + BNE X19, X20, not_eq + BNE X21, X22, not_eq + MOVBU 2(X10), X14 + MOVBU 2(X11), X15 + MOVBU 3(X10), X16 + MOVBU 3(X11), X17 BNE X14, X15, not_eq BNE X16, X17, not_eq - ADD $4, X5 - ADD $4, X6 - ADD $-4, X7 - BGE X7, X8, loop4 + ADD $4, X10 + ADD $4, X11 + ADD $-4, X12 + BGE X12, X23, loop4 loop1: - BEQZ X7, eq - MOVBU 0(X5), X10 - MOVBU 0(X6), X11 - BNE X10, X11, not_eq - ADD $1, X5 - ADD $1, X6 - ADD $-1, X7 + BEQZ X12, eq + MOVBU 0(X10), X19 + MOVBU 0(X11), X20 + BNE X19, X20, not_eq + ADD $1, X10 + ADD $1, X11 + ADD $-1, X12 JMP loop1 not_eq: - MOV $0, X5 - MOVB X5, (X19) +#ifndef GOEXPERIMENT_regabiargs + MOVB ZERO, (X13) +#else + MOVB ZERO, X10 +#endif RET eq: - MOV $1, X5 - MOVB X5, (X19) +#ifndef GOEXPERIMENT_regabiargs + MOV $1, X10 + MOVB X10, (X13) +#else + MOV $1, X10 +#endif RET diff --git a/src/internal/bytealg/indexbyte_riscv64.s b/src/internal/bytealg/indexbyte_riscv64.s index 156c3036997..e9d3e6be393 100644 --- a/src/internal/bytealg/indexbyte_riscv64.s +++ b/src/internal/bytealg/indexbyte_riscv64.s @@ -5,48 +5,67 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·IndexByte(SB),NOSPLIT,$0-40 - MOV b_base+0(FP), A1 - MOV b_len+8(FP), A2 - MOVBU c+24(FP), A3 // byte to find - MOV A1, A4 // store base for later - ADD A1, A2 // end - ADD $-1, A1 +TEXT ·IndexByte(SB),NOSPLIT,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOV b_base+0(FP), X10 + MOV b_len+8(FP), X11 + MOVBU c+24(FP), X13 // byte to find +#endif + // X10 = b_base + // X11 = b_len + // X12 = b_cap (unused) + // X13 = byte to find + MOV X10, X12 // store base for later + ADD X10, X11 // end + ADD $-1, X10 loop: - ADD $1, A1 - BEQ A1, A2, notfound - MOVBU (A1), A5 - BNE A3, A5, loop + ADD $1, X10 + BEQ X10, X11, notfound + MOVBU (X10), X14 + BNE X13, X14, loop - SUB A4, A1 // remove base - MOV A1, ret+32(FP) + SUB X12, X10 // remove base +#ifndef GOEXPERIMENT_regabiargs + MOV X10, ret+32(FP) +#endif RET notfound: - MOV $-1, A1 - MOV A1, ret+32(FP) + MOV $-1, X10 +#ifndef GOEXPERIMENT_regabiargs + MOV X10, ret+32(FP) +#endif RET -TEXT ·IndexByteString(SB),NOSPLIT,$0-32 - MOV s_base+0(FP), A1 - MOV s_len+8(FP), A2 - MOVBU c+16(FP), A3 // byte to find - MOV A1, A4 // store base for later - ADD A1, A2 // end - ADD $-1, A1 +TEXT ·IndexByteString(SB),NOSPLIT,$0-32 +#ifndef GOEXPERIMENT_regabiargs + MOV s_base+0(FP), X10 + MOV s_len+8(FP), X11 + MOVBU c+16(FP), X12 // byte to find +#endif + // X10 = b_base + // X11 = b_len + // X12 = byte to find + MOV X10, X13 // store base for later + ADD X10, X11 // end + ADD $-1, X10 loop: - ADD $1, A1 - BEQ A1, A2, notfound - MOVBU (A1), A5 - BNE A3, A5, loop + ADD $1, X10 + BEQ X10, X11, notfound + MOVBU (X10), X14 + BNE X12, X14, loop - SUB A4, A1 // remove base - MOV A1, ret+24(FP) + SUB X13, X10 // remove base +#ifndef GOEXPERIMENT_regabiargs + MOV X10, ret+24(FP) +#endif RET notfound: - MOV $-1, A1 - MOV A1, ret+24(FP) + MOV $-1, X10 +#ifndef GOEXPERIMENT_regabiargs + MOV X10, ret+24(FP) +#endif RET -- cgit v1.2.3-54-g00ecf