diff options
-rw-r--r-- | src/internal/bytealg/compare_riscv64.s | 228 | ||||
-rw-r--r-- | src/internal/bytealg/count_riscv64.s | 69 | ||||
-rw-r--r-- | src/internal/bytealg/equal_riscv64.s | 177 | ||||
-rw-r--r-- | src/internal/bytealg/indexbyte_riscv64.s | 79 |
4 files changed, 315 insertions, 238 deletions
diff --git a/src/internal/bytealg/compare_riscv64.s b/src/internal/bytealg/compare_riscv64.s index 0dc62515a17..7d2f8d6d0b8 100644 --- a/src/internal/bytealg/compare_riscv64.s +++ b/src/internal/bytealg/compare_riscv64.s @@ -5,161 +5,179 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 - MOV a_base+0(FP), X5 - MOV a_len+8(FP), X6 - MOV b_base+24(FP), X7 - MOV b_len+32(FP), X8 - MOV $ret+48(FP), X9 +TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56 +#ifndef GOEXPERIMENT_regabiargs + MOV a_base+0(FP), X10 + MOV a_len+8(FP), X11 + MOV b_base+24(FP), X12 + MOV b_len+32(FP), X13 + MOV $ret+48(FP), X14 +#else + // X10 = a_base + // X11 = a_len + // X12 = a_cap (unused) + // X13 = b_base (want in X12) + // X14 = b_len (want in X13) + // X15 = b_cap (unused) + MOV X13, X12 + MOV X14, X13 +#endif JMP compare<>(SB) -TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 - MOV a_base+0(FP), X5 - MOV a_len+8(FP), X6 - MOV b_base+16(FP), X7 - MOV b_len+24(FP), X8 - MOV $ret+32(FP), X9 +TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOV a_base+0(FP), X10 + MOV a_len+8(FP), X11 + MOV b_base+16(FP), X12 + MOV b_len+24(FP), X13 + MOV $ret+32(FP), X14 +#endif + // X10 = a_base + // X11 = a_len + // X12 = b_base + // X13 = b_len JMP compare<>(SB) // On entry: -// X5 points to start of a -// X6 length of a -// X7 points to start of b -// X8 length of b -// X9 points to the address to store the return value (-1/0/1) +// X10 points to start of a +// X11 length of a +// X12 points to start of b +// X13 length of b +// for non-regabi X14 points to the address to store the return value (-1/0/1) +// for regabi the return value in X10 TEXT compare<>(SB),NOSPLIT|NOFRAME,$0 - BEQ X5, X7, cmp_len + BEQ X10, X12, cmp_len - MOV X6, X10 - BGE X8, X10, use_a_len // X10 = min(len(a), len(b)) - MOV X8, X10 + MOV X11, X5 + BGE X13, X5, use_a_len // X5 = min(len(a), len(b)) + MOV X13, X5 use_a_len: - BEQZ X10, cmp_len + BEQZ X5, cmp_len - MOV $32, X11 - BLT X10, X11, loop4_check + MOV $32, X6 + BLT X5, X6, loop4_check // Check alignment - if alignment differs we have to do one byte at a time. - AND $3, X5, X12 - AND $3, X7, X13 - BNE X12, X13, loop4_check - BEQZ X12, loop32_check + AND $3, X10, X7 + AND $3, X12, X8 + BNE X7, X8, loop4_check + BEQZ X7, loop32_check // Check one byte at a time until we reach 8 byte alignment. - SUB X12, X10, X10 + SUB X7, X5, X5 align: - ADD $-1, X12 - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - BNE X13, X14, cmp - ADD $1, X5 - ADD $1, X7 - BNEZ X12, align + ADD $-1, X7 + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + BNE X8, X9, cmp + ADD $1, X10 + ADD $1, X12 + BNEZ X7, align loop32_check: - MOV $32, X12 - BLT X10, X12, loop16_check + MOV $32, X7 + BLT X5, X7, loop16_check loop32: - MOV 0(X5), X15 - MOV 0(X7), X16 - MOV 8(X5), X17 - MOV 8(X7), X18 + MOV 0(X10), X15 + MOV 0(X12), X16 + MOV 8(X10), X17 + MOV 8(X12), X18 BEQ X15, X16, loop32a JMP cmp8a loop32a: BEQ X17, X18, loop32b JMP cmp8b loop32b: - MOV 16(X5), X15 - MOV 16(X7), X16 - MOV 24(X5), X17 - MOV 24(X7), X18 + MOV 16(X10), X15 + MOV 16(X12), X16 + MOV 24(X10), X17 + MOV 24(X12), X18 BEQ X15, X16, loop32c JMP cmp8a loop32c: BEQ X17, X18, loop32d JMP cmp8b loop32d: - ADD $32, X5 - ADD $32, X7 - ADD $-32, X10 - BGE X10, X12, loop32 - BEQZ X10, cmp_len + ADD $32, X10 + ADD $32, X12 + ADD $-32, X5 + BGE X5, X7, loop32 + BEQZ X5, cmp_len loop16_check: - MOV $16, X11 - BLT X10, X11, loop4_check + MOV $16, X6 + BLT X5, X6, loop4_check loop16: - MOV 0(X5), X15 - MOV 0(X7), X16 - MOV 8(X5), X17 - MOV 8(X7), X18 + MOV 0(X10), X15 + MOV 0(X12), X16 + MOV 8(X10), X17 + MOV 8(X12), X18 BEQ X15, X16, loop16a JMP cmp8a loop16a: BEQ X17, X18, loop16b JMP cmp8b loop16b: - ADD $16, X5 - ADD $16, X7 - ADD $-16, X10 - BGE X10, X11, loop16 - BEQZ X10, cmp_len + ADD $16, X10 + ADD $16, X12 + ADD $-16, X5 + BGE X5, X6, loop16 + BEQZ X5, cmp_len loop4_check: - MOV $4, X11 - BLT X10, X11, loop1 + MOV $4, X6 + BLT X5, X6, loop1 loop4: - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - MOVBU 1(X5), X15 - MOVBU 1(X7), X16 - BEQ X13, X14, loop4a - SLTU X14, X13, X10 - SLTU X13, X14, X11 + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + MOVBU 1(X10), X15 + MOVBU 1(X12), X16 + BEQ X8, X9, loop4a + SLTU X9, X8, X5 + SLTU X8, X9, X6 JMP cmp_ret loop4a: BEQ X15, X16, loop4b - SLTU X16, X15, X10 - SLTU X15, X16, X11 + SLTU X16, X15, X5 + SLTU X15, X16, X6 JMP cmp_ret loop4b: - MOVBU 2(X5), X21 - MOVBU 2(X7), X22 - MOVBU 3(X5), X23 - MOVBU 3(X7), X24 + MOVBU 2(X10), X21 + MOVBU 2(X12), X22 + MOVBU 3(X10), X23 + MOVBU 3(X12), X24 BEQ X21, X22, loop4c - SLTU X22, X21, X10 - SLTU X21, X22, X11 + SLTU X22, X21, X5 + SLTU X21, X22, X6 JMP cmp_ret loop4c: BEQ X23, X24, loop4d - SLTU X24, X23, X10 - SLTU X23, X24, X11 + SLTU X24, X23, X5 + SLTU X23, X24, X6 JMP cmp_ret loop4d: - ADD $4, X5 - ADD $4, X7 - ADD $-4, X10 - BGE X10, X11, loop4 + ADD $4, X10 + ADD $4, X12 + ADD $-4, X5 + BGE X5, X6, loop4 loop1: - BEQZ X10, cmp_len - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - BNE X13, X14, cmp - ADD $1, X5 - ADD $1, X7 - ADD $-1, X10 + BEQZ X5, cmp_len + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + BNE X8, X9, cmp + ADD $1, X10 + ADD $1, X12 + ADD $-1, X5 JMP loop1 // Compare 8 bytes of memory in X15/X16 that are known to differ. cmp8a: MOV $0xff, X19 cmp8a_loop: - AND X15, X19, X13 - AND X16, X19, X14 - BNE X13, X14, cmp + AND X15, X19, X8 + AND X16, X19, X9 + BNE X8, X9, cmp SLLI $8, X19 JMP cmp8a_loop @@ -167,19 +185,21 @@ cmp8a_loop: cmp8b: MOV $0xff, X19 cmp8b_loop: - AND X17, X19, X13 - AND X18, X19, X14 - BNE X13, X14, cmp + AND X17, X19, X8 + AND X18, X19, X9 + BNE X8, X9, cmp SLLI $8, X19 JMP cmp8b_loop cmp_len: - MOV X6, X13 - MOV X8, X14 + MOV X11, X8 + MOV X13, X9 cmp: - SLTU X14, X13, X10 - SLTU X13, X14, X11 + SLTU X9, X8, X5 + SLTU X8, X9, X6 cmp_ret: - SUB X10, X11, X12 - MOV X12, (X9) + SUB X5, X6, X10 +#ifndef GOEXPERIMENT_regabiargs + MOV X10, (X14) +#endif RET diff --git a/src/internal/bytealg/count_riscv64.s b/src/internal/bytealg/count_riscv64.s index 3f4eb23286e..1e081e2c8c5 100644 --- a/src/internal/bytealg/count_riscv64.s +++ b/src/internal/bytealg/count_riscv64.s @@ -5,40 +5,61 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Count(SB),NOSPLIT,$0-40 - MOV b_base+0(FP), A1 - MOV b_len+8(FP), A2 - MOVBU c+24(FP), A3 // byte to count - MOV ZERO, A4 // count - ADD A1, A2 // end +TEXT ·Count<ABIInternal>(SB),NOSPLIT,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOV b_base+0(FP), X10 + MOV b_len+8(FP), X11 + MOVBU c+24(FP), X12 // byte to count +#else + // X10 = b_base + // X11 = b_len + // X12 = b_cap (unused) + // X13 = byte to count (want in X12) + MOV X13, X12 +#endif + MOV ZERO, X14 // count + ADD X10, X11 // end loop: - BEQ A1, A2, done - MOVBU (A1), A5 - ADD $1, A1 - BNE A3, A5, loop - ADD $1, A4 + BEQ X10, X11, done + MOVBU (X10), X15 + ADD $1, X10 + BNE X12, X15, loop + ADD $1, X14 JMP loop done: - MOV A4, ret+32(FP) +#ifndef GOEXPERIMENT_regabiargs + MOV X14, ret+32(FP) +#else + MOV X14, X10 +#endif RET -TEXT ·CountString(SB),NOSPLIT,$0-32 - MOV s_base+0(FP), A1 - MOV s_len+8(FP), A2 - MOVBU c+16(FP), A3 // byte to count - MOV ZERO, A4 // count - ADD A1, A2 // end +TEXT ·CountString<ABIInternal>(SB),NOSPLIT,$0-32 +#ifndef GOEXPERIMENT_regabiargs + MOV s_base+0(FP), X10 + MOV s_len+8(FP), X11 + MOVBU c+16(FP), X12 // byte to count +#endif + // X10 = s_base + // X11 = s_len + // X12 = byte to count + MOV ZERO, X14 // count + ADD X10, X11 // end loop: - BEQ A1, A2, done - MOVBU (A1), A5 - ADD $1, A1 - BNE A3, A5, loop - ADD $1, A4 + BEQ X10, X11, done + MOVBU (X10), X15 + ADD $1, X10 + BNE X12, X15, loop + ADD $1, X14 JMP loop done: - MOV A4, ret+24(FP) +#ifndef GOEXPERIMENT_regabiargs + MOV X14, ret+24(FP) +#else + MOV X14, X10 +#endif RET diff --git a/src/internal/bytealg/equal_riscv64.s b/src/internal/bytealg/equal_riscv64.s index 5dd13beb555..77202d60759 100644 --- a/src/internal/bytealg/equal_riscv64.s +++ b/src/internal/bytealg/equal_riscv64.s @@ -8,120 +8,137 @@ #define CTXT S10 // func memequal(a, b unsafe.Pointer, size uintptr) bool -TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 - MOV a+0(FP), X5 - MOV b+8(FP), X6 - MOV size+16(FP), X7 - MOV $ret+24(FP), X19 +TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25 +#ifndef GOEXPERIMENT_regabiargs + MOV a+0(FP), X10 + MOV b+8(FP), X11 + MOV size+16(FP), X12 + MOV $ret+24(FP), X13 +#endif + // X10 = a_base + // X11 = b_base + // X12 = size JMP memequal<>(SB) // func memequal_varlen(a, b unsafe.Pointer) bool -TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 - MOV a+0(FP), X5 - MOV b+8(FP), X6 - MOV 8(CTXT), X7 // compiler stores size at offset 8 in the closure - MOV $ret+16(FP), X19 +TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17 + MOV 8(CTXT), X12 // compiler stores size at offset 8 in the closure +#ifndef GOEXPERIMENT_regabiargs + MOV a+0(FP), X10 + MOV b+8(FP), X11 + MOV $ret+16(FP), X13 +#endif + // X10 = a_base + // X11 = b_base JMP memequal<>(SB) -// On entry X5 and X6 contain pointers, X7 contains length. -// X19 contains address for return value. +// On entry X10 and X11 contain pointers, X12 contains length. +// For non-regabi X13 contains address for return value. +// For regabi return value in X10. TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0 - BEQ X5, X6, eq + BEQ X10, X11, eq - MOV $32, X8 - BLT X7, X8, loop4_check + MOV $32, X23 + BLT X12, X23, loop4_check // Check alignment - if alignment differs we have to do one byte at a time. - AND $3, X5, X9 - AND $3, X6, X10 - BNE X9, X10, loop4_check + AND $3, X10, X9 + AND $3, X11, X19 + BNE X9, X19, loop4_check BEQZ X9, loop32_check // Check one byte at a time until we reach 8 byte alignment. - SUB X9, X7, X7 + SUB X9, X12, X12 align: ADD $-1, X9 - MOVBU 0(X5), X10 - MOVBU 0(X6), X11 - BNE X10, X11, not_eq - ADD $1, X5 - ADD $1, X6 + MOVBU 0(X10), X19 + MOVBU 0(X11), X20 + BNE X19, X20, not_eq + ADD $1, X10 + ADD $1, X11 BNEZ X9, align loop32_check: MOV $32, X9 - BLT X7, X9, loop16_check + BLT X12, X9, loop16_check loop32: - MOV 0(X5), X10 - MOV 0(X6), X11 - MOV 8(X5), X12 - MOV 8(X6), X13 - BNE X10, X11, not_eq - BNE X12, X13, not_eq - MOV 16(X5), X14 - MOV 16(X6), X15 - MOV 24(X5), X16 - MOV 24(X6), X17 + MOV 0(X10), X19 + MOV 0(X11), X20 + MOV 8(X10), X21 + MOV 8(X11), X22 + BNE X19, X20, not_eq + BNE X21, X22, not_eq + MOV 16(X10), X14 + MOV 16(X11), X15 + MOV 24(X10), X16 + MOV 24(X11), X17 BNE X14, X15, not_eq BNE X16, X17, not_eq - ADD $32, X5 - ADD $32, X6 - ADD $-32, X7 - BGE X7, X9, loop32 - BEQZ X7, eq + ADD $32, X10 + ADD $32, X11 + ADD $-32, X12 + BGE X12, X9, loop32 + BEQZ X12, eq loop16_check: - MOV $16, X8 - BLT X7, X8, loop4_check + MOV $16, X23 + BLT X12, X23, loop4_check loop16: - MOV 0(X5), X10 - MOV 0(X6), X11 - MOV 8(X5), X12 - MOV 8(X6), X13 - BNE X10, X11, not_eq - BNE X12, X13, not_eq - ADD $16, X5 - ADD $16, X6 - ADD $-16, X7 - BGE X7, X8, loop16 - BEQZ X7, eq + MOV 0(X10), X19 + MOV 0(X11), X20 + MOV 8(X10), X21 + MOV 8(X11), X22 + BNE X19, X20, not_eq + BNE X21, X22, not_eq + ADD $16, X10 + ADD $16, X11 + ADD $-16, X12 + BGE X12, X23, loop16 + BEQZ X12, eq loop4_check: - MOV $4, X8 - BLT X7, X8, loop1 + MOV $4, X23 + BLT X12, X23, loop1 loop4: - MOVBU 0(X5), X10 - MOVBU 0(X6), X11 - MOVBU 1(X5), X12 - MOVBU 1(X6), X13 - BNE X10, X11, not_eq - BNE X12, X13, not_eq - MOVBU 2(X5), X14 - MOVBU 2(X6), X15 - MOVBU 3(X5), X16 - MOVBU 3(X6), X17 + MOVBU 0(X10), X19 + MOVBU 0(X11), X20 + MOVBU 1(X10), X21 + MOVBU 1(X11), X22 + BNE X19, X20, not_eq + BNE X21, X22, not_eq + MOVBU 2(X10), X14 + MOVBU 2(X11), X15 + MOVBU 3(X10), X16 + MOVBU 3(X11), X17 BNE X14, X15, not_eq BNE X16, X17, not_eq - ADD $4, X5 - ADD $4, X6 - ADD $-4, X7 - BGE X7, X8, loop4 + ADD $4, X10 + ADD $4, X11 + ADD $-4, X12 + BGE X12, X23, loop4 loop1: - BEQZ X7, eq - MOVBU 0(X5), X10 - MOVBU 0(X6), X11 - BNE X10, X11, not_eq - ADD $1, X5 - ADD $1, X6 - ADD $-1, X7 + BEQZ X12, eq + MOVBU 0(X10), X19 + MOVBU 0(X11), X20 + BNE X19, X20, not_eq + ADD $1, X10 + ADD $1, X11 + ADD $-1, X12 JMP loop1 not_eq: - MOV $0, X5 - MOVB X5, (X19) +#ifndef GOEXPERIMENT_regabiargs + MOVB ZERO, (X13) +#else + MOVB ZERO, X10 +#endif RET eq: - MOV $1, X5 - MOVB X5, (X19) +#ifndef GOEXPERIMENT_regabiargs + MOV $1, X10 + MOVB X10, (X13) +#else + MOV $1, X10 +#endif RET diff --git a/src/internal/bytealg/indexbyte_riscv64.s b/src/internal/bytealg/indexbyte_riscv64.s index 156c3036997..e9d3e6be393 100644 --- a/src/internal/bytealg/indexbyte_riscv64.s +++ b/src/internal/bytealg/indexbyte_riscv64.s @@ -5,48 +5,67 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·IndexByte(SB),NOSPLIT,$0-40 - MOV b_base+0(FP), A1 - MOV b_len+8(FP), A2 - MOVBU c+24(FP), A3 // byte to find - MOV A1, A4 // store base for later - ADD A1, A2 // end - ADD $-1, A1 +TEXT ·IndexByte<ABIInternal>(SB),NOSPLIT,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOV b_base+0(FP), X10 + MOV b_len+8(FP), X11 + MOVBU c+24(FP), X13 // byte to find +#endif + // X10 = b_base + // X11 = b_len + // X12 = b_cap (unused) + // X13 = byte to find + MOV X10, X12 // store base for later + ADD X10, X11 // end + ADD $-1, X10 loop: - ADD $1, A1 - BEQ A1, A2, notfound - MOVBU (A1), A5 - BNE A3, A5, loop + ADD $1, X10 + BEQ X10, X11, notfound + MOVBU (X10), X14 + BNE X13, X14, loop - SUB A4, A1 // remove base - MOV A1, ret+32(FP) + SUB X12, X10 // remove base +#ifndef GOEXPERIMENT_regabiargs + MOV X10, ret+32(FP) +#endif RET notfound: - MOV $-1, A1 - MOV A1, ret+32(FP) + MOV $-1, X10 +#ifndef GOEXPERIMENT_regabiargs + MOV X10, ret+32(FP) +#endif RET -TEXT ·IndexByteString(SB),NOSPLIT,$0-32 - MOV s_base+0(FP), A1 - MOV s_len+8(FP), A2 - MOVBU c+16(FP), A3 // byte to find - MOV A1, A4 // store base for later - ADD A1, A2 // end - ADD $-1, A1 +TEXT ·IndexByteString<ABIInternal>(SB),NOSPLIT,$0-32 +#ifndef GOEXPERIMENT_regabiargs + MOV s_base+0(FP), X10 + MOV s_len+8(FP), X11 + MOVBU c+16(FP), X12 // byte to find +#endif + // X10 = b_base + // X11 = b_len + // X12 = byte to find + MOV X10, X13 // store base for later + ADD X10, X11 // end + ADD $-1, X10 loop: - ADD $1, A1 - BEQ A1, A2, notfound - MOVBU (A1), A5 - BNE A3, A5, loop + ADD $1, X10 + BEQ X10, X11, notfound + MOVBU (X10), X14 + BNE X12, X14, loop - SUB A4, A1 // remove base - MOV A1, ret+24(FP) + SUB X13, X10 // remove base +#ifndef GOEXPERIMENT_regabiargs + MOV X10, ret+24(FP) +#endif RET notfound: - MOV $-1, A1 - MOV A1, ret+24(FP) + MOV $-1, X10 +#ifndef GOEXPERIMENT_regabiargs + MOV X10, ret+24(FP) +#endif RET |