diff options
Diffstat (limited to 'src/internal/bytealg/compare_riscv64.s')
-rw-r--r-- | src/internal/bytealg/compare_riscv64.s | 228 |
1 files changed, 124 insertions, 104 deletions
diff --git a/src/internal/bytealg/compare_riscv64.s b/src/internal/bytealg/compare_riscv64.s index 0dc62515a17..7d2f8d6d0b8 100644 --- a/src/internal/bytealg/compare_riscv64.s +++ b/src/internal/bytealg/compare_riscv64.s @@ -5,161 +5,179 @@ #include "go_asm.h" #include "textflag.h" -TEXT ·Compare(SB),NOSPLIT|NOFRAME,$0-56 - MOV a_base+0(FP), X5 - MOV a_len+8(FP), X6 - MOV b_base+24(FP), X7 - MOV b_len+32(FP), X8 - MOV $ret+48(FP), X9 +TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56 +#ifndef GOEXPERIMENT_regabiargs + MOV a_base+0(FP), X10 + MOV a_len+8(FP), X11 + MOV b_base+24(FP), X12 + MOV b_len+32(FP), X13 + MOV $ret+48(FP), X14 +#else + // X10 = a_base + // X11 = a_len + // X12 = a_cap (unused) + // X13 = b_base (want in X12) + // X14 = b_len (want in X13) + // X15 = b_cap (unused) + MOV X13, X12 + MOV X14, X13 +#endif JMP compare<>(SB) -TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 - MOV a_base+0(FP), X5 - MOV a_len+8(FP), X6 - MOV b_base+16(FP), X7 - MOV b_len+24(FP), X8 - MOV $ret+32(FP), X9 +TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40 +#ifndef GOEXPERIMENT_regabiargs + MOV a_base+0(FP), X10 + MOV a_len+8(FP), X11 + MOV b_base+16(FP), X12 + MOV b_len+24(FP), X13 + MOV $ret+32(FP), X14 +#endif + // X10 = a_base + // X11 = a_len + // X12 = b_base + // X13 = b_len JMP compare<>(SB) // On entry: -// X5 points to start of a -// X6 length of a -// X7 points to start of b -// X8 length of b -// X9 points to the address to store the return value (-1/0/1) +// X10 points to start of a +// X11 length of a +// X12 points to start of b +// X13 length of b +// for non-regabi X14 points to the address to store the return value (-1/0/1) +// for regabi the return value in X10 TEXT compare<>(SB),NOSPLIT|NOFRAME,$0 - BEQ X5, X7, cmp_len + BEQ X10, X12, cmp_len - MOV X6, X10 - BGE X8, X10, use_a_len // X10 = min(len(a), len(b)) - MOV X8, X10 + MOV X11, X5 + BGE X13, X5, use_a_len // X5 = min(len(a), len(b)) + MOV X13, X5 use_a_len: - BEQZ X10, cmp_len + BEQZ X5, cmp_len - MOV $32, X11 - BLT X10, X11, loop4_check + MOV $32, X6 + BLT X5, X6, loop4_check // Check alignment - if alignment differs we have to do one byte at a time. - AND $3, X5, X12 - AND $3, X7, X13 - BNE X12, X13, loop4_check - BEQZ X12, loop32_check + AND $3, X10, X7 + AND $3, X12, X8 + BNE X7, X8, loop4_check + BEQZ X7, loop32_check // Check one byte at a time until we reach 8 byte alignment. - SUB X12, X10, X10 + SUB X7, X5, X5 align: - ADD $-1, X12 - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - BNE X13, X14, cmp - ADD $1, X5 - ADD $1, X7 - BNEZ X12, align + ADD $-1, X7 + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + BNE X8, X9, cmp + ADD $1, X10 + ADD $1, X12 + BNEZ X7, align loop32_check: - MOV $32, X12 - BLT X10, X12, loop16_check + MOV $32, X7 + BLT X5, X7, loop16_check loop32: - MOV 0(X5), X15 - MOV 0(X7), X16 - MOV 8(X5), X17 - MOV 8(X7), X18 + MOV 0(X10), X15 + MOV 0(X12), X16 + MOV 8(X10), X17 + MOV 8(X12), X18 BEQ X15, X16, loop32a JMP cmp8a loop32a: BEQ X17, X18, loop32b JMP cmp8b loop32b: - MOV 16(X5), X15 - MOV 16(X7), X16 - MOV 24(X5), X17 - MOV 24(X7), X18 + MOV 16(X10), X15 + MOV 16(X12), X16 + MOV 24(X10), X17 + MOV 24(X12), X18 BEQ X15, X16, loop32c JMP cmp8a loop32c: BEQ X17, X18, loop32d JMP cmp8b loop32d: - ADD $32, X5 - ADD $32, X7 - ADD $-32, X10 - BGE X10, X12, loop32 - BEQZ X10, cmp_len + ADD $32, X10 + ADD $32, X12 + ADD $-32, X5 + BGE X5, X7, loop32 + BEQZ X5, cmp_len loop16_check: - MOV $16, X11 - BLT X10, X11, loop4_check + MOV $16, X6 + BLT X5, X6, loop4_check loop16: - MOV 0(X5), X15 - MOV 0(X7), X16 - MOV 8(X5), X17 - MOV 8(X7), X18 + MOV 0(X10), X15 + MOV 0(X12), X16 + MOV 8(X10), X17 + MOV 8(X12), X18 BEQ X15, X16, loop16a JMP cmp8a loop16a: BEQ X17, X18, loop16b JMP cmp8b loop16b: - ADD $16, X5 - ADD $16, X7 - ADD $-16, X10 - BGE X10, X11, loop16 - BEQZ X10, cmp_len + ADD $16, X10 + ADD $16, X12 + ADD $-16, X5 + BGE X5, X6, loop16 + BEQZ X5, cmp_len loop4_check: - MOV $4, X11 - BLT X10, X11, loop1 + MOV $4, X6 + BLT X5, X6, loop1 loop4: - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - MOVBU 1(X5), X15 - MOVBU 1(X7), X16 - BEQ X13, X14, loop4a - SLTU X14, X13, X10 - SLTU X13, X14, X11 + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + MOVBU 1(X10), X15 + MOVBU 1(X12), X16 + BEQ X8, X9, loop4a + SLTU X9, X8, X5 + SLTU X8, X9, X6 JMP cmp_ret loop4a: BEQ X15, X16, loop4b - SLTU X16, X15, X10 - SLTU X15, X16, X11 + SLTU X16, X15, X5 + SLTU X15, X16, X6 JMP cmp_ret loop4b: - MOVBU 2(X5), X21 - MOVBU 2(X7), X22 - MOVBU 3(X5), X23 - MOVBU 3(X7), X24 + MOVBU 2(X10), X21 + MOVBU 2(X12), X22 + MOVBU 3(X10), X23 + MOVBU 3(X12), X24 BEQ X21, X22, loop4c - SLTU X22, X21, X10 - SLTU X21, X22, X11 + SLTU X22, X21, X5 + SLTU X21, X22, X6 JMP cmp_ret loop4c: BEQ X23, X24, loop4d - SLTU X24, X23, X10 - SLTU X23, X24, X11 + SLTU X24, X23, X5 + SLTU X23, X24, X6 JMP cmp_ret loop4d: - ADD $4, X5 - ADD $4, X7 - ADD $-4, X10 - BGE X10, X11, loop4 + ADD $4, X10 + ADD $4, X12 + ADD $-4, X5 + BGE X5, X6, loop4 loop1: - BEQZ X10, cmp_len - MOVBU 0(X5), X13 - MOVBU 0(X7), X14 - BNE X13, X14, cmp - ADD $1, X5 - ADD $1, X7 - ADD $-1, X10 + BEQZ X5, cmp_len + MOVBU 0(X10), X8 + MOVBU 0(X12), X9 + BNE X8, X9, cmp + ADD $1, X10 + ADD $1, X12 + ADD $-1, X5 JMP loop1 // Compare 8 bytes of memory in X15/X16 that are known to differ. cmp8a: MOV $0xff, X19 cmp8a_loop: - AND X15, X19, X13 - AND X16, X19, X14 - BNE X13, X14, cmp + AND X15, X19, X8 + AND X16, X19, X9 + BNE X8, X9, cmp SLLI $8, X19 JMP cmp8a_loop @@ -167,19 +185,21 @@ cmp8a_loop: cmp8b: MOV $0xff, X19 cmp8b_loop: - AND X17, X19, X13 - AND X18, X19, X14 - BNE X13, X14, cmp + AND X17, X19, X8 + AND X18, X19, X9 + BNE X8, X9, cmp SLLI $8, X19 JMP cmp8b_loop cmp_len: - MOV X6, X13 - MOV X8, X14 + MOV X11, X8 + MOV X13, X9 cmp: - SLTU X14, X13, X10 - SLTU X13, X14, X11 + SLTU X9, X8, X5 + SLTU X8, X9, X6 cmp_ret: - SUB X10, X11, X12 - MOV X12, (X9) + SUB X5, X6, X10 +#ifndef GOEXPERIMENT_regabiargs + MOV X10, (X14) +#endif RET |