diff options
author | Cherry Zhang <cherryyz@google.com> | 2020-10-28 09:12:20 -0400 |
---|---|---|
committer | Cherry Zhang <cherryyz@google.com> | 2020-10-28 09:12:20 -0400 |
commit | a16e30d162c1c7408db7821e7b9513cefa09c6ca (patch) | |
tree | af752ba9ba44c547df39bb0af9bff79f610ba9d5 /src/internal/bytealg/equal_mips64x.s | |
parent | 91e4d2d57bc341dd82c98247117114c851380aef (diff) | |
parent | cf6cfba4d5358404dd890f6025e573a4b2156543 (diff) | |
download | go-a16e30d162c1c7408db7821e7b9513cefa09c6ca.tar.gz go-a16e30d162c1c7408db7821e7b9513cefa09c6ca.zip |
[dev.link] all: merge branch 'master' into dev.linkdev.link
Clean merge.
Change-Id: Ia7b2808bc649790198d34c226a61d9e569084dc5
Diffstat (limited to 'src/internal/bytealg/equal_mips64x.s')
-rw-r--r-- | src/internal/bytealg/equal_mips64x.s | 72 |
1 files changed, 68 insertions, 4 deletions
diff --git a/src/internal/bytealg/equal_mips64x.s b/src/internal/bytealg/equal_mips64x.s index 58dc4303b4..641e3ff06c 100644 --- a/src/internal/bytealg/equal_mips64x.s +++ b/src/internal/bytealg/equal_mips64x.s @@ -16,18 +16,82 @@ TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 BEQ R1, R2, eq MOVV size+16(FP), R3 ADDV R1, R3, R4 -loop: - BNE R1, R4, test + + // chunk size is 16 + SGTU $16, R3, R8 + BEQ R0, R8, chunk_entry + +byte_loop: + BNE R1, R4, byte_test MOVV $1, R1 MOVB R1, ret+24(FP) RET -test: +byte_test: MOVBU (R1), R6 ADDV $1, R1 MOVBU (R2), R7 ADDV $1, R2 - BEQ R6, R7, loop + BEQ R6, R7, byte_loop + JMP not_eq + +chunk_entry: + // make sure both a and b are aligned + OR R1, R2, R9 + AND $0x7, R9 + BNE R0, R9, byte_loop + JMP chunk_loop_1 + +chunk_loop: + // chunk size is 16 + SGTU $16, R3, R8 + BNE R0, R8, chunk_tail_8 +chunk_loop_1: + MOVV (R1), R6 + MOVV (R2), R7 + BNE R6, R7, not_eq + MOVV 8(R1), R12 + MOVV 8(R2), R13 + ADDV $16, R1 + ADDV $16, R2 + SUBV $16, R3 + BEQ R12, R13, chunk_loop + JMP not_eq + +chunk_tail_8: + AND $8, R3, R14 + BEQ R0, R14, chunk_tail_4 + MOVV (R1), R6 + MOVV (R2), R7 + BNE R6, R7, not_eq + ADDV $8, R1 + ADDV $8, R2 + +chunk_tail_4: + AND $4, R3, R14 + BEQ R0, R14, chunk_tail_2 + MOVWU (R1), R6 + MOVWU (R2), R7 + BNE R6, R7, not_eq + ADDV $4, R1 + ADDV $4, R2 + +chunk_tail_2: + AND $2, R3, R14 + BEQ R0, R14, chunk_tail_1 + MOVHU (R1), R6 + MOVHU (R2), R7 + BNE R6, R7, not_eq + ADDV $2, R1 + ADDV $2, R2 + +chunk_tail_1: + AND $1, R3, R14 + BEQ R0, R14, eq + MOVBU (R1), R6 + MOVBU (R2), R7 + BEQ R6, R7, eq +not_eq: MOVB R0, ret+24(FP) RET eq: |