aboutsummaryrefslogtreecommitdiff
path: root/src/internal/bytealg/compare_riscv64.s
blob: 7d2f8d6d0b810c12b3c0aca9e4dcd00967c0607b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
#ifndef GOEXPERIMENT_regabiargs
	MOV	a_base+0(FP), X10
	MOV	a_len+8(FP), X11
	MOV	b_base+24(FP), X12
	MOV	b_len+32(FP), X13
	MOV	$ret+48(FP), X14
#else
	// X10 = a_base
	// X11 = a_len
	// X12 = a_cap (unused)
	// X13 = b_base (want in X12)
	// X14 = b_len (want in X13)
	// X15 = b_cap (unused)
	MOV	X13, X12
	MOV	X14, X13
#endif
	JMP	compare<>(SB)

TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
#ifndef GOEXPERIMENT_regabiargs
	MOV	a_base+0(FP), X10
	MOV	a_len+8(FP), X11
	MOV	b_base+16(FP), X12
	MOV	b_len+24(FP), X13
	MOV	$ret+32(FP), X14
#endif
	// X10 = a_base
	// X11 = a_len
	// X12 = b_base
	// X13 = b_len
	JMP	compare<>(SB)

// On entry:
// X10 points to start of a
// X11 length of a
// X12 points to start of b
// X13 length of b
// for non-regabi X14 points to the address to store the return value (-1/0/1)
// for regabi the return value in X10
TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
	BEQ	X10, X12, cmp_len

	MOV	X11, X5
	BGE	X13, X5, use_a_len // X5 = min(len(a), len(b))
	MOV	X13, X5
use_a_len:
	BEQZ	X5, cmp_len

	MOV	$32, X6
	BLT	X5, X6, loop4_check

	// Check alignment - if alignment differs we have to do one byte at a time.
	AND	$3, X10, X7
	AND	$3, X12, X8
	BNE	X7, X8, loop4_check
	BEQZ	X7, loop32_check

	// Check one byte at a time until we reach 8 byte alignment.
	SUB	X7, X5, X5
align:
	ADD	$-1, X7
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	BNEZ	X7, align

loop32_check:
	MOV	$32, X7
	BLT	X5, X7, loop16_check
loop32:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BEQ	X15, X16, loop32a
	JMP	cmp8a
loop32a:
	BEQ	X17, X18, loop32b
	JMP	cmp8b
loop32b:
	MOV	16(X10), X15
	MOV	16(X12), X16
	MOV	24(X10), X17
	MOV	24(X12), X18
	BEQ	X15, X16, loop32c
	JMP	cmp8a
loop32c:
	BEQ	X17, X18, loop32d
	JMP	cmp8b
loop32d:
	ADD	$32, X10
	ADD	$32, X12
	ADD	$-32, X5
	BGE	X5, X7, loop32
	BEQZ	X5, cmp_len

loop16_check:
	MOV	$16, X6
	BLT	X5, X6, loop4_check
loop16:
	MOV	0(X10), X15
	MOV	0(X12), X16
	MOV	8(X10), X17
	MOV	8(X12), X18
	BEQ	X15, X16, loop16a
	JMP	cmp8a
loop16a:
	BEQ	X17, X18, loop16b
	JMP	cmp8b
loop16b:
	ADD	$16, X10
	ADD	$16, X12
	ADD	$-16, X5
	BGE	X5, X6, loop16
	BEQZ	X5, cmp_len

loop4_check:
	MOV	$4, X6
	BLT	X5, X6, loop1
loop4:
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	MOVBU	1(X10), X15
	MOVBU	1(X12), X16
	BEQ	X8, X9, loop4a
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
	JMP	cmp_ret
loop4a:
	BEQ	X15, X16, loop4b
	SLTU	X16, X15, X5
	SLTU	X15, X16, X6
	JMP	cmp_ret
loop4b:
	MOVBU	2(X10), X21
	MOVBU	2(X12), X22
	MOVBU	3(X10), X23
	MOVBU	3(X12), X24
	BEQ	X21, X22, loop4c
	SLTU	X22, X21, X5
	SLTU	X21, X22, X6
	JMP	cmp_ret
loop4c:
	BEQ	X23, X24, loop4d
	SLTU	X24, X23, X5
	SLTU	X23, X24, X6
	JMP	cmp_ret
loop4d:
	ADD	$4, X10
	ADD	$4, X12
	ADD	$-4, X5
	BGE	X5, X6, loop4

loop1:
	BEQZ	X5, cmp_len
	MOVBU	0(X10), X8
	MOVBU	0(X12), X9
	BNE	X8, X9, cmp
	ADD	$1, X10
	ADD	$1, X12
	ADD	$-1, X5
	JMP	loop1

	// Compare 8 bytes of memory in X15/X16 that are known to differ.
cmp8a:
	MOV	$0xff, X19
cmp8a_loop:
	AND	X15, X19, X8
	AND	X16, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8a_loop

	// Compare 8 bytes of memory in X17/X18 that are known to differ.
cmp8b:
	MOV	$0xff, X19
cmp8b_loop:
	AND	X17, X19, X8
	AND	X18, X19, X9
	BNE	X8, X9, cmp
	SLLI	$8, X19
	JMP	cmp8b_loop

cmp_len:
	MOV	X11, X8
	MOV	X13, X9
cmp:
	SLTU	X9, X8, X5
	SLTU	X8, X9, X6
cmp_ret:
	SUB	X5, X6, X10
#ifndef GOEXPERIMENT_regabiargs
	MOV	X10, (X14)
#endif
	RET