aboutsummaryrefslogtreecommitdiff
path: root/src/internal/bytealg/compare_ppc64x.s
blob: fc6f170ca81e2cb08859e8596e0c0c38f6bd4e4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le

#include "go_asm.h"
#include "textflag.h"

// Compare is the register-ABI entry point for comparing two byte slices.
// The result is returned in R3 as -1, 0 or 1.
//
// Arguments arrive as:
//	R3 = a addr, R4 = a len, R5 = a cap (unused)
//	R6 = b addr, R7 = b len, R8 = b cap (unused)
// and are rearranged into the layout the shared compare body expects:
//	R3 = a len, R4 = b len, R5 = a addr, R6 = b addr
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	MOVD	R3, R5			// a addr; saved before R3 is overwritten
	MOVD	R4, R3			// a len
	MOVD	R7, R4			// b len
	CMP	R3, R4, CR6		// CR6 = a len vs b len
	CMP	R5, R6, CR7		// CR7 = a addr vs b addr
	BEQ	CR7, sameaddr		// same address: the lengths alone decide
#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif
sameaddr:
	BEQ	CR6, samelen
	MOVD	$-1, R8			// start from "a is shorter"
	BLT	CR6, setresult
	NEG	R8			// a is longer: flip to 1
setresult:
	MOVD	R8, R3
	RET
samelen:
	MOVD	$0, R3
	RET

// cmpstring is the register-ABI entry point for comparing two strings.
// The result is returned in R3 as -1, 0 or 1.
//
// Arguments arrive as:
//	R3 = a addr, R4 = a len, R5 = b addr, R6 = b len
// and are rearranged into the layout the shared compare body expects:
//	R3 = a len, R4 = b len, R5 = a addr, R6 = b addr
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// Each move frees the register that the next one writes,
	// so the order of this shuffle matters.
	MOVD	R6, R7			// park b len in R7
	MOVD	R5, R6			// b addr
	MOVD	R3, R5			// a addr
	MOVD	R4, R3			// a len
	MOVD	R7, R4			// b len
	CMP	R3, R4, CR6		// CR6 = a len vs b len
	CMP	R5, R6, CR7		// CR7 = a addr vs b addr
	BEQ	CR7, sameaddr		// same address: the lengths alone decide
#ifdef GOARCH_ppc64le
	BR	cmpbodyLE<>(SB)
#else
	BR	cmpbodyBE<>(SB)
#endif
sameaddr:
	BEQ	CR6, samelen
	MOVD	$-1, R8			// start from "a is shorter"
	BLT	CR6, setresult
	NEG	R8			// a is longer: flip to 1
setresult:
	MOVD	R8, R3
	RET

samelen:
	MOVD	$0, R3
	RET

// Do an efficient memcmp for ppc64le
//
// Compares the first min(a len, b len) bytes of a and b. If those
// bytes all match, the result is decided by the length comparison
// that is computed once into CR2 and consulted on the exit paths.
//
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// On exit:
// R3 = return value: -1 if a < b, 0 if a == b, 1 if a > b
//
// Written along the way: R5, R6, R8, R9, R10, R14, R15, R16, CTR,
// CR0 and CR2.
//
// Branch encodings used below:
//   BC 12,N,label  branches when CR bit N is set; bits 8, 9 and 10
//                  are the LT, GT and EQ bits of CR2.
//   BC 8,2,label   decrements CTR and branches when CTR != 0 and the
//                  EQ bit of CR0 (bit 2) is set.
TEXT cmpbodyLE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8		// set up length
	CMP	R3,R4,CR2	// CR2 = a len vs b len, used on the exit paths
	BC	12,8,setuplen	// BLT CR2
	MOVD	R4,R8		// use R4 for comparison len
setuplen:
	// R8 = min(a len, b len): the number of bytes to compare.
	MOVD	R8,CTR		// set up loop counter
	CMP	R8,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)		// cache hint
	DCBT	(R6)
	CMP	R8,$32		// optimize >= 32
	MOVD	R8,R9		// R9 = compare length, consumed by setup8a when < 32
	BLT	setup8a		// 8 byte moves only
setup32a:
	SRADCC	$5,R8,R9	// number of 32 byte chunks
	MOVD	R9,CTR

	// Special processing for 32 bytes or longer.
	// Loading this way is faster and correct as long as the
	// doublewords being compared are equal. Once they
	// are found unequal, reload them in proper byte order
	// to determine greater or less than.
	// R16 tracks the offset (relative to R5/R6) of the doubleword
	// pair currently being tested, for use by cmpne.
loop32a:
	MOVD	0(R5),R9	// doublewords to compare
	MOVD	0(R6),R10	// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10		// bytes equal?
	MOVD	$0,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	16(R5),R9	// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15		// bytes match?
	MOVD	$8,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	24(R5),R14	// get next pair of doublewords
	MOVD    24(R6),R15
	CMPU	R9,R10		// bytes match?
	MOVD	$16,R16		// set up for cmpne
	BNE	cmpne		// further compare for LT or GT
	MOVD	$-8,R16		// for cmpne, R5,R6 already inc by 32
	ADD	$32,R5		// bump up to next 32
	ADD	$32,R6
	CMPU    R14,R15		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BNE	cmpne		// loop exited on a mismatch in the last pair
	ANDCC	$24,R8,R9	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R9,R9	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R9,CTR		// loop count for doublewords
loop8:
	MOVDBR	(R5+R0),R9	// doublewords to compare
	MOVDBR	(R6+R0),R10	// LE compare order
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9	// check for leftover bytes
	MOVD	R9,CTR		// save the ctr
	BNE	simple		// leftover bytes
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simplecheck:
	CMP	R8,$0		// remaining compare length 0
	BNE	simple		// do simple compare
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simple:
	// Byte-at-a-time loop; CTR holds the number of bytes left.
	MOVBZ	0(R5), R9	// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6), R10	// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9, R10
	BC	8,2,simple	// bc ctr <> 0 && cr
	BGT	greater		// 1st > 2nd
	BLT	less		// 1st < 2nd
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,9,greater	// 1st len > 2nd len
	BR	less		// must be less
cmpne:				// only reached when the doublewords differ
	MOVDBR	(R5+R16),R8	// reload in reverse order
	MOVDBR	(R6+R16),R9
	CMPU	R8,R9		// compare correct endianness
	BGT	greater		// here only if NE
less:
	MOVD	$-1, R3		// return value if A < B
	RET
equal:
	MOVD	$0, R3		// return value if A == B
	RET
greater:
	MOVD	$1, R3		// return value if A > B
	RET

// Do an efficient memcmp for ppc64 (BE)
//
// Compares the first min(a len, b len) bytes of a and b. If those
// bytes all match, the result is decided by the length comparison
// that is computed once into CR2 and consulted on the exit paths.
//
// R3 = a len
// R4 = b len
// R5 = a addr
// R6 = b addr
// On exit:
// R3 = return value: -1 if a < b, 0 if a == b, 1 if a > b
//
// Written along the way: R5, R6, R8, R9, R10, R14, R15, CTR,
// CR0 and CR2.
//
// Branch encodings used below:
//   BC 12,N,label  branches when CR bit N is set; bits 8, 9 and 10
//                  are the LT, GT and EQ bits of CR2.
//   BC 8,2,label   decrements CTR and branches when CTR != 0 and the
//                  EQ bit of CR0 (bit 2) is set.
TEXT cmpbodyBE<>(SB),NOSPLIT|NOFRAME,$0-0
	MOVD	R3,R8		// set up length
	CMP	R3,R4,CR2	// CR2 = a len vs b len, used on the exit paths
	BC	12,8,setuplen	// BLT CR2
	MOVD	R4,R8		// use R4 for comparison len
setuplen:
	// R8 = min(a len, b len): the number of bytes to compare.
	MOVD	R8,CTR		// set up loop counter
	CMP	R8,$8		// only optimize >=8
	BLT	simplecheck
	DCBT	(R5)		// cache hint
	DCBT	(R6)
	CMP	R8,$32		// optimize >= 32
	MOVD	R8,R9		// R9 = compare length, consumed by setup8a when < 32
	BLT	setup8a		// 8 byte moves only

setup32a:
	SRADCC	$5,R8,R9	// number of 32 byte chunks
	MOVD	R9,CTR
	// Compare 32 bytes per iteration as four doubleword pairs.
	// The unsigned doubleword compare result is used directly
	// to pick less or greater.
loop32a:
	MOVD	0(R5),R9	// doublewords to compare
	MOVD	0(R6),R10	// get 4 doublewords
	MOVD	8(R5),R14
	MOVD	8(R6),R15
	CMPU	R9,R10		// bytes equal?
	BLT	less		// found to be less
	BGT	greater		// found to be greater
	MOVD	16(R5),R9	// get next pair of doublewords
	MOVD	16(R6),R10
	CMPU	R14,R15		// bytes match?
	BLT	less		// found less
	BGT	greater		// found greater
	MOVD	24(R5),R14	// get next pair of doublewords
	MOVD	24(R6),R15
	CMPU	R9,R10		// bytes match?
	BLT	less		// found to be less
	BGT	greater		// found to be greater
	ADD	$32,R5		// bump up to next 32
	ADD	$32,R6
	CMPU	R14,R15		// bytes match?
	BC	8,2,loop32a	// br ctr and cr
	BLT	less		// with BE, byte ordering is
	BGT	greater		// good for compare
	ANDCC	$24,R8,R9	// Any 8 byte chunks?
	BEQ	leftover	// and result is 0
setup8a:
	SRADCC	$3,R9,R9	// get the 8 byte count
	BEQ	leftover	// shifted value is 0
	MOVD	R9,CTR		// loop count for doublewords
loop8:
	MOVD	(R5),R9		// doublewords to compare
	MOVD	(R6),R10
	ADD	$8,R5
	ADD	$8,R6
	CMPU	R9,R10		// match?
	BC	8,2,loop8	// bt ctr <> 0 && cr
	BGT	greater
	BLT	less
leftover:
	ANDCC	$7,R8,R9	// check for leftover bytes
	MOVD	R9,CTR		// save the ctr
	BNE	simple		// leftover bytes
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,8,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simplecheck:
	CMP	R8,$0		// remaining compare length 0
	BNE	simple		// do simple compare
	BC	12,10,equal	// test CR2 for length comparison
	BC 	12,8,less	// 1st len < 2nd len, result less
	BR	greater		// 1st len > 2nd len must be greater
simple:
	// Byte-at-a-time loop; CTR holds the number of bytes left.
	MOVBZ	0(R5),R9	// get byte from 1st operand
	ADD	$1,R5
	MOVBZ	0(R6),R10	// get byte from 2nd operand
	ADD	$1,R6
	CMPU	R9,R10
	BC	8,2,simple	// bc ctr <> 0 && cr
	BGT	greater		// 1st > 2nd
	BLT	less		// 1st < 2nd
	BC	12,10,equal	// test CR2 for length comparison
	BC	12,9,greater	// 1st len > 2nd len
	// otherwise 1st len < 2nd len: fall through to less
less:
	MOVD	$-1, R3		// return value if A < B
	RET
equal:
	MOVD	$0, R3		// return value if A == B
	RET
greater:
	MOVD	$1, R3		// return value if A > B
	RET