aboutsummaryrefslogtreecommitdiff
path: root/src/internal/bytealg/equal_amd64.s
blob: 6f12d2a1690380d5e92326a96f775e799165b6e5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
#ifdef GOEXPERIMENT_regabiargs
	// AX = a    (want in SI)
	// BX = b    (want in DI)
	// CX = size (want in BX)
	CMPQ	AX, BX
	JNE	neq
	MOVQ	$1, AX	// return 1
	RET
neq:
	MOVQ	AX, SI
	MOVQ	BX, DI
	MOVQ	CX, BX
	JMP	memeqbody<>(SB)
#else
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	CMPQ	SI, DI
	JEQ	eq
	MOVQ	size+16(FP), BX
	LEAQ	ret+24(FP), AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+24(FP)
	RET
#endif

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
#ifdef GOEXPERIMENT_regabiargs
	// AX = a       (want in SI)
	// BX = b       (want in DI)
	// 8(DX) = size (want in BX)
	CMPQ	AX, BX
	JNE	neq
	MOVQ	$1, AX	// return 1
	RET
neq:
	MOVQ	AX, SI
	MOVQ	BX, DI
	MOVQ	8(DX), BX    // compiler stores size at offset 8 in the closure
	JMP	memeqbody<>(SB)
#else
	MOVQ	a+0(FP), SI
	MOVQ	b+8(FP), DI
	CMPQ	SI, DI
	JEQ	eq
	MOVQ	8(DX), BX    // compiler stores size at offset 8 in the closure
	LEAQ	ret+16(FP), AX
	JMP	memeqbody<>(SB)
eq:
	MOVB	$1, ret+16(FP)
	RET
#endif

// Input:
//   a in SI
//   b in DI
//   count in BX
#ifndef GOEXPERIMENT_regabiargs
//   address of result byte in AX
#else
// Output:
//   result in AX
#endif
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPQ	BX, $8
	JB	small
	CMPQ	BX, $64
	JB	bigloop
	CMPB	internalcpu·X86+const_offsetX86HasAVX2(SB), $1
	JE	hugeloop_avx2

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPQ	BX, $64
	JB	bigloop
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
#ifdef GOEXPERIMENT_regabiargs
	XORQ	AX, AX	// return 0
#else
	MOVB	$0, (AX)
#endif
	RET

	// 64 bytes at a time using ymm registers
hugeloop_avx2:
	CMPQ	BX, $64
	JB	bigloop_avx2
	VMOVDQU	(SI), Y0
	VMOVDQU	(DI), Y1
	VMOVDQU	32(SI), Y2
	VMOVDQU	32(DI), Y3
	VPCMPEQB	Y1, Y0, Y4
	VPCMPEQB	Y2, Y3, Y5
	VPAND	Y4, Y5, Y6
	VPMOVMSKB Y6, DX
	ADDQ	$64, SI
	ADDQ	$64, DI
	SUBQ	$64, BX
	CMPL	DX, $0xffffffff
	JEQ	hugeloop_avx2
	VZEROUPPER
#ifdef GOEXPERIMENT_regabiargs
	XORQ	AX, AX	// return 0
#else
	MOVB	$0, (AX)
#endif
	RET

bigloop_avx2:
	VZEROUPPER

	// 8 bytes at a time using 64-bit register
bigloop:
	CMPQ	BX, $8
	JBE	leftover
	MOVQ	(SI), CX
	MOVQ	(DI), DX
	ADDQ	$8, SI
	ADDQ	$8, DI
	SUBQ	$8, BX
	CMPQ	CX, DX
	JEQ	bigloop
#ifdef GOEXPERIMENT_regabiargs
	XORQ	AX, AX	// return 0
#else
	MOVB	$0, (AX)
#endif
	RET

	// remaining 0-8 bytes
leftover:
	MOVQ	-8(SI)(BX*1), CX
	MOVQ	-8(DI)(BX*1), DX
	CMPQ	CX, DX
#ifdef GOEXPERIMENT_regabiargs
	SETEQ	AX
#else
	SETEQ	(AX)
#endif
	RET

small:
	CMPQ	BX, $0
	JEQ	equal

	LEAQ	0(BX*8), CX
	NEGQ	CX

	CMPB	SI, $0xf8
	JA	si_high

	// load at SI won't cross a page boundary.
	MOVQ	(SI), SI
	JMP	si_finish
si_high:
	// address ends in 11111xxx. Load up to bytes we want, move to correct position.
	MOVQ	-8(SI)(BX*1), SI
	SHRQ	CX, SI
si_finish:

	// same for DI.
	CMPB	DI, $0xf8
	JA	di_high
	MOVQ	(DI), DI
	JMP	di_finish
di_high:
	MOVQ	-8(DI)(BX*1), DI
	SHRQ	CX, DI
di_finish:

	SUBQ	SI, DI
	SHLQ	CX, DI
equal:
#ifdef GOEXPERIMENT_regabiargs
	SETEQ	AX
#else
	SETEQ	(AX)
#endif
	RET