1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
|
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "go_asm.h"
#include "textflag.h"
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes starting at a equal the size bytes starting
// at b. Identical pointers are answered with true right here, without reading
// memory; every other case is handed to memeqbody with a tail jump, and
// memeqbody produces the result on this function's behalf.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT,$0-25
#ifdef GOEXPERIMENT_regabiargs
	// Register arguments on entry: AX = a, BX = b, CX = size.
	CMPQ	AX, BX
	JEQ	same_ptr
	// Shuffle into the registers memeqbody expects (SI = a, DI = b,
	// BX = count). b has to leave BX before size is moved into it.
	MOVQ	BX, DI
	MOVQ	AX, SI
	MOVQ	CX, BX
	JMP	memeqbody<>(SB)
same_ptr:
	MOVQ	$1, AX	// a == b: result is true
	RET
#else
	// Stack arguments: load both pointers, then compare them.
	MOVQ	b+8(FP), DI
	MOVQ	a+0(FP), SI
	CMPQ	SI, DI
	JNE	differ
	MOVB	$1, ret+24(FP)	// a == b: result is true
	RET
differ:
	// memeqbody takes the count in BX and the address of the result
	// byte in AX.
	MOVQ	size+16(FP), BX
	LEAQ	ret+24(FP), AX
	JMP	memeqbody<>(SB)
#endif
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same comparison as memequal, except that the byte count is not passed as
// an argument: it is read from offset 8 of the closure addressed by DX.
// Identical pointers are answered with true right here; every other case is
// handed to memeqbody with a tail jump.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
#ifdef GOEXPERIMENT_regabiargs
	// Register arguments on entry: AX = a, BX = b; size lives at 8(DX).
	CMPQ	AX, BX
	JEQ	same_ptr
	// Shuffle into the registers memeqbody expects (SI = a, DI = b,
	// BX = count). b has to leave BX before the size is loaded into it.
	MOVQ	BX, DI
	MOVQ	AX, SI
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	JMP	memeqbody<>(SB)
same_ptr:
	MOVQ	$1, AX	// a == b: result is true
	RET
#else
	// Stack arguments: load both pointers, then compare them.
	MOVQ	b+8(FP), DI
	MOVQ	a+0(FP), SI
	CMPQ	SI, DI
	JNE	differ
	MOVB	$1, ret+16(FP)	// a == b: result is true
	RET
differ:
	// memeqbody takes the count in BX and the address of the result
	// byte in AX.
	MOVQ	8(DX), BX	// compiler stores size at offset 8 in the closure
	LEAQ	ret+16(FP), AX
	JMP	memeqbody<>(SB)
#endif
// memeqbody is the shared comparison loop behind memequal and
// memequal_varlen. It is entered with a JMP (not a CALL), so its RET returns
// directly to the caller of those functions.
//
// Strategy, chosen by the byte count:
//   count < 8   -> "small": one (possibly shifted) 8-byte load per operand
//   count < 64  -> "bigloop": 8 bytes per iteration, then an overlapping tail
//   count >= 64 -> "hugeloop" (SSE) or "hugeloop_avx2" (if the CPU flag is
//                  set): 64 bytes per iteration, then bigloop for the rest
//
// SI, DI, BX, CX, DX and the vector registers used below are overwritten.
//
// Input:
// a in SI
// b in DI
// count in BX
#ifndef GOEXPERIMENT_regabiargs
// address of result byte in AX
#else
// Output:
// result in AX
#endif
TEXT memeqbody<>(SB),NOSPLIT,$0-0
// Dispatch on size: fewer than 8 bytes cannot use a full 8-byte compare.
CMPQ BX, $8
JB small
// 8..63 bytes: go straight to the 8-byte loop.
CMPQ BX, $64
JB bigloop
// 64 bytes or more: use the AVX2 loop when the CPU feature byte equals 1,
// otherwise fall through into the SSE loop.
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JE hugeloop_avx2
// 64 bytes at a time using xmm registers
hugeloop:
// Loop while at least 64 bytes remain; the remainder goes to bigloop.
CMPQ BX, $64
JB bigloop
// Load four 16-byte chunks from each operand (unaligned loads).
MOVOU (SI), X0
MOVOU (DI), X1
MOVOU 16(SI), X2
MOVOU 16(DI), X3
MOVOU 32(SI), X4
MOVOU 32(DI), X5
MOVOU 48(SI), X6
MOVOU 48(DI), X7
// Byte-wise compare: each result byte is 0xff where the inputs match.
PCMPEQB X1, X0
PCMPEQB X3, X2
PCMPEQB X5, X4
PCMPEQB X7, X6
// AND the four results together so X0 is all-ones only if all 64 bytes match.
PAND X2, X0
PAND X6, X4
PAND X4, X0
// Collapse X0 to a 16-bit mask, one bit per byte.
PMOVMSKB X0, DX
// Advance the pointers and the count before the compare below, so that the
// flags tested by JEQ come from CMPL and not from the arithmetic.
ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, BX
// All 16 mask bits set means the whole 64-byte chunk matched: keep looping.
CMPL DX, $0xffff
JEQ hugeloop
// Mismatch found: report "not equal".
#ifdef GOEXPERIMENT_regabiargs
XORQ AX, AX // return 0
#else
MOVB $0, (AX)
#endif
RET
// 64 bytes at a time using ymm registers
hugeloop_avx2:
// Loop while at least 64 bytes remain; the remainder goes to bigloop via
// bigloop_avx2, which issues VZEROUPPER first.
CMPQ BX, $64
JB bigloop_avx2
// Load two 32-byte chunks from each operand (unaligned loads).
VMOVDQU (SI), Y0
VMOVDQU (DI), Y1
VMOVDQU 32(SI), Y2
VMOVDQU 32(DI), Y3
// Byte-wise compare into Y4/Y5, then AND them into Y6.
VPCMPEQB Y1, Y0, Y4
VPCMPEQB Y2, Y3, Y5
VPAND Y4, Y5, Y6
// Collapse Y6 to a 32-bit mask, one bit per byte.
VPMOVMSKB Y6, DX
// Advance before the compare so JEQ sees the flags from CMPL.
ADDQ $64, SI
ADDQ $64, DI
SUBQ $64, BX
// All 32 mask bits set means the whole 64-byte chunk matched: keep looping.
CMPL DX, $0xffffffff
JEQ hugeloop_avx2
// Mismatch found. Clear the upper halves of the Y registers before leaving
// AVX code, then report "not equal".
VZEROUPPER
#ifdef GOEXPERIMENT_regabiargs
XORQ AX, AX // return 0
#else
MOVB $0, (AX)
#endif
RET
bigloop_avx2:
// Exit from the AVX2 loop with fewer than 64 bytes left: clear the upper
// halves of the Y registers, then fall through into bigloop.
VZEROUPPER
// 8 bytes at a time using 64-bit register
bigloop:
// Loop while more than 8 bytes remain; the final 0-8 bytes are handled by
// leftover.
CMPQ BX, $8
JBE leftover
MOVQ (SI), CX
MOVQ (DI), DX
// Advance before the compare so JEQ sees the flags from CMPQ CX, DX.
ADDQ $8, SI
ADDQ $8, DI
SUBQ $8, BX
CMPQ CX, DX
JEQ bigloop
// Mismatch found: report "not equal".
#ifdef GOEXPERIMENT_regabiargs
XORQ AX, AX // return 0
#else
MOVB $0, (AX)
#endif
RET
// remaining 0-8 bytes
leftover:
// Compare the 8 bytes that END at the end of each buffer (address
// ptr + count - 8). This may re-compare bytes already checked, which is
// harmless. It stays inside the buffers because this label is only reached
// when the original count was at least 8 (smaller counts went to "small").
MOVQ -8(SI)(BX*1), CX
MOVQ -8(DI)(BX*1), DX
CMPQ CX, DX
// Store 1 if the last compare was equal, 0 otherwise.
#ifdef GOEXPERIMENT_regabiargs
SETEQ AX
#else
SETEQ (AX)
#endif
RET
small:
// count is 0..7 here. A zero count compares equal: the CMPQ sets ZF, and
// the SETEQ at "equal" turns that into a result of 1.
CMPQ BX, $0
JEQ equal
// CX = -(8*count). The 64-bit shifts below use only the low 6 bits of CX,
// so the effective shift amount is 64 - 8*count bits (8..56).
LEAQ 0(BX*8), CX
NEGQ CX
// Check the low byte of the address: if it is above 0xf8, an 8-byte load
// starting at SI would run past the end of its 256-byte-aligned block.
CMPB SI, $0xf8
JA si_high
// load at SI won't cross a page boundary.
// The wanted bytes land in the low 8*count bits; the bits above them are
// unrelated data and are discarded by the SHLQ at the end.
MOVQ (SI), SI
JMP si_finish
si_high:
// address ends in 11111xxx. Load up to bytes we want, move to correct position.
// The load ends exactly at SI+count, so it never reads past the buffer; the
// wanted bytes arrive in the high-order bytes and SHRQ moves them down.
MOVQ -8(SI)(BX*1), SI
SHRQ CX, SI
si_finish:
// same for DI.
CMPB DI, $0xf8
JA di_high
MOVQ (DI), DI
JMP di_finish
di_high:
MOVQ -8(DI)(BX*1), DI
SHRQ CX, DI
di_finish:
// The low 8*count bits of DI - SI are zero exactly when the wanted bytes
// match. Shifting left by 64 - 8*count drops every higher bit, so ZF after
// SHLQ is set iff the first count bytes are equal.
SUBQ SI, DI
SHLQ CX, DI
equal:
// Reached with ZF holding the answer, either from CMPQ BX, $0 (count == 0)
// or from the SHLQ above. Store 1 if ZF is set, 0 otherwise.
#ifdef GOEXPERIMENT_regabiargs
SETEQ AX
#else
SETEQ (AX)
#endif
RET
|