aboutsummaryrefslogtreecommitdiff
path: root/src/internal/bytealg/equal_arm64.s
blob: cf5cf54e5971a33223f909696503c1bace31baea (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a and the size bytes at b are identical.
// With GOEXPERIMENT_regabiargs the arguments are already in R0 (a), R1 (b)
// and R2 (size) and the result is left in R0; otherwise they are loaded from
// the argument frame and the result byte is written to ret+24(FP).
// Zero-length and same-pointer inputs are answered right here; every other
// input is handed to memeqbody<> with a tail branch.
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
#ifndef GOEXPERIMENT_regabiargs
	MOVD	size+16(FP), R2
#endif
	// short path to handle 0-byte case
	CBZ	R2, equal
#ifndef GOEXPERIMENT_regabiargs
	MOVD	a+0(FP), R0
	MOVD	b+8(FP), R1
	MOVD	$ret+24(FP), R8	// memeqbody<> stores the result byte through R8
#endif
	// short path to handle equal pointers: a region always equals itself,
	// so there is no need to read it (memequal_varlen has the same check)
	CMP	R0, R1
	BEQ	equal
	B	memeqbody<>(SB)
equal:
	MOVD	$1, R0
#ifndef GOEXPERIMENT_regabiargs
	MOVB	R0, ret+24(FP)
#endif
	RET

// memequal_varlen(a, b unsafe.Pointer) bool
//
// Reports whether the memory at a and at b holds the same bytes. The byte
// count is not an argument: it is read from offset 8 of the closure that
// R26 points to. Identical pointers and a zero count yield true without
// touching the data; anything else is decided by memeqbody<>.
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
#ifndef GOEXPERIMENT_regabiargs
	MOVD	a+0(FP), R0
	MOVD	b+8(FP), R1
#endif
	// same address on both sides: equal by definition
	CMP	R1, R0
	BEQ	same
	MOVD	8(R26), R2	// size, placed by the compiler at closure offset 8
	// a non-zero size needs a real comparison
	CBNZ	R2, compare
same:
	MOVD	$1, R0
#ifndef GOEXPERIMENT_regabiargs
	MOVB	R0, ret+16(FP)
#endif
	RET
compare:
#ifndef GOEXPERIMENT_regabiargs
	MOVD	$ret+16(FP), R8	// where memeqbody<> writes the result byte
#endif
	B	memeqbody<>(SB)

// memeqbody<> is the comparison routine shared by memequal and
// memequal_varlen. Both callers in this file branch here only when the
// length is non-zero.
//
// input:
// R0: pointer a
// R1: pointer b
// R2: data len
#ifdef GOEXPERIMENT_regabiargs
// at return: result in R0
#else
// R8: address to put result
#endif
//
// The result is 1 when both regions hold the same bytes and 0 otherwise.
// Registers written here: R0-R7, R9, V0-V11 and the condition flags.

TEXT memeqbody<>(SB),NOSPLIT,$0
	CMP	$1, R2
	// handle 1-byte special case for better performance
	BEQ	one
	CMP	$16, R2
	// handle specially if length < 16
	BLO	tail
	// R3 = len rounded down to a multiple of 64; zero means 16 <= len < 64
	BIC	$0x3f, R2, R3
	CBZ	R3, chunk16
	// work with 64-byte chunks
	ADD	R3, R0, R6	// end of chunks
chunk64_loop:
	// load 64 bytes from each side; .P post-increments R0 and R1
	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]
	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]
	// per 64-bit lane: all ones where the two sides match, zero where they differ
	VCMEQ	V0.D2, V4.D2, V8.D2
	VCMEQ	V1.D2, V5.D2, V9.D2
	VCMEQ	V2.D2, V6.D2, V10.D2
	VCMEQ	V3.D2, V7.D2, V11.D2
	// fold the four results: a lane of V8 stays non-zero only if that lane
	// matched in all four register pairs
	VAND	V8.B16, V9.B16, V8.B16
	VAND	V8.B16, V10.B16, V8.B16
	VAND	V8.B16, V11.B16, V8.B16
	// Set the loop-exit flags here; the VMOV and CBZ instructions below do
	// not modify them, so the BNE closing the loop still sees this compare.
	CMP	R0, R6
	VMOV	V8.D[0], R4
	VMOV	V8.D[1], R5
	// a zero lane means at least one mismatch in this 64-byte chunk
	CBZ	R4, not_equal
	CBZ	R5, not_equal
	BNE	chunk64_loop
	// all full 64-byte chunks matched; keep only the remainder (< 64)
	AND	$0x3f, R2, R2
	CBZ	R2, equal
chunk16:
	// work with 16-byte chunks
	// R3 = remaining len rounded down to a multiple of 16
	BIC	$0xf, R2, R3
	CBZ	R3, tail
	ADD	R3, R0, R6	// end of chunks
chunk16_loop:
	// load two 8-byte words from each side, advancing both pointers by 16
	LDP.P	16(R0), (R4, R5)
	LDP.P	16(R1), (R7, R9)
	// XOR of two words is non-zero exactly when they differ
	EOR	R4, R7
	CBNZ	R7, not_equal
	EOR	R5, R9
	CBNZ	R9, not_equal
	CMP	R0, R6
	BNE	chunk16_loop
	// all full 16-byte chunks matched; keep only the remainder (< 16)
	AND	$0xf, R2, R2
	CBZ	R2, equal
tail:
	// special compare of tail with length < 16
	// bit 3 of the length clear: fewer than 8 bytes left
	TBZ	$3, R2, lt_8
	// 8..15 bytes: compare the first 8 bytes, then the last 8 bytes; the
	// two windows may overlap, which is harmless for an equality test
	MOVD	(R0), R4
	MOVD	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$8, R2, R6	// offset of the last 8 bytes
	MOVD	(R0)(R6), R4
	MOVD	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_8:
	// bit 2 of the length clear: fewer than 4 bytes left
	TBZ	$2, R2, lt_4
	// 4..7 bytes: same first/last window scheme with 4-byte loads
	MOVWU	(R0), R4
	MOVWU	(R1), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	SUB	$4, R2, R6	// offset of the last 4 bytes
	MOVWU	(R0)(R6), R4
	MOVWU	(R1)(R6), R5
	EOR	R4, R5
	CBNZ	R5, not_equal
	B	equal
lt_4:
	// bit 1 of the length clear: at most 1 byte left
	TBZ	$1, R2, lt_2
	// 2 or 3 bytes: compare one halfword and step both pointers past it
	MOVHU.P	2(R0), R4
	MOVHU.P	2(R1), R5
	CMP	R4, R5
	BNE	not_equal
lt_2:
	// bit 0 of the length clear: nothing left to compare
	TBZ	$0, R2, equal
one:
	// compare the single remaining byte
	MOVBU	(R0), R4
	MOVBU	(R1), R5
	CMP	R4, R5
	BNE	not_equal
equal:
	// result = 1 (true)
	MOVD	$1, R0
#ifndef GOEXPERIMENT_regabiargs
	MOVB	R0, (R8)
#endif
	RET
not_equal:
	// result = 0 (false)
#ifdef GOEXPERIMENT_regabiargs
	MOVB	ZR, R0
#else
	MOVB	ZR, (R8)
#endif
	RET