internal/bytealg: move IndexByte assembly to the new bytealg package

Move the IndexByte function from the runtime to a new bytealg package. The new package will eventually hold all the optimized assembly for groveling through byte slices and strings. It seems a better home for this code than randomly keeping it in runtime.

Once this is in, the next step is to move the other functions (Compare, Equal, ...).

Update #19792

This change seems complicated enough that we might just declare "not worth it" and abandon. Opinions welcome.

The core assembly is all unchanged, except minor modifications where the code reads cpu feature bits.

The wrapper functions have been cleaned up as they are now actually checked by vet.

Change-Id: I9fa75bee5d85db3a65b3fd3b7997e60367523796
Reviewed-on: https://go-review.googlesource.com/98016
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
author: Keith Randall <khr@google.com> 2018-03-01 16:38:41 -0800
committer: Keith Randall <khr@golang.org> 2018-03-02 22:46:15 +0000
commit: 403ab0f2214f583db84a2dae275389be92072a35 (patch)
tree: 9ff2fc82235a27acd95595f2152f03eec83fa71e /src/runtime/asm_arm64.s
parent: dcedcaa5fb15748cc2e5cb9461fc6b4f4fc942cb (diff)
download: go-403ab0f2214f583db84a2dae275389be92072a35.tar.gz
go-403ab0f2214f583db84a2dae275389be92072a35.zip
1 files changed, 0 insertions, 120 deletions
diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s
index 2e08013097..6abb9945e2 100644
--- a/src/runtime/asm_arm64.s
+++ b/src/runtime/asm_arm64.s
@@ -800,126 +800,6 @@ samebytes:
 //
 // functions for other packages
 //
-TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
-	MOVD	b+0(FP), R0
-	MOVD	b_len+8(FP), R2
-	MOVBU	c+24(FP), R1
-	MOVD	$ret+32(FP), R8
-	B	runtime·indexbytebody<>(SB)
-
-TEXT strings·IndexByte(SB),NOSPLIT,$0-32
-	MOVD	s+0(FP), R0
-	MOVD	s_len+8(FP), R2
-	MOVBU	c+16(FP), R1
-	MOVD	$ret+24(FP), R8
-	B	runtime·indexbytebody<>(SB)
-
-// input:
-//   R0: data
-//   R1: byte to search
-//   R2: data len
-//   R8: address to put result
-TEXT runtime·indexbytebody<>(SB),NOSPLIT,$0
-	// Core algorithm:
-	// For each 32-byte chunk we calculate a 64-bit syndrome value,
-	// with two bits per byte. For each tuple, bit 0 is set if the
-	// relevant byte matched the requested character and bit 1 is
-	// not used (faster than using a 32bit syndrome). Since the bits
-	// in the syndrome reflect exactly the order in which things occur
-	// in the original string, counting trailing zeros allows to
-	// identify exactly which byte has matched.
-
-	CBZ	R2, fail
-	MOVD	R0, R11
-	// Magic constant 0x40100401 allows us to identify
-	// which lane matches the requested byte.
-	// 0x40100401 = ((1<<0) + (4<<8) + (16<<16) + (64<<24))
-	// Different bytes have different bit masks (i.e: 1, 4, 16, 64)
-	MOVD	$0x40100401, R5
-	VMOV	R1, V0.B16
-	// Work with aligned 32-byte chunks
-	BIC	$0x1f, R0, R3
-	VMOV	R5, V5.S4
-	ANDS	$0x1f, R0, R9
-	AND	$0x1f, R2, R10
-	BEQ	loop
-
-	// Input string is not 32-byte aligned. We calculate the
-	// syndrome value for the aligned 32 bytes block containing
-	// the first bytes and mask off the irrelevant part.
-	VLD1.P	(R3), [V1.B16, V2.B16]
-	SUB	$0x20, R9, R4
-	ADDS	R4, R2, R2
-	VCMEQ	V0.B16, V1.B16, V3.B16
-	VCMEQ	V0.B16, V2.B16, V4.B16
-	VAND	V5.B16, V3.B16, V3.B16
-	VAND	V5.B16, V4.B16, V4.B16
-	VADDP	V4.B16, V3.B16, V6.B16 // 256->128
-	VADDP	V6.B16, V6.B16, V6.B16 // 128->64
-	VMOV	V6.D[0], R6
-	// Clear the irrelevant lower bits
-	LSL	$1, R9, R4
-	LSR	R4, R6, R6
-	LSL	R4, R6, R6
-	// The first block can also be the last
-	BLS	masklast
-	// Have we found something already?
-	CBNZ	R6, tail
-
-loop:
-	VLD1.P	(R3), [V1.B16, V2.B16]
-	SUBS	$0x20, R2, R2
-	VCMEQ	V0.B16, V1.B16, V3.B16
-	VCMEQ	V0.B16, V2.B16, V4.B16
-	// If we're out of data we finish regardless of the result
-	BLS	end
-	// Use a fast check for the termination condition
-	VORR	V4.B16, V3.B16, V6.B16
-	VADDP	V6.D2, V6.D2, V6.D2
-	VMOV	V6.D[0], R6
-	// We're not out of data, loop if we haven't found the character
-	CBZ	R6, loop
-
-end:
-	// Termination condition found, let's calculate the syndrome value
-	VAND	V5.B16, V3.B16, V3.B16
-	VAND	V5.B16, V4.B16, V4.B16
-	VADDP	V4.B16, V3.B16, V6.B16
-	VADDP	V6.B16, V6.B16, V6.B16
-	VMOV	V6.D[0], R6
-	// Only do the clear for the last possible block with less than 32 bytes
-	// Condition flags come from SUBS in the loop
-	BHS	tail
-
-masklast:
-	// Clear the irrelevant upper bits
-	ADD	R9, R10, R4
-	AND	$0x1f, R4, R4
-	SUB	$0x20, R4, R4
-	NEG	R4<<1, R4
-	LSL	R4, R6, R6
-	LSR	R4, R6, R6
-
-tail:
-	// Check that we have found a character
-	CBZ	R6, fail
-	// Count the trailing zeros using bit reversing
-	RBIT	R6, R6
-	// Compensate the last post-increment
-	SUB	$0x20, R3, R3
-	// And count the leading zeros
-	CLZ	R6, R6
-	// R6 is twice the offset into the fragment
-	ADD	R6>>1, R3, R0
-	// Compute the offset result
-	SUB	R11, R0, R0
-	MOVD	R0, (R8)
-	RET
-
-fail:
-	MOVD	$-1, R0
-	MOVD	R0, (R8)
-	RET
 
 // Equal(a, b []byte) bool
 TEXT bytes·Equal(SB),NOSPLIT,$0-49
author	Keith Randall <khr@google.com>	2018-03-01 16:38:41 -0800
committer	Keith Randall <khr@golang.org>	2018-03-02 22:46:15 +0000
commit	403ab0f2214f583db84a2dae275389be92072a35 (patch)
tree	9ff2fc82235a27acd95595f2152f03eec83fa71e /src/runtime/asm_arm64.s
parent	dcedcaa5fb15748cc2e5cb9461fc6b4f4fc942cb (diff)
download	go-403ab0f2214f583db84a2dae275389be92072a35.tar.gz go-403ab0f2214f583db84a2dae275389be92072a35.zip