cmd/asm: correct, complete newly added AVX instructions

Use the standard names, for discoverability.
Use the standard register arguments, for correctness.
Implement all possible arguments, for completeness.
Enable the corresponding tests now that everything is standard.
Update the uses in package runtime.

Fixes #14068.

Change-Id: I8e1af9a41e7d02d98c2a82af3d4cdb3e9204824f
Reviewed-on: https://go-review.googlesource.com/18852
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Rob Pike <r@golang.org>
author: Russ Cox <rsc@golang.org> 2016-01-22 22:25:15 -0500
committer: Russ Cox <rsc@golang.org> 2016-01-24 13:55:18 +0000
commit: 8d881b811d8212ffd1d43e296f2a1c1bf78198ab (patch)
tree: f1ccb58e27f036d1ac432d34340577332b964797 /src/runtime/memclr_amd64.s
parent: 7f620a57d01ec4230a69c4ee96d3809cfd6febab (diff)
download: go-8d881b811d8212ffd1d43e296f2a1c1bf78198ab.tar.gz
go-8d881b811d8212ffd1d43e296f2a1c1bf78198ab.zip
1 files changed, 18 insertions, 18 deletions
diff --git a/src/runtime/memclr_amd64.s b/src/runtime/memclr_amd64.s
index 5e78037df6..c257d59b30 100644
--- a/src/runtime/memclr_amd64.s
+++ b/src/runtime/memclr_amd64.s
@@ -65,40 +65,40 @@ loop:
 	JMP	tail
 
 loop_preheader_avx2:
-	VPXOR X0, X0, X0
+	VPXOR Y0, Y0, Y0
 	// For smaller sizes MOVNTDQ may be faster or slower depending on hardware.
 	// For larger sizes it is always faster, even on dual Xeons with 30M cache.
 	// TODO take into account actual LLC size. E. g. glibc uses LLC size/2.
 	CMPQ    BX, $0x2000000
 	JAE     loop_preheader_avx2_huge
 loop_avx2:
-	MOVHDU	X0, 0(DI)
-	MOVHDU	X0, 32(DI)
-	MOVHDU	X0, 64(DI)
-	MOVHDU	X0, 96(DI)
+	VMOVDQU	Y0, 0(DI)
+	VMOVDQU	Y0, 32(DI)
+	VMOVDQU	Y0, 64(DI)
+	VMOVDQU	Y0, 96(DI)
 	SUBQ	$128, BX
 	ADDQ	$128, DI
 	CMPQ	BX, $128
 	JAE	loop_avx2
-	MOVHDU  X0, -32(DI)(BX*1)
-	MOVHDU  X0, -64(DI)(BX*1)
-	MOVHDU  X0, -96(DI)(BX*1)
-	MOVHDU  X0, -128(DI)(BX*1)
+	VMOVDQU  Y0, -32(DI)(BX*1)
+	VMOVDQU  Y0, -64(DI)(BX*1)
+	VMOVDQU  Y0, -96(DI)(BX*1)
+	VMOVDQU  Y0, -128(DI)(BX*1)
 	VZEROUPPER
 	RET
 loop_preheader_avx2_huge:
 	// Align to 32 byte boundary
-	MOVHDU  X0, 0(DI)
+	VMOVDQU  Y0, 0(DI)
 	MOVQ	DI, SI
 	ADDQ	$32, DI
 	ANDQ	$~31, DI
 	SUBQ	DI, SI
 	ADDQ	SI, BX
 loop_avx2_huge:
-	MOVNTHD	X0, 0(DI)
-	MOVNTHD	X0, 32(DI)
-	MOVNTHD	X0, 64(DI)
-	MOVNTHD	X0, 96(DI)
+	VMOVNTDQ	Y0, 0(DI)
+	VMOVNTDQ	Y0, 32(DI)
+	VMOVNTDQ	Y0, 64(DI)
+	VMOVNTDQ	Y0, 96(DI)
 	SUBQ	$128, BX
 	ADDQ	$128, DI
 	CMPQ	BX, $128
@@ -108,10 +108,10 @@ loop_avx2_huge:
 	// should be used in conjunction with MOVNTDQ instructions..."
 	// [1] 64-ia-32-architectures-software-developer-manual-325462.pdf
 	SFENCE
-	MOVHDU  X0, -32(DI)(BX*1)
-	MOVHDU  X0, -64(DI)(BX*1)
-	MOVHDU  X0, -96(DI)(BX*1)
-	MOVHDU  X0, -128(DI)(BX*1)
+	VMOVDQU  Y0, -32(DI)(BX*1)
+	VMOVDQU  Y0, -64(DI)(BX*1)
+	VMOVDQU  Y0, -96(DI)(BX*1)
+	VMOVDQU  Y0, -128(DI)(BX*1)
 	VZEROUPPER
 	RET
author	Russ Cox <rsc@golang.org>	2016-01-22 22:25:15 -0500
committer	Russ Cox <rsc@golang.org>	2016-01-24 13:55:18 +0000
commit	8d881b811d8212ffd1d43e296f2a1c1bf78198ab (patch)
tree	f1ccb58e27f036d1ac432d34340577332b964797 /src/runtime/memclr_amd64.s
parent	7f620a57d01ec4230a69c4ee96d3809cfd6febab (diff)
download	go-8d881b811d8212ffd1d43e296f2a1c1bf78198ab.tar.gz go-8d881b811d8212ffd1d43e296f2a1c1bf78198ab.zip