[release-branch.go1.12] runtime: ensure memmove writes pointers atomically on ARM64

If a pointer write is not atomic and the GC is running concurrently, the GC may observe a partially updated pointer, which may point to unallocated or already dead memory. Most pointer writes, like the store instructions generated by the compiler, are already atomic, but we still need to be careful in places like memmove. In memmove, we don't know which bits are pointers (or it is too expensive to query), so we ensure that all aligned pointer-sized units are written atomically. Fixes #36367. Updates #36101. Change-Id: I1b3ca24c6b1ac8a8aaf9ee470115e9a89ec1b00b Reviewed-on: https://go-review.googlesource.com/c/go/+/212626 Reviewed-by: Austin Clements <austin@google.com> (cherry picked from commit ffbc02761abb47106ce88e09290a31513b5f6c8a) Reviewed-on: https://go-review.googlesource.com/c/go/+/213684 Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
author: Cherry Zhang <cherryyz@google.com> 2019-12-27 12:02:00 -0500
committer: Alexander Rakoczy <alex@golang.org> 2020-01-08 22:22:22 +0000
commit: ae78084ae4db4f7728e1615c95dfd91c175c906d (patch)
tree: 28423e6a3f7ebf0a99f8808c3b5ab7864aaacd13
parent: c5af2aa0037b39db801154451a3f70982751d988 (diff)
download: go-ae78084ae4db4f7728e1615c95dfd91c175c906d.tar.gz
go-ae78084ae4db4f7728e1615c95dfd91c175c906d.zip
1 files changed, 37 insertions, 5 deletions
diff --git a/src/runtime/memmove_arm64.s b/src/runtime/memmove_arm64.s
index dcbead8cf4..4b6b4965af 100644
--- a/src/runtime/memmove_arm64.s
+++ b/src/runtime/memmove_arm64.s
@@ -22,7 +22,7 @@ check:
 	CMP	R3, R4
 	BLT	backward
 
-	// Copying forward proceeds by copying R7/8 words then copying R6 bytes.
+	// Copying forward proceeds by copying R7/32 quadwords then R6 <= 31 tail bytes.
 	// R3 and R4 are advanced as we copy.
 
         // (There may be implementations of armv8 where copying by bytes until
@@ -30,11 +30,12 @@ check:
         // optimization, but on the one tested so far (xgene) it did not
         // make a significant difference.)
 
-	CBZ	R7, noforwardlarge	// Do we need to do any doubleword-by-doubleword copying?
+	CBZ	R7, noforwardlarge	// Do we need to do any quadword copying?
 
 	ADD	R3, R7, R9	// R9 points just past where we copy by word
 
 forwardlargeloop:
+	// Copy 32 bytes at a time.
 	LDP.P	32(R4), (R8, R10)
 	STP.P	(R8, R10), 32(R3)
 	LDP	-16(R4), (R11, R12)
@@ -43,10 +44,26 @@ forwardlargeloop:
 	CBNZ	R7, forwardlargeloop
 
 noforwardlarge:
-	CBNZ	R6, forwardtail		// Do we need to do any byte-by-byte copying?
+	CBNZ	R6, forwardtail		// Do we need to copy any tail bytes?
 	RET
 
 forwardtail:
+	// There are R6 <= 31 bytes remaining to copy.
+	// This is large enough to still contain pointers,
+	// which must be copied atomically.
+	// Copy the next 16 bytes, then 8 bytes, then any remaining bytes.
+	TBZ	$4, R6, 3(PC)	// write 16 bytes if R6&16 != 0
+	LDP.P	16(R4), (R8, R10)
+	STP.P	(R8, R10), 16(R3)
+
+	TBZ	$3, R6, 3(PC)	// write 8 bytes if R6&8 != 0
+	MOVD.P	8(R4), R8
+	MOVD.P	R8, 8(R3)
+
+	AND	$7, R6
+	CBNZ	R6, 2(PC)
+	RET
+
 	ADD	R3, R6, R9	// R9 points just past the destination memory
 
 forwardtailloop:
@@ -90,7 +107,7 @@ copy1:
 	RET
 
 backward:
-	// Copying backwards proceeds by copying R6 bytes then copying R7/8 words.
+	// Copying backwards first copies R6 <= 31 tail bytes, then R7/32 quadwords.
 	// R3 and R4 are advanced to the end of the destination/source buffers
 	// respectively and moved back as we copy.
 
@@ -99,13 +116,28 @@ backward:
 
 	CBZ	R6, nobackwardtail	// Do we need to do any byte-by-byte copying?
 
-	SUB	R6, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
+	AND	$7, R6, R12
+	CBZ	R12, backwardtaillarge
+
+	SUB	R12, R3, R9	// R9 points at the lowest destination byte that should be copied by byte.
 backwardtailloop:
+	// Copy sub-pointer-size tail.
 	MOVBU.W	-1(R4), R8
 	MOVBU.W	R8, -1(R3)
 	CMP	R9, R3
 	BNE	backwardtailloop
 
+backwardtaillarge:
+	// Do 8/16-byte write if possible.
+	// See comment at forwardtail.
+	TBZ	$3, R6, 3(PC)
+	MOVD.W	-8(R4), R8
+	MOVD.W	R8, -8(R3)
+
+	TBZ	$4, R6, 3(PC)
+	LDP.W	-16(R4), (R8, R10)
+	STP.W	(R8, R10), -16(R3)
+
 nobackwardtail:
 	CBNZ     R7, backwardlarge	// Do we need to do any doubleword-by-doubleword copying?
 	RET
author	Cherry Zhang <cherryyz@google.com>	2019-12-27 12:02:00 -0500
committer	Alexander Rakoczy <alex@golang.org>	2020-01-08 22:22:22 +0000
commit	ae78084ae4db4f7728e1615c95dfd91c175c906d (patch)
tree	28423e6a3f7ebf0a99f8808c3b5ab7864aaacd13
parent	c5af2aa0037b39db801154451a3f70982751d988 (diff)
download	go-ae78084ae4db4f7728e1615c95dfd91c175c906d.tar.gz go-ae78084ae4db4f7728e1615c95dfd91c175c906d.zip