author     Michael Munday <munday@ca.ibm.com>  2016-08-20 21:09:53 -0400
committer  Chris Broadfoot <cbro@golang.org>   2016-09-07 17:46:02 +0000
commit     a5add8c7260b5b7d49488eaf60ab49804df095e6 (patch)
tree       dcc4399bec2735cb170fcf69d9a0c8e9cd22f724
parent     e464c08beffc87cb064e48932699042b3dccc491 (diff)
[release-branch.go1.7] hash/crc32: fix optimized s390x implementation
The code wasn't checking to see if the data was still >= 64 bytes long
after aligning it.

Aligning the data is an optimization and we don't actually need to do it.
In fact for smaller sizes it slows things down due to the overhead of
calling the generic function. Therefore for now I have simply removed the
alignment stage.

I have also added a check into the assembly to deliberately trigger a
segmentation fault if the data is too short.

Fixes #16779.

Change-Id: Ic01636d775efc5ec97689f050991cee04ce8fe73
Reviewed-on: https://go-review.googlesource.com/27409
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-on: https://go-review.googlesource.com/28635
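For reference, here is a standalone sketch of the corrected dispatch logic
described above. It is not the patch itself: hasVX, vectorizedCastagnoli and
genericCastagnoli are stand-ins for the package-internal identifiers that
appear in the diff, stubbed out with the portable hash/crc32 API so the
example compiles and runs on its own.

package main

import (
	"fmt"
	"hash/crc32"
)

const (
	vxMinLen    = 64 // vector code path needs at least 64 bytes
	vxAlignMask = 15 // align to 16 bytes
)

// Stand-in for the runtime check of the z/Architecture vector facility.
var hasVX = false

// Stand-ins for the assembly routine and the generic table-driven update;
// both are modelled with the portable API so the sketch runs anywhere.
var castagnoliTab = crc32.MakeTable(crc32.Castagnoli)

func vectorizedCastagnoli(crc uint32, p []byte) uint32 {
	return crc32.Update(crc, castagnoliTab, p)
}

func genericCastagnoli(crc uint32, p []byte) uint32 {
	return crc32.Update(crc, castagnoliTab, p)
}

func updateCastagnoli(crc uint32, p []byte) uint32 {
	// Use the vectorized function only when the vector facility is
	// available and the data is at least vxMinLen (64) bytes long.
	if hasVX && len(p) >= vxMinLen {
		aligned := len(p) & ^vxAlignMask // whole 16-byte blocks only
		crc = vectorizedCastagnoli(crc, p[:aligned])
		p = p[aligned:]
	}
	// Any remaining tail (and all short inputs) use the generic code.
	return genericCastagnoli(crc, p)
}

func main() {
	data := make([]byte, 100)
	fmt.Printf("%08x\n", updateCastagnoli(0, data))
}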
-rw-r--r--  src/hash/crc32/crc32_s390x.go  | 35
-rw-r--r--  src/hash/crc32/crc32_s390x.s   |  7
2 files changed, 15 insertions, 27 deletions
diff --git a/src/hash/crc32/crc32_s390x.go b/src/hash/crc32/crc32_s390x.go
index 2f20690389..befb58f55f 100644
--- a/src/hash/crc32/crc32_s390x.go
+++ b/src/hash/crc32/crc32_s390x.go
@@ -4,14 +4,9 @@
package crc32
-import (
- "unsafe"
-)
-
const (
vxMinLen = 64
- vxAlignment = 16
- vxAlignMask = vxAlignment - 1
+ vxAlignMask = 15 // align to 16 bytes
)
// hasVectorFacility reports whether the machine has the z/Architecture
@@ -49,20 +44,13 @@ func genericIEEE(crc uint32, p []byte) uint32 {
return update(crc, IEEETable, p)
}
-// updateCastagnoli calculates the checksum of p using genericCastagnoli to
-// align the data appropriately for vectorCastagnoli. It avoids using
-// vectorCastagnoli entirely if the length of p is less than or equal to
-// vxMinLen.
+// updateCastagnoli calculates the checksum of p using
+// vectorizedCastagnoli if possible and falling back onto
+// genericCastagnoli as needed.
func updateCastagnoli(crc uint32, p []byte) uint32 {
// Use vectorized function if vector facility is available and
// data length is above threshold.
- if hasVX && len(p) > vxMinLen {
- pAddr := uintptr(unsafe.Pointer(&p[0]))
- if pAddr&vxAlignMask != 0 {
- prealign := vxAlignment - int(pAddr&vxAlignMask)
- crc = genericCastagnoli(crc, p[:prealign])
- p = p[prealign:]
- }
+ if hasVX && len(p) >= vxMinLen {
aligned := len(p) & ^vxAlignMask
crc = vectorizedCastagnoli(crc, p[:aligned])
p = p[aligned:]
@@ -75,19 +63,12 @@ func updateCastagnoli(crc uint32, p []byte) uint32 {
return genericCastagnoli(crc, p)
}
-// updateIEEE calculates the checksum of p using genericIEEE to align the data
-// appropriately for vectorIEEE. It avoids using vectorIEEE entirely if the length
-// of p is less than or equal to vxMinLen.
+// updateIEEE calculates the checksum of p using vectorizedIEEE if
+// possible and falling back onto genericIEEE as needed.
func updateIEEE(crc uint32, p []byte) uint32 {
// Use vectorized function if vector facility is available and
// data length is above threshold.
- if hasVX && len(p) > vxMinLen {
- pAddr := uintptr(unsafe.Pointer(&p[0]))
- if pAddr&vxAlignMask != 0 {
- prealign := vxAlignment - int(pAddr&vxAlignMask)
- crc = genericIEEE(crc, p[:prealign])
- p = p[prealign:]
- }
+ if hasVX && len(p) >= vxMinLen {
aligned := len(p) & ^vxAlignMask
crc = vectorizedIEEE(crc, p[:aligned])
p = p[aligned:]
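The aligned := len(p) & ^vxAlignMask line above rounds the length down to
the nearest multiple of 16, so that only whole 16-byte blocks reach the
vector routine and the tail goes to the generic code. A small, purely
illustrative demonstration of that arithmetic:

package main

import "fmt"

func main() {
	const vxAlignMask = 15 // align to 16 bytes, as in the diff above
	for _, n := range []int{64, 65, 79, 80, 100} {
		aligned := n & ^vxAlignMask // largest multiple of 16 not exceeding n
		fmt.Printf("len=%3d  vector part=%3d  generic tail=%2d\n",
			n, aligned, n-aligned)
	}
}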
diff --git a/src/hash/crc32/crc32_s390x.s b/src/hash/crc32/crc32_s390x.s
index f8d39f3df9..0b830531f7 100644
--- a/src/hash/crc32/crc32_s390x.s
+++ b/src/hash/crc32/crc32_s390x.s
@@ -128,6 +128,10 @@ TEXT vectorizedBody<>(SB),NOSPLIT,$0
VZERO V0
VLVGF $3, R2, V0
+ // Crash if the input size is less than 64-bytes.
+ CMP R4, $64
+ BLT crash
+
// Load a 64-byte data chunk and XOR with CRC
VLM 0(R3), V1, V4 // 64-bytes into V1..V4
@@ -243,3 +247,6 @@ done:
XOR $0xffffffff, R2 // NOTW R2
MOVWZ R2, ret + 32(FP)
RET
+
+crash:
+ MOVD $0, (R0) // input size is less than 64-bytes
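
Not part of the commit, but a small standalone check in the spirit of the
fix: it exercises Castagnoli checksums at lengths just below and above the
64-byte threshold and verifies that splitting each buffer with crc32.Update
gives the same result as checksumming it whole, so the short (generic) and
long (potentially vectorized) paths must agree.

package main

import (
	"fmt"
	"hash/crc32"
)

func main() {
	tab := crc32.MakeTable(crc32.Castagnoli)
	for _, n := range []int{63, 64, 65, 127, 128, 129} {
		buf := make([]byte, n)
		for i := range buf {
			buf[i] = byte(i)
		}
		whole := crc32.Checksum(buf, tab)
		// Composing two Updates must equal checksumming the whole buffer.
		split := crc32.Update(crc32.Update(0, tab, buf[:n/2]), tab, buf[n/2:])
		fmt.Printf("len=%3d whole=%08x split=%08x ok=%v\n",
			n, whole, split, whole == split)
	}
}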