70 files changed, 3467 insertions, 2207 deletions
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/bits_compat.go b/vendor/golang.org/x/crypto/internal/poly1305/bits_compat.go
new file mode 100644
index 0000000..45b5c96
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/bits_compat.go
@@ -0,0 +1,40 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !go1.13
+// +build !go1.13
+
+package poly1305
+
+// Generic fallbacks for the math/bits intrinsics, copied from
+// src/math/bits/bits.go. They were added in Go 1.12, but Add64 and Sum64 had
+// variable time fallbacks until Go 1.13.
+
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
+	sum = x + y + carry
+	carryOut = ((x & y) | ((x | y) &^ sum)) >> 63
+	return
+}
+
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	diff = x - y - borrow
+	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63
+	return
+}
+
+func bitsMul64(x, y uint64) (hi, lo uint64) {
+	const mask32 = 1<<32 - 1
+	x0 := x & mask32
+	x1 := x >> 32
+	y0 := y & mask32
+	y1 := y >> 32
+	w0 := x0 * y0
+	t := x1*y0 + w0>>32
+	w1 := t & mask32
+	w2 := t >> 32
+	w1 += x0 * y1
+	hi = x1*y1 + w2 + w1>>32
+	lo = x * y
+	return
+}
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/bits_go1.13.go b/vendor/golang.org/x/crypto/internal/poly1305/bits_go1.13.go
new file mode 100644
index 0000000..ed52b34
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/bits_go1.13.go
@@ -0,0 +1,22 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.13
+// +build go1.13
+
+package poly1305
+
+import "math/bits"
+
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
+	return bits.Add64(x, y, carry)
+}
+
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	return bits.Sub64(x, y, borrow)
+}
+
+func bitsMul64(x, y uint64) (hi, lo uint64) {
+	return bits.Mul64(x, y)
+}
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go b/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go
new file mode 100644
index 0000000..f184b67
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go
@@ -0,0 +1,10 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (!amd64 && !ppc64le && !s390x) || !gc || purego
+// +build !amd64,!ppc64le,!s390x !gc purego
+
+package poly1305
+
+type mac struct{ macGeneric }
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/poly1305.go b/vendor/golang.org/x/crypto/internal/poly1305/poly1305.go
new file mode 100644
index 0000000..4aaea81
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/poly1305.go
@@ -0,0 +1,99 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package poly1305 implements Poly1305 one-time message authentication code as
+// specified in https://cr.yp.to/mac/poly1305-20050329.pdf.
+//
+// Poly1305 is a fast, one-time authentication function. It is infeasible for an
+// attacker to generate an authenticator for a message without the key. However, a
+// key must only be used for a single message. Authenticating two different
+// messages with the same key allows an attacker to forge authenticators for other
+// messages with the same key.
+//
+// Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was
+// used with a fixed key in order to generate one-time keys from an nonce.
+// However, in this package AES isn't used and the one-time key is specified
+// directly.
+package poly1305
+
+import "crypto/subtle"
+
+// TagSize is the size, in bytes, of a poly1305 authenticator.
+const TagSize = 16
+
+// Sum generates an authenticator for msg using a one-time key and puts the
+// 16-byte result into out. Authenticating two different messages with the same
+// key allows an attacker to forge messages at will.
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
+	h := New(key)
+	h.Write(m)
+	h.Sum(out[:0])
+}
+
+// Verify returns true if mac is a valid authenticator for m with the given key.
+func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
+	var tmp [16]byte
+	Sum(&tmp, m, key)
+	return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1
+}
+
+// New returns a new MAC computing an authentication
+// tag of all data written to it with the given key.
+// This allows writing the message progressively instead
+// of passing it as a single slice. Common users should use
+// the Sum function instead.
+//
+// The key must be unique for each message, as authenticating
+// two different messages with the same key allows an attacker
+// to forge messages at will.
+func New(key *[32]byte) *MAC {
+	m := &MAC{}
+	initialize(key, &m.macState)
+	return m
+}
+
+// MAC is an io.Writer computing an authentication tag
+// of the data written to it.
+//
+// MAC cannot be used like common hash.Hash implementations,
+// because using a poly1305 key twice breaks its security.
+// Therefore writing data to a running MAC after calling
+// Sum or Verify causes it to panic.
+type MAC struct {
+	mac // platform-dependent implementation
+
+	finalized bool
+}
+
+// Size returns the number of bytes Sum will return.
+func (h *MAC) Size() int { return TagSize }
+
+// Write adds more data to the running message authentication code.
+// It never returns an error.
+//
+// It must not be called after the first call of Sum or Verify.
+func (h *MAC) Write(p []byte) (n int, err error) {
+	if h.finalized {
+		panic("poly1305: write to MAC after Sum or Verify")
+	}
+	return h.mac.Write(p)
+}
+
+// Sum computes the authenticator of all data written to the
+// message authentication code.
+func (h *MAC) Sum(b []byte) []byte {
+	var mac [TagSize]byte
+	h.mac.Sum(&mac)
+	h.finalized = true
+	return append(b, mac[:]...)
+}
+
+// Verify returns whether the authenticator of all data written to
+// the message authentication code matches the expected value.
+func (h *MAC) Verify(expected []byte) bool {
+	var mac [TagSize]byte
+	h.mac.Sum(&mac)
+	h.finalized = true
+	return subtle.ConstantTimeCompare(expected, mac[:]) == 1
+}
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.go b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.go
new file mode 100644
index 0000000..6d52233
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.go
@@ -0,0 +1,48 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc && !purego
+// +build gc,!purego
+
+package poly1305
+
+//go:noescape
+func update(state *macState, msg []byte)
+
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
+// updateGeneric to update.
+//
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
+// using function pointers would carry a major performance cost.
+type mac struct{ macGeneric }
+
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
+	if h.offset > 0 {
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
+		}
+		p = p[n:]
+		h.offset = 0
+		update(&h.macState, h.buffer[:])
+	}
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		update(&h.macState, p[:n])
+		p = p[n:]
+	}
+	if len(p) > 0 {
+		h.offset += copy(h.buffer[h.offset:], p)
+	}
+	return nn, nil
+}
+
+func (h *mac) Sum(out *[16]byte) {
+	state := h.macState
+	if h.offset > 0 {
+		update(&state, h.buffer[:h.offset])
+	}
+	finalize(out, &state.h, &state.s)
+}
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s
index 2edae63..1d74f0f 100644
--- a/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build amd64,!gccgo,!appengine
+//go:build gc && !purego
+// +build gc,!purego
 
 #include "textflag.h"
 
@@ -54,24 +55,17 @@
 	ADCQ  t3, h1;                  \
 	ADCQ  $0, h2
 
-DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
-DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
-GLOBL ·poly1305Mask<>(SB), RODATA, $16
+// func update(state *[7]uint64, msg []byte)
+TEXT ·update(SB), $0-32
+	MOVQ state+0(FP), DI
+	MOVQ msg_base+8(FP), SI
+	MOVQ msg_len+16(FP), R15
 
-// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]key)
-TEXT ·poly1305(SB), $0-32
-	MOVQ out+0(FP), DI
-	MOVQ m+8(FP), SI
-	MOVQ mlen+16(FP), R15
-	MOVQ key+24(FP), AX
-
-	MOVQ 0(AX), R11
-	MOVQ 8(AX), R12
-	ANDQ ·poly1305Mask<>(SB), R11   // r0
-	ANDQ ·poly1305Mask<>+8(SB), R12 // r1
-	XORQ R8, R8                    // h0
-	XORQ R9, R9                    // h1
-	XORQ R10, R10                  // h2
+	MOVQ 0(DI), R8   // h0
+	MOVQ 8(DI), R9   // h1
+	MOVQ 16(DI), R10 // h2
+	MOVQ 24(DI), R11 // r0
+	MOVQ 32(DI), R12 // r1
 
 	CMPQ R15, $16
 	JB   bytes_between_0_and_15
@@ -109,17 +103,7 @@ flush_buffer:
 	JMP  multiply
 
 done:
-	MOVQ    R8, AX
-	MOVQ    R9, BX
-	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
-	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
-	SBBQ    $3, R10
-	CMOVQCS R8, AX
-	CMOVQCS R9, BX
-	MOVQ    key+24(FP), R8
-	ADDQ    16(R8), AX
-	ADCQ    24(R8), BX
-
-	MOVQ AX, 0(DI)
-	MOVQ BX, 8(DI)
+	MOVQ R8, 0(DI)
+	MOVQ R9, 8(DI)
+	MOVQ R10, 16(DI)
 	RET
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_generic.go b/vendor/golang.org/x/crypto/internal/poly1305/sum_generic.go
new file mode 100644
index 0000000..c942a65
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_generic.go
@@ -0,0 +1,310 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file provides the generic implementation of Sum and MAC. Other files
+// might provide optimized assembly implementations of some of this code.
+
+package poly1305
+
+import "encoding/binary"
+
+// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag
+// for a 64 bytes message is approximately
+//
+//     s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r  mod  2¹³⁰ - 5
+//
+// for some secret r and s. It can be computed sequentially like
+//
+//     for len(msg) > 0:
+//         h += read(msg, 16)
+//         h *= r
+//         h %= 2¹³⁰ - 5
+//     return h + s
+//
+// All the complexity is about doing performant constant-time math on numbers
+// larger than any available numeric type.
+
+func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
+	h := newMACGeneric(key)
+	h.Write(msg)
+	h.Sum(out)
+}
+
+func newMACGeneric(key *[32]byte) macGeneric {
+	m := macGeneric{}
+	initialize(key, &m.macState)
+	return m
+}
+
+// macState holds numbers in saturated 64-bit little-endian limbs. That is,
+// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸.
+type macState struct {
+	// h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but
+	// can grow larger during and after rounds. It must, however, remain below
+	// 2 * (2¹³⁰ - 5).
+	h [3]uint64
+	// r and s are the private key components.
+	r [2]uint64
+	s [2]uint64
+}
+
+type macGeneric struct {
+	macState
+
+	buffer [TagSize]byte
+	offset int
+}
+
+// Write splits the incoming message into TagSize chunks, and passes them to
+// update. It buffers incomplete chunks.
+func (h *macGeneric) Write(p []byte) (int, error) {
+	nn := len(p)
+	if h.offset > 0 {
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
+		}
+		p = p[n:]
+		h.offset = 0
+		updateGeneric(&h.macState, h.buffer[:])
+	}
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		updateGeneric(&h.macState, p[:n])
+		p = p[n:]
+	}
+	if len(p) > 0 {
+		h.offset += copy(h.buffer[h.offset:], p)
+	}
+	return nn, nil
+}
+
+// Sum flushes the last incomplete chunk from the buffer, if any, and generates
+// the MAC output. It does not modify its state, in order to allow for multiple
+// calls to Sum, even if no Write is allowed after Sum.
+func (h *macGeneric) Sum(out *[TagSize]byte) {
+	state := h.macState
+	if h.offset > 0 {
+		updateGeneric(&state, h.buffer[:h.offset])
+	}
+	finalize(out, &state.h, &state.s)
+}
+
+// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It
+// clears some bits of the secret coefficient to make it possible to implement
+// multiplication more efficiently.
+const (
+	rMask0 = 0x0FFFFFFC0FFFFFFF
+	rMask1 = 0x0FFFFFFC0FFFFFFC
+)
+
+// initialize loads the 256-bit key into the two 128-bit secret values r and s.
+func initialize(key *[32]byte, m *macState) {
+	m.r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0
+	m.r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1
+	m.s[0] = binary.LittleEndian.Uint64(key[16:24])
+	m.s[1] = binary.LittleEndian.Uint64(key[24:32])
+}
+
+// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
+// bits.Mul64 and bits.Add64 intrinsics.
+type uint128 struct {
+	lo, hi uint64
+}
+
+func mul64(a, b uint64) uint128 {
+	hi, lo := bitsMul64(a, b)
+	return uint128{lo, hi}
+}
+
+func add128(a, b uint128) uint128 {
+	lo, c := bitsAdd64(a.lo, b.lo, 0)
+	hi, c := bitsAdd64(a.hi, b.hi, c)
+	if c != 0 {
+		panic("poly1305: unexpected overflow")
+	}
+	return uint128{lo, hi}
+}
+
+func shiftRightBy2(a uint128) uint128 {
+	a.lo = a.lo>>2 | (a.hi&3)<<62
+	a.hi = a.hi >> 2
+	return a
+}
+
+// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
+// 128 bits of message, it computes
+//
+//     h₊ = (h + m) * r  mod  2¹³⁰ - 5
+//
+// If the msg length is not a multiple of TagSize, it assumes the last
+// incomplete chunk is the final one.
+func updateGeneric(state *macState, msg []byte) {
+	h0, h1, h2 := state.h[0], state.h[1], state.h[2]
+	r0, r1 := state.r[0], state.r[1]
+
+	for len(msg) > 0 {
+		var c uint64
+
+		// For the first step, h + m, we use a chain of bits.Add64 intrinsics.
+		// The resulting value of h might exceed 2¹³⁰ - 5, but will be partially
+		// reduced at the end of the multiplication below.
+		//
+		// The spec requires us to set a bit just above the message size, not to
+		// hide leading zeroes. For full chunks, that's 1 << 128, so we can just
+		// add 1 to the most significant (2¹²⁸) limb, h2.
+		if len(msg) >= TagSize {
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
+			h2 += c + 1
+
+			msg = msg[TagSize:]
+		} else {
+			var buf [TagSize]byte
+			copy(buf[:], msg)
+			buf[len(msg)] = 1
+
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
+			h2 += c
+
+			msg = nil
+		}
+
+		// Multiplication of big number limbs is similar to elementary school
+		// columnar multiplication. Instead of digits, there are 64-bit limbs.
+		//
+		// We are multiplying a 3 limbs number, h, by a 2 limbs number, r.
+		//
+		//                        h2    h1    h0  x
+		//                              r1    r0  =
+		//                       ----------------
+		//                      h2r0  h1r0  h0r0     <-- individual 128-bit products
+		//            +   h2r1  h1r1  h0r1
+		//               ------------------------
+		//                 m3    m2    m1    m0      <-- result in 128-bit overlapping limbs
+		//               ------------------------
+		//         m3.hi m2.hi m1.hi m0.hi           <-- carry propagation
+		//     +         m3.lo m2.lo m1.lo m0.lo
+		//        -------------------------------
+		//           t4    t3    t2    t1    t0      <-- final result in 64-bit limbs
+		//
+		// The main difference from pen-and-paper multiplication is that we do
+		// carry propagation in a separate step, as if we wrote two digit sums
+		// at first (the 128-bit limbs), and then carried the tens all at once.
+
+		h0r0 := mul64(h0, r0)
+		h1r0 := mul64(h1, r0)
+		h2r0 := mul64(h2, r0)
+		h0r1 := mul64(h0, r1)
+		h1r1 := mul64(h1, r1)
+		h2r1 := mul64(h2, r1)
+
+		// Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their
+		// top 4 bits cleared by rMask{0,1}, we know that their product is not going
+		// to overflow 64 bits, so we can ignore the high part of the products.
+		//
+		// This also means that the product doesn't have a fifth limb (t4).
+		if h2r0.hi != 0 {
+			panic("poly1305: unexpected overflow")
+		}
+		if h2r1.hi != 0 {
+			panic("poly1305: unexpected overflow")
+		}
+
+		m0 := h0r0
+		m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again
+		m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1.
+		m3 := h2r1
+
+		t0 := m0.lo
+		t1, c := bitsAdd64(m1.lo, m0.hi, 0)
+		t2, c := bitsAdd64(m2.lo, m1.hi, c)
+		t3, _ := bitsAdd64(m3.lo, m2.hi, c)
+
+		// Now we have the result as 4 64-bit limbs, and we need to reduce it
+		// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do
+		// a cheap partial reduction according to the reduction identity
+		//
+		//     c * 2¹³⁰ + n  =  c * 5 + n  mod  2¹³⁰ - 5
+		//
+		// because 2¹³⁰ = 5 mod 2¹³⁰ - 5. Partial reduction since the result is
+		// likely to be larger than 2¹³⁰ - 5, but still small enough to fit the
+		// assumptions we make about h in the rest of the code.
+		//
+		// See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23
+
+		// We split the final result at the 2¹³⁰ mark into h and cc, the carry.
+		// Note that the carry bits are effectively shifted left by 2, in other
+		// words, cc = c * 4 for the c in the reduction identity.
+		h0, h1, h2 = t0, t1, t2&maskLow2Bits
+		cc := uint128{t2 & maskNotLow2Bits, t3}
+
+		// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.
+
+		h0, c = bitsAdd64(h0, cc.lo, 0)
+		h1, c = bitsAdd64(h1, cc.hi, c)
+		h2 += c
+
+		cc = shiftRightBy2(cc)
+
+		h0, c = bitsAdd64(h0, cc.lo, 0)
+		h1, c = bitsAdd64(h1, cc.hi, c)
+		h2 += c
+
+		// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most
+		//
+		//     5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1
+	}
+
+	state.h[0], state.h[1], state.h[2] = h0, h1, h2
+}
+
+const (
+	maskLow2Bits    uint64 = 0x0000000000000003
+	maskNotLow2Bits uint64 = ^maskLow2Bits
+)
+
+// select64 returns x if v == 1 and y if v == 0, in constant time.
+func select64(v, x, y uint64) uint64 { return ^(v-1)&x | (v-1)&y }
+
+// [p0, p1, p2] is 2¹³⁰ - 5 in little endian order.
+const (
+	p0 = 0xFFFFFFFFFFFFFFFB
+	p1 = 0xFFFFFFFFFFFFFFFF
+	p2 = 0x0000000000000003
+)
+
+// finalize completes the modular reduction of h and computes
+//
+//     out = h + s  mod  2¹²⁸
+//
+func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
+	h0, h1, h2 := h[0], h[1], h[2]
+
+	// After the partial reduction in updateGeneric, h might be more than
+	// 2¹³⁰ - 5, but will be less than 2 * (2¹³⁰ - 5). To complete the reduction
+	// in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the
+	// result if the subtraction underflows, and t otherwise.
+
+	hMinusP0, b := bitsSub64(h0, p0, 0)
+	hMinusP1, b := bitsSub64(h1, p1, b)
+	_, b = bitsSub64(h2, p2, b)
+
+	// h = h if h < p else h - p
+	h0 = select64(b, h0, hMinusP0)
+	h1 = select64(b, h1, hMinusP1)
+
+	// Finally, we compute the last Poly1305 step
+	//
+	//     tag = h + s  mod  2¹²⁸
+	//
+	// by just doing a wide addition with the 128 low bits of h and discarding
+	// the overflow.
+	h0, c := bitsAdd64(h0, s[0], 0)
+	h1, _ = bitsAdd64(h1, s[1], c)
+
+	binary.LittleEndian.PutUint64(out[0:8], h0)
+	binary.LittleEndian.PutUint64(out[8:16], h1)
+}
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.go b/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.go
new file mode 100644
index 0000000..4a06994
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.go
@@ -0,0 +1,48 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc && !purego
+// +build gc,!purego
+
+package poly1305
+
+//go:noescape
+func update(state *macState, msg []byte)
+
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
+// updateGeneric to update.
+//
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
+// using function pointers would carry a major performance cost.
+type mac struct{ macGeneric }
+
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
+	if h.offset > 0 {
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
+		}
+		p = p[n:]
+		h.offset = 0
+		update(&h.macState, h.buffer[:])
+	}
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		update(&h.macState, p[:n])
+		p = p[n:]
+	}
+	if len(p) > 0 {
+		h.offset += copy(h.buffer[h.offset:], p)
+	}
+	return nn, nil
+}
+
+func (h *mac) Sum(out *[16]byte) {
+	state := h.macState
+	if h.offset > 0 {
+		update(&state, h.buffer[:h.offset])
+	}
+	finalize(out, &state.h, &state.s)
+}
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.s
new file mode 100644
index 0000000..58422aa
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.s
@@ -0,0 +1,182 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc && !purego
+// +build gc,!purego
+
+#include "textflag.h"
+
+// This was ported from the amd64 implementation.
+
+#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
+	MOVD (msg), t0;  \
+	MOVD 8(msg), t1; \
+	MOVD $1, t2;     \
+	ADDC t0, h0, h0; \
+	ADDE t1, h1, h1; \
+	ADDE t2, h2;     \
+	ADD  $16, msg
+
+#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
+	MULLD  r0, h0, t0;  \
+	MULLD  r0, h1, t4;  \
+	MULHDU r0, h0, t1;  \
+	MULHDU r0, h1, t5;  \
+	ADDC   t4, t1, t1;  \
+	MULLD  r0, h2, t2;  \
+	ADDZE  t5;          \
+	MULHDU r1, h0, t4;  \
+	MULLD  r1, h0, h0;  \
+	ADD    t5, t2, t2;  \
+	ADDC   h0, t1, t1;  \
+	MULLD  h2, r1, t3;  \
+	ADDZE  t4, h0;      \
+	MULHDU r1, h1, t5;  \
+	MULLD  r1, h1, t4;  \
+	ADDC   t4, t2, t2;  \
+	ADDE   t5, t3, t3;  \
+	ADDC   h0, t2, t2;  \
+	MOVD   $-4, t4;     \
+	MOVD   t0, h0;      \
+	MOVD   t1, h1;      \
+	ADDZE  t3;          \
+	ANDCC  $3, t2, h2;  \
+	AND    t2, t4, t0;  \
+	ADDC   t0, h0, h0;  \
+	ADDE   t3, h1, h1;  \
+	SLD    $62, t3, t4; \
+	SRD    $2, t2;      \
+	ADDZE  h2;          \
+	OR     t4, t2, t2;  \
+	SRD    $2, t3;      \
+	ADDC   t2, h0, h0;  \
+	ADDE   t3, h1, h1;  \
+	ADDZE  h2
+
+DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
+DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
+GLOBL ·poly1305Mask<>(SB), RODATA, $16
+
+// func update(state *[7]uint64, msg []byte)
+TEXT ·update(SB), $0-32
+	MOVD state+0(FP), R3
+	MOVD msg_base+8(FP), R4
+	MOVD msg_len+16(FP), R5
+
+	MOVD 0(R3), R8   // h0
+	MOVD 8(R3), R9   // h1
+	MOVD 16(R3), R10 // h2
+	MOVD 24(R3), R11 // r0
+	MOVD 32(R3), R12 // r1
+
+	CMP R5, $16
+	BLT bytes_between_0_and_15
+
+loop:
+	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
+
+multiply:
+	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
+	ADD $-16, R5
+	CMP R5, $16
+	BGE loop
+
+bytes_between_0_and_15:
+	CMP  R5, $0
+	BEQ  done
+	MOVD $0, R16 // h0
+	MOVD $0, R17 // h1
+
+flush_buffer:
+	CMP R5, $8
+	BLE just1
+
+	MOVD $8, R21
+	SUB  R21, R5, R21
+
+	// Greater than 8 -- load the rightmost remaining bytes in msg
+	// and put into R17 (h1)
+	MOVD (R4)(R21), R17
+	MOVD $16, R22
+
+	// Find the offset to those bytes
+	SUB R5, R22, R22
+	SLD $3, R22
+
+	// Shift to get only the bytes in msg
+	SRD R22, R17, R17
+
+	// Put 1 at high end
+	MOVD $1, R23
+	SLD  $3, R21
+	SLD  R21, R23, R23
+	OR   R23, R17, R17
+
+	// Remainder is 8
+	MOVD $8, R5
+
+just1:
+	CMP R5, $8
+	BLT less8
+
+	// Exactly 8
+	MOVD (R4), R16
+
+	CMP R17, $0
+
+	// Check if we've already set R17; if not
+	// set 1 to indicate end of msg.
+	BNE  carry
+	MOVD $1, R17
+	BR   carry
+
+less8:
+	MOVD  $0, R16   // h0
+	MOVD  $0, R22   // shift count
+	CMP   R5, $4
+	BLT   less4
+	MOVWZ (R4), R16
+	ADD   $4, R4
+	ADD   $-4, R5
+	MOVD  $32, R22
+
+less4:
+	CMP   R5, $2
+	BLT   less2
+	MOVHZ (R4), R21
+	SLD   R22, R21, R21
+	OR    R16, R21, R16
+	ADD   $16, R22
+	ADD   $-2, R5
+	ADD   $2, R4
+
+less2:
+	CMP   R5, $0
+	BEQ   insert1
+	MOVBZ (R4), R21
+	SLD   R22, R21, R21
+	OR    R16, R21, R16
+	ADD   $8, R22
+
+insert1:
+	// Insert 1 at end of msg
+	MOVD $1, R21
+	SLD  R22, R21, R21
+	OR   R16, R21, R16
+
+carry:
+	// Add new values to h0, h1, h2
+	ADDC  R16, R8
+	ADDE  R17, R9
+	ADDZE R10, R10
+	MOVD  $16, R5
+	ADD   R5, R4
+	BR    multiply
+
+done:
+	// Save h0, h1, h2 in state
+	MOVD R8, 0(R3)
+	MOVD R9, 8(R3)
+	MOVD R10, 16(R3)
+	RET
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_s390x.go b/vendor/golang.org/x/crypto/internal/poly1305/sum_s390x.go
new file mode 100644
index 0000000..62cc9f8
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_s390x.go
@@ -0,0 +1,76 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc && !purego
+// +build gc,!purego
+
+package poly1305
+
+import (
+	"golang.org/x/sys/cpu"
+)
+
+// updateVX is an assembly implementation of Poly1305 that uses vector
+// instructions. It must only be called if the vector facility (vx) is
+// available.
+//go:noescape
+func updateVX(state *macState, msg []byte)
+
+// mac is a replacement for macGeneric that uses a larger buffer and redirects
+// calls that would have gone to updateGeneric to updateVX if the vector
+// facility is installed.
+//
+// A larger buffer is required for good performance because the vector
+// implementation has a higher fixed cost per call than the generic
+// implementation.
+type mac struct {
+	macState
+
+	buffer [16 * TagSize]byte // size must be a multiple of block size (16)
+	offset int
+}
+
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
+	if h.offset > 0 {
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < len(h.buffer) {
+			h.offset += n
+			return nn, nil
+		}
+		p = p[n:]
+		h.offset = 0
+		if cpu.S390X.HasVX {
+			updateVX(&h.macState, h.buffer[:])
+		} else {
+			updateGeneric(&h.macState, h.buffer[:])
+		}
+	}
+
+	tail := len(p) % len(h.buffer) // number of bytes to copy into buffer
+	body := len(p) - tail          // number of bytes to process now
+	if body > 0 {
+		if cpu.S390X.HasVX {
+			updateVX(&h.macState, p[:body])
+		} else {
+			updateGeneric(&h.macState, p[:body])
+		}
+	}
+	h.offset = copy(h.buffer[:], p[body:]) // copy tail bytes - can be 0
+	return nn, nil
+}
+
+func (h *mac) Sum(out *[TagSize]byte) {
+	state := h.macState
+	remainder := h.buffer[:h.offset]
+
+	// Use the generic implementation if we have 2 or fewer blocks left
+	// to sum. The vector implementation has a higher startup time.
+	if cpu.S390X.HasVX && len(remainder) > 2*TagSize {
+		updateVX(&state, remainder)
+	} else if len(remainder) > 0 {
+		updateGeneric(&state, remainder)
+	}
+	finalize(out, &state.h, &state.s)
+}
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_s390x.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_s390x.s
new file mode 100644
index 0000000..aa9e049
--- /dev/null
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_s390x.s
@@ -0,0 +1,504 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc && !purego
+// +build gc,!purego
+
+#include "textflag.h"
+
+// This implementation of Poly1305 uses the vector facility (vx)
+// to process up to 2 blocks (32 bytes) per iteration using an
+// algorithm based on the one described in:
+//
+// NEON crypto, Daniel J. Bernstein & Peter Schwabe
+// https://cryptojedi.org/papers/neoncrypto-20120320.pdf
+//
+// This algorithm uses 5 26-bit limbs to represent a 130-bit
+// value. These limbs are, for the most part, zero extended and
+// placed into 64-bit vector register elements. Each vector
+// register is 128-bits wide and so holds 2 of these elements.
+// Using 26-bit limbs allows us plenty of headroom to accommodate
+// accumulations before and after multiplication without
+// overflowing either 32-bits (before multiplication) or 64-bits
+// (after multiplication).
+//
+// In order to parallelise the operations required to calculate
+// the sum we use two separate accumulators and then sum those
+// in an extra final step. For compatibility with the generic
+// implementation we perform this summation at the end of every
+// updateVX call.
+//
+// To use two accumulators we must multiply the message blocks
+// by r² rather than r. Only the final message block should be
+// multiplied by r.
+//
+// Example:
+//
+// We want to calculate the sum (h) for a 64 byte message (m):
+//
+//   h = m[0:16]r⁴ + m[16:32]r³ + m[32:48]r² + m[48:64]r
+//
+// To do this we split the calculation into the even indices
+// and odd indices of the message. These form our SIMD 'lanes':
+//
+//   h = m[ 0:16]r⁴ + m[32:48]r² +   <- lane 0
+//       m[16:32]r³ + m[48:64]r      <- lane 1
+//
+// To calculate this iteratively we refactor so that both lanes
+// are written in terms of r² and r:
+//
+//   h = (m[ 0:16]r² + m[32:48])r² + <- lane 0
+//       (m[16:32]r² + m[48:64])r    <- lane 1
+//                ^             ^
+//                |             coefficients for second iteration
+//                coefficients for first iteration
+//
+// So in this case we would have two iterations. In the first
+// both lanes are multiplied by r². In the second only the
+// first lane is multiplied by r² and the second lane is
+// instead multiplied by r. This gives use the odd and even
+// powers of r that we need from the original equation.
+//
+// Notation:
+//
+//   h - accumulator
+//   r - key
+//   m - message
+//
+//   [a, b]       - SIMD register holding two 64-bit values
+//   [a, b, c, d] - SIMD register holding four 32-bit values
+//   xᵢ[n]        - limb n of variable x with bit width i
+//
+// Limbs are expressed in little endian order, so for 26-bit
+// limbs x₂₆[4] will be the most significant limb and x₂₆[0]
+// will be the least significant limb.
+
+// masking constants
+#define MOD24 V0 // [0x0000000000ffffff, 0x0000000000ffffff] - mask low 24-bits
+#define MOD26 V1 // [0x0000000003ffffff, 0x0000000003ffffff] - mask low 26-bits
+
+// expansion constants (see EXPAND macro)
+#define EX0 V2
+#define EX1 V3
+#define EX2 V4
+
+// key (r², r or 1 depending on context)
+#define R_0 V5
+#define R_1 V6
+#define R_2 V7
+#define R_3 V8
+#define R_4 V9
+
+// precalculated coefficients (5r², 5r or 0 depending on context)
+#define R5_1 V10
+#define R5_2 V11
+#define R5_3 V12
+#define R5_4 V13
+
+// message block (m)
+#define M_0 V14
+#define M_1 V15
+#define M_2 V16
+#define M_3 V17
+#define M_4 V18
+
+// accumulator (h)
+#define H_0 V19
+#define H_1 V20
+#define H_2 V21
+#define H_3 V22
+#define H_4 V23
+
+// temporary registers (for short-lived values)
+#define T_0 V24
+#define T_1 V25
+#define T_2 V26
+#define T_3 V27
+#define T_4 V28
+
+GLOBL ·constants<>(SB), RODATA, $0x30
+// EX0
+DATA ·constants<>+0x00(SB)/8, $0x0006050403020100
+DATA ·constants<>+0x08(SB)/8, $0x1016151413121110
+// EX1
+DATA ·constants<>+0x10(SB)/8, $0x060c0b0a09080706
+DATA ·constants<>+0x18(SB)/8, $0x161c1b1a19181716
+// EX2
+DATA ·constants<>+0x20(SB)/8, $0x0d0d0d0d0d0f0e0d
+DATA ·constants<>+0x28(SB)/8, $0x1d1d1d1d1d1f1e1d
+
+// MULTIPLY multiplies each lane of f and g, partially reduced
+// modulo 2¹³⁰ - 5. The result, h, consists of partial products
+// in each lane that need to be reduced further to produce the
+// final result.
+//
+//   h₁₃₀ = (f₁₃₀g₁₃₀) % 2¹³⁰ + (5f₁₃₀g₁₃₀) / 2¹³⁰
+//
+// Note that the multiplication by 5 of the high bits is
+// achieved by precalculating the multiplication of four of the
+// g coefficients by 5. These are g51-g54.
+#define MULTIPLY(f0, f1, f2, f3, f4, g0, g1, g2, g3, g4, g51, g52, g53, g54, h0, h1, h2, h3, h4) \
+	VMLOF  f0, g0, h0        \
+	VMLOF  f0, g3, h3        \
+	VMLOF  f0, g1, h1        \
+	VMLOF  f0, g4, h4        \
+	VMLOF  f0, g2, h2        \
+	VMLOF  f1, g54, T_0      \
+	VMLOF  f1, g2, T_3       \
+	VMLOF  f1, g0, T_1       \
+	VMLOF  f1, g3, T_4       \
+	VMLOF  f1, g1, T_2       \
+	VMALOF f2, g53, h0, h0   \
+	VMALOF f2, g1, h3, h3    \
+	VMALOF f2, g54, h1, h1   \
+	VMALOF f2, g2, h4, h4    \
+	VMALOF f2, g0, h2, h2    \
+	VMALOF f3, g52, T_0, T_0 \
+	VMALOF f3, g0, T_3, T_3  \
+	VMALOF f3, g53, T_1, T_1 \
+	VMALOF f3, g1, T_4, T_4  \
+	VMALOF f3, g54, T_2, T_2 \
+	VMALOF f4, g51, h0, h0   \
+	VMALOF f4, g54, h3, h3   \
+	VMALOF f4, g52, h1, h1   \
+	VMALOF f4, g0, h4, h4    \
+	VMALOF f4, g53, h2, h2   \
+	VAG    T_0, h0, h0       \
+	VAG    T_3, h3, h3       \
+	VAG    T_1, h1, h1       \
+	VAG    T_4, h4, h4       \
+	VAG    T_2, h2, h2
+
+// REDUCE performs the following carry operations in four
+// stages, as specified in Bernstein & Schwabe:
+//
+//   1: h₂₆[0]->h₂₆[1] h₂₆[3]->h₂₆[4]
+//   2: h₂₆[1]->h₂₆[2] h₂₆[4]->h₂₆[0]
+//   3: h₂₆[0]->h₂₆[1] h₂₆[2]->h₂₆[3]
+//   4: h₂₆[3]->h₂₆[4]
+//
+// The result is that all of the limbs are limited to 26-bits
+// except for h₂₆[1] and h₂₆[4] which are limited to 27-bits.
+//
+// Note that although each limb is aligned at 26-bit intervals
+// they may contain values that exceed 2²⁶ - 1, hence the need
+// to carry the excess bits in each limb.
+#define REDUCE(h0, h1, h2, h3, h4) \
+	VESRLG $26, h0, T_0  \
+	VESRLG $26, h3, T_1  \
+	VN     MOD26, h0, h0 \
+	VN     MOD26, h3, h3 \
+	VAG    T_0, h1, h1   \
+	VAG    T_1, h4, h4   \
+	VESRLG $26, h1, T_2  \
+	VESRLG $26, h4, T_3  \
+	VN     MOD26, h1, h1 \
+	VN     MOD26, h4, h4 \
+	VESLG  $2, T_3, T_4  \
+	VAG    T_3, T_4, T_4 \
+	VAG    T_2, h2, h2   \
+	VAG    T_4, h0, h0   \
+	VESRLG $26, h2, T_0  \
+	VESRLG $26, h0, T_1  \
+	VN     MOD26, h2, h2 \
+	VN     MOD26, h0, h0 \
+	VAG    T_0, h3, h3   \
+	VAG    T_1, h1, h1   \
+	VESRLG $26, h3, T_2  \
+	VN     MOD26, h3, h3 \
+	VAG    T_2, h4, h4
+
+// EXPAND splits the 128-bit little-endian values in0 and in1
+// into 26-bit big-endian limbs and places the results into
+// the first and second lane of d₂₆[0:4] respectively.
+//
+// The EX0, EX1 and EX2 constants are arrays of byte indices
+// for permutation. The permutation both reverses the bytes
+// in the input and ensures the bytes are copied into the
+// destination limb ready to be shifted into their final
+// position.
+#define EXPAND(in0, in1, d0, d1, d2, d3, d4) \
+	VPERM  in0, in1, EX0, d0 \
+	VPERM  in0, in1, EX1, d2 \
+	VPERM  in0, in1, EX2, d4 \
+	VESRLG $26, d0, d1       \
+	VESRLG $30, d2, d3       \
+	VESRLG $4, d2, d2        \
+	VN     MOD26, d0, d0     \ // [in0₂₆[0], in1₂₆[0]]
+	VN     MOD26, d3, d3     \ // [in0₂₆[3], in1₂₆[3]]
+	VN     MOD26, d1, d1     \ // [in0₂₆[1], in1₂₆[1]]
+	VN     MOD24, d4, d4     \ // [in0₂₆[4], in1₂₆[4]]
+	VN     MOD26, d2, d2     // [in0₂₆[2], in1₂₆[2]]
+
+// func updateVX(state *macState, msg []byte)
+TEXT ·updateVX(SB), NOSPLIT, $0
+	MOVD state+0(FP), R1
+	LMG  msg+8(FP), R2, R3 // R2=msg_base, R3=msg_len
+
+	// load EX0, EX1 and EX2
+	MOVD $·constants<>(SB), R5
+	VLM  (R5), EX0, EX2
+
+	// generate masks
+	VGMG $(64-24), $63, MOD24 // [0x00ffffff, 0x00ffffff]
+	VGMG $(64-26), $63, MOD26 // [0x03ffffff, 0x03ffffff]
+
+	// load h (accumulator) and r (key) from state
+	VZERO T_1               // [0, 0]
+	VL    0(R1), T_0        // [h₆₄[0], h₆₄[1]]
+	VLEG  $0, 16(R1), T_1   // [h₆₄[2], 0]
+	VL    24(R1), T_2       // [r₆₄[0], r₆₄[1]]
+	VPDI  $0, T_0, T_2, T_3 // [h₆₄[0], r₆₄[0]]
+	VPDI  $5, T_0, T_2, T_4 // [h₆₄[1], r₆₄[1]]
+
+	// unpack h and r into 26-bit limbs
+	// note: h₆₄[2] may have the low 3 bits set, so h₂₆[4] is a 27-bit value
+	VN     MOD26, T_3, H_0            // [h₂₆[0], r₂₆[0]]
+	VZERO  H_1                        // [0, 0]
+	VZERO  H_3                        // [0, 0]
+	VGMG   $(64-12-14), $(63-12), T_0 // [0x03fff000, 0x03fff000] - 26-bit mask with low 12 bits masked out
+	VESLG  $24, T_1, T_1              // [h₆₄[2]<<24, 0]
+	VERIMG $-26&63, T_3, MOD26, H_1   // [h₂₆[1], r₂₆[1]]
+	VESRLG $+52&63, T_3, H_2          // [h₂₆[2], r₂₆[2]] - low 12 bits only
+	VERIMG $-14&63, T_4, MOD26, H_3   // [h₂₆[1], r₂₆[1]]
+	VESRLG $40, T_4, H_4              // [h₂₆[4], r₂₆[4]] - low 24 bits only
+	VERIMG $+12&63, T_4, T_0, H_2     // [h₂₆[2], r₂₆[2]] - complete
+	VO     T_1, H_4, H_4              // [h₂₆[4], r₂₆[4]] - complete
+
+	// replicate r across all 4 vector elements
+	VREPF $3, H_0, R_0 // [r₂₆[0], r₂₆[0], r₂₆[0], r₂₆[0]]
+	VREPF $3, H_1, R_1 // [r₂₆[1], r₂₆[1], r₂₆[1], r₂₆[1]]
+	VREPF $3, H_2, R_2 // [r₂₆[2], r₂₆[2], r₂₆[2], r₂₆[2]]
+	VREPF $3, H_3, R_3 // [r₂₆[3], r₂₆[3], r₂₆[3], r₂₆[3]]
+	VREPF $3, H_4, R_4 // [r₂₆[4], r₂₆[4], r₂₆[4], r₂₆[4]]
+
+	// zero out lane 1 of h
+	VLEIG $1, $0, H_0 // [h₂₆[0], 0]
+	VLEIG $1, $0, H_1 // [h₂₆[1], 0]
+	VLEIG $1, $0, H_2 // [h₂₆[2], 0]
+	VLEIG $1, $0, H_3 // [h₂₆[3], 0]
+	VLEIG $1, $0, H_4 // [h₂₆[4], 0]
+
+	// calculate 5r (ignore least significant limb)
+	VREPIF $5, T_0
+	VMLF   T_0, R_1, R5_1 // [5r₂₆[1], 5r₂₆[1], 5r₂₆[1], 5r₂₆[1]]
+	VMLF   T_0, R_2, R5_2 // [5r₂₆[2], 5r₂₆[2], 5r₂₆[2], 5r₂₆[2]]
+	VMLF   T_0, R_3, R5_3 // [5r₂₆[3], 5r₂₆[3], 5r₂₆[3], 5r₂₆[3]]
+	VMLF   T_0, R_4, R5_4 // [5r₂₆[4], 5r₂₆[4], 5r₂₆[4], 5r₂₆[4]]
+
+	// skip r² calculation if we are only calculating one block
+	CMPBLE R3, $16, skip
+
+	// calculate r²
+	MULTIPLY(R_0, R_1, R_2, R_3, R_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, M_0, M_1, M_2, M_3, M_4)
+	REDUCE(M_0, M_1, M_2, M_3, M_4)
+	VGBM   $0x0f0f, T_0
+	VERIMG $0, M_0, T_0, R_0 // [r₂₆[0], r²₂₆[0], r₂₆[0], r²₂₆[0]]
+	VERIMG $0, M_1, T_0, R_1 // [r₂₆[1], r²₂₆[1], r₂₆[1], r²₂₆[1]]
+	VERIMG $0, M_2, T_0, R_2 // [r₂₆[2], r²₂₆[2], r₂₆[2], r²₂₆[2]]
+	VERIMG $0, M_3, T_0, R_3 // [r₂₆[3], r²₂₆[3], r₂₆[3], r²₂₆[3]]
+	VERIMG $0, M_4, T_0, R_4 // [r₂₆[4], r²₂₆[4], r₂₆[4], r²₂₆[4]]
+
+	// calculate 5r² (ignore least significant limb)
+	VREPIF $5, T_0
+	VMLF   T_0, R_1, R5_1 // [5r₂₆[1], 5r²₂₆[1], 5r₂₆[1], 5r²₂₆[1]]
+	VMLF   T_0, R_2, R5_2 // [5r₂₆[2], 5r²₂₆[2], 5r₂₆[2], 5r²₂₆[2]]
+	VMLF   T_0, R_3, R5_3 // [5r₂₆[3], 5r²₂₆[3], 5r₂₆[3], 5r²₂₆[3]]
+	VMLF   T_0, R_4, R5_4 // [5r₂₆[4], 5r²₂₆[4], 5r₂₆[4], 5r²₂₆[4]]
+
+loop:
+	CMPBLE R3, $32, b2 // 2 or fewer blocks remaining, need to change key coefficients
+
+	// load next 2 blocks from message
+	VLM (R2), T_0, T_1
+
+	// update message slice
+	SUB  $32, R3
+	MOVD $32(R2), R2
+
+	// unpack message blocks into 26-bit big-endian limbs
+	EXPAND(T_0, T_1, M_0, M_1, M_2, M_3, M_4)
+
+	// add 2¹²⁸ to each message block value
+	VLEIB $4, $1, M_4
+	VLEIB $12, $1, M_4
+
+multiply:
+	// accumulate the incoming message
+	VAG H_0, M_0, M_0
+	VAG H_3, M_3, M_3
+	VAG H_1, M_1, M_1
+	VAG H_4, M_4, M_4
+	VAG H_2, M_2, M_2
+
+	// multiply the accumulator by the key coefficient
+	MULTIPLY(M_0, M_1, M_2, M_3, M_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, H_0, H_1, H_2, H_3, H_4)
+
+	// carry and partially reduce the partial products
+	REDUCE(H_0, H_1, H_2, H_3, H_4)
+
+	CMPBNE R3, $0, loop
+
+finish:
+	// sum lane 0 and lane 1 and put the result in lane 1
+	VZERO  T_0
+	VSUMQG H_0, T_0, H_0
+	VSUMQG H_3, T_0, H_3
+	VSUMQG H_1, T_0, H_1
+	VSUMQG H_4, T_0, H_4
+	VSUMQG H_2, T_0, H_2
+
+	// reduce again after summation
+	// TODO(mundaym): there might be a more efficient way to do this
+	// now that we only have 1 active lane. For example, we could
+	// simultaneously pack the values as we reduce them.
+	REDUCE(H_0, H_1, H_2, H_3, H_4)
+
+	// carry h[1] through to h[4] so that only h[4] can exceed 2²⁶ - 1
+	// TODO(mundaym): in testing this final carry was unnecessary.
+	// Needs a proof before it can be removed though.
+	VESRLG $26, H_1, T_1
+	VN     MOD26, H_1, H_1
+	VAQ    T_1, H_2, H_2
+	VESRLG $26, H_2, T_2
+	VN     MOD26, H_2, H_2
+	VAQ    T_2, H_3, H_3
+	VESRLG $26, H_3, T_3
+	VN     MOD26, H_3, H_3
+	VAQ    T_3, H_4, H_4
+
+	// h is now < 2(2¹³⁰ - 5)
+	// Pack each lane in h₂₆[0:4] into h₁₂₈[0:1].
+	VESLG $26, H_1, H_1
+	VESLG $26, H_3, H_3
+	VO    H_0, H_1, H_0
+	VO    H_2, H_3, H_2
+	VESLG $4, H_2, H_2
+	VLEIB $7, $48, H_1
+	VSLB  H_1, H_2, H_2
+	VO    H_0, H_2, H_0
+	VLEIB $7, $104, H_1
+	VSLB  H_1, H_4, H_3
+	VO    H_3, H_0, H_0
+	VLEIB $7, $24, H_1
+	VSRLB H_1, H_4, H_1
+
+	// update state
+	VSTEG $1, H_0, 0(R1)
+	VSTEG $0, H_0, 8(R1)
+	VSTEG $1, H_1, 16(R1)
+	RET
+
+b2:  // 2 or fewer blocks remaining
+	CMPBLE R3, $16, b1
+
+	// Load the 2 remaining blocks (17-32 bytes remaining).
+	MOVD $-17(R3), R0    // index of final byte to load modulo 16
+	VL   (R2), T_0       // load full 16 byte block
+	VLL  R0, 16(R2), T_1 // load final (possibly partial) block and pad with zeros to 16 bytes
+
+	// The Poly1305 algorithm requires that a 1 bit be appended to
+	// each message block. If the final block is less than 16 bytes
+	// long then it is easiest to insert the 1 before the message
+	// block is split into 26-bit limbs. If, on the other hand, the
+	// final message block is 16 bytes long then we append the 1 bit
+	// after expansion as normal.
+	MOVBZ  $1, R0
+	MOVD   $-16(R3), R3   // index of byte in last block to insert 1 at (could be 16)
+	CMPBEQ R3, $16, 2(PC) // skip the insertion if the final block is 16 bytes long
+	VLVGB  R3, R0, T_1    // insert 1 into the byte at index R3
+
+	// Split both blocks into 26-bit limbs in the appropriate lanes.
+	EXPAND(T_0, T_1, M_0, M_1, M_2, M_3, M_4)
+
+	// Append a 1 byte to the end of the second to last block.
+	VLEIB $4, $1, M_4
+
+	// Append a 1 byte to the end of the last block only if it is a
+	// full 16 byte block.
+	CMPBNE R3, $16, 2(PC)
+	VLEIB  $12, $1, M_4
+
+	// Finally, set up the coefficients for the final multiplication.
+	// We have previously saved r and 5r in the 32-bit even indexes
+	// of the R_[0-4] and R5_[1-4] coefficient registers.
+	//
+	// We want lane 0 to be multiplied by r² so that can be kept the
+	// same. We want lane 1 to be multiplied by r so we need to move
+	// the saved r value into the 32-bit odd index in lane 1 by
+	// rotating the 64-bit lane by 32.
+	VGBM   $0x00ff, T_0         // [0, 0xffffffffffffffff] - mask lane 1 only
+	VERIMG $32, R_0, T_0, R_0   // [_,  r²₂₆[0], _,  r₂₆[0]]
+	VERIMG $32, R_1, T_0, R_1   // [_,  r²₂₆[1], _,  r₂₆[1]]
+	VERIMG $32, R_2, T_0, R_2   // [_,  r²₂₆[2], _,  r₂₆[2]]
+	VERIMG $32, R_3, T_0, R_3   // [_,  r²₂₆[3], _,  r₂₆[3]]
+	VERIMG $32, R_4, T_0, R_4   // [_,  r²₂₆[4], _,  r₂₆[4]]
+	VERIMG $32, R5_1, T_0, R5_1 // [_, 5r²₂₆[1], _, 5r₂₆[1]]
+	VERIMG $32, R5_2, T_0, R5_2 // [_, 5r²₂₆[2], _, 5r₂₆[2]]
+	VERIMG $32, R5_3, T_0, R5_3 // [_, 5r²₂₆[3], _, 5r₂₆[3]]
+	VERIMG $32, R5_4, T_0, R5_4 // [_, 5r²₂₆[4], _, 5r₂₆[4]]
+
+	MOVD $0, R3
+	BR   multiply
+
+skip:
+	CMPBEQ R3, $0, finish
+
+b1:  // 1 block remaining
+
+	// Load the final block (1-16 bytes). This will be placed into
+	// lane 0.
+	MOVD $-1(R3), R0
+	VLL  R0, (R2), T_0 // pad to 16 bytes with zeros
+
+	// The Poly1305 algorithm requires that a 1 bit be appended to
+	// each message block. If the final block is less than 16 bytes
+	// long then it is easiest to insert the 1 before the message
+	// block is split into 26-bit limbs. If, on the other hand, the
+	// final message block is 16 bytes long then we append the 1 bit
+	// after expansion as normal.
+	MOVBZ  $1, R0
+	CMPBEQ R3, $16, 2(PC)
+	VLVGB  R3, R0, T_0
+
+	// Set the message block in lane 1 to the value 0 so that it
+	// can be accumulated without affecting the final result.
+	VZERO T_1
+
+	// Split the final message block into 26-bit limbs in lane 0.
+	// Lane 1 will be contain 0.
+	EXPAND(T_0, T_1, M_0, M_1, M_2, M_3, M_4)
+
+	// Append a 1 byte to the end of the last block only if it is a
+	// full 16 byte block.
+	CMPBNE R3, $16, 2(PC)
+	VLEIB  $4, $1, M_4
+
+	// We have previously saved r and 5r in the 32-bit even indexes
+	// of the R_[0-4] and R5_[1-4] coefficient registers.
+	//
+	// We want lane 0 to be multiplied by r so we need to move the
+	// saved r value into the 32-bit odd index in lane 0. We want
+	// lane 1 to be set to the value 1. This makes multiplication
+	// a no-op. We do this by setting lane 1 in every register to 0
+	// and then just setting the 32-bit index 3 in R_0 to 1.
+	VZERO T_0
+	MOVD  $0, R0
+	MOVD  $0x10111213, R12
+	VLVGP R12, R0, T_1         // [_, 0x10111213, _, 0x00000000]
+	VPERM T_0, R_0, T_1, R_0   // [_,  r₂₆[0], _, 0]
+	VPERM T_0, R_1, T_1, R_1   // [_,  r₂₆[1], _, 0]
+	VPERM T_0, R_2, T_1, R_2   // [_,  r₂₆[2], _, 0]
+	VPERM T_0, R_3, T_1, R_3   // [_,  r₂₆[3], _, 0]
+	VPERM T_0, R_4, T_1, R_4   // [_,  r₂₆[4], _, 0]
+	VPERM T_0, R5_1, T_1, R5_1 // [_, 5r₂₆[1], _, 0]
+	VPERM T_0, R5_2, T_1, R5_2 // [_, 5r₂₆[2], _, 0]
+	VPERM T_0, R5_3, T_1, R5_3 // [_, 5r₂₆[3], _, 0]
+	VPERM T_0, R5_4, T_1, R5_4 // [_, 5r₂₆[4], _, 0]
+
+	// Set the value of lane 1 to be 1.
+	VLEIF $3, $1, R_0 // [_,  r₂₆[0], _, 1]
+
+	MOVD $0, R3
+	BR   multiply
diff --git a/vendor/golang.org/x/crypto/internal/subtle/aliasing.go b/vendor/golang.org/x/crypto/internal/subtle/aliasing.go
index f38797b..4fad24f 100644
--- a/vendor/golang.org/x/crypto/internal/subtle/aliasing.go
+++ b/vendor/golang.org/x/crypto/internal/subtle/aliasing.go
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !appengine
+//go:build !purego
+// +build !purego
 
 // Package subtle implements functions that are often useful in cryptographic
 // code but require careful thought to use correctly.
diff --git a/vendor/golang.org/x/crypto/internal/subtle/aliasing_appengine.go b/vendor/golang.org/x/crypto/internal/subtle/aliasing_purego.go
index 0cc4a8a..80ccbed 100644
--- a/vendor/golang.org/x/crypto/internal/subtle/aliasing_appengine.go
+++ b/vendor/golang.org/x/crypto/internal/subtle/aliasing_purego.go
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build appengine
+//go:build purego
+// +build purego
 
 // Package subtle implements functions that are often useful in cryptographic
 // code but require careful thought to use correctly.
diff --git a/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go b/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go
index a98d1bd..a2973e6 100644
--- a/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go
+++ b/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go
@@ -35,8 +35,8 @@ This package is interoperable with NaCl: https://nacl.cr.yp.to/secretbox.html.
 package secretbox // import "golang.org/x/crypto/nacl/secretbox"
 
 import (
+	"golang.org/x/crypto/internal/poly1305"
 	"golang.org/x/crypto/internal/subtle"
-	"golang.org/x/crypto/poly1305"
 	"golang.org/x/crypto/salsa20/salsa"
 )
 
diff --git a/vendor/golang.org/x/crypto/poly1305/poly1305.go b/vendor/golang.org/x/crypto/poly1305/poly1305.go
deleted file mode 100644
index f562fa5..0000000
--- a/vendor/golang.org/x/crypto/poly1305/poly1305.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package poly1305 implements Poly1305 one-time message authentication code as
-specified in https://cr.yp.to/mac/poly1305-20050329.pdf.
-
-Poly1305 is a fast, one-time authentication function. It is infeasible for an
-attacker to generate an authenticator for a message without the key. However, a
-key must only be used for a single message. Authenticating two different
-messages with the same key allows an attacker to forge authenticators for other
-messages with the same key.
-
-Poly1305 was originally coupled with AES in order to make Poly1305-AES. AES was
-used with a fixed key in order to generate one-time keys from an nonce.
-However, in this package AES isn't used and the one-time key is specified
-directly.
-*/
-package poly1305 // import "golang.org/x/crypto/poly1305"
-
-import "crypto/subtle"
-
-// TagSize is the size, in bytes, of a poly1305 authenticator.
-const TagSize = 16
-
-// Verify returns true if mac is a valid authenticator for m with the given
-// key.
-func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
-	var tmp [16]byte
-	Sum(&tmp, m, key)
-	return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1
-}
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go
deleted file mode 100644
index 4dd72fe..0000000
--- a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build amd64,!gccgo,!appengine
-
-package poly1305
-
-// This function is implemented in sum_amd64.s
-//go:noescape
-func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	var mPtr *byte
-	if len(m) > 0 {
-		mPtr = &m[0]
-	}
-	poly1305(out, mPtr, uint64(len(m)), key)
-}
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_arm.go b/vendor/golang.org/x/crypto/poly1305/sum_arm.go
deleted file mode 100644
index 5dc321c..0000000
--- a/vendor/golang.org/x/crypto/poly1305/sum_arm.go
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build arm,!gccgo,!appengine,!nacl
-
-package poly1305
-
-// This function is implemented in sum_arm.s
-//go:noescape
-func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	var mPtr *byte
-	if len(m) > 0 {
-		mPtr = &m[0]
-	}
-	poly1305_auth_armv6(out, mPtr, uint32(len(m)), key)
-}
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_arm.s b/vendor/golang.org/x/crypto/poly1305/sum_arm.s
deleted file mode 100644
index f70b4ac..0000000
--- a/vendor/golang.org/x/crypto/poly1305/sum_arm.s
+++ /dev/null
@@ -1,427 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build arm,!gccgo,!appengine,!nacl
-
-#include "textflag.h"
-
-// This code was translated into a form compatible with 5a from the public
-// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
-
-DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
-DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
-DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
-DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
-DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
-GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20
-
-// Warning: the linker may use R11 to synthesize certain instructions. Please
-// take care and verify that no synthetic instructions use it.
-
-TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0
-	// Needs 16 bytes of stack and 64 bytes of space pointed to by R0.  (It
-	// might look like it's only 60 bytes of space but the final four bytes
-	// will be written by another function.) We need to skip over four
-	// bytes of stack because that's saving the value of 'g'.
-	ADD       $4, R13, R8
-	MOVM.IB   [R4-R7], (R8)
-	MOVM.IA.W (R1), [R2-R5]
-	MOVW      $·poly1305_init_constants_armv6<>(SB), R7
-	MOVW      R2, R8
-	MOVW      R2>>26, R9
-	MOVW      R3>>20, g
-	MOVW      R4>>14, R11
-	MOVW      R5>>8, R12
-	ORR       R3<<6, R9, R9
-	ORR       R4<<12, g, g
-	ORR       R5<<18, R11, R11
-	MOVM.IA   (R7), [R2-R6]
-	AND       R8, R2, R2
-	AND       R9, R3, R3
-	AND       g, R4, R4
-	AND       R11, R5, R5
-	AND       R12, R6, R6
-	MOVM.IA.W [R2-R6], (R0)
-	EOR       R2, R2, R2
-	EOR       R3, R3, R3
-	EOR       R4, R4, R4
-	EOR       R5, R5, R5
-	EOR       R6, R6, R6
-	MOVM.IA.W [R2-R6], (R0)
-	MOVM.IA.W (R1), [R2-R5]
-	MOVM.IA   [R2-R6], (R0)
-	ADD       $20, R13, R0
-	MOVM.DA   (R0), [R4-R7]
-	RET
-
-#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
-	MOVBU (offset+0)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+0)(Rdst); \
-	MOVBU (offset+1)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+1)(Rdst); \
-	MOVBU (offset+2)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+2)(Rdst); \
-	MOVBU (offset+3)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+3)(Rdst)
-
-TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0
-	// Needs 24 bytes of stack for saved registers and then 88 bytes of
-	// scratch space after that. We assume that 24 bytes at (R13) have
-	// already been used: four bytes for the link register saved in the
-	// prelude of poly1305_auth_armv6, four bytes for saving the value of g
-	// in that function and 16 bytes of scratch space used around
-	// poly1305_finish_ext_armv6_skip1.
-	ADD     $24, R13, R12
-	MOVM.IB [R4-R8, R14], (R12)
-	MOVW    R0, 88(R13)
-	MOVW    R1, 92(R13)
-	MOVW    R2, 96(R13)
-	MOVW    R1, R14
-	MOVW    R2, R12
-	MOVW    56(R0), R8
-	WORD    $0xe1180008                // TST R8, R8 not working see issue 5921
-	EOR     R6, R6, R6
-	MOVW.EQ $(1<<24), R6
-	MOVW    R6, 84(R13)
-	ADD     $116, R13, g
-	MOVM.IA (R0), [R0-R9]
-	MOVM.IA [R0-R4], (g)
-	CMP     $16, R12
-	BLO     poly1305_blocks_armv6_done
-
-poly1305_blocks_armv6_mainloop:
-	WORD    $0xe31e0003                            // TST R14, #3 not working see issue 5921
-	BEQ     poly1305_blocks_armv6_mainloop_aligned
-	ADD     $100, R13, g
-	MOVW_UNALIGNED(R14, g, R0, 0)
-	MOVW_UNALIGNED(R14, g, R0, 4)
-	MOVW_UNALIGNED(R14, g, R0, 8)
-	MOVW_UNALIGNED(R14, g, R0, 12)
-	MOVM.IA (g), [R0-R3]
-	ADD     $16, R14
-	B       poly1305_blocks_armv6_mainloop_loaded
-
-poly1305_blocks_armv6_mainloop_aligned:
-	MOVM.IA.W (R14), [R0-R3]
-
-poly1305_blocks_armv6_mainloop_loaded:
-	MOVW    R0>>26, g
-	MOVW    R1>>20, R11
-	MOVW    R2>>14, R12
-	MOVW    R14, 92(R13)
-	MOVW    R3>>8, R4
-	ORR     R1<<6, g, g
-	ORR     R2<<12, R11, R11
-	ORR     R3<<18, R12, R12
-	BIC     $0xfc000000, R0, R0
-	BIC     $0xfc000000, g, g
-	MOVW    84(R13), R3
-	BIC     $0xfc000000, R11, R11
-	BIC     $0xfc000000, R12, R12
-	ADD     R0, R5, R5
-	ADD     g, R6, R6
-	ORR     R3, R4, R4
-	ADD     R11, R7, R7
-	ADD     $116, R13, R14
-	ADD     R12, R8, R8
-	ADD     R4, R9, R9
-	MOVM.IA (R14), [R0-R4]
-	MULLU   R4, R5, (R11, g)
-	MULLU   R3, R5, (R14, R12)
-	MULALU  R3, R6, (R11, g)
-	MULALU  R2, R6, (R14, R12)
-	MULALU  R2, R7, (R11, g)
-	MULALU  R1, R7, (R14, R12)
-	ADD     R4<<2, R4, R4
-	ADD     R3<<2, R3, R3
-	MULALU  R1, R8, (R11, g)
-	MULALU  R0, R8, (R14, R12)
-	MULALU  R0, R9, (R11, g)
-	MULALU  R4, R9, (R14, R12)
-	MOVW    g, 76(R13)
-	MOVW    R11, 80(R13)
-	MOVW    R12, 68(R13)
-	MOVW    R14, 72(R13)
-	MULLU   R2, R5, (R11, g)
-	MULLU   R1, R5, (R14, R12)
-	MULALU  R1, R6, (R11, g)
-	MULALU  R0, R6, (R14, R12)
-	MULALU  R0, R7, (R11, g)
-	MULALU  R4, R7, (R14, R12)
-	ADD     R2<<2, R2, R2
-	ADD     R1<<2, R1, R1
-	MULALU  R4, R8, (R11, g)
-	MULALU  R3, R8, (R14, R12)
-	MULALU  R3, R9, (R11, g)
-	MULALU  R2, R9, (R14, R12)
-	MOVW    g, 60(R13)
-	MOVW    R11, 64(R13)
-	MOVW    R12, 52(R13)
-	MOVW    R14, 56(R13)
-	MULLU   R0, R5, (R11, g)
-	MULALU  R4, R6, (R11, g)
-	MULALU  R3, R7, (R11, g)
-	MULALU  R2, R8, (R11, g)
-	MULALU  R1, R9, (R11, g)
-	ADD     $52, R13, R0
-	MOVM.IA (R0), [R0-R7]
-	MOVW    g>>26, R12
-	MOVW    R4>>26, R14
-	ORR     R11<<6, R12, R12
-	ORR     R5<<6, R14, R14
-	BIC     $0xfc000000, g, g
-	BIC     $0xfc000000, R4, R4
-	ADD.S   R12, R0, R0
-	ADC     $0, R1, R1
-	ADD.S   R14, R6, R6
-	ADC     $0, R7, R7
-	MOVW    R0>>26, R12
-	MOVW    R6>>26, R14
-	ORR     R1<<6, R12, R12
-	ORR     R7<<6, R14, R14
-	BIC     $0xfc000000, R0, R0
-	BIC     $0xfc000000, R6, R6
-	ADD     R14<<2, R14, R14
-	ADD.S   R12, R2, R2
-	ADC     $0, R3, R3
-	ADD     R14, g, g
-	MOVW    R2>>26, R12
-	MOVW    g>>26, R14
-	ORR     R3<<6, R12, R12
-	BIC     $0xfc000000, g, R5
-	BIC     $0xfc000000, R2, R7
-	ADD     R12, R4, R4
-	ADD     R14, R0, R0
-	MOVW    R4>>26, R12
-	BIC     $0xfc000000, R4, R8
-	ADD     R12, R6, R9
-	MOVW    96(R13), R12
-	MOVW    92(R13), R14
-	MOVW    R0, R6
-	CMP     $32, R12
-	SUB     $16, R12, R12
-	MOVW    R12, 96(R13)
-	BHS     poly1305_blocks_armv6_mainloop
-
-poly1305_blocks_armv6_done:
-	MOVW    88(R13), R12
-	MOVW    R5, 20(R12)
-	MOVW    R6, 24(R12)
-	MOVW    R7, 28(R12)
-	MOVW    R8, 32(R12)
-	MOVW    R9, 36(R12)
-	ADD     $48, R13, R0
-	MOVM.DA (R0), [R4-R8, R14]
-	RET
-
-#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
-	MOVBU.P 1(Rsrc), Rtmp; \
-	MOVBU.P Rtmp, 1(Rdst); \
-	MOVBU.P 1(Rsrc), Rtmp; \
-	MOVBU.P Rtmp, 1(Rdst)
-
-#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
-	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
-	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
-
-// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key)
-TEXT ·poly1305_auth_armv6(SB), $196-16
-	// The value 196, just above, is the sum of 64 (the size of the context
-	// structure) and 132 (the amount of stack needed).
-	//
-	// At this point, the stack pointer (R13) has been moved down. It
-	// points to the saved link register and there's 196 bytes of free
-	// space above it.
-	//
-	// The stack for this function looks like:
-	//
-	// +---------------------
-	// |
-	// | 64 bytes of context structure
-	// |
-	// +---------------------
-	// |
-	// | 112 bytes for poly1305_blocks_armv6
-	// |
-	// +---------------------
-	// | 16 bytes of final block, constructed at
-	// | poly1305_finish_ext_armv6_skip8
-	// +---------------------
-	// | four bytes of saved 'g'
-	// +---------------------
-	// | lr, saved by prelude    <- R13 points here
-	// +---------------------
-	MOVW g, 4(R13)
-
-	MOVW out+0(FP), R4
-	MOVW m+4(FP), R5
-	MOVW mlen+8(FP), R6
-	MOVW key+12(FP), R7
-
-	ADD  $136, R13, R0 // 136 = 4 + 4 + 16 + 112
-	MOVW R7, R1
-
-	// poly1305_init_ext_armv6 will write to the stack from R13+4, but
-	// that's ok because none of the other values have been written yet.
-	BL    poly1305_init_ext_armv6<>(SB)
-	BIC.S $15, R6, R2
-	BEQ   poly1305_auth_armv6_noblocks
-	ADD   $136, R13, R0
-	MOVW  R5, R1
-	ADD   R2, R5, R5
-	SUB   R2, R6, R6
-	BL    poly1305_blocks_armv6<>(SB)
-
-poly1305_auth_armv6_noblocks:
-	ADD  $136, R13, R0
-	MOVW R5, R1
-	MOVW R6, R2
-	MOVW R4, R3
-
-	MOVW  R0, R5
-	MOVW  R1, R6
-	MOVW  R2, R7
-	MOVW  R3, R8
-	AND.S R2, R2, R2
-	BEQ   poly1305_finish_ext_armv6_noremaining
-	EOR   R0, R0
-	ADD   $8, R13, R9                           // 8 = offset to 16 byte scratch space
-	MOVW  R0, (R9)
-	MOVW  R0, 4(R9)
-	MOVW  R0, 8(R9)
-	MOVW  R0, 12(R9)
-	WORD  $0xe3110003                           // TST R1, #3 not working see issue 5921
-	BEQ   poly1305_finish_ext_armv6_aligned
-	WORD  $0xe3120008                           // TST R2, #8 not working see issue 5921
-	BEQ   poly1305_finish_ext_armv6_skip8
-	MOVWP_UNALIGNED(R1, R9, g)
-	MOVWP_UNALIGNED(R1, R9, g)
-
-poly1305_finish_ext_armv6_skip8:
-	WORD $0xe3120004                     // TST $4, R2 not working see issue 5921
-	BEQ  poly1305_finish_ext_armv6_skip4
-	MOVWP_UNALIGNED(R1, R9, g)
-
-poly1305_finish_ext_armv6_skip4:
-	WORD $0xe3120002                     // TST $2, R2 not working see issue 5921
-	BEQ  poly1305_finish_ext_armv6_skip2
-	MOVHUP_UNALIGNED(R1, R9, g)
-	B    poly1305_finish_ext_armv6_skip2
-
-poly1305_finish_ext_armv6_aligned:
-	WORD      $0xe3120008                             // TST R2, #8 not working see issue 5921
-	BEQ       poly1305_finish_ext_armv6_skip8_aligned
-	MOVM.IA.W (R1), [g-R11]
-	MOVM.IA.W [g-R11], (R9)
-
-poly1305_finish_ext_armv6_skip8_aligned:
-	WORD   $0xe3120004                             // TST $4, R2 not working see issue 5921
-	BEQ    poly1305_finish_ext_armv6_skip4_aligned
-	MOVW.P 4(R1), g
-	MOVW.P g, 4(R9)
-
-poly1305_finish_ext_armv6_skip4_aligned:
-	WORD    $0xe3120002                     // TST $2, R2 not working see issue 5921
-	BEQ     poly1305_finish_ext_armv6_skip2
-	MOVHU.P 2(R1), g
-	MOVH.P  g, 2(R9)
-
-poly1305_finish_ext_armv6_skip2:
-	WORD    $0xe3120001                     // TST $1, R2 not working see issue 5921
-	BEQ     poly1305_finish_ext_armv6_skip1
-	MOVBU.P 1(R1), g
-	MOVBU.P g, 1(R9)
-
-poly1305_finish_ext_armv6_skip1:
-	MOVW  $1, R11
-	MOVBU R11, 0(R9)
-	MOVW  R11, 56(R5)
-	MOVW  R5, R0
-	ADD   $8, R13, R1
-	MOVW  $16, R2
-	BL    poly1305_blocks_armv6<>(SB)
-
-poly1305_finish_ext_armv6_noremaining:
-	MOVW      20(R5), R0
-	MOVW      24(R5), R1
-	MOVW      28(R5), R2
-	MOVW      32(R5), R3
-	MOVW      36(R5), R4
-	MOVW      R4>>26, R12
-	BIC       $0xfc000000, R4, R4
-	ADD       R12<<2, R12, R12
-	ADD       R12, R0, R0
-	MOVW      R0>>26, R12
-	BIC       $0xfc000000, R0, R0
-	ADD       R12, R1, R1
-	MOVW      R1>>26, R12
-	BIC       $0xfc000000, R1, R1
-	ADD       R12, R2, R2
-	MOVW      R2>>26, R12
-	BIC       $0xfc000000, R2, R2
-	ADD       R12, R3, R3
-	MOVW      R3>>26, R12
-	BIC       $0xfc000000, R3, R3
-	ADD       R12, R4, R4
-	ADD       $5, R0, R6
-	MOVW      R6>>26, R12
-	BIC       $0xfc000000, R6, R6
-	ADD       R12, R1, R7
-	MOVW      R7>>26, R12
-	BIC       $0xfc000000, R7, R7
-	ADD       R12, R2, g
-	MOVW      g>>26, R12
-	BIC       $0xfc000000, g, g
-	ADD       R12, R3, R11
-	MOVW      $-(1<<26), R12
-	ADD       R11>>26, R12, R12
-	BIC       $0xfc000000, R11, R11
-	ADD       R12, R4, R9
-	MOVW      R9>>31, R12
-	SUB       $1, R12
-	AND       R12, R6, R6
-	AND       R12, R7, R7
-	AND       R12, g, g
-	AND       R12, R11, R11
-	AND       R12, R9, R9
-	MVN       R12, R12
-	AND       R12, R0, R0
-	AND       R12, R1, R1
-	AND       R12, R2, R2
-	AND       R12, R3, R3
-	AND       R12, R4, R4
-	ORR       R6, R0, R0
-	ORR       R7, R1, R1
-	ORR       g, R2, R2
-	ORR       R11, R3, R3
-	ORR       R9, R4, R4
-	ORR       R1<<26, R0, R0
-	MOVW      R1>>6, R1
-	ORR       R2<<20, R1, R1
-	MOVW      R2>>12, R2
-	ORR       R3<<14, R2, R2
-	MOVW      R3>>18, R3
-	ORR       R4<<8, R3, R3
-	MOVW      40(R5), R6
-	MOVW      44(R5), R7
-	MOVW      48(R5), g
-	MOVW      52(R5), R11
-	ADD.S     R6, R0, R0
-	ADC.S     R7, R1, R1
-	ADC.S     g, R2, R2
-	ADC.S     R11, R3, R3
-	MOVM.IA   [R0-R3], (R8)
-	MOVW      R5, R12
-	EOR       R0, R0, R0
-	EOR       R1, R1, R1
-	EOR       R2, R2, R2
-	EOR       R3, R3, R3
-	EOR       R4, R4, R4
-	EOR       R5, R5, R5
-	EOR       R6, R6, R6
-	EOR       R7, R7, R7
-	MOVM.IA.W [R0-R7], (R12)
-	MOVM.IA   [R0-R7], (R12)
-	MOVW      4(R13), g
-	RET
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_noasm.go b/vendor/golang.org/x/crypto/poly1305/sum_noasm.go
deleted file mode 100644
index 751eec5..0000000
--- a/vendor/golang.org/x/crypto/poly1305/sum_noasm.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x,!go1.11 !arm,!amd64,!s390x gccgo appengine nacl
-
-package poly1305
-
-// Sum generates an authenticator for msg using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
-	sumGeneric(out, msg, key)
-}
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_ref.go b/vendor/golang.org/x/crypto/poly1305/sum_ref.go
deleted file mode 100644
index c4d59bd..0000000
--- a/vendor/golang.org/x/crypto/poly1305/sum_ref.go
+++ /dev/null
@@ -1,139 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package poly1305
-
-import "encoding/binary"
-
-// sumGeneric generates an authenticator for msg using a one-time key and
-// puts the 16-byte result into out. This is the generic implementation of
-// Sum and should be called if no assembly implementation is available.
-func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
-	var (
-		h0, h1, h2, h3, h4 uint32 // the hash accumulators
-		r0, r1, r2, r3, r4 uint64 // the r part of the key
-	)
-
-	r0 = uint64(binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff)
-	r1 = uint64((binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03)
-	r2 = uint64((binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff)
-	r3 = uint64((binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff)
-	r4 = uint64((binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff)
-
-	R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
-
-	for len(msg) >= TagSize {
-		// h += msg
-		h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
-		h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
-		h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
-		h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
-		h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | (1 << 24)
-
-		// h *= r
-		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
-		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
-		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
-		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
-		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
-
-		// h %= p
-		h0 = uint32(d0) & 0x3ffffff
-		h1 = uint32(d1) & 0x3ffffff
-		h2 = uint32(d2) & 0x3ffffff
-		h3 = uint32(d3) & 0x3ffffff
-		h4 = uint32(d4) & 0x3ffffff
-
-		h0 += uint32(d4>>26) * 5
-		h1 += h0 >> 26
-		h0 = h0 & 0x3ffffff
-
-		msg = msg[TagSize:]
-	}
-
-	if len(msg) > 0 {
-		var block [TagSize]byte
-		off := copy(block[:], msg)
-		block[off] = 0x01
-
-		// h += msg
-		h0 += binary.LittleEndian.Uint32(block[0:]) & 0x3ffffff
-		h1 += (binary.LittleEndian.Uint32(block[3:]) >> 2) & 0x3ffffff
-		h2 += (binary.LittleEndian.Uint32(block[6:]) >> 4) & 0x3ffffff
-		h3 += (binary.LittleEndian.Uint32(block[9:]) >> 6) & 0x3ffffff
-		h4 += (binary.LittleEndian.Uint32(block[12:]) >> 8)
-
-		// h *= r
-		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
-		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
-		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
-		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
-		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
-
-		// h %= p
-		h0 = uint32(d0) & 0x3ffffff
-		h1 = uint32(d1) & 0x3ffffff
-		h2 = uint32(d2) & 0x3ffffff
-		h3 = uint32(d3) & 0x3ffffff
-		h4 = uint32(d4) & 0x3ffffff
-
-		h0 += uint32(d4>>26) * 5
-		h1 += h0 >> 26
-		h0 = h0 & 0x3ffffff
-	}
-
-	// h %= p reduction
-	h2 += h1 >> 26
-	h1 &= 0x3ffffff
-	h3 += h2 >> 26
-	h2 &= 0x3ffffff
-	h4 += h3 >> 26
-	h3 &= 0x3ffffff
-	h0 += 5 * (h4 >> 26)
-	h4 &= 0x3ffffff
-	h1 += h0 >> 26
-	h0 &= 0x3ffffff
-
-	// h - p
-	t0 := h0 + 5
-	t1 := h1 + (t0 >> 26)
-	t2 := h2 + (t1 >> 26)
-	t3 := h3 + (t2 >> 26)
-	t4 := h4 + (t3 >> 26) - (1 << 26)
-	t0 &= 0x3ffffff
-	t1 &= 0x3ffffff
-	t2 &= 0x3ffffff
-	t3 &= 0x3ffffff
-
-	// select h if h < p else h - p
-	t_mask := (t4 >> 31) - 1
-	h_mask := ^t_mask
-	h0 = (h0 & h_mask) | (t0 & t_mask)
-	h1 = (h1 & h_mask) | (t1 & t_mask)
-	h2 = (h2 & h_mask) | (t2 & t_mask)
-	h3 = (h3 & h_mask) | (t3 & t_mask)
-	h4 = (h4 & h_mask) | (t4 & t_mask)
-
-	// h %= 2^128
-	h0 |= h1 << 26
-	h1 = ((h1 >> 6) | (h2 << 20))
-	h2 = ((h2 >> 12) | (h3 << 14))
-	h3 = ((h3 >> 18) | (h4 << 8))
-
-	// s: the s part of the key
-	// tag = (h + s) % (2^128)
-	t := uint64(h0) + uint64(binary.LittleEndian.Uint32(key[16:]))
-	h0 = uint32(t)
-	t = uint64(h1) + uint64(binary.LittleEndian.Uint32(key[20:])) + (t >> 32)
-	h1 = uint32(t)
-	t = uint64(h2) + uint64(binary.LittleEndian.Uint32(key[24:])) + (t >> 32)
-	h2 = uint32(t)
-	t = uint64(h3) + uint64(binary.LittleEndian.Uint32(key[28:])) + (t >> 32)
-	h3 = uint32(t)
-
-	binary.LittleEndian.PutUint32(out[0:], h0)
-	binary.LittleEndian.PutUint32(out[4:], h1)
-	binary.LittleEndian.PutUint32(out[8:], h2)
-	binary.LittleEndian.PutUint32(out[12:], h3)
-}
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_s390x.go b/vendor/golang.org/x/crypto/poly1305/sum_s390x.go
deleted file mode 100644
index 7a266ce..0000000
--- a/vendor/golang.org/x/crypto/poly1305/sum_s390x.go
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x,go1.11,!gccgo,!appengine
-
-package poly1305
-
-// hasVectorFacility reports whether the machine supports
-// the vector facility (vx).
-func hasVectorFacility() bool
-
-// hasVMSLFacility reports whether the machine supports
-// Vector Multiply Sum Logical (VMSL).
-func hasVMSLFacility() bool
-
-var hasVX = hasVectorFacility()
-var hasVMSL = hasVMSLFacility()
-
-// poly1305vx is an assembly implementation of Poly1305 that uses vector
-// instructions. It must only be called if the vector facility (vx) is
-// available.
-//go:noescape
-func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
-
-// poly1305vmsl is an assembly implementation of Poly1305 that uses vector
-// instructions, including VMSL. It must only be called if the vector facility (vx) is
-// available and if VMSL is supported.
-//go:noescape
-func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	if hasVX {
-		var mPtr *byte
-		if len(m) > 0 {
-			mPtr = &m[0]
-		}
-		if hasVMSL && len(m) > 256 {
-			poly1305vmsl(out, mPtr, uint64(len(m)), key)
-		} else {
-			poly1305vx(out, mPtr, uint64(len(m)), key)
-		}
-	} else {
-		sumGeneric(out, m, key)
-	}
-}
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_s390x.s b/vendor/golang.org/x/crypto/poly1305/sum_s390x.s
deleted file mode 100644
index 356c07a..0000000
--- a/vendor/golang.org/x/crypto/poly1305/sum_s390x.s
+++ /dev/null
@@ -1,400 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x,go1.11,!gccgo,!appengine
-
-#include "textflag.h"
-
-// Implementation of Poly1305 using the vector facility (vx).
-
-// constants
-#define MOD26 V0
-#define EX0   V1
-#define EX1   V2
-#define EX2   V3
-
-// temporaries
-#define T_0 V4
-#define T_1 V5
-#define T_2 V6
-#define T_3 V7
-#define T_4 V8
-
-// key (r)
-#define R_0  V9
-#define R_1  V10
-#define R_2  V11
-#define R_3  V12
-#define R_4  V13
-#define R5_1 V14
-#define R5_2 V15
-#define R5_3 V16
-#define R5_4 V17
-#define RSAVE_0 R5
-#define RSAVE_1 R6
-#define RSAVE_2 R7
-#define RSAVE_3 R8
-#define RSAVE_4 R9
-#define R5SAVE_1 V28
-#define R5SAVE_2 V29
-#define R5SAVE_3 V30
-#define R5SAVE_4 V31
-
-// message block
-#define F_0 V18
-#define F_1 V19
-#define F_2 V20
-#define F_3 V21
-#define F_4 V22
-
-// accumulator
-#define H_0 V23
-#define H_1 V24
-#define H_2 V25
-#define H_3 V26
-#define H_4 V27
-
-GLOBL ·keyMask<>(SB), RODATA, $16
-DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f
-DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f
-
-GLOBL ·bswapMask<>(SB), RODATA, $16
-DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908
-DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100
-
-GLOBL ·constants<>(SB), RODATA, $64
-// MOD26
-DATA ·constants<>+0(SB)/8, $0x3ffffff
-DATA ·constants<>+8(SB)/8, $0x3ffffff
-// EX0
-DATA ·constants<>+16(SB)/8, $0x0006050403020100
-DATA ·constants<>+24(SB)/8, $0x1016151413121110
-// EX1
-DATA ·constants<>+32(SB)/8, $0x060c0b0a09080706
-DATA ·constants<>+40(SB)/8, $0x161c1b1a19181716
-// EX2
-DATA ·constants<>+48(SB)/8, $0x0d0d0d0d0d0f0e0d
-DATA ·constants<>+56(SB)/8, $0x1d1d1d1d1d1f1e1d
-
-// h = (f*g) % (2**130-5) [partial reduction]
-#define MULTIPLY(f0, f1, f2, f3, f4, g0, g1, g2, g3, g4, g51, g52, g53, g54, h0, h1, h2, h3, h4) \
-	VMLOF  f0, g0, h0        \
-	VMLOF  f0, g1, h1        \
-	VMLOF  f0, g2, h2        \
-	VMLOF  f0, g3, h3        \
-	VMLOF  f0, g4, h4        \
-	VMLOF  f1, g54, T_0      \
-	VMLOF  f1, g0, T_1       \
-	VMLOF  f1, g1, T_2       \
-	VMLOF  f1, g2, T_3       \
-	VMLOF  f1, g3, T_4       \
-	VMALOF f2, g53, h0, h0   \
-	VMALOF f2, g54, h1, h1   \
-	VMALOF f2, g0, h2, h2    \
-	VMALOF f2, g1, h3, h3    \
-	VMALOF f2, g2, h4, h4    \
-	VMALOF f3, g52, T_0, T_0 \
-	VMALOF f3, g53, T_1, T_1 \
-	VMALOF f3, g54, T_2, T_2 \
-	VMALOF f3, g0, T_3, T_3  \
-	VMALOF f3, g1, T_4, T_4  \
-	VMALOF f4, g51, h0, h0   \
-	VMALOF f4, g52, h1, h1   \
-	VMALOF f4, g53, h2, h2   \
-	VMALOF f4, g54, h3, h3   \
-	VMALOF f4, g0, h4, h4    \
-	VAG    T_0, h0, h0       \
-	VAG    T_1, h1, h1       \
-	VAG    T_2, h2, h2       \
-	VAG    T_3, h3, h3       \
-	VAG    T_4, h4, h4
-
-// carry h0->h1 h3->h4, h1->h2 h4->h0, h0->h1 h2->h3, h3->h4
-#define REDUCE(h0, h1, h2, h3, h4) \
-	VESRLG $26, h0, T_0  \
-	VESRLG $26, h3, T_1  \
-	VN     MOD26, h0, h0 \
-	VN     MOD26, h3, h3 \
-	VAG    T_0, h1, h1   \
-	VAG    T_1, h4, h4   \
-	VESRLG $26, h1, T_2  \
-	VESRLG $26, h4, T_3  \
-	VN     MOD26, h1, h1 \
-	VN     MOD26, h4, h4 \
-	VESLG  $2, T_3, T_4  \
-	VAG    T_3, T_4, T_4 \
-	VAG    T_2, h2, h2   \
-	VAG    T_4, h0, h0   \
-	VESRLG $26, h2, T_0  \
-	VESRLG $26, h0, T_1  \
-	VN     MOD26, h2, h2 \
-	VN     MOD26, h0, h0 \
-	VAG    T_0, h3, h3   \
-	VAG    T_1, h1, h1   \
-	VESRLG $26, h3, T_2  \
-	VN     MOD26, h3, h3 \
-	VAG    T_2, h4, h4
-
-// expand in0 into d[0] and in1 into d[1]
-#define EXPAND(in0, in1, d0, d1, d2, d3, d4) \
-	VGBM   $0x0707, d1       \ // d1=tmp
-	VPERM  in0, in1, EX2, d4 \
-	VPERM  in0, in1, EX0, d0 \
-	VPERM  in0, in1, EX1, d2 \
-	VN     d1, d4, d4        \
-	VESRLG $26, d0, d1       \
-	VESRLG $30, d2, d3       \
-	VESRLG $4, d2, d2        \
-	VN     MOD26, d0, d0     \
-	VN     MOD26, d1, d1     \
-	VN     MOD26, d2, d2     \
-	VN     MOD26, d3, d3
-
-// pack h4:h0 into h1:h0 (no carry)
-#define PACK(h0, h1, h2, h3, h4) \
-	VESLG $26, h1, h1  \
-	VESLG $26, h3, h3  \
-	VO    h0, h1, h0   \
-	VO    h2, h3, h2   \
-	VESLG $4, h2, h2   \
-	VLEIB $7, $48, h1  \
-	VSLB  h1, h2, h2   \
-	VO    h0, h2, h0   \
-	VLEIB $7, $104, h1 \
-	VSLB  h1, h4, h3   \
-	VO    h3, h0, h0   \
-	VLEIB $7, $24, h1  \
-	VSRLB h1, h4, h1
-
-// if h > 2**130-5 then h -= 2**130-5
-#define MOD(h0, h1, t0, t1, t2) \
-	VZERO t0          \
-	VLEIG $1, $5, t0  \
-	VACCQ h0, t0, t1  \
-	VAQ   h0, t0, t0  \
-	VONE  t2          \
-	VLEIG $1, $-4, t2 \
-	VAQ   t2, t1, t1  \
-	VACCQ h1, t1, t1  \
-	VONE  t2          \
-	VAQ   t2, t1, t1  \
-	VN    h0, t1, t2  \
-	VNC   t0, t1, t1  \
-	VO    t1, t2, h0
-
-// func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]key)
-TEXT ·poly1305vx(SB), $0-32
-	// This code processes up to 2 blocks (32 bytes) per iteration
-	// using the algorithm described in:
-	// NEON crypto, Daniel J. Bernstein & Peter Schwabe
-	// https://cryptojedi.org/papers/neoncrypto-20120320.pdf
-	LMG out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key
-
-	// load MOD26, EX0, EX1 and EX2
-	MOVD $·constants<>(SB), R5
-	VLM  (R5), MOD26, EX2
-
-	// setup r
-	VL   (R4), T_0
-	MOVD $·keyMask<>(SB), R6
-	VL   (R6), T_1
-	VN   T_0, T_1, T_0
-	EXPAND(T_0, T_0, R_0, R_1, R_2, R_3, R_4)
-
-	// setup r*5
-	VLEIG $0, $5, T_0
-	VLEIG $1, $5, T_0
-
-	// store r (for final block)
-	VMLOF T_0, R_1, R5SAVE_1
-	VMLOF T_0, R_2, R5SAVE_2
-	VMLOF T_0, R_3, R5SAVE_3
-	VMLOF T_0, R_4, R5SAVE_4
-	VLGVG $0, R_0, RSAVE_0
-	VLGVG $0, R_1, RSAVE_1
-	VLGVG $0, R_2, RSAVE_2
-	VLGVG $0, R_3, RSAVE_3
-	VLGVG $0, R_4, RSAVE_4
-
-	// skip r**2 calculation
-	CMPBLE R3, $16, skip
-
-	// calculate r**2
-	MULTIPLY(R_0, R_1, R_2, R_3, R_4, R_0, R_1, R_2, R_3, R_4, R5SAVE_1, R5SAVE_2, R5SAVE_3, R5SAVE_4, H_0, H_1, H_2, H_3, H_4)
-	REDUCE(H_0, H_1, H_2, H_3, H_4)
-	VLEIG $0, $5, T_0
-	VLEIG $1, $5, T_0
-	VMLOF T_0, H_1, R5_1
-	VMLOF T_0, H_2, R5_2
-	VMLOF T_0, H_3, R5_3
-	VMLOF T_0, H_4, R5_4
-	VLR   H_0, R_0
-	VLR   H_1, R_1
-	VLR   H_2, R_2
-	VLR   H_3, R_3
-	VLR   H_4, R_4
-
-	// initialize h
-	VZERO H_0
-	VZERO H_1
-	VZERO H_2
-	VZERO H_3
-	VZERO H_4
-
-loop:
-	CMPBLE R3, $32, b2
-	VLM    (R2), T_0, T_1
-	SUB    $32, R3
-	MOVD   $32(R2), R2
-	EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
-	VLEIB  $4, $1, F_4
-	VLEIB  $12, $1, F_4
-
-multiply:
-	VAG    H_0, F_0, F_0
-	VAG    H_1, F_1, F_1
-	VAG    H_2, F_2, F_2
-	VAG    H_3, F_3, F_3
-	VAG    H_4, F_4, F_4
-	MULTIPLY(F_0, F_1, F_2, F_3, F_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, H_0, H_1, H_2, H_3, H_4)
-	REDUCE(H_0, H_1, H_2, H_3, H_4)
-	CMPBNE R3, $0, loop
-
-finish:
-	// sum vectors
-	VZERO  T_0
-	VSUMQG H_0, T_0, H_0
-	VSUMQG H_1, T_0, H_1
-	VSUMQG H_2, T_0, H_2
-	VSUMQG H_3, T_0, H_3
-	VSUMQG H_4, T_0, H_4
-
-	// h may be >= 2*(2**130-5) so we need to reduce it again
-	REDUCE(H_0, H_1, H_2, H_3, H_4)
-
-	// carry h1->h4
-	VESRLG $26, H_1, T_1
-	VN     MOD26, H_1, H_1
-	VAQ    T_1, H_2, H_2
-	VESRLG $26, H_2, T_2
-	VN     MOD26, H_2, H_2
-	VAQ    T_2, H_3, H_3
-	VESRLG $26, H_3, T_3
-	VN     MOD26, H_3, H_3
-	VAQ    T_3, H_4, H_4
-
-	// h is now < 2*(2**130-5)
-	// pack h into h1 (hi) and h0 (lo)
-	PACK(H_0, H_1, H_2, H_3, H_4)
-
-	// if h > 2**130-5 then h -= 2**130-5
-	MOD(H_0, H_1, T_0, T_1, T_2)
-
-	// h += s
-	MOVD  $·bswapMask<>(SB), R5
-	VL    (R5), T_1
-	VL    16(R4), T_0
-	VPERM T_0, T_0, T_1, T_0    // reverse bytes (to big)
-	VAQ   T_0, H_0, H_0
-	VPERM H_0, H_0, T_1, H_0    // reverse bytes (to little)
-	VST   H_0, (R1)
-
-	RET
-
-b2:
-	CMPBLE R3, $16, b1
-
-	// 2 blocks remaining
-	SUB    $17, R3
-	VL     (R2), T_0
-	VLL    R3, 16(R2), T_1
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, T_1
-	EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $12, $1, F_4
-	VLEIB  $4, $1, F_4
-
-	// setup [r²,r]
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, RSAVE_3, R_3
-	VLVGG $1, RSAVE_4, R_4
-	VPDI  $0, R5_1, R5SAVE_1, R5_1
-	VPDI  $0, R5_2, R5SAVE_2, R5_2
-	VPDI  $0, R5_3, R5SAVE_3, R5_3
-	VPDI  $0, R5_4, R5SAVE_4, R5_4
-
-	MOVD $0, R3
-	BR   multiply
-
-skip:
-	VZERO H_0
-	VZERO H_1
-	VZERO H_2
-	VZERO H_3
-	VZERO H_4
-
-	CMPBEQ R3, $0, finish
-
-b1:
-	// 1 block remaining
-	SUB    $1, R3
-	VLL    R3, (R2), T_0
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, T_0
-	VZERO  T_1
-	EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4)
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $4, $1, F_4
-	VLEIG  $1, $1, R_0
-	VZERO  R_1
-	VZERO  R_2
-	VZERO  R_3
-	VZERO  R_4
-	VZERO  R5_1
-	VZERO  R5_2
-	VZERO  R5_3
-	VZERO  R5_4
-
-	// setup [r, 1]
-	VLVGG $0, RSAVE_0, R_0
-	VLVGG $0, RSAVE_1, R_1
-	VLVGG $0, RSAVE_2, R_2
-	VLVGG $0, RSAVE_3, R_3
-	VLVGG $0, RSAVE_4, R_4
-	VPDI  $0, R5SAVE_1, R5_1, R5_1
-	VPDI  $0, R5SAVE_2, R5_2, R5_2
-	VPDI  $0, R5SAVE_3, R5_3, R5_3
-	VPDI  $0, R5SAVE_4, R5_4, R5_4
-
-	MOVD $0, R3
-	BR   multiply
-
-TEXT ·hasVectorFacility(SB), NOSPLIT, $24-1
-	MOVD  $x-24(SP), R1
-	XC    $24, 0(R1), 0(R1) // clear the storage
-	MOVD  $2, R0            // R0 is the number of double words stored -1
-	WORD  $0xB2B01000       // STFLE 0(R1)
-	XOR   R0, R0            // reset the value of R0
-	MOVBZ z-8(SP), R1
-	AND   $0x40, R1
-	BEQ   novector
-
-vectorinstalled:
-	// check if the vector instruction has been enabled
-	VLEIB  $0, $0xF, V16
-	VLGVB  $0, V16, R1
-	CMPBNE R1, $0xF, novector
-	MOVB   $1, ret+0(FP)      // have vx
-	RET
-
-novector:
-	MOVB $0, ret+0(FP) // no vx
-	RET
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s b/vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s
deleted file mode 100644
index e548020..0000000
--- a/vendor/golang.org/x/crypto/poly1305/sum_vmsl_s390x.s
+++ /dev/null
@@ -1,931 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x,go1.11,!gccgo,!appengine
-
-#include "textflag.h"
-
-// Implementation of Poly1305 using the vector facility (vx) and the VMSL instruction.
-
-// constants
-#define EX0   V1
-#define EX1   V2
-#define EX2   V3
-
-// temporaries
-#define T_0 V4
-#define T_1 V5
-#define T_2 V6
-#define T_3 V7
-#define T_4 V8
-#define T_5 V9
-#define T_6 V10
-#define T_7 V11
-#define T_8 V12
-#define T_9 V13
-#define T_10 V14
-
-// r**2 & r**4
-#define R_0  V15
-#define R_1  V16
-#define R_2  V17
-#define R5_1 V18
-#define R5_2 V19
-// key (r)
-#define RSAVE_0 R7
-#define RSAVE_1 R8
-#define RSAVE_2 R9
-#define R5SAVE_1 R10
-#define R5SAVE_2 R11
-
-// message block
-#define M0 V20
-#define M1 V21
-#define M2 V22
-#define M3 V23
-#define M4 V24
-#define M5 V25
-
-// accumulator
-#define H0_0 V26
-#define H1_0 V27
-#define H2_0 V28
-#define H0_1 V29
-#define H1_1 V30
-#define H2_1 V31
-
-GLOBL ·keyMask<>(SB), RODATA, $16
-DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f
-DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f
-
-GLOBL ·bswapMask<>(SB), RODATA, $16
-DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908
-DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100
-
-GLOBL ·constants<>(SB), RODATA, $48
-// EX0
-DATA ·constants<>+0(SB)/8, $0x18191a1b1c1d1e1f
-DATA ·constants<>+8(SB)/8, $0x0000050403020100
-// EX1
-DATA ·constants<>+16(SB)/8, $0x18191a1b1c1d1e1f
-DATA ·constants<>+24(SB)/8, $0x00000a0908070605
-// EX2
-DATA ·constants<>+32(SB)/8, $0x18191a1b1c1d1e1f
-DATA ·constants<>+40(SB)/8, $0x0000000f0e0d0c0b
-
-GLOBL ·c<>(SB), RODATA, $48
-// EX0
-DATA ·c<>+0(SB)/8, $0x0000050403020100
-DATA ·c<>+8(SB)/8, $0x0000151413121110
-// EX1
-DATA ·c<>+16(SB)/8, $0x00000a0908070605
-DATA ·c<>+24(SB)/8, $0x00001a1918171615
-// EX2
-DATA ·c<>+32(SB)/8, $0x0000000f0e0d0c0b
-DATA ·c<>+40(SB)/8, $0x0000001f1e1d1c1b
-
-GLOBL ·reduce<>(SB), RODATA, $32
-// 44 bit
-DATA ·reduce<>+0(SB)/8, $0x0
-DATA ·reduce<>+8(SB)/8, $0xfffffffffff
-// 42 bit
-DATA ·reduce<>+16(SB)/8, $0x0
-DATA ·reduce<>+24(SB)/8, $0x3ffffffffff
-
-// h = (f*g) % (2**130-5) [partial reduction]
-// uses T_0...T_9 temporary registers
-// input: m02_0, m02_1, m02_2, m13_0, m13_1, m13_2, r_0, r_1, r_2, r5_1, r5_2, m4_0, m4_1, m4_2, m5_0, m5_1, m5_2
-// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
-// output: m02_0, m02_1, m02_2, m13_0, m13_1, m13_2
-#define MULTIPLY(m02_0, m02_1, m02_2, m13_0, m13_1, m13_2, r_0, r_1, r_2, r5_1, r5_2, m4_0, m4_1, m4_2, m5_0, m5_1, m5_2, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) \
-	\ // Eliminate the dependency for the last 2 VMSLs
-	VMSLG m02_0, r_2, m4_2, m4_2                       \
-	VMSLG m13_0, r_2, m5_2, m5_2                       \ // 8 VMSLs pipelined
-	VMSLG m02_0, r_0, m4_0, m4_0                       \
-	VMSLG m02_1, r5_2, V0, T_0                         \
-	VMSLG m02_0, r_1, m4_1, m4_1                       \
-	VMSLG m02_1, r_0, V0, T_1                          \
-	VMSLG m02_1, r_1, V0, T_2                          \
-	VMSLG m02_2, r5_1, V0, T_3                         \
-	VMSLG m02_2, r5_2, V0, T_4                         \
-	VMSLG m13_0, r_0, m5_0, m5_0                       \
-	VMSLG m13_1, r5_2, V0, T_5                         \
-	VMSLG m13_0, r_1, m5_1, m5_1                       \
-	VMSLG m13_1, r_0, V0, T_6                          \
-	VMSLG m13_1, r_1, V0, T_7                          \
-	VMSLG m13_2, r5_1, V0, T_8                         \
-	VMSLG m13_2, r5_2, V0, T_9                         \
-	VMSLG m02_2, r_0, m4_2, m4_2                       \
-	VMSLG m13_2, r_0, m5_2, m5_2                       \
-	VAQ   m4_0, T_0, m02_0                             \
-	VAQ   m4_1, T_1, m02_1                             \
-	VAQ   m5_0, T_5, m13_0                             \
-	VAQ   m5_1, T_6, m13_1                             \
-	VAQ   m02_0, T_3, m02_0                            \
-	VAQ   m02_1, T_4, m02_1                            \
-	VAQ   m13_0, T_8, m13_0                            \
-	VAQ   m13_1, T_9, m13_1                            \
-	VAQ   m4_2, T_2, m02_2                             \
-	VAQ   m5_2, T_7, m13_2                             \
-
-// SQUARE uses three limbs of r and r_2*5 to output square of r
-// uses T_1, T_5 and T_7 temporary registers
-// input: r_0, r_1, r_2, r5_2
-// temp: TEMP0, TEMP1, TEMP2
-// output: p0, p1, p2
-#define SQUARE(r_0, r_1, r_2, r5_2, p0, p1, p2, TEMP0, TEMP1, TEMP2) \
-	VMSLG r_0, r_0, p0, p0     \
-	VMSLG r_1, r5_2, V0, TEMP0 \
-	VMSLG r_2, r5_2, p1, p1    \
-	VMSLG r_0, r_1, V0, TEMP1  \
-	VMSLG r_1, r_1, p2, p2     \
-	VMSLG r_0, r_2, V0, TEMP2  \
-	VAQ   TEMP0, p0, p0        \
-	VAQ   TEMP1, p1, p1        \
-	VAQ   TEMP2, p2, p2        \
-	VAQ   TEMP0, p0, p0        \
-	VAQ   TEMP1, p1, p1        \
-	VAQ   TEMP2, p2, p2        \
-
-// carry h0->h1->h2->h0 || h3->h4->h5->h3
-// uses T_2, T_4, T_5, T_7, T_8, T_9
-//       t6,  t7,  t8,  t9, t10, t11
-// input: h0, h1, h2, h3, h4, h5
-// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11
-// output: h0, h1, h2, h3, h4, h5
-#define REDUCE(h0, h1, h2, h3, h4, h5, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11) \
-	VLM    (R12), t6, t7  \ // 44 and 42 bit clear mask
-	VLEIB  $7, $0x28, t10 \ // 5 byte shift mask
-	VREPIB $4, t8         \ // 4 bit shift mask
-	VREPIB $2, t11        \ // 2 bit shift mask
-	VSRLB  t10, h0, t0    \ // h0 byte shift
-	VSRLB  t10, h1, t1    \ // h1 byte shift
-	VSRLB  t10, h2, t2    \ // h2 byte shift
-	VSRLB  t10, h3, t3    \ // h3 byte shift
-	VSRLB  t10, h4, t4    \ // h4 byte shift
-	VSRLB  t10, h5, t5    \ // h5 byte shift
-	VSRL   t8, t0, t0     \ // h0 bit shift
-	VSRL   t8, t1, t1     \ // h2 bit shift
-	VSRL   t11, t2, t2    \ // h2 bit shift
-	VSRL   t8, t3, t3     \ // h3 bit shift
-	VSRL   t8, t4, t4     \ // h4 bit shift
-	VESLG  $2, t2, t9     \ // h2 carry x5
-	VSRL   t11, t5, t5    \ // h5 bit shift
-	VN     t6, h0, h0     \ // h0 clear carry
-	VAQ    t2, t9, t2     \ // h2 carry x5
-	VESLG  $2, t5, t9     \ // h5 carry x5
-	VN     t6, h1, h1     \ // h1 clear carry
-	VN     t7, h2, h2     \ // h2 clear carry
-	VAQ    t5, t9, t5     \ // h5 carry x5
-	VN     t6, h3, h3     \ // h3 clear carry
-	VN     t6, h4, h4     \ // h4 clear carry
-	VN     t7, h5, h5     \ // h5 clear carry
-	VAQ    t0, h1, h1     \ // h0->h1
-	VAQ    t3, h4, h4     \ // h3->h4
-	VAQ    t1, h2, h2     \ // h1->h2
-	VAQ    t4, h5, h5     \ // h4->h5
-	VAQ    t2, h0, h0     \ // h2->h0
-	VAQ    t5, h3, h3     \ // h5->h3
-	VREPG  $1, t6, t6     \ // 44 and 42 bit masks across both halves
-	VREPG  $1, t7, t7     \
-	VSLDB  $8, h0, h0, h0 \ // set up [h0/1/2, h3/4/5]
-	VSLDB  $8, h1, h1, h1 \
-	VSLDB  $8, h2, h2, h2 \
-	VO     h0, h3, h3     \
-	VO     h1, h4, h4     \
-	VO     h2, h5, h5     \
-	VESRLG $44, h3, t0    \ // 44 bit shift right
-	VESRLG $44, h4, t1    \
-	VESRLG $42, h5, t2    \
-	VN     t6, h3, h3     \ // clear carry bits
-	VN     t6, h4, h4     \
-	VN     t7, h5, h5     \
-	VESLG  $2, t2, t9     \ // multiply carry by 5
-	VAQ    t9, t2, t2     \
-	VAQ    t0, h4, h4     \
-	VAQ    t1, h5, h5     \
-	VAQ    t2, h3, h3     \
-
-// carry h0->h1->h2->h0
-// input: h0, h1, h2
-// temp: t0, t1, t2, t3, t4, t5, t6, t7, t8
-// output: h0, h1, h2
-#define REDUCE2(h0, h1, h2, t0, t1, t2, t3, t4, t5, t6, t7, t8) \
-	VLEIB  $7, $0x28, t3 \ // 5 byte shift mask
-	VREPIB $4, t4        \ // 4 bit shift mask
-	VREPIB $2, t7        \ // 2 bit shift mask
-	VGBM   $0x003F, t5   \ // mask to clear carry bits
-	VSRLB  t3, h0, t0    \
-	VSRLB  t3, h1, t1    \
-	VSRLB  t3, h2, t2    \
-	VESRLG $4, t5, t5    \ // 44 bit clear mask
-	VSRL   t4, t0, t0    \
-	VSRL   t4, t1, t1    \
-	VSRL   t7, t2, t2    \
-	VESRLG $2, t5, t6    \ // 42 bit clear mask
-	VESLG  $2, t2, t8    \
-	VAQ    t8, t2, t2    \
-	VN     t5, h0, h0    \
-	VN     t5, h1, h1    \
-	VN     t6, h2, h2    \
-	VAQ    t0, h1, h1    \
-	VAQ    t1, h2, h2    \
-	VAQ    t2, h0, h0    \
-	VSRLB  t3, h0, t0    \
-	VSRLB  t3, h1, t1    \
-	VSRLB  t3, h2, t2    \
-	VSRL   t4, t0, t0    \
-	VSRL   t4, t1, t1    \
-	VSRL   t7, t2, t2    \
-	VN     t5, h0, h0    \
-	VN     t5, h1, h1    \
-	VESLG  $2, t2, t8    \
-	VN     t6, h2, h2    \
-	VAQ    t0, h1, h1    \
-	VAQ    t8, t2, t2    \
-	VAQ    t1, h2, h2    \
-	VAQ    t2, h0, h0    \
-
-// expands two message blocks into the lower halfs of the d registers
-// moves the contents of the d registers into upper halfs
-// input: in1, in2, d0, d1, d2, d3, d4, d5
-// temp: TEMP0, TEMP1, TEMP2, TEMP3
-// output: d0, d1, d2, d3, d4, d5
-#define EXPACC(in1, in2, d0, d1, d2, d3, d4, d5, TEMP0, TEMP1, TEMP2, TEMP3) \
-	VGBM   $0xff3f, TEMP0      \
-	VGBM   $0xff1f, TEMP1      \
-	VESLG  $4, d1, TEMP2       \
-	VESLG  $4, d4, TEMP3       \
-	VESRLG $4, TEMP0, TEMP0    \
-	VPERM  in1, d0, EX0, d0    \
-	VPERM  in2, d3, EX0, d3    \
-	VPERM  in1, d2, EX2, d2    \
-	VPERM  in2, d5, EX2, d5    \
-	VPERM  in1, TEMP2, EX1, d1 \
-	VPERM  in2, TEMP3, EX1, d4 \
-	VN     TEMP0, d0, d0       \
-	VN     TEMP0, d3, d3       \
-	VESRLG $4, d1, d1          \
-	VESRLG $4, d4, d4          \
-	VN     TEMP1, d2, d2       \
-	VN     TEMP1, d5, d5       \
-	VN     TEMP0, d1, d1       \
-	VN     TEMP0, d4, d4       \
-
-// expands one message block into the lower halfs of the d registers
-// moves the contents of the d registers into upper halfs
-// input: in, d0, d1, d2
-// temp: TEMP0, TEMP1, TEMP2
-// output: d0, d1, d2
-#define EXPACC2(in, d0, d1, d2, TEMP0, TEMP1, TEMP2) \
-	VGBM   $0xff3f, TEMP0     \
-	VESLG  $4, d1, TEMP2      \
-	VGBM   $0xff1f, TEMP1     \
-	VPERM  in, d0, EX0, d0    \
-	VESRLG $4, TEMP0, TEMP0   \
-	VPERM  in, d2, EX2, d2    \
-	VPERM  in, TEMP2, EX1, d1 \
-	VN     TEMP0, d0, d0      \
-	VN     TEMP1, d2, d2      \
-	VESRLG $4, d1, d1         \
-	VN     TEMP0, d1, d1      \
-
-// pack h2:h0 into h1:h0 (no carry)
-// input: h0, h1, h2
-// output: h0, h1, h2
-#define PACK(h0, h1, h2) \
-	VMRLG  h1, h2, h2  \ // copy h1 to upper half h2
-	VESLG  $44, h1, h1 \ // shift limb 1 44 bits, leaving 20
-	VO     h0, h1, h0  \ // combine h0 with 20 bits from limb 1
-	VESRLG $20, h2, h1 \ // put top 24 bits of limb 1 into h1
-	VLEIG  $1, $0, h1  \ // clear h2 stuff from lower half of h1
-	VO     h0, h1, h0  \ // h0 now has 88 bits (limb 0 and 1)
-	VLEIG  $0, $0, h2  \ // clear upper half of h2
-	VESRLG $40, h2, h1 \ // h1 now has upper two bits of result
-	VLEIB  $7, $88, h1 \ // for byte shift (11 bytes)
-	VSLB   h1, h2, h2  \ // shift h2 11 bytes to the left
-	VO     h0, h2, h0  \ // combine h0 with 20 bits from limb 1
-	VLEIG  $0, $0, h1  \ // clear upper half of h1
-
-// if h > 2**130-5 then h -= 2**130-5
-// input: h0, h1
-// temp: t0, t1, t2
-// output: h0
-#define MOD(h0, h1, t0, t1, t2) \
-	VZERO t0          \
-	VLEIG $1, $5, t0  \
-	VACCQ h0, t0, t1  \
-	VAQ   h0, t0, t0  \
-	VONE  t2          \
-	VLEIG $1, $-4, t2 \
-	VAQ   t2, t1, t1  \
-	VACCQ h1, t1, t1  \
-	VONE  t2          \
-	VAQ   t2, t1, t1  \
-	VN    h0, t1, t2  \
-	VNC   t0, t1, t1  \
-	VO    t1, t2, h0  \
-
-// func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]key)
-TEXT ·poly1305vmsl(SB), $0-32
-	// This code processes 6 + up to 4 blocks (32 bytes) per iteration
-	// using the algorithm described in:
-	// NEON crypto, Daniel J. Bernstein & Peter Schwabe
-	// https://cryptojedi.org/papers/neoncrypto-20120320.pdf
-	// And as moddified for VMSL as described in
-	// Accelerating Poly1305 Cryptographic Message Authentication on the z14
-	// O'Farrell et al, CASCON 2017, p48-55
-	// https://ibm.ent.box.com/s/jf9gedj0e9d2vjctfyh186shaztavnht
-
-	LMG   out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key
-	VZERO V0                // c
-
-	// load EX0, EX1 and EX2
-	MOVD $·constants<>(SB), R5
-	VLM  (R5), EX0, EX2        // c
-
-	// setup r
-	VL    (R4), T_0
-	MOVD  $·keyMask<>(SB), R6
-	VL    (R6), T_1
-	VN    T_0, T_1, T_0
-	VZERO T_2                 // limbs for r
-	VZERO T_3
-	VZERO T_4
-	EXPACC2(T_0, T_2, T_3, T_4, T_1, T_5, T_7)
-
-	// T_2, T_3, T_4: [0, r]
-
-	// setup r*20
-	VLEIG $0, $0, T_0
-	VLEIG $1, $20, T_0       // T_0: [0, 20]
-	VZERO T_5
-	VZERO T_6
-	VMSLG T_0, T_3, T_5, T_5
-	VMSLG T_0, T_4, T_6, T_6
-
-	// store r for final block in GR
-	VLGVG $1, T_2, RSAVE_0  // c
-	VLGVG $1, T_3, RSAVE_1  // c
-	VLGVG $1, T_4, RSAVE_2  // c
-	VLGVG $1, T_5, R5SAVE_1 // c
-	VLGVG $1, T_6, R5SAVE_2 // c
-
-	// initialize h
-	VZERO H0_0
-	VZERO H1_0
-	VZERO H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	// initialize pointer for reduce constants
-	MOVD $·reduce<>(SB), R12
-
-	// calculate r**2 and 20*(r**2)
-	VZERO R_0
-	VZERO R_1
-	VZERO R_2
-	SQUARE(T_2, T_3, T_4, T_6, R_0, R_1, R_2, T_1, T_5, T_7)
-	REDUCE2(R_0, R_1, R_2, M0, M1, M2, M3, M4, R5_1, R5_2, M5, T_1)
-	VZERO R5_1
-	VZERO R5_2
-	VMSLG T_0, R_1, R5_1, R5_1
-	VMSLG T_0, R_2, R5_2, R5_2
-
-	// skip r**4 calculation if 3 blocks or less
-	CMPBLE R3, $48, b4
-
-	// calculate r**4 and 20*(r**4)
-	VZERO T_8
-	VZERO T_9
-	VZERO T_10
-	SQUARE(R_0, R_1, R_2, R5_2, T_8, T_9, T_10, T_1, T_5, T_7)
-	REDUCE2(T_8, T_9, T_10, M0, M1, M2, M3, M4, T_2, T_3, M5, T_1)
-	VZERO T_2
-	VZERO T_3
-	VMSLG T_0, T_9, T_2, T_2
-	VMSLG T_0, T_10, T_3, T_3
-
-	// put r**2 to the right and r**4 to the left of R_0, R_1, R_2
-	VSLDB $8, T_8, T_8, T_8
-	VSLDB $8, T_9, T_9, T_9
-	VSLDB $8, T_10, T_10, T_10
-	VSLDB $8, T_2, T_2, T_2
-	VSLDB $8, T_3, T_3, T_3
-
-	VO T_8, R_0, R_0
-	VO T_9, R_1, R_1
-	VO T_10, R_2, R_2
-	VO T_2, R5_1, R5_1
-	VO T_3, R5_2, R5_2
-
-	CMPBLE R3, $80, load // less than or equal to 5 blocks in message
-
-	// 6(or 5+1) blocks
-	SUB    $81, R3
-	VLM    (R2), M0, M4
-	VLL    R3, 80(R2), M5
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBGE R3, $16, 2(PC)
-	VLVGB  R3, R0, M5
-	MOVD   $96(R2), R2
-	EXPACC(M0, M1, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
-	EXPACC(M2, M3, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
-	VLEIB  $2, $1, H2_0
-	VLEIB  $2, $1, H2_1
-	VLEIB  $10, $1, H2_0
-	VLEIB  $10, $1, H2_1
-
-	VZERO  M0
-	VZERO  M1
-	VZERO  M2
-	VZERO  M3
-	VZERO  T_4
-	VZERO  T_10
-	EXPACC(M4, M5, M0, M1, M2, M3, T_4, T_10, T_0, T_1, T_2, T_3)
-	VLR    T_4, M4
-	VLEIB  $10, $1, M2
-	CMPBLT R3, $16, 2(PC)
-	VLEIB  $10, $1, T_10
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M2, M3, M4, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG  V0, H0_1, H0_0
-	VMRHG  V0, H1_1, H1_0
-	VMRHG  V0, H2_1, H2_0
-	VMRLG  V0, H0_1, H0_1
-	VMRLG  V0, H1_1, H1_1
-	VMRLG  V0, H2_1, H2_1
-
-	SUB    $16, R3
-	CMPBLE R3, $0, square
-
-load:
-	// load EX0, EX1 and EX2
-	MOVD $·c<>(SB), R5
-	VLM  (R5), EX0, EX2
-
-loop:
-	CMPBLE R3, $64, add // b4	// last 4 or less blocks left
-
-	// next 4 full blocks
-	VLM  (R2), M2, M5
-	SUB  $64, R3
-	MOVD $64(R2), R2
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, T_0, T_1, T_3, T_4, T_5, T_2, T_7, T_8, T_9)
-
-	// expacc in-lined to create [m2, m3] limbs
-	VGBM   $0x3f3f, T_0     // 44 bit clear mask
-	VGBM   $0x1f1f, T_1     // 40 bit clear mask
-	VPERM  M2, M3, EX0, T_3
-	VESRLG $4, T_0, T_0     // 44 bit clear mask ready
-	VPERM  M2, M3, EX1, T_4
-	VPERM  M2, M3, EX2, T_5
-	VN     T_0, T_3, T_3
-	VESRLG $4, T_4, T_4
-	VN     T_1, T_5, T_5
-	VN     T_0, T_4, T_4
-	VMRHG  H0_1, T_3, H0_0
-	VMRHG  H1_1, T_4, H1_0
-	VMRHG  H2_1, T_5, H2_0
-	VMRLG  H0_1, T_3, H0_1
-	VMRLG  H1_1, T_4, H1_1
-	VMRLG  H2_1, T_5, H2_1
-	VLEIB  $10, $1, H2_0
-	VLEIB  $10, $1, H2_1
-	VPERM  M4, M5, EX0, T_3
-	VPERM  M4, M5, EX1, T_4
-	VPERM  M4, M5, EX2, T_5
-	VN     T_0, T_3, T_3
-	VESRLG $4, T_4, T_4
-	VN     T_1, T_5, T_5
-	VN     T_0, T_4, T_4
-	VMRHG  V0, T_3, M0
-	VMRHG  V0, T_4, M1
-	VMRHG  V0, T_5, M2
-	VMRLG  V0, T_3, M3
-	VMRLG  V0, T_4, M4
-	VMRLG  V0, T_5, M5
-	VLEIB  $10, $1, M2
-	VLEIB  $10, $1, M5
-
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	CMPBNE R3, $0, loop
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG  V0, H0_1, H0_0
-	VMRHG  V0, H1_1, H1_0
-	VMRHG  V0, H2_1, H2_0
-	VMRLG  V0, H0_1, H0_1
-	VMRLG  V0, H1_1, H1_1
-	VMRLG  V0, H2_1, H2_1
-
-	// load EX0, EX1, EX2
-	MOVD $·constants<>(SB), R5
-	VLM  (R5), EX0, EX2
-
-	// sum vectors
-	VAQ H0_0, H0_1, H0_0
-	VAQ H1_0, H1_1, H1_0
-	VAQ H2_0, H2_1, H2_0
-
-	// h may be >= 2*(2**130-5) so we need to reduce it again
-	// M0...M4 are used as temps here
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
-
-next:  // carry h1->h2
-	VLEIB  $7, $0x28, T_1
-	VREPIB $4, T_2
-	VGBM   $0x003F, T_3
-	VESRLG $4, T_3
-
-	// byte shift
-	VSRLB T_1, H1_0, T_4
-
-	// bit shift
-	VSRL T_2, T_4, T_4
-
-	// clear h1 carry bits
-	VN T_3, H1_0, H1_0
-
-	// add carry
-	VAQ T_4, H2_0, H2_0
-
-	// h is now < 2*(2**130-5)
-	// pack h into h1 (hi) and h0 (lo)
-	PACK(H0_0, H1_0, H2_0)
-
-	// if h > 2**130-5 then h -= 2**130-5
-	MOD(H0_0, H1_0, T_0, T_1, T_2)
-
-	// h += s
-	MOVD  $·bswapMask<>(SB), R5
-	VL    (R5), T_1
-	VL    16(R4), T_0
-	VPERM T_0, T_0, T_1, T_0    // reverse bytes (to big)
-	VAQ   T_0, H0_0, H0_0
-	VPERM H0_0, H0_0, T_1, H0_0 // reverse bytes (to little)
-	VST   H0_0, (R1)
-	RET
-
-add:
-	// load EX0, EX1, EX2
-	MOVD $·constants<>(SB), R5
-	VLM  (R5), EX0, EX2
-
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG  V0, H0_1, H0_0
-	VMRHG  V0, H1_1, H1_0
-	VMRHG  V0, H2_1, H2_0
-	VMRLG  V0, H0_1, H0_1
-	VMRLG  V0, H1_1, H1_1
-	VMRLG  V0, H2_1, H2_1
-	CMPBLE R3, $64, b4
-
-b4:
-	CMPBLE R3, $48, b3 // 3 blocks or less
-
-	// 4(3+1) blocks remaining
-	SUB    $49, R3
-	VLM    (R2), M0, M2
-	VLL    R3, 48(R2), M3
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, M3
-	MOVD   $64(R2), R2
-	EXPACC(M0, M1, H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_0, T_1, T_2, T_3)
-	VLEIB  $10, $1, H2_0
-	VLEIB  $10, $1, H2_1
-	VZERO  M0
-	VZERO  M1
-	VZERO  M4
-	VZERO  M5
-	VZERO  T_4
-	VZERO  T_10
-	EXPACC(M2, M3, M0, M1, M4, M5, T_4, T_10, T_0, T_1, T_2, T_3)
-	VLR    T_4, M2
-	VLEIB  $10, $1, M4
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $10, $1, T_10
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M4, M5, M2, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M3, M4, M5, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG  V0, H0_1, H0_0
-	VMRHG  V0, H1_1, H1_0
-	VMRHG  V0, H2_1, H2_0
-	VMRLG  V0, H0_1, H0_1
-	VMRLG  V0, H1_1, H1_1
-	VMRLG  V0, H2_1, H2_1
-	SUB    $16, R3
-	CMPBLE R3, $0, square // this condition must always hold true!
-
-b3:
-	CMPBLE R3, $32, b2
-
-	// 3 blocks remaining
-
-	// setup [r²,r]
-	VSLDB $8, R_0, R_0, R_0
-	VSLDB $8, R_1, R_1, R_1
-	VSLDB $8, R_2, R_2, R_2
-	VSLDB $8, R5_1, R5_1, R5_1
-	VSLDB $8, R5_2, R5_2, R5_2
-
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, R5SAVE_1, R5_1
-	VLVGG $1, R5SAVE_2, R5_2
-
-	// setup [h0, h1]
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-	VO    H0_1, H0_0, H0_0
-	VO    H1_1, H1_0, H1_0
-	VO    H2_1, H2_0, H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-
-	// H*[r**2, r]
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, H0_1, H1_1, T_10, M5)
-
-	SUB    $33, R3
-	VLM    (R2), M0, M1
-	VLL    R3, 32(R2), M2
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, M2
-
-	// H += m0
-	VZERO T_1
-	VZERO T_2
-	VZERO T_3
-	EXPACC2(M0, T_1, T_2, T_3, T_4, T_5, T_6)
-	VLEIB $10, $1, T_3
-	VAG   H0_0, T_1, H0_0
-	VAG   H1_0, T_2, H1_0
-	VAG   H2_0, T_3, H2_0
-
-	VZERO M0
-	VZERO M3
-	VZERO M4
-	VZERO M5
-	VZERO T_10
-
-	// (H+m0)*r
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M3, M4, M5, V0, T_10, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M3, M4, M5, T_10, H0_1, H1_1, H2_1, T_9)
-
-	// H += m1
-	VZERO V0
-	VZERO T_1
-	VZERO T_2
-	VZERO T_3
-	EXPACC2(M1, T_1, T_2, T_3, T_4, T_5, T_6)
-	VLEIB $10, $1, T_3
-	VAQ   H0_0, T_1, H0_0
-	VAQ   H1_0, T_2, H1_0
-	VAQ   H2_0, T_3, H2_0
-	REDUCE2(H0_0, H1_0, H2_0, M0, M3, M4, M5, T_9, H0_1, H1_1, H2_1, T_10)
-
-	// [H, m2] * [r**2, r]
-	EXPACC2(M2, H0_0, H1_0, H2_0, T_1, T_2, T_3)
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $10, $1, H2_0
-	VZERO  M0
-	VZERO  M1
-	VZERO  M2
-	VZERO  M3
-	VZERO  M4
-	VZERO  M5
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, H0_1, H1_1, M5, T_10)
-	SUB    $16, R3
-	CMPBLE R3, $0, next   // this condition must always hold true!
-
-b2:
-	CMPBLE R3, $16, b1
-
-	// 2 blocks remaining
-
-	// setup [r²,r]
-	VSLDB $8, R_0, R_0, R_0
-	VSLDB $8, R_1, R_1, R_1
-	VSLDB $8, R_2, R_2, R_2
-	VSLDB $8, R5_1, R5_1, R5_1
-	VSLDB $8, R5_2, R5_2, R5_2
-
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, R5SAVE_1, R5_1
-	VLVGG $1, R5SAVE_2, R5_2
-
-	// setup [h0, h1]
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-	VO    H0_1, H0_0, H0_0
-	VO    H1_1, H1_0, H1_0
-	VO    H2_1, H2_0, H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-
-	// H*[r**2, r]
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, T_10, M0, M1, M2, M3, M4, T_4, T_5, T_2, T_7, T_8, T_9)
-	VMRHG V0, H0_1, H0_0
-	VMRHG V0, H1_1, H1_0
-	VMRHG V0, H2_1, H2_0
-	VMRLG V0, H0_1, H0_1
-	VMRLG V0, H1_1, H1_1
-	VMRLG V0, H2_1, H2_1
-
-	// move h to the left and 0s at the right
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-
-	// get message blocks and append 1 to start
-	SUB    $17, R3
-	VL     (R2), M0
-	VLL    R3, 16(R2), M1
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, M1
-	VZERO  T_6
-	VZERO  T_7
-	VZERO  T_8
-	EXPACC2(M0, T_6, T_7, T_8, T_1, T_2, T_3)
-	EXPACC2(M1, T_6, T_7, T_8, T_1, T_2, T_3)
-	VLEIB  $2, $1, T_8
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $10, $1, T_8
-
-	// add [m0, m1] to h
-	VAG H0_0, T_6, H0_0
-	VAG H1_0, T_7, H1_0
-	VAG H2_0, T_8, H2_0
-
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-	VZERO T_10
-	VZERO M0
-
-	// at this point R_0 .. R5_2 look like [r**2, r]
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M2, M3, M4, M5, T_10, M0, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M2, M3, M4, M5, T_9, H0_1, H1_1, H2_1, T_10)
-	SUB    $16, R3, R3
-	CMPBLE R3, $0, next
-
-b1:
-	CMPBLE R3, $0, next
-
-	// 1 block remaining
-
-	// setup [r²,r]
-	VSLDB $8, R_0, R_0, R_0
-	VSLDB $8, R_1, R_1, R_1
-	VSLDB $8, R_2, R_2, R_2
-	VSLDB $8, R5_1, R5_1, R5_1
-	VSLDB $8, R5_2, R5_2, R5_2
-
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, R5SAVE_1, R5_1
-	VLVGG $1, R5SAVE_2, R5_2
-
-	// setup [h0, h1]
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-	VO    H0_1, H0_0, H0_0
-	VO    H1_1, H1_0, H1_0
-	VO    H2_1, H2_0, H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-
-	// H*[r**2, r]
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
-
-	// set up [0, m0] limbs
-	SUB    $1, R3
-	VLL    R3, (R2), M0
-	ADD    $1, R3
-	MOVBZ  $1, R0
-	CMPBEQ R3, $16, 2(PC)
-	VLVGB  R3, R0, M0
-	VZERO  T_1
-	VZERO  T_2
-	VZERO  T_3
-	EXPACC2(M0, T_1, T_2, T_3, T_4, T_5, T_6)// limbs: [0, m]
-	CMPBNE R3, $16, 2(PC)
-	VLEIB  $10, $1, T_3
-
-	// h+m0
-	VAQ H0_0, T_1, H0_0
-	VAQ H1_0, T_2, H1_0
-	VAQ H2_0, T_3, H2_0
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
-
-	BR next
-
-square:
-	// setup [r²,r]
-	VSLDB $8, R_0, R_0, R_0
-	VSLDB $8, R_1, R_1, R_1
-	VSLDB $8, R_2, R_2, R_2
-	VSLDB $8, R5_1, R5_1, R5_1
-	VSLDB $8, R5_2, R5_2, R5_2
-
-	VLVGG $1, RSAVE_0, R_0
-	VLVGG $1, RSAVE_1, R_1
-	VLVGG $1, RSAVE_2, R_2
-	VLVGG $1, R5SAVE_1, R5_1
-	VLVGG $1, R5SAVE_2, R5_2
-
-	// setup [h0, h1]
-	VSLDB $8, H0_0, H0_0, H0_0
-	VSLDB $8, H1_0, H1_0, H1_0
-	VSLDB $8, H2_0, H2_0, H2_0
-	VO    H0_1, H0_0, H0_0
-	VO    H1_1, H1_0, H1_0
-	VO    H2_1, H2_0, H2_0
-	VZERO H0_1
-	VZERO H1_1
-	VZERO H2_1
-
-	VZERO M0
-	VZERO M1
-	VZERO M2
-	VZERO M3
-	VZERO M4
-	VZERO M5
-
-	// (h0*r**2) + (h1*r)
-	MULTIPLY(H0_0, H1_0, H2_0, H0_1, H1_1, H2_1, R_0, R_1, R_2, R5_1, R5_2, M0, M1, M2, M3, M4, M5, T_0, T_1, T_2, T_3, T_4, T_5, T_6, T_7, T_8, T_9)
-	REDUCE2(H0_0, H1_0, H2_0, M0, M1, M2, M3, M4, T_9, T_10, H0_1, M5)
-	BR next
-
-TEXT ·hasVMSLFacility(SB), NOSPLIT, $24-1
-	MOVD  $x-24(SP), R1
-	XC    $24, 0(R1), 0(R1) // clear the storage
-	MOVD  $2, R0            // R0 is the number of double words stored -1
-	WORD  $0xB2B01000       // STFLE 0(R1)
-	XOR   R0, R0            // reset the value of R0
-	MOVBZ z-8(SP), R1
-	AND   $0x01, R1
-	BEQ   novmsl
-
-vectorinstalled:
-	// check if the vector instruction has been enabled
-	VLEIB  $0, $0xF, V16
-	VLGVB  $0, V16, R1
-	CMPBNE R1, $0xF, novmsl
-	MOVB   $1, ret+0(FP)    // have vx
-	RET
-
-novmsl:
-	MOVB $0, ret+0(FP) // no vx
-	RET
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go
index f9269c3..c400dfc 100644
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.go
@@ -2,14 +2,14 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build amd64,!appengine,!gccgo
+//go:build amd64 && !purego && gc
+// +build amd64,!purego,gc
 
 package salsa
 
-// This function is implemented in salsa2020_amd64.s.
-
 //go:noescape
 
+// salsa2020XORKeyStream is implemented in salsa20_amd64.s.
 func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
 
 // XORKeyStream crypts bytes from in to out using the given key and counters.
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
index 22afbdc..c089277 100644
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa2020_amd64.s
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
@@ -2,13 +2,14 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build amd64,!appengine,!gccgo
+//go:build amd64 && !purego && gc
+// +build amd64,!purego,gc
 
 // This code was translated into a form compatible with 6a from the public
 // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
 
 // func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
-// This needs up to 64 bytes at 360(SP); hence the non-obvious frame size.
+// This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.
 TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVQ out+0(FP),DI
 	MOVQ in+8(FP),SI
@@ -17,10 +18,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVQ key+32(FP),R8
 
 	MOVQ SP,R12
-	MOVQ SP,R9
-	ADDQ $31, R9
-	ANDQ $~31, R9
-	MOVQ R9, SP
+	ADDQ $31, R12
+	ANDQ $~31, R12
 
 	MOVQ DX,R9
 	MOVQ CX,DX
@@ -32,122 +31,116 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVL 0(R10),R8
 	MOVL 0(DX),AX
 	MOVL 16(R10),R11
-	MOVL CX,0(SP)
-	MOVL R8, 4 (SP)
-	MOVL AX, 8 (SP)
-	MOVL R11, 12 (SP)
+	MOVL CX,0(R12)
+	MOVL R8, 4 (R12)
+	MOVL AX, 8 (R12)
+	MOVL R11, 12 (R12)
 	MOVL 8(DX),CX
 	MOVL 24(R10),R8
 	MOVL 4(R10),AX
 	MOVL 4(DX),R11
-	MOVL CX,16(SP)
-	MOVL R8, 20 (SP)
-	MOVL AX, 24 (SP)
-	MOVL R11, 28 (SP)
+	MOVL CX,16(R12)
+	MOVL R8, 20 (R12)
+	MOVL AX, 24 (R12)
+	MOVL R11, 28 (R12)
 	MOVL 12(DX),CX
 	MOVL 12(R10),DX
 	MOVL 28(R10),R8
 	MOVL 8(R10),AX
-	MOVL DX,32(SP)
-	MOVL CX, 36 (SP)
-	MOVL R8, 40 (SP)
-	MOVL AX, 44 (SP)
+	MOVL DX,32(R12)
+	MOVL CX, 36 (R12)
+	MOVL R8, 40 (R12)
+	MOVL AX, 44 (R12)
 	MOVQ $1634760805,DX
 	MOVQ $857760878,CX
 	MOVQ $2036477234,R8
 	MOVQ $1797285236,AX
-	MOVL DX,48(SP)
-	MOVL CX, 52 (SP)
-	MOVL R8, 56 (SP)
-	MOVL AX, 60 (SP)
+	MOVL DX,48(R12)
+	MOVL CX, 52 (R12)
+	MOVL R8, 56 (R12)
+	MOVL AX, 60 (R12)
 	CMPQ R9,$256
 	JB BYTESBETWEEN1AND255
-	MOVOA 48(SP),X0
+	MOVOA 48(R12),X0
 	PSHUFL $0X55,X0,X1
 	PSHUFL $0XAA,X0,X2
 	PSHUFL $0XFF,X0,X3
 	PSHUFL $0X00,X0,X0
-	MOVOA X1,64(SP)
-	MOVOA X2,80(SP)
-	MOVOA X3,96(SP)
-	MOVOA X0,112(SP)
-	MOVOA 0(SP),X0
+	MOVOA X1,64(R12)
+	MOVOA X2,80(R12)
+	MOVOA X3,96(R12)
+	MOVOA X0,112(R12)
+	MOVOA 0(R12),X0
 	PSHUFL $0XAA,X0,X1
 	PSHUFL $0XFF,X0,X2
 	PSHUFL $0X00,X0,X3
 	PSHUFL $0X55,X0,X0
-	MOVOA X1,128(SP)
-	MOVOA X2,144(SP)
-	MOVOA X3,160(SP)
-	MOVOA X0,176(SP)
-	MOVOA 16(SP),X0
+	MOVOA X1,128(R12)
+	MOVOA X2,144(R12)
+	MOVOA X3,160(R12)
+	MOVOA X0,176(R12)
+	MOVOA 16(R12),X0
 	PSHUFL $0XFF,X0,X1
 	PSHUFL $0X55,X0,X2
 	PSHUFL $0XAA,X0,X0
-	MOVOA X1,192(SP)
-	MOVOA X2,208(SP)
-	MOVOA X0,224(SP)
-	MOVOA 32(SP),X0
+	MOVOA X1,192(R12)
+	MOVOA X2,208(R12)
+	MOVOA X0,224(R12)
+	MOVOA 32(R12),X0
 	PSHUFL $0X00,X0,X1
 	PSHUFL $0XAA,X0,X2
 	PSHUFL $0XFF,X0,X0
-	MOVOA X1,240(SP)
-	MOVOA X2,256(SP)
-	MOVOA X0,272(SP)
+	MOVOA X1,240(R12)
+	MOVOA X2,256(R12)
+	MOVOA X0,272(R12)
 	BYTESATLEAST256:
-	MOVL 16(SP),DX
-	MOVL  36 (SP),CX
-	MOVL DX,288(SP)
-	MOVL CX,304(SP)
-	ADDQ $1,DX
+	MOVL 16(R12),DX
+	MOVL  36 (R12),CX
+	MOVL DX,288(R12)
+	MOVL CX,304(R12)
 	SHLQ $32,CX
 	ADDQ CX,DX
+	ADDQ $1,DX
 	MOVQ DX,CX
 	SHRQ $32,CX
-	MOVL DX, 292 (SP)
-	MOVL CX, 308 (SP)
+	MOVL DX, 292 (R12)
+	MOVL CX, 308 (R12)
 	ADDQ $1,DX
-	SHLQ $32,CX
-	ADDQ CX,DX
 	MOVQ DX,CX
 	SHRQ $32,CX
-	MOVL DX, 296 (SP)
-	MOVL CX, 312 (SP)
+	MOVL DX, 296 (R12)
+	MOVL CX, 312 (R12)
 	ADDQ $1,DX
-	SHLQ $32,CX
-	ADDQ CX,DX
 	MOVQ DX,CX
 	SHRQ $32,CX
-	MOVL DX, 300 (SP)
-	MOVL CX, 316 (SP)
+	MOVL DX, 300 (R12)
+	MOVL CX, 316 (R12)
 	ADDQ $1,DX
-	SHLQ $32,CX
-	ADDQ CX,DX
 	MOVQ DX,CX
 	SHRQ $32,CX
-	MOVL DX,16(SP)
-	MOVL CX, 36 (SP)
-	MOVQ R9,352(SP)
+	MOVL DX,16(R12)
+	MOVL CX, 36 (R12)
+	MOVQ R9,352(R12)
 	MOVQ $20,DX
-	MOVOA 64(SP),X0
-	MOVOA 80(SP),X1
-	MOVOA 96(SP),X2
-	MOVOA 256(SP),X3
-	MOVOA 272(SP),X4
-	MOVOA 128(SP),X5
-	MOVOA 144(SP),X6
-	MOVOA 176(SP),X7
-	MOVOA 192(SP),X8
-	MOVOA 208(SP),X9
-	MOVOA 224(SP),X10
-	MOVOA 304(SP),X11
-	MOVOA 112(SP),X12
-	MOVOA 160(SP),X13
-	MOVOA 240(SP),X14
-	MOVOA 288(SP),X15
+	MOVOA 64(R12),X0
+	MOVOA 80(R12),X1
+	MOVOA 96(R12),X2
+	MOVOA 256(R12),X3
+	MOVOA 272(R12),X4
+	MOVOA 128(R12),X5
+	MOVOA 144(R12),X6
+	MOVOA 176(R12),X7
+	MOVOA 192(R12),X8
+	MOVOA 208(R12),X9
+	MOVOA 224(R12),X10
+	MOVOA 304(R12),X11
+	MOVOA 112(R12),X12
+	MOVOA 160(R12),X13
+	MOVOA 240(R12),X14
+	MOVOA 288(R12),X15
 	MAINLOOP1:
-	MOVOA X1,320(SP)
-	MOVOA X2,336(SP)
+	MOVOA X1,320(R12)
+	MOVOA X2,336(R12)
 	MOVOA X13,X1
 	PADDL X12,X1
 	MOVOA X1,X2
@@ -197,8 +190,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	PXOR X1,X12
 	PSRLL $14,X2
 	PXOR X2,X12
-	MOVOA 320(SP),X1
-	MOVOA X12,320(SP)
+	MOVOA 320(R12),X1
+	MOVOA X12,320(R12)
 	MOVOA X9,X2
 	PADDL X7,X2
 	MOVOA X2,X12
@@ -213,8 +206,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	PXOR X2,X3
 	PSRLL $25,X12
 	PXOR X12,X3
-	MOVOA 336(SP),X2
-	MOVOA X0,336(SP)
+	MOVOA 336(R12),X2
+	MOVOA X0,336(R12)
 	MOVOA X6,X0
 	PADDL X2,X0
 	MOVOA X0,X12
@@ -257,8 +250,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	PXOR X0,X1
 	PSRLL $14,X12
 	PXOR X12,X1
-	MOVOA 320(SP),X0
-	MOVOA X1,320(SP)
+	MOVOA 320(R12),X0
+	MOVOA X1,320(R12)
 	MOVOA X4,X1
 	PADDL X0,X1
 	MOVOA X1,X12
@@ -273,8 +266,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	PXOR X1,X2
 	PSRLL $14,X12
 	PXOR X12,X2
-	MOVOA 336(SP),X12
-	MOVOA X2,336(SP)
+	MOVOA 336(R12),X12
+	MOVOA X2,336(R12)
 	MOVOA X14,X1
 	PADDL X12,X1
 	MOVOA X1,X2
@@ -317,8 +310,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	PXOR X1,X0
 	PSRLL $14,X2
 	PXOR X2,X0
-	MOVOA 320(SP),X1
-	MOVOA X0,320(SP)
+	MOVOA 320(R12),X1
+	MOVOA X0,320(R12)
 	MOVOA X8,X0
 	PADDL X14,X0
 	MOVOA X0,X2
@@ -333,8 +326,8 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	PXOR X0,X6
 	PSRLL $25,X2
 	PXOR X2,X6
-	MOVOA 336(SP),X2
-	MOVOA X12,336(SP)
+	MOVOA 336(R12),X2
+	MOVOA X12,336(R12)
 	MOVOA X3,X0
 	PADDL X2,X0
 	MOVOA X0,X12
@@ -384,14 +377,14 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	PXOR X0,X2
 	PSRLL $14,X12
 	PXOR X12,X2
-	MOVOA 320(SP),X12
-	MOVOA 336(SP),X0
+	MOVOA 320(R12),X12
+	MOVOA 336(R12),X0
 	SUBQ $2,DX
 	JA MAINLOOP1
-	PADDL 112(SP),X12
-	PADDL 176(SP),X7
-	PADDL 224(SP),X10
-	PADDL 272(SP),X4
+	PADDL 112(R12),X12
+	PADDL 176(R12),X7
+	PADDL 224(R12),X10
+	PADDL 272(R12),X4
 	MOVD X12,DX
 	MOVD X7,CX
 	MOVD X10,R8
@@ -452,10 +445,10 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVL CX,196(DI)
 	MOVL R8,200(DI)
 	MOVL R9,204(DI)
-	PADDL 240(SP),X14
-	PADDL 64(SP),X0
-	PADDL 128(SP),X5
-	PADDL 192(SP),X8
+	PADDL 240(R12),X14
+	PADDL 64(R12),X0
+	PADDL 128(R12),X5
+	PADDL 192(R12),X8
 	MOVD X14,DX
 	MOVD X0,CX
 	MOVD X5,R8
@@ -516,10 +509,10 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVL CX,212(DI)
 	MOVL R8,216(DI)
 	MOVL R9,220(DI)
-	PADDL 288(SP),X15
-	PADDL 304(SP),X11
-	PADDL 80(SP),X1
-	PADDL 144(SP),X6
+	PADDL 288(R12),X15
+	PADDL 304(R12),X11
+	PADDL 80(R12),X1
+	PADDL 144(R12),X6
 	MOVD X15,DX
 	MOVD X11,CX
 	MOVD X1,R8
@@ -580,10 +573,10 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVL CX,228(DI)
 	MOVL R8,232(DI)
 	MOVL R9,236(DI)
-	PADDL 160(SP),X13
-	PADDL 208(SP),X9
-	PADDL 256(SP),X3
-	PADDL 96(SP),X2
+	PADDL 160(R12),X13
+	PADDL 208(R12),X9
+	PADDL 256(R12),X3
+	PADDL 96(R12),X2
 	MOVD X13,DX
 	MOVD X9,CX
 	MOVD X3,R8
@@ -644,7 +637,7 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVL CX,244(DI)
 	MOVL R8,248(DI)
 	MOVL R9,252(DI)
-	MOVQ 352(SP),R9
+	MOVQ 352(R12),R9
 	SUBQ $256,R9
 	ADDQ $256,SI
 	ADDQ $256,DI
@@ -656,17 +649,17 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	CMPQ R9,$64
 	JAE NOCOPY
 	MOVQ DI,DX
-	LEAQ 360(SP),DI
+	LEAQ 360(R12),DI
 	MOVQ R9,CX
 	REP; MOVSB
-	LEAQ 360(SP),DI
-	LEAQ 360(SP),SI
+	LEAQ 360(R12),DI
+	LEAQ 360(R12),SI
 	NOCOPY:
-	MOVQ R9,352(SP)
-	MOVOA 48(SP),X0
-	MOVOA 0(SP),X1
-	MOVOA 16(SP),X2
-	MOVOA 32(SP),X3
+	MOVQ R9,352(R12)
+	MOVOA 48(R12),X0
+	MOVOA 0(R12),X1
+	MOVOA 16(R12),X2
+	MOVOA 32(R12),X3
 	MOVOA X1,X4
 	MOVQ $20,CX
 	MAINLOOP2:
@@ -797,10 +790,10 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	PSHUFL $0X39,X3,X3
 	PXOR X6,X0
 	JA MAINLOOP2
-	PADDL 48(SP),X0
-	PADDL 0(SP),X1
-	PADDL 16(SP),X2
-	PADDL 32(SP),X3
+	PADDL 48(R12),X0
+	PADDL 0(R12),X1
+	PADDL 16(R12),X2
+	PADDL 32(R12),X3
 	MOVD X0,CX
 	MOVD X1,R8
 	MOVD X2,R9
@@ -861,16 +854,16 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	MOVL R8,44(DI)
 	MOVL R9,28(DI)
 	MOVL AX,12(DI)
-	MOVQ 352(SP),R9
-	MOVL 16(SP),CX
-	MOVL  36 (SP),R8
+	MOVQ 352(R12),R9
+	MOVL 16(R12),CX
+	MOVL  36 (R12),R8
 	ADDQ $1,CX
 	SHLQ $32,R8
 	ADDQ R8,CX
 	MOVQ CX,R8
 	SHRQ $32,R8
-	MOVL CX,16(SP)
-	MOVL R8, 36 (SP)
+	MOVL CX,16(R12)
+	MOVL R8, 36 (R12)
 	CMPQ R9,$64
 	JA BYTESATLEAST65
 	JAE BYTESATLEAST64
@@ -880,7 +873,6 @@ TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
 	REP; MOVSB
 	BYTESATLEAST64:
 	DONE:
-	MOVQ R12,SP
 	RET
 	BYTESATLEAST65:
 	SUBQ $64,R9
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go
new file mode 100644
index 0000000..4392cc1
--- /dev/null
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_noasm.go
@@ -0,0 +1,15 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !amd64 || purego || !gc
+// +build !amd64 purego !gc
+
+package salsa
+
+// XORKeyStream crypts bytes from in to out using the given key and counters.
+// In and out must overlap entirely or not at all. Counter
+// contains the raw salsa20 counter bytes (both nonce and block counter).
+func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
+	genericXORKeyStream(out, in, counter, key)
+}
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go
index 22126d1..68169c6 100644
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_ref.go
@@ -2,8 +2,6 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !amd64 appengine gccgo
-
 package salsa
 
 const rounds = 20
@@ -202,10 +200,9 @@ func core(out *[64]byte, in *[16]byte, k *[32]byte, c *[16]byte) {
 	out[63] = byte(x15 >> 24)
 }
 
-// XORKeyStream crypts bytes from in to out using the given key and counters.
-// In and out must overlap entirely or not at all. Counter
-// contains the raw salsa20 counter bytes (both nonce and block counter).
-func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
+// genericXORKeyStream is the generic implementation of XORKeyStream to be used
+// when no assembly implementation is available.
+func genericXORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
 	var block [64]byte
 	var counterCopy [16]byte
 	copy(counterCopy[:], counter[:])
diff --git a/vendor/golang.org/x/sys/AUTHORS b/vendor/golang.org/x/sys/AUTHORS
new file mode 100644
index 0000000..15167cd
--- /dev/null
+++ b/vendor/golang.org/x/sys/AUTHORS
@@ -0,0 +1,3 @@
+# This source code refers to The Go Authors for copyright purposes.
+# The master list of authors is in the main Go distribution,
+# visible at http://tip.golang.org/AUTHORS.
diff --git a/vendor/golang.org/x/sys/CONTRIBUTORS b/vendor/golang.org/x/sys/CONTRIBUTORS
new file mode 100644
index 0000000..1c4577e
--- /dev/null
+++ b/vendor/golang.org/x/sys/CONTRIBUTORS
@@ -0,0 +1,3 @@
+# This source code was written by the Go contributors.
+# The master list of contributors is in the main Go distribution,
+# visible at http://tip.golang.org/CONTRIBUTORS.
diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE
new file mode 100644
index 0000000..6a66aea
--- /dev/null
+++ b/vendor/golang.org/x/sys/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/golang.org/x/sys/PATENTS b/vendor/golang.org/x/sys/PATENTS
new file mode 100644
index 0000000..7330990
--- /dev/null
+++ b/vendor/golang.org/x/sys/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the Go project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of Go, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of Go.  This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation.  If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of Go or any code incorporated within this
+implementation of Go constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of Go
+shall terminate as of the date such litigation is filed.
diff --git a/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
new file mode 100644
index 0000000..db9171c
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/asm_aix_ppc64.s
@@ -0,0 +1,18 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+// +build gc
+
+#include "textflag.h"
+
+//
+// System calls for ppc64, AIX are implemented in runtime/syscall_aix.go
+//
+
+TEXT ·syscall6(SB),NOSPLIT,$0-88
+	JMP	syscall·syscall6(SB)
+
+TEXT ·rawSyscall6(SB),NOSPLIT,$0-88
+	JMP	syscall·rawSyscall6(SB)
diff --git a/vendor/golang.org/x/sys/cpu/byteorder.go b/vendor/golang.org/x/sys/cpu/byteorder.go
new file mode 100644
index 0000000..dcbb14e
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/byteorder.go
@@ -0,0 +1,65 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"runtime"
+)
+
+// byteOrder is a subset of encoding/binary.ByteOrder.
+type byteOrder interface {
+	Uint32([]byte) uint32
+	Uint64([]byte) uint64
+}
+
+type littleEndian struct{}
+type bigEndian struct{}
+
+func (littleEndian) Uint32(b []byte) uint32 {
+	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func (littleEndian) Uint64(b []byte) uint64 {
+	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
+
+func (bigEndian) Uint32(b []byte) uint32 {
+	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
+}
+
+func (bigEndian) Uint64(b []byte) uint64 {
+	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+	return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
+		uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
+}
+
+// hostByteOrder returns littleEndian on little-endian machines and
+// bigEndian on big-endian machines.
+func hostByteOrder() byteOrder {
+	switch runtime.GOARCH {
+	case "386", "amd64", "amd64p32",
+		"alpha",
+		"arm", "arm64",
+		"mipsle", "mips64le", "mips64p32le",
+		"nios2",
+		"ppc64le",
+		"riscv", "riscv64",
+		"sh":
+		return littleEndian{}
+	case "armbe", "arm64be",
+		"m68k",
+		"mips", "mips64", "mips64p32",
+		"ppc", "ppc64",
+		"s390", "s390x",
+		"shbe",
+		"sparc", "sparc64":
+		return bigEndian{}
+	}
+	panic("unknown architecture")
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go
new file mode 100644
index 0000000..b56886f
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu.go
@@ -0,0 +1,287 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package cpu implements processor feature detection for
+// various CPU architectures.
+package cpu
+
+import (
+	"os"
+	"strings"
+)
+
+// Initialized reports whether the CPU features were initialized.
+//
+// For some GOOS/GOARCH combinations initialization of the CPU features depends
+// on reading an operating specific file, e.g. /proc/self/auxv on linux/arm
+// Initialized will report false if reading the file fails.
+var Initialized bool
+
+// CacheLinePad is used to pad structs to avoid false sharing.
+type CacheLinePad struct{ _ [cacheLineSize]byte }
+
+// X86 contains the supported CPU features of the
+// current X86/AMD64 platform. If the current platform
+// is not X86/AMD64 then all feature flags are false.
+//
+// X86 is padded to avoid false sharing. Further the HasAVX
+// and HasAVX2 are only set if the OS supports XMM and YMM
+// registers in addition to the CPUID feature bit being set.
+var X86 struct {
+	_                   CacheLinePad
+	HasAES              bool // AES hardware implementation (AES NI)
+	HasADX              bool // Multi-precision add-carry instruction extensions
+	HasAVX              bool // Advanced vector extension
+	HasAVX2             bool // Advanced vector extension 2
+	HasAVX512           bool // Advanced vector extension 512
+	HasAVX512F          bool // Advanced vector extension 512 Foundation Instructions
+	HasAVX512CD         bool // Advanced vector extension 512 Conflict Detection Instructions
+	HasAVX512ER         bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
+	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions Instructions
+	HasAVX512VL         bool // Advanced vector extension 512 Vector Length Extensions
+	HasAVX512BW         bool // Advanced vector extension 512 Byte and Word Instructions
+	HasAVX512DQ         bool // Advanced vector extension 512 Doubleword and Quadword Instructions
+	HasAVX512IFMA       bool // Advanced vector extension 512 Integer Fused Multiply Add
+	HasAVX512VBMI       bool // Advanced vector extension 512 Vector Byte Manipulation Instructions
+	HasAVX5124VNNIW     bool // Advanced vector extension 512 Vector Neural Network Instructions Word variable precision
+	HasAVX5124FMAPS     bool // Advanced vector extension 512 Fused Multiply Accumulation Packed Single precision
+	HasAVX512VPOPCNTDQ  bool // Advanced vector extension 512 Double and quad word population count instructions
+	HasAVX512VPCLMULQDQ bool // Advanced vector extension 512 Vector carry-less multiply operations
+	HasAVX512VNNI       bool // Advanced vector extension 512 Vector Neural Network Instructions
+	HasAVX512GFNI       bool // Advanced vector extension 512 Galois field New Instructions
+	HasAVX512VAES       bool // Advanced vector extension 512 Vector AES instructions
+	HasAVX512VBMI2      bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
+	HasAVX512BITALG     bool // Advanced vector extension 512 Bit Algorithms
+	HasAVX512BF16       bool // Advanced vector extension 512 BFloat16 Instructions
+	HasBMI1             bool // Bit manipulation instruction set 1
+	HasBMI2             bool // Bit manipulation instruction set 2
+	HasCX16             bool // Compare and exchange 16 Bytes
+	HasERMS             bool // Enhanced REP for MOVSB and STOSB
+	HasFMA              bool // Fused-multiply-add instructions
+	HasOSXSAVE          bool // OS supports XSAVE/XRESTOR for saving/restoring XMM registers.
+	HasPCLMULQDQ        bool // PCLMULQDQ instruction - most often used for AES-GCM
+	HasPOPCNT           bool // Hamming weight instruction POPCNT.
+	HasRDRAND           bool // RDRAND instruction (on-chip random number generator)
+	HasRDSEED           bool // RDSEED instruction (on-chip random number generator)
+	HasSSE2             bool // Streaming SIMD extension 2 (always available on amd64)
+	HasSSE3             bool // Streaming SIMD extension 3
+	HasSSSE3            bool // Supplemental streaming SIMD extension 3
+	HasSSE41            bool // Streaming SIMD extension 4 and 4.1
+	HasSSE42            bool // Streaming SIMD extension 4 and 4.2
+	_                   CacheLinePad
+}
+
+// ARM64 contains the supported CPU features of the
+// current ARMv8(aarch64) platform. If the current platform
+// is not arm64 then all feature flags are false.
+var ARM64 struct {
+	_           CacheLinePad
+	HasFP       bool // Floating-point instruction set (always available)
+	HasASIMD    bool // Advanced SIMD (always available)
+	HasEVTSTRM  bool // Event stream support
+	HasAES      bool // AES hardware implementation
+	HasPMULL    bool // Polynomial multiplication instruction set
+	HasSHA1     bool // SHA1 hardware implementation
+	HasSHA2     bool // SHA2 hardware implementation
+	HasCRC32    bool // CRC32 hardware implementation
+	HasATOMICS  bool // Atomic memory operation instruction set
+	HasFPHP     bool // Half precision floating-point instruction set
+	HasASIMDHP  bool // Advanced SIMD half precision instruction set
+	HasCPUID    bool // CPUID identification scheme registers
+	HasASIMDRDM bool // Rounding double multiply add/subtract instruction set
+	HasJSCVT    bool // Javascript conversion from floating-point to integer
+	HasFCMA     bool // Floating-point multiplication and addition of complex numbers
+	HasLRCPC    bool // Release Consistent processor consistent support
+	HasDCPOP    bool // Persistent memory support
+	HasSHA3     bool // SHA3 hardware implementation
+	HasSM3      bool // SM3 hardware implementation
+	HasSM4      bool // SM4 hardware implementation
+	HasASIMDDP  bool // Advanced SIMD double precision instruction set
+	HasSHA512   bool // SHA512 hardware implementation
+	HasSVE      bool // Scalable Vector Extensions
+	HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
+	_           CacheLinePad
+}
+
+// ARM contains the supported CPU features of the current ARM (32-bit) platform.
+// All feature flags are false if:
+//   1. the current platform is not arm, or
+//   2. the current operating system is not Linux.
+var ARM struct {
+	_           CacheLinePad
+	HasSWP      bool // SWP instruction support
+	HasHALF     bool // Half-word load and store support
+	HasTHUMB    bool // ARM Thumb instruction set
+	Has26BIT    bool // Address space limited to 26-bits
+	HasFASTMUL  bool // 32-bit operand, 64-bit result multiplication support
+	HasFPA      bool // Floating point arithmetic support
+	HasVFP      bool // Vector floating point support
+	HasEDSP     bool // DSP Extensions support
+	HasJAVA     bool // Java instruction set
+	HasIWMMXT   bool // Intel Wireless MMX technology support
+	HasCRUNCH   bool // MaverickCrunch context switching and handling
+	HasTHUMBEE  bool // Thumb EE instruction set
+	HasNEON     bool // NEON instruction set
+	HasVFPv3    bool // Vector floating point version 3 support
+	HasVFPv3D16 bool // Vector floating point version 3 D8-D15
+	HasTLS      bool // Thread local storage support
+	HasVFPv4    bool // Vector floating point version 4 support
+	HasIDIVA    bool // Integer divide instruction support in ARM mode
+	HasIDIVT    bool // Integer divide instruction support in Thumb mode
+	HasVFPD32   bool // Vector floating point version 3 D15-D31
+	HasLPAE     bool // Large Physical Address Extensions
+	HasEVTSTRM  bool // Event stream support
+	HasAES      bool // AES hardware implementation
+	HasPMULL    bool // Polynomial multiplication instruction set
+	HasSHA1     bool // SHA1 hardware implementation
+	HasSHA2     bool // SHA2 hardware implementation
+	HasCRC32    bool // CRC32 hardware implementation
+	_           CacheLinePad
+}
+
+// MIPS64X contains the supported CPU features of the current mips64/mips64le
+// platforms. If the current platform is not mips64/mips64le or the current
+// operating system is not Linux then all feature flags are false.
+var MIPS64X struct {
+	_      CacheLinePad
+	HasMSA bool // MIPS SIMD architecture
+	_      CacheLinePad
+}
+
+// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms.
+// If the current platform is not ppc64/ppc64le then all feature flags are false.
+//
+// For ppc64/ppc64le, it is safe to check only for ISA level starting on ISA v3.00,
+// since there are no optional categories. There are some exceptions that also
+// require kernel support to work (DARN, SCV), so there are feature bits for
+// those as well. The struct is padded to avoid false sharing.
+var PPC64 struct {
+	_        CacheLinePad
+	HasDARN  bool // Hardware random number generator (requires kernel enablement)
+	HasSCV   bool // Syscall vectored (requires kernel enablement)
+	IsPOWER8 bool // ISA v2.07 (POWER8)
+	IsPOWER9 bool // ISA v3.00 (POWER9), implies IsPOWER8
+	_        CacheLinePad
+}
+
+// S390X contains the supported CPU features of the current IBM Z
+// (s390x) platform. If the current platform is not IBM Z then all
+// feature flags are false.
+//
+// S390X is padded to avoid false sharing. Further HasVX is only set
+// if the OS supports vector registers in addition to the STFLE
+// feature bit being set.
+var S390X struct {
+	_         CacheLinePad
+	HasZARCH  bool // z/Architecture mode is active [mandatory]
+	HasSTFLE  bool // store facility list extended
+	HasLDISP  bool // long (20-bit) displacements
+	HasEIMM   bool // 32-bit immediates
+	HasDFP    bool // decimal floating point
+	HasETF3EH bool // ETF-3 enhanced
+	HasMSA    bool // message security assist (CPACF)
+	HasAES    bool // KM-AES{128,192,256} functions
+	HasAESCBC bool // KMC-AES{128,192,256} functions
+	HasAESCTR bool // KMCTR-AES{128,192,256} functions
+	HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
+	HasGHASH  bool // KIMD-GHASH function
+	HasSHA1   bool // K{I,L}MD-SHA-1 functions
+	HasSHA256 bool // K{I,L}MD-SHA-256 functions
+	HasSHA512 bool // K{I,L}MD-SHA-512 functions
+	HasSHA3   bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
+	HasVX     bool // vector facility
+	HasVXE    bool // vector-enhancements facility 1
+	_         CacheLinePad
+}
+
+func init() {
+	archInit()
+	initOptions()
+	processOptions()
+}
+
+// options contains the cpu debug options that can be used in GODEBUG.
+// Options are arch dependent and are added by the arch specific initOptions functions.
+// Features that are mandatory for the specific GOARCH should have the Required field set
+// (e.g. SSE2 on amd64).
+var options []option
+
+// Option names should be lower case. e.g. avx instead of AVX.
+type option struct {
+	Name      string
+	Feature   *bool
+	Specified bool // whether feature value was specified in GODEBUG
+	Enable    bool // whether feature should be enabled
+	Required  bool // whether feature is mandatory and can not be disabled
+}
+
+func processOptions() {
+	env := os.Getenv("GODEBUG")
+field:
+	for env != "" {
+		field := ""
+		i := strings.IndexByte(env, ',')
+		if i < 0 {
+			field, env = env, ""
+		} else {
+			field, env = env[:i], env[i+1:]
+		}
+		if len(field) < 4 || field[:4] != "cpu." {
+			continue
+		}
+		i = strings.IndexByte(field, '=')
+		if i < 0 {
+			print("GODEBUG sys/cpu: no value specified for \"", field, "\"\n")
+			continue
+		}
+		key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
+
+		var enable bool
+		switch value {
+		case "on":
+			enable = true
+		case "off":
+			enable = false
+		default:
+			print("GODEBUG sys/cpu: value \"", value, "\" not supported for cpu option \"", key, "\"\n")
+			continue field
+		}
+
+		if key == "all" {
+			for i := range options {
+				options[i].Specified = true
+				options[i].Enable = enable || options[i].Required
+			}
+			continue field
+		}
+
+		for i := range options {
+			if options[i].Name == key {
+				options[i].Specified = true
+				options[i].Enable = enable
+				continue field
+			}
+		}
+
+		print("GODEBUG sys/cpu: unknown cpu feature \"", key, "\"\n")
+	}
+
+	for _, o := range options {
+		if !o.Specified {
+			continue
+		}
+
+		if o.Enable && !*o.Feature {
+			print("GODEBUG sys/cpu: can not enable \"", o.Name, "\", missing CPU support\n")
+			continue
+		}
+
+		if !o.Enable && o.Required {
+			print("GODEBUG sys/cpu: can not disable \"", o.Name, "\", required CPU feature\n")
+			continue
+		}
+
+		*o.Feature = o.Enable
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_aix.go b/vendor/golang.org/x/sys/cpu/cpu_aix.go
new file mode 100644
index 0000000..8aaeef5
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_aix.go
@@ -0,0 +1,34 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build aix
+// +build aix
+
+package cpu
+
+const (
+	// getsystemcfg constants
+	_SC_IMPL     = 2
+	_IMPL_POWER8 = 0x10000
+	_IMPL_POWER9 = 0x20000
+)
+
+func archInit() {
+	impl := getsystemcfg(_SC_IMPL)
+	if impl&_IMPL_POWER8 != 0 {
+		PPC64.IsPOWER8 = true
+	}
+	if impl&_IMPL_POWER9 != 0 {
+		PPC64.IsPOWER8 = true
+		PPC64.IsPOWER9 = true
+	}
+
+	Initialized = true
+}
+
+func getsystemcfg(label int) (n uint64) {
+	r0, _ := callgetsystemcfg(label)
+	n = uint64(r0)
+	return
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm.go b/vendor/golang.org/x/sys/cpu/cpu_arm.go
new file mode 100644
index 0000000..301b752
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm.go
@@ -0,0 +1,73 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const cacheLineSize = 32
+
+// HWCAP/HWCAP2 bits.
+// These are specific to Linux.
+const (
+	hwcap_SWP       = 1 << 0
+	hwcap_HALF      = 1 << 1
+	hwcap_THUMB     = 1 << 2
+	hwcap_26BIT     = 1 << 3
+	hwcap_FAST_MULT = 1 << 4
+	hwcap_FPA       = 1 << 5
+	hwcap_VFP       = 1 << 6
+	hwcap_EDSP      = 1 << 7
+	hwcap_JAVA      = 1 << 8
+	hwcap_IWMMXT    = 1 << 9
+	hwcap_CRUNCH    = 1 << 10
+	hwcap_THUMBEE   = 1 << 11
+	hwcap_NEON      = 1 << 12
+	hwcap_VFPv3     = 1 << 13
+	hwcap_VFPv3D16  = 1 << 14
+	hwcap_TLS       = 1 << 15
+	hwcap_VFPv4     = 1 << 16
+	hwcap_IDIVA     = 1 << 17
+	hwcap_IDIVT     = 1 << 18
+	hwcap_VFPD32    = 1 << 19
+	hwcap_LPAE      = 1 << 20
+	hwcap_EVTSTRM   = 1 << 21
+
+	hwcap2_AES   = 1 << 0
+	hwcap2_PMULL = 1 << 1
+	hwcap2_SHA1  = 1 << 2
+	hwcap2_SHA2  = 1 << 3
+	hwcap2_CRC32 = 1 << 4
+)
+
+func initOptions() {
+	options = []option{
+		{Name: "pmull", Feature: &ARM.HasPMULL},
+		{Name: "sha1", Feature: &ARM.HasSHA1},
+		{Name: "sha2", Feature: &ARM.HasSHA2},
+		{Name: "swp", Feature: &ARM.HasSWP},
+		{Name: "thumb", Feature: &ARM.HasTHUMB},
+		{Name: "thumbee", Feature: &ARM.HasTHUMBEE},
+		{Name: "tls", Feature: &ARM.HasTLS},
+		{Name: "vfp", Feature: &ARM.HasVFP},
+		{Name: "vfpd32", Feature: &ARM.HasVFPD32},
+		{Name: "vfpv3", Feature: &ARM.HasVFPv3},
+		{Name: "vfpv3d16", Feature: &ARM.HasVFPv3D16},
+		{Name: "vfpv4", Feature: &ARM.HasVFPv4},
+		{Name: "half", Feature: &ARM.HasHALF},
+		{Name: "26bit", Feature: &ARM.Has26BIT},
+		{Name: "fastmul", Feature: &ARM.HasFASTMUL},
+		{Name: "fpa", Feature: &ARM.HasFPA},
+		{Name: "edsp", Feature: &ARM.HasEDSP},
+		{Name: "java", Feature: &ARM.HasJAVA},
+		{Name: "iwmmxt", Feature: &ARM.HasIWMMXT},
+		{Name: "crunch", Feature: &ARM.HasCRUNCH},
+		{Name: "neon", Feature: &ARM.HasNEON},
+		{Name: "idivt", Feature: &ARM.HasIDIVT},
+		{Name: "idiva", Feature: &ARM.HasIDIVA},
+		{Name: "lpae", Feature: &ARM.HasLPAE},
+		{Name: "evtstrm", Feature: &ARM.HasEVTSTRM},
+		{Name: "aes", Feature: &ARM.HasAES},
+		{Name: "crc32", Feature: &ARM.HasCRC32},
+	}
+
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
new file mode 100644
index 0000000..87dd5e3
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
@@ -0,0 +1,172 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import "runtime"
+
+const cacheLineSize = 64
+
+func initOptions() {
+	options = []option{
+		{Name: "fp", Feature: &ARM64.HasFP},
+		{Name: "asimd", Feature: &ARM64.HasASIMD},
+		{Name: "evstrm", Feature: &ARM64.HasEVTSTRM},
+		{Name: "aes", Feature: &ARM64.HasAES},
+		{Name: "fphp", Feature: &ARM64.HasFPHP},
+		{Name: "jscvt", Feature: &ARM64.HasJSCVT},
+		{Name: "lrcpc", Feature: &ARM64.HasLRCPC},
+		{Name: "pmull", Feature: &ARM64.HasPMULL},
+		{Name: "sha1", Feature: &ARM64.HasSHA1},
+		{Name: "sha2", Feature: &ARM64.HasSHA2},
+		{Name: "sha3", Feature: &ARM64.HasSHA3},
+		{Name: "sha512", Feature: &ARM64.HasSHA512},
+		{Name: "sm3", Feature: &ARM64.HasSM3},
+		{Name: "sm4", Feature: &ARM64.HasSM4},
+		{Name: "sve", Feature: &ARM64.HasSVE},
+		{Name: "crc32", Feature: &ARM64.HasCRC32},
+		{Name: "atomics", Feature: &ARM64.HasATOMICS},
+		{Name: "asimdhp", Feature: &ARM64.HasASIMDHP},
+		{Name: "cpuid", Feature: &ARM64.HasCPUID},
+		{Name: "asimrdm", Feature: &ARM64.HasASIMDRDM},
+		{Name: "fcma", Feature: &ARM64.HasFCMA},
+		{Name: "dcpop", Feature: &ARM64.HasDCPOP},
+		{Name: "asimddp", Feature: &ARM64.HasASIMDDP},
+		{Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM},
+	}
+}
+
+func archInit() {
+	switch runtime.GOOS {
+	case "freebsd":
+		readARM64Registers()
+	case "linux", "netbsd":
+		doinit()
+	default:
+		// Most platforms don't seem to allow reading these registers.
+		//
+		// OpenBSD:
+		// See https://golang.org/issue/31746
+		setMinimalFeatures()
+	}
+}
+
+// setMinimalFeatures fakes the minimal ARM64 features expected by
+// TestARM64minimalFeatures.
+func setMinimalFeatures() {
+	ARM64.HasASIMD = true
+	ARM64.HasFP = true
+}
+
+func readARM64Registers() {
+	Initialized = true
+
+	parseARM64SystemRegisters(getisar0(), getisar1(), getpfr0())
+}
+
+func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) {
+	// ID_AA64ISAR0_EL1
+	switch extractBits(isar0, 4, 7) {
+	case 1:
+		ARM64.HasAES = true
+	case 2:
+		ARM64.HasAES = true
+		ARM64.HasPMULL = true
+	}
+
+	switch extractBits(isar0, 8, 11) {
+	case 1:
+		ARM64.HasSHA1 = true
+	}
+
+	switch extractBits(isar0, 12, 15) {
+	case 1:
+		ARM64.HasSHA2 = true
+	case 2:
+		ARM64.HasSHA2 = true
+		ARM64.HasSHA512 = true
+	}
+
+	switch extractBits(isar0, 16, 19) {
+	case 1:
+		ARM64.HasCRC32 = true
+	}
+
+	switch extractBits(isar0, 20, 23) {
+	case 2:
+		ARM64.HasATOMICS = true
+	}
+
+	switch extractBits(isar0, 28, 31) {
+	case 1:
+		ARM64.HasASIMDRDM = true
+	}
+
+	switch extractBits(isar0, 32, 35) {
+	case 1:
+		ARM64.HasSHA3 = true
+	}
+
+	switch extractBits(isar0, 36, 39) {
+	case 1:
+		ARM64.HasSM3 = true
+	}
+
+	switch extractBits(isar0, 40, 43) {
+	case 1:
+		ARM64.HasSM4 = true
+	}
+
+	switch extractBits(isar0, 44, 47) {
+	case 1:
+		ARM64.HasASIMDDP = true
+	}
+
+	// ID_AA64ISAR1_EL1
+	switch extractBits(isar1, 0, 3) {
+	case 1:
+		ARM64.HasDCPOP = true
+	}
+
+	switch extractBits(isar1, 12, 15) {
+	case 1:
+		ARM64.HasJSCVT = true
+	}
+
+	switch extractBits(isar1, 16, 19) {
+	case 1:
+		ARM64.HasFCMA = true
+	}
+
+	switch extractBits(isar1, 20, 23) {
+	case 1:
+		ARM64.HasLRCPC = true
+	}
+
+	// ID_AA64PFR0_EL1
+	switch extractBits(pfr0, 16, 19) {
+	case 0:
+		ARM64.HasFP = true
+	case 1:
+		ARM64.HasFP = true
+		ARM64.HasFPHP = true
+	}
+
+	switch extractBits(pfr0, 20, 23) {
+	case 0:
+		ARM64.HasASIMD = true
+	case 1:
+		ARM64.HasASIMD = true
+		ARM64.HasASIMDHP = true
+	}
+
+	switch extractBits(pfr0, 32, 35) {
+	case 1:
+		ARM64.HasSVE = true
+	}
+}
+
+func extractBits(data uint64, start, end uint) uint {
+	return (uint)(data>>start) & ((1 << (end - start + 1)) - 1)
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.s b/vendor/golang.org/x/sys/cpu/cpu_arm64.s
new file mode 100644
index 0000000..c61f95a
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.s
@@ -0,0 +1,32 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+// +build gc
+
+#include "textflag.h"
+
+// func getisar0() uint64
+TEXT ·getisar0(SB),NOSPLIT,$0-8
+	// get Instruction Set Attributes 0 into x0
+	// mrs x0, ID_AA64ISAR0_EL1 = d5380600
+	WORD	$0xd5380600
+	MOVD	R0, ret+0(FP)
+	RET
+
+// func getisar1() uint64
+TEXT ·getisar1(SB),NOSPLIT,$0-8
+	// get Instruction Set Attributes 1 into x0
+	// mrs x0, ID_AA64ISAR1_EL1 = d5380620
+	WORD	$0xd5380620
+	MOVD	R0, ret+0(FP)
+	RET
+
+// func getpfr0() uint64
+TEXT ·getpfr0(SB),NOSPLIT,$0-8
+	// get Processor Feature Register 0 into x0
+	// mrs x0, ID_AA64PFR0_EL1 = d5380400
+	WORD	$0xd5380400
+	MOVD	R0, ret+0(FP)
+	RET
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
new file mode 100644
index 0000000..ccf542a
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_arm64.go
@@ -0,0 +1,12 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+// +build gc
+
+package cpu
+
+func getisar0() uint64
+func getisar1() uint64
+func getpfr0() uint64
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
new file mode 100644
index 0000000..0af2f24
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_s390x.go
@@ -0,0 +1,22 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+// +build gc
+
+package cpu
+
+// haveAsmFunctions reports whether the other functions in this file can
+// be safely called.
+func haveAsmFunctions() bool { return true }
+
+// The following feature detection functions are defined in cpu_s390x.s.
+// They are likely to be expensive to call so the results should be cached.
+func stfle() facilityList
+func kmQuery() queryResult
+func kmcQuery() queryResult
+func kmctrQuery() queryResult
+func kmaQuery() queryResult
+func kimdQuery() queryResult
+func klmdQuery() queryResult
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
new file mode 100644
index 0000000..fa7cdb9
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
@@ -0,0 +1,17 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (386 || amd64 || amd64p32) && gc
+// +build 386 amd64 amd64p32
+// +build gc
+
+package cpu
+
+// cpuid is implemented in cpu_x86.s for gc compiler
+// and in cpu_gccgo.c for gccgo.
+func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
+
+// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
+// and in cpu_gccgo.c for gccgo.
+func xgetbv() (eax, edx uint32)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
new file mode 100644
index 0000000..2aff318
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_arm64.go
@@ -0,0 +1,12 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gccgo
+// +build gccgo
+
+package cpu
+
+func getisar0() uint64 { return 0 }
+func getisar1() uint64 { return 0 }
+func getpfr0() uint64  { return 0 }
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
new file mode 100644
index 0000000..4bfbda6
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_s390x.go
@@ -0,0 +1,23 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gccgo
+// +build gccgo
+
+package cpu
+
+// haveAsmFunctions reports whether the other functions in this file can
+// be safely called.
+func haveAsmFunctions() bool { return false }
+
+// TODO(mundaym): the following feature detection functions are currently
+// stubs. See https://golang.org/cl/162887 for how to fix this.
+// They are likely to be expensive to call so the results should be cached.
+func stfle() facilityList     { panic("not implemented for gccgo") }
+func kmQuery() queryResult    { panic("not implemented for gccgo") }
+func kmcQuery() queryResult   { panic("not implemented for gccgo") }
+func kmctrQuery() queryResult { panic("not implemented for gccgo") }
+func kmaQuery() queryResult   { panic("not implemented for gccgo") }
+func kimdQuery() queryResult  { panic("not implemented for gccgo") }
+func klmdQuery() queryResult  { panic("not implemented for gccgo") }
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
new file mode 100644
index 0000000..e363c7d
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.c
@@ -0,0 +1,43 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 amd64 amd64p32
+// +build gccgo
+
+#include <cpuid.h>
+#include <stdint.h>
+
+// Need to wrap __get_cpuid_count because it's declared as static.
+int
+gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf,
+                   uint32_t *eax, uint32_t *ebx,
+                   uint32_t *ecx, uint32_t *edx)
+{
+	return __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
+}
+
+// xgetbv reads the contents of an XCR (Extended Control Register)
+// specified in the ECX register into registers EDX:EAX.
+// Currently, the only supported value for XCR is 0.
+//
+// TODO: Replace with a better alternative:
+//
+//     #include <xsaveintrin.h>
+//
+//     #pragma GCC target("xsave")
+//
+//     void gccgoXgetbv(uint32_t *eax, uint32_t *edx) {
+//       unsigned long long x = _xgetbv(0);
+//       *eax = x & 0xffffffff;
+//       *edx = (x >> 32) & 0xffffffff;
+//     }
+//
+// Note that _xgetbv is defined starting with GCC 8.
+void
+gccgoXgetbv(uint32_t *eax, uint32_t *edx)
+{
+	__asm("  xorl %%ecx, %%ecx\n"
+	      "  xgetbv"
+	    : "=a"(*eax), "=d"(*edx));
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
new file mode 100644
index 0000000..863d415
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
@@ -0,0 +1,33 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (386 || amd64 || amd64p32) && gccgo
+// +build 386 amd64 amd64p32
+// +build gccgo
+
+package cpu
+
+//extern gccgoGetCpuidCount
+func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32)
+
+func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) {
+	var a, b, c, d uint32
+	gccgoGetCpuidCount(eaxArg, ecxArg, &a, &b, &c, &d)
+	return a, b, c, d
+}
+
+//extern gccgoXgetbv
+func gccgoXgetbv(eax, edx *uint32)
+
+func xgetbv() (eax, edx uint32) {
+	var a, d uint32
+	gccgoXgetbv(&a, &d)
+	return a, d
+}
+
+// gccgo doesn't build on Darwin, per:
+// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76
+func darwinSupportsAVX512() bool {
+	return false
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux.go b/vendor/golang.org/x/sys/cpu/cpu_linux.go
new file mode 100644
index 0000000..159a686
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux.go
@@ -0,0 +1,16 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !386 && !amd64 && !amd64p32 && !arm64
+// +build !386,!amd64,!amd64p32,!arm64
+
+package cpu
+
+func archInit() {
+	if err := readHWCAP(); err != nil {
+		return
+	}
+	doinit()
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go
new file mode 100644
index 0000000..2057006
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm.go
@@ -0,0 +1,39 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+func doinit() {
+	ARM.HasSWP = isSet(hwCap, hwcap_SWP)
+	ARM.HasHALF = isSet(hwCap, hwcap_HALF)
+	ARM.HasTHUMB = isSet(hwCap, hwcap_THUMB)
+	ARM.Has26BIT = isSet(hwCap, hwcap_26BIT)
+	ARM.HasFASTMUL = isSet(hwCap, hwcap_FAST_MULT)
+	ARM.HasFPA = isSet(hwCap, hwcap_FPA)
+	ARM.HasVFP = isSet(hwCap, hwcap_VFP)
+	ARM.HasEDSP = isSet(hwCap, hwcap_EDSP)
+	ARM.HasJAVA = isSet(hwCap, hwcap_JAVA)
+	ARM.HasIWMMXT = isSet(hwCap, hwcap_IWMMXT)
+	ARM.HasCRUNCH = isSet(hwCap, hwcap_CRUNCH)
+	ARM.HasTHUMBEE = isSet(hwCap, hwcap_THUMBEE)
+	ARM.HasNEON = isSet(hwCap, hwcap_NEON)
+	ARM.HasVFPv3 = isSet(hwCap, hwcap_VFPv3)
+	ARM.HasVFPv3D16 = isSet(hwCap, hwcap_VFPv3D16)
+	ARM.HasTLS = isSet(hwCap, hwcap_TLS)
+	ARM.HasVFPv4 = isSet(hwCap, hwcap_VFPv4)
+	ARM.HasIDIVA = isSet(hwCap, hwcap_IDIVA)
+	ARM.HasIDIVT = isSet(hwCap, hwcap_IDIVT)
+	ARM.HasVFPD32 = isSet(hwCap, hwcap_VFPD32)
+	ARM.HasLPAE = isSet(hwCap, hwcap_LPAE)
+	ARM.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM)
+	ARM.HasAES = isSet(hwCap2, hwcap2_AES)
+	ARM.HasPMULL = isSet(hwCap2, hwcap2_PMULL)
+	ARM.HasSHA1 = isSet(hwCap2, hwcap2_SHA1)
+	ARM.HasSHA2 = isSet(hwCap2, hwcap2_SHA2)
+	ARM.HasCRC32 = isSet(hwCap2, hwcap2_CRC32)
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
new file mode 100644
index 0000000..79a38a0
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
@@ -0,0 +1,71 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+// HWCAP/HWCAP2 bits. These are exposed by Linux.
+const (
+	hwcap_FP       = 1 << 0
+	hwcap_ASIMD    = 1 << 1
+	hwcap_EVTSTRM  = 1 << 2
+	hwcap_AES      = 1 << 3
+	hwcap_PMULL    = 1 << 4
+	hwcap_SHA1     = 1 << 5
+	hwcap_SHA2     = 1 << 6
+	hwcap_CRC32    = 1 << 7
+	hwcap_ATOMICS  = 1 << 8
+	hwcap_FPHP     = 1 << 9
+	hwcap_ASIMDHP  = 1 << 10
+	hwcap_CPUID    = 1 << 11
+	hwcap_ASIMDRDM = 1 << 12
+	hwcap_JSCVT    = 1 << 13
+	hwcap_FCMA     = 1 << 14
+	hwcap_LRCPC    = 1 << 15
+	hwcap_DCPOP    = 1 << 16
+	hwcap_SHA3     = 1 << 17
+	hwcap_SM3      = 1 << 18
+	hwcap_SM4      = 1 << 19
+	hwcap_ASIMDDP  = 1 << 20
+	hwcap_SHA512   = 1 << 21
+	hwcap_SVE      = 1 << 22
+	hwcap_ASIMDFHM = 1 << 23
+)
+
+func doinit() {
+	if err := readHWCAP(); err != nil {
+		// failed to read /proc/self/auxv, try reading registers directly
+		readARM64Registers()
+		return
+	}
+
+	// HWCAP feature bits
+	ARM64.HasFP = isSet(hwCap, hwcap_FP)
+	ARM64.HasASIMD = isSet(hwCap, hwcap_ASIMD)
+	ARM64.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM)
+	ARM64.HasAES = isSet(hwCap, hwcap_AES)
+	ARM64.HasPMULL = isSet(hwCap, hwcap_PMULL)
+	ARM64.HasSHA1 = isSet(hwCap, hwcap_SHA1)
+	ARM64.HasSHA2 = isSet(hwCap, hwcap_SHA2)
+	ARM64.HasCRC32 = isSet(hwCap, hwcap_CRC32)
+	ARM64.HasATOMICS = isSet(hwCap, hwcap_ATOMICS)
+	ARM64.HasFPHP = isSet(hwCap, hwcap_FPHP)
+	ARM64.HasASIMDHP = isSet(hwCap, hwcap_ASIMDHP)
+	ARM64.HasCPUID = isSet(hwCap, hwcap_CPUID)
+	ARM64.HasASIMDRDM = isSet(hwCap, hwcap_ASIMDRDM)
+	ARM64.HasJSCVT = isSet(hwCap, hwcap_JSCVT)
+	ARM64.HasFCMA = isSet(hwCap, hwcap_FCMA)
+	ARM64.HasLRCPC = isSet(hwCap, hwcap_LRCPC)
+	ARM64.HasDCPOP = isSet(hwCap, hwcap_DCPOP)
+	ARM64.HasSHA3 = isSet(hwCap, hwcap_SHA3)
+	ARM64.HasSM3 = isSet(hwCap, hwcap_SM3)
+	ARM64.HasSM4 = isSet(hwCap, hwcap_SM4)
+	ARM64.HasASIMDDP = isSet(hwCap, hwcap_ASIMDDP)
+	ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512)
+	ARM64.HasSVE = isSet(hwCap, hwcap_SVE)
+	ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM)
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
new file mode 100644
index 0000000..6000db4
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_mips64x.go
@@ -0,0 +1,24 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (mips64 || mips64le)
+// +build linux
+// +build mips64 mips64le
+
+package cpu
+
+// HWCAP bits. These are exposed by the Linux kernel 5.4.
+const (
+	// CPU features
+	hwcap_MIPS_MSA = 1 << 1
+)
+
+func doinit() {
+	// HWCAP feature bits
+	MIPS64X.HasMSA = isSet(hwCap, hwcap_MIPS_MSA)
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
new file mode 100644
index 0000000..f4992b1
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
@@ -0,0 +1,10 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
+// +build linux,!arm,!arm64,!mips64,!mips64le,!ppc64,!ppc64le,!s390x
+
+package cpu
+
+func doinit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
new file mode 100644
index 0000000..021356d
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_ppc64x.go
@@ -0,0 +1,32 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build linux && (ppc64 || ppc64le)
+// +build linux
+// +build ppc64 ppc64le
+
+package cpu
+
+// HWCAP/HWCAP2 bits. These are exposed by the kernel.
+const (
+	// ISA Level
+	_PPC_FEATURE2_ARCH_2_07 = 0x80000000
+	_PPC_FEATURE2_ARCH_3_00 = 0x00800000
+
+	// CPU features
+	_PPC_FEATURE2_DARN = 0x00200000
+	_PPC_FEATURE2_SCV  = 0x00100000
+)
+
+func doinit() {
+	// HWCAP2 feature bits
+	PPC64.IsPOWER8 = isSet(hwCap2, _PPC_FEATURE2_ARCH_2_07)
+	PPC64.IsPOWER9 = isSet(hwCap2, _PPC_FEATURE2_ARCH_3_00)
+	PPC64.HasDARN = isSet(hwCap2, _PPC_FEATURE2_DARN)
+	PPC64.HasSCV = isSet(hwCap2, _PPC_FEATURE2_SCV)
+}
+
+func isSet(hwc uint, value uint) bool {
+	return hwc&value != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go
new file mode 100644
index 0000000..1517ac6
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_s390x.go
@@ -0,0 +1,40 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const (
+	// bit mask values from /usr/include/bits/hwcap.h
+	hwcap_ZARCH  = 2
+	hwcap_STFLE  = 4
+	hwcap_MSA    = 8
+	hwcap_LDISP  = 16
+	hwcap_EIMM   = 32
+	hwcap_DFP    = 64
+	hwcap_ETF3EH = 256
+	hwcap_VX     = 2048
+	hwcap_VXE    = 8192
+)
+
+func initS390Xbase() {
+	// test HWCAP bit vector
+	has := func(featureMask uint) bool {
+		return hwCap&featureMask == featureMask
+	}
+
+	// mandatory
+	S390X.HasZARCH = has(hwcap_ZARCH)
+
+	// optional
+	S390X.HasSTFLE = has(hwcap_STFLE)
+	S390X.HasLDISP = has(hwcap_LDISP)
+	S390X.HasEIMM = has(hwcap_EIMM)
+	S390X.HasETF3EH = has(hwcap_ETF3EH)
+	S390X.HasDFP = has(hwcap_DFP)
+	S390X.HasMSA = has(hwcap_MSA)
+	S390X.HasVX = has(hwcap_VX)
+	if S390X.HasVX {
+		S390X.HasVXE = has(hwcap_VXE)
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
new file mode 100644
index 0000000..f4063c6
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_mips64x.go
@@ -0,0 +1,16 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips64 || mips64le
+// +build mips64 mips64le
+
+package cpu
+
+const cacheLineSize = 32
+
+func initOptions() {
+	options = []option{
+		{Name: "msa", Feature: &MIPS64X.HasMSA},
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_mipsx.go b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
new file mode 100644
index 0000000..07c4e36
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_mipsx.go
@@ -0,0 +1,12 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build mips || mipsle
+// +build mips mipsle
+
+package cpu
+
+const cacheLineSize = 32
+
+func initOptions() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go
new file mode 100644
index 0000000..ebfb3fc
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_netbsd_arm64.go
@@ -0,0 +1,173 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+// Minimal copy of functionality from x/sys/unix so the cpu package can call
+// sysctl without depending on x/sys/unix.
+
+const (
+	_CTL_QUERY = -2
+
+	_SYSCTL_VERS_1 = 0x1000000
+)
+
+var _zero uintptr
+
+func sysctl(mib []int32, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) {
+	var _p0 unsafe.Pointer
+	if len(mib) > 0 {
+		_p0 = unsafe.Pointer(&mib[0])
+	} else {
+		_p0 = unsafe.Pointer(&_zero)
+	}
+	_, _, errno := syscall.Syscall6(
+		syscall.SYS___SYSCTL,
+		uintptr(_p0),
+		uintptr(len(mib)),
+		uintptr(unsafe.Pointer(old)),
+		uintptr(unsafe.Pointer(oldlen)),
+		uintptr(unsafe.Pointer(new)),
+		uintptr(newlen))
+	if errno != 0 {
+		return errno
+	}
+	return nil
+}
+
+type sysctlNode struct {
+	Flags          uint32
+	Num            int32
+	Name           [32]int8
+	Ver            uint32
+	__rsvd         uint32
+	Un             [16]byte
+	_sysctl_size   [8]byte
+	_sysctl_func   [8]byte
+	_sysctl_parent [8]byte
+	_sysctl_desc   [8]byte
+}
+
+func sysctlNodes(mib []int32) ([]sysctlNode, error) {
+	var olen uintptr
+
+	// Get a list of all sysctl nodes below the given MIB by performing
+	// a sysctl for the given MIB with CTL_QUERY appended.
+	mib = append(mib, _CTL_QUERY)
+	qnode := sysctlNode{Flags: _SYSCTL_VERS_1}
+	qp := (*byte)(unsafe.Pointer(&qnode))
+	sz := unsafe.Sizeof(qnode)
+	if err := sysctl(mib, nil, &olen, qp, sz); err != nil {
+		return nil, err
+	}
+
+	// Now that we know the size, get the actual nodes.
+	nodes := make([]sysctlNode, olen/sz)
+	np := (*byte)(unsafe.Pointer(&nodes[0]))
+	if err := sysctl(mib, np, &olen, qp, sz); err != nil {
+		return nil, err
+	}
+
+	return nodes, nil
+}
+
+func nametomib(name string) ([]int32, error) {
+	// Split name into components.
+	var parts []string
+	last := 0
+	for i := 0; i < len(name); i++ {
+		if name[i] == '.' {
+			parts = append(parts, name[last:i])
+			last = i + 1
+		}
+	}
+	parts = append(parts, name[last:])
+
+	mib := []int32{}
+	// Discover the nodes and construct the MIB OID.
+	for partno, part := range parts {
+		nodes, err := sysctlNodes(mib)
+		if err != nil {
+			return nil, err
+		}
+		for _, node := range nodes {
+			n := make([]byte, 0)
+			for i := range node.Name {
+				if node.Name[i] != 0 {
+					n = append(n, byte(node.Name[i]))
+				}
+			}
+			if string(n) == part {
+				mib = append(mib, int32(node.Num))
+				break
+			}
+		}
+		if len(mib) != partno+1 {
+			return nil, err
+		}
+	}
+
+	return mib, nil
+}
+
+// aarch64SysctlCPUID is struct aarch64_sysctl_cpu_id from NetBSD's <aarch64/armreg.h>
+type aarch64SysctlCPUID struct {
+	midr      uint64 /* Main ID Register */
+	revidr    uint64 /* Revision ID Register */
+	mpidr     uint64 /* Multiprocessor Affinity Register */
+	aa64dfr0  uint64 /* A64 Debug Feature Register 0 */
+	aa64dfr1  uint64 /* A64 Debug Feature Register 1 */
+	aa64isar0 uint64 /* A64 Instruction Set Attribute Register 0 */
+	aa64isar1 uint64 /* A64 Instruction Set Attribute Register 1 */
+	aa64mmfr0 uint64 /* A64 Memory Model Feature Register 0 */
+	aa64mmfr1 uint64 /* A64 Memory Model Feature Register 1 */
+	aa64mmfr2 uint64 /* A64 Memory Model Feature Register 2 */
+	aa64pfr0  uint64 /* A64 Processor Feature Register 0 */
+	aa64pfr1  uint64 /* A64 Processor Feature Register 1 */
+	aa64zfr0  uint64 /* A64 SVE Feature ID Register 0 */
+	mvfr0     uint32 /* Media and VFP Feature Register 0 */
+	mvfr1     uint32 /* Media and VFP Feature Register 1 */
+	mvfr2     uint32 /* Media and VFP Feature Register 2 */
+	pad       uint32
+	clidr     uint64 /* Cache Level ID Register */
+	ctr       uint64 /* Cache Type Register */
+}
+
+func sysctlCPUID(name string) (*aarch64SysctlCPUID, error) {
+	mib, err := nametomib(name)
+	if err != nil {
+		return nil, err
+	}
+
+	out := aarch64SysctlCPUID{}
+	n := unsafe.Sizeof(out)
+	_, _, errno := syscall.Syscall6(
+		syscall.SYS___SYSCTL,
+		uintptr(unsafe.Pointer(&mib[0])),
+		uintptr(len(mib)),
+		uintptr(unsafe.Pointer(&out)),
+		uintptr(unsafe.Pointer(&n)),
+		uintptr(0),
+		uintptr(0))
+	if errno != 0 {
+		return nil, errno
+	}
+	return &out, nil
+}
+
+func doinit() {
+	cpuid, err := sysctlCPUID("machdep.cpu0.cpu_id")
+	if err != nil {
+		setMinimalFeatures()
+		return
+	}
+	parseARM64SystemRegisters(cpuid.aa64isar0, cpuid.aa64isar1, cpuid.aa64pfr0)
+
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_arm.go b/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
new file mode 100644
index 0000000..d7b4fb4
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm.go
@@ -0,0 +1,10 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux && arm
+// +build !linux,arm
+
+package cpu
+
+func archInit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
new file mode 100644
index 0000000..f8c484f
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_arm64.go
@@ -0,0 +1,10 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux && !netbsd && arm64
+// +build !linux,!netbsd,arm64
+
+package cpu
+
+func doinit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go b/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
new file mode 100644
index 0000000..0dafe96
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_mips64x.go
@@ -0,0 +1,13 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !linux && (mips64 || mips64le)
+// +build !linux
+// +build mips64 mips64le
+
+package cpu
+
+func archInit() {
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go b/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
new file mode 100644
index 0000000..4e8acd1
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_ppc64x.go
@@ -0,0 +1,17 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ppc64 || ppc64le
+// +build ppc64 ppc64le
+
+package cpu
+
+const cacheLineSize = 128
+
+func initOptions() {
+	options = []option{
+		{Name: "darn", Feature: &PPC64.HasDARN},
+		{Name: "scv", Feature: &PPC64.HasSCV},
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
new file mode 100644
index 0000000..bd6c128
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
@@ -0,0 +1,12 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build riscv64
+// +build riscv64
+
+package cpu
+
+const cacheLineSize = 32
+
+func initOptions() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_s390x.go
new file mode 100644
index 0000000..5881b88
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_s390x.go
@@ -0,0 +1,172 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+const cacheLineSize = 256
+
+func initOptions() {
+	options = []option{
+		{Name: "zarch", Feature: &S390X.HasZARCH, Required: true},
+		{Name: "stfle", Feature: &S390X.HasSTFLE, Required: true},
+		{Name: "ldisp", Feature: &S390X.HasLDISP, Required: true},
+		{Name: "eimm", Feature: &S390X.HasEIMM, Required: true},
+		{Name: "dfp", Feature: &S390X.HasDFP},
+		{Name: "etf3eh", Feature: &S390X.HasETF3EH},
+		{Name: "msa", Feature: &S390X.HasMSA},
+		{Name: "aes", Feature: &S390X.HasAES},
+		{Name: "aescbc", Feature: &S390X.HasAESCBC},
+		{Name: "aesctr", Feature: &S390X.HasAESCTR},
+		{Name: "aesgcm", Feature: &S390X.HasAESGCM},
+		{Name: "ghash", Feature: &S390X.HasGHASH},
+		{Name: "sha1", Feature: &S390X.HasSHA1},
+		{Name: "sha256", Feature: &S390X.HasSHA256},
+		{Name: "sha3", Feature: &S390X.HasSHA3},
+		{Name: "sha512", Feature: &S390X.HasSHA512},
+		{Name: "vx", Feature: &S390X.HasVX},
+		{Name: "vxe", Feature: &S390X.HasVXE},
+	}
+}
+
+// bitIsSet reports whether the bit at index is set. The bit index
+// is in big endian order, so bit index 0 is the leftmost bit.
+func bitIsSet(bits []uint64, index uint) bool {
+	return bits[index/64]&((1<<63)>>(index%64)) != 0
+}
+
+// facility is a bit index for the named facility.
+type facility uint8
+
+const (
+	// mandatory facilities
+	zarch  facility = 1  // z architecture mode is active
+	stflef facility = 7  // store-facility-list-extended
+	ldisp  facility = 18 // long-displacement
+	eimm   facility = 21 // extended-immediate
+
+	// miscellaneous facilities
+	dfp    facility = 42 // decimal-floating-point
+	etf3eh facility = 30 // extended-translation 3 enhancement
+
+	// cryptography facilities
+	msa  facility = 17  // message-security-assist
+	msa3 facility = 76  // message-security-assist extension 3
+	msa4 facility = 77  // message-security-assist extension 4
+	msa5 facility = 57  // message-security-assist extension 5
+	msa8 facility = 146 // message-security-assist extension 8
+	msa9 facility = 155 // message-security-assist extension 9
+
+	// vector facilities
+	vx   facility = 129 // vector facility
+	vxe  facility = 135 // vector-enhancements 1
+	vxe2 facility = 148 // vector-enhancements 2
+)
+
+// facilityList contains the result of an STFLE call.
+// Bits are numbered in big endian order so the
+// leftmost bit (the MSB) is at index 0.
+type facilityList struct {
+	bits [4]uint64
+}
+
+// Has reports whether the given facilities are present.
+func (s *facilityList) Has(fs ...facility) bool {
+	if len(fs) == 0 {
+		panic("no facility bits provided")
+	}
+	for _, f := range fs {
+		if !bitIsSet(s.bits[:], uint(f)) {
+			return false
+		}
+	}
+	return true
+}
+
+// function is the code for the named cryptographic function.
+type function uint8
+
+const (
+	// KM{,A,C,CTR} function codes
+	aes128 function = 18 // AES-128
+	aes192 function = 19 // AES-192
+	aes256 function = 20 // AES-256
+
+	// K{I,L}MD function codes
+	sha1     function = 1  // SHA-1
+	sha256   function = 2  // SHA-256
+	sha512   function = 3  // SHA-512
+	sha3_224 function = 32 // SHA3-224
+	sha3_256 function = 33 // SHA3-256
+	sha3_384 function = 34 // SHA3-384
+	sha3_512 function = 35 // SHA3-512
+	shake128 function = 36 // SHAKE-128
+	shake256 function = 37 // SHAKE-256
+
+	// KLMD function codes
+	ghash function = 65 // GHASH
+)
+
+// queryResult contains the result of a Query function
+// call. Bits are numbered in big endian order so the
+// leftmost bit (the MSB) is at index 0.
+type queryResult struct {
+	bits [2]uint64
+}
+
+// Has reports whether the given functions are present.
+func (q *queryResult) Has(fns ...function) bool {
+	if len(fns) == 0 {
+		panic("no function codes provided")
+	}
+	for _, f := range fns {
+		if !bitIsSet(q.bits[:], uint(f)) {
+			return false
+		}
+	}
+	return true
+}
+
+func doinit() {
+	initS390Xbase()
+
+	// We need implementations of stfle, km and so on
+	// to detect cryptographic features.
+	if !haveAsmFunctions() {
+		return
+	}
+
+	// optional cryptographic functions
+	if S390X.HasMSA {
+		aes := []function{aes128, aes192, aes256}
+
+		// cipher message
+		km, kmc := kmQuery(), kmcQuery()
+		S390X.HasAES = km.Has(aes...)
+		S390X.HasAESCBC = kmc.Has(aes...)
+		if S390X.HasSTFLE {
+			facilities := stfle()
+			if facilities.Has(msa4) {
+				kmctr := kmctrQuery()
+				S390X.HasAESCTR = kmctr.Has(aes...)
+			}
+			if facilities.Has(msa8) {
+				kma := kmaQuery()
+				S390X.HasAESGCM = kma.Has(aes...)
+			}
+		}
+
+		// compute message digest
+		kimd := kimdQuery() // intermediate (no padding)
+		klmd := klmdQuery() // last (padding)
+		S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1)
+		S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256)
+		S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512)
+		S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist
+		sha3 := []function{
+			sha3_224, sha3_256, sha3_384, sha3_512,
+			shake128, shake256,
+		}
+		S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...)
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_s390x.s b/vendor/golang.org/x/sys/cpu/cpu_s390x.s
new file mode 100644
index 0000000..96f81e2
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_s390x.s
@@ -0,0 +1,58 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build gc
+// +build gc
+
+#include "textflag.h"
+
+// func stfle() facilityList
+TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32
+	MOVD $ret+0(FP), R1
+	MOVD $3, R0          // last doubleword index to store
+	XC   $32, (R1), (R1) // clear 4 doublewords (32 bytes)
+	WORD $0xb2b01000     // store facility list extended (STFLE)
+	RET
+
+// func kmQuery() queryResult
+TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KM-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92E0024    // cipher message (KM)
+	RET
+
+// func kmcQuery() queryResult
+TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMC-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92F0024    // cipher message with chaining (KMC)
+	RET
+
+// func kmctrQuery() queryResult
+TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMCTR-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB92D4024    // cipher message with counter (KMCTR)
+	RET
+
+// func kmaQuery() queryResult
+TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KMA-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xb9296024    // cipher message with authentication (KMA)
+	RET
+
+// func kimdQuery() queryResult
+TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KIMD-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB93E0024    // compute intermediate message digest (KIMD)
+	RET
+
+// func klmdQuery() queryResult
+TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16
+	MOVD $0, R0         // set function code to 0 (KLMD-Query)
+	MOVD $ret+0(FP), R1 // address of 16-byte return value
+	WORD $0xB93F0024    // compute last message digest (KLMD)
+	RET
diff --git a/vendor/golang.org/x/sys/cpu/cpu_wasm.go b/vendor/golang.org/x/sys/cpu/cpu_wasm.go
new file mode 100644
index 0000000..7747d88
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_wasm.go
@@ -0,0 +1,18 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build wasm
+// +build wasm
+
+package cpu
+
+// We're compiling the cpu package for an unknown (software-abstracted) CPU.
+// Make CacheLinePad an empty struct and hope that the usual struct alignment
+// rules are good enough.
+
+const cacheLineSize = 0
+
+func initOptions() {}
+
+func archInit() {}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go
new file mode 100644
index 0000000..f5aacfc
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go
@@ -0,0 +1,145 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64 || amd64p32
+// +build 386 amd64 amd64p32
+
+package cpu
+
+import "runtime"
+
+const cacheLineSize = 64
+
+func initOptions() {
+	options = []option{
+		{Name: "adx", Feature: &X86.HasADX},
+		{Name: "aes", Feature: &X86.HasAES},
+		{Name: "avx", Feature: &X86.HasAVX},
+		{Name: "avx2", Feature: &X86.HasAVX2},
+		{Name: "avx512", Feature: &X86.HasAVX512},
+		{Name: "avx512f", Feature: &X86.HasAVX512F},
+		{Name: "avx512cd", Feature: &X86.HasAVX512CD},
+		{Name: "avx512er", Feature: &X86.HasAVX512ER},
+		{Name: "avx512pf", Feature: &X86.HasAVX512PF},
+		{Name: "avx512vl", Feature: &X86.HasAVX512VL},
+		{Name: "avx512bw", Feature: &X86.HasAVX512BW},
+		{Name: "avx512dq", Feature: &X86.HasAVX512DQ},
+		{Name: "avx512ifma", Feature: &X86.HasAVX512IFMA},
+		{Name: "avx512vbmi", Feature: &X86.HasAVX512VBMI},
+		{Name: "avx512vnniw", Feature: &X86.HasAVX5124VNNIW},
+		{Name: "avx5124fmaps", Feature: &X86.HasAVX5124FMAPS},
+		{Name: "avx512vpopcntdq", Feature: &X86.HasAVX512VPOPCNTDQ},
+		{Name: "avx512vpclmulqdq", Feature: &X86.HasAVX512VPCLMULQDQ},
+		{Name: "avx512vnni", Feature: &X86.HasAVX512VNNI},
+		{Name: "avx512gfni", Feature: &X86.HasAVX512GFNI},
+		{Name: "avx512vaes", Feature: &X86.HasAVX512VAES},
+		{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2},
+		{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG},
+		{Name: "avx512bf16", Feature: &X86.HasAVX512BF16},
+		{Name: "bmi1", Feature: &X86.HasBMI1},
+		{Name: "bmi2", Feature: &X86.HasBMI2},
+		{Name: "cx16", Feature: &X86.HasCX16},
+		{Name: "erms", Feature: &X86.HasERMS},
+		{Name: "fma", Feature: &X86.HasFMA},
+		{Name: "osxsave", Feature: &X86.HasOSXSAVE},
+		{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ},
+		{Name: "popcnt", Feature: &X86.HasPOPCNT},
+		{Name: "rdrand", Feature: &X86.HasRDRAND},
+		{Name: "rdseed", Feature: &X86.HasRDSEED},
+		{Name: "sse3", Feature: &X86.HasSSE3},
+		{Name: "sse41", Feature: &X86.HasSSE41},
+		{Name: "sse42", Feature: &X86.HasSSE42},
+		{Name: "ssse3", Feature: &X86.HasSSSE3},
+
+		// These capabilities should always be enabled on amd64:
+		{Name: "sse2", Feature: &X86.HasSSE2, Required: runtime.GOARCH == "amd64"},
+	}
+}
+
+func archInit() {
+
+	Initialized = true
+
+	maxID, _, _, _ := cpuid(0, 0)
+
+	if maxID < 1 {
+		return
+	}
+
+	_, _, ecx1, edx1 := cpuid(1, 0)
+	X86.HasSSE2 = isSet(26, edx1)
+
+	X86.HasSSE3 = isSet(0, ecx1)
+	X86.HasPCLMULQDQ = isSet(1, ecx1)
+	X86.HasSSSE3 = isSet(9, ecx1)
+	X86.HasFMA = isSet(12, ecx1)
+	X86.HasCX16 = isSet(13, ecx1)
+	X86.HasSSE41 = isSet(19, ecx1)
+	X86.HasSSE42 = isSet(20, ecx1)
+	X86.HasPOPCNT = isSet(23, ecx1)
+	X86.HasAES = isSet(25, ecx1)
+	X86.HasOSXSAVE = isSet(27, ecx1)
+	X86.HasRDRAND = isSet(30, ecx1)
+
+	var osSupportsAVX, osSupportsAVX512 bool
+	// For XGETBV, OSXSAVE bit is required and sufficient.
+	if X86.HasOSXSAVE {
+		eax, _ := xgetbv()
+		// Check if XMM and YMM registers have OS support.
+		osSupportsAVX = isSet(1, eax) && isSet(2, eax)
+
+		if runtime.GOOS == "darwin" {
+			// Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers.
+			// Since users can't rely on mask register contents, let's not advertise AVX-512 support.
+			// See issue 49233.
+			osSupportsAVX512 = false
+		} else {
+			// Check if OPMASK and ZMM registers have OS support.
+			osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax)
+		}
+	}
+
+	X86.HasAVX = isSet(28, ecx1) && osSupportsAVX
+
+	if maxID < 7 {
+		return
+	}
+
+	_, ebx7, ecx7, edx7 := cpuid(7, 0)
+	X86.HasBMI1 = isSet(3, ebx7)
+	X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
+	X86.HasBMI2 = isSet(8, ebx7)
+	X86.HasERMS = isSet(9, ebx7)
+	X86.HasRDSEED = isSet(18, ebx7)
+	X86.HasADX = isSet(19, ebx7)
+
+	X86.HasAVX512 = isSet(16, ebx7) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
+	if X86.HasAVX512 {
+		X86.HasAVX512F = true
+		X86.HasAVX512CD = isSet(28, ebx7)
+		X86.HasAVX512ER = isSet(27, ebx7)
+		X86.HasAVX512PF = isSet(26, ebx7)
+		X86.HasAVX512VL = isSet(31, ebx7)
+		X86.HasAVX512BW = isSet(30, ebx7)
+		X86.HasAVX512DQ = isSet(17, ebx7)
+		X86.HasAVX512IFMA = isSet(21, ebx7)
+		X86.HasAVX512VBMI = isSet(1, ecx7)
+		X86.HasAVX5124VNNIW = isSet(2, edx7)
+		X86.HasAVX5124FMAPS = isSet(3, edx7)
+		X86.HasAVX512VPOPCNTDQ = isSet(14, ecx7)
+		X86.HasAVX512VPCLMULQDQ = isSet(10, ecx7)
+		X86.HasAVX512VNNI = isSet(11, ecx7)
+		X86.HasAVX512GFNI = isSet(8, ecx7)
+		X86.HasAVX512VAES = isSet(9, ecx7)
+		X86.HasAVX512VBMI2 = isSet(6, ecx7)
+		X86.HasAVX512BITALG = isSet(12, ecx7)
+
+		eax71, _, _, _ := cpuid(7, 1)
+		X86.HasAVX512BF16 = isSet(5, eax71)
+	}
+}
+
+func isSet(bitpos uint, value uint32) bool {
+	return value&(1<<bitpos) != 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.s b/vendor/golang.org/x/sys/cpu/cpu_x86.s
new file mode 100644
index 0000000..39acab2
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.s
@@ -0,0 +1,28 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (386 || amd64 || amd64p32) && gc
+// +build 386 amd64 amd64p32
+// +build gc
+
+#include "textflag.h"
+
+// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
+TEXT ·cpuid(SB), NOSPLIT, $0-24
+	MOVL eaxArg+0(FP), AX
+	MOVL ecxArg+4(FP), CX
+	CPUID
+	MOVL AX, eax+8(FP)
+	MOVL BX, ebx+12(FP)
+	MOVL CX, ecx+16(FP)
+	MOVL DX, edx+20(FP)
+	RET
+
+// func xgetbv() (eax, edx uint32)
+TEXT ·xgetbv(SB),NOSPLIT,$0-8
+	MOVL $0, CX
+	XGETBV
+	MOVL AX, eax+0(FP)
+	MOVL DX, edx+4(FP)
+	RET
diff --git a/vendor/golang.org/x/sys/cpu/cpu_zos.go b/vendor/golang.org/x/sys/cpu/cpu_zos.go
new file mode 100644
index 0000000..5f54683
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_zos.go
@@ -0,0 +1,10 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+func archInit() {
+	doinit()
+	Initialized = true
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_zos_s390x.go b/vendor/golang.org/x/sys/cpu/cpu_zos_s390x.go
new file mode 100644
index 0000000..ccb1b70
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_zos_s390x.go
@@ -0,0 +1,25 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+func initS390Xbase() {
+	// get the facilities list
+	facilities := stfle()
+
+	// mandatory
+	S390X.HasZARCH = facilities.Has(zarch)
+	S390X.HasSTFLE = facilities.Has(stflef)
+	S390X.HasLDISP = facilities.Has(ldisp)
+	S390X.HasEIMM = facilities.Has(eimm)
+
+	// optional
+	S390X.HasETF3EH = facilities.Has(etf3eh)
+	S390X.HasDFP = facilities.Has(dfp)
+	S390X.HasMSA = facilities.Has(msa)
+	S390X.HasVX = facilities.Has(vx)
+	if S390X.HasVX {
+		S390X.HasVXE = facilities.Has(vxe)
+	}
+}
diff --git a/vendor/golang.org/x/sys/cpu/hwcap_linux.go b/vendor/golang.org/x/sys/cpu/hwcap_linux.go
new file mode 100644
index 0000000..f3baa37
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/hwcap_linux.go
@@ -0,0 +1,56 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+	"io/ioutil"
+)
+
+const (
+	_AT_HWCAP  = 16
+	_AT_HWCAP2 = 26
+
+	procAuxv = "/proc/self/auxv"
+
+	uintSize = int(32 << (^uint(0) >> 63))
+)
+
+// For those platforms don't have a 'cpuid' equivalent we use HWCAP/HWCAP2
+// These are initialized in cpu_$GOARCH.go
+// and should not be changed after they are initialized.
+var hwCap uint
+var hwCap2 uint
+
+func readHWCAP() error {
+	buf, err := ioutil.ReadFile(procAuxv)
+	if err != nil {
+		// e.g. on android /proc/self/auxv is not accessible, so silently
+		// ignore the error and leave Initialized = false. On some
+		// architectures (e.g. arm64) doinit() implements a fallback
+		// readout and will set Initialized = true again.
+		return err
+	}
+	bo := hostByteOrder()
+	for len(buf) >= 2*(uintSize/8) {
+		var tag, val uint
+		switch uintSize {
+		case 32:
+			tag = uint(bo.Uint32(buf[0:]))
+			val = uint(bo.Uint32(buf[4:]))
+			buf = buf[8:]
+		case 64:
+			tag = uint(bo.Uint64(buf[0:]))
+			val = uint(bo.Uint64(buf[8:]))
+			buf = buf[16:]
+		}
+		switch tag {
+		case _AT_HWCAP:
+			hwCap = val
+		case _AT_HWCAP2:
+			hwCap2 = val
+		}
+	}
+	return nil
+}
diff --git a/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go b/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
new file mode 100644
index 0000000..9613415
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/syscall_aix_gccgo.go
@@ -0,0 +1,27 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Recreate a getsystemcfg syscall handler instead of
+// using the one provided by x/sys/unix to avoid having
+// the dependency between them. (See golang.org/issue/32102)
+// Moreover, this file will be used during the building of
+// gccgo's libgo and thus must not used a CGo method.
+
+//go:build aix && gccgo
+// +build aix,gccgo
+
+package cpu
+
+import (
+	"syscall"
+)
+
+//extern getsystemcfg
+func gccgoGetsystemcfg(label uint32) (r uint64)
+
+func callgetsystemcfg(label int) (r1 uintptr, e1 syscall.Errno) {
+	r1 = uintptr(gccgoGetsystemcfg(uint32(label)))
+	e1 = syscall.GetErrno()
+	return
+}
diff --git a/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go b/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
new file mode 100644
index 0000000..904be42
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/syscall_aix_ppc64_gc.go
@@ -0,0 +1,36 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Minimal copy of x/sys/unix so the cpu package can make a
+// system call on AIX without depending on x/sys/unix.
+// (See golang.org/issue/32102)
+
+//go:build aix && ppc64 && gc
+// +build aix,ppc64,gc
+
+package cpu
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+//go:cgo_import_dynamic libc_getsystemcfg getsystemcfg "libc.a/shr_64.o"
+
+//go:linkname libc_getsystemcfg libc_getsystemcfg
+
+type syscallFunc uintptr
+
+var libc_getsystemcfg syscallFunc
+
+type errno = syscall.Errno
+
+// Implemented in runtime/syscall_aix.go.
+func rawSyscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err errno)
+func syscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err errno)
+
+func callgetsystemcfg(label int) (r1 uintptr, e1 errno) {
+	r1, _, e1 = syscall6(uintptr(unsafe.Pointer(&libc_getsystemcfg)), 1, uintptr(label), 0, 0, 0, 0, 0)
+	return
+}