aboutsummaryrefslogtreecommitdiff
path: root/src/crypto
diff options
context:
space:
mode:
authorFilippo Valsorda <filippo@golang.org>2021-04-30 17:05:54 -0400
committerFilippo Valsorda <filippo@golang.org>2021-05-09 00:08:05 +0000
commit14c3d2aa596147bd90da298a4b2a70660786c682 (patch)
treef102e9325683979296015a328ee263ff51766786 /src/crypto
parentec4efa42089415b0427a4d30b317cfd7e4a0fe75 (diff)
downloadgo-14c3d2aa596147bd90da298a4b2a70660786c682.tar.gz
go-14c3d2aa596147bd90da298a4b2a70660786c682.zip
crypto/elliptic: import fiat-crypto P-521 field implementation
Fiat Cryptography (https://github.com/mit-plv/fiat-crypto) is a project that produces prime order field implementations (the code that does arithmetic modulo a prime number) based on a formally verified model. The formal verification covers some of the most subtle and hard to test parts of an elliptic curve implementation, like carry chains. It would probably have prevented #20040 and #43786. This CL imports a 64-bit implementation of the P-521 base field, replacing the horribly slow and catastrophically variable time big.Int CurveParams implementation. The code in p521_fiat64.go is generated reproducibly by fiat-crypto, building and running the Dockerfile according to the README. The code in fiat/p521.go is a thin and idiomatic wrapper around the fiat-crypto code. It includes an Invert method generated with the help of github.com/mmcloughlin/addchain. The code in elliptic/p521.go is a line-by-line port of the CurveParams implementation. Lsh(x, N) was replaced with repeated Add(x, x) calls. Mul(x, x) was replaced with Square(x). Mod calls were removed, as all operations are modulo P. Likewise, Add calls to bring values back to positive were removed. The ScalarMult ladder implementation is now constant time, copied from p224ScalarMult. Only other notable changes are adding a p512Point type to keep (x, y, z) together, and making addJacobian and doubleJacobian methods on that type, with the usual receiver semantics to save 4 allocations per step. This amounts to a proof of concept, and is far from a mature elliptic curve implementation. Here's a non-exhaustive list of things that need improvement, most of which are pre-existing issues with crypto/elliptic. Some of these can be fixed without API change, so can't. - Marshal and Unmarshal still use the slow, variable time big.Int arithmetic. The Curve interface does not expose field operations, so we'll have to make our own abstraction. - Point addition uses an incomplete Jacobian formula, which has variable time behaviors for points at infinity and equal points. There are better, complete formulae these days, but I wanted to keep this CL reviewable against the existing code. - The scalar multiplication ladder is still heavily variable time. This is easy to fix and I'll do it in a follow-up CL, but I wanted to keep this one easier to review. - Fundamentally, values have to go in and out of big.Int representation when they pass through the Curve interface, which is both slow and slightly variable-time. - There is no scalar field implementation, so crypto/ecdsa ends up using big.Int for signing. - Extending this to P-384 would involve either duplicating all P-521 code, or coming up with some lower-level interfaces for the base field. Even better, generics, which would maybe let us save heap allocations due to virtual calls. - The readability and idiomaticity of the autogenerated code can improve, although we have a clear abstraction and well-enforced contract, which makes it unlikely we'll have to resort to manually modifying the code. See mit-plv/fiat-crypto#949. - We could also have a 32-bit implementation, since it's almost free to have fiat-crypto generate one. Anyway, it's definitely better than CurveParams, and definitely faster. name old time/op new time/op delta pkg:crypto/elliptic goos:darwin goarch:arm64 ScalarBaseMult/P521-8 4.18ms ± 3% 0.86ms ± 2% -79.50% (p=0.000 n=10+9) ScalarMult/P521-8 4.17ms ± 2% 0.85ms ± 6% -79.68% (p=0.000 n=10+10) pkg:crypto/ecdsa goos:darwin goarch:arm64 Sign/P521-8 4.23ms ± 1% 0.94ms ± 0% -77.70% (p=0.000 n=9+8) Verify/P521-8 8.31ms ± 2% 1.75ms ± 4% -78.99% (p=0.000 n=9+10) GenerateKey/P521-8 4.15ms ± 2% 0.85ms ± 2% -79.49% (p=0.000 n=10+9) name old alloc/op new alloc/op delta pkg:crypto/elliptic goos:darwin goarch:arm64 ScalarBaseMult/P521-8 3.06MB ± 3% 0.00MB ± 0% -99.97% (p=0.000 n=10+10) ScalarMult/P521-8 3.05MB ± 1% 0.00MB ± 0% -99.97% (p=0.000 n=9+10) pkg:crypto/ecdsa goos:darwin goarch:arm64 Sign/P521-8 3.03MB ± 0% 0.01MB ± 0% -99.74% (p=0.000 n=10+8) Verify/P521-8 6.06MB ± 1% 0.00MB ± 0% -99.93% (p=0.000 n=9+9) GenerateKey/P521-8 3.02MB ± 0% 0.00MB ± 0% -99.96% (p=0.000 n=9+10) name old allocs/op new allocs/op delta pkg:crypto/elliptic goos:darwin goarch:arm64 ScalarBaseMult/P521-8 19.8k ± 3% 0.0k ± 0% -99.95% (p=0.000 n=10+10) ScalarMult/P521-8 19.7k ± 1% 0.0k ± 0% -99.95% (p=0.000 n=9+10) pkg:crypto/ecdsa goos:darwin goarch:arm64 Sign/P521-8 19.6k ± 0% 0.1k ± 0% -99.63% (p=0.000 n=10+10) Verify/P521-8 39.2k ± 1% 0.1k ± 0% -99.84% (p=0.000 n=9+10) GenerateKey/P521-8 19.5k ± 0% 0.0k ± 0% -99.91% (p=0.000 n=9+10) Updates #40171 Change-Id: Ic898b09a2388382bf51ec007d9a79d72d44efe10 Reviewed-on: https://go-review.googlesource.com/c/go/+/315271 Run-TryBot: Filippo Valsorda <filippo@golang.org> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Katie Hockman <katie@golang.org> Trust: Katie Hockman <katie@golang.org> Trust: Filippo Valsorda <filippo@golang.org>
Diffstat (limited to 'src/crypto')
-rw-r--r--src/crypto/elliptic/elliptic.go12
-rw-r--r--src/crypto/elliptic/internal/fiat/Dockerfile12
-rw-r--r--src/crypto/elliptic/internal/fiat/README39
-rw-r--r--src/crypto/elliptic/internal/fiat/p521.go197
-rw-r--r--src/crypto/elliptic/internal/fiat/p521_fiat64.go1856
-rw-r--r--src/crypto/elliptic/internal/fiat/p521_test.go37
-rw-r--r--src/crypto/elliptic/p521.go254
7 files changed, 2395 insertions, 12 deletions
diff --git a/src/crypto/elliptic/elliptic.go b/src/crypto/elliptic/elliptic.go
index f93dc16419..85d105419b 100644
--- a/src/crypto/elliptic/elliptic.go
+++ b/src/crypto/elliptic/elliptic.go
@@ -390,7 +390,6 @@ func UnmarshalCompressed(curve Curve, data []byte) (x, y *big.Int) {
var initonce sync.Once
var p384 *CurveParams
-var p521 *CurveParams
func initAll() {
initP224()
@@ -410,17 +409,6 @@ func initP384() {
p384.BitSize = 384
}
-func initP521() {
- // See FIPS 186-3, section D.2.5
- p521 = &CurveParams{Name: "P-521"}
- p521.P, _ = new(big.Int).SetString("6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", 10)
- p521.N, _ = new(big.Int).SetString("6864797660130609714981900799081393217269435300143305409394463459185543183397655394245057746333217197532963996371363321113864768612440380340372808892707005449", 10)
- p521.B, _ = new(big.Int).SetString("051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", 16)
- p521.Gx, _ = new(big.Int).SetString("c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", 16)
- p521.Gy, _ = new(big.Int).SetString("11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", 16)
- p521.BitSize = 521
-}
-
// P256 returns a Curve which implements NIST P-256 (FIPS 186-3, section D.2.3),
// also known as secp256r1 or prime256v1. The CurveParams.Name of this Curve is
// "P-256".
diff --git a/src/crypto/elliptic/internal/fiat/Dockerfile b/src/crypto/elliptic/internal/fiat/Dockerfile
new file mode 100644
index 0000000000..7b5ece0e30
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/Dockerfile
@@ -0,0 +1,12 @@
+# Copyright 2021 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+FROM coqorg/coq:8.13.2
+
+RUN git clone https://github.com/mit-plv/fiat-crypto
+RUN cd fiat-crypto && git checkout c076f3550bea2bb7f4cb5766a32594b9e67694f2
+RUN cd fiat-crypto && git submodule update --init --recursive
+RUN cd fiat-crypto && eval $(opam env) && make -j4 standalone-ocaml SKIP_BEDROCK2=1
+
+ENTRYPOINT ["fiat-crypto/src/ExtractionOCaml/unsaturated_solinas"]
diff --git a/src/crypto/elliptic/internal/fiat/README b/src/crypto/elliptic/internal/fiat/README
new file mode 100644
index 0000000000..171f57a327
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/README
@@ -0,0 +1,39 @@
+The code in this package was autogenerated by the fiat-crypto project
+at commit c076f3550 from a formally verified model.
+
+ docker build -t fiat-crypto:c076f3550 .
+ docker run fiat-crypto:c076f3550 --lang Go --no-wide-int --cmovznz-by-mul \
+ --internal-static --public-function-case camelCase --public-type-case camelCase \
+ --private-function-case camelCase --private-type-case camelCase \
+ --no-prefix-fiat --package-name fiat --doc-text-before-function-name '' \
+ --doc-prepend-header 'Code generated by Fiat Cryptography. DO NOT EDIT.' \
+ --doc-newline-before-package-declaration p521 64 9 '2^521 - 1' \
+ carry_mul carry_square carry add sub to_bytes from_bytes selectznz \
+ > p521_fiat64.go
+
+It comes under the following license.
+
+ Copyright (c) 2015-2020 The fiat-crypto Authors. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+ Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+The authors are listed at
+
+ https://github.com/mit-plv/fiat-crypto/blob/master/AUTHORS
diff --git a/src/crypto/elliptic/internal/fiat/p521.go b/src/crypto/elliptic/internal/fiat/p521.go
new file mode 100644
index 0000000000..dc677327e6
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/p521.go
@@ -0,0 +1,197 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package fiat implements prime order fields using formally verified algorithms
+// from the Fiat Cryptography project.
+package fiat
+
+import (
+ "crypto/subtle"
+ "errors"
+)
+
+// P521Element is an integer modulo 2^521 - 1.
+//
+// The zero value is a valid zero element.
+type P521Element struct {
+ // This element has the following bounds, which are tighter than
+ // the output bounds of some operations. Those operations must be
+ // followed by a carry.
+ //
+ // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000],
+ // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000],
+ // [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]
+ x [9]uint64
+}
+
+// One sets e = 1, and returns e.
+func (e *P521Element) One() *P521Element {
+ *e = P521Element{}
+ e.x[0] = 1
+ return e
+}
+
+// Equal returns 1 if e == t, and zero otherwise.
+func (e *P521Element) Equal(t *P521Element) int {
+ eBytes := e.Bytes()
+ tBytes := t.Bytes()
+ return subtle.ConstantTimeCompare(eBytes, tBytes)
+}
+
+var p521ZeroEncoding = new(P521Element).Bytes()
+
+// IsZero returns 1 if e == 0, and zero otherwise.
+func (e *P521Element) IsZero() int {
+ eBytes := e.Bytes()
+ return subtle.ConstantTimeCompare(eBytes, p521ZeroEncoding)
+}
+
+// Set sets e = t, and returns e.
+func (e *P521Element) Set(t *P521Element) *P521Element {
+ e.x = t.x
+ return e
+}
+
+// Bytes returns the 66-byte little-endian encoding of e.
+func (e *P521Element) Bytes() []byte {
+ // This function must be inlined to move the allocation to the parent and
+ // save it from escaping to the heap.
+ var out [66]byte
+ p521ToBytes(&out, &e.x)
+ return out[:]
+}
+
+// SetBytes sets e = v, where v is a little-endian 66-byte encoding, and returns
+// e. If v is not 66 bytes or it encodes a value higher than 2^521 - 1, SetBytes
+// returns nil and an error, and e is unchanged.
+func (e *P521Element) SetBytes(v []byte) (*P521Element, error) {
+ if len(v) != 66 || v[65] > 1 {
+ return nil, errors.New("invalid P-521 field encoding")
+ }
+ var in [66]byte
+ copy(in[:], v)
+ p521FromBytes(&e.x, &in)
+ return e, nil
+}
+
+// Add sets e = t1 + t2, and returns e.
+func (e *P521Element) Add(t1, t2 *P521Element) *P521Element {
+ p521Add(&e.x, &t1.x, &t2.x)
+ p521Carry(&e.x, &e.x)
+ return e
+}
+
+// Sub sets e = t1 - t2, and returns e.
+func (e *P521Element) Sub(t1, t2 *P521Element) *P521Element {
+ p521Sub(&e.x, &t1.x, &t2.x)
+ p521Carry(&e.x, &e.x)
+ return e
+}
+
+// Mul sets e = t1 * t2, and returns e.
+func (e *P521Element) Mul(t1, t2 *P521Element) *P521Element {
+ p521CarryMul(&e.x, &t1.x, &t2.x)
+ return e
+}
+
+// Square sets e = t * t, and returns e.
+func (e *P521Element) Square(t *P521Element) *P521Element {
+ p521CarrySquare(&e.x, &t.x)
+ return e
+}
+
+// Select sets e to a if cond == 1, and to b if cond == 0.
+func (v *P521Element) Select(a, b *P521Element, cond int) *P521Element {
+ p521Selectznz(&v.x, p521Uint1(cond), &b.x, &a.x)
+ return v
+}
+
+// Invert sets e = 1/t, and returns e.
+//
+// If t == 0, Invert returns e = 0.
+func (e *P521Element) Invert(t *P521Element) *P521Element {
+ // Inversion is implemented as exponentiation with exponent p − 2.
+ // The sequence of multiplications and squarings was generated with
+ // github.com/mmcloughlin/addchain v0.2.0.
+
+ var t1, t2 = new(P521Element), new(P521Element)
+
+ // _10 = 2 * 1
+ t1.Square(t)
+
+ // _11 = 1 + _10
+ t1.Mul(t, t1)
+
+ // _1100 = _11 << 2
+ t2.Square(t1)
+ t2.Square(t2)
+
+ // _1111 = _11 + _1100
+ t1.Mul(t1, t2)
+
+ // _11110000 = _1111 << 4
+ t2.Square(t1)
+ for i := 0; i < 3; i++ {
+ t2.Square(t2)
+ }
+
+ // _11111111 = _1111 + _11110000
+ t1.Mul(t1, t2)
+
+ // x16 = _11111111<<8 + _11111111
+ t2.Square(t1)
+ for i := 0; i < 7; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x32 = x16<<16 + x16
+ t2.Square(t1)
+ for i := 0; i < 15; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x64 = x32<<32 + x32
+ t2.Square(t1)
+ for i := 0; i < 31; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x65 = 2*x64 + 1
+ t2.Square(t1)
+ t2.Mul(t2, t)
+
+ // x129 = x65<<64 + x64
+ for i := 0; i < 64; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x130 = 2*x129 + 1
+ t2.Square(t1)
+ t2.Mul(t2, t)
+
+ // x259 = x130<<129 + x129
+ for i := 0; i < 129; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // x260 = 2*x259 + 1
+ t2.Square(t1)
+ t2.Mul(t2, t)
+
+ // x519 = x260<<259 + x259
+ for i := 0; i < 259; i++ {
+ t2.Square(t2)
+ }
+ t1.Mul(t1, t2)
+
+ // return x519<<2 + 1
+ t1.Square(t1)
+ t1.Square(t1)
+ return e.Mul(t1, t)
+}
diff --git a/src/crypto/elliptic/internal/fiat/p521_fiat64.go b/src/crypto/elliptic/internal/fiat/p521_fiat64.go
new file mode 100644
index 0000000000..f86283b587
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/p521_fiat64.go
@@ -0,0 +1,1856 @@
+// Code generated by Fiat Cryptography. DO NOT EDIT.
+//
+// Autogenerated: 'fiat-crypto/src/ExtractionOCaml/unsaturated_solinas' --lang Go --no-wide-int --cmovznz-by-mul --internal-static --public-function-case camelCase --public-type-case camelCase --private-function-case camelCase --private-type-case camelCase --no-prefix-fiat --package-name fiat --doc-text-before-function-name '' --doc-prepend-header 'Code generated by Fiat Cryptography. DO NOT EDIT.' --doc-newline-before-package-declaration p521 64 9 '2^521 - 1' carry_mul carry_square carry add sub to_bytes from_bytes selectznz
+//
+// curve description: p521
+//
+// machine_wordsize = 64 (from "64")
+//
+// requested operations: carry_mul, carry_square, carry, add, sub, to_bytes, from_bytes, selectznz
+//
+// n = 9 (from "9")
+//
+// s-c = 2^521 - [(1, 1)] (from "2^521 - 1")
+//
+// tight_bounds_multiplier = 1 (from "")
+//
+//
+//
+// Computed values:
+//
+// carry_chain = [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1]
+//
+// eval z = z[0] + (z[1] << 58) + (z[2] << 116) + (z[3] << 174) + (z[4] << 232) + (z[5] << 0x122) + (z[6] << 0x15c) + (z[7] << 0x196) + (z[8] << 0x1d0)
+//
+// bytes_eval z = z[0] + (z[1] << 8) + (z[2] << 16) + (z[3] << 24) + (z[4] << 32) + (z[5] << 40) + (z[6] << 48) + (z[7] << 56) + (z[8] << 64) + (z[9] << 72) + (z[10] << 80) + (z[11] << 88) + (z[12] << 96) + (z[13] << 104) + (z[14] << 112) + (z[15] << 120) + (z[16] << 128) + (z[17] << 136) + (z[18] << 144) + (z[19] << 152) + (z[20] << 160) + (z[21] << 168) + (z[22] << 176) + (z[23] << 184) + (z[24] << 192) + (z[25] << 200) + (z[26] << 208) + (z[27] << 216) + (z[28] << 224) + (z[29] << 232) + (z[30] << 240) + (z[31] << 248) + (z[32] << 256) + (z[33] << 0x108) + (z[34] << 0x110) + (z[35] << 0x118) + (z[36] << 0x120) + (z[37] << 0x128) + (z[38] << 0x130) + (z[39] << 0x138) + (z[40] << 0x140) + (z[41] << 0x148) + (z[42] << 0x150) + (z[43] << 0x158) + (z[44] << 0x160) + (z[45] << 0x168) + (z[46] << 0x170) + (z[47] << 0x178) + (z[48] << 0x180) + (z[49] << 0x188) + (z[50] << 0x190) + (z[51] << 0x198) + (z[52] << 0x1a0) + (z[53] << 0x1a8) + (z[54] << 0x1b0) + (z[55] << 0x1b8) + (z[56] << 0x1c0) + (z[57] << 0x1c8) + (z[58] << 0x1d0) + (z[59] << 0x1d8) + (z[60] << 0x1e0) + (z[61] << 0x1e8) + (z[62] << 0x1f0) + (z[63] << 0x1f8) + (z[64] << 2^9) + (z[65] << 0x208)
+//
+// balance = [0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x7fffffffffffffe, 0x3fffffffffffffe]
+
+package fiat
+
+import "math/bits"
+
+type p521Uint1 uint8
+type p521Int1 int8
+
+// p521AddcarryxU64 is a thin wrapper around bits.Add64 that uses p521Uint1 rather than uint64
+func p521AddcarryxU64(x uint64, y uint64, carry p521Uint1) (uint64, p521Uint1) {
+ sum, carryOut := bits.Add64(x, y, uint64(carry))
+ return sum, p521Uint1(carryOut)
+}
+
+// p521SubborrowxU64 is a thin wrapper around bits.Sub64 that uses p521Uint1 rather than uint64
+func p521SubborrowxU64(x uint64, y uint64, carry p521Uint1) (uint64, p521Uint1) {
+ sum, carryOut := bits.Sub64(x, y, uint64(carry))
+ return sum, p521Uint1(carryOut)
+}
+
+// p521AddcarryxU58 is an addition with carry.
+//
+// Postconditions:
+// out1 = (arg1 + arg2 + arg3) mod 2^58
+// out2 = ⌊(arg1 + arg2 + arg3) / 2^58⌋
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0x3ffffffffffffff]
+// arg3: [0x0 ~> 0x3ffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0x3ffffffffffffff]
+// out2: [0x0 ~> 0x1]
+func p521AddcarryxU58(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := ((uint64(arg1) + arg2) + arg3)
+ x2 := (x1 & 0x3ffffffffffffff)
+ x3 := p521Uint1((x1 >> 58))
+ *out1 = x2
+ *out2 = x3
+}
+
+// p521SubborrowxU58 is a subtraction with borrow.
+//
+// Postconditions:
+// out1 = (-arg1 + arg2 + -arg3) mod 2^58
+// out2 = -⌊(-arg1 + arg2 + -arg3) / 2^58⌋
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0x3ffffffffffffff]
+// arg3: [0x0 ~> 0x3ffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0x3ffffffffffffff]
+// out2: [0x0 ~> 0x1]
+func p521SubborrowxU58(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := ((int64(arg2) - int64(arg1)) - int64(arg3))
+ x2 := p521Int1((x1 >> 58))
+ x3 := (uint64(x1) & 0x3ffffffffffffff)
+ *out1 = x3
+ *out2 = (0x0 - p521Uint1(x2))
+}
+
+// p521AddcarryxU57 is an addition with carry.
+//
+// Postconditions:
+// out1 = (arg1 + arg2 + arg3) mod 2^57
+// out2 = ⌊(arg1 + arg2 + arg3) / 2^57⌋
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0x1ffffffffffffff]
+// arg3: [0x0 ~> 0x1ffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0x1ffffffffffffff]
+// out2: [0x0 ~> 0x1]
+func p521AddcarryxU57(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := ((uint64(arg1) + arg2) + arg3)
+ x2 := (x1 & 0x1ffffffffffffff)
+ x3 := p521Uint1((x1 >> 57))
+ *out1 = x2
+ *out2 = x3
+}
+
+// p521SubborrowxU57 is a subtraction with borrow.
+//
+// Postconditions:
+// out1 = (-arg1 + arg2 + -arg3) mod 2^57
+// out2 = -⌊(-arg1 + arg2 + -arg3) / 2^57⌋
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0x1ffffffffffffff]
+// arg3: [0x0 ~> 0x1ffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0x1ffffffffffffff]
+// out2: [0x0 ~> 0x1]
+func p521SubborrowxU57(out1 *uint64, out2 *p521Uint1, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := ((int64(arg2) - int64(arg1)) - int64(arg3))
+ x2 := p521Int1((x1 >> 57))
+ x3 := (uint64(x1) & 0x1ffffffffffffff)
+ *out1 = x3
+ *out2 = (0x0 - p521Uint1(x2))
+}
+
+// p521CmovznzU64 is a single-word conditional move.
+//
+// Postconditions:
+// out1 = (if arg1 = 0 then arg2 else arg3)
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [0x0 ~> 0xffffffffffffffff]
+// arg3: [0x0 ~> 0xffffffffffffffff]
+// Output Bounds:
+// out1: [0x0 ~> 0xffffffffffffffff]
+func p521CmovznzU64(out1 *uint64, arg1 p521Uint1, arg2 uint64, arg3 uint64) {
+ x1 := (uint64(arg1) * 0xffffffffffffffff)
+ x2 := ((x1 & arg3) | ((^x1) & arg2))
+ *out1 = x2
+}
+
+// p521CarryMul multiplies two field elements and reduces the result.
+//
+// Postconditions:
+// eval out1 mod m = (eval arg1 * eval arg2) mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+// arg2: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+func p521CarryMul(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) {
+ var x1 uint64
+ var x2 uint64
+ x2, x1 = bits.Mul64(arg1[8], (arg2[8] * 0x2))
+ var x3 uint64
+ var x4 uint64
+ x4, x3 = bits.Mul64(arg1[8], (arg2[7] * 0x2))
+ var x5 uint64
+ var x6 uint64
+ x6, x5 = bits.Mul64(arg1[8], (arg2[6] * 0x2))
+ var x7 uint64
+ var x8 uint64
+ x8, x7 = bits.Mul64(arg1[8], (arg2[5] * 0x2))
+ var x9 uint64
+ var x10 uint64
+ x10, x9 = bits.Mul64(arg1[8], (arg2[4] * 0x2))
+ var x11 uint64
+ var x12 uint64
+ x12, x11 = bits.Mul64(arg1[8], (arg2[3] * 0x2))
+ var x13 uint64
+ var x14 uint64
+ x14, x13 = bits.Mul64(arg1[8], (arg2[2] * 0x2))
+ var x15 uint64
+ var x16 uint64
+ x16, x15 = bits.Mul64(arg1[8], (arg2[1] * 0x2))
+ var x17 uint64
+ var x18 uint64
+ x18, x17 = bits.Mul64(arg1[7], (arg2[8] * 0x2))
+ var x19 uint64
+ var x20 uint64
+ x20, x19 = bits.Mul64(arg1[7], (arg2[7] * 0x2))
+ var x21 uint64
+ var x22 uint64
+ x22, x21 = bits.Mul64(arg1[7], (arg2[6] * 0x2))
+ var x23 uint64
+ var x24 uint64
+ x24, x23 = bits.Mul64(arg1[7], (arg2[5] * 0x2))
+ var x25 uint64
+ var x26 uint64
+ x26, x25 = bits.Mul64(arg1[7], (arg2[4] * 0x2))
+ var x27 uint64
+ var x28 uint64
+ x28, x27 = bits.Mul64(arg1[7], (arg2[3] * 0x2))
+ var x29 uint64
+ var x30 uint64
+ x30, x29 = bits.Mul64(arg1[7], (arg2[2] * 0x2))
+ var x31 uint64
+ var x32 uint64
+ x32, x31 = bits.Mul64(arg1[6], (arg2[8] * 0x2))
+ var x33 uint64
+ var x34 uint64
+ x34, x33 = bits.Mul64(arg1[6], (arg2[7] * 0x2))
+ var x35 uint64
+ var x36 uint64
+ x36, x35 = bits.Mul64(arg1[6], (arg2[6] * 0x2))
+ var x37 uint64
+ var x38 uint64
+ x38, x37 = bits.Mul64(arg1[6], (arg2[5] * 0x2))
+ var x39 uint64
+ var x40 uint64
+ x40, x39 = bits.Mul64(arg1[6], (arg2[4] * 0x2))
+ var x41 uint64
+ var x42 uint64
+ x42, x41 = bits.Mul64(arg1[6], (arg2[3] * 0x2))
+ var x43 uint64
+ var x44 uint64
+ x44, x43 = bits.Mul64(arg1[5], (arg2[8] * 0x2))
+ var x45 uint64
+ var x46 uint64
+ x46, x45 = bits.Mul64(arg1[5], (arg2[7] * 0x2))
+ var x47 uint64
+ var x48 uint64
+ x48, x47 = bits.Mul64(arg1[5], (arg2[6] * 0x2))
+ var x49 uint64
+ var x50 uint64
+ x50, x49 = bits.Mul64(arg1[5], (arg2[5] * 0x2))
+ var x51 uint64
+ var x52 uint64
+ x52, x51 = bits.Mul64(arg1[5], (arg2[4] * 0x2))
+ var x53 uint64
+ var x54 uint64
+ x54, x53 = bits.Mul64(arg1[4], (arg2[8] * 0x2))
+ var x55 uint64
+ var x56 uint64
+ x56, x55 = bits.Mul64(arg1[4], (arg2[7] * 0x2))
+ var x57 uint64
+ var x58 uint64
+ x58, x57 = bits.Mul64(arg1[4], (arg2[6] * 0x2))
+ var x59 uint64
+ var x60 uint64
+ x60, x59 = bits.Mul64(arg1[4], (arg2[5] * 0x2))
+ var x61 uint64
+ var x62 uint64
+ x62, x61 = bits.Mul64(arg1[3], (arg2[8] * 0x2))
+ var x63 uint64
+ var x64 uint64
+ x64, x63 = bits.Mul64(arg1[3], (arg2[7] * 0x2))
+ var x65 uint64
+ var x66 uint64
+ x66, x65 = bits.Mul64(arg1[3], (arg2[6] * 0x2))
+ var x67 uint64
+ var x68 uint64
+ x68, x67 = bits.Mul64(arg1[2], (arg2[8] * 0x2))
+ var x69 uint64
+ var x70 uint64
+ x70, x69 = bits.Mul64(arg1[2], (arg2[7] * 0x2))
+ var x71 uint64
+ var x72 uint64
+ x72, x71 = bits.Mul64(arg1[1], (arg2[8] * 0x2))
+ var x73 uint64
+ var x74 uint64
+ x74, x73 = bits.Mul64(arg1[8], arg2[0])
+ var x75 uint64
+ var x76 uint64
+ x76, x75 = bits.Mul64(arg1[7], arg2[1])
+ var x77 uint64
+ var x78 uint64
+ x78, x77 = bits.Mul64(arg1[7], arg2[0])
+ var x79 uint64
+ var x80 uint64
+ x80, x79 = bits.Mul64(arg1[6], arg2[2])
+ var x81 uint64
+ var x82 uint64
+ x82, x81 = bits.Mul64(arg1[6], arg2[1])
+ var x83 uint64
+ var x84 uint64
+ x84, x83 = bits.Mul64(arg1[6], arg2[0])
+ var x85 uint64
+ var x86 uint64
+ x86, x85 = bits.Mul64(arg1[5], arg2[3])
+ var x87 uint64
+ var x88 uint64
+ x88, x87 = bits.Mul64(arg1[5], arg2[2])
+ var x89 uint64
+ var x90 uint64
+ x90, x89 = bits.Mul64(arg1[5], arg2[1])
+ var x91 uint64
+ var x92 uint64
+ x92, x91 = bits.Mul64(arg1[5], arg2[0])
+ var x93 uint64
+ var x94 uint64
+ x94, x93 = bits.Mul64(arg1[4], arg2[4])
+ var x95 uint64
+ var x96 uint64
+ x96, x95 = bits.Mul64(arg1[4], arg2[3])
+ var x97 uint64
+ var x98 uint64
+ x98, x97 = bits.Mul64(arg1[4], arg2[2])
+ var x99 uint64
+ var x100 uint64
+ x100, x99 = bits.Mul64(arg1[4], arg2[1])
+ var x101 uint64
+ var x102 uint64
+ x102, x101 = bits.Mul64(arg1[4], arg2[0])
+ var x103 uint64
+ var x104 uint64
+ x104, x103 = bits.Mul64(arg1[3], arg2[5])
+ var x105 uint64
+ var x106 uint64
+ x106, x105 = bits.Mul64(arg1[3], arg2[4])
+ var x107 uint64
+ var x108 uint64
+ x108, x107 = bits.Mul64(arg1[3], arg2[3])
+ var x109 uint64
+ var x110 uint64
+ x110, x109 = bits.Mul64(arg1[3], arg2[2])
+ var x111 uint64
+ var x112 uint64
+ x112, x111 = bits.Mul64(arg1[3], arg2[1])
+ var x113 uint64
+ var x114 uint64
+ x114, x113 = bits.Mul64(arg1[3], arg2[0])
+ var x115 uint64
+ var x116 uint64
+ x116, x115 = bits.Mul64(arg1[2], arg2[6])
+ var x117 uint64
+ var x118 uint64
+ x118, x117 = bits.Mul64(arg1[2], arg2[5])
+ var x119 uint64
+ var x120 uint64
+ x120, x119 = bits.Mul64(arg1[2], arg2[4])
+ var x121 uint64
+ var x122 uint64
+ x122, x121 = bits.Mul64(arg1[2], arg2[3])
+ var x123 uint64
+ var x124 uint64
+ x124, x123 = bits.Mul64(arg1[2], arg2[2])
+ var x125 uint64
+ var x126 uint64
+ x126, x125 = bits.Mul64(arg1[2], arg2[1])
+ var x127 uint64
+ var x128 uint64
+ x128, x127 = bits.Mul64(arg1[2], arg2[0])
+ var x129 uint64
+ var x130 uint64
+ x130, x129 = bits.Mul64(arg1[1], arg2[7])
+ var x131 uint64
+ var x132 uint64
+ x132, x131 = bits.Mul64(arg1[1], arg2[6])
+ var x133 uint64
+ var x134 uint64
+ x134, x133 = bits.Mul64(arg1[1], arg2[5])
+ var x135 uint64
+ var x136 uint64
+ x136, x135 = bits.Mul64(arg1[1], arg2[4])
+ var x137 uint64
+ var x138 uint64
+ x138, x137 = bits.Mul64(arg1[1], arg2[3])
+ var x139 uint64
+ var x140 uint64
+ x140, x139 = bits.Mul64(arg1[1], arg2[2])
+ var x141 uint64
+ var x142 uint64
+ x142, x141 = bits.Mul64(arg1[1], arg2[1])
+ var x143 uint64
+ var x144 uint64
+ x144, x143 = bits.Mul64(arg1[1], arg2[0])
+ var x145 uint64
+ var x146 uint64
+ x146, x145 = bits.Mul64(arg1[0], arg2[8])
+ var x147 uint64
+ var x148 uint64
+ x148, x147 = bits.Mul64(arg1[0], arg2[7])
+ var x149 uint64
+ var x150 uint64
+ x150, x149 = bits.Mul64(arg1[0], arg2[6])
+ var x151 uint64
+ var x152 uint64
+ x152, x151 = bits.Mul64(arg1[0], arg2[5])
+ var x153 uint64
+ var x154 uint64
+ x154, x153 = bits.Mul64(arg1[0], arg2[4])
+ var x155 uint64
+ var x156 uint64
+ x156, x155 = bits.Mul64(arg1[0], arg2[3])
+ var x157 uint64
+ var x158 uint64
+ x158, x157 = bits.Mul64(arg1[0], arg2[2])
+ var x159 uint64
+ var x160 uint64
+ x160, x159 = bits.Mul64(arg1[0], arg2[1])
+ var x161 uint64
+ var x162 uint64
+ x162, x161 = bits.Mul64(arg1[0], arg2[0])
+ var x163 uint64
+ var x164 p521Uint1
+ x163, x164 = p521AddcarryxU64(x29, x15, 0x0)
+ var x165 uint64
+ x165, _ = p521AddcarryxU64(x30, x16, x164)
+ var x167 uint64
+ var x168 p521Uint1
+ x167, x168 = p521AddcarryxU64(x41, x163, 0x0)
+ var x169 uint64
+ x169, _ = p521AddcarryxU64(x42, x165, x168)
+ var x171 uint64
+ var x172 p521Uint1
+ x171, x172 = p521AddcarryxU64(x51, x167, 0x0)
+ var x173 uint64
+ x173, _ = p521AddcarryxU64(x52, x169, x172)
+ var x175 uint64
+ var x176 p521Uint1
+ x175, x176 = p521AddcarryxU64(x59, x171, 0x0)
+ var x177 uint64
+ x177, _ = p521AddcarryxU64(x60, x173, x176)
+ var x179 uint64
+ var x180 p521Uint1
+ x179, x180 = p521AddcarryxU64(x65, x175, 0x0)
+ var x181 uint64
+ x181, _ = p521AddcarryxU64(x66, x177, x180)
+ var x183 uint64
+ var x184 p521Uint1
+ x183, x184 = p521AddcarryxU64(x69, x179, 0x0)
+ var x185 uint64
+ x185, _ = p521AddcarryxU64(x70, x181, x184)
+ var x187 uint64
+ var x188 p521Uint1
+ x187, x188 = p521AddcarryxU64(x71, x183, 0x0)
+ var x189 uint64
+ x189, _ = p521AddcarryxU64(x72, x185, x188)
+ var x191 uint64
+ var x192 p521Uint1
+ x191, x192 = p521AddcarryxU64(x161, x187, 0x0)
+ var x193 uint64
+ x193, _ = p521AddcarryxU64(x162, x189, x192)
+ x195 := ((x191 >> 58) | ((x193 << 6) & 0xffffffffffffffff))
+ x196 := (x193 >> 58)
+ x197 := (x191 & 0x3ffffffffffffff)
+ var x198 uint64
+ var x199 p521Uint1
+ x198, x199 = p521AddcarryxU64(x75, x73, 0x0)
+ var x200 uint64
+ x200, _ = p521AddcarryxU64(x76, x74, x199)
+ var x202 uint64
+ var x203 p521Uint1
+ x202, x203 = p521AddcarryxU64(x79, x198, 0x0)
+ var x204 uint64
+ x204, _ = p521AddcarryxU64(x80, x200, x203)
+ var x206 uint64
+ var x207 p521Uint1
+ x206, x207 = p521AddcarryxU64(x85, x202, 0x0)
+ var x208 uint64
+ x208, _ = p521AddcarryxU64(x86, x204, x207)
+ var x210 uint64
+ var x211 p521Uint1
+ x210, x211 = p521AddcarryxU64(x93, x206, 0x0)
+ var x212 uint64
+ x212, _ = p521AddcarryxU64(x94, x208, x211)
+ var x214 uint64
+ var x215 p521Uint1
+ x214, x215 = p521AddcarryxU64(x103, x210, 0x0)
+ var x216 uint64
+ x216, _ = p521AddcarryxU64(x104, x212, x215)
+ var x218 uint64
+ var x219 p521Uint1
+ x218, x219 = p521AddcarryxU64(x115, x214, 0x0)
+ var x220 uint64
+ x220, _ = p521AddcarryxU64(x116, x216, x219)
+ var x222 uint64
+ var x223 p521Uint1
+ x222, x223 = p521AddcarryxU64(x129, x218, 0x0)
+ var x224 uint64
+ x224, _ = p521AddcarryxU64(x130, x220, x223)
+ var x226 uint64
+ var x227 p521Uint1
+ x226, x227 = p521AddcarryxU64(x145, x222, 0x0)
+ var x228 uint64
+ x228, _ = p521AddcarryxU64(x146, x224, x227)
+ var x230 uint64
+ var x231 p521Uint1
+ x230, x231 = p521AddcarryxU64(x77, x1, 0x0)
+ var x232 uint64
+ x232, _ = p521AddcarryxU64(x78, x2, x231)
+ var x234 uint64
+ var x235 p521Uint1
+ x234, x235 = p521AddcarryxU64(x81, x230, 0x0)
+ var x236 uint64
+ x236, _ = p521AddcarryxU64(x82, x232, x235)
+ var x238 uint64
+ var x239 p521Uint1
+ x238, x239 = p521AddcarryxU64(x87, x234, 0x0)
+ var x240 uint64
+ x240, _ = p521AddcarryxU64(x88, x236, x239)
+ var x242 uint64
+ var x243 p521Uint1
+ x242, x243 = p521AddcarryxU64(x95, x238, 0x0)
+ var x244 uint64
+ x244, _ = p521AddcarryxU64(x96, x240, x243)
+ var x246 uint64
+ var x247 p521Uint1
+ x246, x247 = p521AddcarryxU64(x105, x242, 0x0)
+ var x248 uint64
+ x248, _ = p521AddcarryxU64(x106, x244, x247)
+ var x250 uint64
+ var x251 p521Uint1
+ x250, x251 = p521AddcarryxU64(x117, x246, 0x0)
+ var x252 uint64
+ x252, _ = p521AddcarryxU64(x118, x248, x251)
+ var x254 uint64
+ var x255 p521Uint1
+ x254, x255 = p521AddcarryxU64(x131, x250, 0x0)
+ var x256 uint64
+ x256, _ = p521AddcarryxU64(x132, x252, x255)
+ var x258 uint64
+ var x259 p521Uint1
+ x258, x259 = p521AddcarryxU64(x147, x254, 0x0)
+ var x260 uint64
+ x260, _ = p521AddcarryxU64(x148, x256, x259)
+ var x262 uint64
+ var x263 p521Uint1
+ x262, x263 = p521AddcarryxU64(x17, x3, 0x0)
+ var x264 uint64
+ x264, _ = p521AddcarryxU64(x18, x4, x263)
+ var x266 uint64
+ var x267 p521Uint1
+ x266, x267 = p521AddcarryxU64(x83, x262, 0x0)
+ var x268 uint64
+ x268, _ = p521AddcarryxU64(x84, x264, x267)
+ var x270 uint64
+ var x271 p521Uint1
+ x270, x271 = p521AddcarryxU64(x89, x266, 0x0)
+ var x272 uint64
+ x272, _ = p521AddcarryxU64(x90, x268, x271)
+ var x274 uint64
+ var x275 p521Uint1
+ x274, x275 = p521AddcarryxU64(x97, x270, 0x0)
+ var x276 uint64
+ x276, _ = p521AddcarryxU64(x98, x272, x275)
+ var x278 uint64
+ var x279 p521Uint1
+ x278, x279 = p521AddcarryxU64(x107, x274, 0x0)
+ var x280 uint64
+ x280, _ = p521AddcarryxU64(x108, x276, x279)
+ var x282 uint64
+ var x283 p521Uint1
+ x282, x283 = p521AddcarryxU64(x119, x278, 0x0)
+ var x284 uint64
+ x284, _ = p521AddcarryxU64(x120, x280, x283)
+ var x286 uint64
+ var x287 p521Uint1
+ x286, x287 = p521AddcarryxU64(x133, x282, 0x0)
+ var x288 uint64
+ x288, _ = p521AddcarryxU64(x134, x284, x287)
+ var x290 uint64
+ var x291 p521Uint1
+ x290, x291 = p521AddcarryxU64(x149, x286, 0x0)
+ var x292 uint64
+ x292, _ = p521AddcarryxU64(x150, x288, x291)
+ var x294 uint64
+ var x295 p521Uint1
+ x294, x295 = p521AddcarryxU64(x19, x5, 0x0)
+ var x296 uint64
+ x296, _ = p521AddcarryxU64(x20, x6, x295)
+ var x298 uint64
+ var x299 p521Uint1
+ x298, x299 = p521AddcarryxU64(x31, x294, 0x0)
+ var x300 uint64
+ x300, _ = p521AddcarryxU64(x32, x296, x299)
+ var x302 uint64
+ var x303 p521Uint1
+ x302, x303 = p521AddcarryxU64(x91, x298, 0x0)
+ var x304 uint64
+ x304, _ = p521AddcarryxU64(x92, x300, x303)
+ var x306 uint64
+ var x307 p521Uint1
+ x306, x307 = p521AddcarryxU64(x99, x302, 0x0)
+ var x308 uint64
+ x308, _ = p521AddcarryxU64(x100, x304, x307)
+ var x310 uint64
+ var x311 p521Uint1
+ x310, x311 = p521AddcarryxU64(x109, x306, 0x0)
+ var x312 uint64
+ x312, _ = p521AddcarryxU64(x110, x308, x311)
+ var x314 uint64
+ var x315 p521Uint1
+ x314, x315 = p521AddcarryxU64(x121, x310, 0x0)
+ var x316 uint64
+ x316, _ = p521AddcarryxU64(x122, x312, x315)
+ var x318 uint64
+ var x319 p521Uint1
+ x318, x319 = p521AddcarryxU64(x135, x314, 0x0)
+ var x320 uint64
+ x320, _ = p521AddcarryxU64(x136, x316, x319)
+ var x322 uint64
+ var x323 p521Uint1
+ x322, x323 = p521AddcarryxU64(x151, x318, 0x0)
+ var x324 uint64
+ x324, _ = p521AddcarryxU64(x152, x320, x323)
+ var x326 uint64
+ var x327 p521Uint1
+ x326, x327 = p521AddcarryxU64(x21, x7, 0x0)
+ var x328 uint64
+ x328, _ = p521AddcarryxU64(x22, x8, x327)
+ var x330 uint64
+ var x331 p521Uint1
+ x330, x331 = p521AddcarryxU64(x33, x326, 0x0)
+ var x332 uint64
+ x332, _ = p521AddcarryxU64(x34, x328, x331)
+ var x334 uint64
+ var x335 p521Uint1
+ x334, x335 = p521AddcarryxU64(x43, x330, 0x0)
+ var x336 uint64
+ x336, _ = p521AddcarryxU64(x44, x332, x335)
+ var x338 uint64
+ var x339 p521Uint1
+ x338, x339 = p521AddcarryxU64(x101, x334, 0x0)
+ var x340 uint64
+ x340, _ = p521AddcarryxU64(x102, x336, x339)
+ var x342 uint64
+ var x343 p521Uint1
+ x342, x343 = p521AddcarryxU64(x111, x338, 0x0)
+ var x344 uint64
+ x344, _ = p521AddcarryxU64(x112, x340, x343)
+ var x346 uint64
+ var x347 p521Uint1
+ x346, x347 = p521AddcarryxU64(x123, x342, 0x0)
+ var x348 uint64
+ x348, _ = p521AddcarryxU64(x124, x344, x347)
+ var x350 uint64
+ var x351 p521Uint1
+ x350, x351 = p521AddcarryxU64(x137, x346, 0x0)
+ var x352 uint64
+ x352, _ = p521AddcarryxU64(x138, x348, x351)
+ var x354 uint64
+ var x355 p521Uint1
+ x354, x355 = p521AddcarryxU64(x153, x350, 0x0)
+ var x356 uint64
+ x356, _ = p521AddcarryxU64(x154, x352, x355)
+ var x358 uint64
+ var x359 p521Uint1
+ x358, x359 = p521AddcarryxU64(x23, x9, 0x0)
+ var x360 uint64
+ x360, _ = p521AddcarryxU64(x24, x10, x359)
+ var x362 uint64
+ var x363 p521Uint1
+ x362, x363 = p521AddcarryxU64(x35, x358, 0x0)
+ var x364 uint64
+ x364, _ = p521AddcarryxU64(x36, x360, x363)
+ var x366 uint64
+ var x367 p521Uint1
+ x366, x367 = p521AddcarryxU64(x45, x362, 0x0)
+ var x368 uint64
+ x368, _ = p521AddcarryxU64(x46, x364, x367)
+ var x370 uint64
+ var x371 p521Uint1
+ x370, x371 = p521AddcarryxU64(x53, x366, 0x0)
+ var x372 uint64
+ x372, _ = p521AddcarryxU64(x54, x368, x371)
+ var x374 uint64
+ var x375 p521Uint1
+ x374, x375 = p521AddcarryxU64(x113, x370, 0x0)
+ var x376 uint64
+ x376, _ = p521AddcarryxU64(x114, x372, x375)
+ var x378 uint64
+ var x379 p521Uint1
+ x378, x379 = p521AddcarryxU64(x125, x374, 0x0)
+ var x380 uint64
+ x380, _ = p521AddcarryxU64(x126, x376, x379)
+ var x382 uint64
+ var x383 p521Uint1
+ x382, x383 = p521AddcarryxU64(x139, x378, 0x0)
+ var x384 uint64
+ x384, _ = p521AddcarryxU64(x140, x380, x383)
+ var x386 uint64
+ var x387 p521Uint1
+ x386, x387 = p521AddcarryxU64(x155, x382, 0x0)
+ var x388 uint64
+ x388, _ = p521AddcarryxU64(x156, x384, x387)
+ var x390 uint64
+ var x391 p521Uint1
+ x390, x391 = p521AddcarryxU64(x25, x11, 0x0)
+ var x392 uint64
+ x392, _ = p521AddcarryxU64(x26, x12, x391)
+ var x394 uint64
+ var x395 p521Uint1
+ x394, x395 = p521AddcarryxU64(x37, x390, 0x0)
+ var x396 uint64
+ x396, _ = p521AddcarryxU64(x38, x392, x395)
+ var x398 uint64
+ var x399 p521Uint1
+ x398, x399 = p521AddcarryxU64(x47, x394, 0x0)
+ var x400 uint64
+ x400, _ = p521AddcarryxU64(x48, x396, x399)
+ var x402 uint64
+ var x403 p521Uint1
+ x402, x403 = p521AddcarryxU64(x55, x398, 0x0)
+ var x404 uint64
+ x404, _ = p521AddcarryxU64(x56, x400, x403)
+ var x406 uint64
+ var x407 p521Uint1
+ x406, x407 = p521AddcarryxU64(x61, x402, 0x0)
+ var x408 uint64
+ x408, _ = p521AddcarryxU64(x62, x404, x407)
+ var x410 uint64
+ var x411 p521Uint1
+ x410, x411 = p521AddcarryxU64(x127, x406, 0x0)
+ var x412 uint64
+ x412, _ = p521AddcarryxU64(x128, x408, x411)
+ var x414 uint64
+ var x415 p521Uint1
+ x414, x415 = p521AddcarryxU64(x141, x410, 0x0)
+ var x416 uint64
+ x416, _ = p521AddcarryxU64(x142, x412, x415)
+ var x418 uint64
+ var x419 p521Uint1
+ x418, x419 = p521AddcarryxU64(x157, x414, 0x0)
+ var x420 uint64
+ x420, _ = p521AddcarryxU64(x158, x416, x419)
+ var x422 uint64
+ var x423 p521Uint1
+ x422, x423 = p521AddcarryxU64(x27, x13, 0x0)
+ var x424 uint64
+ x424, _ = p521AddcarryxU64(x28, x14, x423)
+ var x426 uint64
+ var x427 p521Uint1
+ x426, x427 = p521AddcarryxU64(x39, x422, 0x0)
+ var x428 uint64
+ x428, _ = p521AddcarryxU64(x40, x424, x427)
+ var x430 uint64
+ var x431 p521Uint1
+ x430, x431 = p521AddcarryxU64(x49, x426, 0x0)
+ var x432 uint64
+ x432, _ = p521AddcarryxU64(x50, x428, x431)
+ var x434 uint64
+ var x435 p521Uint1
+ x434, x435 = p521AddcarryxU64(x57, x430, 0x0)
+ var x436 uint64
+ x436, _ = p521AddcarryxU64(x58, x432, x435)
+ var x438 uint64
+ var x439 p521Uint1
+ x438, x439 = p521AddcarryxU64(x63, x434, 0x0)
+ var x440 uint64
+ x440, _ = p521AddcarryxU64(x64, x436, x439)
+ var x442 uint64
+ var x443 p521Uint1
+ x442, x443 = p521AddcarryxU64(x67, x438, 0x0)
+ var x444 uint64
+ x444, _ = p521AddcarryxU64(x68, x440, x443)
+ var x446 uint64
+ var x447 p521Uint1
+ x446, x447 = p521AddcarryxU64(x143, x442, 0x0)
+ var x448 uint64
+ x448, _ = p521AddcarryxU64(x144, x444, x447)
+ var x450 uint64
+ var x451 p521Uint1
+ x450, x451 = p521AddcarryxU64(x159, x446, 0x0)
+ var x452 uint64
+ x452, _ = p521AddcarryxU64(x160, x448, x451)
+ var x454 uint64
+ var x455 p521Uint1
+ x454, x455 = p521AddcarryxU64(x195, x450, 0x0)
+ var x456 uint64
+ x456, _ = p521AddcarryxU64(x196, x452, x455)
+ x458 := ((x454 >> 58) | ((x456 << 6) & 0xffffffffffffffff))
+ x459 := (x456 >> 58)
+ x460 := (x454 & 0x3ffffffffffffff)
+ var x461 uint64
+ var x462 p521Uint1
+ x461, x462 = p521AddcarryxU64(x458, x418, 0x0)
+ var x463 uint64
+ x463, _ = p521AddcarryxU64(x459, x420, x462)
+ x465 := ((x461 >> 58) | ((x463 << 6) & 0xffffffffffffffff))
+ x466 := (x463 >> 58)
+ x467 := (x461 & 0x3ffffffffffffff)
+ var x468 uint64
+ var x469 p521Uint1
+ x468, x469 = p521AddcarryxU64(x465, x386, 0x0)
+ var x470 uint64
+ x470, _ = p521AddcarryxU64(x466, x388, x469)
+ x472 := ((x468 >> 58) | ((x470 << 6) & 0xffffffffffffffff))
+ x473 := (x470 >> 58)
+ x474 := (x468 & 0x3ffffffffffffff)
+ var x475 uint64
+ var x476 p521Uint1
+ x475, x476 = p521AddcarryxU64(x472, x354, 0x0)
+ var x477 uint64
+ x477, _ = p521AddcarryxU64(x473, x356, x476)
+ x479 := ((x475 >> 58) | ((x477 << 6) & 0xffffffffffffffff))
+ x480 := (x477 >> 58)
+ x481 := (x475 & 0x3ffffffffffffff)
+ var x482 uint64
+ var x483 p521Uint1
+ x482, x483 = p521AddcarryxU64(x479, x322, 0x0)
+ var x484 uint64
+ x484, _ = p521AddcarryxU64(x480, x324, x483)
+ x486 := ((x482 >> 58) | ((x484 << 6) & 0xffffffffffffffff))
+ x487 := (x484 >> 58)
+ x488 := (x482 & 0x3ffffffffffffff)
+ var x489 uint64
+ var x490 p521Uint1
+ x489, x490 = p521AddcarryxU64(x486, x290, 0x0)
+ var x491 uint64
+ x491, _ = p521AddcarryxU64(x487, x292, x490)
+ x493 := ((x489 >> 58) | ((x491 << 6) & 0xffffffffffffffff))
+ x494 := (x491 >> 58)
+ x495 := (x489 & 0x3ffffffffffffff)
+ var x496 uint64
+ var x497 p521Uint1
+ x496, x497 = p521AddcarryxU64(x493, x258, 0x0)
+ var x498 uint64
+ x498, _ = p521AddcarryxU64(x494, x260, x497)
+ x500 := ((x496 >> 58) | ((x498 << 6) & 0xffffffffffffffff))
+ x501 := (x498 >> 58)
+ x502 := (x496 & 0x3ffffffffffffff)
+ var x503 uint64
+ var x504 p521Uint1
+ x503, x504 = p521AddcarryxU64(x500, x226, 0x0)
+ var x505 uint64
+ x505, _ = p521AddcarryxU64(x501, x228, x504)
+ x507 := ((x503 >> 57) | ((x505 << 7) & 0xffffffffffffffff))
+ x508 := (x505 >> 57)
+ x509 := (x503 & 0x1ffffffffffffff)
+ var x510 uint64
+ var x511 p521Uint1
+ x510, x511 = p521AddcarryxU64(x197, x507, 0x0)
+ x512 := (uint64(x511) + x508)
+ x513 := ((x510 >> 58) | ((x512 << 6) & 0xffffffffffffffff))
+ x514 := (x510 & 0x3ffffffffffffff)
+ x515 := (x513 + x460)
+ x516 := p521Uint1((x515 >> 58))
+ x517 := (x515 & 0x3ffffffffffffff)
+ x518 := (uint64(x516) + x467)
+ out1[0] = x514
+ out1[1] = x517
+ out1[2] = x518
+ out1[3] = x474
+ out1[4] = x481
+ out1[5] = x488
+ out1[6] = x495
+ out1[7] = x502
+ out1[8] = x509
+}
+
+// p521CarrySquare squares a field element and reduces the result.
+//
+// Postconditions:
+// eval out1 mod m = (eval arg1 * eval arg1) mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+func p521CarrySquare(out1 *[9]uint64, arg1 *[9]uint64) {
+ x1 := arg1[8]
+ x2 := (x1 * 0x2)
+ x3 := (arg1[8] * 0x2)
+ x4 := arg1[7]
+ x5 := (x4 * 0x2)
+ x6 := (arg1[7] * 0x2)
+ x7 := arg1[6]
+ x8 := (x7 * 0x2)
+ x9 := (arg1[6] * 0x2)
+ x10 := arg1[5]
+ x11 := (x10 * 0x2)
+ x12 := (arg1[5] * 0x2)
+ x13 := (arg1[4] * 0x2)
+ x14 := (arg1[3] * 0x2)
+ x15 := (arg1[2] * 0x2)
+ x16 := (arg1[1] * 0x2)
+ var x17 uint64
+ var x18 uint64
+ x18, x17 = bits.Mul64(arg1[8], (x1 * 0x2))
+ var x19 uint64
+ var x20 uint64
+ x20, x19 = bits.Mul64(arg1[7], (x2 * 0x2))
+ var x21 uint64
+ var x22 uint64
+ x22, x21 = bits.Mul64(arg1[7], (x4 * 0x2))
+ var x23 uint64
+ var x24 uint64
+ x24, x23 = bits.Mul64(arg1[6], (x2 * 0x2))
+ var x25 uint64
+ var x26 uint64
+ x26, x25 = bits.Mul64(arg1[6], (x5 * 0x2))
+ var x27 uint64
+ var x28 uint64
+ x28, x27 = bits.Mul64(arg1[6], (x7 * 0x2))
+ var x29 uint64
+ var x30 uint64
+ x30, x29 = bits.Mul64(arg1[5], (x2 * 0x2))
+ var x31 uint64
+ var x32 uint64
+ x32, x31 = bits.Mul64(arg1[5], (x5 * 0x2))
+ var x33 uint64
+ var x34 uint64
+ x34, x33 = bits.Mul64(arg1[5], (x8 * 0x2))
+ var x35 uint64
+ var x36 uint64
+ x36, x35 = bits.Mul64(arg1[5], (x10 * 0x2))
+ var x37 uint64
+ var x38 uint64
+ x38, x37 = bits.Mul64(arg1[4], (x2 * 0x2))
+ var x39 uint64
+ var x40 uint64
+ x40, x39 = bits.Mul64(arg1[4], (x5 * 0x2))
+ var x41 uint64
+ var x42 uint64
+ x42, x41 = bits.Mul64(arg1[4], (x8 * 0x2))
+ var x43 uint64
+ var x44 uint64
+ x44, x43 = bits.Mul64(arg1[4], (x11 * 0x2))
+ var x45 uint64
+ var x46 uint64
+ x46, x45 = bits.Mul64(arg1[4], arg1[4])
+ var x47 uint64
+ var x48 uint64
+ x48, x47 = bits.Mul64(arg1[3], (x2 * 0x2))
+ var x49 uint64
+ var x50 uint64
+ x50, x49 = bits.Mul64(arg1[3], (x5 * 0x2))
+ var x51 uint64
+ var x52 uint64
+ x52, x51 = bits.Mul64(arg1[3], (x8 * 0x2))
+ var x53 uint64
+ var x54 uint64
+ x54, x53 = bits.Mul64(arg1[3], x12)
+ var x55 uint64
+ var x56 uint64
+ x56, x55 = bits.Mul64(arg1[3], x13)
+ var x57 uint64
+ var x58 uint64
+ x58, x57 = bits.Mul64(arg1[3], arg1[3])
+ var x59 uint64
+ var x60 uint64
+ x60, x59 = bits.Mul64(arg1[2], (x2 * 0x2))
+ var x61 uint64
+ var x62 uint64
+ x62, x61 = bits.Mul64(arg1[2], (x5 * 0x2))
+ var x63 uint64
+ var x64 uint64
+ x64, x63 = bits.Mul64(arg1[2], x9)
+ var x65 uint64
+ var x66 uint64
+ x66, x65 = bits.Mul64(arg1[2], x12)
+ var x67 uint64
+ var x68 uint64
+ x68, x67 = bits.Mul64(arg1[2], x13)
+ var x69 uint64
+ var x70 uint64
+ x70, x69 = bits.Mul64(arg1[2], x14)
+ var x71 uint64
+ var x72 uint64
+ x72, x71 = bits.Mul64(arg1[2], arg1[2])
+ var x73 uint64
+ var x74 uint64
+ x74, x73 = bits.Mul64(arg1[1], (x2 * 0x2))
+ var x75 uint64
+ var x76 uint64
+ x76, x75 = bits.Mul64(arg1[1], x6)
+ var x77 uint64
+ var x78 uint64
+ x78, x77 = bits.Mul64(arg1[1], x9)
+ var x79 uint64
+ var x80 uint64
+ x80, x79 = bits.Mul64(arg1[1], x12)
+ var x81 uint64
+ var x82 uint64
+ x82, x81 = bits.Mul64(arg1[1], x13)
+ var x83 uint64
+ var x84 uint64
+ x84, x83 = bits.Mul64(arg1[1], x14)
+ var x85 uint64
+ var x86 uint64
+ x86, x85 = bits.Mul64(arg1[1], x15)
+ var x87 uint64
+ var x88 uint64
+ x88, x87 = bits.Mul64(arg1[1], arg1[1])
+ var x89 uint64
+ var x90 uint64
+ x90, x89 = bits.Mul64(arg1[0], x3)
+ var x91 uint64
+ var x92 uint64
+ x92, x91 = bits.Mul64(arg1[0], x6)
+ var x93 uint64
+ var x94 uint64
+ x94, x93 = bits.Mul64(arg1[0], x9)
+ var x95 uint64
+ var x96 uint64
+ x96, x95 = bits.Mul64(arg1[0], x12)
+ var x97 uint64
+ var x98 uint64
+ x98, x97 = bits.Mul64(arg1[0], x13)
+ var x99 uint64
+ var x100 uint64
+ x100, x99 = bits.Mul64(arg1[0], x14)
+ var x101 uint64
+ var x102 uint64
+ x102, x101 = bits.Mul64(arg1[0], x15)
+ var x103 uint64
+ var x104 uint64
+ x104, x103 = bits.Mul64(arg1[0], x16)
+ var x105 uint64
+ var x106 uint64
+ x106, x105 = bits.Mul64(arg1[0], arg1[0])
+ var x107 uint64
+ var x108 p521Uint1
+ x107, x108 = p521AddcarryxU64(x51, x43, 0x0)
+ var x109 uint64
+ x109, _ = p521AddcarryxU64(x52, x44, x108)
+ var x111 uint64
+ var x112 p521Uint1
+ x111, x112 = p521AddcarryxU64(x61, x107, 0x0)
+ var x113 uint64
+ x113, _ = p521AddcarryxU64(x62, x109, x112)
+ var x115 uint64
+ var x116 p521Uint1
+ x115, x116 = p521AddcarryxU64(x73, x111, 0x0)
+ var x117 uint64
+ x117, _ = p521AddcarryxU64(x74, x113, x116)
+ var x119 uint64
+ var x120 p521Uint1
+ x119, x120 = p521AddcarryxU64(x105, x115, 0x0)
+ var x121 uint64
+ x121, _ = p521AddcarryxU64(x106, x117, x120)
+ x123 := ((x119 >> 58) | ((x121 << 6) & 0xffffffffffffffff))
+ x124 := (x121 >> 58)
+ x125 := (x119 & 0x3ffffffffffffff)
+ var x126 uint64
+ var x127 p521Uint1
+ x126, x127 = p521AddcarryxU64(x53, x45, 0x0)
+ var x128 uint64
+ x128, _ = p521AddcarryxU64(x54, x46, x127)
+ var x130 uint64
+ var x131 p521Uint1
+ x130, x131 = p521AddcarryxU64(x63, x126, 0x0)
+ var x132 uint64
+ x132, _ = p521AddcarryxU64(x64, x128, x131)
+ var x134 uint64
+ var x135 p521Uint1
+ x134, x135 = p521AddcarryxU64(x75, x130, 0x0)
+ var x136 uint64
+ x136, _ = p521AddcarryxU64(x76, x132, x135)
+ var x138 uint64
+ var x139 p521Uint1
+ x138, x139 = p521AddcarryxU64(x89, x134, 0x0)
+ var x140 uint64
+ x140, _ = p521AddcarryxU64(x90, x136, x139)
+ var x142 uint64
+ var x143 p521Uint1
+ x142, x143 = p521AddcarryxU64(x55, x17, 0x0)
+ var x144 uint64
+ x144, _ = p521AddcarryxU64(x56, x18, x143)
+ var x146 uint64
+ var x147 p521Uint1
+ x146, x147 = p521AddcarryxU64(x65, x142, 0x0)
+ var x148 uint64
+ x148, _ = p521AddcarryxU64(x66, x144, x147)
+ var x150 uint64
+ var x151 p521Uint1
+ x150, x151 = p521AddcarryxU64(x77, x146, 0x0)
+ var x152 uint64
+ x152, _ = p521AddcarryxU64(x78, x148, x151)
+ var x154 uint64
+ var x155 p521Uint1
+ x154, x155 = p521AddcarryxU64(x91, x150, 0x0)
+ var x156 uint64
+ x156, _ = p521AddcarryxU64(x92, x152, x155)
+ var x158 uint64
+ var x159 p521Uint1
+ x158, x159 = p521AddcarryxU64(x57, x19, 0x0)
+ var x160 uint64
+ x160, _ = p521AddcarryxU64(x58, x20, x159)
+ var x162 uint64
+ var x163 p521Uint1
+ x162, x163 = p521AddcarryxU64(x67, x158, 0x0)
+ var x164 uint64
+ x164, _ = p521AddcarryxU64(x68, x160, x163)
+ var x166 uint64
+ var x167 p521Uint1
+ x166, x167 = p521AddcarryxU64(x79, x162, 0x0)
+ var x168 uint64
+ x168, _ = p521AddcarryxU64(x80, x164, x167)
+ var x170 uint64
+ var x171 p521Uint1
+ x170, x171 = p521AddcarryxU64(x93, x166, 0x0)
+ var x172 uint64
+ x172, _ = p521AddcarryxU64(x94, x168, x171)
+ var x174 uint64
+ var x175 p521Uint1
+ x174, x175 = p521AddcarryxU64(x23, x21, 0x0)
+ var x176 uint64
+ x176, _ = p521AddcarryxU64(x24, x22, x175)
+ var x178 uint64
+ var x179 p521Uint1
+ x178, x179 = p521AddcarryxU64(x69, x174, 0x0)
+ var x180 uint64
+ x180, _ = p521AddcarryxU64(x70, x176, x179)
+ var x182 uint64
+ var x183 p521Uint1
+ x182, x183 = p521AddcarryxU64(x81, x178, 0x0)
+ var x184 uint64
+ x184, _ = p521AddcarryxU64(x82, x180, x183)
+ var x186 uint64
+ var x187 p521Uint1
+ x186, x187 = p521AddcarryxU64(x95, x182, 0x0)
+ var x188 uint64
+ x188, _ = p521AddcarryxU64(x96, x184, x187)
+ var x190 uint64
+ var x191 p521Uint1
+ x190, x191 = p521AddcarryxU64(x29, x25, 0x0)
+ var x192 uint64
+ x192, _ = p521AddcarryxU64(x30, x26, x191)
+ var x194 uint64
+ var x195 p521Uint1
+ x194, x195 = p521AddcarryxU64(x71, x190, 0x0)
+ var x196 uint64
+ x196, _ = p521AddcarryxU64(x72, x192, x195)
+ var x198 uint64
+ var x199 p521Uint1
+ x198, x199 = p521AddcarryxU64(x83, x194, 0x0)
+ var x200 uint64
+ x200, _ = p521AddcarryxU64(x84, x196, x199)
+ var x202 uint64
+ var x203 p521Uint1
+ x202, x203 = p521AddcarryxU64(x97, x198, 0x0)
+ var x204 uint64
+ x204, _ = p521AddcarryxU64(x98, x200, x203)
+ var x206 uint64
+ var x207 p521Uint1
+ x206, x207 = p521AddcarryxU64(x31, x27, 0x0)
+ var x208 uint64
+ x208, _ = p521AddcarryxU64(x32, x28, x207)
+ var x210 uint64
+ var x211 p521Uint1
+ x210, x211 = p521AddcarryxU64(x37, x206, 0x0)
+ var x212 uint64
+ x212, _ = p521AddcarryxU64(x38, x208, x211)
+ var x214 uint64
+ var x215 p521Uint1
+ x214, x215 = p521AddcarryxU64(x85, x210, 0x0)
+ var x216 uint64
+ x216, _ = p521AddcarryxU64(x86, x212, x215)
+ var x218 uint64
+ var x219 p521Uint1
+ x218, x219 = p521AddcarryxU64(x99, x214, 0x0)
+ var x220 uint64
+ x220, _ = p521AddcarryxU64(x100, x216, x219)
+ var x222 uint64
+ var x223 p521Uint1
+ x222, x223 = p521AddcarryxU64(x39, x33, 0x0)
+ var x224 uint64
+ x224, _ = p521AddcarryxU64(x40, x34, x223)
+ var x226 uint64
+ var x227 p521Uint1
+ x226, x227 = p521AddcarryxU64(x47, x222, 0x0)
+ var x228 uint64
+ x228, _ = p521AddcarryxU64(x48, x224, x227)
+ var x230 uint64
+ var x231 p521Uint1
+ x230, x231 = p521AddcarryxU64(x87, x226, 0x0)
+ var x232 uint64
+ x232, _ = p521AddcarryxU64(x88, x228, x231)
+ var x234 uint64
+ var x235 p521Uint1
+ x234, x235 = p521AddcarryxU64(x101, x230, 0x0)
+ var x236 uint64
+ x236, _ = p521AddcarryxU64(x102, x232, x235)
+ var x238 uint64
+ var x239 p521Uint1
+ x238, x239 = p521AddcarryxU64(x41, x35, 0x0)
+ var x240 uint64
+ x240, _ = p521AddcarryxU64(x42, x36, x239)
+ var x242 uint64
+ var x243 p521Uint1
+ x242, x243 = p521AddcarryxU64(x49, x238, 0x0)
+ var x244 uint64
+ x244, _ = p521AddcarryxU64(x50, x240, x243)
+ var x246 uint64
+ var x247 p521Uint1
+ x246, x247 = p521AddcarryxU64(x59, x242, 0x0)
+ var x248 uint64
+ x248, _ = p521AddcarryxU64(x60, x244, x247)
+ var x250 uint64
+ var x251 p521Uint1
+ x250, x251 = p521AddcarryxU64(x103, x246, 0x0)
+ var x252 uint64
+ x252, _ = p521AddcarryxU64(x104, x248, x251)
+ var x254 uint64
+ var x255 p521Uint1
+ x254, x255 = p521AddcarryxU64(x123, x250, 0x0)
+ var x256 uint64
+ x256, _ = p521AddcarryxU64(x124, x252, x255)
+ x258 := ((x254 >> 58) | ((x256 << 6) & 0xffffffffffffffff))
+ x259 := (x256 >> 58)
+ x260 := (x254 & 0x3ffffffffffffff)
+ var x261 uint64
+ var x262 p521Uint1
+ x261, x262 = p521AddcarryxU64(x258, x234, 0x0)
+ var x263 uint64
+ x263, _ = p521AddcarryxU64(x259, x236, x262)
+ x265 := ((x261 >> 58) | ((x263 << 6) & 0xffffffffffffffff))
+ x266 := (x263 >> 58)
+ x267 := (x261 & 0x3ffffffffffffff)
+ var x268 uint64
+ var x269 p521Uint1
+ x268, x269 = p521AddcarryxU64(x265, x218, 0x0)
+ var x270 uint64
+ x270, _ = p521AddcarryxU64(x266, x220, x269)
+ x272 := ((x268 >> 58) | ((x270 << 6) & 0xffffffffffffffff))
+ x273 := (x270 >> 58)
+ x274 := (x268 & 0x3ffffffffffffff)
+ var x275 uint64
+ var x276 p521Uint1
+ x275, x276 = p521AddcarryxU64(x272, x202, 0x0)
+ var x277 uint64
+ x277, _ = p521AddcarryxU64(x273, x204, x276)
+ x279 := ((x275 >> 58) | ((x277 << 6) & 0xffffffffffffffff))
+ x280 := (x277 >> 58)
+ x281 := (x275 & 0x3ffffffffffffff)
+ var x282 uint64
+ var x283 p521Uint1
+ x282, x283 = p521AddcarryxU64(x279, x186, 0x0)
+ var x284 uint64
+ x284, _ = p521AddcarryxU64(x280, x188, x283)
+ x286 := ((x282 >> 58) | ((x284 << 6) & 0xffffffffffffffff))
+ x287 := (x284 >> 58)
+ x288 := (x282 & 0x3ffffffffffffff)
+ var x289 uint64
+ var x290 p521Uint1
+ x289, x290 = p521AddcarryxU64(x286, x170, 0x0)
+ var x291 uint64
+ x291, _ = p521AddcarryxU64(x287, x172, x290)
+ x293 := ((x289 >> 58) | ((x291 << 6) & 0xffffffffffffffff))
+ x294 := (x291 >> 58)
+ x295 := (x289 & 0x3ffffffffffffff)
+ var x296 uint64
+ var x297 p521Uint1
+ x296, x297 = p521AddcarryxU64(x293, x154, 0x0)
+ var x298 uint64
+ x298, _ = p521AddcarryxU64(x294, x156, x297)
+ x300 := ((x296 >> 58) | ((x298 << 6) & 0xffffffffffffffff))
+ x301 := (x298 >> 58)
+ x302 := (x296 & 0x3ffffffffffffff)
+ var x303 uint64
+ var x304 p521Uint1
+ x303, x304 = p521AddcarryxU64(x300, x138, 0x0)
+ var x305 uint64
+ x305, _ = p521AddcarryxU64(x301, x140, x304)
+ x307 := ((x303 >> 57) | ((x305 << 7) & 0xffffffffffffffff))
+ x308 := (x305 >> 57)
+ x309 := (x303 & 0x1ffffffffffffff)
+ var x310 uint64
+ var x311 p521Uint1
+ x310, x311 = p521AddcarryxU64(x125, x307, 0x0)
+ x312 := (uint64(x311) + x308)
+ x313 := ((x310 >> 58) | ((x312 << 6) & 0xffffffffffffffff))
+ x314 := (x310 & 0x3ffffffffffffff)
+ x315 := (x313 + x260)
+ x316 := p521Uint1((x315 >> 58))
+ x317 := (x315 & 0x3ffffffffffffff)
+ x318 := (uint64(x316) + x267)
+ out1[0] = x314
+ out1[1] = x317
+ out1[2] = x318
+ out1[3] = x274
+ out1[4] = x281
+ out1[5] = x288
+ out1[6] = x295
+ out1[7] = x302
+ out1[8] = x309
+}
+
+// p521Carry reduces a field element.
+//
+// Postconditions:
+// eval out1 mod m = eval arg1 mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+func p521Carry(out1 *[9]uint64, arg1 *[9]uint64) {
+ x1 := arg1[0]
+ x2 := ((x1 >> 58) + arg1[1])
+ x3 := ((x2 >> 58) + arg1[2])
+ x4 := ((x3 >> 58) + arg1[3])
+ x5 := ((x4 >> 58) + arg1[4])
+ x6 := ((x5 >> 58) + arg1[5])
+ x7 := ((x6 >> 58) + arg1[6])
+ x8 := ((x7 >> 58) + arg1[7])
+ x9 := ((x8 >> 58) + arg1[8])
+ x10 := ((x1 & 0x3ffffffffffffff) + (x9 >> 57))
+ x11 := (uint64(p521Uint1((x10 >> 58))) + (x2 & 0x3ffffffffffffff))
+ x12 := (x10 & 0x3ffffffffffffff)
+ x13 := (x11 & 0x3ffffffffffffff)
+ x14 := (uint64(p521Uint1((x11 >> 58))) + (x3 & 0x3ffffffffffffff))
+ x15 := (x4 & 0x3ffffffffffffff)
+ x16 := (x5 & 0x3ffffffffffffff)
+ x17 := (x6 & 0x3ffffffffffffff)
+ x18 := (x7 & 0x3ffffffffffffff)
+ x19 := (x8 & 0x3ffffffffffffff)
+ x20 := (x9 & 0x1ffffffffffffff)
+ out1[0] = x12
+ out1[1] = x13
+ out1[2] = x14
+ out1[3] = x15
+ out1[4] = x16
+ out1[5] = x17
+ out1[6] = x18
+ out1[7] = x19
+ out1[8] = x20
+}
+
+// p521Add adds two field elements.
+//
+// Postconditions:
+// eval out1 mod m = (eval arg1 + eval arg2) mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// arg2: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+func p521Add(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) {
+ x1 := (arg1[0] + arg2[0])
+ x2 := (arg1[1] + arg2[1])
+ x3 := (arg1[2] + arg2[2])
+ x4 := (arg1[3] + arg2[3])
+ x5 := (arg1[4] + arg2[4])
+ x6 := (arg1[5] + arg2[5])
+ x7 := (arg1[6] + arg2[6])
+ x8 := (arg1[7] + arg2[7])
+ x9 := (arg1[8] + arg2[8])
+ out1[0] = x1
+ out1[1] = x2
+ out1[2] = x3
+ out1[3] = x4
+ out1[4] = x5
+ out1[5] = x6
+ out1[6] = x7
+ out1[7] = x8
+ out1[8] = x9
+}
+
+// p521Sub subtracts two field elements.
+//
+// Postconditions:
+// eval out1 mod m = (eval arg1 - eval arg2) mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// arg2: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0xc00000000000000], [0x0 ~> 0x600000000000000]]
+func p521Sub(out1 *[9]uint64, arg1 *[9]uint64, arg2 *[9]uint64) {
+ x1 := ((0x7fffffffffffffe + arg1[0]) - arg2[0])
+ x2 := ((0x7fffffffffffffe + arg1[1]) - arg2[1])
+ x3 := ((0x7fffffffffffffe + arg1[2]) - arg2[2])
+ x4 := ((0x7fffffffffffffe + arg1[3]) - arg2[3])
+ x5 := ((0x7fffffffffffffe + arg1[4]) - arg2[4])
+ x6 := ((0x7fffffffffffffe + arg1[5]) - arg2[5])
+ x7 := ((0x7fffffffffffffe + arg1[6]) - arg2[6])
+ x8 := ((0x7fffffffffffffe + arg1[7]) - arg2[7])
+ x9 := ((0x3fffffffffffffe + arg1[8]) - arg2[8])
+ out1[0] = x1
+ out1[1] = x2
+ out1[2] = x3
+ out1[3] = x4
+ out1[4] = x5
+ out1[5] = x6
+ out1[6] = x7
+ out1[7] = x8
+ out1[8] = x9
+}
+
+// p521ToBytes serializes a field element to bytes in little-endian order.
+//
+// Postconditions:
+// out1 = map (λ x, ⌊((eval arg1 mod m) mod 2^(8 * (x + 1))) / 2^(8 * x)⌋) [0..65]
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+// Output Bounds:
+// out1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]]
+func p521ToBytes(out1 *[66]uint8, arg1 *[9]uint64) {
+ var x1 uint64
+ var x2 p521Uint1
+ p521SubborrowxU58(&x1, &x2, 0x0, arg1[0], 0x3ffffffffffffff)
+ var x3 uint64
+ var x4 p521Uint1
+ p521SubborrowxU58(&x3, &x4, x2, arg1[1], 0x3ffffffffffffff)
+ var x5 uint64
+ var x6 p521Uint1
+ p521SubborrowxU58(&x5, &x6, x4, arg1[2], 0x3ffffffffffffff)
+ var x7 uint64
+ var x8 p521Uint1
+ p521SubborrowxU58(&x7, &x8, x6, arg1[3], 0x3ffffffffffffff)
+ var x9 uint64
+ var x10 p521Uint1
+ p521SubborrowxU58(&x9, &x10, x8, arg1[4], 0x3ffffffffffffff)
+ var x11 uint64
+ var x12 p521Uint1
+ p521SubborrowxU58(&x11, &x12, x10, arg1[5], 0x3ffffffffffffff)
+ var x13 uint64
+ var x14 p521Uint1
+ p521SubborrowxU58(&x13, &x14, x12, arg1[6], 0x3ffffffffffffff)
+ var x15 uint64
+ var x16 p521Uint1
+ p521SubborrowxU58(&x15, &x16, x14, arg1[7], 0x3ffffffffffffff)
+ var x17 uint64
+ var x18 p521Uint1
+ p521SubborrowxU57(&x17, &x18, x16, arg1[8], 0x1ffffffffffffff)
+ var x19 uint64
+ p521CmovznzU64(&x19, x18, uint64(0x0), 0xffffffffffffffff)
+ var x20 uint64
+ var x21 p521Uint1
+ p521AddcarryxU58(&x20, &x21, 0x0, x1, (x19 & 0x3ffffffffffffff))
+ var x22 uint64
+ var x23 p521Uint1
+ p521AddcarryxU58(&x22, &x23, x21, x3, (x19 & 0x3ffffffffffffff))
+ var x24 uint64
+ var x25 p521Uint1
+ p521AddcarryxU58(&x24, &x25, x23, x5, (x19 & 0x3ffffffffffffff))
+ var x26 uint64
+ var x27 p521Uint1
+ p521AddcarryxU58(&x26, &x27, x25, x7, (x19 & 0x3ffffffffffffff))
+ var x28 uint64
+ var x29 p521Uint1
+ p521AddcarryxU58(&x28, &x29, x27, x9, (x19 & 0x3ffffffffffffff))
+ var x30 uint64
+ var x31 p521Uint1
+ p521AddcarryxU58(&x30, &x31, x29, x11, (x19 & 0x3ffffffffffffff))
+ var x32 uint64
+ var x33 p521Uint1
+ p521AddcarryxU58(&x32, &x33, x31, x13, (x19 & 0x3ffffffffffffff))
+ var x34 uint64
+ var x35 p521Uint1
+ p521AddcarryxU58(&x34, &x35, x33, x15, (x19 & 0x3ffffffffffffff))
+ var x36 uint64
+ var x37 p521Uint1
+ p521AddcarryxU57(&x36, &x37, x35, x17, (x19 & 0x1ffffffffffffff))
+ x38 := (x34 << 6)
+ x39 := (x32 << 4)
+ x40 := (x30 << 2)
+ x41 := (x26 << 6)
+ x42 := (x24 << 4)
+ x43 := (x22 << 2)
+ x44 := (uint8(x20) & 0xff)
+ x45 := (x20 >> 8)
+ x46 := (uint8(x45) & 0xff)
+ x47 := (x45 >> 8)
+ x48 := (uint8(x47) & 0xff)
+ x49 := (x47 >> 8)
+ x50 := (uint8(x49) & 0xff)
+ x51 := (x49 >> 8)
+ x52 := (uint8(x51) & 0xff)
+ x53 := (x51 >> 8)
+ x54 := (uint8(x53) & 0xff)
+ x55 := (x53 >> 8)
+ x56 := (uint8(x55) & 0xff)
+ x57 := uint8((x55 >> 8))
+ x58 := (x43 + uint64(x57))
+ x59 := (uint8(x58) & 0xff)
+ x60 := (x58 >> 8)
+ x61 := (uint8(x60) & 0xff)
+ x62 := (x60 >> 8)
+ x63 := (uint8(x62) & 0xff)
+ x64 := (x62 >> 8)
+ x65 := (uint8(x64) & 0xff)
+ x66 := (x64 >> 8)
+ x67 := (uint8(x66) & 0xff)
+ x68 := (x66 >> 8)
+ x69 := (uint8(x68) & 0xff)
+ x70 := (x68 >> 8)
+ x71 := (uint8(x70) & 0xff)
+ x72 := uint8((x70 >> 8))
+ x73 := (x42 + uint64(x72))
+ x74 := (uint8(x73) & 0xff)
+ x75 := (x73 >> 8)
+ x76 := (uint8(x75) & 0xff)
+ x77 := (x75 >> 8)
+ x78 := (uint8(x77) & 0xff)
+ x79 := (x77 >> 8)
+ x80 := (uint8(x79) & 0xff)
+ x81 := (x79 >> 8)
+ x82 := (uint8(x81) & 0xff)
+ x83 := (x81 >> 8)
+ x84 := (uint8(x83) & 0xff)
+ x85 := (x83 >> 8)
+ x86 := (uint8(x85) & 0xff)
+ x87 := uint8((x85 >> 8))
+ x88 := (x41 + uint64(x87))
+ x89 := (uint8(x88) & 0xff)
+ x90 := (x88 >> 8)
+ x91 := (uint8(x90) & 0xff)
+ x92 := (x90 >> 8)
+ x93 := (uint8(x92) & 0xff)
+ x94 := (x92 >> 8)
+ x95 := (uint8(x94) & 0xff)
+ x96 := (x94 >> 8)
+ x97 := (uint8(x96) & 0xff)
+ x98 := (x96 >> 8)
+ x99 := (uint8(x98) & 0xff)
+ x100 := (x98 >> 8)
+ x101 := (uint8(x100) & 0xff)
+ x102 := uint8((x100 >> 8))
+ x103 := (uint8(x28) & 0xff)
+ x104 := (x28 >> 8)
+ x105 := (uint8(x104) & 0xff)
+ x106 := (x104 >> 8)
+ x107 := (uint8(x106) & 0xff)
+ x108 := (x106 >> 8)
+ x109 := (uint8(x108) & 0xff)
+ x110 := (x108 >> 8)
+ x111 := (uint8(x110) & 0xff)
+ x112 := (x110 >> 8)
+ x113 := (uint8(x112) & 0xff)
+ x114 := (x112 >> 8)
+ x115 := (uint8(x114) & 0xff)
+ x116 := uint8((x114 >> 8))
+ x117 := (x40 + uint64(x116))
+ x118 := (uint8(x117) & 0xff)
+ x119 := (x117 >> 8)
+ x120 := (uint8(x119) & 0xff)
+ x121 := (x119 >> 8)
+ x122 := (uint8(x121) & 0xff)
+ x123 := (x121 >> 8)
+ x124 := (uint8(x123) & 0xff)
+ x125 := (x123 >> 8)
+ x126 := (uint8(x125) & 0xff)
+ x127 := (x125 >> 8)
+ x128 := (uint8(x127) & 0xff)
+ x129 := (x127 >> 8)
+ x130 := (uint8(x129) & 0xff)
+ x131 := uint8((x129 >> 8))
+ x132 := (x39 + uint64(x131))
+ x133 := (uint8(x132) & 0xff)
+ x134 := (x132 >> 8)
+ x135 := (uint8(x134) & 0xff)
+ x136 := (x134 >> 8)
+ x137 := (uint8(x136) & 0xff)
+ x138 := (x136 >> 8)
+ x139 := (uint8(x138) & 0xff)
+ x140 := (x138 >> 8)
+ x141 := (uint8(x140) & 0xff)
+ x142 := (x140 >> 8)
+ x143 := (uint8(x142) & 0xff)
+ x144 := (x142 >> 8)
+ x145 := (uint8(x144) & 0xff)
+ x146 := uint8((x144 >> 8))
+ x147 := (x38 + uint64(x146))
+ x148 := (uint8(x147) & 0xff)
+ x149 := (x147 >> 8)
+ x150 := (uint8(x149) & 0xff)
+ x151 := (x149 >> 8)
+ x152 := (uint8(x151) & 0xff)
+ x153 := (x151 >> 8)
+ x154 := (uint8(x153) & 0xff)
+ x155 := (x153 >> 8)
+ x156 := (uint8(x155) & 0xff)
+ x157 := (x155 >> 8)
+ x158 := (uint8(x157) & 0xff)
+ x159 := (x157 >> 8)
+ x160 := (uint8(x159) & 0xff)
+ x161 := uint8((x159 >> 8))
+ x162 := (uint8(x36) & 0xff)
+ x163 := (x36 >> 8)
+ x164 := (uint8(x163) & 0xff)
+ x165 := (x163 >> 8)
+ x166 := (uint8(x165) & 0xff)
+ x167 := (x165 >> 8)
+ x168 := (uint8(x167) & 0xff)
+ x169 := (x167 >> 8)
+ x170 := (uint8(x169) & 0xff)
+ x171 := (x169 >> 8)
+ x172 := (uint8(x171) & 0xff)
+ x173 := (x171 >> 8)
+ x174 := (uint8(x173) & 0xff)
+ x175 := p521Uint1((x173 >> 8))
+ out1[0] = x44
+ out1[1] = x46
+ out1[2] = x48
+ out1[3] = x50
+ out1[4] = x52
+ out1[5] = x54
+ out1[6] = x56
+ out1[7] = x59
+ out1[8] = x61
+ out1[9] = x63
+ out1[10] = x65
+ out1[11] = x67
+ out1[12] = x69
+ out1[13] = x71
+ out1[14] = x74
+ out1[15] = x76
+ out1[16] = x78
+ out1[17] = x80
+ out1[18] = x82
+ out1[19] = x84
+ out1[20] = x86
+ out1[21] = x89
+ out1[22] = x91
+ out1[23] = x93
+ out1[24] = x95
+ out1[25] = x97
+ out1[26] = x99
+ out1[27] = x101
+ out1[28] = x102
+ out1[29] = x103
+ out1[30] = x105
+ out1[31] = x107
+ out1[32] = x109
+ out1[33] = x111
+ out1[34] = x113
+ out1[35] = x115
+ out1[36] = x118
+ out1[37] = x120
+ out1[38] = x122
+ out1[39] = x124
+ out1[40] = x126
+ out1[41] = x128
+ out1[42] = x130
+ out1[43] = x133
+ out1[44] = x135
+ out1[45] = x137
+ out1[46] = x139
+ out1[47] = x141
+ out1[48] = x143
+ out1[49] = x145
+ out1[50] = x148
+ out1[51] = x150
+ out1[52] = x152
+ out1[53] = x154
+ out1[54] = x156
+ out1[55] = x158
+ out1[56] = x160
+ out1[57] = x161
+ out1[58] = x162
+ out1[59] = x164
+ out1[60] = x166
+ out1[61] = x168
+ out1[62] = x170
+ out1[63] = x172
+ out1[64] = x174
+ out1[65] = uint8(x175)
+}
+
+// p521FromBytes deserializes a field element from bytes in little-endian order.
+//
+// Postconditions:
+// eval out1 mod m = bytes_eval arg1 mod m
+//
+// Input Bounds:
+// arg1: [[0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0xff], [0x0 ~> 0x1]]
+// Output Bounds:
+// out1: [[0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x400000000000000], [0x0 ~> 0x200000000000000]]
+func p521FromBytes(out1 *[9]uint64, arg1 *[66]uint8) {
+ x1 := (uint64(p521Uint1(arg1[65])) << 56)
+ x2 := (uint64(arg1[64]) << 48)
+ x3 := (uint64(arg1[63]) << 40)
+ x4 := (uint64(arg1[62]) << 32)
+ x5 := (uint64(arg1[61]) << 24)
+ x6 := (uint64(arg1[60]) << 16)
+ x7 := (uint64(arg1[59]) << 8)
+ x8 := arg1[58]
+ x9 := (uint64(arg1[57]) << 50)
+ x10 := (uint64(arg1[56]) << 42)
+ x11 := (uint64(arg1[55]) << 34)
+ x12 := (uint64(arg1[54]) << 26)
+ x13 := (uint64(arg1[53]) << 18)
+ x14 := (uint64(arg1[52]) << 10)
+ x15 := (uint64(arg1[51]) << 2)
+ x16 := (uint64(arg1[50]) << 52)
+ x17 := (uint64(arg1[49]) << 44)
+ x18 := (uint64(arg1[48]) << 36)
+ x19 := (uint64(arg1[47]) << 28)
+ x20 := (uint64(arg1[46]) << 20)
+ x21 := (uint64(arg1[45]) << 12)
+ x22 := (uint64(arg1[44]) << 4)
+ x23 := (uint64(arg1[43]) << 54)
+ x24 := (uint64(arg1[42]) << 46)
+ x25 := (uint64(arg1[41]) << 38)
+ x26 := (uint64(arg1[40]) << 30)
+ x27 := (uint64(arg1[39]) << 22)
+ x28 := (uint64(arg1[38]) << 14)
+ x29 := (uint64(arg1[37]) << 6)
+ x30 := (uint64(arg1[36]) << 56)
+ x31 := (uint64(arg1[35]) << 48)
+ x32 := (uint64(arg1[34]) << 40)
+ x33 := (uint64(arg1[33]) << 32)
+ x34 := (uint64(arg1[32]) << 24)
+ x35 := (uint64(arg1[31]) << 16)
+ x36 := (uint64(arg1[30]) << 8)
+ x37 := arg1[29]
+ x38 := (uint64(arg1[28]) << 50)
+ x39 := (uint64(arg1[27]) << 42)
+ x40 := (uint64(arg1[26]) << 34)
+ x41 := (uint64(arg1[25]) << 26)
+ x42 := (uint64(arg1[24]) << 18)
+ x43 := (uint64(arg1[23]) << 10)
+ x44 := (uint64(arg1[22]) << 2)
+ x45 := (uint64(arg1[21]) << 52)
+ x46 := (uint64(arg1[20]) << 44)
+ x47 := (uint64(arg1[19]) << 36)
+ x48 := (uint64(arg1[18]) << 28)
+ x49 := (uint64(arg1[17]) << 20)
+ x50 := (uint64(arg1[16]) << 12)
+ x51 := (uint64(arg1[15]) << 4)
+ x52 := (uint64(arg1[14]) << 54)
+ x53 := (uint64(arg1[13]) << 46)
+ x54 := (uint64(arg1[12]) << 38)
+ x55 := (uint64(arg1[11]) << 30)
+ x56 := (uint64(arg1[10]) << 22)
+ x57 := (uint64(arg1[9]) << 14)
+ x58 := (uint64(arg1[8]) << 6)
+ x59 := (uint64(arg1[7]) << 56)
+ x60 := (uint64(arg1[6]) << 48)
+ x61 := (uint64(arg1[5]) << 40)
+ x62 := (uint64(arg1[4]) << 32)
+ x63 := (uint64(arg1[3]) << 24)
+ x64 := (uint64(arg1[2]) << 16)
+ x65 := (uint64(arg1[1]) << 8)
+ x66 := arg1[0]
+ x67 := (x65 + uint64(x66))
+ x68 := (x64 + x67)
+ x69 := (x63 + x68)
+ x70 := (x62 + x69)
+ x71 := (x61 + x70)
+ x72 := (x60 + x71)
+ x73 := (x59 + x72)
+ x74 := (x73 & 0x3ffffffffffffff)
+ x75 := uint8((x73 >> 58))
+ x76 := (x58 + uint64(x75))
+ x77 := (x57 + x76)
+ x78 := (x56 + x77)
+ x79 := (x55 + x78)
+ x80 := (x54 + x79)
+ x81 := (x53 + x80)
+ x82 := (x52 + x81)
+ x83 := (x82 & 0x3ffffffffffffff)
+ x84 := uint8((x82 >> 58))
+ x85 := (x51 + uint64(x84))
+ x86 := (x50 + x85)
+ x87 := (x49 + x86)
+ x88 := (x48 + x87)
+ x89 := (x47 + x88)
+ x90 := (x46 + x89)
+ x91 := (x45 + x90)
+ x92 := (x91 & 0x3ffffffffffffff)
+ x93 := uint8((x91 >> 58))
+ x94 := (x44 + uint64(x93))
+ x95 := (x43 + x94)
+ x96 := (x42 + x95)
+ x97 := (x41 + x96)
+ x98 := (x40 + x97)
+ x99 := (x39 + x98)
+ x100 := (x38 + x99)
+ x101 := (x36 + uint64(x37))
+ x102 := (x35 + x101)
+ x103 := (x34 + x102)
+ x104 := (x33 + x103)
+ x105 := (x32 + x104)
+ x106 := (x31 + x105)
+ x107 := (x30 + x106)
+ x108 := (x107 & 0x3ffffffffffffff)
+ x109 := uint8((x107 >> 58))
+ x110 := (x29 + uint64(x109))
+ x111 := (x28 + x110)
+ x112 := (x27 + x111)
+ x113 := (x26 + x112)
+ x114 := (x25 + x113)
+ x115 := (x24 + x114)
+ x116 := (x23 + x115)
+ x117 := (x116 & 0x3ffffffffffffff)
+ x118 := uint8((x116 >> 58))
+ x119 := (x22 + uint64(x118))
+ x120 := (x21 + x119)
+ x121 := (x20 + x120)
+ x122 := (x19 + x121)
+ x123 := (x18 + x122)
+ x124 := (x17 + x123)
+ x125 := (x16 + x124)
+ x126 := (x125 & 0x3ffffffffffffff)
+ x127 := uint8((x125 >> 58))
+ x128 := (x15 + uint64(x127))
+ x129 := (x14 + x128)
+ x130 := (x13 + x129)
+ x131 := (x12 + x130)
+ x132 := (x11 + x131)
+ x133 := (x10 + x132)
+ x134 := (x9 + x133)
+ x135 := (x7 + uint64(x8))
+ x136 := (x6 + x135)
+ x137 := (x5 + x136)
+ x138 := (x4 + x137)
+ x139 := (x3 + x138)
+ x140 := (x2 + x139)
+ x141 := (x1 + x140)
+ out1[0] = x74
+ out1[1] = x83
+ out1[2] = x92
+ out1[3] = x100
+ out1[4] = x108
+ out1[5] = x117
+ out1[6] = x126
+ out1[7] = x134
+ out1[8] = x141
+}
+
+// p521Selectznz is a multi-limb conditional select.
+//
+// Postconditions:
+// eval out1 = (if arg1 = 0 then eval arg2 else eval arg3)
+//
+// Input Bounds:
+// arg1: [0x0 ~> 0x1]
+// arg2: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+// arg3: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+// Output Bounds:
+// out1: [[0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff], [0x0 ~> 0xffffffffffffffff]]
+func p521Selectznz(out1 *[9]uint64, arg1 p521Uint1, arg2 *[9]uint64, arg3 *[9]uint64) {
+ var x1 uint64
+ p521CmovznzU64(&x1, arg1, arg2[0], arg3[0])
+ var x2 uint64
+ p521CmovznzU64(&x2, arg1, arg2[1], arg3[1])
+ var x3 uint64
+ p521CmovznzU64(&x3, arg1, arg2[2], arg3[2])
+ var x4 uint64
+ p521CmovznzU64(&x4, arg1, arg2[3], arg3[3])
+ var x5 uint64
+ p521CmovznzU64(&x5, arg1, arg2[4], arg3[4])
+ var x6 uint64
+ p521CmovznzU64(&x6, arg1, arg2[5], arg3[5])
+ var x7 uint64
+ p521CmovznzU64(&x7, arg1, arg2[6], arg3[6])
+ var x8 uint64
+ p521CmovznzU64(&x8, arg1, arg2[7], arg3[7])
+ var x9 uint64
+ p521CmovznzU64(&x9, arg1, arg2[8], arg3[8])
+ out1[0] = x1
+ out1[1] = x2
+ out1[2] = x3
+ out1[3] = x4
+ out1[4] = x5
+ out1[5] = x6
+ out1[6] = x7
+ out1[7] = x8
+ out1[8] = x9
+}
diff --git a/src/crypto/elliptic/internal/fiat/p521_test.go b/src/crypto/elliptic/internal/fiat/p521_test.go
new file mode 100644
index 0000000000..661bde397e
--- /dev/null
+++ b/src/crypto/elliptic/internal/fiat/p521_test.go
@@ -0,0 +1,37 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fiat_test
+
+import (
+ "crypto/elliptic/internal/fiat"
+ "crypto/rand"
+ "testing"
+)
+
+func p521Random(t *testing.T) *fiat.P521Element {
+ buf := make([]byte, 66)
+ if _, err := rand.Read(buf); err != nil {
+ t.Fatal(err)
+ }
+ buf[65] &= 1
+ e, err := new(fiat.P521Element).SetBytes(buf)
+ if err != nil {
+ t.Fatal(err)
+ }
+ return e
+}
+
+func TestP521Invert(t *testing.T) {
+ a := p521Random(t)
+ inv := new(fiat.P521Element).Invert(a)
+ one := new(fiat.P521Element).Mul(a, inv)
+ if new(fiat.P521Element).One().Equal(one) != 1 {
+ t.Errorf("a * 1/a != 1; got %x for %x", one.Bytes(), a.Bytes())
+ }
+ inv.Invert(new(fiat.P521Element))
+ if new(fiat.P521Element).Equal(inv) != 1 {
+ t.Errorf("1/0 != 0; got %x", inv.Bytes())
+ }
+}
diff --git a/src/crypto/elliptic/p521.go b/src/crypto/elliptic/p521.go
new file mode 100644
index 0000000000..ac9de63702
--- /dev/null
+++ b/src/crypto/elliptic/p521.go
@@ -0,0 +1,254 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package elliptic
+
+import (
+ "crypto/elliptic/internal/fiat"
+ "math/big"
+)
+
+type p521Curve struct {
+ *CurveParams
+}
+
+var p521 p521Curve
+var p521Params *CurveParams
+
+func initP521() {
+ // See FIPS 186-3, section D.2.5
+ p521.CurveParams = &CurveParams{Name: "P-521"}
+ p521.P, _ = new(big.Int).SetString("6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", 10)
+ p521.N, _ = new(big.Int).SetString("6864797660130609714981900799081393217269435300143305409394463459185543183397655394245057746333217197532963996371363321113864768612440380340372808892707005449", 10)
+ p521.B, _ = new(big.Int).SetString("051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", 16)
+ p521.Gx, _ = new(big.Int).SetString("c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", 16)
+ p521.Gy, _ = new(big.Int).SetString("11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", 16)
+ p521.BitSize = 521
+}
+
+func (curve p521Curve) Params() *CurveParams {
+ return curve.CurveParams
+}
+
+func (curve p521Curve) IsOnCurve(x, y *big.Int) bool {
+ x1 := bigIntToFiatP521(x)
+ y1 := bigIntToFiatP521(y)
+ b := bigIntToFiatP521(curve.B) // TODO: precompute this value.
+
+ // x³ - 3x + b.
+ x3 := new(fiat.P521Element).Square(x1)
+ x3.Mul(x3, x1)
+
+ threeX := new(fiat.P521Element).Add(x1, x1)
+ threeX.Add(threeX, x1)
+
+ x3.Sub(x3, threeX)
+ x3.Add(x3, b)
+
+ // y² = x³ - 3x + b
+ y2 := new(fiat.P521Element).Square(y1)
+
+ return x3.Equal(y2) == 1
+}
+
+type p512Point struct {
+ x, y, z *fiat.P521Element
+}
+
+func fiatP521ToBigInt(x *fiat.P521Element) *big.Int {
+ xBytes := x.Bytes()
+ for i := range xBytes[:len(xBytes)/2] {
+ xBytes[i], xBytes[len(xBytes)-i-1] = xBytes[len(xBytes)-i-1], xBytes[i]
+ }
+ return new(big.Int).SetBytes(xBytes)
+}
+
+// affineFromJacobian brings a point in Jacobian coordinates back to affine
+// coordinates, with (0, 0) representing infinity by convention. It also goes
+// back to big.Int values to match the exposed API.
+func (curve p521Curve) affineFromJacobian(p *p512Point) (x, y *big.Int) {
+ if p.z.IsZero() == 1 {
+ return new(big.Int), new(big.Int)
+ }
+
+ zinv := new(fiat.P521Element).Invert(p.z)
+ zinvsq := new(fiat.P521Element).Mul(zinv, zinv)
+
+ xx := new(fiat.P521Element).Mul(p.x, zinvsq)
+ zinvsq.Mul(zinvsq, zinv)
+ yy := new(fiat.P521Element).Mul(p.y, zinvsq)
+
+ return fiatP521ToBigInt(xx), fiatP521ToBigInt(yy)
+}
+
+func bigIntToFiatP521(x *big.Int) *fiat.P521Element {
+ xBytes := new(big.Int).Mod(x, p521.P).FillBytes(make([]byte, 66))
+ for i := range xBytes[:len(xBytes)/2] {
+ xBytes[i], xBytes[len(xBytes)-i-1] = xBytes[len(xBytes)-i-1], xBytes[i]
+ }
+ x1, err := new(fiat.P521Element).SetBytes(xBytes)
+ if err != nil {
+ // The input is reduced modulo P and encoded in a fixed size bytes
+ // slice, this should be impossible.
+ panic("internal error: bigIntToFiatP521")
+ }
+ return x1
+}
+
+// jacobianFromAffine converts (x, y) affine coordinates into (x, y, z) Jacobian
+// coordinates. It also converts from big.Int to fiat, which is necessarily a
+// messy and variable-time operation, which we can't avoid due to the exposed API.
+func (curve p521Curve) jacobianFromAffine(x, y *big.Int) *p512Point {
+ // (0, 0) is by convention the point at infinity, which can't be represented
+ // in affine coordinates, but is (0, 0, 0) in Jacobian.
+ if x.Sign() == 0 && y.Sign() == 0 {
+ return &p512Point{
+ x: new(fiat.P521Element),
+ y: new(fiat.P521Element),
+ z: new(fiat.P521Element),
+ }
+ }
+ return &p512Point{
+ x: bigIntToFiatP521(x),
+ y: bigIntToFiatP521(y),
+ z: new(fiat.P521Element).One(),
+ }
+}
+
+func (curve p521Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
+ p1 := curve.jacobianFromAffine(x1, y1)
+ p2 := curve.jacobianFromAffine(x2, y2)
+ return curve.affineFromJacobian(p1.addJacobian(p1, p2))
+}
+
+// addJacobian sets q = p1 + p2, and returns q. The points may overlap.
+func (q *p512Point) addJacobian(p1, p2 *p512Point) *p512Point {
+ // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
+ if p1.z.IsZero() == 1 {
+ q.x.Set(p2.x)
+ q.y.Set(p2.y)
+ q.z.Set(p2.z)
+ return q
+ }
+ if p2.z.IsZero() == 1 {
+ q.x.Set(p1.x)
+ q.y.Set(p1.y)
+ q.z.Set(p1.z)
+ return q
+ }
+
+ z1z1 := new(fiat.P521Element).Square(p1.z)
+ z2z2 := new(fiat.P521Element).Square(p2.z)
+
+ u1 := new(fiat.P521Element).Mul(p1.x, z2z2)
+ u2 := new(fiat.P521Element).Mul(p2.x, z1z1)
+ h := new(fiat.P521Element).Sub(u2, u1)
+ xEqual := h.IsZero() == 1
+ i := new(fiat.P521Element).Add(h, h)
+ i.Square(i)
+ j := new(fiat.P521Element).Mul(h, i)
+
+ s1 := new(fiat.P521Element).Mul(p1.y, p2.z)
+ s1.Mul(s1, z2z2)
+ s2 := new(fiat.P521Element).Mul(p2.y, p1.z)
+ s2.Mul(s2, z1z1)
+ r := new(fiat.P521Element).Sub(s2, s1)
+ yEqual := r.IsZero() == 1
+ if xEqual && yEqual {
+ return q.doubleJacobian(p1)
+ }
+ r.Add(r, r)
+ v := new(fiat.P521Element).Mul(u1, i)
+
+ q.x.Set(r)
+ q.x.Square(q.x)
+ q.x.Sub(q.x, j)
+ q.x.Sub(q.x, v)
+ q.x.Sub(q.x, v)
+
+ q.y.Set(r)
+ v.Sub(v, q.x)
+ q.y.Mul(q.y, v)
+ s1.Mul(s1, j)
+ s1.Add(s1, s1)
+ q.y.Sub(q.y, s1)
+
+ q.z.Add(p1.z, p2.z)
+ q.z.Square(q.z)
+ q.z.Sub(q.z, z1z1)
+ q.z.Sub(q.z, z2z2)
+ q.z.Mul(q.z, h)
+
+ return q
+}
+
+func (curve p521Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
+ p := curve.jacobianFromAffine(x1, y1)
+ return curve.affineFromJacobian(p.doubleJacobian(p))
+}
+
+// doubleJacobian sets q = p + p, and returns q. The points may overlap.
+func (q *p512Point) doubleJacobian(p *p512Point) *p512Point {
+ // https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
+ delta := new(fiat.P521Element).Square(p.z)
+ gamma := new(fiat.P521Element).Square(p.y)
+ alpha := new(fiat.P521Element).Sub(p.x, delta)
+ alpha2 := new(fiat.P521Element).Add(p.x, delta)
+ alpha.Mul(alpha, alpha2)
+ alpha2.Set(alpha)
+ alpha.Add(alpha, alpha)
+ alpha.Add(alpha, alpha2)
+
+ beta := alpha2.Mul(p.x, gamma)
+
+ q.x.Square(alpha)
+ beta8 := new(fiat.P521Element).Add(beta, beta)
+ beta8.Add(beta8, beta8)
+ beta8.Add(beta8, beta8)
+ q.x.Sub(q.x, beta8)
+
+ q.z.Add(p.y, p.z)
+ q.z.Square(q.z)
+ q.z.Sub(q.z, gamma)
+ q.z.Sub(q.z, delta)
+
+ beta.Add(beta, beta)
+ beta.Add(beta, beta)
+ beta.Sub(beta, q.x)
+ q.y.Mul(alpha, beta)
+
+ gamma.Square(gamma)
+ gamma.Add(gamma, gamma)
+ gamma.Add(gamma, gamma)
+ gamma.Add(gamma, gamma)
+
+ q.y.Sub(q.y, gamma)
+
+ return q
+}
+
+func (curve p521Curve) ScalarMult(Bx, By *big.Int, k []byte) (*big.Int, *big.Int) {
+ B := curve.jacobianFromAffine(Bx, By)
+ p := &p512Point{
+ x: new(fiat.P521Element),
+ y: new(fiat.P521Element),
+ z: new(fiat.P521Element),
+ }
+
+ for _, byte := range k {
+ for bitNum := 0; bitNum < 8; bitNum++ {
+ p.doubleJacobian(p)
+ if byte&0x80 == 0x80 {
+ p.addJacobian(B, p)
+ }
+ byte <<= 1
+ }
+ }
+
+ return curve.affineFromJacobian(p)
+}
+
+func (curve p521Curve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
+ return curve.ScalarMult(curve.Gx, curve.Gy, k)
+}