aboutsummaryrefslogtreecommitdiff
path: root/src/ext/ed25519/donna/ed25519-donna-impl-sse2.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/ext/ed25519/donna/ed25519-donna-impl-sse2.h')
-rw-r--r--src/ext/ed25519/donna/ed25519-donna-impl-sse2.h390
1 files changed, 390 insertions, 0 deletions
diff --git a/src/ext/ed25519/donna/ed25519-donna-impl-sse2.h b/src/ext/ed25519/donna/ed25519-donna-impl-sse2.h
new file mode 100644
index 0000000000..5fe3416381
--- /dev/null
+++ b/src/ext/ed25519/donna/ed25519-donna-impl-sse2.h
@@ -0,0 +1,390 @@
+/*
+ conversions
+*/
+
+static void
+ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {
+ packed64bignum25519 ALIGN(16) xz, tt, xzout;
+ curve25519_mul(r->y, p->y, p->z);
+ curve25519_tangle64(xz, p->x, p->z);
+ curve25519_tangleone64(tt, p->t);
+ curve25519_mul_packed64(xzout, xz, tt);
+ curve25519_untangle64(r->x, r->z, xzout);
+}
+
+static void
+ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {
+ packed64bignum25519 ALIGN(16) zy, xt, xx, zz, ty;
+ curve25519_tangle64(ty, p->t, p->y);
+ curve25519_tangleone64(xx, p->x);
+ curve25519_mul_packed64(xt, xx, ty);
+ curve25519_untangle64(r->x, r->t, xt);
+ curve25519_tangleone64(zz, p->z);
+ curve25519_mul_packed64(zy, zz, ty);
+ curve25519_untangle64(r->z, r->y, zy);
+}
+
+static void
+ge25519_full_to_pniels(ge25519_pniels *p, const ge25519 *r) {
+ curve25519_sub(p->ysubx, r->y, r->x);
+ curve25519_add(p->xaddy, r->x, r->y);
+ curve25519_copy(p->z, r->z);
+ curve25519_mul(p->t2d, r->t, ge25519_ec2d);
+}
+
+/*
+ adding & doubling
+*/
+
+static void
+ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519 *q) {
+ bignum25519 ALIGN(16) a,b,c,d;
+ packed32bignum25519 ALIGN(16) xx, yy, yypxx, yymxx, bd, ac, bdmac, bdpac;
+ packed64bignum25519 ALIGN(16) at, bu, atbu, ptz, qtz, cd;
+
+ curve25519_tangle32(yy, p->y, q->y);
+ curve25519_tangle32(xx, p->x, q->x);
+ curve25519_add_packed32(yypxx, yy, xx);
+ curve25519_sub_packed32(yymxx, yy, xx);
+ curve25519_tangle64_from32(at, bu, yymxx, yypxx);
+ curve25519_mul_packed64(atbu, at, bu);
+ curve25519_untangle64(a, b, atbu);
+ curve25519_tangle64(ptz, p->t, p->z);
+ curve25519_tangle64(qtz, q->t, q->z);
+ curve25519_mul_packed64(cd, ptz, qtz);
+ curve25519_untangle64(c, d, cd);
+ curve25519_mul(c, c, ge25519_ec2d);
+ curve25519_add_reduce(d, d, d);
+ /* reduce, so no after_basic is needed later */
+ curve25519_tangle32(bd, b, d);
+ curve25519_tangle32(ac, a, c);
+ curve25519_sub_packed32(bdmac, bd, ac);
+ curve25519_add_packed32(bdpac, bd, ac);
+ curve25519_untangle32(r->x, r->t, bdmac);
+ curve25519_untangle32(r->y, r->z, bdpac);
+}
+
+
+static void
+ge25519_double_p1p1(ge25519_p1p1 *r, const ge25519 *p) {
+ bignum25519 ALIGN(16) a,b,c,x;
+ packed64bignum25519 ALIGN(16) xy, zx, ab, cx;
+ packed32bignum25519 ALIGN(16) xc, yz, xt, yc, ac, bc;
+
+ curve25519_add(x, p->x, p->y);
+ curve25519_tangle64(xy, p->x, p->y);
+ curve25519_square_packed64(ab, xy);
+ curve25519_untangle64(a, b, ab);
+ curve25519_tangle64(zx, p->z, x);
+ curve25519_square_packed64(cx, zx);
+ curve25519_untangle64(c, x, cx);
+ curve25519_tangle32(bc, b, c);
+ curve25519_tangle32(ac, a, c);
+ curve25519_add_reduce_packed32(yc, bc, ac);
+ curve25519_untangle32(r->y, c, yc);
+ curve25519_sub(r->z, b, a);
+ curve25519_tangle32(yz, r->y, r->z);
+ curve25519_tangle32(xc, x, c);
+ curve25519_sub_after_basic_packed32(xt, xc, yz);
+ curve25519_untangle32(r->x, r->t, xt);
+}
+
+static void
+ge25519_nielsadd2_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_niels *q, unsigned char signbit) {
+ const bignum25519 *qb = (const bignum25519 *)q;
+ bignum25519 *rb = (bignum25519 *)r;
+ bignum25519 ALIGN(16) a,b,c;
+ packed64bignum25519 ALIGN(16) ab, yx, aybx;
+ packed32bignum25519 ALIGN(16) bd, ac, bdac;
+
+ curve25519_sub(a, p->y, p->x);
+ curve25519_add(b, p->y, p->x);
+ curve25519_tangle64(ab, a, b);
+ curve25519_tangle64(yx, qb[signbit], qb[signbit^1]);
+ curve25519_mul_packed64(aybx, ab, yx);
+ curve25519_untangle64(a, b, aybx);
+ curve25519_add(r->y, b, a);
+ curve25519_add_reduce(r->t, p->z, p->z);
+ curve25519_mul(c, p->t, q->t2d);
+ curve25519_copy(r->z, r->t);
+ curve25519_add(rb[2+signbit], rb[2+signbit], c);
+ curve25519_tangle32(bd, b, rb[2+(signbit^1)]);
+ curve25519_tangle32(ac, a, c);
+ curve25519_sub_packed32(bdac, bd, ac);
+ curve25519_untangle32(r->x, rb[2+(signbit^1)], bdac);
+}
+
+static void
+ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_pniels *q, unsigned char signbit) {
+ const bignum25519 *qb = (const bignum25519 *)q;
+ bignum25519 *rb = (bignum25519 *)r;
+ bignum25519 ALIGN(16) a,b,c;
+ packed64bignum25519 ALIGN(16) ab, yx, aybx, zt, zt2d, tc;
+ packed32bignum25519 ALIGN(16) bd, ac, bdac;
+
+ curve25519_sub(a, p->y, p->x);
+ curve25519_add(b, p->y, p->x);
+ curve25519_tangle64(ab, a, b);
+ curve25519_tangle64(yx, qb[signbit], qb[signbit^1]);
+ curve25519_mul_packed64(aybx, ab, yx);
+ curve25519_untangle64(a, b, aybx);
+ curve25519_add(r->y, b, a);
+ curve25519_tangle64(zt, p->z, p->t);
+ curve25519_tangle64(zt2d, q->z, q->t2d);
+ curve25519_mul_packed64(tc, zt, zt2d);
+ curve25519_untangle64(r->t, c, tc);
+ curve25519_add_reduce(r->t, r->t, r->t);
+ curve25519_copy(r->z, r->t);
+ curve25519_add(rb[2+signbit], rb[2+signbit], c);
+ curve25519_tangle32(bd, b, rb[2+(signbit^1)]);
+ curve25519_tangle32(ac, a, c);
+ curve25519_sub_packed32(bdac, bd, ac);
+ curve25519_untangle32(r->x, rb[2+(signbit^1)], bdac);
+}
+
+static void
+ge25519_double(ge25519 *r, const ge25519 *p) {
+ ge25519_p1p1 ALIGN(16) t;
+ ge25519_double_p1p1(&t, p);
+ ge25519_p1p1_to_full(r, &t);
+}
+
+static void
+ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q) {
+ ge25519_p1p1 ALIGN(16) t;
+ ge25519_add_p1p1(&t, p, q);
+ ge25519_p1p1_to_full(r, &t);
+}
+
+static void
+ge25519_double_partial(ge25519 *r, const ge25519 *p) {
+ ge25519_p1p1 ALIGN(16) t;
+ ge25519_double_p1p1(&t, p);
+ ge25519_p1p1_to_partial(r, &t);
+}
+
+static void
+ge25519_nielsadd2(ge25519 *r, const ge25519_niels *q) {
+ packed64bignum25519 ALIGN(16) ab, yx, aybx, eg, ff, hh, xz, ty;
+ packed32bignum25519 ALIGN(16) bd, ac, bdac;
+ bignum25519 ALIGN(16) a,b,c,d,e,f,g,h;
+
+ curve25519_sub(a, r->y, r->x);
+ curve25519_add(b, r->y, r->x);
+ curve25519_tangle64(ab, a, b);
+ curve25519_tangle64(yx, q->ysubx, q->xaddy);
+ curve25519_mul_packed64(aybx, ab, yx);
+ curve25519_untangle64(a, b, aybx);
+ curve25519_add(h, b, a);
+ curve25519_add_reduce(d, r->z, r->z);
+ curve25519_mul(c, r->t, q->t2d);
+ curve25519_add(g, d, c); /* d is reduced, so no need for after_basic */
+ curve25519_tangle32(bd, b, d);
+ curve25519_tangle32(ac, a, c);
+ curve25519_sub_packed32(bdac, bd, ac); /* d is reduced, so no need for after_basic */
+ curve25519_untangle32(e, f, bdac);
+ curve25519_tangle64(eg, e, g);
+ curve25519_tangleone64(ff, f);
+ curve25519_mul_packed64(xz, eg, ff);
+ curve25519_untangle64(r->x, r->z, xz);
+ curve25519_tangleone64(hh, h);
+ curve25519_mul_packed64(ty, eg, hh);
+ curve25519_untangle64(r->t, r->y, ty);
+}
+
+static void
+ge25519_pnielsadd(ge25519_pniels *r, const ge25519 *p, const ge25519_pniels *q) {
+ ge25519_p1p1 ALIGN(16) t;
+ ge25519 ALIGN(16) f;
+ ge25519_pnielsadd_p1p1(&t, p, q, 0);
+ ge25519_p1p1_to_full(&f, &t);
+ ge25519_full_to_pniels(r, &f);
+}
+
+/*
+ pack & unpack
+*/
+
+static void
+ge25519_pack(unsigned char r[32], const ge25519 *p) {
+ bignum25519 ALIGN(16) tx, ty, zi;
+ unsigned char parity[32];
+ curve25519_recip(zi, p->z);
+ curve25519_mul(tx, p->x, zi);
+ curve25519_mul(ty, p->y, zi);
+ curve25519_contract(r, ty);
+ curve25519_contract(parity, tx);
+ r[31] ^= ((parity[0] & 1) << 7);
+}
+
+
+static int
+ge25519_unpack_negative_vartime(ge25519 *r, const unsigned char p[32]) {
+ static const bignum25519 ALIGN(16) one = {1};
+ static const unsigned char zero[32] = {0};
+ unsigned char parity = p[31] >> 7;
+ unsigned char check[32];
+ bignum25519 ALIGN(16) t, root, num, den, d3;
+
+ curve25519_expand(r->y, p);
+ curve25519_copy(r->z, one);
+ curve25519_square_times(num, r->y, 1); /* x = y^2 */
+ curve25519_mul(den, num, ge25519_ecd); /* den = dy^2 */
+ curve25519_sub_reduce(num, num, r->z); /* x = y^2 - 1 */
+ curve25519_add(den, den, r->z); /* den = dy^2 + 1 */
+
+ /* Computation of sqrt(num/den) */
+ /* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */
+ curve25519_square_times(t, den, 1);
+ curve25519_mul(d3, t, den);
+ curve25519_square_times(r->x, d3, 1);
+ curve25519_mul(r->x, r->x, den);
+ curve25519_mul(r->x, r->x, num);
+ curve25519_pow_two252m3(r->x, r->x);
+
+ /* 2. computation of r->x = t * num * den^3 */
+ curve25519_mul(r->x, r->x, d3);
+ curve25519_mul(r->x, r->x, num);
+
+ /* 3. Check if either of the roots works: */
+ curve25519_square_times(t, r->x, 1);
+ curve25519_mul(t, t, den);
+ curve25519_copy(root, t);
+ curve25519_sub_reduce(root, root, num);
+ curve25519_contract(check, root);
+ if (!ed25519_verify(check, zero, 32)) {
+ curve25519_add_reduce(t, t, num);
+ curve25519_contract(check, t);
+ if (!ed25519_verify(check, zero, 32))
+ return 0;
+ curve25519_mul(r->x, r->x, ge25519_sqrtneg1);
+ }
+
+ curve25519_contract(check, r->x);
+ if ((check[0] & 1) == parity) {
+ curve25519_copy(t, r->x);
+ curve25519_neg(r->x, t);
+ }
+ curve25519_mul(r->t, r->x, r->y);
+ return 1;
+}
+
+
+
+/*
+ scalarmults
+*/
+
+#define S1_SWINDOWSIZE 5
+#define S1_TABLE_SIZE (1<<(S1_SWINDOWSIZE-2))
+#define S2_SWINDOWSIZE 7
+#define S2_TABLE_SIZE (1<<(S2_SWINDOWSIZE-2))
+
+static void
+ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) {
+ signed char slide1[256], slide2[256];
+ ge25519_pniels ALIGN(16) pre1[S1_TABLE_SIZE];
+ ge25519 ALIGN(16) d1;
+ ge25519_p1p1 ALIGN(16) t;
+ int32_t i;
+
+ contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE);
+ contract256_slidingwindow_modm(slide2, s2, S2_SWINDOWSIZE);
+
+ ge25519_double(&d1, p1);
+ ge25519_full_to_pniels(pre1, p1);
+ for (i = 0; i < S1_TABLE_SIZE - 1; i++)
+ ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]);
+
+ /* set neutral */
+ memset(r, 0, sizeof(ge25519));
+ r->y[0] = 1;
+ r->z[0] = 1;
+
+ i = 255;
+ while ((i >= 0) && !(slide1[i] | slide2[i]))
+ i--;
+
+ for (; i >= 0; i--) {
+ ge25519_double_p1p1(&t, r);
+
+ if (slide1[i]) {
+ ge25519_p1p1_to_full(r, &t);
+ ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (unsigned char)slide1[i] >> 7);
+ }
+
+ if (slide2[i]) {
+ ge25519_p1p1_to_full(r, &t);
+ ge25519_nielsadd2_p1p1(&t, r, &ge25519_niels_sliding_multiples[abs(slide2[i]) / 2], (unsigned char)slide2[i] >> 7);
+ }
+
+ ge25519_p1p1_to_partial(r, &t);
+ }
+}
+
+#if !defined(HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS)
+
+static uint32_t
+ge25519_windowb_equal(uint32_t b, uint32_t c) {
+ return ((b ^ c) - 1) >> 31;
+}
+
+static void
+ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
+ bignum25519 ALIGN(16) neg;
+ uint32_t sign = (uint32_t)((unsigned char)b >> 7);
+ uint32_t mask = ~(sign - 1);
+ uint32_t u = (b + mask) ^ mask;
+ uint32_t i;
+
+ /* ysubx, xaddy, t2d in packed form. initialize to ysubx = 1, xaddy = 1, t2d = 0 */
+ uint8_t ALIGN(16) packed[96] = {0};
+ packed[0] = 1;
+ packed[32] = 1;
+
+ for (i = 0; i < 8; i++)
+ curve25519_move_conditional_bytes(packed, table[(pos * 8) + i], ge25519_windowb_equal(u, i + 1));
+
+ /* expand in to t */
+ curve25519_expand(t->ysubx, packed + 0);
+ curve25519_expand(t->xaddy, packed + 32);
+ curve25519_expand(t->t2d , packed + 64);
+
+ /* adjust for sign */
+ curve25519_swap_conditional(t->ysubx, t->xaddy, sign);
+ curve25519_neg(neg, t->t2d);
+ curve25519_swap_conditional(t->t2d, neg, sign);
+}
+
+#endif /* HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS */
+
+static void
+ge25519_scalarmult_base_niels(ge25519 *r, const uint8_t table[256][96], const bignum256modm s) {
+ signed char b[64];
+ uint32_t i;
+ ge25519_niels ALIGN(16) t;
+
+ contract256_window4_modm(b, s);
+
+ ge25519_scalarmult_base_choose_niels(&t, table, 0, b[1]);
+ curve25519_sub_reduce(r->x, t.xaddy, t.ysubx);
+ curve25519_add_reduce(r->y, t.xaddy, t.ysubx);
+ memset(r->z, 0, sizeof(bignum25519));
+ r->z[0] = 2;
+ curve25519_copy(r->t, t.t2d);
+ for (i = 3; i < 64; i += 2) {
+ ge25519_scalarmult_base_choose_niels(&t, table, i / 2, b[i]);
+ ge25519_nielsadd2(r, &t);
+ }
+ ge25519_double_partial(r, r);
+ ge25519_double_partial(r, r);
+ ge25519_double_partial(r, r);
+ ge25519_double(r, r);
+ ge25519_scalarmult_base_choose_niels(&t, table, 0, b[0]);
+ curve25519_mul(t.t2d, t.t2d, ge25519_ecd);
+ ge25519_nielsadd2(r, &t);
+ for(i = 2; i < 64; i += 2) {
+ ge25519_scalarmult_base_choose_niels(&t, table, i / 2, b[i]);
+ ge25519_nielsadd2(r, &t);
+ }
+}