1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
#include "fe.h"
#include "crypto_int64.h"
/*
fe_sq: h = f * f

Squares a field element held as ten signed limbs f[0..9] and writes the
carried result to h[0..9].

Representation, as implied by the carry widths used below: limb widths
alternate 26,25,26,25,... bits, so limb i has weight 2^ceil(25.5*i) and the
ten limbs together span 255 bits. A carry out of limb 9 is folded back into
limb 0 multiplied by 19, i.e. 2^255 is treated as congruent to 19.

Can overlap h with f: every f[i] is copied into a local before any h[i] is
written.

Preconditions:
|f| bounded by 1.65*2^26,1.65*2^25,1.65*2^26,1.65*2^25,etc.

Postconditions:
|h| bounded by 1.01*2^25,1.01*2^24,1.01*2^25,1.01*2^24,etc.

The function is straight-line code: it contains no loops and no branches.

See fe_mul.c for discussion of implementation strategy.
*/
void fe_sq(fe h,const fe f)
{
/* Snapshot the input limbs; nothing below reads f[] again. */
crypto_int32 f0 = f[0];
crypto_int32 f1 = f[1];
crypto_int32 f2 = f[2];
crypto_int32 f3 = f[3];
crypto_int32 f4 = f[4];
crypto_int32 f5 = f[5];
crypto_int32 f6 = f[6];
crypto_int32 f7 = f[7];
crypto_int32 f8 = f[8];
crypto_int32 f9 = f[9];
/*
Doubled limbs. They serve two purposes:
 - in a square every cross term f_i*f_j with i != j occurs twice;
 - a product of two odd-indexed limbs lands one bit above the weight of
   limb i+j (odd limbs sit at 25.5*i + 0.5 bits), so it needs an extra
   factor of 2.
f8 and f9 never need a plain doubled copy, so none is made.
*/
crypto_int32 f0_2 = 2 * f0;
crypto_int32 f1_2 = 2 * f1;
crypto_int32 f2_2 = 2 * f2;
crypto_int32 f3_2 = 2 * f3;
crypto_int32 f4_2 = 2 * f4;
crypto_int32 f5_2 = 2 * f5;
crypto_int32 f6_2 = 2 * f6;
crypto_int32 f7_2 = 2 * f7;
/*
Limbs pre-multiplied by 19 (even index) or 38 = 2*19 (odd index), used for
products whose index sum is >= 10 and therefore wrap past 2^255.
The trailing figures are magnitude bounds under the preconditions above;
they are below 2^31, so the 32-bit multiplications do not overflow.
*/
crypto_int32 f5_38 = 38 * f5; /* 1.959375*2^30 */
crypto_int32 f6_19 = 19 * f6; /* 1.959375*2^30 */
crypto_int32 f7_38 = 38 * f7; /* 1.959375*2^30 */
crypto_int32 f8_19 = 19 * f8; /* 1.959375*2^30 */
crypto_int32 f9_38 = 38 * f9; /* 1.959375*2^30 */
/*
The 55 distinct products f_i*f_j with i <= j. One operand is cast to
crypto_int64 so each multiplication is carried out at that width.
Naming: fIfJ_K is f_I * f_J * K, where K is the total constant factor
(doubling for cross terms, doubling for odd*odd, 19 for wrap-around);
no suffix means K = 1.
*/
crypto_int64 f0f0 = f0 * (crypto_int64) f0;
crypto_int64 f0f1_2 = f0_2 * (crypto_int64) f1;
crypto_int64 f0f2_2 = f0_2 * (crypto_int64) f2;
crypto_int64 f0f3_2 = f0_2 * (crypto_int64) f3;
crypto_int64 f0f4_2 = f0_2 * (crypto_int64) f4;
crypto_int64 f0f5_2 = f0_2 * (crypto_int64) f5;
crypto_int64 f0f6_2 = f0_2 * (crypto_int64) f6;
crypto_int64 f0f7_2 = f0_2 * (crypto_int64) f7;
crypto_int64 f0f8_2 = f0_2 * (crypto_int64) f8;
crypto_int64 f0f9_2 = f0_2 * (crypto_int64) f9;
crypto_int64 f1f1_2 = f1_2 * (crypto_int64) f1;
crypto_int64 f1f2_2 = f1_2 * (crypto_int64) f2;
crypto_int64 f1f3_4 = f1_2 * (crypto_int64) f3_2;
crypto_int64 f1f4_2 = f1_2 * (crypto_int64) f4;
crypto_int64 f1f5_4 = f1_2 * (crypto_int64) f5_2;
crypto_int64 f1f6_2 = f1_2 * (crypto_int64) f6;
crypto_int64 f1f7_4 = f1_2 * (crypto_int64) f7_2;
crypto_int64 f1f8_2 = f1_2 * (crypto_int64) f8;
crypto_int64 f1f9_76 = f1_2 * (crypto_int64) f9_38;
crypto_int64 f2f2 = f2 * (crypto_int64) f2;
crypto_int64 f2f3_2 = f2_2 * (crypto_int64) f3;
crypto_int64 f2f4_2 = f2_2 * (crypto_int64) f4;
crypto_int64 f2f5_2 = f2_2 * (crypto_int64) f5;
crypto_int64 f2f6_2 = f2_2 * (crypto_int64) f6;
crypto_int64 f2f7_2 = f2_2 * (crypto_int64) f7;
crypto_int64 f2f8_38 = f2_2 * (crypto_int64) f8_19;
crypto_int64 f2f9_38 = f2 * (crypto_int64) f9_38;
crypto_int64 f3f3_2 = f3_2 * (crypto_int64) f3;
crypto_int64 f3f4_2 = f3_2 * (crypto_int64) f4;
crypto_int64 f3f5_4 = f3_2 * (crypto_int64) f5_2;
crypto_int64 f3f6_2 = f3_2 * (crypto_int64) f6;
crypto_int64 f3f7_76 = f3_2 * (crypto_int64) f7_38;
crypto_int64 f3f8_38 = f3_2 * (crypto_int64) f8_19;
crypto_int64 f3f9_76 = f3_2 * (crypto_int64) f9_38;
crypto_int64 f4f4 = f4 * (crypto_int64) f4;
crypto_int64 f4f5_2 = f4_2 * (crypto_int64) f5;
crypto_int64 f4f6_38 = f4_2 * (crypto_int64) f6_19;
crypto_int64 f4f7_38 = f4 * (crypto_int64) f7_38;
crypto_int64 f4f8_38 = f4_2 * (crypto_int64) f8_19;
crypto_int64 f4f9_38 = f4 * (crypto_int64) f9_38;
crypto_int64 f5f5_38 = f5 * (crypto_int64) f5_38;
crypto_int64 f5f6_38 = f5_2 * (crypto_int64) f6_19;
crypto_int64 f5f7_76 = f5_2 * (crypto_int64) f7_38;
crypto_int64 f5f8_38 = f5_2 * (crypto_int64) f8_19;
crypto_int64 f5f9_76 = f5_2 * (crypto_int64) f9_38;
crypto_int64 f6f6_19 = f6 * (crypto_int64) f6_19;
crypto_int64 f6f7_38 = f6 * (crypto_int64) f7_38;
crypto_int64 f6f8_38 = f6_2 * (crypto_int64) f8_19;
crypto_int64 f6f9_38 = f6 * (crypto_int64) f9_38;
crypto_int64 f7f7_38 = f7 * (crypto_int64) f7_38;
crypto_int64 f7f8_38 = f7_2 * (crypto_int64) f8_19;
crypto_int64 f7f9_76 = f7_2 * (crypto_int64) f9_38;
crypto_int64 f8f8_19 = f8 * (crypto_int64) f8_19;
crypto_int64 f8f9_38 = f8 * (crypto_int64) f9_38;
crypto_int64 f9f9_38 = f9 * (crypto_int64) f9_38;
/*
Column sums: h_k gathers every product with i + j == k, plus every
wrapped product with i + j == k + 10 (those already carry the factor 19).
*/
crypto_int64 h0 = f0f0 +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
crypto_int64 h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
crypto_int64 h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
crypto_int64 h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
crypto_int64 h4 = f0f4_2+f1f3_4 +f2f2 +f5f9_76+f6f8_38+f7f7_38;
crypto_int64 h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
crypto_int64 h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
crypto_int64 h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
crypto_int64 h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4 +f9f9_38;
crypto_int64 h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
crypto_int64 carry0;
crypto_int64 carry1;
crypto_int64 carry2;
crypto_int64 carry3;
crypto_int64 carry4;
crypto_int64 carry5;
crypto_int64 carry6;
crypto_int64 carry7;
crypto_int64 carry8;
crypto_int64 carry9;
/*
Carry propagation. Each step has the form
   carry = (h_k + half) >> width;  h_{k+1} += carry;  h_k -= carry << width;
with width 26 for even k and 25 for odd k, and half = 2^(width-1). Adding
half before shifting rounds the carry to nearest, so the limb left behind
is a small signed value rather than a non-negative one.
NOTE(review): this relies on >> of a negative crypto_int64 being an
arithmetic shift, which C leaves implementation-defined - confirm for the
target compilers. SHL64 is defined outside this file; presumably it is a
64-bit left shift written to avoid shifting a negative value directly -
confirm against its definition.

Two chains, 0->1->2->3->4 and 4->5->6->7->8, are interleaved statement by
statement (presumably to keep adjacent statements independent; see
fe_mul.c). The order of the statements must not be changed.
*/
carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
carry1 = (h1 + (crypto_int64) (1<<24)) >> 25; h2 += carry1; h1 -= SHL64(carry1,25);
carry5 = (h5 + (crypto_int64) (1<<24)) >> 25; h6 += carry5; h5 -= SHL64(carry5,25);
carry2 = (h2 + (crypto_int64) (1<<25)) >> 26; h3 += carry2; h2 -= SHL64(carry2,26);
carry6 = (h6 + (crypto_int64) (1<<25)) >> 26; h7 += carry6; h6 -= SHL64(carry6,26);
carry3 = (h3 + (crypto_int64) (1<<24)) >> 25; h4 += carry3; h3 -= SHL64(carry3,25);
carry7 = (h7 + (crypto_int64) (1<<24)) >> 25; h8 += carry7; h7 -= SHL64(carry7,25);
/* h4 was already carried once, but has since received carry3: carry it again. */
carry4 = (h4 + (crypto_int64) (1<<25)) >> 26; h5 += carry4; h4 -= SHL64(carry4,26);
carry8 = (h8 + (crypto_int64) (1<<25)) >> 26; h9 += carry8; h8 -= SHL64(carry8,26);
/* Top limb: the carry out of h9 wraps around to h0 with weight 19. */
carry9 = (h9 + (crypto_int64) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= SHL64(carry9,25);
/* h0 has just absorbed 19*carry9: carry it once more into h1. */
carry0 = (h0 + (crypto_int64) (1<<25)) >> 26; h1 += carry0; h0 -= SHL64(carry0,26);
/*
Narrow the carried limbs back to crypto_int32 and store them. Per the
postcondition bounds stated in the header comment, every value fits.
*/
h[0] = (crypto_int32) h0;
h[1] = (crypto_int32) h1;
h[2] = (crypto_int32) h2;
h[3] = (crypto_int32) h3;
h[4] = (crypto_int32) h4;
h[5] = (crypto_int32) h5;
h[6] = (crypto_int32) h6;
h[7] = (crypto_int32) h7;
h[8] = (crypto_int32) h8;
h[9] = (crypto_int32) h9;
}
|