summaryrefslogtreecommitdiff
path: root/src/ext/ed25519/donna/ed25519-donna-batchverify.h
blob: 43c4923b3eadcb5750ed77325128b8dd52d99fd8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
/*
	Ed25519 batch verification
*/

/* maximum number of signatures that ed25519_sign_open_batch verifies in one batch */
#define max_batch_size 64
/* heap capacity: one slot for the base point plus two points (public key and R) per signature */
#define heap_batch_size ((max_batch_size * 2) + 1)

/*
	number of limbs needed to hold 128 bits (128 / bits per limb, rounded up);
	ge25519_multi_scalarmult_vartime_final starts its scan for the highest
	non-zero limb at this index
*/
static const size_t limb128bits = (128 + bignum256modm_bits_per_limb - 1) / bignum256modm_bits_per_limb;

/* heap entries are indices into the points/scalars arrays of batch_heap */
typedef size_t heap_index_t;

/*
	working state for one batch: the (point, scalar) pairs to be combined and
	a heap of indices ordered by scalar, largest at the root
*/
typedef struct batch_heap_t {
	unsigned char r[heap_batch_size][16]; /* 128 bit random values */
	ge25519 points[heap_batch_size]; /* points[i] is paired with scalars[i] */
	bignum256modm scalars[heap_batch_size]; /* scalars[i] multiplies points[i] */
	heap_index_t heap[heap_batch_size]; /* binary heap of indices, heap[0] is the root */
	size_t size; /* number of indices currently in the heap */
} batch_heap;

/* exchange the heap entries stored at positions a and b */
static void
heap_swap(heap_index_t *heap, size_t a, size_t b) {
	const heap_index_t saved_b = heap[b];

	heap[b] = heap[a];
	heap[a] = saved_b;
}

/*
	push index heap->size onto the heap and grow the heap by one

	the new index is placed in the last slot and moved towards the root while
	its parent's scalar is strictly smaller, comparing over the full limb range
*/
static void
heap_insert_next(batch_heap *heap) {
	heap_index_t *order = heap->heap;
	bignum256modm *scalars = heap->scalars;
	size_t child = heap->size;
	size_t up;

	/* the new element starts out in the last slot */
	order[child] = (heap_index_t)child;

	/* bubble it up; the "child &&" test stops at the root before "up" is ever used */
	for (up = (child - 1) / 2;
	     child && lt256_modm_batch(scalars[order[up]], scalars[order[child]], bignum256modm_limb_size - 1);
	     up = (child - 1) / 2) {
		heap_swap(order, up, child);
		child = up;
	}

	heap->size++;
}

/*
	restore heap order after the scalar referenced by the root has changed

	the root is first pushed all the way down, always swapping with the larger
	child, and is then moved back up while its parent's scalar is less than or
	equal to it. comparisons look at limbs 0..limbsize only.

	the descent only continues while a right child exists, so it relies on the
	heap holding an odd number of entries (the callers in this file build and
	extend the heap with odd counts)
*/
static void
heap_updated_root(batch_heap *heap, size_t limbsize) {
	heap_index_t *order = heap->heap;
	bignum256modm *scalars = heap->scalars;
	size_t hole = 0; /* slot that currently holds the old root */
	size_t pick = 1; /* child most recently swapped with the hole */
	size_t left, right, up;

	/* walk the old root down to a leaf along the path of larger children */
	for (left = 1, right = 2; right < heap->size; left = (hole * 2) + 1, right = left + 1) {
		if (lt256_modm_batch(scalars[order[left]], scalars[order[right]], limbsize))
			pick = right;
		else
			pick = left;
		heap_swap(order, hole, pick);
		hole = pick;
	}

	/* then lift it back up to where it belongs */
	for (up = (pick - 1) / 2;
	     pick && lte256_modm_batch(scalars[order[up]], scalars[order[pick]], limbsize);
	     up = (pick - 1) / 2) {
		heap_swap(order, up, pick);
		pick = up;
	}
}

/* reset the heap and insert indices 0..count-1 one at a time, count must be >= 3 */
static void
heap_build(batch_heap *heap, size_t count) {
	heap->heap[0] = 0;
	for (heap->size = 0; heap->size < count; )
		heap_insert_next(heap);
}

/* keep inserting the next index until the heap holds new_count elements */
static void
heap_extend(batch_heap *heap, size_t new_count) {
	for (;;) {
		if (heap->size >= new_count)
			return;
		heap_insert_next(heap);
	}
}

/*
	report the indices of the two largest scalars in the heap

	*max1 receives the root and *max2 the larger of the root's two children
	(the left child on a tie), compared over limbs 0..limbsize. reads
	heap slots 0, 1 and 2, so the heap must hold at least 3 entries
*/
static void
heap_get_top2(batch_heap *heap, heap_index_t *max1, heap_index_t *max2, size_t limbsize) {
	const heap_index_t root = heap->heap[0];
	const heap_index_t left = heap->heap[1];
	const heap_index_t right = heap->heap[2];
	const heap_index_t runner_up =
		lt256_modm_batch(heap->scalars[left], heap->scalars[right], limbsize) ? right : left;

	*max1 = root;
	*max2 = runner_up;
}

/*
	compute r = scalar * point with a variable time, most-significant-bit-first
	double-and-add

	r      receives the result
	point  the point to multiply
	scalar the multiplier; only limbs 0..limb128bits are examined

	scalar == 1 and scalar == 0 are answered directly. for anything else r is
	seeded with point, which already accounts for the leading set bit, and the
	remaining lower bits are then processed one at a time.

	NOTE(review): the scan for the highest non-zero limb starts at limb128bits,
	so this presumably relies on the caller only passing scalars that fit in
	those limbs - confirm against the caller
*/
static void
ge25519_multi_scalarmult_vartime_final(ge25519 *r, ge25519 *point, bignum256modm scalar) {
	const bignum256modm_element_t topbit = ((bignum256modm_element_t)1 << (bignum256modm_bits_per_limb - 1));
	size_t limb = limb128bits;
	bignum256modm_element_t flag;

	if (isone256_modm_batch(scalar)) {
		/* this will happen most of the time after bos-carter */
		*r = *point;
		return;
	} else if (iszero256_modm_batch(scalar)) {
		/* this will only happen if all scalars == 0; return the neutral element (x = 0, y = z = 1) */
		memset(r, 0, sizeof(*r));
		r->y[0] = 1;
		r->z[0] = 1;
		return;
	}

	/* r = 1 * point stands for the leading set bit of scalar */
	*r = *point;

	/* find the limb where first bit is set */
	while (!scalar[limb])
		limb--;

	/* find the first bit */
	flag = topbit;
	while ((scalar[limb] & flag) == 0)
		flag >>= 1;

	/* double-and-add over every bit below the leading one */
	for (;;) {
		/*
			step to the next lower bit before doubling: the leading bit is
			already in r, and handling it again here would add a spurious
			2^(t+1) * point to the result
		*/
		flag >>= 1;
		if (!flag) {
			/* limb exhausted; stop after limb 0, otherwise continue at the top of the next limb down */
			if (!limb--)
				break;
			flag = topbit;
		}

		ge25519_double(r, r);
		if (scalar[limb] & flag)
			ge25519_add(r, r, point);
	}
}

/*
	compute r = sum of heap->scalars[i] * heap->points[i] for i in 0..count-1,
	in variable time. count must be >= 5

	works by repeatedly taking the two largest scalars s1 >= s2 (with points
	P1, P2), replacing s1 with s1 - s2 and P2 with P2 + P1, until only one
	non-zero scalar is left; that last pair is handed to
	ge25519_multi_scalarmult_vartime_final. the heap is first built from an odd
	number of roughly half the scalars and is extended to all count entries
	once the largest scalar is at most 128 bits. the scalars and points stored
	in heap are modified in place
*/
static void
ge25519_multi_scalarmult_vartime(ge25519 *r, batch_heap *heap, size_t count) {
	heap_index_t largest, runner_up;

	/* index of the highest limb still in use; starts at the full limb size */
	size_t limbsize = bignum256modm_limb_size - 1;

	/* set once the heap has been extended to include the 128 bit scalars */
	int all_inserted = 0;

	/* grab an odd number of scalars to build the heap, unknown limb sizes */
	heap_build(heap, ((count + 1) / 2) | 1);

	/* loop until the second largest scalar is zero, i.e. only one scalar remains */
	heap_get_top2(heap, &largest, &runner_up, limbsize);
	while (!iszero256_modm_batch(heap->scalars[runner_up])) {
		/* top limb of the largest scalar is used up, stop looking at it */
		if (heap->scalars[largest][limbsize] == 0)
			limbsize--;

		/* bring in the remaining entries as soon as the largest scalar is small enough */
		if (!all_inserted && isatmost128bits256_modm_batch(heap->scalars[largest])) {
			heap_extend(heap, count);
			heap_get_top2(heap, &largest, &runner_up, limbsize);
			all_inserted = 1;
		}

		/* s1 -= s2, P2 += P1, then re-sort the changed root */
		sub256_modm_batch(heap->scalars[largest], heap->scalars[largest], heap->scalars[runner_up], limbsize);
		ge25519_add(&heap->points[runner_up], &heap->points[runner_up], &heap->points[largest]);
		heap_updated_root(heap, limbsize);

		heap_get_top2(heap, &largest, &runner_up, limbsize);
	}

	ge25519_multi_scalarmult_vartime_final(r, &heap->points[largest], heap->scalars[largest]);
}

/*
	not actually used for anything other than testing

	in the code visible in this file, ge25519_is_neutral_vartime() copies the
	contracted y coordinate of the point it checks into batch_point_buffer[1];
	rows 0 and 2 are not written here.

	NOTE(review): this is a non-static object definition inside a header, so
	including the header from more than one translation unit may produce
	duplicate definitions - confirm how the header is included
*/
unsigned char batch_point_buffer[3][32];

static int
ge25519_is_neutral_vartime(const ge25519 *p) {
	static const unsigned char zero[32] = {0};
	unsigned char point_buffer[3][32];
	curve25519_contract(point_buffer[0], p->x);
	curve25519_contract(point_buffer[1], p->y);
	curve25519_contract(point_buffer[2], p->z);
	memcpy(batch_point_buffer[1], point_buffer[1], 32);
	return (memcmp(point_buffer[0], zero, 32) == 0) && (memcmp(point_buffer[1], point_buffer[2], 32) == 0);
}

int
ED25519_FN(ed25519_sign_open_batch) (const unsigned char **m, size_t *mlen, const unsigned char **pk, const unsigned char **RS, size_t num, int *valid) {
	batch_heap ALIGN(16) batch;
	ge25519 ALIGN(16) p;
	bignum256modm *r_scalars;
	size_t i, batchsize;
	unsigned char hram[64];
	int ret = 0;

	for (i = 0; i < num; i++)
		valid[i] = 1;

	while (num > 3) {
		batchsize = (num > max_batch_size) ? max_batch_size : num;

		/* generate r (scalars[batchsize+1]..scalars[2*batchsize] */
		ED25519_FN(ed25519_randombytes_unsafe) (batch.r, batchsize * 16);
		r_scalars = &batch.scalars[batchsize + 1];
		for (i = 0; i < batchsize; i++)
			expand256_modm(r_scalars[i], batch.r[i], 16);

		/* compute scalars[0] = ((r1s1 + r2s2 + ...)) */
		for (i = 0; i < batchsize; i++) {
			expand256_modm(batch.scalars[i], RS[i] + 32, 32);
			mul256_modm(batch.scalars[i], batch.scalars[i], r_scalars[i]);
		}
		for (i = 1; i < batchsize; i++)
			add256_modm(batch.scalars[0], batch.scalars[0], batch.scalars[i]);

		/* compute scalars[1]..scalars[batchsize] as r[i]*H(R[i],A[i],m[i]) */
		for (i = 0; i < batchsize; i++) {
			ed25519_hram(hram, RS[i], pk[i], m[i], mlen[i]);
			expand256_modm(batch.scalars[i+1], hram, 64);
			mul256_modm(batch.scalars[i+1], batch.scalars[i+1], r_scalars[i]);
		}

		/* compute points */
		batch.points[0] = ge25519_basepoint;
		for (i = 0; i < batchsize; i++)
			if (!ge25519_unpack_negative_vartime(&batch.points[i+1], pk[i]))
				goto fallback;
		for (i = 0; i < batchsize; i++)
			if (!ge25519_unpack_negative_vartime(&batch.points[batchsize+i+1], RS[i]))
				goto fallback;

		ge25519_multi_scalarmult_vartime(&p, &batch, (batchsize * 2) + 1);
		if (!ge25519_is_neutral_vartime(&p)) {
			ret |= 2;

			fallback:
			for (i = 0; i < batchsize; i++) {
				valid[i] = ED25519_FN(ed25519_sign_open) (m[i], mlen[i], pk[i], RS[i]) ? 0 : 1;
				ret |= (valid[i] ^ 1);
			}
		}

		m += batchsize;
		mlen += batchsize;
		pk += batchsize;
		RS += batchsize;
		num -= batchsize;
		valid += batchsize;
	}

	for (i = 0; i < num; i++) {
		valid[i] = ED25519_FN(ed25519_sign_open) (m[i], mlen[i], pk[i], RS[i]) ? 0 : 1;
		ret |= (valid[i] ^ 1);
	}

	return ret;
}