1 #include <stddef.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "crypto_verify_32.h"
7 #include "private/common.h"
8 #include "private/ed25519_ref10.h"
9 #include "utils.h"
10
11 static inline uint64_t
load_3(const unsigned char * in)12 load_3(const unsigned char *in)
13 {
14 uint64_t result;
15
16 result = (uint64_t) in[0];
17 result |= ((uint64_t) in[1]) << 8;
18 result |= ((uint64_t) in[2]) << 16;
19
20 return result;
21 }
22
23 static inline uint64_t
load_4(const unsigned char * in)24 load_4(const unsigned char *in)
25 {
26 uint64_t result;
27
28 result = (uint64_t) in[0];
29 result |= ((uint64_t) in[1]) << 8;
30 result |= ((uint64_t) in[2]) << 16;
31 result |= ((uint64_t) in[3]) << 24;
32
33 return result;
34 }
35
36 /*
37 * Field arithmetic:
38 * Use 5*51 bit limbs on 64-bit systems with support for 128 bit arithmetic,
39 * and 10*25.5 bit limbs elsewhere.
40 *
41 * Functions used elsewhere that are candidates for inlining are defined
42 * via "private/curve25519_ref10.h".
43 */
44
45 #ifdef HAVE_TI_MODE
46 # include "fe_51/constants.h"
47 # include "fe_51/fe.h"
48 #else
49 # include "fe_25_5/constants.h"
50 # include "fe_25_5/fe.h"
51 #endif
52
53 void
fe25519_invert(fe25519 out,const fe25519 z)54 fe25519_invert(fe25519 out, const fe25519 z)
55 {
56 fe25519 t0;
57 fe25519 t1;
58 fe25519 t2;
59 fe25519 t3;
60 int i;
61
62 fe25519_sq(t0, z);
63 fe25519_sq(t1, t0);
64 fe25519_sq(t1, t1);
65 fe25519_mul(t1, z, t1);
66 fe25519_mul(t0, t0, t1);
67 fe25519_sq(t2, t0);
68 fe25519_mul(t1, t1, t2);
69 fe25519_sq(t2, t1);
70 for (i = 1; i < 5; ++i) {
71 fe25519_sq(t2, t2);
72 }
73 fe25519_mul(t1, t2, t1);
74 fe25519_sq(t2, t1);
75 for (i = 1; i < 10; ++i) {
76 fe25519_sq(t2, t2);
77 }
78 fe25519_mul(t2, t2, t1);
79 fe25519_sq(t3, t2);
80 for (i = 1; i < 20; ++i) {
81 fe25519_sq(t3, t3);
82 }
83 fe25519_mul(t2, t3, t2);
84 fe25519_sq(t2, t2);
85 for (i = 1; i < 10; ++i) {
86 fe25519_sq(t2, t2);
87 }
88 fe25519_mul(t1, t2, t1);
89 fe25519_sq(t2, t1);
90 for (i = 1; i < 50; ++i) {
91 fe25519_sq(t2, t2);
92 }
93 fe25519_mul(t2, t2, t1);
94 fe25519_sq(t3, t2);
95 for (i = 1; i < 100; ++i) {
96 fe25519_sq(t3, t3);
97 }
98 fe25519_mul(t2, t3, t2);
99 fe25519_sq(t2, t2);
100 for (i = 1; i < 50; ++i) {
101 fe25519_sq(t2, t2);
102 }
103 fe25519_mul(t1, t2, t1);
104 fe25519_sq(t1, t1);
105 for (i = 1; i < 5; ++i) {
106 fe25519_sq(t1, t1);
107 }
108 fe25519_mul(out, t1, t0);
109 }
110
111 static void
fe25519_pow22523(fe25519 out,const fe25519 z)112 fe25519_pow22523(fe25519 out, const fe25519 z)
113 {
114 fe25519 t0;
115 fe25519 t1;
116 fe25519 t2;
117 int i;
118
119 fe25519_sq(t0, z);
120 fe25519_sq(t1, t0);
121 fe25519_sq(t1, t1);
122 fe25519_mul(t1, z, t1);
123 fe25519_mul(t0, t0, t1);
124 fe25519_sq(t0, t0);
125 fe25519_mul(t0, t1, t0);
126 fe25519_sq(t1, t0);
127 for (i = 1; i < 5; ++i) {
128 fe25519_sq(t1, t1);
129 }
130 fe25519_mul(t0, t1, t0);
131 fe25519_sq(t1, t0);
132 for (i = 1; i < 10; ++i) {
133 fe25519_sq(t1, t1);
134 }
135 fe25519_mul(t1, t1, t0);
136 fe25519_sq(t2, t1);
137 for (i = 1; i < 20; ++i) {
138 fe25519_sq(t2, t2);
139 }
140 fe25519_mul(t1, t2, t1);
141 fe25519_sq(t1, t1);
142 for (i = 1; i < 10; ++i) {
143 fe25519_sq(t1, t1);
144 }
145 fe25519_mul(t0, t1, t0);
146 fe25519_sq(t1, t0);
147 for (i = 1; i < 50; ++i) {
148 fe25519_sq(t1, t1);
149 }
150 fe25519_mul(t1, t1, t0);
151 fe25519_sq(t2, t1);
152 for (i = 1; i < 100; ++i) {
153 fe25519_sq(t2, t2);
154 }
155 fe25519_mul(t1, t2, t1);
156 fe25519_sq(t1, t1);
157 for (i = 1; i < 50; ++i) {
158 fe25519_sq(t1, t1);
159 }
160 fe25519_mul(t0, t1, t0);
161 fe25519_sq(t0, t0);
162 fe25519_sq(t0, t0);
163 fe25519_mul(out, t0, z);
164 }
165
166 /*
167 r = p + q
168 */
169
170 void
ge25519_add(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_cached * q)171 ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q)
172 {
173 fe25519 t0;
174
175 fe25519_add(r->X, p->Y, p->X);
176 fe25519_sub(r->Y, p->Y, p->X);
177 fe25519_mul(r->Z, r->X, q->YplusX);
178 fe25519_mul(r->Y, r->Y, q->YminusX);
179 fe25519_mul(r->T, q->T2d, p->T);
180 fe25519_mul(r->X, p->Z, q->Z);
181 fe25519_add(t0, r->X, r->X);
182 fe25519_sub(r->X, r->Z, r->Y);
183 fe25519_add(r->Y, r->Z, r->Y);
184 fe25519_add(r->Z, t0, r->T);
185 fe25519_sub(r->T, t0, r->T);
186 }
187
188 static void
slide_vartime(signed char * r,const unsigned char * a)189 slide_vartime(signed char *r, const unsigned char *a)
190 {
191 int i;
192 int b;
193 int k;
194 int ribs;
195 int cmp;
196
197 for (i = 0; i < 256; ++i) {
198 r[i] = 1 & (a[i >> 3] >> (i & 7));
199 }
200 for (i = 0; i < 256; ++i) {
201 if (! r[i]) {
202 continue;
203 }
204 for (b = 1; b <= 6 && i + b < 256; ++b) {
205 if (! r[i + b]) {
206 continue;
207 }
208 ribs = r[i + b] << b;
209 cmp = r[i] + ribs;
210 if (cmp <= 15) {
211 r[i] = cmp;
212 r[i + b] = 0;
213 } else {
214 cmp = r[i] - ribs;
215 if (cmp < -15) {
216 break;
217 }
218 r[i] = cmp;
219 for (k = i + b; k < 256; ++k) {
220 if (! r[k]) {
221 r[k] = 1;
222 break;
223 }
224 r[k] = 0;
225 }
226 }
227 }
228 }
229 }
230
231 int
ge25519_frombytes(ge25519_p3 * h,const unsigned char * s)232 ge25519_frombytes(ge25519_p3 *h, const unsigned char *s)
233 {
234 fe25519 u;
235 fe25519 v;
236 fe25519 v3;
237 fe25519 vxx;
238 fe25519 m_root_check, p_root_check;
239 fe25519 negx;
240 fe25519 x_sqrtm1;
241 int has_m_root, has_p_root;
242
243 fe25519_frombytes(h->Y, s);
244 fe25519_1(h->Z);
245 fe25519_sq(u, h->Y);
246 fe25519_mul(v, u, d);
247 fe25519_sub(u, u, h->Z); /* u = y^2-1 */
248 fe25519_add(v, v, h->Z); /* v = dy^2+1 */
249
250 fe25519_sq(v3, v);
251 fe25519_mul(v3, v3, v); /* v3 = v^3 */
252 fe25519_sq(h->X, v3);
253 fe25519_mul(h->X, h->X, v);
254 fe25519_mul(h->X, h->X, u); /* x = uv^7 */
255
256 fe25519_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
257 fe25519_mul(h->X, h->X, v3);
258 fe25519_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
259
260 fe25519_sq(vxx, h->X);
261 fe25519_mul(vxx, vxx, v);
262 fe25519_sub(m_root_check, vxx, u); /* vx^2-u */
263 fe25519_add(p_root_check, vxx, u); /* vx^2+u */
264 has_m_root = fe25519_iszero(m_root_check);
265 has_p_root = fe25519_iszero(p_root_check);
266 fe25519_mul(x_sqrtm1, h->X, sqrtm1); /* x*sqrt(-1) */
267 fe25519_cmov(h->X, x_sqrtm1, 1 - has_m_root);
268
269 fe25519_neg(negx, h->X);
270 fe25519_cmov(h->X, negx, fe25519_isnegative(h->X) ^ (s[31] >> 7));
271 fe25519_mul(h->T, h->X, h->Y);
272
273 return (has_m_root | has_p_root) - 1;
274 }
275
276 int
ge25519_frombytes_negate_vartime(ge25519_p3 * h,const unsigned char * s)277 ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s)
278 {
279 fe25519 u;
280 fe25519 v;
281 fe25519 v3;
282 fe25519 vxx;
283 fe25519 m_root_check, p_root_check;
284
285 fe25519_frombytes(h->Y, s);
286 fe25519_1(h->Z);
287 fe25519_sq(u, h->Y);
288 fe25519_mul(v, u, d);
289 fe25519_sub(u, u, h->Z); /* u = y^2-1 */
290 fe25519_add(v, v, h->Z); /* v = dy^2+1 */
291
292 fe25519_sq(v3, v);
293 fe25519_mul(v3, v3, v); /* v3 = v^3 */
294 fe25519_sq(h->X, v3);
295 fe25519_mul(h->X, h->X, v);
296 fe25519_mul(h->X, h->X, u); /* x = uv^7 */
297
298 fe25519_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
299 fe25519_mul(h->X, h->X, v3);
300 fe25519_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
301
302 fe25519_sq(vxx, h->X);
303 fe25519_mul(vxx, vxx, v);
304 fe25519_sub(m_root_check, vxx, u); /* vx^2-u */
305 if (fe25519_iszero(m_root_check) == 0) {
306 fe25519_add(p_root_check, vxx, u); /* vx^2+u */
307 if (fe25519_iszero(p_root_check) == 0) {
308 return -1;
309 }
310 fe25519_mul(h->X, h->X, sqrtm1);
311 }
312
313 if (fe25519_isnegative(h->X) == (s[31] >> 7)) {
314 fe25519_neg(h->X, h->X);
315 }
316 fe25519_mul(h->T, h->X, h->Y);
317
318 return 0;
319 }
320
321 /*
322 r = p + q
323 */
324
325 static void
ge25519_madd(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_precomp * q)326 ge25519_madd(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_precomp *q)
327 {
328 fe25519 t0;
329
330 fe25519_add(r->X, p->Y, p->X);
331 fe25519_sub(r->Y, p->Y, p->X);
332 fe25519_mul(r->Z, r->X, q->yplusx);
333 fe25519_mul(r->Y, r->Y, q->yminusx);
334 fe25519_mul(r->T, q->xy2d, p->T);
335 fe25519_add(t0, p->Z, p->Z);
336 fe25519_sub(r->X, r->Z, r->Y);
337 fe25519_add(r->Y, r->Z, r->Y);
338 fe25519_add(r->Z, t0, r->T);
339 fe25519_sub(r->T, t0, r->T);
340 }
341
342 /*
343 r = p - q
344 */
345
346 static void
ge25519_msub(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_precomp * q)347 ge25519_msub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_precomp *q)
348 {
349 fe25519 t0;
350
351 fe25519_add(r->X, p->Y, p->X);
352 fe25519_sub(r->Y, p->Y, p->X);
353 fe25519_mul(r->Z, r->X, q->yminusx);
354 fe25519_mul(r->Y, r->Y, q->yplusx);
355 fe25519_mul(r->T, q->xy2d, p->T);
356 fe25519_add(t0, p->Z, p->Z);
357 fe25519_sub(r->X, r->Z, r->Y);
358 fe25519_add(r->Y, r->Z, r->Y);
359 fe25519_sub(r->Z, t0, r->T);
360 fe25519_add(r->T, t0, r->T);
361 }
362
363 /*
364 r = p
365 */
366
367 void
ge25519_p1p1_to_p2(ge25519_p2 * r,const ge25519_p1p1 * p)368 ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p)
369 {
370 fe25519_mul(r->X, p->X, p->T);
371 fe25519_mul(r->Y, p->Y, p->Z);
372 fe25519_mul(r->Z, p->Z, p->T);
373 }
374
375 /*
376 r = p
377 */
378
379 void
ge25519_p1p1_to_p3(ge25519_p3 * r,const ge25519_p1p1 * p)380 ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p)
381 {
382 fe25519_mul(r->X, p->X, p->T);
383 fe25519_mul(r->Y, p->Y, p->Z);
384 fe25519_mul(r->Z, p->Z, p->T);
385 fe25519_mul(r->T, p->X, p->Y);
386 }
387
388 static void
ge25519_p2_0(ge25519_p2 * h)389 ge25519_p2_0(ge25519_p2 *h)
390 {
391 fe25519_0(h->X);
392 fe25519_1(h->Y);
393 fe25519_1(h->Z);
394 }
395
396 /*
397 r = 2 * p
398 */
399
400 static void
ge25519_p2_dbl(ge25519_p1p1 * r,const ge25519_p2 * p)401 ge25519_p2_dbl(ge25519_p1p1 *r, const ge25519_p2 *p)
402 {
403 fe25519 t0;
404
405 fe25519_sq(r->X, p->X);
406 fe25519_sq(r->Z, p->Y);
407 fe25519_sq2(r->T, p->Z);
408 fe25519_add(r->Y, p->X, p->Y);
409 fe25519_sq(t0, r->Y);
410 fe25519_add(r->Y, r->Z, r->X);
411 fe25519_sub(r->Z, r->Z, r->X);
412 fe25519_sub(r->X, t0, r->Y);
413 fe25519_sub(r->T, r->T, r->Z);
414 }
415
416 static void
ge25519_p3_0(ge25519_p3 * h)417 ge25519_p3_0(ge25519_p3 *h)
418 {
419 fe25519_0(h->X);
420 fe25519_1(h->Y);
421 fe25519_1(h->Z);
422 fe25519_0(h->T);
423 }
424
425 static void
ge25519_cached_0(ge25519_cached * h)426 ge25519_cached_0(ge25519_cached *h)
427 {
428 fe25519_1(h->YplusX);
429 fe25519_1(h->YminusX);
430 fe25519_1(h->Z);
431 fe25519_0(h->T2d);
432 }
433
434 /*
435 r = p
436 */
437
438 void
ge25519_p3_to_cached(ge25519_cached * r,const ge25519_p3 * p)439 ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p)
440 {
441 fe25519_add(r->YplusX, p->Y, p->X);
442 fe25519_sub(r->YminusX, p->Y, p->X);
443 fe25519_copy(r->Z, p->Z);
444 fe25519_mul(r->T2d, p->T, d2);
445 }
446
447 static void
ge25519_p3_to_precomp(ge25519_precomp * pi,const ge25519_p3 * p)448 ge25519_p3_to_precomp(ge25519_precomp *pi, const ge25519_p3 *p)
449 {
450 fe25519 recip;
451 fe25519 x;
452 fe25519 y;
453 fe25519 xy;
454
455 fe25519_invert(recip, p->Z);
456 fe25519_mul(x, p->X, recip);
457 fe25519_mul(y, p->Y, recip);
458 fe25519_add(pi->yplusx, y, x);
459 fe25519_sub(pi->yminusx, y, x);
460 fe25519_mul(xy, x, y);
461 fe25519_mul(pi->xy2d, xy, d2);
462 }
463
464 /*
465 r = p
466 */
467
468 static void
ge25519_p3_to_p2(ge25519_p2 * r,const ge25519_p3 * p)469 ge25519_p3_to_p2(ge25519_p2 *r, const ge25519_p3 *p)
470 {
471 fe25519_copy(r->X, p->X);
472 fe25519_copy(r->Y, p->Y);
473 fe25519_copy(r->Z, p->Z);
474 }
475
476 void
ge25519_p3_tobytes(unsigned char * s,const ge25519_p3 * h)477 ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h)
478 {
479 fe25519 recip;
480 fe25519 x;
481 fe25519 y;
482
483 fe25519_invert(recip, h->Z);
484 fe25519_mul(x, h->X, recip);
485 fe25519_mul(y, h->Y, recip);
486 fe25519_tobytes(s, y);
487 s[31] ^= fe25519_isnegative(x) << 7;
488 }
489
490 /*
491 r = 2 * p
492 */
493
494 static void
ge25519_p3_dbl(ge25519_p1p1 * r,const ge25519_p3 * p)495 ge25519_p3_dbl(ge25519_p1p1 *r, const ge25519_p3 *p)
496 {
497 ge25519_p2 q;
498 ge25519_p3_to_p2(&q, p);
499 ge25519_p2_dbl(r, &q);
500 }
501
502 static void
ge25519_precomp_0(ge25519_precomp * h)503 ge25519_precomp_0(ge25519_precomp *h)
504 {
505 fe25519_1(h->yplusx);
506 fe25519_1(h->yminusx);
507 fe25519_0(h->xy2d);
508 }
509
510 static unsigned char
equal(signed char b,signed char c)511 equal(signed char b, signed char c)
512 {
513 unsigned char ub = b;
514 unsigned char uc = c;
515 unsigned char x = ub ^ uc; /* 0: yes; 1..255: no */
516 uint32_t y = x; /* 0: yes; 1..255: no */
517
518 y -= 1; /* 4294967295: yes; 0..254: no */
519 y >>= 31; /* 1: yes; 0: no */
520
521 return y;
522 }
523
524 static unsigned char
negative(signed char b)525 negative(signed char b)
526 {
527 /* 18446744073709551361..18446744073709551615: yes; 0..255: no */
528 uint64_t x = b;
529
530 x >>= 63; /* 1: yes; 0: no */
531
532 return x;
533 }
534
535 static void
ge25519_cmov(ge25519_precomp * t,const ge25519_precomp * u,unsigned char b)536 ge25519_cmov(ge25519_precomp *t, const ge25519_precomp *u, unsigned char b)
537 {
538 fe25519_cmov(t->yplusx, u->yplusx, b);
539 fe25519_cmov(t->yminusx, u->yminusx, b);
540 fe25519_cmov(t->xy2d, u->xy2d, b);
541 }
542
543 static void
ge25519_cmov_cached(ge25519_cached * t,const ge25519_cached * u,unsigned char b)544 ge25519_cmov_cached(ge25519_cached *t, const ge25519_cached *u, unsigned char b)
545 {
546 fe25519_cmov(t->YplusX, u->YplusX, b);
547 fe25519_cmov(t->YminusX, u->YminusX, b);
548 fe25519_cmov(t->Z, u->Z, b);
549 fe25519_cmov(t->T2d, u->T2d, b);
550 }
551
552 static void
ge25519_select(ge25519_precomp * t,const ge25519_precomp precomp[8],const signed char b)553 ge25519_select(ge25519_precomp *t, const ge25519_precomp precomp[8], const signed char b)
554 {
555 ge25519_precomp minust;
556 const unsigned char bnegative = negative(b);
557 const unsigned char babs = b - (((-bnegative) & b) * ((signed char) 1 << 1));
558
559 ge25519_precomp_0(t);
560 ge25519_cmov(t, &precomp[0], equal(babs, 1));
561 ge25519_cmov(t, &precomp[1], equal(babs, 2));
562 ge25519_cmov(t, &precomp[2], equal(babs, 3));
563 ge25519_cmov(t, &precomp[3], equal(babs, 4));
564 ge25519_cmov(t, &precomp[4], equal(babs, 5));
565 ge25519_cmov(t, &precomp[5], equal(babs, 6));
566 ge25519_cmov(t, &precomp[6], equal(babs, 7));
567 ge25519_cmov(t, &precomp[7], equal(babs, 8));
568 fe25519_copy(minust.yplusx, t->yminusx);
569 fe25519_copy(minust.yminusx, t->yplusx);
570 fe25519_neg(minust.xy2d, t->xy2d);
571 ge25519_cmov(t, &minust, bnegative);
572 }
573
574 static void
ge25519_select_base(ge25519_precomp * t,const int pos,const signed char b)575 ge25519_select_base(ge25519_precomp *t, const int pos, const signed char b)
576 {
577 static const ge25519_precomp base[32][8] = { /* base[i][j] = (j+1)*256^i*B */
578 #ifdef HAVE_TI_MODE
579 # include "fe_51/base.h"
580 #else
581 # include "fe_25_5/base.h"
582 #endif
583 };
584 ge25519_select(t, base[pos], b);
585 }
586
587 static void
ge25519_select_cached(ge25519_cached * t,const ge25519_cached cached[8],const signed char b)588 ge25519_select_cached(ge25519_cached *t, const ge25519_cached cached[8], const signed char b)
589 {
590 ge25519_cached minust;
591 const unsigned char bnegative = negative(b);
592 const unsigned char babs = b - (((-bnegative) & b) * ((signed char) 1 << 1));
593
594 ge25519_cached_0(t);
595 ge25519_cmov_cached(t, &cached[0], equal(babs, 1));
596 ge25519_cmov_cached(t, &cached[1], equal(babs, 2));
597 ge25519_cmov_cached(t, &cached[2], equal(babs, 3));
598 ge25519_cmov_cached(t, &cached[3], equal(babs, 4));
599 ge25519_cmov_cached(t, &cached[4], equal(babs, 5));
600 ge25519_cmov_cached(t, &cached[5], equal(babs, 6));
601 ge25519_cmov_cached(t, &cached[6], equal(babs, 7));
602 ge25519_cmov_cached(t, &cached[7], equal(babs, 8));
603 fe25519_copy(minust.YplusX, t->YminusX);
604 fe25519_copy(minust.YminusX, t->YplusX);
605 fe25519_copy(minust.Z, t->Z);
606 fe25519_neg(minust.T2d, t->T2d);
607 ge25519_cmov_cached(t, &minust, bnegative);
608 }
609
610 /*
611 r = p - q
612 */
613
614 void
ge25519_sub(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_cached * q)615 ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q)
616 {
617 fe25519 t0;
618
619 fe25519_add(r->X, p->Y, p->X);
620 fe25519_sub(r->Y, p->Y, p->X);
621 fe25519_mul(r->Z, r->X, q->YminusX);
622 fe25519_mul(r->Y, r->Y, q->YplusX);
623 fe25519_mul(r->T, q->T2d, p->T);
624 fe25519_mul(r->X, p->Z, q->Z);
625 fe25519_add(t0, r->X, r->X);
626 fe25519_sub(r->X, r->Z, r->Y);
627 fe25519_add(r->Y, r->Z, r->Y);
628 fe25519_sub(r->Z, t0, r->T);
629 fe25519_add(r->T, t0, r->T);
630 }
631
632 void
ge25519_tobytes(unsigned char * s,const ge25519_p2 * h)633 ge25519_tobytes(unsigned char *s, const ge25519_p2 *h)
634 {
635 fe25519 recip;
636 fe25519 x;
637 fe25519 y;
638
639 fe25519_invert(recip, h->Z);
640 fe25519_mul(x, h->X, recip);
641 fe25519_mul(y, h->Y, recip);
642 fe25519_tobytes(s, y);
643 s[31] ^= fe25519_isnegative(x) << 7;
644 }
645
646 /*
647 r = a * A + b * B
648 where a = a[0]+256*a[1]+...+256^31 a[31].
649 and b = b[0]+256*b[1]+...+256^31 b[31].
650 B is the Ed25519 base point (x,4/5) with x positive.
651
652 Only used for signatures verification.
653 */
654
655 void
ge25519_double_scalarmult_vartime(ge25519_p2 * r,const unsigned char * a,const ge25519_p3 * A,const unsigned char * b)656 ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a,
657 const ge25519_p3 *A, const unsigned char *b)
658 {
659 static const ge25519_precomp Bi[8] = {
660 #ifdef HAVE_TI_MODE
661 # include "fe_51/base2.h"
662 #else
663 # include "fe_25_5/base2.h"
664 #endif
665 };
666 signed char aslide[256];
667 signed char bslide[256];
668 ge25519_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
669 ge25519_p1p1 t;
670 ge25519_p3 u;
671 ge25519_p3 A2;
672 int i;
673
674 slide_vartime(aslide, a);
675 slide_vartime(bslide, b);
676
677 ge25519_p3_to_cached(&Ai[0], A);
678
679 ge25519_p3_dbl(&t, A);
680 ge25519_p1p1_to_p3(&A2, &t);
681
682 ge25519_add(&t, &A2, &Ai[0]);
683 ge25519_p1p1_to_p3(&u, &t);
684 ge25519_p3_to_cached(&Ai[1], &u);
685
686 ge25519_add(&t, &A2, &Ai[1]);
687 ge25519_p1p1_to_p3(&u, &t);
688 ge25519_p3_to_cached(&Ai[2], &u);
689
690 ge25519_add(&t, &A2, &Ai[2]);
691 ge25519_p1p1_to_p3(&u, &t);
692 ge25519_p3_to_cached(&Ai[3], &u);
693
694 ge25519_add(&t, &A2, &Ai[3]);
695 ge25519_p1p1_to_p3(&u, &t);
696 ge25519_p3_to_cached(&Ai[4], &u);
697
698 ge25519_add(&t, &A2, &Ai[4]);
699 ge25519_p1p1_to_p3(&u, &t);
700 ge25519_p3_to_cached(&Ai[5], &u);
701
702 ge25519_add(&t, &A2, &Ai[5]);
703 ge25519_p1p1_to_p3(&u, &t);
704 ge25519_p3_to_cached(&Ai[6], &u);
705
706 ge25519_add(&t, &A2, &Ai[6]);
707 ge25519_p1p1_to_p3(&u, &t);
708 ge25519_p3_to_cached(&Ai[7], &u);
709
710 ge25519_p2_0(r);
711
712 for (i = 255; i >= 0; --i) {
713 if (aslide[i] || bslide[i]) {
714 break;
715 }
716 }
717
718 for (; i >= 0; --i) {
719 ge25519_p2_dbl(&t, r);
720
721 if (aslide[i] > 0) {
722 ge25519_p1p1_to_p3(&u, &t);
723 ge25519_add(&t, &u, &Ai[aslide[i] / 2]);
724 } else if (aslide[i] < 0) {
725 ge25519_p1p1_to_p3(&u, &t);
726 ge25519_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
727 }
728
729 if (bslide[i] > 0) {
730 ge25519_p1p1_to_p3(&u, &t);
731 ge25519_madd(&t, &u, &Bi[bslide[i] / 2]);
732 } else if (bslide[i] < 0) {
733 ge25519_p1p1_to_p3(&u, &t);
734 ge25519_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
735 }
736
737 ge25519_p1p1_to_p2(r, &t);
738 }
739 }
740
741 /*
742 h = a * p
743 where a = a[0]+256*a[1]+...+256^31 a[31]
744
745 Preconditions:
746 a[31] <= 127
747
748 p is public
749 */
750
751 void
ge25519_scalarmult(ge25519_p3 * h,const unsigned char * a,const ge25519_p3 * p)752 ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a, const ge25519_p3 *p)
753 {
754 signed char e[64];
755 signed char carry;
756 ge25519_p1p1 r;
757 ge25519_p2 s;
758 ge25519_p1p1 t2, t3, t4, t5, t6, t7, t8;
759 ge25519_p3 p2, p3, p4, p5, p6, p7, p8;
760 ge25519_cached pi[8];
761 ge25519_cached t;
762 int i;
763
764 ge25519_p3_to_cached(&pi[1 - 1], p); /* p */
765
766 ge25519_p3_dbl(&t2, p);
767 ge25519_p1p1_to_p3(&p2, &t2);
768 ge25519_p3_to_cached(&pi[2 - 1], &p2); /* 2p = 2*p */
769
770 ge25519_add(&t3, p, &pi[2 - 1]);
771 ge25519_p1p1_to_p3(&p3, &t3);
772 ge25519_p3_to_cached(&pi[3 - 1], &p3); /* 3p = 2p+p */
773
774 ge25519_p3_dbl(&t4, &p2);
775 ge25519_p1p1_to_p3(&p4, &t4);
776 ge25519_p3_to_cached(&pi[4 - 1], &p4); /* 4p = 2*2p */
777
778 ge25519_add(&t5, p, &pi[4 - 1]);
779 ge25519_p1p1_to_p3(&p5, &t5);
780 ge25519_p3_to_cached(&pi[5 - 1], &p5); /* 5p = 4p+p */
781
782 ge25519_p3_dbl(&t6, &p3);
783 ge25519_p1p1_to_p3(&p6, &t6);
784 ge25519_p3_to_cached(&pi[6 - 1], &p6); /* 6p = 2*3p */
785
786 ge25519_add(&t7, p, &pi[6 - 1]);
787 ge25519_p1p1_to_p3(&p7, &t7);
788 ge25519_p3_to_cached(&pi[7 - 1], &p7); /* 7p = 6p+p */
789
790 ge25519_p3_dbl(&t8, &p4);
791 ge25519_p1p1_to_p3(&p8, &t8);
792 ge25519_p3_to_cached(&pi[8 - 1], &p8); /* 8p = 2*4p */
793
794 for (i = 0; i < 32; ++i) {
795 e[2 * i + 0] = (a[i] >> 0) & 15;
796 e[2 * i + 1] = (a[i] >> 4) & 15;
797 }
798 /* each e[i] is between 0 and 15 */
799 /* e[63] is between 0 and 7 */
800
801 carry = 0;
802 for (i = 0; i < 63; ++i) {
803 e[i] += carry;
804 carry = e[i] + 8;
805 carry >>= 4;
806 e[i] -= carry * ((signed char) 1 << 4);
807 }
808 e[63] += carry;
809 /* each e[i] is between -8 and 8 */
810
811 ge25519_p3_0(h);
812
813 for (i = 63; i != 0; i--) {
814 ge25519_select_cached(&t, pi, e[i]);
815 ge25519_add(&r, h, &t);
816
817 ge25519_p1p1_to_p2(&s, &r);
818 ge25519_p2_dbl(&r, &s);
819 ge25519_p1p1_to_p2(&s, &r);
820 ge25519_p2_dbl(&r, &s);
821 ge25519_p1p1_to_p2(&s, &r);
822 ge25519_p2_dbl(&r, &s);
823 ge25519_p1p1_to_p2(&s, &r);
824 ge25519_p2_dbl(&r, &s);
825
826 ge25519_p1p1_to_p3(h, &r); /* *16 */
827 }
828 ge25519_select_cached(&t, pi, e[i]);
829 ge25519_add(&r, h, &t);
830
831 ge25519_p1p1_to_p3(h, &r);
832 }
833
834 /*
835 h = a * B (with precomputation)
836 where a = a[0]+256*a[1]+...+256^31 a[31]
837 B is the Ed25519 base point (x,4/5) with x positive
838 (as bytes: 0x5866666666666666666666666666666666666666666666666666666666666666)
839
840 Preconditions:
841 a[31] <= 127
842 */
843
844 void
ge25519_scalarmult_base(ge25519_p3 * h,const unsigned char * a)845 ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a)
846 {
847 signed char e[64];
848 signed char carry;
849 ge25519_p1p1 r;
850 ge25519_p2 s;
851 ge25519_precomp t;
852 int i;
853
854 for (i = 0; i < 32; ++i) {
855 e[2 * i + 0] = (a[i] >> 0) & 15;
856 e[2 * i + 1] = (a[i] >> 4) & 15;
857 }
858 /* each e[i] is between 0 and 15 */
859 /* e[63] is between 0 and 7 */
860
861 carry = 0;
862 for (i = 0; i < 63; ++i) {
863 e[i] += carry;
864 carry = e[i] + 8;
865 carry >>= 4;
866 e[i] -= carry * ((signed char) 1 << 4);
867 }
868 e[63] += carry;
869 /* each e[i] is between -8 and 8 */
870
871 ge25519_p3_0(h);
872
873 for (i = 1; i < 64; i += 2) {
874 ge25519_select_base(&t, i / 2, e[i]);
875 ge25519_madd(&r, h, &t);
876 ge25519_p1p1_to_p3(h, &r);
877 }
878
879 ge25519_p3_dbl(&r, h);
880 ge25519_p1p1_to_p2(&s, &r);
881 ge25519_p2_dbl(&r, &s);
882 ge25519_p1p1_to_p2(&s, &r);
883 ge25519_p2_dbl(&r, &s);
884 ge25519_p1p1_to_p2(&s, &r);
885 ge25519_p2_dbl(&r, &s);
886 ge25519_p1p1_to_p3(h, &r);
887
888 for (i = 0; i < 64; i += 2) {
889 ge25519_select_base(&t, i / 2, e[i]);
890 ge25519_madd(&r, h, &t);
891 ge25519_p1p1_to_p3(h, &r);
892 }
893 }
894
895 /* multiply by the order of the main subgroup l = 2^252+27742317777372353535851937790883648493 */
896 static void
ge25519_mul_l(ge25519_p3 * r,const ge25519_p3 * A)897 ge25519_mul_l(ge25519_p3 *r, const ge25519_p3 *A)
898 {
899 static const signed char aslide[253] = {
900 13, 0, 0, 0, 0, -1, 0, 0, 0, 0, -11, 0, 0, 0, 0, 0, 0, -5, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, -13, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, -13, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, -13, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 3, 0, 0, 0, 0, -11, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
901 };
902 ge25519_cached Ai[8];
903 ge25519_p1p1 t;
904 ge25519_p3 u;
905 ge25519_p3 A2;
906 int i;
907
908 ge25519_p3_to_cached(&Ai[0], A);
909 ge25519_p3_dbl(&t, A);
910 ge25519_p1p1_to_p3(&A2, &t);
911 ge25519_add(&t, &A2, &Ai[0]);
912 ge25519_p1p1_to_p3(&u, &t);
913 ge25519_p3_to_cached(&Ai[1], &u);
914 ge25519_add(&t, &A2, &Ai[1]);
915 ge25519_p1p1_to_p3(&u, &t);
916 ge25519_p3_to_cached(&Ai[2], &u);
917 ge25519_add(&t, &A2, &Ai[2]);
918 ge25519_p1p1_to_p3(&u, &t);
919 ge25519_p3_to_cached(&Ai[3], &u);
920 ge25519_add(&t, &A2, &Ai[3]);
921 ge25519_p1p1_to_p3(&u, &t);
922 ge25519_p3_to_cached(&Ai[4], &u);
923 ge25519_add(&t, &A2, &Ai[4]);
924 ge25519_p1p1_to_p3(&u, &t);
925 ge25519_p3_to_cached(&Ai[5], &u);
926 ge25519_add(&t, &A2, &Ai[5]);
927 ge25519_p1p1_to_p3(&u, &t);
928 ge25519_p3_to_cached(&Ai[6], &u);
929 ge25519_add(&t, &A2, &Ai[6]);
930 ge25519_p1p1_to_p3(&u, &t);
931 ge25519_p3_to_cached(&Ai[7], &u);
932
933 ge25519_p3_0(r);
934
935 for (i = 252; i >= 0; --i) {
936 ge25519_p3_dbl(&t, r);
937
938 if (aslide[i] > 0) {
939 ge25519_p1p1_to_p3(&u, &t);
940 ge25519_add(&t, &u, &Ai[aslide[i] / 2]);
941 } else if (aslide[i] < 0) {
942 ge25519_p1p1_to_p3(&u, &t);
943 ge25519_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
944 }
945
946 ge25519_p1p1_to_p3(r, &t);
947 }
948 }
949
950 int
ge25519_is_on_curve(const ge25519_p3 * p)951 ge25519_is_on_curve(const ge25519_p3 *p)
952 {
953 fe25519 x2;
954 fe25519 y2;
955 fe25519 z2;
956 fe25519 z4;
957 fe25519 t0;
958 fe25519 t1;
959
960 fe25519_sq(x2, p->X);
961 fe25519_sq(y2, p->Y);
962 fe25519_sq(z2, p->Z);
963 fe25519_sub(t0, y2, x2);
964 fe25519_mul(t0, t0, z2);
965
966 fe25519_mul(t1, x2, y2);
967 fe25519_mul(t1, t1, d);
968 fe25519_sq(z4, z2);
969 fe25519_add(t1, t1, z4);
970 fe25519_sub(t0, t0, t1);
971
972 return fe25519_iszero(t0);
973 }
974
975 int
ge25519_is_on_main_subgroup(const ge25519_p3 * p)976 ge25519_is_on_main_subgroup(const ge25519_p3 *p)
977 {
978 ge25519_p3 pl;
979
980 ge25519_mul_l(&pl, p);
981
982 return fe25519_iszero(pl.X);
983 }
984
985 int
ge25519_is_canonical(const unsigned char * s)986 ge25519_is_canonical(const unsigned char *s)
987 {
988 unsigned char c;
989 unsigned char d;
990 unsigned int i;
991
992 c = (s[31] & 0x7f) ^ 0x7f;
993 for (i = 30; i > 0; i--) {
994 c |= s[i] ^ 0xff;
995 }
996 c = (((unsigned int) c) - 1U) >> 8;
997 d = (0xed - 1U - (unsigned int) s[0]) >> 8;
998
999 return 1 - (c & d & 1);
1000 }
1001
1002 int
ge25519_has_small_order(const unsigned char s[32])1003 ge25519_has_small_order(const unsigned char s[32])
1004 {
1005 CRYPTO_ALIGN(16)
1006 static const unsigned char blacklist[][32] = {
1007 /* 0 (order 4) */
1008 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1009 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1010 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1011 /* 1 (order 1) */
1012 { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1013 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1014 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1015 /* 2707385501144840649318225287225658788936804267575313519463743609750303402022
1016 (order 8) */
1017 { 0x26, 0xe8, 0x95, 0x8f, 0xc2, 0xb2, 0x27, 0xb0, 0x45, 0xc3, 0xf4,
1018 0x89, 0xf2, 0xef, 0x98, 0xf0, 0xd5, 0xdf, 0xac, 0x05, 0xd3, 0xc6,
1019 0x33, 0x39, 0xb1, 0x38, 0x02, 0x88, 0x6d, 0x53, 0xfc, 0x05 },
1020 /* 55188659117513257062467267217118295137698188065244968500265048394206261417927
1021 (order 8) */
1022 { 0xc7, 0x17, 0x6a, 0x70, 0x3d, 0x4d, 0xd8, 0x4f, 0xba, 0x3c, 0x0b,
1023 0x76, 0x0d, 0x10, 0x67, 0x0f, 0x2a, 0x20, 0x53, 0xfa, 0x2c, 0x39,
1024 0xcc, 0xc6, 0x4e, 0xc7, 0xfd, 0x77, 0x92, 0xac, 0x03, 0x7a },
1025 /* p-1 (order 2) */
1026 { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1027 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1028 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
1029 /* p (=0, order 4) */
1030 { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1031 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1032 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
1033 /* p+1 (=1, order 1) */
1034 { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1035 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1036 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }
1037 };
1038 unsigned char c[7] = { 0 };
1039 unsigned int k;
1040 size_t i, j;
1041
1042 COMPILER_ASSERT(7 == sizeof blacklist / sizeof blacklist[0]);
1043 for (j = 0; j < 31; j++) {
1044 for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1045 c[i] |= s[j] ^ blacklist[i][j];
1046 }
1047 }
1048 for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1049 c[i] |= (s[j] & 0x7f) ^ blacklist[i][j];
1050 }
1051 k = 0;
1052 for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1053 k |= (c[i] - 1);
1054 }
1055 return (int) ((k >> 8) & 1);
1056 }
1057
1058 /*
1059 Input:
1060 a[0]+256*a[1]+...+256^31*a[31] = a
1061 b[0]+256*b[1]+...+256^31*b[31] = b
1062 c[0]+256*c[1]+...+256^31*c[31] = c
1063 *
1064 Output:
1065 s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
1066 where l = 2^252 + 27742317777372353535851937790883648493.
1067 */
1068
1069 void
sc25519_muladd(unsigned char * s,const unsigned char * a,const unsigned char * b,const unsigned char * c)1070 sc25519_muladd(unsigned char *s, const unsigned char *a,
1071 const unsigned char *b, const unsigned char *c)
1072 {
1073 int64_t a0 = 2097151 & load_3(a);
1074 int64_t a1 = 2097151 & (load_4(a + 2) >> 5);
1075 int64_t a2 = 2097151 & (load_3(a + 5) >> 2);
1076 int64_t a3 = 2097151 & (load_4(a + 7) >> 7);
1077 int64_t a4 = 2097151 & (load_4(a + 10) >> 4);
1078 int64_t a5 = 2097151 & (load_3(a + 13) >> 1);
1079 int64_t a6 = 2097151 & (load_4(a + 15) >> 6);
1080 int64_t a7 = 2097151 & (load_3(a + 18) >> 3);
1081 int64_t a8 = 2097151 & load_3(a + 21);
1082 int64_t a9 = 2097151 & (load_4(a + 23) >> 5);
1083 int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
1084 int64_t a11 = (load_4(a + 28) >> 7);
1085
1086 int64_t b0 = 2097151 & load_3(b);
1087 int64_t b1 = 2097151 & (load_4(b + 2) >> 5);
1088 int64_t b2 = 2097151 & (load_3(b + 5) >> 2);
1089 int64_t b3 = 2097151 & (load_4(b + 7) >> 7);
1090 int64_t b4 = 2097151 & (load_4(b + 10) >> 4);
1091 int64_t b5 = 2097151 & (load_3(b + 13) >> 1);
1092 int64_t b6 = 2097151 & (load_4(b + 15) >> 6);
1093 int64_t b7 = 2097151 & (load_3(b + 18) >> 3);
1094 int64_t b8 = 2097151 & load_3(b + 21);
1095 int64_t b9 = 2097151 & (load_4(b + 23) >> 5);
1096 int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
1097 int64_t b11 = (load_4(b + 28) >> 7);
1098
1099 int64_t c0 = 2097151 & load_3(c);
1100 int64_t c1 = 2097151 & (load_4(c + 2) >> 5);
1101 int64_t c2 = 2097151 & (load_3(c + 5) >> 2);
1102 int64_t c3 = 2097151 & (load_4(c + 7) >> 7);
1103 int64_t c4 = 2097151 & (load_4(c + 10) >> 4);
1104 int64_t c5 = 2097151 & (load_3(c + 13) >> 1);
1105 int64_t c6 = 2097151 & (load_4(c + 15) >> 6);
1106 int64_t c7 = 2097151 & (load_3(c + 18) >> 3);
1107 int64_t c8 = 2097151 & load_3(c + 21);
1108 int64_t c9 = 2097151 & (load_4(c + 23) >> 5);
1109 int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
1110 int64_t c11 = (load_4(c + 28) >> 7);
1111
1112 int64_t s0;
1113 int64_t s1;
1114 int64_t s2;
1115 int64_t s3;
1116 int64_t s4;
1117 int64_t s5;
1118 int64_t s6;
1119 int64_t s7;
1120 int64_t s8;
1121 int64_t s9;
1122 int64_t s10;
1123 int64_t s11;
1124 int64_t s12;
1125 int64_t s13;
1126 int64_t s14;
1127 int64_t s15;
1128 int64_t s16;
1129 int64_t s17;
1130 int64_t s18;
1131 int64_t s19;
1132 int64_t s20;
1133 int64_t s21;
1134 int64_t s22;
1135 int64_t s23;
1136
1137 int64_t carry0;
1138 int64_t carry1;
1139 int64_t carry2;
1140 int64_t carry3;
1141 int64_t carry4;
1142 int64_t carry5;
1143 int64_t carry6;
1144 int64_t carry7;
1145 int64_t carry8;
1146 int64_t carry9;
1147 int64_t carry10;
1148 int64_t carry11;
1149 int64_t carry12;
1150 int64_t carry13;
1151 int64_t carry14;
1152 int64_t carry15;
1153 int64_t carry16;
1154 int64_t carry17;
1155 int64_t carry18;
1156 int64_t carry19;
1157 int64_t carry20;
1158 int64_t carry21;
1159 int64_t carry22;
1160
1161 s0 = c0 + a0 * b0;
1162 s1 = c1 + a0 * b1 + a1 * b0;
1163 s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
1164 s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
1165 s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
1166 s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
1167 s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 +
1168 a6 * b0;
1169 s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +
1170 a6 * b1 + a7 * b0;
1171 s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +
1172 a6 * b2 + a7 * b1 + a8 * b0;
1173 s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 +
1174 a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
1175 s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 +
1176 a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
1177 s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +
1178 a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
1179 s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 +
1180 a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
1181 s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 +
1182 a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2;
1183 s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 +
1184 a9 * b5 + a10 * b4 + a11 * b3;
1185 s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 +
1186 a10 * b5 + a11 * b4;
1187 s16 =
1188 a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
1189 s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
1190 s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
1191 s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
1192 s20 = a9 * b11 + a10 * b10 + a11 * b9;
1193 s21 = a10 * b11 + a11 * b10;
1194 s22 = a11 * b11;
1195 s23 = 0;
1196
1197 carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
1198 s1 += carry0;
1199 s0 -= carry0 * ((uint64_t) 1L << 21);
1200 carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
1201 s3 += carry2;
1202 s2 -= carry2 * ((uint64_t) 1L << 21);
1203 carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
1204 s5 += carry4;
1205 s4 -= carry4 * ((uint64_t) 1L << 21);
1206 carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1207 s7 += carry6;
1208 s6 -= carry6 * ((uint64_t) 1L << 21);
1209 carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1210 s9 += carry8;
1211 s8 -= carry8 * ((uint64_t) 1L << 21);
1212 carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1213 s11 += carry10;
1214 s10 -= carry10 * ((uint64_t) 1L << 21);
1215 carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
1216 s13 += carry12;
1217 s12 -= carry12 * ((uint64_t) 1L << 21);
1218 carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
1219 s15 += carry14;
1220 s14 -= carry14 * ((uint64_t) 1L << 21);
1221 carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
1222 s17 += carry16;
1223 s16 -= carry16 * ((uint64_t) 1L << 21);
1224 carry18 = (s18 + (int64_t) (1L << 20)) >> 21;
1225 s19 += carry18;
1226 s18 -= carry18 * ((uint64_t) 1L << 21);
1227 carry20 = (s20 + (int64_t) (1L << 20)) >> 21;
1228 s21 += carry20;
1229 s20 -= carry20 * ((uint64_t) 1L << 21);
1230 carry22 = (s22 + (int64_t) (1L << 20)) >> 21;
1231 s23 += carry22;
1232 s22 -= carry22 * ((uint64_t) 1L << 21);
1233
1234 carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
1235 s2 += carry1;
1236 s1 -= carry1 * ((uint64_t) 1L << 21);
1237 carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
1238 s4 += carry3;
1239 s3 -= carry3 * ((uint64_t) 1L << 21);
1240 carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
1241 s6 += carry5;
1242 s5 -= carry5 * ((uint64_t) 1L << 21);
1243 carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1244 s8 += carry7;
1245 s7 -= carry7 * ((uint64_t) 1L << 21);
1246 carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1247 s10 += carry9;
1248 s9 -= carry9 * ((uint64_t) 1L << 21);
1249 carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1250 s12 += carry11;
1251 s11 -= carry11 * ((uint64_t) 1L << 21);
1252 carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
1253 s14 += carry13;
1254 s13 -= carry13 * ((uint64_t) 1L << 21);
1255 carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
1256 s16 += carry15;
1257 s15 -= carry15 * ((uint64_t) 1L << 21);
1258 carry17 = (s17 + (int64_t) (1L << 20)) >> 21;
1259 s18 += carry17;
1260 s17 -= carry17 * ((uint64_t) 1L << 21);
1261 carry19 = (s19 + (int64_t) (1L << 20)) >> 21;
1262 s20 += carry19;
1263 s19 -= carry19 * ((uint64_t) 1L << 21);
1264 carry21 = (s21 + (int64_t) (1L << 20)) >> 21;
1265 s22 += carry21;
1266 s21 -= carry21 * ((uint64_t) 1L << 21);
1267
1268 s11 += s23 * 666643;
1269 s12 += s23 * 470296;
1270 s13 += s23 * 654183;
1271 s14 -= s23 * 997805;
1272 s15 += s23 * 136657;
1273 s16 -= s23 * 683901;
1274
1275 s10 += s22 * 666643;
1276 s11 += s22 * 470296;
1277 s12 += s22 * 654183;
1278 s13 -= s22 * 997805;
1279 s14 += s22 * 136657;
1280 s15 -= s22 * 683901;
1281
1282 s9 += s21 * 666643;
1283 s10 += s21 * 470296;
1284 s11 += s21 * 654183;
1285 s12 -= s21 * 997805;
1286 s13 += s21 * 136657;
1287 s14 -= s21 * 683901;
1288
1289 s8 += s20 * 666643;
1290 s9 += s20 * 470296;
1291 s10 += s20 * 654183;
1292 s11 -= s20 * 997805;
1293 s12 += s20 * 136657;
1294 s13 -= s20 * 683901;
1295
1296 s7 += s19 * 666643;
1297 s8 += s19 * 470296;
1298 s9 += s19 * 654183;
1299 s10 -= s19 * 997805;
1300 s11 += s19 * 136657;
1301 s12 -= s19 * 683901;
1302
1303 s6 += s18 * 666643;
1304 s7 += s18 * 470296;
1305 s8 += s18 * 654183;
1306 s9 -= s18 * 997805;
1307 s10 += s18 * 136657;
1308 s11 -= s18 * 683901;
1309
1310 carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1311 s7 += carry6;
1312 s6 -= carry6 * ((uint64_t) 1L << 21);
1313 carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1314 s9 += carry8;
1315 s8 -= carry8 * ((uint64_t) 1L << 21);
1316 carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1317 s11 += carry10;
1318 s10 -= carry10 * ((uint64_t) 1L << 21);
1319 carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
1320 s13 += carry12;
1321 s12 -= carry12 * ((uint64_t) 1L << 21);
1322 carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
1323 s15 += carry14;
1324 s14 -= carry14 * ((uint64_t) 1L << 21);
1325 carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
1326 s17 += carry16;
1327 s16 -= carry16 * ((uint64_t) 1L << 21);
1328
1329 carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1330 s8 += carry7;
1331 s7 -= carry7 * ((uint64_t) 1L << 21);
1332 carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1333 s10 += carry9;
1334 s9 -= carry9 * ((uint64_t) 1L << 21);
1335 carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1336 s12 += carry11;
1337 s11 -= carry11 * ((uint64_t) 1L << 21);
1338 carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
1339 s14 += carry13;
1340 s13 -= carry13 * ((uint64_t) 1L << 21);
1341 carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
1342 s16 += carry15;
1343 s15 -= carry15 * ((uint64_t) 1L << 21);
1344
1345 s5 += s17 * 666643;
1346 s6 += s17 * 470296;
1347 s7 += s17 * 654183;
1348 s8 -= s17 * 997805;
1349 s9 += s17 * 136657;
1350 s10 -= s17 * 683901;
1351
1352 s4 += s16 * 666643;
1353 s5 += s16 * 470296;
1354 s6 += s16 * 654183;
1355 s7 -= s16 * 997805;
1356 s8 += s16 * 136657;
1357 s9 -= s16 * 683901;
1358
1359 s3 += s15 * 666643;
1360 s4 += s15 * 470296;
1361 s5 += s15 * 654183;
1362 s6 -= s15 * 997805;
1363 s7 += s15 * 136657;
1364 s8 -= s15 * 683901;
1365
1366 s2 += s14 * 666643;
1367 s3 += s14 * 470296;
1368 s4 += s14 * 654183;
1369 s5 -= s14 * 997805;
1370 s6 += s14 * 136657;
1371 s7 -= s14 * 683901;
1372
1373 s1 += s13 * 666643;
1374 s2 += s13 * 470296;
1375 s3 += s13 * 654183;
1376 s4 -= s13 * 997805;
1377 s5 += s13 * 136657;
1378 s6 -= s13 * 683901;
1379
1380 s0 += s12 * 666643;
1381 s1 += s12 * 470296;
1382 s2 += s12 * 654183;
1383 s3 -= s12 * 997805;
1384 s4 += s12 * 136657;
1385 s5 -= s12 * 683901;
1386 s12 = 0;
1387
1388 carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
1389 s1 += carry0;
1390 s0 -= carry0 * ((uint64_t) 1L << 21);
1391 carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
1392 s3 += carry2;
1393 s2 -= carry2 * ((uint64_t) 1L << 21);
1394 carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
1395 s5 += carry4;
1396 s4 -= carry4 * ((uint64_t) 1L << 21);
1397 carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1398 s7 += carry6;
1399 s6 -= carry6 * ((uint64_t) 1L << 21);
1400 carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1401 s9 += carry8;
1402 s8 -= carry8 * ((uint64_t) 1L << 21);
1403 carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1404 s11 += carry10;
1405 s10 -= carry10 * ((uint64_t) 1L << 21);
1406
1407 carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
1408 s2 += carry1;
1409 s1 -= carry1 * ((uint64_t) 1L << 21);
1410 carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
1411 s4 += carry3;
1412 s3 -= carry3 * ((uint64_t) 1L << 21);
1413 carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
1414 s6 += carry5;
1415 s5 -= carry5 * ((uint64_t) 1L << 21);
1416 carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1417 s8 += carry7;
1418 s7 -= carry7 * ((uint64_t) 1L << 21);
1419 carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1420 s10 += carry9;
1421 s9 -= carry9 * ((uint64_t) 1L << 21);
1422 carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1423 s12 += carry11;
1424 s11 -= carry11 * ((uint64_t) 1L << 21);
1425
1426 s0 += s12 * 666643;
1427 s1 += s12 * 470296;
1428 s2 += s12 * 654183;
1429 s3 -= s12 * 997805;
1430 s4 += s12 * 136657;
1431 s5 -= s12 * 683901;
1432 s12 = 0;
1433
1434 carry0 = s0 >> 21;
1435 s1 += carry0;
1436 s0 -= carry0 * ((uint64_t) 1L << 21);
1437 carry1 = s1 >> 21;
1438 s2 += carry1;
1439 s1 -= carry1 * ((uint64_t) 1L << 21);
1440 carry2 = s2 >> 21;
1441 s3 += carry2;
1442 s2 -= carry2 * ((uint64_t) 1L << 21);
1443 carry3 = s3 >> 21;
1444 s4 += carry3;
1445 s3 -= carry3 * ((uint64_t) 1L << 21);
1446 carry4 = s4 >> 21;
1447 s5 += carry4;
1448 s4 -= carry4 * ((uint64_t) 1L << 21);
1449 carry5 = s5 >> 21;
1450 s6 += carry5;
1451 s5 -= carry5 * ((uint64_t) 1L << 21);
1452 carry6 = s6 >> 21;
1453 s7 += carry6;
1454 s6 -= carry6 * ((uint64_t) 1L << 21);
1455 carry7 = s7 >> 21;
1456 s8 += carry7;
1457 s7 -= carry7 * ((uint64_t) 1L << 21);
1458 carry8 = s8 >> 21;
1459 s9 += carry8;
1460 s8 -= carry8 * ((uint64_t) 1L << 21);
1461 carry9 = s9 >> 21;
1462 s10 += carry9;
1463 s9 -= carry9 * ((uint64_t) 1L << 21);
1464 carry10 = s10 >> 21;
1465 s11 += carry10;
1466 s10 -= carry10 * ((uint64_t) 1L << 21);
1467 carry11 = s11 >> 21;
1468 s12 += carry11;
1469 s11 -= carry11 * ((uint64_t) 1L << 21);
1470
1471 s0 += s12 * 666643;
1472 s1 += s12 * 470296;
1473 s2 += s12 * 654183;
1474 s3 -= s12 * 997805;
1475 s4 += s12 * 136657;
1476 s5 -= s12 * 683901;
1477
1478 carry0 = s0 >> 21;
1479 s1 += carry0;
1480 s0 -= carry0 * ((uint64_t) 1L << 21);
1481 carry1 = s1 >> 21;
1482 s2 += carry1;
1483 s1 -= carry1 * ((uint64_t) 1L << 21);
1484 carry2 = s2 >> 21;
1485 s3 += carry2;
1486 s2 -= carry2 * ((uint64_t) 1L << 21);
1487 carry3 = s3 >> 21;
1488 s4 += carry3;
1489 s3 -= carry3 * ((uint64_t) 1L << 21);
1490 carry4 = s4 >> 21;
1491 s5 += carry4;
1492 s4 -= carry4 * ((uint64_t) 1L << 21);
1493 carry5 = s5 >> 21;
1494 s6 += carry5;
1495 s5 -= carry5 * ((uint64_t) 1L << 21);
1496 carry6 = s6 >> 21;
1497 s7 += carry6;
1498 s6 -= carry6 * ((uint64_t) 1L << 21);
1499 carry7 = s7 >> 21;
1500 s8 += carry7;
1501 s7 -= carry7 * ((uint64_t) 1L << 21);
1502 carry8 = s8 >> 21;
1503 s9 += carry8;
1504 s8 -= carry8 * ((uint64_t) 1L << 21);
1505 carry9 = s9 >> 21;
1506 s10 += carry9;
1507 s9 -= carry9 * ((uint64_t) 1L << 21);
1508 carry10 = s10 >> 21;
1509 s11 += carry10;
1510 s10 -= carry10 * ((uint64_t) 1L << 21);
1511
1512 s[0] = s0 >> 0;
1513 s[1] = s0 >> 8;
1514 s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
1515 s[3] = s1 >> 3;
1516 s[4] = s1 >> 11;
1517 s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
1518 s[6] = s2 >> 6;
1519 s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
1520 s[8] = s3 >> 1;
1521 s[9] = s3 >> 9;
1522 s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
1523 s[11] = s4 >> 4;
1524 s[12] = s4 >> 12;
1525 s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
1526 s[14] = s5 >> 7;
1527 s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
1528 s[16] = s6 >> 2;
1529 s[17] = s6 >> 10;
1530 s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
1531 s[19] = s7 >> 5;
1532 s[20] = s7 >> 13;
1533 s[21] = s8 >> 0;
1534 s[22] = s8 >> 8;
1535 s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
1536 s[24] = s9 >> 3;
1537 s[25] = s9 >> 11;
1538 s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
1539 s[27] = s10 >> 6;
1540 s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
1541 s[29] = s11 >> 1;
1542 s[30] = s11 >> 9;
1543 s[31] = s11 >> 17;
1544 }
1545
1546 /*
1547 Input:
1548 s[0]+256*s[1]+...+256^63*s[63] = s
1549 *
1550 Output:
1551 s[0]+256*s[1]+...+256^31*s[31] = s mod l
1552 where l = 2^252 + 27742317777372353535851937790883648493.
1553 Overwrites s in place.
1554 */
1555
1556 void
sc25519_reduce(unsigned char * s)1557 sc25519_reduce(unsigned char *s)
1558 {
1559 int64_t s0 = 2097151 & load_3(s);
1560 int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
1561 int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
1562 int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
1563 int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
1564 int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
1565 int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
1566 int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
1567 int64_t s8 = 2097151 & load_3(s + 21);
1568 int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
1569 int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
1570 int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
1571 int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
1572 int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
1573 int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
1574 int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
1575 int64_t s16 = 2097151 & load_3(s + 42);
1576 int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
1577 int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
1578 int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
1579 int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
1580 int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
1581 int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
1582 int64_t s23 = (load_4(s + 60) >> 3);
1583
1584 int64_t carry0;
1585 int64_t carry1;
1586 int64_t carry2;
1587 int64_t carry3;
1588 int64_t carry4;
1589 int64_t carry5;
1590 int64_t carry6;
1591 int64_t carry7;
1592 int64_t carry8;
1593 int64_t carry9;
1594 int64_t carry10;
1595 int64_t carry11;
1596 int64_t carry12;
1597 int64_t carry13;
1598 int64_t carry14;
1599 int64_t carry15;
1600 int64_t carry16;
1601
1602 s11 += s23 * 666643;
1603 s12 += s23 * 470296;
1604 s13 += s23 * 654183;
1605 s14 -= s23 * 997805;
1606 s15 += s23 * 136657;
1607 s16 -= s23 * 683901;
1608
1609 s10 += s22 * 666643;
1610 s11 += s22 * 470296;
1611 s12 += s22 * 654183;
1612 s13 -= s22 * 997805;
1613 s14 += s22 * 136657;
1614 s15 -= s22 * 683901;
1615
1616 s9 += s21 * 666643;
1617 s10 += s21 * 470296;
1618 s11 += s21 * 654183;
1619 s12 -= s21 * 997805;
1620 s13 += s21 * 136657;
1621 s14 -= s21 * 683901;
1622
1623 s8 += s20 * 666643;
1624 s9 += s20 * 470296;
1625 s10 += s20 * 654183;
1626 s11 -= s20 * 997805;
1627 s12 += s20 * 136657;
1628 s13 -= s20 * 683901;
1629
1630 s7 += s19 * 666643;
1631 s8 += s19 * 470296;
1632 s9 += s19 * 654183;
1633 s10 -= s19 * 997805;
1634 s11 += s19 * 136657;
1635 s12 -= s19 * 683901;
1636
1637 s6 += s18 * 666643;
1638 s7 += s18 * 470296;
1639 s8 += s18 * 654183;
1640 s9 -= s18 * 997805;
1641 s10 += s18 * 136657;
1642 s11 -= s18 * 683901;
1643
1644 carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1645 s7 += carry6;
1646 s6 -= carry6 * ((uint64_t) 1L << 21);
1647 carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1648 s9 += carry8;
1649 s8 -= carry8 * ((uint64_t) 1L << 21);
1650 carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1651 s11 += carry10;
1652 s10 -= carry10 * ((uint64_t) 1L << 21);
1653 carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
1654 s13 += carry12;
1655 s12 -= carry12 * ((uint64_t) 1L << 21);
1656 carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
1657 s15 += carry14;
1658 s14 -= carry14 * ((uint64_t) 1L << 21);
1659 carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
1660 s17 += carry16;
1661 s16 -= carry16 * ((uint64_t) 1L << 21);
1662
1663 carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1664 s8 += carry7;
1665 s7 -= carry7 * ((uint64_t) 1L << 21);
1666 carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1667 s10 += carry9;
1668 s9 -= carry9 * ((uint64_t) 1L << 21);
1669 carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1670 s12 += carry11;
1671 s11 -= carry11 * ((uint64_t) 1L << 21);
1672 carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
1673 s14 += carry13;
1674 s13 -= carry13 * ((uint64_t) 1L << 21);
1675 carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
1676 s16 += carry15;
1677 s15 -= carry15 * ((uint64_t) 1L << 21);
1678
1679 s5 += s17 * 666643;
1680 s6 += s17 * 470296;
1681 s7 += s17 * 654183;
1682 s8 -= s17 * 997805;
1683 s9 += s17 * 136657;
1684 s10 -= s17 * 683901;
1685
1686 s4 += s16 * 666643;
1687 s5 += s16 * 470296;
1688 s6 += s16 * 654183;
1689 s7 -= s16 * 997805;
1690 s8 += s16 * 136657;
1691 s9 -= s16 * 683901;
1692
1693 s3 += s15 * 666643;
1694 s4 += s15 * 470296;
1695 s5 += s15 * 654183;
1696 s6 -= s15 * 997805;
1697 s7 += s15 * 136657;
1698 s8 -= s15 * 683901;
1699
1700 s2 += s14 * 666643;
1701 s3 += s14 * 470296;
1702 s4 += s14 * 654183;
1703 s5 -= s14 * 997805;
1704 s6 += s14 * 136657;
1705 s7 -= s14 * 683901;
1706
1707 s1 += s13 * 666643;
1708 s2 += s13 * 470296;
1709 s3 += s13 * 654183;
1710 s4 -= s13 * 997805;
1711 s5 += s13 * 136657;
1712 s6 -= s13 * 683901;
1713
1714 s0 += s12 * 666643;
1715 s1 += s12 * 470296;
1716 s2 += s12 * 654183;
1717 s3 -= s12 * 997805;
1718 s4 += s12 * 136657;
1719 s5 -= s12 * 683901;
1720 s12 = 0;
1721
1722 carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
1723 s1 += carry0;
1724 s0 -= carry0 * ((uint64_t) 1L << 21);
1725 carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
1726 s3 += carry2;
1727 s2 -= carry2 * ((uint64_t) 1L << 21);
1728 carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
1729 s5 += carry4;
1730 s4 -= carry4 * ((uint64_t) 1L << 21);
1731 carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
1732 s7 += carry6;
1733 s6 -= carry6 * ((uint64_t) 1L << 21);
1734 carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
1735 s9 += carry8;
1736 s8 -= carry8 * ((uint64_t) 1L << 21);
1737 carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
1738 s11 += carry10;
1739 s10 -= carry10 * ((uint64_t) 1L << 21);
1740
1741 carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
1742 s2 += carry1;
1743 s1 -= carry1 * ((uint64_t) 1L << 21);
1744 carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
1745 s4 += carry3;
1746 s3 -= carry3 * ((uint64_t) 1L << 21);
1747 carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
1748 s6 += carry5;
1749 s5 -= carry5 * ((uint64_t) 1L << 21);
1750 carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
1751 s8 += carry7;
1752 s7 -= carry7 * ((uint64_t) 1L << 21);
1753 carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
1754 s10 += carry9;
1755 s9 -= carry9 * ((uint64_t) 1L << 21);
1756 carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
1757 s12 += carry11;
1758 s11 -= carry11 * ((uint64_t) 1L << 21);
1759
1760 s0 += s12 * 666643;
1761 s1 += s12 * 470296;
1762 s2 += s12 * 654183;
1763 s3 -= s12 * 997805;
1764 s4 += s12 * 136657;
1765 s5 -= s12 * 683901;
1766 s12 = 0;
1767
1768 carry0 = s0 >> 21;
1769 s1 += carry0;
1770 s0 -= carry0 * ((uint64_t) 1L << 21);
1771 carry1 = s1 >> 21;
1772 s2 += carry1;
1773 s1 -= carry1 * ((uint64_t) 1L << 21);
1774 carry2 = s2 >> 21;
1775 s3 += carry2;
1776 s2 -= carry2 * ((uint64_t) 1L << 21);
1777 carry3 = s3 >> 21;
1778 s4 += carry3;
1779 s3 -= carry3 * ((uint64_t) 1L << 21);
1780 carry4 = s4 >> 21;
1781 s5 += carry4;
1782 s4 -= carry4 * ((uint64_t) 1L << 21);
1783 carry5 = s5 >> 21;
1784 s6 += carry5;
1785 s5 -= carry5 * ((uint64_t) 1L << 21);
1786 carry6 = s6 >> 21;
1787 s7 += carry6;
1788 s6 -= carry6 * ((uint64_t) 1L << 21);
1789 carry7 = s7 >> 21;
1790 s8 += carry7;
1791 s7 -= carry7 * ((uint64_t) 1L << 21);
1792 carry8 = s8 >> 21;
1793 s9 += carry8;
1794 s8 -= carry8 * ((uint64_t) 1L << 21);
1795 carry9 = s9 >> 21;
1796 s10 += carry9;
1797 s9 -= carry9 * ((uint64_t) 1L << 21);
1798 carry10 = s10 >> 21;
1799 s11 += carry10;
1800 s10 -= carry10 * ((uint64_t) 1L << 21);
1801 carry11 = s11 >> 21;
1802 s12 += carry11;
1803 s11 -= carry11 * ((uint64_t) 1L << 21);
1804
1805 s0 += s12 * 666643;
1806 s1 += s12 * 470296;
1807 s2 += s12 * 654183;
1808 s3 -= s12 * 997805;
1809 s4 += s12 * 136657;
1810 s5 -= s12 * 683901;
1811
1812 carry0 = s0 >> 21;
1813 s1 += carry0;
1814 s0 -= carry0 * ((uint64_t) 1L << 21);
1815 carry1 = s1 >> 21;
1816 s2 += carry1;
1817 s1 -= carry1 * ((uint64_t) 1L << 21);
1818 carry2 = s2 >> 21;
1819 s3 += carry2;
1820 s2 -= carry2 * ((uint64_t) 1L << 21);
1821 carry3 = s3 >> 21;
1822 s4 += carry3;
1823 s3 -= carry3 * ((uint64_t) 1L << 21);
1824 carry4 = s4 >> 21;
1825 s5 += carry4;
1826 s4 -= carry4 * ((uint64_t) 1L << 21);
1827 carry5 = s5 >> 21;
1828 s6 += carry5;
1829 s5 -= carry5 * ((uint64_t) 1L << 21);
1830 carry6 = s6 >> 21;
1831 s7 += carry6;
1832 s6 -= carry6 * ((uint64_t) 1L << 21);
1833 carry7 = s7 >> 21;
1834 s8 += carry7;
1835 s7 -= carry7 * ((uint64_t) 1L << 21);
1836 carry8 = s8 >> 21;
1837 s9 += carry8;
1838 s8 -= carry8 * ((uint64_t) 1L << 21);
1839 carry9 = s9 >> 21;
1840 s10 += carry9;
1841 s9 -= carry9 * ((uint64_t) 1L << 21);
1842 carry10 = s10 >> 21;
1843 s11 += carry10;
1844 s10 -= carry10 * ((uint64_t) 1L << 21);
1845
1846 s[0] = s0 >> 0;
1847 s[1] = s0 >> 8;
1848 s[2] = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
1849 s[3] = s1 >> 3;
1850 s[4] = s1 >> 11;
1851 s[5] = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
1852 s[6] = s2 >> 6;
1853 s[7] = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
1854 s[8] = s3 >> 1;
1855 s[9] = s3 >> 9;
1856 s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
1857 s[11] = s4 >> 4;
1858 s[12] = s4 >> 12;
1859 s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
1860 s[14] = s5 >> 7;
1861 s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
1862 s[16] = s6 >> 2;
1863 s[17] = s6 >> 10;
1864 s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
1865 s[19] = s7 >> 5;
1866 s[20] = s7 >> 13;
1867 s[21] = s8 >> 0;
1868 s[22] = s8 >> 8;
1869 s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
1870 s[24] = s9 >> 3;
1871 s[25] = s9 >> 11;
1872 s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
1873 s[27] = s10 >> 6;
1874 s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
1875 s[29] = s11 >> 1;
1876 s[30] = s11 >> 9;
1877 s[31] = s11 >> 17;
1878 }
1879
1880 int
sc25519_is_canonical(const unsigned char * s)1881 sc25519_is_canonical(const unsigned char *s)
1882 {
1883 /* 2^252+27742317777372353535851937790883648493 */
1884 static const unsigned char L[32] = {
1885 0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7,
1886 0xa2, 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1887 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
1888 };
1889 unsigned char c = 0;
1890 unsigned char n = 1;
1891 unsigned int i = 32;
1892
1893 do {
1894 i--;
1895 c |= ((s[i] - L[i]) >> 8) & n;
1896 n &= ((s[i] ^ L[i]) - 1) >> 8;
1897 } while (i != 0);
1898
1899 return (c != 0);
1900 }
1901
1902 static void
chi25519(fe25519 out,const fe25519 z)1903 chi25519(fe25519 out, const fe25519 z)
1904 {
1905 fe25519 t0, t1, t2, t3;
1906 int i;
1907
1908 fe25519_sq(t0, z);
1909 fe25519_mul(t1, t0, z);
1910 fe25519_sq(t0, t1);
1911 fe25519_sq(t2, t0);
1912 fe25519_sq(t2, t2);
1913 fe25519_mul(t2, t2, t0);
1914 fe25519_mul(t1, t2, z);
1915 fe25519_sq(t2, t1);
1916
1917 for (i = 1; i < 5; i++) {
1918 fe25519_sq(t2, t2);
1919 }
1920 fe25519_mul(t1, t2, t1);
1921 fe25519_sq(t2, t1);
1922 for (i = 1; i < 10; i++) {
1923 fe25519_sq(t2, t2);
1924 }
1925 fe25519_mul(t2, t2, t1);
1926 fe25519_sq(t3, t2);
1927 for (i = 1; i < 20; i++) {
1928 fe25519_sq(t3, t3);
1929 }
1930 fe25519_mul(t2, t3, t2);
1931 fe25519_sq(t2, t2);
1932 for (i = 1; i < 10; i++) {
1933 fe25519_sq(t2, t2);
1934 }
1935 fe25519_mul(t1, t2, t1);
1936 fe25519_sq(t2, t1);
1937 for (i = 1; i < 50; i++) {
1938 fe25519_sq(t2, t2);
1939 }
1940 fe25519_mul(t2, t2, t1);
1941 fe25519_sq(t3, t2);
1942 for (i = 1; i < 100; i++) {
1943 fe25519_sq(t3, t3);
1944 }
1945 fe25519_mul(t2, t3, t2);
1946 fe25519_sq(t2, t2);
1947 for (i = 1; i < 50; i++) {
1948 fe25519_sq(t2, t2);
1949 }
1950 fe25519_mul(t1, t2, t1);
1951 fe25519_sq(t1, t1);
1952 for (i = 1; i < 4; i++) {
1953 fe25519_sq(t1, t1);
1954 }
1955 fe25519_mul(out, t1, t0);
1956 }
1957
1958 void
ge25519_from_uniform(unsigned char s[32],const unsigned char r[32])1959 ge25519_from_uniform(unsigned char s[32], const unsigned char r[32])
1960 {
1961 fe25519 e;
1962 fe25519 negx;
1963 fe25519 rr2;
1964 fe25519 x, x2, x3;
1965 ge25519_p3 p3;
1966 ge25519_p1p1 p1;
1967 ge25519_p2 p2;
1968 unsigned int e_is_minus_1;
1969 unsigned char x_sign;
1970
1971 memcpy(s, r, 32);
1972 x_sign = s[31] & 0x80;
1973 s[31] &= 0x7f;
1974
1975 fe25519_frombytes(rr2, s);
1976
1977 /* elligator */
1978 fe25519_sq2(rr2, rr2);
1979 rr2[0]++;
1980 fe25519_invert(rr2, rr2);
1981 fe25519_mul(x, curve25519_A, rr2);
1982 fe25519_neg(x, x);
1983
1984 fe25519_sq(x2, x);
1985 fe25519_mul(x3, x, x2);
1986 fe25519_add(e, x3, x);
1987 fe25519_mul(x2, x2, curve25519_A);
1988 fe25519_add(e, x2, e);
1989
1990 chi25519(e, e);
1991
1992 fe25519_tobytes(s, e);
1993 e_is_minus_1 = s[1] & 1;
1994 fe25519_neg(negx, x);
1995 fe25519_cmov(x, negx, e_is_minus_1);
1996 fe25519_0(x2);
1997 fe25519_cmov(x2, curve25519_A, e_is_minus_1);
1998 fe25519_sub(x, x, x2);
1999
2000 /* yed = (x-1)/(x+1) */
2001 {
2002 fe25519 one;
2003 fe25519 x_plus_one;
2004 fe25519 x_plus_one_inv;
2005 fe25519 x_minus_one;
2006 fe25519 yed;
2007
2008 fe25519_1(one);
2009 fe25519_add(x_plus_one, x, one);
2010 fe25519_sub(x_minus_one, x, one);
2011 fe25519_invert(x_plus_one_inv, x_plus_one);
2012 fe25519_mul(yed, x_minus_one, x_plus_one_inv);
2013 fe25519_tobytes(s, yed);
2014 }
2015
2016 /* recover x */
2017 s[31] |= x_sign;
2018 if (ge25519_frombytes(&p3, s) != 0) {
2019 abort(); /* LCOV_EXCL_LINE */
2020 }
2021
2022 /* multiply by the cofactor */
2023 ge25519_p3_dbl(&p1, &p3);
2024 ge25519_p1p1_to_p2(&p2, &p1);
2025 ge25519_p2_dbl(&p1, &p2);
2026 ge25519_p1p1_to_p2(&p2, &p1);
2027 ge25519_p2_dbl(&p1, &p2);
2028 ge25519_p1p1_to_p3(&p3, &p1);
2029
2030 ge25519_p3_tobytes(s, &p3);
2031 }
2032