1 /* -*- mode: c; c-basic-offset: 2; indent-tabs-mode: nil -*- */
2 /* This file is adapted from the SPAKE edwards25519 code in BoringSSL. */
3 /*
4 * The MIT License (MIT)
5 *
6 * Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to
10 * deal in the Software without restriction, including without limitation the
11 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12 * sell copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * IN THE SOFTWARE.
25 */
26 /*
27 * Copyright (c) 2015-2016, Google Inc.
28 *
29 * Permission to use, copy, modify, and/or distribute this software for any
30 * purpose with or without fee is hereby granted, provided that the above
31 * copyright notice and this permission notice appear in all copies.
32 *
33 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
34 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
35 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
37 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
38 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
39 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
40 */
41
42 /*
43 * This code is adapted from the BoringSSL edwards25519 SPAKE2 implementation
44 * from third_party/fiat and crypto/spake25519.c, with the following
45 * adaptations:
46 *
47 * - The M and N points are the ones from draft-irtf-cfrg-spake2-05. The
48 * BoringSSL M and N points were determined similarly, but were not
49 * restricted to members of the generator subgroup, so they use only one hash
50 * iteration for both points. The intent in BoringSSL had been to multiply w
51 * by the cofactor so that wM and wN would be in the subgroup, but as that
52 * step was accidentally omitted, a hack had to be introduced after the fact
53 * to add multiples of the prime order to the scalar. That hack is not
54 * present in this code, and the SPAKE preauth spec does not multiply w by
55 * the cofactor as it is unnecessary if M and N are chosen from the subgroup.
56 *
57 * - The SPAKE code is modified to fit the groups.h interface and the SPAKE
58 * preauth spec.
59 *
60 * - The required declarations and code are all here in one file (except for
61 * the generator point table, which is still in a separate header), so all of
62 * the functions are declared static.
63 *
64 * - BORINGSSL_CURVE25519_64BIT is defined here using autoconf tests.
65 *
66 * - curve25519_32.h and curve25519_64.h are combined into edwards25519_fiat.h
67 * (conditionalized on BORINGSSL_CURVE25519_64BIT) for predictable dependency
68 * generation. The fiat_25519_selectznz and fiat_25519_carry_scmul_121666
69 * functions were removed from both branches as they are not used here (the
70 * former because it is not used by the BoringSSL code and the latter because
71 * it is only used by the X25519 code). The fiat_25519_int128 and
72 * fiat_25519_uint128 typedefs were adjusted to work with older versions of
73 * gcc.
74 *
75 * - fe_cmov() has the initial "Silence an unused function warning" part
76 * removed, as we removed fiat_25519_selectznz instead.
77 *
78 * - The field element bounds assertion checks are disabled by default, as they
79 * slow the code down by roughly a factor of two. The
80 * OPENSSL_COMPILE_ASSERT() in fe_copy_lt() is changed to a regular assert
81 * and is also conditionalized. Do a build and "make check" with
82 * EDWARDS25519_ASSERTS defined when updating this code.
83 *
84 * - The copyright comments at the top are formatted the way we do so in other
85 * source files, for ease of extraction.
86 *
87 * - Declarations in for loops conflict with our compiler configuration in
88 * older versions of gcc, so they are moved outside of the for loop.
89 *
90 * - The preprocessor symbol OPENSSL_SMALL is changed to CONFIG_SMALL.
91 *
92 * - OPENSSL_memset and OPENSSL_memmove are changed to memset and memmove, in
93 * each case verifying that they are used with nonzero length arguments.
94 *
95 * - CRYPTO_memcmp is changed to k5_bcmp.
96 *
97 * - Functions used only by X25519 or Ed25519 interfaces but not SPAKE are
98 * removed, taking care to check for unused functions in both the 64-bit and
99 * 32-bit preprocessor branches. ge_p3_dbl() is unused here if CONFIG_SMALL
100 * is defined, so it is placed inside #ifndef CONFIG_SMALL.
101 */
102
103 // Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP
104 // 20141124 (https://bench.cr.yp.to/supercop.html). That code is released as
105 // public domain but parts have been replaced with code generated by Fiat
106 // (https://github.com/mit-plv/fiat-crypto), which is MIT licensed.
107
108 #include "groups.h"
109 #include "iana.h"
110
111 #ifdef __GNUC__
112 #pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
113 #endif
114
115 #if SIZEOF_SIZE_T >= 8 && defined(HAVE___INT128_T) && defined(HAVE___UINT128_T)
116 #define BORINGSSL_CURVE25519_64BIT
117 typedef __int128_t int128_t;
118 typedef __uint128_t uint128_t;
119 #endif
120
121 /* From BoringSSL third-party/fiat/internal.h */
122
123 #if defined(BORINGSSL_CURVE25519_64BIT)
124 // fe means field element. Here the field is \Z/(2^255-19). An element t,
125 // entries t[0]...t[4], represents the integer t[0]+2^51 t[1]+2^102 t[2]+2^153
126 // t[3]+2^204 t[4].
127 // fe limbs are bounded by 1.125*2^51.
128 // Multiplication and carrying produce fe from fe_loose.
129 typedef struct fe { uint64_t v[5]; } fe;
130
131 // fe_loose limbs are bounded by 3.375*2^51.
132 // Addition and subtraction produce fe_loose from (fe, fe).
133 typedef struct fe_loose { uint64_t v[5]; } fe_loose;
134 #else
135 // fe means field element. Here the field is \Z/(2^255-19). An element t,
136 // entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
137 // t[3]+2^102 t[4]+...+2^230 t[9].
138 // fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
139 // Multiplication and carrying produce fe from fe_loose.
140 typedef struct fe { uint32_t v[10]; } fe;
141
142 // fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc.
143 // Addition and subtraction produce fe_loose from (fe, fe).
144 typedef struct fe_loose { uint32_t v[10]; } fe_loose;
145 #endif
146
147 // ge means group element.
148 //
149 // Here the group is the set of pairs (x,y) of field elements (see fe.h)
150 // satisfying -x^2 + y^2 = 1 + d x^2y^2
151 // where d = -121665/121666.
152 //
153 // Representations:
154 // ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
155 // ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
156 // ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
157 // ge_precomp (Duif): (y+x,y-x,2dxy)
158
159 typedef struct {
160 fe X;
161 fe Y;
162 fe Z;
163 } ge_p2;
164
165 typedef struct {
166 fe X;
167 fe Y;
168 fe Z;
169 fe T;
170 } ge_p3;
171
172 typedef struct {
173 fe_loose X;
174 fe_loose Y;
175 fe_loose Z;
176 fe_loose T;
177 } ge_p1p1;
178
179 typedef struct {
180 fe_loose yplusx;
181 fe_loose yminusx;
182 fe_loose xy2d;
183 } ge_precomp;
184
185 typedef struct {
186 fe_loose YplusX;
187 fe_loose YminusX;
188 fe_loose Z;
189 fe_loose T2d;
190 } ge_cached;
191
192 #include "edwards25519_tables.h"
193 #include "edwards25519_fiat.h"
194
195 /* From BoringSSL third-party/fiat/curve25519.c */
196
load_3(const uint8_t * in)197 static uint64_t load_3(const uint8_t *in) {
198 uint64_t result;
199 result = (uint64_t)in[0];
200 result |= ((uint64_t)in[1]) << 8;
201 result |= ((uint64_t)in[2]) << 16;
202 return result;
203 }
204
load_4(const uint8_t * in)205 static uint64_t load_4(const uint8_t *in) {
206 uint64_t result;
207 result = (uint64_t)in[0];
208 result |= ((uint64_t)in[1]) << 8;
209 result |= ((uint64_t)in[2]) << 16;
210 result |= ((uint64_t)in[3]) << 24;
211 return result;
212 }
213
214
215 // Field operations.
216
217 #if defined(BORINGSSL_CURVE25519_64BIT)
218
219 typedef uint64_t fe_limb_t;
220 #define FE_NUM_LIMBS 5
221
222 // assert_fe asserts that |f| satisfies bounds:
223 //
224 // [[0x0 ~> 0x8cccccccccccc],
225 // [0x0 ~> 0x8cccccccccccc],
226 // [0x0 ~> 0x8cccccccccccc],
227 // [0x0 ~> 0x8cccccccccccc],
228 // [0x0 ~> 0x8cccccccccccc]]
229 //
230 // See comments in edwards25519_fiat.h for which functions use these bounds for
231 // inputs or outputs.
232 #define assert_fe(f) \
233 do { \
234 for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
235 assert(f[_assert_fe_i] <= UINT64_C(0x8cccccccccccc)); \
236 } \
237 } while (0)
238
239 // assert_fe_loose asserts that |f| satisfies bounds:
240 //
241 // [[0x0 ~> 0x1a666666666664],
242 // [0x0 ~> 0x1a666666666664],
243 // [0x0 ~> 0x1a666666666664],
244 // [0x0 ~> 0x1a666666666664],
245 // [0x0 ~> 0x1a666666666664]]
246 //
247 // See comments in edwards25519_fiat.h for which functions use these bounds for
248 // inputs or outputs.
249 #define assert_fe_loose(f) \
250 do { \
251 for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
252 assert(f[_assert_fe_i] <= UINT64_C(0x1a666666666664)); \
253 } \
254 } while (0)
255
256 #else
257
258 typedef uint32_t fe_limb_t;
259 #define FE_NUM_LIMBS 10
260
261 // assert_fe asserts that |f| satisfies bounds:
262 //
263 // [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
264 // [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
265 // [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
266 // [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
267 // [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
268 //
269 // See comments in edwards25519_fiat.h for which functions use these bounds for
270 // inputs or outputs.
271 #define assert_fe(f) \
272 do { \
273 for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
274 assert(f[_assert_fe_i] <= \
275 ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u)); \
276 } \
277 } while (0)
278
279 // assert_fe_loose asserts that |f| satisfies bounds:
280 //
281 // [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
282 // [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
283 // [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
284 // [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
285 // [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
286 //
287 // See comments in edwards25519_fiat.h for which functions use these bounds for
288 // inputs or outputs.
289 #define assert_fe_loose(f) \
290 do { \
291 for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
292 assert(f[_assert_fe_i] <= \
293 ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u)); \
294 } \
295 } while (0)
296
297 #endif // BORINGSSL_CURVE25519_64BIT
298
299 #ifndef EDWARDS25519_ASSERTS
300 #undef assert_fe
301 #undef assert_fe_loose
302 #define assert_fe(f)
303 #define assert_fe_loose(f)
304 #endif
305
fe_frombytes_strict(fe * h,const uint8_t s[32])306 static void fe_frombytes_strict(fe *h, const uint8_t s[32]) {
307 // |fiat_25519_from_bytes| requires the top-most bit be clear.
308 assert((s[31] & 0x80) == 0);
309 fiat_25519_from_bytes(h->v, s);
310 assert_fe(h->v);
311 }
312
fe_frombytes(fe * h,const uint8_t s[32])313 static void fe_frombytes(fe *h, const uint8_t s[32]) {
314 uint8_t s_copy[32];
315 memcpy(s_copy, s, 32);
316 s_copy[31] &= 0x7f;
317 fe_frombytes_strict(h, s_copy);
318 }
319
fe_tobytes(uint8_t s[32],const fe * f)320 static void fe_tobytes(uint8_t s[32], const fe *f) {
321 assert_fe(f->v);
322 fiat_25519_to_bytes(s, f->v);
323 }
324
325 // h = 0
fe_0(fe * h)326 static void fe_0(fe *h) {
327 memset(h, 0, sizeof(fe));
328 }
329
fe_loose_0(fe_loose * h)330 static void fe_loose_0(fe_loose *h) {
331 memset(h, 0, sizeof(fe_loose));
332 }
333
334 // h = 1
fe_1(fe * h)335 static void fe_1(fe *h) {
336 memset(h, 0, sizeof(fe));
337 h->v[0] = 1;
338 }
339
fe_loose_1(fe_loose * h)340 static void fe_loose_1(fe_loose *h) {
341 memset(h, 0, sizeof(fe_loose));
342 h->v[0] = 1;
343 }
344
345 // h = f + g
346 // Can overlap h with f or g.
fe_add(fe_loose * h,const fe * f,const fe * g)347 static void fe_add(fe_loose *h, const fe *f, const fe *g) {
348 assert_fe(f->v);
349 assert_fe(g->v);
350 fiat_25519_add(h->v, f->v, g->v);
351 assert_fe_loose(h->v);
352 }
353
354 // h = f - g
355 // Can overlap h with f or g.
fe_sub(fe_loose * h,const fe * f,const fe * g)356 static void fe_sub(fe_loose *h, const fe *f, const fe *g) {
357 assert_fe(f->v);
358 assert_fe(g->v);
359 fiat_25519_sub(h->v, f->v, g->v);
360 assert_fe_loose(h->v);
361 }
362
fe_carry(fe * h,const fe_loose * f)363 static void fe_carry(fe *h, const fe_loose* f) {
364 assert_fe_loose(f->v);
365 fiat_25519_carry(h->v, f->v);
366 assert_fe(h->v);
367 }
368
fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],const fe_limb_t in1[FE_NUM_LIMBS],const fe_limb_t in2[FE_NUM_LIMBS])369 static void fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],
370 const fe_limb_t in1[FE_NUM_LIMBS],
371 const fe_limb_t in2[FE_NUM_LIMBS]) {
372 assert_fe_loose(in1);
373 assert_fe_loose(in2);
374 fiat_25519_carry_mul(out, in1, in2);
375 assert_fe(out);
376 }
377
fe_mul_ltt(fe_loose * h,const fe * f,const fe * g)378 static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) {
379 fe_mul_impl(h->v, f->v, g->v);
380 }
381
fe_mul_llt(fe_loose * h,const fe_loose * f,const fe * g)382 static void fe_mul_llt(fe_loose *h, const fe_loose *f, const fe *g) {
383 fe_mul_impl(h->v, f->v, g->v);
384 }
385
fe_mul_ttt(fe * h,const fe * f,const fe * g)386 static void fe_mul_ttt(fe *h, const fe *f, const fe *g) {
387 fe_mul_impl(h->v, f->v, g->v);
388 }
389
fe_mul_tlt(fe * h,const fe_loose * f,const fe * g)390 static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) {
391 fe_mul_impl(h->v, f->v, g->v);
392 }
393
fe_mul_ttl(fe * h,const fe * f,const fe_loose * g)394 static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) {
395 fe_mul_impl(h->v, f->v, g->v);
396 }
397
fe_mul_tll(fe * h,const fe_loose * f,const fe_loose * g)398 static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) {
399 fe_mul_impl(h->v, f->v, g->v);
400 }
401
fe_sq_tl(fe * h,const fe_loose * f)402 static void fe_sq_tl(fe *h, const fe_loose *f) {
403 assert_fe_loose(f->v);
404 fiat_25519_carry_square(h->v, f->v);
405 assert_fe(h->v);
406 }
407
fe_sq_tt(fe * h,const fe * f)408 static void fe_sq_tt(fe *h, const fe *f) {
409 assert_fe_loose(f->v);
410 fiat_25519_carry_square(h->v, f->v);
411 assert_fe(h->v);
412 }
413
414 // h = -f
fe_neg(fe_loose * h,const fe * f)415 static void fe_neg(fe_loose *h, const fe *f) {
416 assert_fe(f->v);
417 fiat_25519_opp(h->v, f->v);
418 assert_fe_loose(h->v);
419 }
420
421 // Replace (f,g) with (g,g) if b == 1;
422 // replace (f,g) with (f,g) if b == 0.
423 //
424 // Preconditions: b in {0,1}.
fe_cmov(fe_loose * f,const fe_loose * g,fe_limb_t b)425 static void fe_cmov(fe_loose *f, const fe_loose *g, fe_limb_t b) {
426 b = 0-b;
427 unsigned i;
428 for (i = 0; i < FE_NUM_LIMBS; i++) {
429 fe_limb_t x = f->v[i] ^ g->v[i];
430 x &= b;
431 f->v[i] ^= x;
432 }
433 }
434
435 // h = f
fe_copy(fe * h,const fe * f)436 static void fe_copy(fe *h, const fe *f) {
437 memmove(h, f, sizeof(fe));
438 }
439
fe_copy_lt(fe_loose * h,const fe * f)440 static void fe_copy_lt(fe_loose *h, const fe *f) {
441 #ifdef EDWARDS25519_ASSERTS
442 assert(sizeof(fe_loose) == sizeof(fe));
443 #endif
444 memmove(h, f, sizeof(fe));
445 }
446 #if !defined(CONFIG_SMALL)
fe_copy_ll(fe_loose * h,const fe_loose * f)447 static void fe_copy_ll(fe_loose *h, const fe_loose *f) {
448 memmove(h, f, sizeof(fe_loose));
449 }
450 #endif // !defined(CONFIG_SMALL)
451
fe_loose_invert(fe * out,const fe_loose * z)452 static void fe_loose_invert(fe *out, const fe_loose *z) {
453 fe t0;
454 fe t1;
455 fe t2;
456 fe t3;
457 int i;
458
459 fe_sq_tl(&t0, z);
460 fe_sq_tt(&t1, &t0);
461 for (i = 1; i < 2; ++i) {
462 fe_sq_tt(&t1, &t1);
463 }
464 fe_mul_tlt(&t1, z, &t1);
465 fe_mul_ttt(&t0, &t0, &t1);
466 fe_sq_tt(&t2, &t0);
467 fe_mul_ttt(&t1, &t1, &t2);
468 fe_sq_tt(&t2, &t1);
469 for (i = 1; i < 5; ++i) {
470 fe_sq_tt(&t2, &t2);
471 }
472 fe_mul_ttt(&t1, &t2, &t1);
473 fe_sq_tt(&t2, &t1);
474 for (i = 1; i < 10; ++i) {
475 fe_sq_tt(&t2, &t2);
476 }
477 fe_mul_ttt(&t2, &t2, &t1);
478 fe_sq_tt(&t3, &t2);
479 for (i = 1; i < 20; ++i) {
480 fe_sq_tt(&t3, &t3);
481 }
482 fe_mul_ttt(&t2, &t3, &t2);
483 fe_sq_tt(&t2, &t2);
484 for (i = 1; i < 10; ++i) {
485 fe_sq_tt(&t2, &t2);
486 }
487 fe_mul_ttt(&t1, &t2, &t1);
488 fe_sq_tt(&t2, &t1);
489 for (i = 1; i < 50; ++i) {
490 fe_sq_tt(&t2, &t2);
491 }
492 fe_mul_ttt(&t2, &t2, &t1);
493 fe_sq_tt(&t3, &t2);
494 for (i = 1; i < 100; ++i) {
495 fe_sq_tt(&t3, &t3);
496 }
497 fe_mul_ttt(&t2, &t3, &t2);
498 fe_sq_tt(&t2, &t2);
499 for (i = 1; i < 50; ++i) {
500 fe_sq_tt(&t2, &t2);
501 }
502 fe_mul_ttt(&t1, &t2, &t1);
503 fe_sq_tt(&t1, &t1);
504 for (i = 1; i < 5; ++i) {
505 fe_sq_tt(&t1, &t1);
506 }
507 fe_mul_ttt(out, &t1, &t0);
508 }
509
fe_invert(fe * out,const fe * z)510 static void fe_invert(fe *out, const fe *z) {
511 fe_loose l;
512 fe_copy_lt(&l, z);
513 fe_loose_invert(out, &l);
514 }
515
516 // return 0 if f == 0
517 // return 1 if f != 0
fe_isnonzero(const fe_loose * f)518 static int fe_isnonzero(const fe_loose *f) {
519 fe tight;
520 fe_carry(&tight, f);
521 uint8_t s[32];
522 fe_tobytes(s, &tight);
523
524 static const uint8_t zero[32] = {0};
525 return k5_bcmp(s, zero, sizeof(zero)) != 0;
526 }
527
528 // return 1 if f is in {1,3,5,...,q-2}
529 // return 0 if f is in {0,2,4,...,q-1}
fe_isnegative(const fe * f)530 static int fe_isnegative(const fe *f) {
531 uint8_t s[32];
532 fe_tobytes(s, f);
533 return s[0] & 1;
534 }
535
fe_sq2_tt(fe * h,const fe * f)536 static void fe_sq2_tt(fe *h, const fe *f) {
537 // h = f^2
538 fe_sq_tt(h, f);
539
540 // h = h + h
541 fe_loose tmp;
542 fe_add(&tmp, h, h);
543 fe_carry(h, &tmp);
544 }
545
fe_pow22523(fe * out,const fe * z)546 static void fe_pow22523(fe *out, const fe *z) {
547 fe t0;
548 fe t1;
549 fe t2;
550 int i;
551
552 fe_sq_tt(&t0, z);
553 fe_sq_tt(&t1, &t0);
554 for (i = 1; i < 2; ++i) {
555 fe_sq_tt(&t1, &t1);
556 }
557 fe_mul_ttt(&t1, z, &t1);
558 fe_mul_ttt(&t0, &t0, &t1);
559 fe_sq_tt(&t0, &t0);
560 fe_mul_ttt(&t0, &t1, &t0);
561 fe_sq_tt(&t1, &t0);
562 for (i = 1; i < 5; ++i) {
563 fe_sq_tt(&t1, &t1);
564 }
565 fe_mul_ttt(&t0, &t1, &t0);
566 fe_sq_tt(&t1, &t0);
567 for (i = 1; i < 10; ++i) {
568 fe_sq_tt(&t1, &t1);
569 }
570 fe_mul_ttt(&t1, &t1, &t0);
571 fe_sq_tt(&t2, &t1);
572 for (i = 1; i < 20; ++i) {
573 fe_sq_tt(&t2, &t2);
574 }
575 fe_mul_ttt(&t1, &t2, &t1);
576 fe_sq_tt(&t1, &t1);
577 for (i = 1; i < 10; ++i) {
578 fe_sq_tt(&t1, &t1);
579 }
580 fe_mul_ttt(&t0, &t1, &t0);
581 fe_sq_tt(&t1, &t0);
582 for (i = 1; i < 50; ++i) {
583 fe_sq_tt(&t1, &t1);
584 }
585 fe_mul_ttt(&t1, &t1, &t0);
586 fe_sq_tt(&t2, &t1);
587 for (i = 1; i < 100; ++i) {
588 fe_sq_tt(&t2, &t2);
589 }
590 fe_mul_ttt(&t1, &t2, &t1);
591 fe_sq_tt(&t1, &t1);
592 for (i = 1; i < 50; ++i) {
593 fe_sq_tt(&t1, &t1);
594 }
595 fe_mul_ttt(&t0, &t1, &t0);
596 fe_sq_tt(&t0, &t0);
597 for (i = 1; i < 2; ++i) {
598 fe_sq_tt(&t0, &t0);
599 }
600 fe_mul_ttt(out, &t0, z);
601 }
602
603
604 // Group operations.
605
x25519_ge_tobytes(uint8_t s[32],const ge_p2 * h)606 static void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) {
607 fe recip;
608 fe x;
609 fe y;
610
611 fe_invert(&recip, &h->Z);
612 fe_mul_ttt(&x, &h->X, &recip);
613 fe_mul_ttt(&y, &h->Y, &recip);
614 fe_tobytes(s, &y);
615 s[31] ^= fe_isnegative(&x) << 7;
616 }
617
x25519_ge_frombytes_vartime(ge_p3 * h,const uint8_t s[32])618 static int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) {
619 fe u;
620 fe_loose v;
621 fe v3;
622 fe vxx;
623 fe_loose check;
624
625 fe_frombytes(&h->Y, s);
626 fe_1(&h->Z);
627 fe_sq_tt(&v3, &h->Y);
628 fe_mul_ttt(&vxx, &v3, &d);
629 fe_sub(&v, &v3, &h->Z); // u = y^2-1
630 fe_carry(&u, &v);
631 fe_add(&v, &vxx, &h->Z); // v = dy^2+1
632
633 fe_sq_tl(&v3, &v);
634 fe_mul_ttl(&v3, &v3, &v); // v3 = v^3
635 fe_sq_tt(&h->X, &v3);
636 fe_mul_ttl(&h->X, &h->X, &v);
637 fe_mul_ttt(&h->X, &h->X, &u); // x = uv^7
638
639 fe_pow22523(&h->X, &h->X); // x = (uv^7)^((q-5)/8)
640 fe_mul_ttt(&h->X, &h->X, &v3);
641 fe_mul_ttt(&h->X, &h->X, &u); // x = uv^3(uv^7)^((q-5)/8)
642
643 fe_sq_tt(&vxx, &h->X);
644 fe_mul_ttl(&vxx, &vxx, &v);
645 fe_sub(&check, &vxx, &u);
646 if (fe_isnonzero(&check)) {
647 fe_add(&check, &vxx, &u);
648 if (fe_isnonzero(&check)) {
649 return 0;
650 }
651 fe_mul_ttt(&h->X, &h->X, &sqrtm1);
652 }
653
654 if (fe_isnegative(&h->X) != (s[31] >> 7)) {
655 fe_loose t;
656 fe_neg(&t, &h->X);
657 fe_carry(&h->X, &t);
658 }
659
660 fe_mul_ttt(&h->T, &h->X, &h->Y);
661 return 1;
662 }
663
ge_p2_0(ge_p2 * h)664 static void ge_p2_0(ge_p2 *h) {
665 fe_0(&h->X);
666 fe_1(&h->Y);
667 fe_1(&h->Z);
668 }
669
ge_p3_0(ge_p3 * h)670 static void ge_p3_0(ge_p3 *h) {
671 fe_0(&h->X);
672 fe_1(&h->Y);
673 fe_1(&h->Z);
674 fe_0(&h->T);
675 }
676
ge_cached_0(ge_cached * h)677 static void ge_cached_0(ge_cached *h) {
678 fe_loose_1(&h->YplusX);
679 fe_loose_1(&h->YminusX);
680 fe_loose_1(&h->Z);
681 fe_loose_0(&h->T2d);
682 }
683
ge_precomp_0(ge_precomp * h)684 static void ge_precomp_0(ge_precomp *h) {
685 fe_loose_1(&h->yplusx);
686 fe_loose_1(&h->yminusx);
687 fe_loose_0(&h->xy2d);
688 }
689
690 // r = p
ge_p3_to_p2(ge_p2 * r,const ge_p3 * p)691 static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
692 fe_copy(&r->X, &p->X);
693 fe_copy(&r->Y, &p->Y);
694 fe_copy(&r->Z, &p->Z);
695 }
696
697 // r = p
x25519_ge_p3_to_cached(ge_cached * r,const ge_p3 * p)698 static void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
699 fe_add(&r->YplusX, &p->Y, &p->X);
700 fe_sub(&r->YminusX, &p->Y, &p->X);
701 fe_copy_lt(&r->Z, &p->Z);
702 fe_mul_ltt(&r->T2d, &p->T, &d2);
703 }
704
705 // r = p
x25519_ge_p1p1_to_p2(ge_p2 * r,const ge_p1p1 * p)706 static void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
707 fe_mul_tll(&r->X, &p->X, &p->T);
708 fe_mul_tll(&r->Y, &p->Y, &p->Z);
709 fe_mul_tll(&r->Z, &p->Z, &p->T);
710 }
711
712 // r = p
x25519_ge_p1p1_to_p3(ge_p3 * r,const ge_p1p1 * p)713 static void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
714 fe_mul_tll(&r->X, &p->X, &p->T);
715 fe_mul_tll(&r->Y, &p->Y, &p->Z);
716 fe_mul_tll(&r->Z, &p->Z, &p->T);
717 fe_mul_tll(&r->T, &p->X, &p->Y);
718 }
719
720 // r = p
ge_p1p1_to_cached(ge_cached * r,const ge_p1p1 * p)721 static void ge_p1p1_to_cached(ge_cached *r, const ge_p1p1 *p) {
722 ge_p3 t;
723 x25519_ge_p1p1_to_p3(&t, p);
724 x25519_ge_p3_to_cached(r, &t);
725 }
726
727 // r = 2 * p
ge_p2_dbl(ge_p1p1 * r,const ge_p2 * p)728 static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
729 fe trX, trZ, trT;
730 fe t0;
731
732 fe_sq_tt(&trX, &p->X);
733 fe_sq_tt(&trZ, &p->Y);
734 fe_sq2_tt(&trT, &p->Z);
735 fe_add(&r->Y, &p->X, &p->Y);
736 fe_sq_tl(&t0, &r->Y);
737
738 fe_add(&r->Y, &trZ, &trX);
739 fe_sub(&r->Z, &trZ, &trX);
740 fe_carry(&trZ, &r->Y);
741 fe_sub(&r->X, &t0, &trZ);
742 fe_carry(&trZ, &r->Z);
743 fe_sub(&r->T, &trT, &trZ);
744 }
745
746 #ifndef CONFIG_SMALL
747 // r = 2 * p
ge_p3_dbl(ge_p1p1 * r,const ge_p3 * p)748 static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
749 ge_p2 q;
750 ge_p3_to_p2(&q, p);
751 ge_p2_dbl(r, &q);
752 }
753 #endif
754
755 // r = p + q
ge_madd(ge_p1p1 * r,const ge_p3 * p,const ge_precomp * q)756 static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
757 fe trY, trZ, trT;
758
759 fe_add(&r->X, &p->Y, &p->X);
760 fe_sub(&r->Y, &p->Y, &p->X);
761 fe_mul_tll(&trZ, &r->X, &q->yplusx);
762 fe_mul_tll(&trY, &r->Y, &q->yminusx);
763 fe_mul_tlt(&trT, &q->xy2d, &p->T);
764 fe_add(&r->T, &p->Z, &p->Z);
765 fe_sub(&r->X, &trZ, &trY);
766 fe_add(&r->Y, &trZ, &trY);
767 fe_carry(&trZ, &r->T);
768 fe_add(&r->Z, &trZ, &trT);
769 fe_sub(&r->T, &trZ, &trT);
770 }
771
772 // r = p + q
x25519_ge_add(ge_p1p1 * r,const ge_p3 * p,const ge_cached * q)773 static void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
774 fe trX, trY, trZ, trT;
775
776 fe_add(&r->X, &p->Y, &p->X);
777 fe_sub(&r->Y, &p->Y, &p->X);
778 fe_mul_tll(&trZ, &r->X, &q->YplusX);
779 fe_mul_tll(&trY, &r->Y, &q->YminusX);
780 fe_mul_tlt(&trT, &q->T2d, &p->T);
781 fe_mul_ttl(&trX, &p->Z, &q->Z);
782 fe_add(&r->T, &trX, &trX);
783 fe_sub(&r->X, &trZ, &trY);
784 fe_add(&r->Y, &trZ, &trY);
785 fe_carry(&trZ, &r->T);
786 fe_add(&r->Z, &trZ, &trT);
787 fe_sub(&r->T, &trZ, &trT);
788 }
789
790 // r = p - q
x25519_ge_sub(ge_p1p1 * r,const ge_p3 * p,const ge_cached * q)791 static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
792 fe trX, trY, trZ, trT;
793
794 fe_add(&r->X, &p->Y, &p->X);
795 fe_sub(&r->Y, &p->Y, &p->X);
796 fe_mul_tll(&trZ, &r->X, &q->YminusX);
797 fe_mul_tll(&trY, &r->Y, &q->YplusX);
798 fe_mul_tlt(&trT, &q->T2d, &p->T);
799 fe_mul_ttl(&trX, &p->Z, &q->Z);
800 fe_add(&r->T, &trX, &trX);
801 fe_sub(&r->X, &trZ, &trY);
802 fe_add(&r->Y, &trZ, &trY);
803 fe_carry(&trZ, &r->T);
804 fe_sub(&r->Z, &trZ, &trT);
805 fe_add(&r->T, &trZ, &trT);
806 }
807
equal(signed char b,signed char c)808 static uint8_t equal(signed char b, signed char c) {
809 uint8_t ub = b;
810 uint8_t uc = c;
811 uint8_t x = ub ^ uc; // 0: yes; 1..255: no
812 uint32_t y = x; // 0: yes; 1..255: no
813 y -= 1; // 4294967295: yes; 0..254: no
814 y >>= 31; // 1: yes; 0: no
815 return y;
816 }
817
cmov(ge_precomp * t,const ge_precomp * u,uint8_t b)818 static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
819 fe_cmov(&t->yplusx, &u->yplusx, b);
820 fe_cmov(&t->yminusx, &u->yminusx, b);
821 fe_cmov(&t->xy2d, &u->xy2d, b);
822 }
823
x25519_ge_scalarmult_small_precomp(ge_p3 * h,const uint8_t a[32],const uint8_t precomp_table[15* 2* 32])824 static void x25519_ge_scalarmult_small_precomp(
825 ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) {
826 // precomp_table is first expanded into matching |ge_precomp|
827 // elements.
828 ge_precomp multiples[15];
829
830 unsigned i;
831 for (i = 0; i < 15; i++) {
832 // The precomputed table is assumed to already clear the top bit, so
833 // |fe_frombytes_strict| may be used directly.
834 const uint8_t *bytes = &precomp_table[i*(2 * 32)];
835 fe x, y;
836 fe_frombytes_strict(&x, bytes);
837 fe_frombytes_strict(&y, bytes + 32);
838
839 ge_precomp *out = &multiples[i];
840 fe_add(&out->yplusx, &y, &x);
841 fe_sub(&out->yminusx, &y, &x);
842 fe_mul_ltt(&out->xy2d, &x, &y);
843 fe_mul_llt(&out->xy2d, &out->xy2d, &d2);
844 }
845
846 // See the comment above |k25519SmallPrecomp| about the structure of the
847 // precomputed elements. This loop does 64 additions and 64 doublings to
848 // calculate the result.
849 ge_p3_0(h);
850
851 for (i = 63; i < 64; i--) {
852 unsigned j;
853 signed char index = 0;
854
855 for (j = 0; j < 4; j++) {
856 const uint8_t bit = 1 & (a[(8 * j) + (i / 8)] >> (i & 7));
857 index |= (bit << j);
858 }
859
860 ge_precomp e;
861 ge_precomp_0(&e);
862
863 for (j = 1; j < 16; j++) {
864 cmov(&e, &multiples[j-1], equal(index, j));
865 }
866
867 ge_cached cached;
868 ge_p1p1 r;
869 x25519_ge_p3_to_cached(&cached, h);
870 x25519_ge_add(&r, h, &cached);
871 x25519_ge_p1p1_to_p3(h, &r);
872
873 ge_madd(&r, h, &e);
874 x25519_ge_p1p1_to_p3(h, &r);
875 }
876 }
877
878 #if defined(CONFIG_SMALL)
879
x25519_ge_scalarmult_base(ge_p3 * h,const uint8_t a[32])880 static void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
881 x25519_ge_scalarmult_small_precomp(h, a, k25519SmallPrecomp);
882 }
883
884 #else
885
negative(signed char b)886 static uint8_t negative(signed char b) {
887 uint32_t x = b;
888 x >>= 31; // 1: yes; 0: no
889 return x;
890 }
891
table_select(ge_precomp * t,int pos,signed char b)892 static void table_select(ge_precomp *t, int pos, signed char b) {
893 ge_precomp minust;
894 uint8_t bnegative = negative(b);
895 uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
896
897 ge_precomp_0(t);
898 cmov(t, &k25519Precomp[pos][0], equal(babs, 1));
899 cmov(t, &k25519Precomp[pos][1], equal(babs, 2));
900 cmov(t, &k25519Precomp[pos][2], equal(babs, 3));
901 cmov(t, &k25519Precomp[pos][3], equal(babs, 4));
902 cmov(t, &k25519Precomp[pos][4], equal(babs, 5));
903 cmov(t, &k25519Precomp[pos][5], equal(babs, 6));
904 cmov(t, &k25519Precomp[pos][6], equal(babs, 7));
905 cmov(t, &k25519Precomp[pos][7], equal(babs, 8));
906 fe_copy_ll(&minust.yplusx, &t->yminusx);
907 fe_copy_ll(&minust.yminusx, &t->yplusx);
908
909 // NOTE: the input table is canonical, but types don't encode it
910 fe tmp;
911 fe_carry(&tmp, &t->xy2d);
912 fe_neg(&minust.xy2d, &tmp);
913
914 cmov(t, &minust, bnegative);
915 }
916
917 // h = a * B
918 // where a = a[0]+256*a[1]+...+256^31 a[31]
919 // B is the Ed25519 base point (x,4/5) with x positive.
920 //
921 // Preconditions:
922 // a[31] <= 127
x25519_ge_scalarmult_base(ge_p3 * h,const uint8_t * a)923 static void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
924 signed char e[64];
925 signed char carry;
926 ge_p1p1 r;
927 ge_p2 s;
928 ge_precomp t;
929 int i;
930
931 for (i = 0; i < 32; ++i) {
932 e[2 * i + 0] = (a[i] >> 0) & 15;
933 e[2 * i + 1] = (a[i] >> 4) & 15;
934 }
935 // each e[i] is between 0 and 15
936 // e[63] is between 0 and 7
937
938 carry = 0;
939 for (i = 0; i < 63; ++i) {
940 e[i] += carry;
941 carry = e[i] + 8;
942 carry >>= 4;
943 e[i] -= carry << 4;
944 }
945 e[63] += carry;
946 // each e[i] is between -8 and 8
947
948 ge_p3_0(h);
949 for (i = 1; i < 64; i += 2) {
950 table_select(&t, i / 2, e[i]);
951 ge_madd(&r, h, &t);
952 x25519_ge_p1p1_to_p3(h, &r);
953 }
954
955 ge_p3_dbl(&r, h);
956 x25519_ge_p1p1_to_p2(&s, &r);
957 ge_p2_dbl(&r, &s);
958 x25519_ge_p1p1_to_p2(&s, &r);
959 ge_p2_dbl(&r, &s);
960 x25519_ge_p1p1_to_p2(&s, &r);
961 ge_p2_dbl(&r, &s);
962 x25519_ge_p1p1_to_p3(h, &r);
963
964 for (i = 0; i < 64; i += 2) {
965 table_select(&t, i / 2, e[i]);
966 ge_madd(&r, h, &t);
967 x25519_ge_p1p1_to_p3(h, &r);
968 }
969 }
970
971 #endif
972
cmov_cached(ge_cached * t,ge_cached * u,uint8_t b)973 static void cmov_cached(ge_cached *t, ge_cached *u, uint8_t b) {
974 fe_cmov(&t->YplusX, &u->YplusX, b);
975 fe_cmov(&t->YminusX, &u->YminusX, b);
976 fe_cmov(&t->Z, &u->Z, b);
977 fe_cmov(&t->T2d, &u->T2d, b);
978 }
979
980 // r = scalar * A.
981 // where a = a[0]+256*a[1]+...+256^31 a[31].
x25519_ge_scalarmult(ge_p2 * r,const uint8_t * scalar,const ge_p3 * A)982 static void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar,
983 const ge_p3 *A) {
984 ge_p2 Ai_p2[8];
985 ge_cached Ai[16];
986 ge_p1p1 t;
987
988 ge_cached_0(&Ai[0]);
989 x25519_ge_p3_to_cached(&Ai[1], A);
990 ge_p3_to_p2(&Ai_p2[1], A);
991
992 unsigned i;
993 for (i = 2; i < 16; i += 2) {
994 ge_p2_dbl(&t, &Ai_p2[i / 2]);
995 ge_p1p1_to_cached(&Ai[i], &t);
996 if (i < 8) {
997 x25519_ge_p1p1_to_p2(&Ai_p2[i], &t);
998 }
999 x25519_ge_add(&t, A, &Ai[i]);
1000 ge_p1p1_to_cached(&Ai[i + 1], &t);
1001 if (i < 7) {
1002 x25519_ge_p1p1_to_p2(&Ai_p2[i + 1], &t);
1003 }
1004 }
1005
1006 ge_p2_0(r);
1007 ge_p3 u;
1008
1009 for (i = 0; i < 256; i += 4) {
1010 ge_p2_dbl(&t, r);
1011 x25519_ge_p1p1_to_p2(r, &t);
1012 ge_p2_dbl(&t, r);
1013 x25519_ge_p1p1_to_p2(r, &t);
1014 ge_p2_dbl(&t, r);
1015 x25519_ge_p1p1_to_p2(r, &t);
1016 ge_p2_dbl(&t, r);
1017 x25519_ge_p1p1_to_p3(&u, &t);
1018
1019 uint8_t index = scalar[31 - i/8];
1020 index >>= 4 - (i & 4);
1021 index &= 0xf;
1022
1023 unsigned j;
1024 ge_cached selected;
1025 ge_cached_0(&selected);
1026 for (j = 0; j < 16; j++) {
1027 cmov_cached(&selected, &Ai[j], equal(j, index));
1028 }
1029
1030 x25519_ge_add(&t, &u, &selected);
1031 x25519_ge_p1p1_to_p2(r, &t);
1032 }
1033 }
1034
1035 // int64_lshift21 returns |a << 21| but is defined when shifting bits into the
1036 // sign bit. This works around a language flaw in C.
int64_lshift21(int64_t a)1037 static inline int64_t int64_lshift21(int64_t a) {
1038 return (int64_t)((uint64_t)a << 21);
1039 }
1040
1041 // The set of scalars is \Z/l
1042 // where l = 2^252 + 27742317777372353535851937790883648493.
1043
1044 // Input:
1045 // s[0]+256*s[1]+...+256^63*s[63] = s
1046 //
1047 // Output:
1048 // s[0]+256*s[1]+...+256^31*s[31] = s mod l
1049 // where l = 2^252 + 27742317777372353535851937790883648493.
1050 // Overwrites s in place.
x25519_sc_reduce(uint8_t s[64])1051 static void x25519_sc_reduce(uint8_t s[64]) {
1052 int64_t s0 = 2097151 & load_3(s);
1053 int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
1054 int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
1055 int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
1056 int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
1057 int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
1058 int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
1059 int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
1060 int64_t s8 = 2097151 & load_3(s + 21);
1061 int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
1062 int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
1063 int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
1064 int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
1065 int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
1066 int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
1067 int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
1068 int64_t s16 = 2097151 & load_3(s + 42);
1069 int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
1070 int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
1071 int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
1072 int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
1073 int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
1074 int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
1075 int64_t s23 = (load_4(s + 60) >> 3);
1076 int64_t carry0;
1077 int64_t carry1;
1078 int64_t carry2;
1079 int64_t carry3;
1080 int64_t carry4;
1081 int64_t carry5;
1082 int64_t carry6;
1083 int64_t carry7;
1084 int64_t carry8;
1085 int64_t carry9;
1086 int64_t carry10;
1087 int64_t carry11;
1088 int64_t carry12;
1089 int64_t carry13;
1090 int64_t carry14;
1091 int64_t carry15;
1092 int64_t carry16;
1093
1094 s11 += s23 * 666643;
1095 s12 += s23 * 470296;
1096 s13 += s23 * 654183;
1097 s14 -= s23 * 997805;
1098 s15 += s23 * 136657;
1099 s16 -= s23 * 683901;
1100 s23 = 0;
1101
1102 s10 += s22 * 666643;
1103 s11 += s22 * 470296;
1104 s12 += s22 * 654183;
1105 s13 -= s22 * 997805;
1106 s14 += s22 * 136657;
1107 s15 -= s22 * 683901;
1108 s22 = 0;
1109
1110 s9 += s21 * 666643;
1111 s10 += s21 * 470296;
1112 s11 += s21 * 654183;
1113 s12 -= s21 * 997805;
1114 s13 += s21 * 136657;
1115 s14 -= s21 * 683901;
1116 s21 = 0;
1117
1118 s8 += s20 * 666643;
1119 s9 += s20 * 470296;
1120 s10 += s20 * 654183;
1121 s11 -= s20 * 997805;
1122 s12 += s20 * 136657;
1123 s13 -= s20 * 683901;
1124 s20 = 0;
1125
1126 s7 += s19 * 666643;
1127 s8 += s19 * 470296;
1128 s9 += s19 * 654183;
1129 s10 -= s19 * 997805;
1130 s11 += s19 * 136657;
1131 s12 -= s19 * 683901;
1132 s19 = 0;
1133
1134 s6 += s18 * 666643;
1135 s7 += s18 * 470296;
1136 s8 += s18 * 654183;
1137 s9 -= s18 * 997805;
1138 s10 += s18 * 136657;
1139 s11 -= s18 * 683901;
1140 s18 = 0;
1141
1142 carry6 = (s6 + (1 << 20)) >> 21;
1143 s7 += carry6;
1144 s6 -= int64_lshift21(carry6);
1145 carry8 = (s8 + (1 << 20)) >> 21;
1146 s9 += carry8;
1147 s8 -= int64_lshift21(carry8);
1148 carry10 = (s10 + (1 << 20)) >> 21;
1149 s11 += carry10;
1150 s10 -= int64_lshift21(carry10);
1151 carry12 = (s12 + (1 << 20)) >> 21;
1152 s13 += carry12;
1153 s12 -= int64_lshift21(carry12);
1154 carry14 = (s14 + (1 << 20)) >> 21;
1155 s15 += carry14;
1156 s14 -= int64_lshift21(carry14);
1157 carry16 = (s16 + (1 << 20)) >> 21;
1158 s17 += carry16;
1159 s16 -= int64_lshift21(carry16);
1160
1161 carry7 = (s7 + (1 << 20)) >> 21;
1162 s8 += carry7;
1163 s7 -= int64_lshift21(carry7);
1164 carry9 = (s9 + (1 << 20)) >> 21;
1165 s10 += carry9;
1166 s9 -= int64_lshift21(carry9);
1167 carry11 = (s11 + (1 << 20)) >> 21;
1168 s12 += carry11;
1169 s11 -= int64_lshift21(carry11);
1170 carry13 = (s13 + (1 << 20)) >> 21;
1171 s14 += carry13;
1172 s13 -= int64_lshift21(carry13);
1173 carry15 = (s15 + (1 << 20)) >> 21;
1174 s16 += carry15;
1175 s15 -= int64_lshift21(carry15);
1176
1177 s5 += s17 * 666643;
1178 s6 += s17 * 470296;
1179 s7 += s17 * 654183;
1180 s8 -= s17 * 997805;
1181 s9 += s17 * 136657;
1182 s10 -= s17 * 683901;
1183 s17 = 0;
1184
1185 s4 += s16 * 666643;
1186 s5 += s16 * 470296;
1187 s6 += s16 * 654183;
1188 s7 -= s16 * 997805;
1189 s8 += s16 * 136657;
1190 s9 -= s16 * 683901;
1191 s16 = 0;
1192
1193 s3 += s15 * 666643;
1194 s4 += s15 * 470296;
1195 s5 += s15 * 654183;
1196 s6 -= s15 * 997805;
1197 s7 += s15 * 136657;
1198 s8 -= s15 * 683901;
1199 s15 = 0;
1200
1201 s2 += s14 * 666643;
1202 s3 += s14 * 470296;
1203 s4 += s14 * 654183;
1204 s5 -= s14 * 997805;
1205 s6 += s14 * 136657;
1206 s7 -= s14 * 683901;
1207 s14 = 0;
1208
1209 s1 += s13 * 666643;
1210 s2 += s13 * 470296;
1211 s3 += s13 * 654183;
1212 s4 -= s13 * 997805;
1213 s5 += s13 * 136657;
1214 s6 -= s13 * 683901;
1215 s13 = 0;
1216
1217 s0 += s12 * 666643;
1218 s1 += s12 * 470296;
1219 s2 += s12 * 654183;
1220 s3 -= s12 * 997805;
1221 s4 += s12 * 136657;
1222 s5 -= s12 * 683901;
1223 s12 = 0;
1224
1225 carry0 = (s0 + (1 << 20)) >> 21;
1226 s1 += carry0;
1227 s0 -= int64_lshift21(carry0);
1228 carry2 = (s2 + (1 << 20)) >> 21;
1229 s3 += carry2;
1230 s2 -= int64_lshift21(carry2);
1231 carry4 = (s4 + (1 << 20)) >> 21;
1232 s5 += carry4;
1233 s4 -= int64_lshift21(carry4);
1234 carry6 = (s6 + (1 << 20)) >> 21;
1235 s7 += carry6;
1236 s6 -= int64_lshift21(carry6);
1237 carry8 = (s8 + (1 << 20)) >> 21;
1238 s9 += carry8;
1239 s8 -= int64_lshift21(carry8);
1240 carry10 = (s10 + (1 << 20)) >> 21;
1241 s11 += carry10;
1242 s10 -= int64_lshift21(carry10);
1243
1244 carry1 = (s1 + (1 << 20)) >> 21;
1245 s2 += carry1;
1246 s1 -= int64_lshift21(carry1);
1247 carry3 = (s3 + (1 << 20)) >> 21;
1248 s4 += carry3;
1249 s3 -= int64_lshift21(carry3);
1250 carry5 = (s5 + (1 << 20)) >> 21;
1251 s6 += carry5;
1252 s5 -= int64_lshift21(carry5);
1253 carry7 = (s7 + (1 << 20)) >> 21;
1254 s8 += carry7;
1255 s7 -= int64_lshift21(carry7);
1256 carry9 = (s9 + (1 << 20)) >> 21;
1257 s10 += carry9;
1258 s9 -= int64_lshift21(carry9);
1259 carry11 = (s11 + (1 << 20)) >> 21;
1260 s12 += carry11;
1261 s11 -= int64_lshift21(carry11);
1262
1263 s0 += s12 * 666643;
1264 s1 += s12 * 470296;
1265 s2 += s12 * 654183;
1266 s3 -= s12 * 997805;
1267 s4 += s12 * 136657;
1268 s5 -= s12 * 683901;
1269 s12 = 0;
1270
1271 carry0 = s0 >> 21;
1272 s1 += carry0;
1273 s0 -= int64_lshift21(carry0);
1274 carry1 = s1 >> 21;
1275 s2 += carry1;
1276 s1 -= int64_lshift21(carry1);
1277 carry2 = s2 >> 21;
1278 s3 += carry2;
1279 s2 -= int64_lshift21(carry2);
1280 carry3 = s3 >> 21;
1281 s4 += carry3;
1282 s3 -= int64_lshift21(carry3);
1283 carry4 = s4 >> 21;
1284 s5 += carry4;
1285 s4 -= int64_lshift21(carry4);
1286 carry5 = s5 >> 21;
1287 s6 += carry5;
1288 s5 -= int64_lshift21(carry5);
1289 carry6 = s6 >> 21;
1290 s7 += carry6;
1291 s6 -= int64_lshift21(carry6);
1292 carry7 = s7 >> 21;
1293 s8 += carry7;
1294 s7 -= int64_lshift21(carry7);
1295 carry8 = s8 >> 21;
1296 s9 += carry8;
1297 s8 -= int64_lshift21(carry8);
1298 carry9 = s9 >> 21;
1299 s10 += carry9;
1300 s9 -= int64_lshift21(carry9);
1301 carry10 = s10 >> 21;
1302 s11 += carry10;
1303 s10 -= int64_lshift21(carry10);
1304 carry11 = s11 >> 21;
1305 s12 += carry11;
1306 s11 -= int64_lshift21(carry11);
1307
1308 s0 += s12 * 666643;
1309 s1 += s12 * 470296;
1310 s2 += s12 * 654183;
1311 s3 -= s12 * 997805;
1312 s4 += s12 * 136657;
1313 s5 -= s12 * 683901;
1314 s12 = 0;
1315
1316 carry0 = s0 >> 21;
1317 s1 += carry0;
1318 s0 -= int64_lshift21(carry0);
1319 carry1 = s1 >> 21;
1320 s2 += carry1;
1321 s1 -= int64_lshift21(carry1);
1322 carry2 = s2 >> 21;
1323 s3 += carry2;
1324 s2 -= int64_lshift21(carry2);
1325 carry3 = s3 >> 21;
1326 s4 += carry3;
1327 s3 -= int64_lshift21(carry3);
1328 carry4 = s4 >> 21;
1329 s5 += carry4;
1330 s4 -= int64_lshift21(carry4);
1331 carry5 = s5 >> 21;
1332 s6 += carry5;
1333 s5 -= int64_lshift21(carry5);
1334 carry6 = s6 >> 21;
1335 s7 += carry6;
1336 s6 -= int64_lshift21(carry6);
1337 carry7 = s7 >> 21;
1338 s8 += carry7;
1339 s7 -= int64_lshift21(carry7);
1340 carry8 = s8 >> 21;
1341 s9 += carry8;
1342 s8 -= int64_lshift21(carry8);
1343 carry9 = s9 >> 21;
1344 s10 += carry9;
1345 s9 -= int64_lshift21(carry9);
1346 carry10 = s10 >> 21;
1347 s11 += carry10;
1348 s10 -= int64_lshift21(carry10);
1349
1350 s[0] = s0 >> 0;
1351 s[1] = s0 >> 8;
1352 s[2] = (s0 >> 16) | (s1 << 5);
1353 s[3] = s1 >> 3;
1354 s[4] = s1 >> 11;
1355 s[5] = (s1 >> 19) | (s2 << 2);
1356 s[6] = s2 >> 6;
1357 s[7] = (s2 >> 14) | (s3 << 7);
1358 s[8] = s3 >> 1;
1359 s[9] = s3 >> 9;
1360 s[10] = (s3 >> 17) | (s4 << 4);
1361 s[11] = s4 >> 4;
1362 s[12] = s4 >> 12;
1363 s[13] = (s4 >> 20) | (s5 << 1);
1364 s[14] = s5 >> 7;
1365 s[15] = (s5 >> 15) | (s6 << 6);
1366 s[16] = s6 >> 2;
1367 s[17] = s6 >> 10;
1368 s[18] = (s6 >> 18) | (s7 << 3);
1369 s[19] = s7 >> 5;
1370 s[20] = s7 >> 13;
1371 s[21] = s8 >> 0;
1372 s[22] = s8 >> 8;
1373 s[23] = (s8 >> 16) | (s9 << 5);
1374 s[24] = s9 >> 3;
1375 s[25] = s9 >> 11;
1376 s[26] = (s9 >> 19) | (s10 << 2);
1377 s[27] = s10 >> 6;
1378 s[28] = (s10 >> 14) | (s11 << 7);
1379 s[29] = s11 >> 1;
1380 s[30] = s11 >> 9;
1381 s[31] = s11 >> 17;
1382 }
1383
1384 /* Loosely from BoringSSL crypto/curve25519/spake25519.c */
1385
1386 /*
1387 * Here BoringSSL uses different points, not restricted to the generator
1388 * subgroup, while we use the draft-irtf-cfrg-spake2-05 points. The Python
1389 * code is modified to add the subgroup restriction.
1390 */
1391
1392 // The following precomputation tables are for the following
1393 // points:
1394 //
1395 // N (found in 7 iterations):
1396 // x: 10742253510813957597047979962966927467575235974254765187031601461055699024931
1397 // y: 19796686047937480651099107989427797822652529149428697746066532921705571401683
1398 // encoded: d3bfb518f44f3430f29d0c92af503865a1ed3281dc69b35dd868ba85f886c4ab
1399 //
1400 // M (found in 21 iterations):
1401 // x: 8158688967149231307266666683326742915289288280191350817196911733632187385319
1402 // y: 21622333750659878624441478467798461427617029906629724657331223068277098105040
1403 // encoded: d048032c6ea0b6d697ddc2e86bda85a33adac920f1bf18e1b0c6d166a5cecdaf
1404 //
1405 // These points and their precomputation tables are generated with the
1406 // following Python code.
1407
1408 /*
1409 import hashlib
1410 import ed25519 as E # https://ed25519.cr.yp.to/python/ed25519.py
1411
1412 SEED_N = 'edwards25519 point generation seed (N)'
1413 SEED_M = 'edwards25519 point generation seed (M)'
1414
1415 def genpoint(seed):
1416 v = hashlib.sha256(seed).digest()
1417 it = 1
1418 while True:
1419 try:
1420 x,y = E.decodepoint(v)
1421 if E.scalarmult((x,y), E.l) != [0, 1]:
1422 raise Exception('point has wrong order')
1423 except Exception, e:
1424 print e
1425 it += 1
1426 v = hashlib.sha256(v).digest()
1427 continue
1428 print "Found in %d iterations:" % it
1429 print " x = %d" % x
1430 print " y = %d" % y
1431 print " Encoded (hex)"
1432 print E.encodepoint((x,y)).encode('hex')
1433 return (x,y)
1434
1435 def gentable(P):
1436 t = []
1437 for i in range(1,16):
1438 k = (i >> 3 & 1) * (1 << 192) + \
1439 (i >> 2 & 1) * (1 << 128) + \
1440 (i >> 1 & 1) * (1 << 64) + \
1441 (i & 1)
1442 t.append(E.scalarmult(P, k))
1443 return ''.join(E.encodeint(x) + E.encodeint(y) for (x,y) in t)
1444
1445 def printtable(table, name):
1446 print "static const uint8_t %s[15 * 2 * 32] = {" % name,
1447 for i in range(15 * 2 * 32):
1448 if i % 12 == 0:
1449 print "\n ",
1450 print " 0x%02x," % ord(table[i]),
1451 print "\n};"
1452
1453 if __name__ == "__main__":
1454 print "Searching for N"
1455 N = genpoint(SEED_N)
1456 print "Generating precomputation table for N"
1457 Ntable = gentable(N)
1458 printtable(Ntable, "kSpakeNSmallPrecomp")
1459
1460 print "Searching for M"
1461 M = genpoint(SEED_M)
1462 print "Generating precomputation table for M"
1463 Mtable = gentable(M)
1464 printtable(Mtable, "kSpakeMSmallPrecomp")
1465 */
1466
1467 static const uint8_t kSpakeNSmallPrecomp[15 * 2 * 32] = {
1468 0x23, 0xfc, 0x27, 0x6c, 0x55, 0xaf, 0xb3, 0x9c, 0xd8, 0x99, 0x3a, 0x0d,
1469 0x7f, 0x08, 0xc9, 0xeb, 0x4d, 0x6e, 0x90, 0x99, 0x2f, 0x3c, 0x15, 0x2b,
1470 0x89, 0x5a, 0x0f, 0xf2, 0x67, 0xe6, 0xbf, 0x17, 0xd3, 0xbf, 0xb5, 0x18,
1471 0xf4, 0x4f, 0x34, 0x30, 0xf2, 0x9d, 0x0c, 0x92, 0xaf, 0x50, 0x38, 0x65,
1472 0xa1, 0xed, 0x32, 0x81, 0xdc, 0x69, 0xb3, 0x5d, 0xd8, 0x68, 0xba, 0x85,
1473 0xf8, 0x86, 0xc4, 0x2b, 0x53, 0x93, 0xb1, 0x99, 0x90, 0x30, 0xca, 0xb0,
1474 0xbd, 0xea, 0x14, 0x4c, 0x6f, 0x2b, 0x81, 0x1e, 0x23, 0x45, 0xb2, 0x32,
1475 0x2e, 0x2d, 0xe6, 0xb8, 0x5d, 0xc5, 0x15, 0x91, 0x63, 0x39, 0x18, 0x5b,
1476 0x62, 0x63, 0x9b, 0xf4, 0x8b, 0xe0, 0x34, 0xa2, 0x95, 0x11, 0x92, 0x68,
1477 0x54, 0xb7, 0xf3, 0x91, 0xca, 0x22, 0xad, 0x08, 0xd8, 0x9c, 0xa2, 0xf0,
1478 0xdc, 0x9c, 0x2c, 0x84, 0x32, 0x26, 0xe0, 0x17, 0x89, 0x53, 0x6b, 0xfd,
1479 0x76, 0x97, 0x25, 0xea, 0x99, 0x94, 0xf8, 0x29, 0x7c, 0xc4, 0x53, 0xc0,
1480 0x98, 0x9a, 0x20, 0xdc, 0x70, 0x01, 0x50, 0xaa, 0x05, 0xa3, 0x40, 0x50,
1481 0x66, 0x87, 0x30, 0x19, 0x12, 0xc3, 0xb8, 0x2d, 0x28, 0x8b, 0x7b, 0x48,
1482 0xf7, 0x7b, 0xab, 0x45, 0x70, 0x2e, 0xbb, 0x85, 0xc1, 0x6c, 0xdd, 0x35,
1483 0x00, 0x83, 0x20, 0x13, 0x82, 0x08, 0xaa, 0xa3, 0x03, 0x0f, 0xca, 0x27,
1484 0x3e, 0x8b, 0x52, 0xc2, 0xd7, 0xb1, 0x8c, 0x22, 0xfe, 0x04, 0x4a, 0xf2,
1485 0xe8, 0xac, 0xee, 0x2e, 0xd7, 0x77, 0x34, 0x49, 0xf2, 0xe9, 0xeb, 0x8c,
1486 0xa6, 0xc8, 0xc6, 0xcd, 0x8a, 0x8f, 0x7c, 0x5d, 0x51, 0xc8, 0xfa, 0x6f,
1487 0xb3, 0x93, 0xdb, 0x71, 0xef, 0x3e, 0x6e, 0xa7, 0x85, 0xc7, 0xd4, 0x3e,
1488 0xa2, 0xe2, 0xc0, 0xaa, 0x17, 0xb3, 0xa4, 0x7c, 0xc2, 0x3f, 0x7c, 0x7a,
1489 0xdd, 0x26, 0xde, 0x3e, 0xf1, 0x99, 0x06, 0xf7, 0x69, 0x1b, 0xc9, 0x20,
1490 0x55, 0x4f, 0x86, 0x7a, 0x93, 0x89, 0x68, 0xe9, 0x2b, 0x2d, 0xbc, 0x08,
1491 0x15, 0x5d, 0x2d, 0x0b, 0x4f, 0x1a, 0xb3, 0xd4, 0x8e, 0x77, 0x79, 0x2a,
1492 0x25, 0xf9, 0xb6, 0x46, 0xfb, 0x87, 0x02, 0xa6, 0xe0, 0xd3, 0xba, 0x84,
1493 0xea, 0x3e, 0x58, 0xa5, 0x7f, 0x8f, 0x8c, 0x39, 0x79, 0x28, 0xb5, 0xcf,
1494 0xe4, 0xca, 0x63, 0xdc, 0xac, 0xed, 0x4b, 0x74, 0x1e, 0x94, 0x85, 0x8c,
1495 0xe5, 0xf4, 0x76, 0x6f, 0x20, 0x67, 0x8b, 0xd8, 0xd6, 0x4b, 0xe7, 0x2d,
1496 0xa0, 0xbd, 0xcc, 0x1f, 0xdf, 0x46, 0x9c, 0xa2, 0x49, 0x64, 0xdf, 0x24,
1497 0x00, 0x11, 0x11, 0x45, 0x62, 0x5c, 0xd7, 0x8a, 0x00, 0x02, 0xf5, 0x9b,
1498 0x4f, 0x53, 0x42, 0xc5, 0xd5, 0x55, 0x80, 0x73, 0x9a, 0x5b, 0x31, 0x5a,
1499 0xbd, 0x3a, 0x43, 0xe9, 0x33, 0xe5, 0xaf, 0x1d, 0x92, 0x5e, 0x59, 0x37,
1500 0xae, 0x57, 0xfa, 0x3b, 0xd2, 0x31, 0xae, 0xa6, 0xf9, 0xc9, 0xc1, 0x82,
1501 0xa6, 0xa5, 0xed, 0x24, 0x53, 0x4b, 0x38, 0x22, 0xf2, 0x85, 0x8d, 0x13,
1502 0xa6, 0x5e, 0xd6, 0x57, 0x17, 0xd3, 0x33, 0x38, 0x8d, 0x65, 0xd3, 0xcb,
1503 0x1a, 0xa2, 0x3a, 0x2b, 0xbb, 0x61, 0x53, 0xd7, 0xff, 0xcd, 0x20, 0xb6,
1504 0xbb, 0x8c, 0xab, 0x63, 0xef, 0xb8, 0x26, 0x7e, 0x81, 0x65, 0xaf, 0x90,
1505 0xfc, 0xd2, 0xb6, 0x72, 0xdb, 0xe9, 0x23, 0x78, 0x12, 0x04, 0xc0, 0x03,
1506 0x82, 0xa8, 0x7a, 0x0f, 0x48, 0x6f, 0x82, 0x7f, 0x81, 0xcd, 0xa7, 0x89,
1507 0xdd, 0x86, 0xea, 0x5e, 0xa1, 0x50, 0x14, 0x34, 0x17, 0x64, 0x82, 0x0f,
1508 0xc4, 0x40, 0x20, 0x1d, 0x8f, 0xfe, 0xfa, 0x99, 0xaf, 0x5b, 0xc1, 0x5d,
1509 0xc8, 0x47, 0x07, 0x54, 0x4a, 0x22, 0x56, 0x57, 0xf1, 0x2c, 0x3b, 0x62,
1510 0x7f, 0x12, 0x62, 0xaf, 0xfd, 0xf8, 0x04, 0x11, 0xa8, 0x51, 0xf0, 0x46,
1511 0x5d, 0x79, 0x66, 0xff, 0x8a, 0x06, 0xef, 0x54, 0x64, 0x1b, 0x84, 0x3e,
1512 0x41, 0xf3, 0xfe, 0x19, 0x51, 0xf7, 0x44, 0x9c, 0x16, 0xd3, 0x7a, 0x09,
1513 0x59, 0xf5, 0x47, 0x45, 0xd0, 0x31, 0xef, 0x96, 0x2c, 0xc5, 0xc0, 0xd0,
1514 0x56, 0xef, 0x3f, 0x07, 0x2b, 0xb7, 0x28, 0x49, 0xf5, 0xb1, 0x42, 0x18,
1515 0xcf, 0x77, 0xd8, 0x2b, 0x71, 0x74, 0x80, 0xba, 0x34, 0x52, 0xce, 0x11,
1516 0xfe, 0xc4, 0xb9, 0xeb, 0xf9, 0xc4, 0x5e, 0x1f, 0xd3, 0xde, 0x4b, 0x14,
1517 0xe3, 0x6e, 0xe7, 0xd7, 0x83, 0x59, 0x98, 0xe8, 0x3d, 0x8e, 0xd6, 0x7d,
1518 0xc0, 0x9a, 0x79, 0xb9, 0x83, 0xf1, 0xc1, 0x00, 0x5d, 0x16, 0x1b, 0x44,
1519 0xe9, 0x02, 0xce, 0x99, 0x1e, 0x77, 0xef, 0xca, 0xbc, 0xf0, 0x6a, 0xb9,
1520 0x65, 0x3f, 0x3c, 0xd9, 0xe1, 0x63, 0x0b, 0xbf, 0xaa, 0xa7, 0xe6, 0x6d,
1521 0x6d, 0x3f, 0x44, 0x29, 0xa3, 0x8b, 0x6d, 0xc4, 0x81, 0xa9, 0xc3, 0x5a,
1522 0x90, 0x55, 0x72, 0x61, 0x17, 0x22, 0x7f, 0x3e, 0x5f, 0xfc, 0xba, 0xb3,
1523 0x7a, 0x99, 0x76, 0xe9, 0x20, 0xe5, 0xc5, 0xe8, 0x55, 0x56, 0x0f, 0x7a,
1524 0x48, 0xe7, 0xbc, 0xe1, 0x13, 0xf4, 0x90, 0xef, 0x97, 0x6c, 0x02, 0x89,
1525 0x4d, 0x22, 0x48, 0xda, 0xd3, 0x52, 0x45, 0x31, 0x26, 0xcc, 0xe8, 0x9e,
1526 0x5d, 0xdd, 0x75, 0xe4, 0x1d, 0xbc, 0xb1, 0x08, 0x55, 0xaf, 0x54, 0x70,
1527 0x0d, 0x0c, 0xf3, 0x50, 0xbc, 0x40, 0x83, 0xee, 0xdc, 0x6d, 0x8b, 0x40,
1528 0x79, 0x62, 0x18, 0x37, 0xc4, 0x78, 0x02, 0x58, 0x7c, 0x78, 0xd3, 0x54,
1529 0xed, 0x31, 0xbd, 0x7d, 0x48, 0xcf, 0xb6, 0x11, 0x27, 0x37, 0x9c, 0x86,
1530 0xf7, 0x2e, 0x00, 0x7a, 0x48, 0x1b, 0xa6, 0x72, 0x70, 0x7b, 0x44, 0x45,
1531 0xeb, 0x49, 0xbf, 0xbe, 0x09, 0x78, 0x66, 0x71, 0x12, 0x7f, 0x3d, 0x78,
1532 0x51, 0x24, 0x82, 0xa2, 0xf0, 0x1e, 0x83, 0x81, 0x81, 0x45, 0x53, 0xfd,
1533 0x5e, 0xf3, 0x03, 0x74, 0xbd, 0x23, 0x35, 0xf6, 0x10, 0xdd, 0x7c, 0x73,
1534 0x46, 0x32, 0x09, 0x54, 0x99, 0x95, 0x91, 0x25, 0xb8, 0x32, 0x09, 0xd8,
1535 0x2f, 0x97, 0x50, 0xa3, 0xf5, 0xd6, 0xb1, 0xed, 0x97, 0x51, 0x06, 0x42,
1536 0x12, 0x0c, 0x69, 0x38, 0x09, 0xa0, 0xd8, 0x19, 0x70, 0xf7, 0x8f, 0x61,
1537 0x0d, 0x56, 0x43, 0x66, 0x22, 0x8b, 0x0e, 0x0e, 0xf9, 0x81, 0x9f, 0xac,
1538 0x6f, 0xbf, 0x7d, 0x04, 0x13, 0xf2, 0xe4, 0xeb, 0xfd, 0xbe, 0x4e, 0x56,
1539 0xda, 0xe0, 0x22, 0x6d, 0x1b, 0x25, 0xc8, 0xa5, 0x9c, 0x05, 0x45, 0x52,
1540 0x3c, 0x3a, 0xde, 0x6b, 0xac, 0x9b, 0xf8, 0x81, 0x97, 0x21, 0x46, 0xac,
1541 0x7e, 0x89, 0xf8, 0x49, 0x58, 0xbb, 0x45, 0xac, 0xa2, 0xc4, 0x90, 0x1f,
1542 0xb2, 0xb4, 0xf8, 0xe0, 0xcd, 0xa1, 0x9d, 0x1c, 0xf2, 0xf1, 0xdf, 0xfb,
1543 0x88, 0x4e, 0xe5, 0x41, 0xd8, 0x6e, 0xac, 0x07, 0x87, 0x95, 0x35, 0xa6,
1544 0x12, 0x08, 0x5d, 0x57, 0x5e, 0xaf, 0x71, 0x0f, 0x07, 0x4e, 0x81, 0x77,
1545 0xf1, 0xef, 0xb5, 0x35, 0x5c, 0xfa, 0xf4, 0x4e, 0x42, 0xdc, 0x19, 0xfe,
1546 0xe4, 0xd2, 0xb4, 0x27, 0xfb, 0x34, 0x1f, 0xb2, 0x6f, 0xf2, 0x95, 0xcc,
1547 0xd4, 0x47, 0x63, 0xdc, 0x7e, 0x4f, 0x97, 0x2b, 0x7a, 0xe0, 0x80, 0x31,
1548 };
1549
1550 static const uint8_t kSpakeMSmallPrecomp[15 * 2 * 32] = {
1551 0xe7, 0x45, 0x7e, 0x47, 0x49, 0x69, 0xbd, 0x1b, 0x35, 0x1c, 0x2c, 0x98,
1552 0x03, 0xf3, 0xb3, 0x37, 0xde, 0x39, 0xa5, 0xda, 0xc0, 0x2e, 0xa4, 0xac,
1553 0x7d, 0x08, 0x26, 0xfc, 0x80, 0xa7, 0x09, 0x12, 0xd0, 0x48, 0x03, 0x2c,
1554 0x6e, 0xa0, 0xb6, 0xd6, 0x97, 0xdd, 0xc2, 0xe8, 0x6b, 0xda, 0x85, 0xa3,
1555 0x3a, 0xda, 0xc9, 0x20, 0xf1, 0xbf, 0x18, 0xe1, 0xb0, 0xc6, 0xd1, 0x66,
1556 0xa5, 0xce, 0xcd, 0x2f, 0x80, 0xa8, 0x4e, 0xc3, 0x81, 0xae, 0x68, 0x3b,
1557 0x0d, 0xdb, 0x56, 0x32, 0x2f, 0xa8, 0x97, 0xa0, 0x5c, 0x15, 0xc1, 0xcb,
1558 0x6f, 0x7a, 0x5f, 0xc5, 0x32, 0xfb, 0x49, 0x17, 0x18, 0xfa, 0x85, 0x08,
1559 0x85, 0xf1, 0xe3, 0x11, 0x8e, 0x3d, 0x70, 0x20, 0x38, 0x4e, 0x0c, 0x17,
1560 0xa1, 0xa8, 0x20, 0xd2, 0xb1, 0x1d, 0x05, 0x8d, 0x0f, 0xc9, 0x96, 0x18,
1561 0x9d, 0x8c, 0x89, 0x8f, 0x46, 0x6a, 0x6c, 0x6e, 0x72, 0x03, 0xb2, 0x75,
1562 0x87, 0xd8, 0xa9, 0x60, 0x93, 0x2b, 0x8b, 0x66, 0xee, 0xaf, 0xce, 0x98,
1563 0xcd, 0x6b, 0x7c, 0x6a, 0xbe, 0x19, 0xda, 0x66, 0x7c, 0xda, 0x53, 0xa0,
1564 0xe3, 0x9a, 0x0e, 0x53, 0x3a, 0x7c, 0x73, 0x4a, 0x37, 0xa6, 0x53, 0x23,
1565 0x67, 0x31, 0xce, 0x8a, 0xab, 0xee, 0x72, 0x76, 0xc2, 0xb5, 0x54, 0x42,
1566 0xcf, 0x4b, 0xc7, 0x53, 0x24, 0x59, 0xaf, 0x76, 0x53, 0x10, 0x7e, 0x25,
1567 0x94, 0x5c, 0x23, 0xa6, 0x5e, 0x05, 0xea, 0x14, 0xad, 0x2b, 0xce, 0x50,
1568 0x77, 0xb3, 0x7a, 0x88, 0x4c, 0xf7, 0x74, 0x04, 0x35, 0xa4, 0x0c, 0x9e,
1569 0xee, 0x6a, 0x4c, 0x3c, 0xc1, 0x6a, 0x35, 0x4d, 0x6d, 0x8f, 0x94, 0x95,
1570 0xe4, 0x10, 0xca, 0x46, 0x4e, 0xfa, 0x38, 0x40, 0xeb, 0x1a, 0x1b, 0x5a,
1571 0xff, 0x73, 0x4d, 0xe9, 0xf2, 0xbe, 0x89, 0xf5, 0xd1, 0x72, 0xd0, 0x1a,
1572 0x7b, 0x82, 0x08, 0x19, 0xda, 0x54, 0x44, 0xa5, 0x3d, 0xd8, 0x10, 0x1c,
1573 0xcf, 0x3b, 0xc7, 0x54, 0xd5, 0x11, 0xd7, 0x2a, 0x69, 0x3f, 0xa6, 0x58,
1574 0x74, 0xfd, 0x90, 0xb2, 0xf4, 0xc2, 0x0e, 0xf3, 0x19, 0x8f, 0x51, 0x7c,
1575 0x31, 0x12, 0x79, 0x61, 0x16, 0xb4, 0x2f, 0x2f, 0xd0, 0x88, 0x97, 0xf2,
1576 0xc3, 0x8c, 0xa6, 0xa3, 0x29, 0xff, 0x7e, 0x12, 0x46, 0x2a, 0x9c, 0x09,
1577 0x7c, 0x5f, 0x87, 0x07, 0x6b, 0xa1, 0x9a, 0x57, 0x55, 0x8e, 0xb0, 0x56,
1578 0x5d, 0xc9, 0x4c, 0x5b, 0xae, 0xd3, 0xd0, 0x8e, 0xb8, 0xac, 0xba, 0xe8,
1579 0x54, 0x45, 0x30, 0x14, 0xf6, 0x59, 0x20, 0xc4, 0x03, 0xb7, 0x7a, 0x5d,
1580 0x6b, 0x5a, 0xcb, 0x28, 0x60, 0xf8, 0xef, 0x61, 0x60, 0x78, 0x6b, 0xf5,
1581 0x21, 0x4b, 0x75, 0xc2, 0x77, 0xba, 0x0e, 0x38, 0x98, 0xe0, 0xfb, 0xb7,
1582 0x5f, 0x75, 0x87, 0x04, 0x0c, 0xb4, 0x5c, 0x09, 0x04, 0x00, 0x38, 0x4e,
1583 0x4f, 0x7b, 0x73, 0xe5, 0xdb, 0xdb, 0xf1, 0xf4, 0x5c, 0x64, 0x68, 0xfd,
1584 0xb1, 0x86, 0xe8, 0x89, 0xbe, 0x9c, 0xd4, 0x96, 0x1d, 0xcb, 0xdc, 0x5c,
1585 0xef, 0xd4, 0x33, 0x28, 0xb9, 0xb6, 0xaf, 0x3b, 0xcf, 0x8d, 0x30, 0xba,
1586 0xe8, 0x08, 0xcf, 0x84, 0xba, 0x61, 0x10, 0x9b, 0x62, 0xf6, 0x18, 0x79,
1587 0x66, 0x87, 0x82, 0x7c, 0xaa, 0x71, 0xac, 0xd0, 0xd0, 0x32, 0xb0, 0x54,
1588 0x03, 0xa4, 0xad, 0x3f, 0x72, 0xca, 0x22, 0xff, 0x01, 0x87, 0x08, 0x36,
1589 0x61, 0x22, 0xaa, 0x18, 0xab, 0x3a, 0xbc, 0xf2, 0x78, 0x05, 0xe1, 0x99,
1590 0xa3, 0x59, 0x98, 0xcc, 0x21, 0xc6, 0x2b, 0x51, 0x6d, 0x43, 0x0a, 0x46,
1591 0x50, 0xae, 0x11, 0x7e, 0xd5, 0x23, 0x56, 0xef, 0x83, 0xc8, 0xbf, 0x42,
1592 0xf0, 0x45, 0x52, 0x1f, 0x34, 0xbc, 0x2f, 0xb0, 0xf0, 0xce, 0xf0, 0xec,
1593 0xd0, 0x99, 0x59, 0x2e, 0x1f, 0xab, 0xa8, 0x1e, 0x4b, 0xce, 0x1b, 0x9a,
1594 0x75, 0xc6, 0xc4, 0x71, 0x86, 0xf0, 0x8d, 0xec, 0xb0, 0x30, 0xb9, 0x62,
1595 0xb3, 0xb7, 0xdd, 0x96, 0x29, 0xc8, 0xbf, 0xe9, 0xb0, 0x74, 0x78, 0x7b,
1596 0xf7, 0xea, 0xa3, 0x14, 0x12, 0x56, 0xe0, 0xf3, 0x35, 0x7a, 0x26, 0x4a,
1597 0x4c, 0xe6, 0xdf, 0x13, 0xb5, 0x52, 0xb0, 0x2a, 0x5f, 0x2e, 0xac, 0x34,
1598 0xab, 0x5f, 0x1a, 0x01, 0xe4, 0x15, 0x1a, 0xd1, 0xbf, 0xc9, 0x95, 0x0a,
1599 0xac, 0x1d, 0xe7, 0x53, 0x59, 0x8d, 0xc3, 0x21, 0x78, 0x5e, 0x12, 0x97,
1600 0x8f, 0x4e, 0x1d, 0xf9, 0xe5, 0xe2, 0xc2, 0xc4, 0xba, 0xfb, 0x50, 0x96,
1601 0x5b, 0x43, 0xe8, 0xf7, 0x0d, 0x1b, 0x64, 0x58, 0xbe, 0xd3, 0x95, 0x7f,
1602 0x8e, 0xf1, 0x85, 0x35, 0xba, 0x25, 0x55, 0x2e, 0x02, 0x46, 0x5c, 0xad,
1603 0x1f, 0xc5, 0x03, 0xcc, 0xd0, 0x43, 0x4c, 0xf2, 0x5e, 0x64, 0x0a, 0x89,
1604 0xd9, 0xfd, 0x23, 0x7d, 0x4f, 0xbe, 0x2f, 0x0f, 0x1e, 0x12, 0x4a, 0xd9,
1605 0xf8, 0x82, 0xde, 0x8f, 0x4f, 0x98, 0xb9, 0x90, 0xf6, 0xfa, 0xd1, 0x11,
1606 0xa6, 0xdc, 0x7e, 0x32, 0x48, 0x6a, 0x8a, 0x14, 0x5e, 0x73, 0xb9, 0x6c,
1607 0x0e, 0xc2, 0xf9, 0xcc, 0xf0, 0x32, 0xc8, 0xb5, 0x56, 0xaa, 0x5d, 0xd2,
1608 0x07, 0xf1, 0x6f, 0x33, 0x6f, 0x05, 0x70, 0x49, 0x60, 0x49, 0x23, 0x23,
1609 0x14, 0x0e, 0x4c, 0x58, 0x92, 0xad, 0xa9, 0x50, 0xb1, 0x59, 0x43, 0x96,
1610 0x7b, 0xc1, 0x51, 0x45, 0xef, 0x0d, 0xef, 0xd1, 0xe4, 0xd0, 0xce, 0xdf,
1611 0x6a, 0xbc, 0x1b, 0xbf, 0x7a, 0x87, 0x4e, 0x47, 0x17, 0x9c, 0x34, 0x38,
1612 0xb0, 0x3c, 0xa1, 0x04, 0xfb, 0xe2, 0x66, 0xce, 0xb6, 0x82, 0xbb, 0xad,
1613 0xc3, 0x8e, 0x12, 0x35, 0xbc, 0x17, 0xce, 0x01, 0x2d, 0xa3, 0xa6, 0xb9,
1614 0xfa, 0x84, 0xc2, 0x2f, 0x5a, 0x4a, 0x8c, 0x4c, 0x11, 0x4e, 0xa8, 0x14,
1615 0xcb, 0xb8, 0x99, 0xaa, 0x2e, 0x8c, 0xa0, 0xc9, 0x5f, 0x62, 0x2a, 0x84,
1616 0x66, 0x60, 0x0a, 0x7e, 0xdc, 0x93, 0x17, 0x45, 0x19, 0xb3, 0x93, 0x4c,
1617 0xdc, 0xd0, 0xd5, 0x5c, 0x25, 0xd2, 0xcd, 0x4e, 0x84, 0x4c, 0x73, 0xb3,
1618 0x90, 0xa4, 0x22, 0x05, 0x2c, 0x7c, 0x39, 0x2b, 0x70, 0xd9, 0x61, 0x76,
1619 0xb2, 0x03, 0x71, 0xe9, 0x0e, 0xf8, 0x57, 0x85, 0xad, 0xb1, 0x2f, 0x34,
1620 0xa5, 0x66, 0xb0, 0x0f, 0x75, 0x94, 0x6e, 0x26, 0x79, 0x99, 0xb4, 0xe2,
1621 0xe2, 0xa3, 0x58, 0xdd, 0xb4, 0xfb, 0x74, 0xf4, 0xa1, 0xca, 0xc3, 0x30,
1622 0xe7, 0x86, 0xb2, 0xa2, 0x2c, 0x11, 0xc9, 0x58, 0xe3, 0xc1, 0xa6, 0x5f,
1623 0x86, 0x6a, 0xe7, 0x75, 0xd5, 0xd8, 0x63, 0x95, 0x64, 0x59, 0xbc, 0xb8,
1624 0xb7, 0xf5, 0x12, 0xe3, 0x03, 0xc6, 0x17, 0xea, 0x4e, 0xcb, 0xee, 0x4c,
1625 0xae, 0x03, 0xd1, 0x33, 0xd0, 0x39, 0x36, 0x00, 0x0f, 0xf4, 0x9c, 0xbd,
1626 0x35, 0x96, 0xfd, 0x0d, 0x26, 0xb7, 0x9e, 0xf4, 0x4b, 0x6f, 0x4b, 0xf1,
1627 0xec, 0x11, 0x00, 0x16, 0x21, 0x1e, 0xd4, 0x43, 0x23, 0x8c, 0x4a, 0xfa,
1628 0x9e, 0xd4, 0x2b, 0x36, 0x9a, 0x43, 0x1e, 0x58, 0x31, 0xe8, 0x1f, 0x83,
1629 0x15, 0x20, 0x31, 0x68, 0xfe, 0x27, 0xd3, 0xd8, 0x9b, 0x43, 0x81, 0x8f,
1630 0x57, 0x32, 0x14, 0xe6, 0x9e, 0xbf, 0xd1, 0xfb, 0xdf, 0xad, 0x7a, 0x52,
1631 };
1632
1633 /* left_shift_3 sets |n| to |n|*8, where |n| is represented in little-endian
1634 * order. */
left_shift_3(uint8_t n[32])1635 static void left_shift_3(uint8_t n[32]) {
1636 uint8_t carry = 0;
1637 unsigned i;
1638
1639 for (i = 0; i < 32; i++) {
1640 const uint8_t next_carry = n[i] >> 5;
1641 n[i] = (n[i] << 3) | carry;
1642 carry = next_carry;
1643 }
1644 }
1645
1646 static krb5_error_code
builtin_edwards25519_keygen(krb5_context context,groupdata * gdata,const uint8_t * wbytes,krb5_boolean use_m,uint8_t * priv_out,uint8_t * pub_out)1647 builtin_edwards25519_keygen(krb5_context context, groupdata *gdata,
1648 const uint8_t *wbytes, krb5_boolean use_m,
1649 uint8_t *priv_out, uint8_t *pub_out)
1650 {
1651 uint8_t private[64];
1652 krb5_data data = make_data(private, 32);
1653 krb5_error_code ret;
1654
1655 /* Pick x or y uniformly from [0, p*h) divisible by h. */
1656 ret = krb5_c_random_make_octets(context, &data);
1657 if (ret)
1658 return ret;
1659 memset(private + 32, 0, 32);
1660 x25519_sc_reduce(private);
1661 left_shift_3(private);
1662
1663 /* Compute X=x*G or Y=y*G. */
1664 ge_p3 P;
1665 x25519_ge_scalarmult_base(&P, private);
1666
1667 /* Compute w mod p. */
1668 uint8_t wreduced[64];
1669 memcpy(wreduced, wbytes, 32);
1670 memset(wreduced + 32, 0, 32);
1671 x25519_sc_reduce(wreduced);
1672
1673 /* Compute the mask, w*M or w*N. */
1674 ge_p3 mask;
1675 x25519_ge_scalarmult_small_precomp(&mask, wreduced,
1676 use_m ? kSpakeMSmallPrecomp :
1677 kSpakeNSmallPrecomp);
1678
1679 /* Compute the masked point T=w*M+X or S=w*N+Y. */
1680 ge_cached mask_cached;
1681 x25519_ge_p3_to_cached(&mask_cached, &mask);
1682 ge_p1p1 Pmasked;
1683 x25519_ge_add(&Pmasked, &P, &mask_cached);
1684
1685 /* Encode T or S into pub_out. */
1686 ge_p2 Pmasked_proj;
1687 x25519_ge_p1p1_to_p2(&Pmasked_proj, &Pmasked);
1688 x25519_ge_tobytes(pub_out, &Pmasked_proj);
1689
1690 /* Remember the private key in priv_out. */
1691 memcpy(priv_out, private, 32);
1692 return 0;
1693 }
1694
1695 static krb5_error_code
builtin_edwards25519_result(krb5_context context,groupdata * gdata,const uint8_t * wbytes,const uint8_t * ourpriv,const uint8_t * theirpub,krb5_boolean use_m,uint8_t * elem_out)1696 builtin_edwards25519_result(krb5_context context, groupdata *gdata,
1697 const uint8_t *wbytes, const uint8_t *ourpriv,
1698 const uint8_t *theirpub, krb5_boolean use_m,
1699 uint8_t *elem_out)
1700 {
1701 /*
1702 * Check if the point received from peer is on the curve. This does not
1703 * verify that it is in the generator subgroup, but since our private key is
1704 * a multiple of the cofactor, the shared point will be in the generator
1705 * subgroup even if a rogue peer sends a point which is not.
1706 */
1707 ge_p3 Qmasked;
1708 if (!x25519_ge_frombytes_vartime(&Qmasked, theirpub))
1709 return EINVAL;
1710
1711 /* Compute w mod p. */
1712 uint8_t wreduced[64];
1713 memcpy(wreduced, wbytes, 32);
1714 memset(wreduced + 32, 0, 32);
1715 x25519_sc_reduce(wreduced);
1716
1717 /* Compute the peer's mask, w*M or w*N. */
1718 ge_p3 peers_mask;
1719 x25519_ge_scalarmult_small_precomp(&peers_mask, wreduced,
1720 use_m ? kSpakeMSmallPrecomp :
1721 kSpakeNSmallPrecomp);
1722
1723 ge_cached peers_mask_cached;
1724 x25519_ge_p3_to_cached(&peers_mask_cached, &peers_mask);
1725
1726 /* Compute the peer's unmasked point, T-w*M or S-w*N. */
1727 ge_p1p1 Qcompl;
1728 ge_p3 Qunmasked;
1729 x25519_ge_sub(&Qcompl, &Qmasked, &peers_mask_cached);
1730 x25519_ge_p1p1_to_p3(&Qunmasked, &Qcompl);
1731
1732 /* Multiply by our private value to compute K=x*(S-w*N) or K=y*(T-w*M). */
1733 ge_p2 K;
1734 x25519_ge_scalarmult(&K, ourpriv, &Qunmasked);
1735
1736 /* Encode K into elem_out. */
1737 x25519_ge_tobytes(elem_out, &K);
1738 return 0;
1739 }
1740
1741 static krb5_error_code
builtin_sha256(krb5_context context,groupdata * gdata,const krb5_data * dlist,size_t ndata,uint8_t * result_out)1742 builtin_sha256(krb5_context context, groupdata *gdata, const krb5_data *dlist,
1743 size_t ndata, uint8_t *result_out)
1744 {
1745 return k5_sha256(dlist, ndata, result_out);
1746 }
1747
1748 groupdef builtin_edwards25519 = {
1749 .reg = &spake_iana_edwards25519,
1750 .keygen = builtin_edwards25519_keygen,
1751 .result = builtin_edwards25519_result,
1752 .hash = builtin_sha256
1753 };
1754