xref: /freebsd/crypto/krb5/src/plugins/preauth/spake/edwards25519.c (revision 7f2fe78b9dd5f51c821d771b63d2e096f6fd49e9)
1 /* -*- mode: c; c-basic-offset: 2; indent-tabs-mode: nil -*- */
2 /* This file is adapted from the SPAKE edwards25519 code in BoringSSL. */
3 /*
4  * The MIT License (MIT)
5  *
6  * Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file).
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to
10  * deal in the Software without restriction, including without limitation the
11  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12  * sell copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24  * IN THE SOFTWARE.
25  */
26 /*
27  * Copyright (c) 2015-2016, Google Inc.
28  *
29  * Permission to use, copy, modify, and/or distribute this software for any
30  * purpose with or without fee is hereby granted, provided that the above
31  * copyright notice and this permission notice appear in all copies.
32  *
33  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
34  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
35  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
37  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
38  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
39  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
40  */
41 
42 /*
43  * This code is adapted from the BoringSSL edwards25519 SPAKE2 implementation
44  * from third_party/fiat and crypto/spake25519.c, with the following
45  * adaptations:
46  *
47  * - The M and N points are the ones from draft-irtf-cfrg-spake2-05.  The
48  *   BoringSSL M and N points were determined similarly, but were not
49  *   restricted to members of the generator subgroup, so they use only one hash
50  *   iteration for both points.  The intent in BoringSSL had been to multiply w
51  *   by the cofactor so that wM and wN would be in the subgroup, but as that
52  *   step was accidentally omitted, a hack had to be introduced after the fact
53  *   to add multiples of the prime order to the scalar.  That hack is not
54  *   present in this code, and the SPAKE preauth spec does not multiply w by
55  *   the cofactor as it is unnecessary if M and N are chosen from the subgroup.
56  *
57  * - The SPAKE code is modified to fit the groups.h interface and the SPAKE
58  *   preauth spec.
59  *
60  * - The required declarations and code are all here in one file (except for
61  *   the generator point table, which is still in a separate header), so all of
62  *   the functions are declared static.
63  *
64  * - BORINGSSL_CURVE25519_64BIT is defined here using autoconf tests.
65  *
66  * - curve25519_32.h and curve25519_64.h are combined into edwards25519_fiat.h
67  *   (conditionalized on BORINGSSL_CURVE25519_64BIT) for predictable dependency
68  *   generation.  The fiat_25519_selectznz and fiat_25519_carry_scmul_121666
69  *   functions were removed from both branches as they are not used here (the
70  *   former because it is not used by the BoringSSL code and the latter because
71  *   it is only used by the X25519 code).  The fiat_25519_int128 and
72  *   fiat_25519_uint128 typedefs were adjusted to work with older versions of
73  *   gcc.
74  *
75  * - fe_cmov() has the initial "Silence an unused function warning" part
76  *   removed, as we removed fiat_25519_selectznz instead.
77  *
78  * - The field element bounds assertion checks are disabled by default, as they
79  *   slow the code down by roughly a factor of two.  The
80  *   OPENSSL_COMPILE_ASSERT() in fe_copy_lt() is changed to a regular assert
81  *   and is also conditionalized.  Do a build and "make check" with
82  *   EDWARDS25519_ASSERTS defined when updating this code.
83  *
84  * - The copyright comments at the top are formatted the way we do so in other
85  *   source files, for ease of extraction.
86  *
87  * - Declarations in for loops conflict with our compiler configuration in
88  *   older versions of gcc, so they are moved outside of the for loop.
89  *
90  * - The preprocessor symbol OPENSSL_SMALL is changed to CONFIG_SMALL.
91  *
92  * - OPENSSL_memset and OPENSSL_memmove are changed to memset and memmove, in
93  *   each case verifying that they are used with nonzero length arguments.
94  *
95  * - CRYPTO_memcmp is changed to k5_bcmp.
96  *
97  * - Functions used only by X25519 or Ed25519 interfaces but not SPAKE are
98  *   removed, taking care to check for unused functions in both the 64-bit and
99  *   32-bit preprocessor branches.  ge_p3_dbl() is unused here if CONFIG_SMALL
100  *   is defined, so it is placed inside #ifndef CONFIG_SMALL.
101  */
102 
103 // Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP
104 // 20141124 (https://bench.cr.yp.to/supercop.html). That code is released as
105 // public domain but parts have been replaced with code generated by Fiat
106 // (https://github.com/mit-plv/fiat-crypto), which is MIT licensed.
107 
108 #include "groups.h"
109 #include "iana.h"
110 
111 #ifdef __GNUC__
112 #pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
113 #endif
114 
115 #if SIZEOF_SIZE_T >= 8 && defined(HAVE___INT128_T) && defined(HAVE___UINT128_T)
116 #define BORINGSSL_CURVE25519_64BIT
117 typedef __int128_t int128_t;
118 typedef __uint128_t uint128_t;
119 #endif
120 
121 /* From BoringSSL third-party/fiat/internal.h */
122 
123 #if defined(BORINGSSL_CURVE25519_64BIT)
124 // fe means field element. Here the field is \Z/(2^255-19). An element t,
125 // entries t[0]...t[4], represents the integer t[0]+2^51 t[1]+2^102 t[2]+2^153
126 // t[3]+2^204 t[4].
127 // fe limbs are bounded by 1.125*2^51.
128 // Multiplication and carrying produce fe from fe_loose.
129 typedef struct fe { uint64_t v[5]; } fe;
130 
131 // fe_loose limbs are bounded by 3.375*2^51.
132 // Addition and subtraction produce fe_loose from (fe, fe).
133 typedef struct fe_loose { uint64_t v[5]; } fe_loose;
134 #else
135 // fe means field element. Here the field is \Z/(2^255-19). An element t,
136 // entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
137 // t[3]+2^102 t[4]+...+2^230 t[9].
138 // fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
139 // Multiplication and carrying produce fe from fe_loose.
140 typedef struct fe { uint32_t v[10]; } fe;
141 
142 // fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc.
143 // Addition and subtraction produce fe_loose from (fe, fe).
144 typedef struct fe_loose { uint32_t v[10]; } fe_loose;
145 #endif
146 
147 // ge means group element.
148 //
149 // Here the group is the set of pairs (x,y) of field elements (see fe.h)
150 // satisfying -x^2 + y^2 = 1 + d x^2y^2
151 // where d = -121665/121666.
152 //
153 // Representations:
154 //   ge_p2 (projective): (X:Y:Z) satisfying x=X/Z, y=Y/Z
155 //   ge_p3 (extended): (X:Y:Z:T) satisfying x=X/Z, y=Y/Z, XY=ZT
156 //   ge_p1p1 (completed): ((X:Z),(Y:T)) satisfying x=X/Z, y=Y/T
157 //   ge_precomp (Duif): (y+x,y-x,2dxy)
158 
159 typedef struct {
160   fe X;
161   fe Y;
162   fe Z;
163 } ge_p2;
164 
165 typedef struct {
166   fe X;
167   fe Y;
168   fe Z;
169   fe T;
170 } ge_p3;
171 
172 typedef struct {
173   fe_loose X;
174   fe_loose Y;
175   fe_loose Z;
176   fe_loose T;
177 } ge_p1p1;
178 
179 typedef struct {
180   fe_loose yplusx;
181   fe_loose yminusx;
182   fe_loose xy2d;
183 } ge_precomp;
184 
185 typedef struct {
186   fe_loose YplusX;
187   fe_loose YminusX;
188   fe_loose Z;
189   fe_loose T2d;
190 } ge_cached;
191 
192 #include "edwards25519_tables.h"
193 #include "edwards25519_fiat.h"
194 
195 /* From BoringSSL third-party/fiat/curve25519.c */
196 
load_3(const uint8_t * in)197 static uint64_t load_3(const uint8_t *in) {
198   uint64_t result;
199   result = (uint64_t)in[0];
200   result |= ((uint64_t)in[1]) << 8;
201   result |= ((uint64_t)in[2]) << 16;
202   return result;
203 }
204 
load_4(const uint8_t * in)205 static uint64_t load_4(const uint8_t *in) {
206   uint64_t result;
207   result = (uint64_t)in[0];
208   result |= ((uint64_t)in[1]) << 8;
209   result |= ((uint64_t)in[2]) << 16;
210   result |= ((uint64_t)in[3]) << 24;
211   return result;
212 }
213 
214 
215 // Field operations.
216 
217 #if defined(BORINGSSL_CURVE25519_64BIT)
218 
219 typedef uint64_t fe_limb_t;
220 #define FE_NUM_LIMBS 5
221 
222 // assert_fe asserts that |f| satisfies bounds:
223 //
224 //  [[0x0 ~> 0x8cccccccccccc],
225 //   [0x0 ~> 0x8cccccccccccc],
226 //   [0x0 ~> 0x8cccccccccccc],
227 //   [0x0 ~> 0x8cccccccccccc],
228 //   [0x0 ~> 0x8cccccccccccc]]
229 //
230 // See comments in edwards25519_fiat.h for which functions use these bounds for
231 // inputs or outputs.
232 #define assert_fe(f)                                                    \
233   do {                                                                  \
234     for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
235       assert(f[_assert_fe_i] <= UINT64_C(0x8cccccccccccc));             \
236     }                                                                   \
237   } while (0)
238 
239 // assert_fe_loose asserts that |f| satisfies bounds:
240 //
241 //  [[0x0 ~> 0x1a666666666664],
242 //   [0x0 ~> 0x1a666666666664],
243 //   [0x0 ~> 0x1a666666666664],
244 //   [0x0 ~> 0x1a666666666664],
245 //   [0x0 ~> 0x1a666666666664]]
246 //
247 // See comments in edwards25519_fiat.h for which functions use these bounds for
248 // inputs or outputs.
249 #define assert_fe_loose(f)                                              \
250   do {                                                                  \
251     for (unsigned _assert_fe_i = 0; _assert_fe_i < 5; _assert_fe_i++) { \
252       assert(f[_assert_fe_i] <= UINT64_C(0x1a666666666664));            \
253     }                                                                   \
254   } while (0)
255 
256 #else
257 
258 typedef uint32_t fe_limb_t;
259 #define FE_NUM_LIMBS 10
260 
261 // assert_fe asserts that |f| satisfies bounds:
262 //
263 //  [[0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
264 //   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
265 //   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
266 //   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333],
267 //   [0x0 ~> 0x4666666], [0x0 ~> 0x2333333]]
268 //
269 // See comments in edwards25519_fiat.h for which functions use these bounds for
270 // inputs or outputs.
271 #define assert_fe(f)                                                     \
272   do {                                                                   \
273     for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
274       assert(f[_assert_fe_i] <=                                          \
275              ((_assert_fe_i & 1) ? 0x2333333u : 0x4666666u));            \
276     }                                                                    \
277   } while (0)
278 
279 // assert_fe_loose asserts that |f| satisfies bounds:
280 //
281 //  [[0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
282 //   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
283 //   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
284 //   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999],
285 //   [0x0 ~> 0xd333332], [0x0 ~> 0x6999999]]
286 //
287 // See comments in edwards25519_fiat.h for which functions use these bounds for
288 // inputs or outputs.
289 #define assert_fe_loose(f)                                               \
290   do {                                                                   \
291     for (unsigned _assert_fe_i = 0; _assert_fe_i < 10; _assert_fe_i++) { \
292       assert(f[_assert_fe_i] <=                                          \
293              ((_assert_fe_i & 1) ? 0x6999999u : 0xd333332u));            \
294     }                                                                    \
295   } while (0)
296 
297 #endif  // BORINGSSL_CURVE25519_64BIT
298 
299 #ifndef EDWARDS25519_ASSERTS
300 #undef assert_fe
301 #undef assert_fe_loose
302 #define assert_fe(f)
303 #define assert_fe_loose(f)
304 #endif
305 
fe_frombytes_strict(fe * h,const uint8_t s[32])306 static void fe_frombytes_strict(fe *h, const uint8_t s[32]) {
307   // |fiat_25519_from_bytes| requires the top-most bit be clear.
308   assert((s[31] & 0x80) == 0);
309   fiat_25519_from_bytes(h->v, s);
310   assert_fe(h->v);
311 }
312 
fe_frombytes(fe * h,const uint8_t s[32])313 static void fe_frombytes(fe *h, const uint8_t s[32]) {
314   uint8_t s_copy[32];
315   memcpy(s_copy, s, 32);
316   s_copy[31] &= 0x7f;
317   fe_frombytes_strict(h, s_copy);
318 }
319 
fe_tobytes(uint8_t s[32],const fe * f)320 static void fe_tobytes(uint8_t s[32], const fe *f) {
321   assert_fe(f->v);
322   fiat_25519_to_bytes(s, f->v);
323 }
324 
325 // h = 0
fe_0(fe * h)326 static void fe_0(fe *h) {
327   memset(h, 0, sizeof(fe));
328 }
329 
fe_loose_0(fe_loose * h)330 static void fe_loose_0(fe_loose *h) {
331   memset(h, 0, sizeof(fe_loose));
332 }
333 
334 // h = 1
fe_1(fe * h)335 static void fe_1(fe *h) {
336   memset(h, 0, sizeof(fe));
337   h->v[0] = 1;
338 }
339 
fe_loose_1(fe_loose * h)340 static void fe_loose_1(fe_loose *h) {
341   memset(h, 0, sizeof(fe_loose));
342   h->v[0] = 1;
343 }
344 
345 // h = f + g
346 // Can overlap h with f or g.
fe_add(fe_loose * h,const fe * f,const fe * g)347 static void fe_add(fe_loose *h, const fe *f, const fe *g) {
348   assert_fe(f->v);
349   assert_fe(g->v);
350   fiat_25519_add(h->v, f->v, g->v);
351   assert_fe_loose(h->v);
352 }
353 
354 // h = f - g
355 // Can overlap h with f or g.
fe_sub(fe_loose * h,const fe * f,const fe * g)356 static void fe_sub(fe_loose *h, const fe *f, const fe *g) {
357   assert_fe(f->v);
358   assert_fe(g->v);
359   fiat_25519_sub(h->v, f->v, g->v);
360   assert_fe_loose(h->v);
361 }
362 
fe_carry(fe * h,const fe_loose * f)363 static void fe_carry(fe *h, const fe_loose* f) {
364   assert_fe_loose(f->v);
365   fiat_25519_carry(h->v, f->v);
366   assert_fe(h->v);
367 }
368 
fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],const fe_limb_t in1[FE_NUM_LIMBS],const fe_limb_t in2[FE_NUM_LIMBS])369 static void fe_mul_impl(fe_limb_t out[FE_NUM_LIMBS],
370                         const fe_limb_t in1[FE_NUM_LIMBS],
371                         const fe_limb_t in2[FE_NUM_LIMBS]) {
372   assert_fe_loose(in1);
373   assert_fe_loose(in2);
374   fiat_25519_carry_mul(out, in1, in2);
375   assert_fe(out);
376 }
377 
fe_mul_ltt(fe_loose * h,const fe * f,const fe * g)378 static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) {
379   fe_mul_impl(h->v, f->v, g->v);
380 }
381 
fe_mul_llt(fe_loose * h,const fe_loose * f,const fe * g)382 static void fe_mul_llt(fe_loose *h, const fe_loose *f, const fe *g) {
383   fe_mul_impl(h->v, f->v, g->v);
384 }
385 
fe_mul_ttt(fe * h,const fe * f,const fe * g)386 static void fe_mul_ttt(fe *h, const fe *f, const fe *g) {
387   fe_mul_impl(h->v, f->v, g->v);
388 }
389 
fe_mul_tlt(fe * h,const fe_loose * f,const fe * g)390 static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) {
391   fe_mul_impl(h->v, f->v, g->v);
392 }
393 
fe_mul_ttl(fe * h,const fe * f,const fe_loose * g)394 static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) {
395   fe_mul_impl(h->v, f->v, g->v);
396 }
397 
fe_mul_tll(fe * h,const fe_loose * f,const fe_loose * g)398 static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) {
399   fe_mul_impl(h->v, f->v, g->v);
400 }
401 
fe_sq_tl(fe * h,const fe_loose * f)402 static void fe_sq_tl(fe *h, const fe_loose *f) {
403   assert_fe_loose(f->v);
404   fiat_25519_carry_square(h->v, f->v);
405   assert_fe(h->v);
406 }
407 
fe_sq_tt(fe * h,const fe * f)408 static void fe_sq_tt(fe *h, const fe *f) {
409   assert_fe_loose(f->v);
410   fiat_25519_carry_square(h->v, f->v);
411   assert_fe(h->v);
412 }
413 
414 // h = -f
fe_neg(fe_loose * h,const fe * f)415 static void fe_neg(fe_loose *h, const fe *f) {
416   assert_fe(f->v);
417   fiat_25519_opp(h->v, f->v);
418   assert_fe_loose(h->v);
419 }
420 
421 // Replace (f,g) with (g,g) if b == 1;
422 // replace (f,g) with (f,g) if b == 0.
423 //
424 // Preconditions: b in {0,1}.
fe_cmov(fe_loose * f,const fe_loose * g,fe_limb_t b)425 static void fe_cmov(fe_loose *f, const fe_loose *g, fe_limb_t b) {
426   b = 0-b;
427   unsigned i;
428   for (i = 0; i < FE_NUM_LIMBS; i++) {
429     fe_limb_t x = f->v[i] ^ g->v[i];
430     x &= b;
431     f->v[i] ^= x;
432   }
433 }
434 
435 // h = f
fe_copy(fe * h,const fe * f)436 static void fe_copy(fe *h, const fe *f) {
437   memmove(h, f, sizeof(fe));
438 }
439 
fe_copy_lt(fe_loose * h,const fe * f)440 static void fe_copy_lt(fe_loose *h, const fe *f) {
441 #ifdef EDWARDS25519_ASSERTS
442   assert(sizeof(fe_loose) == sizeof(fe));
443 #endif
444   memmove(h, f, sizeof(fe));
445 }
446 #if !defined(CONFIG_SMALL)
fe_copy_ll(fe_loose * h,const fe_loose * f)447 static void fe_copy_ll(fe_loose *h, const fe_loose *f) {
448   memmove(h, f, sizeof(fe_loose));
449 }
450 #endif // !defined(CONFIG_SMALL)
451 
fe_loose_invert(fe * out,const fe_loose * z)452 static void fe_loose_invert(fe *out, const fe_loose *z) {
453   fe t0;
454   fe t1;
455   fe t2;
456   fe t3;
457   int i;
458 
459   fe_sq_tl(&t0, z);
460   fe_sq_tt(&t1, &t0);
461   for (i = 1; i < 2; ++i) {
462     fe_sq_tt(&t1, &t1);
463   }
464   fe_mul_tlt(&t1, z, &t1);
465   fe_mul_ttt(&t0, &t0, &t1);
466   fe_sq_tt(&t2, &t0);
467   fe_mul_ttt(&t1, &t1, &t2);
468   fe_sq_tt(&t2, &t1);
469   for (i = 1; i < 5; ++i) {
470     fe_sq_tt(&t2, &t2);
471   }
472   fe_mul_ttt(&t1, &t2, &t1);
473   fe_sq_tt(&t2, &t1);
474   for (i = 1; i < 10; ++i) {
475     fe_sq_tt(&t2, &t2);
476   }
477   fe_mul_ttt(&t2, &t2, &t1);
478   fe_sq_tt(&t3, &t2);
479   for (i = 1; i < 20; ++i) {
480     fe_sq_tt(&t3, &t3);
481   }
482   fe_mul_ttt(&t2, &t3, &t2);
483   fe_sq_tt(&t2, &t2);
484   for (i = 1; i < 10; ++i) {
485     fe_sq_tt(&t2, &t2);
486   }
487   fe_mul_ttt(&t1, &t2, &t1);
488   fe_sq_tt(&t2, &t1);
489   for (i = 1; i < 50; ++i) {
490     fe_sq_tt(&t2, &t2);
491   }
492   fe_mul_ttt(&t2, &t2, &t1);
493   fe_sq_tt(&t3, &t2);
494   for (i = 1; i < 100; ++i) {
495     fe_sq_tt(&t3, &t3);
496   }
497   fe_mul_ttt(&t2, &t3, &t2);
498   fe_sq_tt(&t2, &t2);
499   for (i = 1; i < 50; ++i) {
500     fe_sq_tt(&t2, &t2);
501   }
502   fe_mul_ttt(&t1, &t2, &t1);
503   fe_sq_tt(&t1, &t1);
504   for (i = 1; i < 5; ++i) {
505     fe_sq_tt(&t1, &t1);
506   }
507   fe_mul_ttt(out, &t1, &t0);
508 }
509 
fe_invert(fe * out,const fe * z)510 static void fe_invert(fe *out, const fe *z) {
511   fe_loose l;
512   fe_copy_lt(&l, z);
513   fe_loose_invert(out, &l);
514 }
515 
516 // return 0 if f == 0
517 // return 1 if f != 0
fe_isnonzero(const fe_loose * f)518 static int fe_isnonzero(const fe_loose *f) {
519   fe tight;
520   fe_carry(&tight, f);
521   uint8_t s[32];
522   fe_tobytes(s, &tight);
523 
524   static const uint8_t zero[32] = {0};
525   return k5_bcmp(s, zero, sizeof(zero)) != 0;
526 }
527 
528 // return 1 if f is in {1,3,5,...,q-2}
529 // return 0 if f is in {0,2,4,...,q-1}
fe_isnegative(const fe * f)530 static int fe_isnegative(const fe *f) {
531   uint8_t s[32];
532   fe_tobytes(s, f);
533   return s[0] & 1;
534 }
535 
fe_sq2_tt(fe * h,const fe * f)536 static void fe_sq2_tt(fe *h, const fe *f) {
537   // h = f^2
538   fe_sq_tt(h, f);
539 
540   // h = h + h
541   fe_loose tmp;
542   fe_add(&tmp, h, h);
543   fe_carry(h, &tmp);
544 }
545 
fe_pow22523(fe * out,const fe * z)546 static void fe_pow22523(fe *out, const fe *z) {
547   fe t0;
548   fe t1;
549   fe t2;
550   int i;
551 
552   fe_sq_tt(&t0, z);
553   fe_sq_tt(&t1, &t0);
554   for (i = 1; i < 2; ++i) {
555     fe_sq_tt(&t1, &t1);
556   }
557   fe_mul_ttt(&t1, z, &t1);
558   fe_mul_ttt(&t0, &t0, &t1);
559   fe_sq_tt(&t0, &t0);
560   fe_mul_ttt(&t0, &t1, &t0);
561   fe_sq_tt(&t1, &t0);
562   for (i = 1; i < 5; ++i) {
563     fe_sq_tt(&t1, &t1);
564   }
565   fe_mul_ttt(&t0, &t1, &t0);
566   fe_sq_tt(&t1, &t0);
567   for (i = 1; i < 10; ++i) {
568     fe_sq_tt(&t1, &t1);
569   }
570   fe_mul_ttt(&t1, &t1, &t0);
571   fe_sq_tt(&t2, &t1);
572   for (i = 1; i < 20; ++i) {
573     fe_sq_tt(&t2, &t2);
574   }
575   fe_mul_ttt(&t1, &t2, &t1);
576   fe_sq_tt(&t1, &t1);
577   for (i = 1; i < 10; ++i) {
578     fe_sq_tt(&t1, &t1);
579   }
580   fe_mul_ttt(&t0, &t1, &t0);
581   fe_sq_tt(&t1, &t0);
582   for (i = 1; i < 50; ++i) {
583     fe_sq_tt(&t1, &t1);
584   }
585   fe_mul_ttt(&t1, &t1, &t0);
586   fe_sq_tt(&t2, &t1);
587   for (i = 1; i < 100; ++i) {
588     fe_sq_tt(&t2, &t2);
589   }
590   fe_mul_ttt(&t1, &t2, &t1);
591   fe_sq_tt(&t1, &t1);
592   for (i = 1; i < 50; ++i) {
593     fe_sq_tt(&t1, &t1);
594   }
595   fe_mul_ttt(&t0, &t1, &t0);
596   fe_sq_tt(&t0, &t0);
597   for (i = 1; i < 2; ++i) {
598     fe_sq_tt(&t0, &t0);
599   }
600   fe_mul_ttt(out, &t0, z);
601 }
602 
603 
604 // Group operations.
605 
x25519_ge_tobytes(uint8_t s[32],const ge_p2 * h)606 static void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) {
607   fe recip;
608   fe x;
609   fe y;
610 
611   fe_invert(&recip, &h->Z);
612   fe_mul_ttt(&x, &h->X, &recip);
613   fe_mul_ttt(&y, &h->Y, &recip);
614   fe_tobytes(s, &y);
615   s[31] ^= fe_isnegative(&x) << 7;
616 }
617 
x25519_ge_frombytes_vartime(ge_p3 * h,const uint8_t s[32])618 static int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) {
619   fe u;
620   fe_loose v;
621   fe v3;
622   fe vxx;
623   fe_loose check;
624 
625   fe_frombytes(&h->Y, s);
626   fe_1(&h->Z);
627   fe_sq_tt(&v3, &h->Y);
628   fe_mul_ttt(&vxx, &v3, &d);
629   fe_sub(&v, &v3, &h->Z);  // u = y^2-1
630   fe_carry(&u, &v);
631   fe_add(&v, &vxx, &h->Z);  // v = dy^2+1
632 
633   fe_sq_tl(&v3, &v);
634   fe_mul_ttl(&v3, &v3, &v);  // v3 = v^3
635   fe_sq_tt(&h->X, &v3);
636   fe_mul_ttl(&h->X, &h->X, &v);
637   fe_mul_ttt(&h->X, &h->X, &u);  // x = uv^7
638 
639   fe_pow22523(&h->X, &h->X);  // x = (uv^7)^((q-5)/8)
640   fe_mul_ttt(&h->X, &h->X, &v3);
641   fe_mul_ttt(&h->X, &h->X, &u);  // x = uv^3(uv^7)^((q-5)/8)
642 
643   fe_sq_tt(&vxx, &h->X);
644   fe_mul_ttl(&vxx, &vxx, &v);
645   fe_sub(&check, &vxx, &u);
646   if (fe_isnonzero(&check)) {
647     fe_add(&check, &vxx, &u);
648     if (fe_isnonzero(&check)) {
649       return 0;
650     }
651     fe_mul_ttt(&h->X, &h->X, &sqrtm1);
652   }
653 
654   if (fe_isnegative(&h->X) != (s[31] >> 7)) {
655     fe_loose t;
656     fe_neg(&t, &h->X);
657     fe_carry(&h->X, &t);
658   }
659 
660   fe_mul_ttt(&h->T, &h->X, &h->Y);
661   return 1;
662 }
663 
ge_p2_0(ge_p2 * h)664 static void ge_p2_0(ge_p2 *h) {
665   fe_0(&h->X);
666   fe_1(&h->Y);
667   fe_1(&h->Z);
668 }
669 
ge_p3_0(ge_p3 * h)670 static void ge_p3_0(ge_p3 *h) {
671   fe_0(&h->X);
672   fe_1(&h->Y);
673   fe_1(&h->Z);
674   fe_0(&h->T);
675 }
676 
ge_cached_0(ge_cached * h)677 static void ge_cached_0(ge_cached *h) {
678   fe_loose_1(&h->YplusX);
679   fe_loose_1(&h->YminusX);
680   fe_loose_1(&h->Z);
681   fe_loose_0(&h->T2d);
682 }
683 
ge_precomp_0(ge_precomp * h)684 static void ge_precomp_0(ge_precomp *h) {
685   fe_loose_1(&h->yplusx);
686   fe_loose_1(&h->yminusx);
687   fe_loose_0(&h->xy2d);
688 }
689 
690 // r = p
ge_p3_to_p2(ge_p2 * r,const ge_p3 * p)691 static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) {
692   fe_copy(&r->X, &p->X);
693   fe_copy(&r->Y, &p->Y);
694   fe_copy(&r->Z, &p->Z);
695 }
696 
697 // r = p
x25519_ge_p3_to_cached(ge_cached * r,const ge_p3 * p)698 static void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) {
699   fe_add(&r->YplusX, &p->Y, &p->X);
700   fe_sub(&r->YminusX, &p->Y, &p->X);
701   fe_copy_lt(&r->Z, &p->Z);
702   fe_mul_ltt(&r->T2d, &p->T, &d2);
703 }
704 
705 // r = p
x25519_ge_p1p1_to_p2(ge_p2 * r,const ge_p1p1 * p)706 static void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) {
707   fe_mul_tll(&r->X, &p->X, &p->T);
708   fe_mul_tll(&r->Y, &p->Y, &p->Z);
709   fe_mul_tll(&r->Z, &p->Z, &p->T);
710 }
711 
712 // r = p
x25519_ge_p1p1_to_p3(ge_p3 * r,const ge_p1p1 * p)713 static void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) {
714   fe_mul_tll(&r->X, &p->X, &p->T);
715   fe_mul_tll(&r->Y, &p->Y, &p->Z);
716   fe_mul_tll(&r->Z, &p->Z, &p->T);
717   fe_mul_tll(&r->T, &p->X, &p->Y);
718 }
719 
720 // r = p
ge_p1p1_to_cached(ge_cached * r,const ge_p1p1 * p)721 static void ge_p1p1_to_cached(ge_cached *r, const ge_p1p1 *p) {
722   ge_p3 t;
723   x25519_ge_p1p1_to_p3(&t, p);
724   x25519_ge_p3_to_cached(r, &t);
725 }
726 
727 // r = 2 * p
ge_p2_dbl(ge_p1p1 * r,const ge_p2 * p)728 static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) {
729   fe trX, trZ, trT;
730   fe t0;
731 
732   fe_sq_tt(&trX, &p->X);
733   fe_sq_tt(&trZ, &p->Y);
734   fe_sq2_tt(&trT, &p->Z);
735   fe_add(&r->Y, &p->X, &p->Y);
736   fe_sq_tl(&t0, &r->Y);
737 
738   fe_add(&r->Y, &trZ, &trX);
739   fe_sub(&r->Z, &trZ, &trX);
740   fe_carry(&trZ, &r->Y);
741   fe_sub(&r->X, &t0, &trZ);
742   fe_carry(&trZ, &r->Z);
743   fe_sub(&r->T, &trT, &trZ);
744 }
745 
746 #ifndef CONFIG_SMALL
747 // r = 2 * p
ge_p3_dbl(ge_p1p1 * r,const ge_p3 * p)748 static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) {
749   ge_p2 q;
750   ge_p3_to_p2(&q, p);
751   ge_p2_dbl(r, &q);
752 }
753 #endif
754 
755 // r = p + q
ge_madd(ge_p1p1 * r,const ge_p3 * p,const ge_precomp * q)756 static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) {
757   fe trY, trZ, trT;
758 
759   fe_add(&r->X, &p->Y, &p->X);
760   fe_sub(&r->Y, &p->Y, &p->X);
761   fe_mul_tll(&trZ, &r->X, &q->yplusx);
762   fe_mul_tll(&trY, &r->Y, &q->yminusx);
763   fe_mul_tlt(&trT, &q->xy2d, &p->T);
764   fe_add(&r->T, &p->Z, &p->Z);
765   fe_sub(&r->X, &trZ, &trY);
766   fe_add(&r->Y, &trZ, &trY);
767   fe_carry(&trZ, &r->T);
768   fe_add(&r->Z, &trZ, &trT);
769   fe_sub(&r->T, &trZ, &trT);
770 }
771 
772 // r = p + q
x25519_ge_add(ge_p1p1 * r,const ge_p3 * p,const ge_cached * q)773 static void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
774   fe trX, trY, trZ, trT;
775 
776   fe_add(&r->X, &p->Y, &p->X);
777   fe_sub(&r->Y, &p->Y, &p->X);
778   fe_mul_tll(&trZ, &r->X, &q->YplusX);
779   fe_mul_tll(&trY, &r->Y, &q->YminusX);
780   fe_mul_tlt(&trT, &q->T2d, &p->T);
781   fe_mul_ttl(&trX, &p->Z, &q->Z);
782   fe_add(&r->T, &trX, &trX);
783   fe_sub(&r->X, &trZ, &trY);
784   fe_add(&r->Y, &trZ, &trY);
785   fe_carry(&trZ, &r->T);
786   fe_add(&r->Z, &trZ, &trT);
787   fe_sub(&r->T, &trZ, &trT);
788 }
789 
790 // r = p - q
x25519_ge_sub(ge_p1p1 * r,const ge_p3 * p,const ge_cached * q)791 static void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) {
792   fe trX, trY, trZ, trT;
793 
794   fe_add(&r->X, &p->Y, &p->X);
795   fe_sub(&r->Y, &p->Y, &p->X);
796   fe_mul_tll(&trZ, &r->X, &q->YminusX);
797   fe_mul_tll(&trY, &r->Y, &q->YplusX);
798   fe_mul_tlt(&trT, &q->T2d, &p->T);
799   fe_mul_ttl(&trX, &p->Z, &q->Z);
800   fe_add(&r->T, &trX, &trX);
801   fe_sub(&r->X, &trZ, &trY);
802   fe_add(&r->Y, &trZ, &trY);
803   fe_carry(&trZ, &r->T);
804   fe_sub(&r->Z, &trZ, &trT);
805   fe_add(&r->T, &trZ, &trT);
806 }
807 
equal(signed char b,signed char c)808 static uint8_t equal(signed char b, signed char c) {
809   uint8_t ub = b;
810   uint8_t uc = c;
811   uint8_t x = ub ^ uc;  // 0: yes; 1..255: no
812   uint32_t y = x;       // 0: yes; 1..255: no
813   y -= 1;               // 4294967295: yes; 0..254: no
814   y >>= 31;             // 1: yes; 0: no
815   return y;
816 }
817 
cmov(ge_precomp * t,const ge_precomp * u,uint8_t b)818 static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) {
819   fe_cmov(&t->yplusx, &u->yplusx, b);
820   fe_cmov(&t->yminusx, &u->yminusx, b);
821   fe_cmov(&t->xy2d, &u->xy2d, b);
822 }
823 
x25519_ge_scalarmult_small_precomp(ge_p3 * h,const uint8_t a[32],const uint8_t precomp_table[15* 2* 32])824 static void x25519_ge_scalarmult_small_precomp(
825     ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) {
826   // precomp_table is first expanded into matching |ge_precomp|
827   // elements.
828   ge_precomp multiples[15];
829 
830   unsigned i;
831   for (i = 0; i < 15; i++) {
832     // The precomputed table is assumed to already clear the top bit, so
833     // |fe_frombytes_strict| may be used directly.
834     const uint8_t *bytes = &precomp_table[i*(2 * 32)];
835     fe x, y;
836     fe_frombytes_strict(&x, bytes);
837     fe_frombytes_strict(&y, bytes + 32);
838 
839     ge_precomp *out = &multiples[i];
840     fe_add(&out->yplusx, &y, &x);
841     fe_sub(&out->yminusx, &y, &x);
842     fe_mul_ltt(&out->xy2d, &x, &y);
843     fe_mul_llt(&out->xy2d, &out->xy2d, &d2);
844   }
845 
846   // See the comment above |k25519SmallPrecomp| about the structure of the
847   // precomputed elements. This loop does 64 additions and 64 doublings to
848   // calculate the result.
849   ge_p3_0(h);
850 
851   for (i = 63; i < 64; i--) {
852     unsigned j;
853     signed char index = 0;
854 
855     for (j = 0; j < 4; j++) {
856       const uint8_t bit = 1 & (a[(8 * j) + (i / 8)] >> (i & 7));
857       index |= (bit << j);
858     }
859 
860     ge_precomp e;
861     ge_precomp_0(&e);
862 
863     for (j = 1; j < 16; j++) {
864       cmov(&e, &multiples[j-1], equal(index, j));
865     }
866 
867     ge_cached cached;
868     ge_p1p1 r;
869     x25519_ge_p3_to_cached(&cached, h);
870     x25519_ge_add(&r, h, &cached);
871     x25519_ge_p1p1_to_p3(h, &r);
872 
873     ge_madd(&r, h, &e);
874     x25519_ge_p1p1_to_p3(h, &r);
875   }
876 }
877 
878 #if defined(CONFIG_SMALL)
879 
x25519_ge_scalarmult_base(ge_p3 * h,const uint8_t a[32])880 static void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) {
881   x25519_ge_scalarmult_small_precomp(h, a, k25519SmallPrecomp);
882 }
883 
884 #else
885 
negative(signed char b)886 static uint8_t negative(signed char b) {
887   uint32_t x = b;
888   x >>= 31;  // 1: yes; 0: no
889   return x;
890 }
891 
table_select(ge_precomp * t,int pos,signed char b)892 static void table_select(ge_precomp *t, int pos, signed char b) {
893   ge_precomp minust;
894   uint8_t bnegative = negative(b);
895   uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1);
896 
897   ge_precomp_0(t);
898   cmov(t, &k25519Precomp[pos][0], equal(babs, 1));
899   cmov(t, &k25519Precomp[pos][1], equal(babs, 2));
900   cmov(t, &k25519Precomp[pos][2], equal(babs, 3));
901   cmov(t, &k25519Precomp[pos][3], equal(babs, 4));
902   cmov(t, &k25519Precomp[pos][4], equal(babs, 5));
903   cmov(t, &k25519Precomp[pos][5], equal(babs, 6));
904   cmov(t, &k25519Precomp[pos][6], equal(babs, 7));
905   cmov(t, &k25519Precomp[pos][7], equal(babs, 8));
906   fe_copy_ll(&minust.yplusx, &t->yminusx);
907   fe_copy_ll(&minust.yminusx, &t->yplusx);
908 
909   // NOTE: the input table is canonical, but types don't encode it
910   fe tmp;
911   fe_carry(&tmp, &t->xy2d);
912   fe_neg(&minust.xy2d, &tmp);
913 
914   cmov(t, &minust, bnegative);
915 }
916 
917 // h = a * B
918 // where a = a[0]+256*a[1]+...+256^31 a[31]
919 // B is the Ed25519 base point (x,4/5) with x positive.
920 //
921 // Preconditions:
922 //   a[31] <= 127
x25519_ge_scalarmult_base(ge_p3 * h,const uint8_t * a)923 static void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) {
924   signed char e[64];
925   signed char carry;
926   ge_p1p1 r;
927   ge_p2 s;
928   ge_precomp t;
929   int i;
930 
931   for (i = 0; i < 32; ++i) {
932     e[2 * i + 0] = (a[i] >> 0) & 15;
933     e[2 * i + 1] = (a[i] >> 4) & 15;
934   }
935   // each e[i] is between 0 and 15
936   // e[63] is between 0 and 7
937 
938   carry = 0;
939   for (i = 0; i < 63; ++i) {
940     e[i] += carry;
941     carry = e[i] + 8;
942     carry >>= 4;
943     e[i] -= carry << 4;
944   }
945   e[63] += carry;
946   // each e[i] is between -8 and 8
947 
948   ge_p3_0(h);
949   for (i = 1; i < 64; i += 2) {
950     table_select(&t, i / 2, e[i]);
951     ge_madd(&r, h, &t);
952     x25519_ge_p1p1_to_p3(h, &r);
953   }
954 
955   ge_p3_dbl(&r, h);
956   x25519_ge_p1p1_to_p2(&s, &r);
957   ge_p2_dbl(&r, &s);
958   x25519_ge_p1p1_to_p2(&s, &r);
959   ge_p2_dbl(&r, &s);
960   x25519_ge_p1p1_to_p2(&s, &r);
961   ge_p2_dbl(&r, &s);
962   x25519_ge_p1p1_to_p3(h, &r);
963 
964   for (i = 0; i < 64; i += 2) {
965     table_select(&t, i / 2, e[i]);
966     ge_madd(&r, h, &t);
967     x25519_ge_p1p1_to_p3(h, &r);
968   }
969 }
970 
971 #endif
972 
cmov_cached(ge_cached * t,ge_cached * u,uint8_t b)973 static void cmov_cached(ge_cached *t, ge_cached *u, uint8_t b) {
974   fe_cmov(&t->YplusX, &u->YplusX, b);
975   fe_cmov(&t->YminusX, &u->YminusX, b);
976   fe_cmov(&t->Z, &u->Z, b);
977   fe_cmov(&t->T2d, &u->T2d, b);
978 }
979 
980 // r = scalar * A.
981 // where a = a[0]+256*a[1]+...+256^31 a[31].
x25519_ge_scalarmult(ge_p2 * r,const uint8_t * scalar,const ge_p3 * A)982 static void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar,
983                                  const ge_p3 *A) {
984   ge_p2 Ai_p2[8];
985   ge_cached Ai[16];
986   ge_p1p1 t;
987 
988   ge_cached_0(&Ai[0]);
989   x25519_ge_p3_to_cached(&Ai[1], A);
990   ge_p3_to_p2(&Ai_p2[1], A);
991 
992   unsigned i;
993   for (i = 2; i < 16; i += 2) {
994     ge_p2_dbl(&t, &Ai_p2[i / 2]);
995     ge_p1p1_to_cached(&Ai[i], &t);
996     if (i < 8) {
997       x25519_ge_p1p1_to_p2(&Ai_p2[i], &t);
998     }
999     x25519_ge_add(&t, A, &Ai[i]);
1000     ge_p1p1_to_cached(&Ai[i + 1], &t);
1001     if (i < 7) {
1002       x25519_ge_p1p1_to_p2(&Ai_p2[i + 1], &t);
1003     }
1004   }
1005 
1006   ge_p2_0(r);
1007   ge_p3 u;
1008 
1009   for (i = 0; i < 256; i += 4) {
1010     ge_p2_dbl(&t, r);
1011     x25519_ge_p1p1_to_p2(r, &t);
1012     ge_p2_dbl(&t, r);
1013     x25519_ge_p1p1_to_p2(r, &t);
1014     ge_p2_dbl(&t, r);
1015     x25519_ge_p1p1_to_p2(r, &t);
1016     ge_p2_dbl(&t, r);
1017     x25519_ge_p1p1_to_p3(&u, &t);
1018 
1019     uint8_t index = scalar[31 - i/8];
1020     index >>= 4 - (i & 4);
1021     index &= 0xf;
1022 
1023     unsigned j;
1024     ge_cached selected;
1025     ge_cached_0(&selected);
1026     for (j = 0; j < 16; j++) {
1027       cmov_cached(&selected, &Ai[j], equal(j, index));
1028     }
1029 
1030     x25519_ge_add(&t, &u, &selected);
1031     x25519_ge_p1p1_to_p2(r, &t);
1032   }
1033 }
1034 
1035 // int64_lshift21 returns |a << 21| but is defined when shifting bits into the
1036 // sign bit. This works around a language flaw in C.
int64_lshift21(int64_t a)1037 static inline int64_t int64_lshift21(int64_t a) {
1038   return (int64_t)((uint64_t)a << 21);
1039 }
1040 
1041 // The set of scalars is \Z/l
1042 // where l = 2^252 + 27742317777372353535851937790883648493.
1043 
1044 // Input:
1045 //   s[0]+256*s[1]+...+256^63*s[63] = s
1046 //
1047 // Output:
1048 //   s[0]+256*s[1]+...+256^31*s[31] = s mod l
1049 //   where l = 2^252 + 27742317777372353535851937790883648493.
1050 //   Overwrites s in place.
x25519_sc_reduce(uint8_t s[64])1051 static void x25519_sc_reduce(uint8_t s[64]) {
1052   int64_t s0 = 2097151 & load_3(s);
1053   int64_t s1 = 2097151 & (load_4(s + 2) >> 5);
1054   int64_t s2 = 2097151 & (load_3(s + 5) >> 2);
1055   int64_t s3 = 2097151 & (load_4(s + 7) >> 7);
1056   int64_t s4 = 2097151 & (load_4(s + 10) >> 4);
1057   int64_t s5 = 2097151 & (load_3(s + 13) >> 1);
1058   int64_t s6 = 2097151 & (load_4(s + 15) >> 6);
1059   int64_t s7 = 2097151 & (load_3(s + 18) >> 3);
1060   int64_t s8 = 2097151 & load_3(s + 21);
1061   int64_t s9 = 2097151 & (load_4(s + 23) >> 5);
1062   int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
1063   int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
1064   int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
1065   int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
1066   int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
1067   int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
1068   int64_t s16 = 2097151 & load_3(s + 42);
1069   int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
1070   int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
1071   int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
1072   int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
1073   int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
1074   int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
1075   int64_t s23 = (load_4(s + 60) >> 3);
1076   int64_t carry0;
1077   int64_t carry1;
1078   int64_t carry2;
1079   int64_t carry3;
1080   int64_t carry4;
1081   int64_t carry5;
1082   int64_t carry6;
1083   int64_t carry7;
1084   int64_t carry8;
1085   int64_t carry9;
1086   int64_t carry10;
1087   int64_t carry11;
1088   int64_t carry12;
1089   int64_t carry13;
1090   int64_t carry14;
1091   int64_t carry15;
1092   int64_t carry16;
1093 
1094   s11 += s23 * 666643;
1095   s12 += s23 * 470296;
1096   s13 += s23 * 654183;
1097   s14 -= s23 * 997805;
1098   s15 += s23 * 136657;
1099   s16 -= s23 * 683901;
1100   s23 = 0;
1101 
1102   s10 += s22 * 666643;
1103   s11 += s22 * 470296;
1104   s12 += s22 * 654183;
1105   s13 -= s22 * 997805;
1106   s14 += s22 * 136657;
1107   s15 -= s22 * 683901;
1108   s22 = 0;
1109 
1110   s9 += s21 * 666643;
1111   s10 += s21 * 470296;
1112   s11 += s21 * 654183;
1113   s12 -= s21 * 997805;
1114   s13 += s21 * 136657;
1115   s14 -= s21 * 683901;
1116   s21 = 0;
1117 
1118   s8 += s20 * 666643;
1119   s9 += s20 * 470296;
1120   s10 += s20 * 654183;
1121   s11 -= s20 * 997805;
1122   s12 += s20 * 136657;
1123   s13 -= s20 * 683901;
1124   s20 = 0;
1125 
1126   s7 += s19 * 666643;
1127   s8 += s19 * 470296;
1128   s9 += s19 * 654183;
1129   s10 -= s19 * 997805;
1130   s11 += s19 * 136657;
1131   s12 -= s19 * 683901;
1132   s19 = 0;
1133 
1134   s6 += s18 * 666643;
1135   s7 += s18 * 470296;
1136   s8 += s18 * 654183;
1137   s9 -= s18 * 997805;
1138   s10 += s18 * 136657;
1139   s11 -= s18 * 683901;
1140   s18 = 0;
1141 
1142   carry6 = (s6 + (1 << 20)) >> 21;
1143   s7 += carry6;
1144   s6 -= int64_lshift21(carry6);
1145   carry8 = (s8 + (1 << 20)) >> 21;
1146   s9 += carry8;
1147   s8 -= int64_lshift21(carry8);
1148   carry10 = (s10 + (1 << 20)) >> 21;
1149   s11 += carry10;
1150   s10 -= int64_lshift21(carry10);
1151   carry12 = (s12 + (1 << 20)) >> 21;
1152   s13 += carry12;
1153   s12 -= int64_lshift21(carry12);
1154   carry14 = (s14 + (1 << 20)) >> 21;
1155   s15 += carry14;
1156   s14 -= int64_lshift21(carry14);
1157   carry16 = (s16 + (1 << 20)) >> 21;
1158   s17 += carry16;
1159   s16 -= int64_lshift21(carry16);
1160 
1161   carry7 = (s7 + (1 << 20)) >> 21;
1162   s8 += carry7;
1163   s7 -= int64_lshift21(carry7);
1164   carry9 = (s9 + (1 << 20)) >> 21;
1165   s10 += carry9;
1166   s9 -= int64_lshift21(carry9);
1167   carry11 = (s11 + (1 << 20)) >> 21;
1168   s12 += carry11;
1169   s11 -= int64_lshift21(carry11);
1170   carry13 = (s13 + (1 << 20)) >> 21;
1171   s14 += carry13;
1172   s13 -= int64_lshift21(carry13);
1173   carry15 = (s15 + (1 << 20)) >> 21;
1174   s16 += carry15;
1175   s15 -= int64_lshift21(carry15);
1176 
1177   s5 += s17 * 666643;
1178   s6 += s17 * 470296;
1179   s7 += s17 * 654183;
1180   s8 -= s17 * 997805;
1181   s9 += s17 * 136657;
1182   s10 -= s17 * 683901;
1183   s17 = 0;
1184 
1185   s4 += s16 * 666643;
1186   s5 += s16 * 470296;
1187   s6 += s16 * 654183;
1188   s7 -= s16 * 997805;
1189   s8 += s16 * 136657;
1190   s9 -= s16 * 683901;
1191   s16 = 0;
1192 
1193   s3 += s15 * 666643;
1194   s4 += s15 * 470296;
1195   s5 += s15 * 654183;
1196   s6 -= s15 * 997805;
1197   s7 += s15 * 136657;
1198   s8 -= s15 * 683901;
1199   s15 = 0;
1200 
1201   s2 += s14 * 666643;
1202   s3 += s14 * 470296;
1203   s4 += s14 * 654183;
1204   s5 -= s14 * 997805;
1205   s6 += s14 * 136657;
1206   s7 -= s14 * 683901;
1207   s14 = 0;
1208 
1209   s1 += s13 * 666643;
1210   s2 += s13 * 470296;
1211   s3 += s13 * 654183;
1212   s4 -= s13 * 997805;
1213   s5 += s13 * 136657;
1214   s6 -= s13 * 683901;
1215   s13 = 0;
1216 
1217   s0 += s12 * 666643;
1218   s1 += s12 * 470296;
1219   s2 += s12 * 654183;
1220   s3 -= s12 * 997805;
1221   s4 += s12 * 136657;
1222   s5 -= s12 * 683901;
1223   s12 = 0;
1224 
1225   carry0 = (s0 + (1 << 20)) >> 21;
1226   s1 += carry0;
1227   s0 -= int64_lshift21(carry0);
1228   carry2 = (s2 + (1 << 20)) >> 21;
1229   s3 += carry2;
1230   s2 -= int64_lshift21(carry2);
1231   carry4 = (s4 + (1 << 20)) >> 21;
1232   s5 += carry4;
1233   s4 -= int64_lshift21(carry4);
1234   carry6 = (s6 + (1 << 20)) >> 21;
1235   s7 += carry6;
1236   s6 -= int64_lshift21(carry6);
1237   carry8 = (s8 + (1 << 20)) >> 21;
1238   s9 += carry8;
1239   s8 -= int64_lshift21(carry8);
1240   carry10 = (s10 + (1 << 20)) >> 21;
1241   s11 += carry10;
1242   s10 -= int64_lshift21(carry10);
1243 
1244   carry1 = (s1 + (1 << 20)) >> 21;
1245   s2 += carry1;
1246   s1 -= int64_lshift21(carry1);
1247   carry3 = (s3 + (1 << 20)) >> 21;
1248   s4 += carry3;
1249   s3 -= int64_lshift21(carry3);
1250   carry5 = (s5 + (1 << 20)) >> 21;
1251   s6 += carry5;
1252   s5 -= int64_lshift21(carry5);
1253   carry7 = (s7 + (1 << 20)) >> 21;
1254   s8 += carry7;
1255   s7 -= int64_lshift21(carry7);
1256   carry9 = (s9 + (1 << 20)) >> 21;
1257   s10 += carry9;
1258   s9 -= int64_lshift21(carry9);
1259   carry11 = (s11 + (1 << 20)) >> 21;
1260   s12 += carry11;
1261   s11 -= int64_lshift21(carry11);
1262 
1263   s0 += s12 * 666643;
1264   s1 += s12 * 470296;
1265   s2 += s12 * 654183;
1266   s3 -= s12 * 997805;
1267   s4 += s12 * 136657;
1268   s5 -= s12 * 683901;
1269   s12 = 0;
1270 
1271   carry0 = s0 >> 21;
1272   s1 += carry0;
1273   s0 -= int64_lshift21(carry0);
1274   carry1 = s1 >> 21;
1275   s2 += carry1;
1276   s1 -= int64_lshift21(carry1);
1277   carry2 = s2 >> 21;
1278   s3 += carry2;
1279   s2 -= int64_lshift21(carry2);
1280   carry3 = s3 >> 21;
1281   s4 += carry3;
1282   s3 -= int64_lshift21(carry3);
1283   carry4 = s4 >> 21;
1284   s5 += carry4;
1285   s4 -= int64_lshift21(carry4);
1286   carry5 = s5 >> 21;
1287   s6 += carry5;
1288   s5 -= int64_lshift21(carry5);
1289   carry6 = s6 >> 21;
1290   s7 += carry6;
1291   s6 -= int64_lshift21(carry6);
1292   carry7 = s7 >> 21;
1293   s8 += carry7;
1294   s7 -= int64_lshift21(carry7);
1295   carry8 = s8 >> 21;
1296   s9 += carry8;
1297   s8 -= int64_lshift21(carry8);
1298   carry9 = s9 >> 21;
1299   s10 += carry9;
1300   s9 -= int64_lshift21(carry9);
1301   carry10 = s10 >> 21;
1302   s11 += carry10;
1303   s10 -= int64_lshift21(carry10);
1304   carry11 = s11 >> 21;
1305   s12 += carry11;
1306   s11 -= int64_lshift21(carry11);
1307 
1308   s0 += s12 * 666643;
1309   s1 += s12 * 470296;
1310   s2 += s12 * 654183;
1311   s3 -= s12 * 997805;
1312   s4 += s12 * 136657;
1313   s5 -= s12 * 683901;
1314   s12 = 0;
1315 
1316   carry0 = s0 >> 21;
1317   s1 += carry0;
1318   s0 -= int64_lshift21(carry0);
1319   carry1 = s1 >> 21;
1320   s2 += carry1;
1321   s1 -= int64_lshift21(carry1);
1322   carry2 = s2 >> 21;
1323   s3 += carry2;
1324   s2 -= int64_lshift21(carry2);
1325   carry3 = s3 >> 21;
1326   s4 += carry3;
1327   s3 -= int64_lshift21(carry3);
1328   carry4 = s4 >> 21;
1329   s5 += carry4;
1330   s4 -= int64_lshift21(carry4);
1331   carry5 = s5 >> 21;
1332   s6 += carry5;
1333   s5 -= int64_lshift21(carry5);
1334   carry6 = s6 >> 21;
1335   s7 += carry6;
1336   s6 -= int64_lshift21(carry6);
1337   carry7 = s7 >> 21;
1338   s8 += carry7;
1339   s7 -= int64_lshift21(carry7);
1340   carry8 = s8 >> 21;
1341   s9 += carry8;
1342   s8 -= int64_lshift21(carry8);
1343   carry9 = s9 >> 21;
1344   s10 += carry9;
1345   s9 -= int64_lshift21(carry9);
1346   carry10 = s10 >> 21;
1347   s11 += carry10;
1348   s10 -= int64_lshift21(carry10);
1349 
1350   s[0] = s0 >> 0;
1351   s[1] = s0 >> 8;
1352   s[2] = (s0 >> 16) | (s1 << 5);
1353   s[3] = s1 >> 3;
1354   s[4] = s1 >> 11;
1355   s[5] = (s1 >> 19) | (s2 << 2);
1356   s[6] = s2 >> 6;
1357   s[7] = (s2 >> 14) | (s3 << 7);
1358   s[8] = s3 >> 1;
1359   s[9] = s3 >> 9;
1360   s[10] = (s3 >> 17) | (s4 << 4);
1361   s[11] = s4 >> 4;
1362   s[12] = s4 >> 12;
1363   s[13] = (s4 >> 20) | (s5 << 1);
1364   s[14] = s5 >> 7;
1365   s[15] = (s5 >> 15) | (s6 << 6);
1366   s[16] = s6 >> 2;
1367   s[17] = s6 >> 10;
1368   s[18] = (s6 >> 18) | (s7 << 3);
1369   s[19] = s7 >> 5;
1370   s[20] = s7 >> 13;
1371   s[21] = s8 >> 0;
1372   s[22] = s8 >> 8;
1373   s[23] = (s8 >> 16) | (s9 << 5);
1374   s[24] = s9 >> 3;
1375   s[25] = s9 >> 11;
1376   s[26] = (s9 >> 19) | (s10 << 2);
1377   s[27] = s10 >> 6;
1378   s[28] = (s10 >> 14) | (s11 << 7);
1379   s[29] = s11 >> 1;
1380   s[30] = s11 >> 9;
1381   s[31] = s11 >> 17;
1382 }
1383 
1384 /* Loosely from BoringSSL crypto/curve25519/spake25519.c */
1385 
1386 /*
1387  * Here BoringSSL uses different points, not restricted to the generator
1388  * subgroup, while we use the draft-irtf-cfrg-spake2-05 points.  The Python
1389  * code is modified to add the subgroup restriction.
1390  */
1391 
1392 // The following precomputation tables are for the following
1393 // points:
1394 //
1395 // N (found in 7 iterations):
1396 //   x: 10742253510813957597047979962966927467575235974254765187031601461055699024931
1397 //   y: 19796686047937480651099107989427797822652529149428697746066532921705571401683
1398 //   encoded: d3bfb518f44f3430f29d0c92af503865a1ed3281dc69b35dd868ba85f886c4ab
1399 //
1400 // M (found in 21 iterations):
1401 //   x: 8158688967149231307266666683326742915289288280191350817196911733632187385319
1402 //   y: 21622333750659878624441478467798461427617029906629724657331223068277098105040
1403 //   encoded: d048032c6ea0b6d697ddc2e86bda85a33adac920f1bf18e1b0c6d166a5cecdaf
1404 //
1405 // These points and their precomputation tables are generated with the
1406 // following Python code.
1407 
1408 /*
1409 import hashlib
1410 import ed25519 as E  # https://ed25519.cr.yp.to/python/ed25519.py
1411 
1412 SEED_N = 'edwards25519 point generation seed (N)'
1413 SEED_M = 'edwards25519 point generation seed (M)'
1414 
1415 def genpoint(seed):
1416     v = hashlib.sha256(seed).digest()
1417     it = 1
1418     while True:
1419         try:
1420             x,y = E.decodepoint(v)
1421             if E.scalarmult((x,y), E.l) != [0, 1]:
1422                 raise Exception('point has wrong order')
1423         except Exception, e:
1424             print e
1425             it += 1
1426             v = hashlib.sha256(v).digest()
1427             continue
1428         print "Found in %d iterations:" % it
1429         print "  x = %d" % x
1430         print "  y = %d" % y
1431         print " Encoded (hex)"
1432         print E.encodepoint((x,y)).encode('hex')
1433         return (x,y)
1434 
1435 def gentable(P):
1436     t = []
1437     for i in range(1,16):
1438         k = (i >> 3 & 1) * (1 << 192) + \
1439             (i >> 2 & 1) * (1 << 128) + \
1440             (i >> 1 & 1) * (1 <<  64) + \
1441             (i      & 1)
1442         t.append(E.scalarmult(P, k))
1443     return ''.join(E.encodeint(x) + E.encodeint(y) for (x,y) in t)
1444 
1445 def printtable(table, name):
1446     print "static const uint8_t %s[15 * 2 * 32] = {" % name,
1447     for i in range(15 * 2 * 32):
1448         if i % 12 == 0:
1449             print "\n   ",
1450         print " 0x%02x," % ord(table[i]),
1451     print "\n};"
1452 
1453 if __name__ == "__main__":
1454     print "Searching for N"
1455     N = genpoint(SEED_N)
1456     print "Generating precomputation table for N"
1457     Ntable = gentable(N)
1458     printtable(Ntable, "kSpakeNSmallPrecomp")
1459 
1460     print "Searching for M"
1461     M = genpoint(SEED_M)
1462     print "Generating precomputation table for M"
1463     Mtable = gentable(M)
1464     printtable(Mtable, "kSpakeMSmallPrecomp")
1465 */
1466 
1467 static const uint8_t kSpakeNSmallPrecomp[15 * 2 * 32] = {
1468     0x23, 0xfc, 0x27, 0x6c, 0x55, 0xaf, 0xb3, 0x9c, 0xd8, 0x99, 0x3a, 0x0d,
1469     0x7f, 0x08, 0xc9, 0xeb, 0x4d, 0x6e, 0x90, 0x99, 0x2f, 0x3c, 0x15, 0x2b,
1470     0x89, 0x5a, 0x0f, 0xf2, 0x67, 0xe6, 0xbf, 0x17, 0xd3, 0xbf, 0xb5, 0x18,
1471     0xf4, 0x4f, 0x34, 0x30, 0xf2, 0x9d, 0x0c, 0x92, 0xaf, 0x50, 0x38, 0x65,
1472     0xa1, 0xed, 0x32, 0x81, 0xdc, 0x69, 0xb3, 0x5d, 0xd8, 0x68, 0xba, 0x85,
1473     0xf8, 0x86, 0xc4, 0x2b, 0x53, 0x93, 0xb1, 0x99, 0x90, 0x30, 0xca, 0xb0,
1474     0xbd, 0xea, 0x14, 0x4c, 0x6f, 0x2b, 0x81, 0x1e, 0x23, 0x45, 0xb2, 0x32,
1475     0x2e, 0x2d, 0xe6, 0xb8, 0x5d, 0xc5, 0x15, 0x91, 0x63, 0x39, 0x18, 0x5b,
1476     0x62, 0x63, 0x9b, 0xf4, 0x8b, 0xe0, 0x34, 0xa2, 0x95, 0x11, 0x92, 0x68,
1477     0x54, 0xb7, 0xf3, 0x91, 0xca, 0x22, 0xad, 0x08, 0xd8, 0x9c, 0xa2, 0xf0,
1478     0xdc, 0x9c, 0x2c, 0x84, 0x32, 0x26, 0xe0, 0x17, 0x89, 0x53, 0x6b, 0xfd,
1479     0x76, 0x97, 0x25, 0xea, 0x99, 0x94, 0xf8, 0x29, 0x7c, 0xc4, 0x53, 0xc0,
1480     0x98, 0x9a, 0x20, 0xdc, 0x70, 0x01, 0x50, 0xaa, 0x05, 0xa3, 0x40, 0x50,
1481     0x66, 0x87, 0x30, 0x19, 0x12, 0xc3, 0xb8, 0x2d, 0x28, 0x8b, 0x7b, 0x48,
1482     0xf7, 0x7b, 0xab, 0x45, 0x70, 0x2e, 0xbb, 0x85, 0xc1, 0x6c, 0xdd, 0x35,
1483     0x00, 0x83, 0x20, 0x13, 0x82, 0x08, 0xaa, 0xa3, 0x03, 0x0f, 0xca, 0x27,
1484     0x3e, 0x8b, 0x52, 0xc2, 0xd7, 0xb1, 0x8c, 0x22, 0xfe, 0x04, 0x4a, 0xf2,
1485     0xe8, 0xac, 0xee, 0x2e, 0xd7, 0x77, 0x34, 0x49, 0xf2, 0xe9, 0xeb, 0x8c,
1486     0xa6, 0xc8, 0xc6, 0xcd, 0x8a, 0x8f, 0x7c, 0x5d, 0x51, 0xc8, 0xfa, 0x6f,
1487     0xb3, 0x93, 0xdb, 0x71, 0xef, 0x3e, 0x6e, 0xa7, 0x85, 0xc7, 0xd4, 0x3e,
1488     0xa2, 0xe2, 0xc0, 0xaa, 0x17, 0xb3, 0xa4, 0x7c, 0xc2, 0x3f, 0x7c, 0x7a,
1489     0xdd, 0x26, 0xde, 0x3e, 0xf1, 0x99, 0x06, 0xf7, 0x69, 0x1b, 0xc9, 0x20,
1490     0x55, 0x4f, 0x86, 0x7a, 0x93, 0x89, 0x68, 0xe9, 0x2b, 0x2d, 0xbc, 0x08,
1491     0x15, 0x5d, 0x2d, 0x0b, 0x4f, 0x1a, 0xb3, 0xd4, 0x8e, 0x77, 0x79, 0x2a,
1492     0x25, 0xf9, 0xb6, 0x46, 0xfb, 0x87, 0x02, 0xa6, 0xe0, 0xd3, 0xba, 0x84,
1493     0xea, 0x3e, 0x58, 0xa5, 0x7f, 0x8f, 0x8c, 0x39, 0x79, 0x28, 0xb5, 0xcf,
1494     0xe4, 0xca, 0x63, 0xdc, 0xac, 0xed, 0x4b, 0x74, 0x1e, 0x94, 0x85, 0x8c,
1495     0xe5, 0xf4, 0x76, 0x6f, 0x20, 0x67, 0x8b, 0xd8, 0xd6, 0x4b, 0xe7, 0x2d,
1496     0xa0, 0xbd, 0xcc, 0x1f, 0xdf, 0x46, 0x9c, 0xa2, 0x49, 0x64, 0xdf, 0x24,
1497     0x00, 0x11, 0x11, 0x45, 0x62, 0x5c, 0xd7, 0x8a, 0x00, 0x02, 0xf5, 0x9b,
1498     0x4f, 0x53, 0x42, 0xc5, 0xd5, 0x55, 0x80, 0x73, 0x9a, 0x5b, 0x31, 0x5a,
1499     0xbd, 0x3a, 0x43, 0xe9, 0x33, 0xe5, 0xaf, 0x1d, 0x92, 0x5e, 0x59, 0x37,
1500     0xae, 0x57, 0xfa, 0x3b, 0xd2, 0x31, 0xae, 0xa6, 0xf9, 0xc9, 0xc1, 0x82,
1501     0xa6, 0xa5, 0xed, 0x24, 0x53, 0x4b, 0x38, 0x22, 0xf2, 0x85, 0x8d, 0x13,
1502     0xa6, 0x5e, 0xd6, 0x57, 0x17, 0xd3, 0x33, 0x38, 0x8d, 0x65, 0xd3, 0xcb,
1503     0x1a, 0xa2, 0x3a, 0x2b, 0xbb, 0x61, 0x53, 0xd7, 0xff, 0xcd, 0x20, 0xb6,
1504     0xbb, 0x8c, 0xab, 0x63, 0xef, 0xb8, 0x26, 0x7e, 0x81, 0x65, 0xaf, 0x90,
1505     0xfc, 0xd2, 0xb6, 0x72, 0xdb, 0xe9, 0x23, 0x78, 0x12, 0x04, 0xc0, 0x03,
1506     0x82, 0xa8, 0x7a, 0x0f, 0x48, 0x6f, 0x82, 0x7f, 0x81, 0xcd, 0xa7, 0x89,
1507     0xdd, 0x86, 0xea, 0x5e, 0xa1, 0x50, 0x14, 0x34, 0x17, 0x64, 0x82, 0x0f,
1508     0xc4, 0x40, 0x20, 0x1d, 0x8f, 0xfe, 0xfa, 0x99, 0xaf, 0x5b, 0xc1, 0x5d,
1509     0xc8, 0x47, 0x07, 0x54, 0x4a, 0x22, 0x56, 0x57, 0xf1, 0x2c, 0x3b, 0x62,
1510     0x7f, 0x12, 0x62, 0xaf, 0xfd, 0xf8, 0x04, 0x11, 0xa8, 0x51, 0xf0, 0x46,
1511     0x5d, 0x79, 0x66, 0xff, 0x8a, 0x06, 0xef, 0x54, 0x64, 0x1b, 0x84, 0x3e,
1512     0x41, 0xf3, 0xfe, 0x19, 0x51, 0xf7, 0x44, 0x9c, 0x16, 0xd3, 0x7a, 0x09,
1513     0x59, 0xf5, 0x47, 0x45, 0xd0, 0x31, 0xef, 0x96, 0x2c, 0xc5, 0xc0, 0xd0,
1514     0x56, 0xef, 0x3f, 0x07, 0x2b, 0xb7, 0x28, 0x49, 0xf5, 0xb1, 0x42, 0x18,
1515     0xcf, 0x77, 0xd8, 0x2b, 0x71, 0x74, 0x80, 0xba, 0x34, 0x52, 0xce, 0x11,
1516     0xfe, 0xc4, 0xb9, 0xeb, 0xf9, 0xc4, 0x5e, 0x1f, 0xd3, 0xde, 0x4b, 0x14,
1517     0xe3, 0x6e, 0xe7, 0xd7, 0x83, 0x59, 0x98, 0xe8, 0x3d, 0x8e, 0xd6, 0x7d,
1518     0xc0, 0x9a, 0x79, 0xb9, 0x83, 0xf1, 0xc1, 0x00, 0x5d, 0x16, 0x1b, 0x44,
1519     0xe9, 0x02, 0xce, 0x99, 0x1e, 0x77, 0xef, 0xca, 0xbc, 0xf0, 0x6a, 0xb9,
1520     0x65, 0x3f, 0x3c, 0xd9, 0xe1, 0x63, 0x0b, 0xbf, 0xaa, 0xa7, 0xe6, 0x6d,
1521     0x6d, 0x3f, 0x44, 0x29, 0xa3, 0x8b, 0x6d, 0xc4, 0x81, 0xa9, 0xc3, 0x5a,
1522     0x90, 0x55, 0x72, 0x61, 0x17, 0x22, 0x7f, 0x3e, 0x5f, 0xfc, 0xba, 0xb3,
1523     0x7a, 0x99, 0x76, 0xe9, 0x20, 0xe5, 0xc5, 0xe8, 0x55, 0x56, 0x0f, 0x7a,
1524     0x48, 0xe7, 0xbc, 0xe1, 0x13, 0xf4, 0x90, 0xef, 0x97, 0x6c, 0x02, 0x89,
1525     0x4d, 0x22, 0x48, 0xda, 0xd3, 0x52, 0x45, 0x31, 0x26, 0xcc, 0xe8, 0x9e,
1526     0x5d, 0xdd, 0x75, 0xe4, 0x1d, 0xbc, 0xb1, 0x08, 0x55, 0xaf, 0x54, 0x70,
1527     0x0d, 0x0c, 0xf3, 0x50, 0xbc, 0x40, 0x83, 0xee, 0xdc, 0x6d, 0x8b, 0x40,
1528     0x79, 0x62, 0x18, 0x37, 0xc4, 0x78, 0x02, 0x58, 0x7c, 0x78, 0xd3, 0x54,
1529     0xed, 0x31, 0xbd, 0x7d, 0x48, 0xcf, 0xb6, 0x11, 0x27, 0x37, 0x9c, 0x86,
1530     0xf7, 0x2e, 0x00, 0x7a, 0x48, 0x1b, 0xa6, 0x72, 0x70, 0x7b, 0x44, 0x45,
1531     0xeb, 0x49, 0xbf, 0xbe, 0x09, 0x78, 0x66, 0x71, 0x12, 0x7f, 0x3d, 0x78,
1532     0x51, 0x24, 0x82, 0xa2, 0xf0, 0x1e, 0x83, 0x81, 0x81, 0x45, 0x53, 0xfd,
1533     0x5e, 0xf3, 0x03, 0x74, 0xbd, 0x23, 0x35, 0xf6, 0x10, 0xdd, 0x7c, 0x73,
1534     0x46, 0x32, 0x09, 0x54, 0x99, 0x95, 0x91, 0x25, 0xb8, 0x32, 0x09, 0xd8,
1535     0x2f, 0x97, 0x50, 0xa3, 0xf5, 0xd6, 0xb1, 0xed, 0x97, 0x51, 0x06, 0x42,
1536     0x12, 0x0c, 0x69, 0x38, 0x09, 0xa0, 0xd8, 0x19, 0x70, 0xf7, 0x8f, 0x61,
1537     0x0d, 0x56, 0x43, 0x66, 0x22, 0x8b, 0x0e, 0x0e, 0xf9, 0x81, 0x9f, 0xac,
1538     0x6f, 0xbf, 0x7d, 0x04, 0x13, 0xf2, 0xe4, 0xeb, 0xfd, 0xbe, 0x4e, 0x56,
1539     0xda, 0xe0, 0x22, 0x6d, 0x1b, 0x25, 0xc8, 0xa5, 0x9c, 0x05, 0x45, 0x52,
1540     0x3c, 0x3a, 0xde, 0x6b, 0xac, 0x9b, 0xf8, 0x81, 0x97, 0x21, 0x46, 0xac,
1541     0x7e, 0x89, 0xf8, 0x49, 0x58, 0xbb, 0x45, 0xac, 0xa2, 0xc4, 0x90, 0x1f,
1542     0xb2, 0xb4, 0xf8, 0xe0, 0xcd, 0xa1, 0x9d, 0x1c, 0xf2, 0xf1, 0xdf, 0xfb,
1543     0x88, 0x4e, 0xe5, 0x41, 0xd8, 0x6e, 0xac, 0x07, 0x87, 0x95, 0x35, 0xa6,
1544     0x12, 0x08, 0x5d, 0x57, 0x5e, 0xaf, 0x71, 0x0f, 0x07, 0x4e, 0x81, 0x77,
1545     0xf1, 0xef, 0xb5, 0x35, 0x5c, 0xfa, 0xf4, 0x4e, 0x42, 0xdc, 0x19, 0xfe,
1546     0xe4, 0xd2, 0xb4, 0x27, 0xfb, 0x34, 0x1f, 0xb2, 0x6f, 0xf2, 0x95, 0xcc,
1547     0xd4, 0x47, 0x63, 0xdc, 0x7e, 0x4f, 0x97, 0x2b, 0x7a, 0xe0, 0x80, 0x31,
1548 };
1549 
1550 static const uint8_t kSpakeMSmallPrecomp[15 * 2 * 32] = {
1551     0xe7, 0x45, 0x7e, 0x47, 0x49, 0x69, 0xbd, 0x1b, 0x35, 0x1c, 0x2c, 0x98,
1552     0x03, 0xf3, 0xb3, 0x37, 0xde, 0x39, 0xa5, 0xda, 0xc0, 0x2e, 0xa4, 0xac,
1553     0x7d, 0x08, 0x26, 0xfc, 0x80, 0xa7, 0x09, 0x12, 0xd0, 0x48, 0x03, 0x2c,
1554     0x6e, 0xa0, 0xb6, 0xd6, 0x97, 0xdd, 0xc2, 0xe8, 0x6b, 0xda, 0x85, 0xa3,
1555     0x3a, 0xda, 0xc9, 0x20, 0xf1, 0xbf, 0x18, 0xe1, 0xb0, 0xc6, 0xd1, 0x66,
1556     0xa5, 0xce, 0xcd, 0x2f, 0x80, 0xa8, 0x4e, 0xc3, 0x81, 0xae, 0x68, 0x3b,
1557     0x0d, 0xdb, 0x56, 0x32, 0x2f, 0xa8, 0x97, 0xa0, 0x5c, 0x15, 0xc1, 0xcb,
1558     0x6f, 0x7a, 0x5f, 0xc5, 0x32, 0xfb, 0x49, 0x17, 0x18, 0xfa, 0x85, 0x08,
1559     0x85, 0xf1, 0xe3, 0x11, 0x8e, 0x3d, 0x70, 0x20, 0x38, 0x4e, 0x0c, 0x17,
1560     0xa1, 0xa8, 0x20, 0xd2, 0xb1, 0x1d, 0x05, 0x8d, 0x0f, 0xc9, 0x96, 0x18,
1561     0x9d, 0x8c, 0x89, 0x8f, 0x46, 0x6a, 0x6c, 0x6e, 0x72, 0x03, 0xb2, 0x75,
1562     0x87, 0xd8, 0xa9, 0x60, 0x93, 0x2b, 0x8b, 0x66, 0xee, 0xaf, 0xce, 0x98,
1563     0xcd, 0x6b, 0x7c, 0x6a, 0xbe, 0x19, 0xda, 0x66, 0x7c, 0xda, 0x53, 0xa0,
1564     0xe3, 0x9a, 0x0e, 0x53, 0x3a, 0x7c, 0x73, 0x4a, 0x37, 0xa6, 0x53, 0x23,
1565     0x67, 0x31, 0xce, 0x8a, 0xab, 0xee, 0x72, 0x76, 0xc2, 0xb5, 0x54, 0x42,
1566     0xcf, 0x4b, 0xc7, 0x53, 0x24, 0x59, 0xaf, 0x76, 0x53, 0x10, 0x7e, 0x25,
1567     0x94, 0x5c, 0x23, 0xa6, 0x5e, 0x05, 0xea, 0x14, 0xad, 0x2b, 0xce, 0x50,
1568     0x77, 0xb3, 0x7a, 0x88, 0x4c, 0xf7, 0x74, 0x04, 0x35, 0xa4, 0x0c, 0x9e,
1569     0xee, 0x6a, 0x4c, 0x3c, 0xc1, 0x6a, 0x35, 0x4d, 0x6d, 0x8f, 0x94, 0x95,
1570     0xe4, 0x10, 0xca, 0x46, 0x4e, 0xfa, 0x38, 0x40, 0xeb, 0x1a, 0x1b, 0x5a,
1571     0xff, 0x73, 0x4d, 0xe9, 0xf2, 0xbe, 0x89, 0xf5, 0xd1, 0x72, 0xd0, 0x1a,
1572     0x7b, 0x82, 0x08, 0x19, 0xda, 0x54, 0x44, 0xa5, 0x3d, 0xd8, 0x10, 0x1c,
1573     0xcf, 0x3b, 0xc7, 0x54, 0xd5, 0x11, 0xd7, 0x2a, 0x69, 0x3f, 0xa6, 0x58,
1574     0x74, 0xfd, 0x90, 0xb2, 0xf4, 0xc2, 0x0e, 0xf3, 0x19, 0x8f, 0x51, 0x7c,
1575     0x31, 0x12, 0x79, 0x61, 0x16, 0xb4, 0x2f, 0x2f, 0xd0, 0x88, 0x97, 0xf2,
1576     0xc3, 0x8c, 0xa6, 0xa3, 0x29, 0xff, 0x7e, 0x12, 0x46, 0x2a, 0x9c, 0x09,
1577     0x7c, 0x5f, 0x87, 0x07, 0x6b, 0xa1, 0x9a, 0x57, 0x55, 0x8e, 0xb0, 0x56,
1578     0x5d, 0xc9, 0x4c, 0x5b, 0xae, 0xd3, 0xd0, 0x8e, 0xb8, 0xac, 0xba, 0xe8,
1579     0x54, 0x45, 0x30, 0x14, 0xf6, 0x59, 0x20, 0xc4, 0x03, 0xb7, 0x7a, 0x5d,
1580     0x6b, 0x5a, 0xcb, 0x28, 0x60, 0xf8, 0xef, 0x61, 0x60, 0x78, 0x6b, 0xf5,
1581     0x21, 0x4b, 0x75, 0xc2, 0x77, 0xba, 0x0e, 0x38, 0x98, 0xe0, 0xfb, 0xb7,
1582     0x5f, 0x75, 0x87, 0x04, 0x0c, 0xb4, 0x5c, 0x09, 0x04, 0x00, 0x38, 0x4e,
1583     0x4f, 0x7b, 0x73, 0xe5, 0xdb, 0xdb, 0xf1, 0xf4, 0x5c, 0x64, 0x68, 0xfd,
1584     0xb1, 0x86, 0xe8, 0x89, 0xbe, 0x9c, 0xd4, 0x96, 0x1d, 0xcb, 0xdc, 0x5c,
1585     0xef, 0xd4, 0x33, 0x28, 0xb9, 0xb6, 0xaf, 0x3b, 0xcf, 0x8d, 0x30, 0xba,
1586     0xe8, 0x08, 0xcf, 0x84, 0xba, 0x61, 0x10, 0x9b, 0x62, 0xf6, 0x18, 0x79,
1587     0x66, 0x87, 0x82, 0x7c, 0xaa, 0x71, 0xac, 0xd0, 0xd0, 0x32, 0xb0, 0x54,
1588     0x03, 0xa4, 0xad, 0x3f, 0x72, 0xca, 0x22, 0xff, 0x01, 0x87, 0x08, 0x36,
1589     0x61, 0x22, 0xaa, 0x18, 0xab, 0x3a, 0xbc, 0xf2, 0x78, 0x05, 0xe1, 0x99,
1590     0xa3, 0x59, 0x98, 0xcc, 0x21, 0xc6, 0x2b, 0x51, 0x6d, 0x43, 0x0a, 0x46,
1591     0x50, 0xae, 0x11, 0x7e, 0xd5, 0x23, 0x56, 0xef, 0x83, 0xc8, 0xbf, 0x42,
1592     0xf0, 0x45, 0x52, 0x1f, 0x34, 0xbc, 0x2f, 0xb0, 0xf0, 0xce, 0xf0, 0xec,
1593     0xd0, 0x99, 0x59, 0x2e, 0x1f, 0xab, 0xa8, 0x1e, 0x4b, 0xce, 0x1b, 0x9a,
1594     0x75, 0xc6, 0xc4, 0x71, 0x86, 0xf0, 0x8d, 0xec, 0xb0, 0x30, 0xb9, 0x62,
1595     0xb3, 0xb7, 0xdd, 0x96, 0x29, 0xc8, 0xbf, 0xe9, 0xb0, 0x74, 0x78, 0x7b,
1596     0xf7, 0xea, 0xa3, 0x14, 0x12, 0x56, 0xe0, 0xf3, 0x35, 0x7a, 0x26, 0x4a,
1597     0x4c, 0xe6, 0xdf, 0x13, 0xb5, 0x52, 0xb0, 0x2a, 0x5f, 0x2e, 0xac, 0x34,
1598     0xab, 0x5f, 0x1a, 0x01, 0xe4, 0x15, 0x1a, 0xd1, 0xbf, 0xc9, 0x95, 0x0a,
1599     0xac, 0x1d, 0xe7, 0x53, 0x59, 0x8d, 0xc3, 0x21, 0x78, 0x5e, 0x12, 0x97,
1600     0x8f, 0x4e, 0x1d, 0xf9, 0xe5, 0xe2, 0xc2, 0xc4, 0xba, 0xfb, 0x50, 0x96,
1601     0x5b, 0x43, 0xe8, 0xf7, 0x0d, 0x1b, 0x64, 0x58, 0xbe, 0xd3, 0x95, 0x7f,
1602     0x8e, 0xf1, 0x85, 0x35, 0xba, 0x25, 0x55, 0x2e, 0x02, 0x46, 0x5c, 0xad,
1603     0x1f, 0xc5, 0x03, 0xcc, 0xd0, 0x43, 0x4c, 0xf2, 0x5e, 0x64, 0x0a, 0x89,
1604     0xd9, 0xfd, 0x23, 0x7d, 0x4f, 0xbe, 0x2f, 0x0f, 0x1e, 0x12, 0x4a, 0xd9,
1605     0xf8, 0x82, 0xde, 0x8f, 0x4f, 0x98, 0xb9, 0x90, 0xf6, 0xfa, 0xd1, 0x11,
1606     0xa6, 0xdc, 0x7e, 0x32, 0x48, 0x6a, 0x8a, 0x14, 0x5e, 0x73, 0xb9, 0x6c,
1607     0x0e, 0xc2, 0xf9, 0xcc, 0xf0, 0x32, 0xc8, 0xb5, 0x56, 0xaa, 0x5d, 0xd2,
1608     0x07, 0xf1, 0x6f, 0x33, 0x6f, 0x05, 0x70, 0x49, 0x60, 0x49, 0x23, 0x23,
1609     0x14, 0x0e, 0x4c, 0x58, 0x92, 0xad, 0xa9, 0x50, 0xb1, 0x59, 0x43, 0x96,
1610     0x7b, 0xc1, 0x51, 0x45, 0xef, 0x0d, 0xef, 0xd1, 0xe4, 0xd0, 0xce, 0xdf,
1611     0x6a, 0xbc, 0x1b, 0xbf, 0x7a, 0x87, 0x4e, 0x47, 0x17, 0x9c, 0x34, 0x38,
1612     0xb0, 0x3c, 0xa1, 0x04, 0xfb, 0xe2, 0x66, 0xce, 0xb6, 0x82, 0xbb, 0xad,
1613     0xc3, 0x8e, 0x12, 0x35, 0xbc, 0x17, 0xce, 0x01, 0x2d, 0xa3, 0xa6, 0xb9,
1614     0xfa, 0x84, 0xc2, 0x2f, 0x5a, 0x4a, 0x8c, 0x4c, 0x11, 0x4e, 0xa8, 0x14,
1615     0xcb, 0xb8, 0x99, 0xaa, 0x2e, 0x8c, 0xa0, 0xc9, 0x5f, 0x62, 0x2a, 0x84,
1616     0x66, 0x60, 0x0a, 0x7e, 0xdc, 0x93, 0x17, 0x45, 0x19, 0xb3, 0x93, 0x4c,
1617     0xdc, 0xd0, 0xd5, 0x5c, 0x25, 0xd2, 0xcd, 0x4e, 0x84, 0x4c, 0x73, 0xb3,
1618     0x90, 0xa4, 0x22, 0x05, 0x2c, 0x7c, 0x39, 0x2b, 0x70, 0xd9, 0x61, 0x76,
1619     0xb2, 0x03, 0x71, 0xe9, 0x0e, 0xf8, 0x57, 0x85, 0xad, 0xb1, 0x2f, 0x34,
1620     0xa5, 0x66, 0xb0, 0x0f, 0x75, 0x94, 0x6e, 0x26, 0x79, 0x99, 0xb4, 0xe2,
1621     0xe2, 0xa3, 0x58, 0xdd, 0xb4, 0xfb, 0x74, 0xf4, 0xa1, 0xca, 0xc3, 0x30,
1622     0xe7, 0x86, 0xb2, 0xa2, 0x2c, 0x11, 0xc9, 0x58, 0xe3, 0xc1, 0xa6, 0x5f,
1623     0x86, 0x6a, 0xe7, 0x75, 0xd5, 0xd8, 0x63, 0x95, 0x64, 0x59, 0xbc, 0xb8,
1624     0xb7, 0xf5, 0x12, 0xe3, 0x03, 0xc6, 0x17, 0xea, 0x4e, 0xcb, 0xee, 0x4c,
1625     0xae, 0x03, 0xd1, 0x33, 0xd0, 0x39, 0x36, 0x00, 0x0f, 0xf4, 0x9c, 0xbd,
1626     0x35, 0x96, 0xfd, 0x0d, 0x26, 0xb7, 0x9e, 0xf4, 0x4b, 0x6f, 0x4b, 0xf1,
1627     0xec, 0x11, 0x00, 0x16, 0x21, 0x1e, 0xd4, 0x43, 0x23, 0x8c, 0x4a, 0xfa,
1628     0x9e, 0xd4, 0x2b, 0x36, 0x9a, 0x43, 0x1e, 0x58, 0x31, 0xe8, 0x1f, 0x83,
1629     0x15, 0x20, 0x31, 0x68, 0xfe, 0x27, 0xd3, 0xd8, 0x9b, 0x43, 0x81, 0x8f,
1630     0x57, 0x32, 0x14, 0xe6, 0x9e, 0xbf, 0xd1, 0xfb, 0xdf, 0xad, 0x7a, 0x52,
1631 };
1632 
1633 /* left_shift_3 sets |n| to |n|*8, where |n| is represented in little-endian
1634  * order. */
left_shift_3(uint8_t n[32])1635 static void left_shift_3(uint8_t n[32]) {
1636   uint8_t carry = 0;
1637   unsigned i;
1638 
1639   for (i = 0; i < 32; i++) {
1640     const uint8_t next_carry = n[i] >> 5;
1641     n[i] = (n[i] << 3) | carry;
1642     carry = next_carry;
1643   }
1644 }
1645 
1646 static krb5_error_code
builtin_edwards25519_keygen(krb5_context context,groupdata * gdata,const uint8_t * wbytes,krb5_boolean use_m,uint8_t * priv_out,uint8_t * pub_out)1647 builtin_edwards25519_keygen(krb5_context context, groupdata *gdata,
1648                             const uint8_t *wbytes, krb5_boolean use_m,
1649                             uint8_t *priv_out, uint8_t *pub_out)
1650 {
1651   uint8_t private[64];
1652   krb5_data data = make_data(private, 32);
1653   krb5_error_code ret;
1654 
1655   /* Pick x or y uniformly from [0, p*h) divisible by h. */
1656   ret = krb5_c_random_make_octets(context, &data);
1657   if (ret)
1658     return ret;
1659   memset(private + 32, 0, 32);
1660   x25519_sc_reduce(private);
1661   left_shift_3(private);
1662 
1663   /* Compute X=x*G or Y=y*G. */
1664   ge_p3 P;
1665   x25519_ge_scalarmult_base(&P, private);
1666 
1667   /* Compute w mod p. */
1668   uint8_t wreduced[64];
1669   memcpy(wreduced, wbytes, 32);
1670   memset(wreduced + 32, 0, 32);
1671   x25519_sc_reduce(wreduced);
1672 
1673   /* Compute the mask, w*M or w*N. */
1674   ge_p3 mask;
1675   x25519_ge_scalarmult_small_precomp(&mask, wreduced,
1676                                      use_m ? kSpakeMSmallPrecomp :
1677                                      kSpakeNSmallPrecomp);
1678 
1679   /* Compute the masked point T=w*M+X or S=w*N+Y. */
1680   ge_cached mask_cached;
1681   x25519_ge_p3_to_cached(&mask_cached, &mask);
1682   ge_p1p1 Pmasked;
1683   x25519_ge_add(&Pmasked, &P, &mask_cached);
1684 
1685   /* Encode T or S into pub_out. */
1686   ge_p2 Pmasked_proj;
1687   x25519_ge_p1p1_to_p2(&Pmasked_proj, &Pmasked);
1688   x25519_ge_tobytes(pub_out, &Pmasked_proj);
1689 
1690   /* Remember the private key in priv_out. */
1691   memcpy(priv_out, private, 32);
1692   return 0;
1693 }
1694 
1695 static krb5_error_code
builtin_edwards25519_result(krb5_context context,groupdata * gdata,const uint8_t * wbytes,const uint8_t * ourpriv,const uint8_t * theirpub,krb5_boolean use_m,uint8_t * elem_out)1696 builtin_edwards25519_result(krb5_context context, groupdata *gdata,
1697                             const uint8_t *wbytes, const uint8_t *ourpriv,
1698                             const uint8_t *theirpub, krb5_boolean use_m,
1699                             uint8_t *elem_out)
1700 {
1701   /*
1702    * Check if the point received from peer is on the curve.  This does not
1703    * verify that it is in the generator subgroup, but since our private key is
1704    * a multiple of the cofactor, the shared point will be in the generator
1705    * subgroup even if a rogue peer sends a point which is not.
1706    */
1707   ge_p3 Qmasked;
1708   if (!x25519_ge_frombytes_vartime(&Qmasked, theirpub))
1709     return EINVAL;
1710 
1711   /* Compute w mod p. */
1712   uint8_t wreduced[64];
1713   memcpy(wreduced, wbytes, 32);
1714   memset(wreduced + 32, 0, 32);
1715   x25519_sc_reduce(wreduced);
1716 
1717   /* Compute the peer's mask, w*M or w*N. */
1718   ge_p3 peers_mask;
1719   x25519_ge_scalarmult_small_precomp(&peers_mask, wreduced,
1720                                      use_m ? kSpakeMSmallPrecomp :
1721                                      kSpakeNSmallPrecomp);
1722 
1723   ge_cached peers_mask_cached;
1724   x25519_ge_p3_to_cached(&peers_mask_cached, &peers_mask);
1725 
1726   /* Compute the peer's unmasked point, T-w*M or S-w*N. */
1727   ge_p1p1 Qcompl;
1728   ge_p3 Qunmasked;
1729   x25519_ge_sub(&Qcompl, &Qmasked, &peers_mask_cached);
1730   x25519_ge_p1p1_to_p3(&Qunmasked, &Qcompl);
1731 
1732   /* Multiply by our private value to compute K=x*(S-w*N) or K=y*(T-w*M). */
1733   ge_p2 K;
1734   x25519_ge_scalarmult(&K, ourpriv, &Qunmasked);
1735 
1736   /* Encode K into elem_out. */
1737   x25519_ge_tobytes(elem_out, &K);
1738   return 0;
1739 }
1740 
1741 static krb5_error_code
builtin_sha256(krb5_context context,groupdata * gdata,const krb5_data * dlist,size_t ndata,uint8_t * result_out)1742 builtin_sha256(krb5_context context, groupdata *gdata, const krb5_data *dlist,
1743                size_t ndata, uint8_t *result_out)
1744 {
1745   return k5_sha256(dlist, ndata, result_out);
1746 }
1747 
1748 groupdef builtin_edwards25519 = {
1749   .reg = &spake_iana_edwards25519,
1750   .keygen = builtin_edwards25519_keygen,
1751   .result = builtin_edwards25519_result,
1752   .hash = builtin_sha256
1753 };
1754