xref: /freebsd/sys/dev/wg/wg_crypto.c (revision 744bfb213144c63cbaf38d91a1c4f7aebb9b9fbc)
1 /* SPDX-License-Identifier: MIT
2  *
3  * Copyright (C) 2015-2021 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
4  * Copyright (c) 2022 The FreeBSD Foundation
5  */
6 
7 #include <sys/types.h>
8 #include <sys/systm.h>
9 #include <sys/endian.h>
10 #include <sys/mbuf.h>
11 #include <opencrypto/cryptodev.h>
12 
13 #include "crypto.h"
14 
#ifndef COMPAT_NEED_CHACHA20POLY1305_MBUF
/*
 * OpenCrypto session handle passed to crypto_initreq() by the
 * crypto_dispatch()-based mbuf encrypt/decrypt routines in this file.
 */
static crypto_session_t chacha20_poly1305_sid;
#endif
18 
/*
 * Fallback definitions of small helper macros; each is only defined when the
 * build environment has not already supplied one.
 */
#ifndef ARRAY_SIZE
/* Element count of a true array (not valid on pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifndef noinline
#define noinline __attribute__((noinline))
#endif
#ifndef __aligned
#define __aligned(x) __attribute__((aligned(x)))
#endif
#ifndef DIV_ROUND_UP
/* Integer division of n by d, rounding up. */
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#endif

/* Byte-order helper names mapped onto the <sys/endian.h> primitives. */
#define le32_to_cpup(a) le32toh(*(a))
#define le64_to_cpup(a) le64toh(*(a))
#define cpu_to_le32(a) htole32(a)
#define cpu_to_le64(a) htole64(a)
36 
37 static inline __unused uint32_t get_unaligned_le32(const uint8_t *a)
38 {
39 	uint32_t l;
40 	__builtin_memcpy(&l, a, sizeof(l));
41 	return le32_to_cpup(&l);
42 }
43 static inline __unused uint64_t get_unaligned_le64(const uint8_t *a)
44 {
45 	uint64_t l;
46 	__builtin_memcpy(&l, a, sizeof(l));
47 	return le64_to_cpup(&l);
48 }
49 static inline __unused void put_unaligned_le32(uint32_t s, uint8_t *d)
50 {
51 	uint32_t l = cpu_to_le32(s);
52 	__builtin_memcpy(d, &l, sizeof(l));
53 }
54 static inline __unused void cpu_to_le32_array(uint32_t *buf, unsigned int words)
55 {
56         while (words--) {
57 		*buf = cpu_to_le32(*buf);
58 		++buf;
59 	}
60 }
61 static inline __unused void le32_to_cpu_array(uint32_t *buf, unsigned int words)
62 {
63         while (words--) {
64 		*buf = le32_to_cpup(buf);
65 		++buf;
66         }
67 }
/*
 * Rotate word left by shift bits. Both shift amounts are reduced modulo 32,
 * so a shift of 0 (or any multiple of 32) returns word unchanged without an
 * out-of-range shift.
 */
static inline __unused uint32_t rol32(uint32_t word, unsigned int shift)
{
	const unsigned int up = shift & 31;
	const unsigned int down = (-shift) & 31;

	return (word << up) | (word >> down);
}
/*
 * Rotate word right by shift bits. Both shift amounts are reduced modulo 32,
 * so a shift of 0 (or any multiple of 32) returns word unchanged without an
 * out-of-range shift.
 */
static inline __unused uint32_t ror32(uint32_t word, unsigned int shift)
{
	const unsigned int down = shift & 31;
	const unsigned int up = (-shift) & 31;

	return (word >> down) | (word << up);
}
76 
77 #if defined(COMPAT_NEED_CHACHA20POLY1305) || defined(COMPAT_NEED_CHACHA20POLY1305_MBUF)
/*
 * Write src1[i] ^ src2[i] to dst[i] for the first len bytes, in ascending
 * order. Callers in this file pass dst == src1 to XOR a keystream in place.
 */
static void xor_cpy(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t len)
{
	while (len-- > 0)
		*dst++ = *src1++ ^ *src2++;
}
85 
/*
 * One ChaCha quarter round over words a, b, c, d of the state array x.
 * Written as a single comma expression so rounds can be chained inside the
 * larger round macros below. The array argument is parenthesized so that any
 * pointer expression may be passed.
 */
#define QUARTER_ROUND(x, a, b, c, d) ( \
	(x)[a] += (x)[b], \
	(x)[d] = rol32(((x)[d] ^ (x)[a]), 16), \
	(x)[c] += (x)[d], \
	(x)[b] = rol32(((x)[b] ^ (x)[c]), 12), \
	(x)[a] += (x)[b], \
	(x)[d] = rol32(((x)[d] ^ (x)[a]), 8), \
	(x)[c] += (x)[d], \
	(x)[b] = rol32(((x)[b] ^ (x)[c]), 7) \
)

/*
 * Flat index of row i, column j in the 4x4 word state. Arguments are
 * parenthesized so expression arguments (e.g. C(r + 1, 0)) evaluate with the
 * intended precedence.
 */
#define C(i, j) ((i) * 4 + (j))

/* One column round followed by one diagonal round (two ChaCha rounds). */
#define DOUBLE_ROUND(x) ( \
	/* Column Round */ \
	QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
	QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
	QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
	QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
	/* Diagonal Round */ \
	QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
	QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
	QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
	QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
)

/* Ten double rounds, i.e. the full 20-round ChaCha permutation applied to x. */
#define TWENTY_ROUNDS(x) ( \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x) \
)
124 
/* Sizes, in bytes unless named *_WORDS (32-bit words), used by the ChaCha20 code. */
enum chacha20_lengths {
	CHACHA20_NONCE_SIZE = 16,
	CHACHA20_KEY_SIZE = 32,
	CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(uint32_t),
	CHACHA20_BLOCK_SIZE = 64,
	CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(uint32_t),
	/* HChaCha20 takes the same nonce and key sizes as above. */
	HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
	HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
};
134 
/*
 * The four constant words that occupy the first row of the ChaCha state:
 * the ASCII string "expand 32-byte k" read as little-endian 32-bit words.
 */
enum chacha20_constants { /* expand 32-byte k */
	CHACHA20_CONSTANT_EXPA = 0x61707865U,
	CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
	CHACHA20_CONSTANT_2_BY = 0x79622d32U,
	CHACHA20_CONSTANT_TE_K = 0x6b206574U
};
141 
/*
 * ChaCha20 cipher state: 16 words that can be addressed either as a flat
 * array (state) or by role (constant, key, counter).
 */
struct chacha20_ctx {
	union {
		uint32_t state[16];
		struct {
			uint32_t constant[4];
			uint32_t key[8];
			/*
			 * chacha20_init() zeroes counter[0..1] and stores the
			 * 64-bit nonce in counter[2..3]; chacha20_block()
			 * increments counter[0] once per block.
			 */
			uint32_t counter[4];
		};
	};
};
152 
153 static void chacha20_init(struct chacha20_ctx *ctx,
154 			  const uint8_t key[CHACHA20_KEY_SIZE],
155 			  const uint64_t nonce)
156 {
157 	ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
158 	ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
159 	ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
160 	ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
161 	ctx->key[0] = get_unaligned_le32(key + 0);
162 	ctx->key[1] = get_unaligned_le32(key + 4);
163 	ctx->key[2] = get_unaligned_le32(key + 8);
164 	ctx->key[3] = get_unaligned_le32(key + 12);
165 	ctx->key[4] = get_unaligned_le32(key + 16);
166 	ctx->key[5] = get_unaligned_le32(key + 20);
167 	ctx->key[6] = get_unaligned_le32(key + 24);
168 	ctx->key[7] = get_unaligned_le32(key + 28);
169 	ctx->counter[0] = 0;
170 	ctx->counter[1] = 0;
171 	ctx->counter[2] = nonce & 0xffffffffU;
172 	ctx->counter[3] = nonce >> 32;
173 }
174 
175 static void chacha20_block(struct chacha20_ctx *ctx, uint32_t *stream)
176 {
177 	uint32_t x[CHACHA20_BLOCK_WORDS];
178 	int i;
179 
180 	for (i = 0; i < ARRAY_SIZE(x); ++i)
181 		x[i] = ctx->state[i];
182 
183 	TWENTY_ROUNDS(x);
184 
185 	for (i = 0; i < ARRAY_SIZE(x); ++i)
186 		stream[i] = cpu_to_le32(x[i] + ctx->state[i]);
187 
188 	ctx->counter[0] += 1;
189 }
190 
191 static void chacha20(struct chacha20_ctx *ctx, uint8_t *out, const uint8_t *in,
192 		     uint32_t len)
193 {
194 	uint32_t buf[CHACHA20_BLOCK_WORDS];
195 
196 	while (len >= CHACHA20_BLOCK_SIZE) {
197 		chacha20_block(ctx, buf);
198 		xor_cpy(out, in, (uint8_t *)buf, CHACHA20_BLOCK_SIZE);
199 		len -= CHACHA20_BLOCK_SIZE;
200 		out += CHACHA20_BLOCK_SIZE;
201 		in += CHACHA20_BLOCK_SIZE;
202 	}
203 	if (len) {
204 		chacha20_block(ctx, buf);
205 		xor_cpy(out, in, (uint8_t *)buf, len);
206 	}
207 }
208 
209 static void hchacha20(uint32_t derived_key[CHACHA20_KEY_WORDS],
210 		      const uint8_t nonce[HCHACHA20_NONCE_SIZE],
211 		      const uint8_t key[HCHACHA20_KEY_SIZE])
212 {
213 	uint32_t x[] = { CHACHA20_CONSTANT_EXPA,
214 		    CHACHA20_CONSTANT_ND_3,
215 		    CHACHA20_CONSTANT_2_BY,
216 		    CHACHA20_CONSTANT_TE_K,
217 		    get_unaligned_le32(key +  0),
218 		    get_unaligned_le32(key +  4),
219 		    get_unaligned_le32(key +  8),
220 		    get_unaligned_le32(key + 12),
221 		    get_unaligned_le32(key + 16),
222 		    get_unaligned_le32(key + 20),
223 		    get_unaligned_le32(key + 24),
224 		    get_unaligned_le32(key + 28),
225 		    get_unaligned_le32(nonce +  0),
226 		    get_unaligned_le32(nonce +  4),
227 		    get_unaligned_le32(nonce +  8),
228 		    get_unaligned_le32(nonce + 12)
229 	};
230 
231 	TWENTY_ROUNDS(x);
232 
233 	memcpy(derived_key + 0, x +  0, sizeof(uint32_t) * 4);
234 	memcpy(derived_key + 4, x + 12, sizeof(uint32_t) * 4);
235 }
236 
/* Poly1305 sizes in bytes: message block, one-time key, and output tag. */
enum poly1305_lengths {
	POLY1305_BLOCK_SIZE = 16,
	POLY1305_KEY_SIZE = 32,
	POLY1305_MAC_SIZE = 16
};
242 
/* Poly1305 core state, held as 26-bit limbs. */
struct poly1305_internal {
	uint32_t h[5];	/* accumulator */
	uint32_t r[5];	/* clamped multiplier taken from the key */
	uint32_t s[4];	/* 5 * r[1..4], precomputed by poly1305_init_core() */
};
248 
/* Streaming Poly1305 context used by poly1305_init/update/final. */
struct poly1305_ctx {
	struct poly1305_internal state;
	uint32_t nonce[4];			/* key bytes 16..31, added to the tag at the end */
	uint8_t data[POLY1305_BLOCK_SIZE];	/* buffered partial block */
	size_t num;				/* number of valid bytes in data */
};
255 
256 static void poly1305_init_core(struct poly1305_internal *st,
257 			       const uint8_t key[16])
258 {
259 	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
260 	st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
261 	st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
262 	st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
263 	st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
264 	st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;
265 
266 	/* s = 5*r */
267 	st->s[0] = st->r[1] * 5;
268 	st->s[1] = st->r[2] * 5;
269 	st->s[2] = st->r[3] * 5;
270 	st->s[3] = st->r[4] * 5;
271 
272 	/* h = 0 */
273 	st->h[0] = 0;
274 	st->h[1] = 0;
275 	st->h[2] = 0;
276 	st->h[3] = 0;
277 	st->h[4] = 0;
278 }
279 
280 static void poly1305_blocks_core(struct poly1305_internal *st,
281 				 const uint8_t *input, size_t len,
282 				 const uint32_t padbit)
283 {
284 	const uint32_t hibit = padbit << 24;
285 	uint32_t r0, r1, r2, r3, r4;
286 	uint32_t s1, s2, s3, s4;
287 	uint32_t h0, h1, h2, h3, h4;
288 	uint64_t d0, d1, d2, d3, d4;
289 	uint32_t c;
290 
291 	r0 = st->r[0];
292 	r1 = st->r[1];
293 	r2 = st->r[2];
294 	r3 = st->r[3];
295 	r4 = st->r[4];
296 
297 	s1 = st->s[0];
298 	s2 = st->s[1];
299 	s3 = st->s[2];
300 	s4 = st->s[3];
301 
302 	h0 = st->h[0];
303 	h1 = st->h[1];
304 	h2 = st->h[2];
305 	h3 = st->h[3];
306 	h4 = st->h[4];
307 
308 	while (len >= POLY1305_BLOCK_SIZE) {
309 		/* h += m[i] */
310 		h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
311 		h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
312 		h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
313 		h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
314 		h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;
315 
316 		/* h *= r */
317 		d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) +
318 		     ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) +
319 		     ((uint64_t)h4 * s1);
320 		d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) +
321 		     ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) +
322 		     ((uint64_t)h4 * s2);
323 		d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) +
324 		     ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) +
325 		     ((uint64_t)h4 * s3);
326 		d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) +
327 		     ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) +
328 		     ((uint64_t)h4 * s4);
329 		d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) +
330 		     ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) +
331 		     ((uint64_t)h4 * r0);
332 
333 		/* (partial) h %= p */
334 		c = (uint32_t)(d0 >> 26);
335 		h0 = (uint32_t)d0 & 0x3ffffff;
336 		d1 += c;
337 		c = (uint32_t)(d1 >> 26);
338 		h1 = (uint32_t)d1 & 0x3ffffff;
339 		d2 += c;
340 		c = (uint32_t)(d2 >> 26);
341 		h2 = (uint32_t)d2 & 0x3ffffff;
342 		d3 += c;
343 		c = (uint32_t)(d3 >> 26);
344 		h3 = (uint32_t)d3 & 0x3ffffff;
345 		d4 += c;
346 		c = (uint32_t)(d4 >> 26);
347 		h4 = (uint32_t)d4 & 0x3ffffff;
348 		h0 += c * 5;
349 		c = (h0 >> 26);
350 		h0 = h0 & 0x3ffffff;
351 		h1 += c;
352 
353 		input += POLY1305_BLOCK_SIZE;
354 		len -= POLY1305_BLOCK_SIZE;
355 	}
356 
357 	st->h[0] = h0;
358 	st->h[1] = h1;
359 	st->h[2] = h2;
360 	st->h[3] = h3;
361 	st->h[4] = h4;
362 }
363 
364 static void poly1305_emit_core(struct poly1305_internal *st, uint8_t mac[16],
365 			       const uint32_t nonce[4])
366 {
367 	uint32_t h0, h1, h2, h3, h4, c;
368 	uint32_t g0, g1, g2, g3, g4;
369 	uint64_t f;
370 	uint32_t mask;
371 
372 	/* fully carry h */
373 	h0 = st->h[0];
374 	h1 = st->h[1];
375 	h2 = st->h[2];
376 	h3 = st->h[3];
377 	h4 = st->h[4];
378 
379 	c = h1 >> 26;
380 	h1 = h1 & 0x3ffffff;
381 	h2 += c;
382 	c = h2 >> 26;
383 	h2 = h2 & 0x3ffffff;
384 	h3 += c;
385 	c = h3 >> 26;
386 	h3 = h3 & 0x3ffffff;
387 	h4 += c;
388 	c = h4 >> 26;
389 	h4 = h4 & 0x3ffffff;
390 	h0 += c * 5;
391 	c = h0 >> 26;
392 	h0 = h0 & 0x3ffffff;
393 	h1 += c;
394 
395 	/* compute h + -p */
396 	g0 = h0 + 5;
397 	c = g0 >> 26;
398 	g0 &= 0x3ffffff;
399 	g1 = h1 + c;
400 	c = g1 >> 26;
401 	g1 &= 0x3ffffff;
402 	g2 = h2 + c;
403 	c = g2 >> 26;
404 	g2 &= 0x3ffffff;
405 	g3 = h3 + c;
406 	c = g3 >> 26;
407 	g3 &= 0x3ffffff;
408 	g4 = h4 + c - (1UL << 26);
409 
410 	/* select h if h < p, or h + -p if h >= p */
411 	mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1;
412 	g0 &= mask;
413 	g1 &= mask;
414 	g2 &= mask;
415 	g3 &= mask;
416 	g4 &= mask;
417 	mask = ~mask;
418 
419 	h0 = (h0 & mask) | g0;
420 	h1 = (h1 & mask) | g1;
421 	h2 = (h2 & mask) | g2;
422 	h3 = (h3 & mask) | g3;
423 	h4 = (h4 & mask) | g4;
424 
425 	/* h = h % (2^128) */
426 	h0 = ((h0) | (h1 << 26)) & 0xffffffff;
427 	h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
428 	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
429 	h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
430 
431 	/* mac = (h + nonce) % (2^128) */
432 	f = (uint64_t)h0 + nonce[0];
433 	h0 = (uint32_t)f;
434 	f = (uint64_t)h1 + nonce[1] + (f >> 32);
435 	h1 = (uint32_t)f;
436 	f = (uint64_t)h2 + nonce[2] + (f >> 32);
437 	h2 = (uint32_t)f;
438 	f = (uint64_t)h3 + nonce[3] + (f >> 32);
439 	h3 = (uint32_t)f;
440 
441 	put_unaligned_le32(h0, &mac[0]);
442 	put_unaligned_le32(h1, &mac[4]);
443 	put_unaligned_le32(h2, &mac[8]);
444 	put_unaligned_le32(h3, &mac[12]);
445 }
446 
447 static void poly1305_init(struct poly1305_ctx *ctx,
448 			  const uint8_t key[POLY1305_KEY_SIZE])
449 {
450 	ctx->nonce[0] = get_unaligned_le32(&key[16]);
451 	ctx->nonce[1] = get_unaligned_le32(&key[20]);
452 	ctx->nonce[2] = get_unaligned_le32(&key[24]);
453 	ctx->nonce[3] = get_unaligned_le32(&key[28]);
454 
455 	poly1305_init_core(&ctx->state, key);
456 
457 	ctx->num = 0;
458 }
459 
460 static void poly1305_update(struct poly1305_ctx *ctx, const uint8_t *input,
461 			    size_t len)
462 {
463 	const size_t num = ctx->num;
464 	size_t rem;
465 
466 	if (num) {
467 		rem = POLY1305_BLOCK_SIZE - num;
468 		if (len < rem) {
469 			memcpy(ctx->data + num, input, len);
470 			ctx->num = num + len;
471 			return;
472 		}
473 		memcpy(ctx->data + num, input, rem);
474 		poly1305_blocks_core(&ctx->state, ctx->data,
475 				     POLY1305_BLOCK_SIZE, 1);
476 		input += rem;
477 		len -= rem;
478 	}
479 
480 	rem = len % POLY1305_BLOCK_SIZE;
481 	len -= rem;
482 
483 	if (len >= POLY1305_BLOCK_SIZE) {
484 		poly1305_blocks_core(&ctx->state, input, len, 1);
485 		input += len;
486 	}
487 
488 	if (rem)
489 		memcpy(ctx->data, input, rem);
490 
491 	ctx->num = rem;
492 }
493 
494 static void poly1305_final(struct poly1305_ctx *ctx,
495 			   uint8_t mac[POLY1305_MAC_SIZE])
496 {
497 	size_t num = ctx->num;
498 
499 	if (num) {
500 		ctx->data[num++] = 1;
501 		while (num < POLY1305_BLOCK_SIZE)
502 			ctx->data[num++] = 0;
503 		poly1305_blocks_core(&ctx->state, ctx->data,
504 				     POLY1305_BLOCK_SIZE, 0);
505 	}
506 
507 	poly1305_emit_core(&ctx->state, mac, ctx->nonce);
508 
509 	explicit_bzero(ctx, sizeof(*ctx));
510 }
511 #endif
512 
513 #ifdef COMPAT_NEED_CHACHA20POLY1305
/* Zero bytes fed to Poly1305 to pad the AD and ciphertext to 16-byte multiples. */
static const uint8_t pad0[16] = { 0 };
515 
516 void
517 chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
518 			 const uint8_t *ad, const size_t ad_len,
519 			 const uint64_t nonce,
520 			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
521 {
522 	struct poly1305_ctx poly1305_state;
523 	struct chacha20_ctx chacha20_state;
524 	union {
525 		uint8_t block0[POLY1305_KEY_SIZE];
526 		uint64_t lens[2];
527 	} b = { { 0 } };
528 
529 	chacha20_init(&chacha20_state, key, nonce);
530 	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
531 	poly1305_init(&poly1305_state, b.block0);
532 
533 	poly1305_update(&poly1305_state, ad, ad_len);
534 	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
535 
536 	chacha20(&chacha20_state, dst, src, src_len);
537 
538 	poly1305_update(&poly1305_state, dst, src_len);
539 	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf);
540 
541 	b.lens[0] = cpu_to_le64(ad_len);
542 	b.lens[1] = cpu_to_le64(src_len);
543 	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
544 
545 	poly1305_final(&poly1305_state, dst + src_len);
546 
547 	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
548 	explicit_bzero(&b, sizeof(b));
549 }
550 
551 bool
552 chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
553 			 const uint8_t *ad, const size_t ad_len,
554 			 const uint64_t nonce,
555 			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
556 {
557 	struct poly1305_ctx poly1305_state;
558 	struct chacha20_ctx chacha20_state;
559 	bool ret;
560 	size_t dst_len;
561 	union {
562 		uint8_t block0[POLY1305_KEY_SIZE];
563 		uint8_t mac[POLY1305_MAC_SIZE];
564 		uint64_t lens[2];
565 	} b = { { 0 } };
566 
567 	if (src_len < POLY1305_MAC_SIZE)
568 		return false;
569 
570 	chacha20_init(&chacha20_state, key, nonce);
571 	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
572 	poly1305_init(&poly1305_state, b.block0);
573 
574 	poly1305_update(&poly1305_state, ad, ad_len);
575 	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);
576 
577 	dst_len = src_len - POLY1305_MAC_SIZE;
578 	poly1305_update(&poly1305_state, src, dst_len);
579 	poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf);
580 
581 	b.lens[0] = cpu_to_le64(ad_len);
582 	b.lens[1] = cpu_to_le64(dst_len);
583 	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
584 
585 	poly1305_final(&poly1305_state, b.mac);
586 
587 	ret = timingsafe_bcmp(b.mac, src + dst_len, POLY1305_MAC_SIZE) == 0;
588 	if (ret)
589 		chacha20(&chacha20_state, dst, src, dst_len);
590 
591 	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
592 	explicit_bzero(&b, sizeof(b));
593 
594 	return ret;
595 }
596 
597 void
598 xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src,
599 			  const size_t src_len, const uint8_t *ad,
600 			  const size_t ad_len,
601 			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
602 			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
603 {
604 	uint32_t derived_key[CHACHA20_KEY_WORDS];
605 
606 	hchacha20(derived_key, nonce, key);
607 	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
608 	chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
609 				 get_unaligned_le64(nonce + 16),
610 				 (uint8_t *)derived_key);
611 	explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE);
612 }
613 
614 bool
615 xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
616 			  const size_t src_len,  const uint8_t *ad,
617 			  const size_t ad_len,
618 			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
619 			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
620 {
621 	bool ret;
622 	uint32_t derived_key[CHACHA20_KEY_WORDS];
623 
624 	hchacha20(derived_key, nonce, key);
625 	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
626 	ret = chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
627 				       get_unaligned_le64(nonce + 16),
628 				       (uint8_t *)derived_key);
629 	explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE);
630 	return ret;
631 }
632 #endif
633 
634 #ifdef COMPAT_NEED_CHACHA20POLY1305_MBUF
635 static inline int
636 chacha20poly1305_crypt_mbuf(struct mbuf *m0, uint64_t nonce,
637 			    const uint8_t key[CHACHA20POLY1305_KEY_SIZE], bool encrypt)
638 {
639 	struct poly1305_ctx poly1305_state;
640 	struct chacha20_ctx chacha20_state;
641 	uint8_t *buf, mbuf_mac[POLY1305_MAC_SIZE];
642 	size_t len, leftover = 0;
643 	struct mbuf *m;
644 	int ret;
645 	union {
646 		uint32_t stream[CHACHA20_BLOCK_WORDS];
647 		uint8_t block0[POLY1305_KEY_SIZE];
648 		uint8_t mac[POLY1305_MAC_SIZE];
649 		uint64_t lens[2];
650 	} b = { { 0 } };
651 
652 	if (!encrypt) {
653 		if (m0->m_pkthdr.len < POLY1305_MAC_SIZE)
654 			return EMSGSIZE;
655 		m_copydata(m0, m0->m_pkthdr.len - POLY1305_MAC_SIZE, POLY1305_MAC_SIZE, mbuf_mac);
656 		m_adj(m0, -POLY1305_MAC_SIZE);
657 	}
658 
659 	chacha20_init(&chacha20_state, key, nonce);
660 	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
661 	poly1305_init(&poly1305_state, b.block0);
662 
663 	for (m = m0; m; m = m->m_next) {
664 		len = m->m_len;
665 		buf = m->m_data;
666 
667 		if (!encrypt)
668 			poly1305_update(&poly1305_state, m->m_data, m->m_len);
669 
670 		if (leftover != 0) {
671 			size_t l = min(len, leftover);
672 			xor_cpy(buf, buf, ((uint8_t *)b.stream) + (CHACHA20_BLOCK_SIZE - leftover), l);
673 			leftover -= l;
674 			buf += l;
675 			len -= l;
676 		}
677 
678 		while (len >= CHACHA20_BLOCK_SIZE) {
679 			chacha20_block(&chacha20_state, b.stream);
680 			xor_cpy(buf, buf, (uint8_t *)b.stream, CHACHA20_BLOCK_SIZE);
681 			buf += CHACHA20_BLOCK_SIZE;
682 			len -= CHACHA20_BLOCK_SIZE;
683 		}
684 
685 		if (len) {
686 			chacha20_block(&chacha20_state, b.stream);
687 			xor_cpy(buf, buf, (uint8_t *)b.stream, len);
688 			leftover = CHACHA20_BLOCK_SIZE - len;
689 		}
690 
691 		if (encrypt)
692 			poly1305_update(&poly1305_state, m->m_data, m->m_len);
693 	}
694 	poly1305_update(&poly1305_state, pad0, (0x10 - m0->m_pkthdr.len) & 0xf);
695 
696 	b.lens[0] = 0;
697 	b.lens[1] = cpu_to_le64(m0->m_pkthdr.len);
698 	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));
699 
700 	poly1305_final(&poly1305_state, b.mac);
701 
702 	if (encrypt)
703 		ret = m_append(m0, POLY1305_MAC_SIZE, b.mac) ? 0 : ENOMEM;
704 	else
705 		ret = timingsafe_bcmp(b.mac, mbuf_mac, POLY1305_MAC_SIZE) == 0 ? 0 : EBADMSG;
706 
707 	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
708 	explicit_bzero(&b, sizeof(b));
709 
710 	return ret;
711 }
712 
/*
 * Encrypt the mbuf chain m in place and append the authentication tag.
 * Thin wrapper around chacha20poly1305_crypt_mbuf() with encrypt = true;
 * returns that function's error code (0 on success).
 */
int
chacha20poly1305_encrypt_mbuf(struct mbuf *m, const uint64_t nonce,
			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	return chacha20poly1305_crypt_mbuf(m, nonce, key, true);
}
719 
/*
 * Strip and verify the trailing tag of the mbuf chain m and decrypt it in
 * place. Thin wrapper around chacha20poly1305_crypt_mbuf() with
 * encrypt = false; returns that function's error code (0 on success).
 */
int
chacha20poly1305_decrypt_mbuf(struct mbuf *m, const uint64_t nonce,
			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	return chacha20poly1305_crypt_mbuf(m, nonce, key, false);
}
726 #else
/*
 * Completion callback installed on the crypto requests built in this file.
 * It does nothing and returns 0; the requests live on the caller's stack and
 * are examined by the caller after crypto_dispatch() returns.
 */
static int
crypto_callback(struct cryptop *crp)
{
	return (0);
}
732 
733 int
734 chacha20poly1305_encrypt_mbuf(struct mbuf *m, const uint64_t nonce,
735 			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
736 {
737 	static const char blank_tag[POLY1305_HASH_LEN];
738 	struct cryptop crp;
739 	int ret;
740 
741 	if (!m_append(m, POLY1305_HASH_LEN, blank_tag))
742 		return (ENOMEM);
743 	crypto_initreq(&crp, chacha20_poly1305_sid);
744 	crp.crp_op = CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST;
745 	crp.crp_flags = CRYPTO_F_IV_SEPARATE | CRYPTO_F_CBIMM;
746 	crypto_use_mbuf(&crp, m);
747 	crp.crp_payload_length = m->m_pkthdr.len - POLY1305_HASH_LEN;
748 	crp.crp_digest_start = crp.crp_payload_length;
749 	le64enc(crp.crp_iv, nonce);
750 	crp.crp_cipher_key = key;
751 	crp.crp_callback = crypto_callback;
752 	ret = crypto_dispatch(&crp);
753 	crypto_destroyreq(&crp);
754 	return (ret);
755 }
756 
757 int
758 chacha20poly1305_decrypt_mbuf(struct mbuf *m, const uint64_t nonce,
759 			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
760 {
761 	struct cryptop crp;
762 	int ret;
763 
764 	if (m->m_pkthdr.len < POLY1305_HASH_LEN)
765 		return (EMSGSIZE);
766 	crypto_initreq(&crp, chacha20_poly1305_sid);
767 	crp.crp_op = CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST;
768 	crp.crp_flags = CRYPTO_F_IV_SEPARATE | CRYPTO_F_CBIMM;
769 	crypto_use_mbuf(&crp, m);
770 	crp.crp_payload_length = m->m_pkthdr.len - POLY1305_HASH_LEN;
771 	crp.crp_digest_start = crp.crp_payload_length;
772 	le64enc(crp.crp_iv, nonce);
773 	crp.crp_cipher_key = key;
774 	crp.crp_callback = crypto_callback;
775 	ret = crypto_dispatch(&crp);
776 	crypto_destroyreq(&crp);
777 	if (ret)
778 		return (ret);
779 	m_adj(m, -POLY1305_HASH_LEN);
780 	return (0);
781 }
782 #endif
783 
784 #ifdef COMPAT_NEED_BLAKE2S
/*
 * BLAKE2s initialisation vector: seeds the chaining value h and the second
 * half of the working vector in blake2s_compress().
 */
static const uint32_t blake2s_iv[8] = {
	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
789 
/*
 * Message word schedule: row r lists, in order, the indices of the message
 * words consumed by the eight G applications of round r.
 */
static const uint8_t blake2s_sigma[10][16] = {
	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
	{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
	{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
	{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
	{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
	{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
	{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
	{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
	{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
	{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
};
802 
/*
 * Mark the next compression as the final one by setting the finalisation
 * flag f[0] to all ones; blake2s_compress() mixes f[0] into the working
 * vector.
 */
static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
	state->f[0] = -1;
}
807 
808 static inline void blake2s_increment_counter(struct blake2s_state *state,
809 					     const uint32_t inc)
810 {
811 	state->t[0] += inc;
812 	state->t[1] += (state->t[0] < inc);
813 }
814 
815 static inline void blake2s_init_param(struct blake2s_state *state,
816 				      const uint32_t param)
817 {
818 	int i;
819 
820 	memset(state, 0, sizeof(*state));
821 	for (i = 0; i < 8; ++i)
822 		state->h[i] = blake2s_iv[i];
823 	state->h[0] ^= param;
824 }
825 
826 void blake2s_init(struct blake2s_state *state, const size_t outlen)
827 {
828 	blake2s_init_param(state, 0x01010000 | outlen);
829 	state->outlen = outlen;
830 }
831 
832 void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
833 		      const uint8_t *key, const size_t keylen)
834 {
835 	uint8_t block[BLAKE2S_BLOCK_SIZE] = { 0 };
836 
837 	blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen);
838 	state->outlen = outlen;
839 	memcpy(block, key, keylen);
840 	blake2s_update(state, block, BLAKE2S_BLOCK_SIZE);
841 	explicit_bzero(block, BLAKE2S_BLOCK_SIZE);
842 }
843 
844 static inline void blake2s_compress(struct blake2s_state *state,
845 				    const uint8_t *block, size_t nblocks,
846 				    const uint32_t inc)
847 {
848 	uint32_t m[16];
849 	uint32_t v[16];
850 	int i;
851 
852 	while (nblocks > 0) {
853 		blake2s_increment_counter(state, inc);
854 		memcpy(m, block, BLAKE2S_BLOCK_SIZE);
855 		le32_to_cpu_array(m, ARRAY_SIZE(m));
856 		memcpy(v, state->h, 32);
857 		v[ 8] = blake2s_iv[0];
858 		v[ 9] = blake2s_iv[1];
859 		v[10] = blake2s_iv[2];
860 		v[11] = blake2s_iv[3];
861 		v[12] = blake2s_iv[4] ^ state->t[0];
862 		v[13] = blake2s_iv[5] ^ state->t[1];
863 		v[14] = blake2s_iv[6] ^ state->f[0];
864 		v[15] = blake2s_iv[7] ^ state->f[1];
865 
866 #define G(r, i, a, b, c, d) do { \
867 	a += b + m[blake2s_sigma[r][2 * i + 0]]; \
868 	d = ror32(d ^ a, 16); \
869 	c += d; \
870 	b = ror32(b ^ c, 12); \
871 	a += b + m[blake2s_sigma[r][2 * i + 1]]; \
872 	d = ror32(d ^ a, 8); \
873 	c += d; \
874 	b = ror32(b ^ c, 7); \
875 } while (0)
876 
877 #define ROUND(r) do { \
878 	G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
879 	G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
880 	G(r, 2, v[2], v[ 6], v[10], v[14]); \
881 	G(r, 3, v[3], v[ 7], v[11], v[15]); \
882 	G(r, 4, v[0], v[ 5], v[10], v[15]); \
883 	G(r, 5, v[1], v[ 6], v[11], v[12]); \
884 	G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
885 	G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
886 } while (0)
887 		ROUND(0);
888 		ROUND(1);
889 		ROUND(2);
890 		ROUND(3);
891 		ROUND(4);
892 		ROUND(5);
893 		ROUND(6);
894 		ROUND(7);
895 		ROUND(8);
896 		ROUND(9);
897 
898 #undef G
899 #undef ROUND
900 
901 		for (i = 0; i < 8; ++i)
902 			state->h[i] ^= v[i] ^ v[i + 8];
903 
904 		block += BLAKE2S_BLOCK_SIZE;
905 		--nblocks;
906 	}
907 }
908 
909 void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen)
910 {
911 	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
912 
913 	if (!inlen)
914 		return;
915 	if (inlen > fill) {
916 		memcpy(state->buf + state->buflen, in, fill);
917 		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
918 		state->buflen = 0;
919 		in += fill;
920 		inlen -= fill;
921 	}
922 	if (inlen > BLAKE2S_BLOCK_SIZE) {
923 		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
924 		/* Hash one less (full) block than strictly possible */
925 		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
926 		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
927 		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
928 	}
929 	memcpy(state->buf + state->buflen, in, inlen);
930 	state->buflen += inlen;
931 }
932 
933 void blake2s_final(struct blake2s_state *state, uint8_t *out)
934 {
935 	blake2s_set_lastblock(state);
936 	memset(state->buf + state->buflen, 0,
937 	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
938 	blake2s_compress(state, state->buf, 1, state->buflen);
939 	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
940 	memcpy(out, state->h, state->outlen);
941 	explicit_bzero(state, sizeof(*state));
942 }
943 #endif
944 
945 #ifdef COMPAT_NEED_CURVE25519
946 /* Below here is fiat's implementation of x25519.
947  *
948  * Copyright (C) 2015-2016 The fiat-crypto Authors.
949  * Copyright (C) 2018-2021 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
950  *
951  * This is a machine-generated formally verified implementation of Curve25519
952  * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
953  * machine generated, it has been tweaked to be suitable for use in the kernel.
954  * It is optimized for 32-bit machines and machines that cannot work efficiently
955  * with 128-bit integer types.
956  */
957 
/* fe means field element. Here the field is \Z/(2^255-19). An element t,
 * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
 * t[3]+2^102 t[4]+...+2^230 t[9], i.e. ten limbs of alternating 26 and 25
 * bits stored in v[0]...v[9].
 * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
 * Multiplication and carrying produce fe from fe_loose.
 */
typedef struct fe { uint32_t v[10]; } fe;
965 
/* fe_loose has the same ten-limb layout as fe but with wider limb bounds:
 * fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
 * Addition and subtraction produce fe_loose from (fe, fe).
 */
typedef struct fe_loose { uint32_t v[10]; } fe_loose;
970 
/*
 * Unpack 32 little-endian bytes at s into ten limbs of alternating 26 and 25
 * bits. Bit 255 (the top bit of s[31]) is discarded.
 */
static inline void fe_frombytes_impl(uint32_t h[10], const uint8_t *s)
{
	uint32_t a[8];
	unsigned int i;

	for (i = 0; i < 8; ++i)
		a[i] = get_unaligned_le32(s + 4 * i);

	/* Each limb takes the bits left in one word plus the low bits of the next. */
	h[0] = a[0] & ((1 << 26) - 1);                           /* 26 bits */
	h[1] = (a[0] >> 26) | ((a[1] & ((1 << 19) - 1)) << 6);   /*  6 + 19 = 25 */
	h[2] = (a[1] >> 19) | ((a[2] & ((1 << 13) - 1)) << 13);  /* 13 + 13 = 26 */
	h[3] = (a[2] >> 13) | ((a[3] & ((1 << 6) - 1)) << 19);   /* 19 +  6 = 25 */
	h[4] = (a[3] >> 6);                                      /* 26 bits */
	h[5] = a[4] & ((1 << 25) - 1);                           /* 25 bits */
	h[6] = (a[4] >> 25) | ((a[5] & ((1 << 19) - 1)) << 7);   /*  7 + 19 = 26 */
	h[7] = (a[5] >> 19) | ((a[6] & ((1 << 12) - 1)) << 13);  /* 13 + 12 = 25 */
	h[8] = (a[6] >> 12) | ((a[7] & ((1 << 6) - 1)) << 20);   /* 20 +  6 = 26 */
	h[9] = (a[7] >> 6) & ((1 << 25) - 1);                    /* 25 bits, top bit dropped */
}
993 
/* Unpack the 32 bytes at s into the limbs of h; see fe_frombytes_impl(). */
static inline void fe_frombytes(fe *h, const uint8_t *s)
{
	fe_frombytes_impl(h->v, s);
}
998 
/*
 * Add a + b + c for a 25-bit limb: store the low 25 bits of the sum in *low
 * and return bit 25 as the carry. The result uses at most 26 bits, so a
 * 32-bit intermediate is sufficient.
 */
static inline uint8_t /*bool*/
addcarryx_u25(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low)
{
	const uint32_t limb_mask = (1 << 25) - 1;
	const uint32_t sum = a + b + c;

	*low = sum & limb_mask;
	return (sum >> 25) & 1;
}
1009 
/*
 * Add a + b + c for a 26-bit limb: store the low 26 bits of the sum in *low
 * and return bit 26 as the carry. The result uses at most 27 bits, so a
 * 32-bit intermediate is sufficient.
 */
static inline uint8_t /*bool*/
addcarryx_u26(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low)
{
	const uint32_t limb_mask = (1 << 26) - 1;
	const uint32_t sum = a + b + c;

	*low = sum & limb_mask;
	return (sum >> 26) & 1;
}
1020 
/*
 * Compute a - b - c for a 25-bit limb: store the low 25 bits of the
 * difference in *low and return 1 if the subtraction borrowed (the 32-bit
 * result went negative), 0 otherwise.
 */
static inline uint8_t /*bool*/
subborrow_u25(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low)
{
	const uint32_t limb_mask = (1 << 25) - 1;
	const uint32_t diff = a - b - c;

	*low = diff & limb_mask;
	/* The sign bit of the wrapped difference is the borrow. */
	return diff >> 31;
}
1031 
/*
 * Compute a - b - c, where c is the incoming borrow.  The low 26 bits of
 * the difference are stored in *low; the top bit of the 32-bit
 * difference is returned as the outgoing borrow.
 */
static inline uint8_t /*bool*/
subborrow_u26(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low)
{
	const uint32_t limb_mask = ((uint32_t)1 << 26) - 1;
	const uint32_t diff = a - b - c;

	*low = diff & limb_mask;
	return (uint8_t)(diff >> 31);
}
1042 
/*
 * Branch-free select: returns z when t is zero and nz when t is nonzero.
 */
static inline uint32_t cmovznz32(uint32_t t, uint32_t z, uint32_t nz)
{
	/* 0xffffffff when t is nonzero, 0 otherwise. */
	const uint32_t pick_nz = (uint32_t)0 - (uint32_t)(t != 0);

	return (nz & pick_nz) | (z & ~pick_nz);
}
1048 
/*
 * Bring a field element into canonical form.
 *
 * The constant p below (2^255 - 19 in the alternating 26/25-bit limb
 * layout) is subtracted limb by limb with borrow propagation.  If the
 * subtraction borrowed out of the top limb, p is added back, again limb
 * by limb with carry propagation; the add-back operand is selected with
 * a mask rather than a branch.
 *
 * All of in1 is consumed before out is written.
 */
static inline void fe_freeze(uint32_t out[10], const uint32_t in1[10])
{
	static const uint32_t p[10] = {
		0x3ffffed, 0x1ffffff, 0x3ffffff, 0x1ffffff, 0x3ffffff,
		0x1ffffff, 0x3ffffff, 0x1ffffff, 0x3ffffff, 0x1ffffff
	};
	uint32_t diff[10];
	uint32_t addback;
	uint8_t /*bool*/ borrow = 0x0;
	uint8_t /*bool*/ carry = 0x0;
	unsigned int limb;

	/* diff = in1 - p; even limbs are 26 bits wide, odd limbs 25. */
	for (limb = 0; limb < 10; limb++) {
		if (limb & 1)
			borrow = subborrow_u25(borrow, in1[limb], p[limb], &diff[limb]);
		else
			borrow = subborrow_u26(borrow, in1[limb], p[limb], &diff[limb]);
	}

	/* All ones when the subtraction underflowed, zero otherwise. */
	addback = cmovznz32(borrow, 0x0, 0xffffffff);

	/* out = diff + (underflowed ? p : 0); the final carry is dropped. */
	for (limb = 0; limb < 10; limb++) {
		const uint32_t term = addback & p[limb];

		if (limb & 1)
			carry = addcarryx_u25(carry, diff[limb], term, &out[limb]);
		else
			carry = addcarryx_u26(carry, diff[limb], term, &out[limb]);
	}
}
1103 
1104 static inline void fe_tobytes(uint8_t s[32], const fe *f)
1105 {
1106 	uint32_t h[10];
1107 	fe_freeze(h, f->v);
1108 	s[0] = h[0] >> 0;
1109 	s[1] = h[0] >> 8;
1110 	s[2] = h[0] >> 16;
1111 	s[3] = (h[0] >> 24) | (h[1] << 2);
1112 	s[4] = h[1] >> 6;
1113 	s[5] = h[1] >> 14;
1114 	s[6] = (h[1] >> 22) | (h[2] << 3);
1115 	s[7] = h[2] >> 5;
1116 	s[8] = h[2] >> 13;
1117 	s[9] = (h[2] >> 21) | (h[3] << 5);
1118 	s[10] = h[3] >> 3;
1119 	s[11] = h[3] >> 11;
1120 	s[12] = (h[3] >> 19) | (h[4] << 6);
1121 	s[13] = h[4] >> 2;
1122 	s[14] = h[4] >> 10;
1123 	s[15] = h[4] >> 18;
1124 	s[16] = h[5] >> 0;
1125 	s[17] = h[5] >> 8;
1126 	s[18] = h[5] >> 16;
1127 	s[19] = (h[5] >> 24) | (h[6] << 1);
1128 	s[20] = h[6] >> 7;
1129 	s[21] = h[6] >> 15;
1130 	s[22] = (h[6] >> 23) | (h[7] << 3);
1131 	s[23] = h[7] >> 5;
1132 	s[24] = h[7] >> 13;
1133 	s[25] = (h[7] >> 21) | (h[8] << 4);
1134 	s[26] = h[8] >> 4;
1135 	s[27] = h[8] >> 12;
1136 	s[28] = (h[8] >> 20) | (h[9] << 6);
1137 	s[29] = h[9] >> 2;
1138 	s[30] = h[9] >> 10;
1139 	s[31] = h[9] >> 18;
1140 }
1141 
1142 /* h = f */
1143 static inline void fe_copy(fe *h, const fe *f)
1144 {
1145 	memmove(h, f, sizeof(uint32_t) * 10);
1146 }
1147 
1148 static inline void fe_copy_lt(fe_loose *h, const fe *f)
1149 {
1150 	memmove(h, f, sizeof(uint32_t) * 10);
1151 }
1152 
1153 /* h = 0 */
1154 static inline void fe_0(fe *h)
1155 {
1156 	memset(h, 0, sizeof(uint32_t) * 10);
1157 }
1158 
1159 /* h = 1 */
1160 static inline void fe_1(fe *h)
1161 {
1162 	memset(h, 0, sizeof(uint32_t) * 10);
1163 	h->v[0] = 1;
1164 }
1165 
/*
 * Limb-wise addition: out[i] = in1[i] + in2[i] for the ten limbs.
 *
 * No carries are propagated.  Every input limb is read before any output
 * limb is written, so out may alias either input.
 */
static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
{
	uint32_t sum[10];
	unsigned int i;

	for (i = 0; i < 10; i++)
		sum[i] = in1[i] + in2[i];

	for (i = 0; i < 10; i++)
		out[i] = sum[i];
}
1199 
1200 /* h = f + g
1201  * Can overlap h with f or g.
1202  */
1203 static inline void fe_add(fe_loose *h, const fe *f, const fe *g)
1204 {
1205 	fe_add_impl(h->v, f->v, g->v);
1206 }
1207 
/*
 * Limb-wise subtraction with a per-limb bias:
 * out[i] = (bias[i] + in1[i]) - in2[i].
 *
 * The bias added before subtracting is 0x7ffffda for limb 0, 0x7fffffe
 * for the other even limbs and 0x3fffffe for the odd limbs.  No borrows
 * are propagated.  Every input limb is read before any output limb is
 * written, so out may alias either input.
 */
static void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
{
	static const uint32_t bias[10] = {
		0x7ffffda, 0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe,
		0x3fffffe, 0x7fffffe, 0x3fffffe, 0x7fffffe, 0x3fffffe
	};
	uint32_t diff[10];
	unsigned int i;

	for (i = 0; i < 10; i++)
		diff[i] = (bias[i] + in1[i]) - in2[i];

	for (i = 0; i < 10; i++)
		out[i] = diff[i];
}
1241 
1242 /* h = f - g
1243  * Can overlap h with f or g.
1244  */
1245 static inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
1246 {
1247 	fe_sub_impl(h->v, f->v, g->v);
1248 }
1249 
/*
 * Multiply two field elements held as ten 32-bit limbs each and write the
 * partially reduced product to out.
 *
 * Shape of the computation, as written below:
 *  - all twenty input limbs are copied into locals before out is written;
 *  - x40..x58 are the nineteen 64-bit column sums of the schoolbook
 *    product (column k collects in1[i] * in2[j] with i + j == k); products
 *    of two odd-indexed limbs are doubled;
 *  - columns 10..18 (x50..x58) are folded into columns 0..8 scaled by 19,
 *    spelled as (v << 4) + (v << 1) + v; column 9 (x49) is left as is;
 *  - a carry chain walks limbs 0..9, keeping 26 bits (shift 0x1a) in even
 *    limbs and 25 bits (shift 0x19) in odd limbs;
 *  - the carry out of limb 9 is multiplied by 0x13 and added to limb 0,
 *    followed by two more carry steps; the last carry is added into
 *    out[2] without masking.
 */
1250 static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
1251 {
	/* Limbs of in1 (x5 is limb 0, x20 is limb 9). */
1252 	const uint32_t x20 = in1[9];
1253 	const uint32_t x21 = in1[8];
1254 	const uint32_t x19 = in1[7];
1255 	const uint32_t x17 = in1[6];
1256 	const uint32_t x15 = in1[5];
1257 	const uint32_t x13 = in1[4];
1258 	const uint32_t x11 = in1[3];
1259 	const uint32_t x9 = in1[2];
1260 	const uint32_t x7 = in1[1];
1261 	const uint32_t x5 = in1[0];
	/* Limbs of in2 (x23 is limb 0, x38 is limb 9). */
1262 	const uint32_t x38 = in2[9];
1263 	const uint32_t x39 = in2[8];
1264 	const uint32_t x37 = in2[7];
1265 	const uint32_t x35 = in2[6];
1266 	const uint32_t x33 = in2[5];
1267 	const uint32_t x31 = in2[4];
1268 	const uint32_t x29 = in2[3];
1269 	const uint32_t x27 = in2[2];
1270 	const uint32_t x25 = in2[1];
1271 	const uint32_t x23 = in2[0];
	/* Column sums 0..18 of the limb products. */
1272 	uint64_t x40 = ((uint64_t)x23 * x5);
1273 	uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5));
1274 	uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5));
1275 	uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5));
1276 	uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5));
1277 	uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5));
1278 	uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5));
1279 	uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5));
1280 	uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5));
1281 	uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5));
1282 	uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9));
1283 	uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9));
1284 	uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13));
1285 	uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13));
1286 	uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17));
1287 	uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17));
1288 	uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19))));
1289 	uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21));
1290 	uint64_t x58 = ((uint64_t)(0x2 * x38) * x20);
	/* Fold column k+10 into column k as col[k] += 19 * col[k+10], k = 8 down to 0. */
1291 	uint64_t x59 = (x48 + (x58 << 0x4));
1292 	uint64_t x60 = (x59 + (x58 << 0x1));
1293 	uint64_t x61 = (x60 + x58);
1294 	uint64_t x62 = (x47 + (x57 << 0x4));
1295 	uint64_t x63 = (x62 + (x57 << 0x1));
1296 	uint64_t x64 = (x63 + x57);
1297 	uint64_t x65 = (x46 + (x56 << 0x4));
1298 	uint64_t x66 = (x65 + (x56 << 0x1));
1299 	uint64_t x67 = (x66 + x56);
1300 	uint64_t x68 = (x45 + (x55 << 0x4));
1301 	uint64_t x69 = (x68 + (x55 << 0x1));
1302 	uint64_t x70 = (x69 + x55);
1303 	uint64_t x71 = (x44 + (x54 << 0x4));
1304 	uint64_t x72 = (x71 + (x54 << 0x1));
1305 	uint64_t x73 = (x72 + x54);
1306 	uint64_t x74 = (x43 + (x53 << 0x4));
1307 	uint64_t x75 = (x74 + (x53 << 0x1));
1308 	uint64_t x76 = (x75 + x53);
1309 	uint64_t x77 = (x42 + (x52 << 0x4));
1310 	uint64_t x78 = (x77 + (x52 << 0x1));
1311 	uint64_t x79 = (x78 + x52);
1312 	uint64_t x80 = (x41 + (x51 << 0x4));
1313 	uint64_t x81 = (x80 + (x51 << 0x1));
1314 	uint64_t x82 = (x81 + x51);
1315 	uint64_t x83 = (x40 + (x50 << 0x4));
1316 	uint64_t x84 = (x83 + (x50 << 0x1));
1317 	uint64_t x85 = (x84 + x50);
	/* Carry chain over limbs 0..9: keep 26 or 25 bits, pass the rest upward. */
1318 	uint64_t x86 = (x85 >> 0x1a);
1319 	uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
1320 	uint64_t x88 = (x86 + x82);
1321 	uint64_t x89 = (x88 >> 0x19);
1322 	uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
1323 	uint64_t x91 = (x89 + x79);
1324 	uint64_t x92 = (x91 >> 0x1a);
1325 	uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
1326 	uint64_t x94 = (x92 + x76);
1327 	uint64_t x95 = (x94 >> 0x19);
1328 	uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
1329 	uint64_t x97 = (x95 + x73);
1330 	uint64_t x98 = (x97 >> 0x1a);
1331 	uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
1332 	uint64_t x100 = (x98 + x70);
1333 	uint64_t x101 = (x100 >> 0x19);
1334 	uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
1335 	uint64_t x103 = (x101 + x67);
1336 	uint64_t x104 = (x103 >> 0x1a);
1337 	uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
1338 	uint64_t x106 = (x104 + x64);
1339 	uint64_t x107 = (x106 >> 0x19);
1340 	uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
1341 	uint64_t x109 = (x107 + x61);
1342 	uint64_t x110 = (x109 >> 0x1a);
1343 	uint32_t x111 = ((uint32_t)x109 & 0x3ffffff);
1344 	uint64_t x112 = (x110 + x49);
1345 	uint64_t x113 = (x112 >> 0x19);
1346 	uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
	/* The carry out of limb 9 wraps around into limb 0 scaled by 0x13 (19). */
1347 	uint64_t x115 = (x87 + (0x13 * x113));
1348 	uint32_t x116 = (uint32_t) (x115 >> 0x1a);
1349 	uint32_t x117 = ((uint32_t)x115 & 0x3ffffff);
1350 	uint32_t x118 = (x116 + x90);
1351 	uint32_t x119 = (x118 >> 0x19);
1352 	uint32_t x120 = (x118 & 0x1ffffff);
1353 	out[0] = x117;
1354 	out[1] = x120;
	/* The last carry is absorbed here without masking the sum. */
1355 	out[2] = (x119 + x93);
1356 	out[3] = x96;
1357 	out[4] = x99;
1358 	out[5] = x102;
1359 	out[6] = x105;
1360 	out[7] = x108;
1361 	out[8] = x111;
1362 	out[9] = x114;
1363 }
1364 
1365 static inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
1366 {
1367 	fe_mul_impl(h->v, f->v, g->v);
1368 }
1369 
1370 static inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
1371 {
1372 	fe_mul_impl(h->v, f->v, g->v);
1373 }
1374 
1375 static inline void
1376 fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
1377 {
1378 	fe_mul_impl(h->v, f->v, g->v);
1379 }
1380 
/*
 * Square a field element held as ten 32-bit limbs and write the partially
 * reduced result to out.
 *
 * Shape of the computation, as written below:
 *  - all ten input limbs are copied into locals before out is written;
 *  - x19..x37 are the nineteen 64-bit column sums of the square; each
 *    cross product is counted twice, and products of two odd-indexed
 *    limbs carry an additional factor of two;
 *  - columns 10..18 (x29..x37) are folded into columns 0..8 scaled by 19,
 *    spelled as (v << 4) + (v << 1) + v; column 9 (x28) is left as is;
 *  - a carry chain walks limbs 0..9, keeping 26 bits (shift 0x1a) in even
 *    limbs and 25 bits (shift 0x19) in odd limbs;
 *  - the carry out of limb 9 is multiplied by 0x13 and added to limb 0,
 *    followed by two more carry steps; the last carry is added into
 *    out[2] without masking.
 */
1381 static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10])
1382 {
	/* Limbs of in1 (x2 is limb 0, x17 is limb 9). */
1383 	const uint32_t x17 = in1[9];
1384 	const uint32_t x18 = in1[8];
1385 	const uint32_t x16 = in1[7];
1386 	const uint32_t x14 = in1[6];
1387 	const uint32_t x12 = in1[5];
1388 	const uint32_t x10 = in1[4];
1389 	const uint32_t x8 = in1[3];
1390 	const uint32_t x6 = in1[2];
1391 	const uint32_t x4 = in1[1];
1392 	const uint32_t x2 = in1[0];
	/* Column sums 0..18 of the limb products. */
1393 	uint64_t x19 = ((uint64_t)x2 * x2);
1394 	uint64_t x20 = ((uint64_t)(0x2 * x2) * x4);
1395 	uint64_t x21 = (0x2 * (((uint64_t)x4 * x4) + ((uint64_t)x2 * x6)));
1396 	uint64_t x22 = (0x2 * (((uint64_t)x4 * x6) + ((uint64_t)x2 * x8)));
1397 	uint64_t x23 = ((((uint64_t)x6 * x6) + ((uint64_t)(0x4 * x4) * x8)) + ((uint64_t)(0x2 * x2) * x10));
1398 	uint64_t x24 = (0x2 * ((((uint64_t)x6 * x8) + ((uint64_t)x4 * x10)) + ((uint64_t)x2 * x12)));
1399 	uint64_t x25 = (0x2 * (((((uint64_t)x8 * x8) + ((uint64_t)x6 * x10)) + ((uint64_t)x2 * x14)) + ((uint64_t)(0x2 * x4) * x12)));
1400 	uint64_t x26 = (0x2 * (((((uint64_t)x8 * x10) + ((uint64_t)x6 * x12)) + ((uint64_t)x4 * x14)) + ((uint64_t)x2 * x16)));
1401 	uint64_t x27 = (((uint64_t)x10 * x10) + (0x2 * ((((uint64_t)x6 * x14) + ((uint64_t)x2 * x18)) + (0x2 * (((uint64_t)x4 * x16) + ((uint64_t)x8 * x12))))));
1402 	uint64_t x28 = (0x2 * ((((((uint64_t)x10 * x12) + ((uint64_t)x8 * x14)) + ((uint64_t)x6 * x16)) + ((uint64_t)x4 * x18)) + ((uint64_t)x2 * x17)));
1403 	uint64_t x29 = (0x2 * (((((uint64_t)x12 * x12) + ((uint64_t)x10 * x14)) + ((uint64_t)x6 * x18)) + (0x2 * (((uint64_t)x8 * x16) + ((uint64_t)x4 * x17)))));
1404 	uint64_t x30 = (0x2 * (((((uint64_t)x12 * x14) + ((uint64_t)x10 * x16)) + ((uint64_t)x8 * x18)) + ((uint64_t)x6 * x17)));
1405 	uint64_t x31 = (((uint64_t)x14 * x14) + (0x2 * (((uint64_t)x10 * x18) + (0x2 * (((uint64_t)x12 * x16) + ((uint64_t)x8 * x17))))));
1406 	uint64_t x32 = (0x2 * ((((uint64_t)x14 * x16) + ((uint64_t)x12 * x18)) + ((uint64_t)x10 * x17)));
1407 	uint64_t x33 = (0x2 * ((((uint64_t)x16 * x16) + ((uint64_t)x14 * x18)) + ((uint64_t)(0x2 * x12) * x17)));
1408 	uint64_t x34 = (0x2 * (((uint64_t)x16 * x18) + ((uint64_t)x14 * x17)));
1409 	uint64_t x35 = (((uint64_t)x18 * x18) + ((uint64_t)(0x4 * x16) * x17));
1410 	uint64_t x36 = ((uint64_t)(0x2 * x18) * x17);
1411 	uint64_t x37 = ((uint64_t)(0x2 * x17) * x17);
	/* Fold column k+10 into column k as col[k] += 19 * col[k+10], k = 8 down to 0. */
1412 	uint64_t x38 = (x27 + (x37 << 0x4));
1413 	uint64_t x39 = (x38 + (x37 << 0x1));
1414 	uint64_t x40 = (x39 + x37);
1415 	uint64_t x41 = (x26 + (x36 << 0x4));
1416 	uint64_t x42 = (x41 + (x36 << 0x1));
1417 	uint64_t x43 = (x42 + x36);
1418 	uint64_t x44 = (x25 + (x35 << 0x4));
1419 	uint64_t x45 = (x44 + (x35 << 0x1));
1420 	uint64_t x46 = (x45 + x35);
1421 	uint64_t x47 = (x24 + (x34 << 0x4));
1422 	uint64_t x48 = (x47 + (x34 << 0x1));
1423 	uint64_t x49 = (x48 + x34);
1424 	uint64_t x50 = (x23 + (x33 << 0x4));
1425 	uint64_t x51 = (x50 + (x33 << 0x1));
1426 	uint64_t x52 = (x51 + x33);
1427 	uint64_t x53 = (x22 + (x32 << 0x4));
1428 	uint64_t x54 = (x53 + (x32 << 0x1));
1429 	uint64_t x55 = (x54 + x32);
1430 	uint64_t x56 = (x21 + (x31 << 0x4));
1431 	uint64_t x57 = (x56 + (x31 << 0x1));
1432 	uint64_t x58 = (x57 + x31);
1433 	uint64_t x59 = (x20 + (x30 << 0x4));
1434 	uint64_t x60 = (x59 + (x30 << 0x1));
1435 	uint64_t x61 = (x60 + x30);
1436 	uint64_t x62 = (x19 + (x29 << 0x4));
1437 	uint64_t x63 = (x62 + (x29 << 0x1));
1438 	uint64_t x64 = (x63 + x29);
	/* Carry chain over limbs 0..9: keep 26 or 25 bits, pass the rest upward. */
1439 	uint64_t x65 = (x64 >> 0x1a);
1440 	uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
1441 	uint64_t x67 = (x65 + x61);
1442 	uint64_t x68 = (x67 >> 0x19);
1443 	uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
1444 	uint64_t x70 = (x68 + x58);
1445 	uint64_t x71 = (x70 >> 0x1a);
1446 	uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
1447 	uint64_t x73 = (x71 + x55);
1448 	uint64_t x74 = (x73 >> 0x19);
1449 	uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
1450 	uint64_t x76 = (x74 + x52);
1451 	uint64_t x77 = (x76 >> 0x1a);
1452 	uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
1453 	uint64_t x79 = (x77 + x49);
1454 	uint64_t x80 = (x79 >> 0x19);
1455 	uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
1456 	uint64_t x82 = (x80 + x46);
1457 	uint64_t x83 = (x82 >> 0x1a);
1458 	uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
1459 	uint64_t x85 = (x83 + x43);
1460 	uint64_t x86 = (x85 >> 0x19);
1461 	uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
1462 	uint64_t x88 = (x86 + x40);
1463 	uint64_t x89 = (x88 >> 0x1a);
1464 	uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
1465 	uint64_t x91 = (x89 + x28);
1466 	uint64_t x92 = (x91 >> 0x19);
1467 	uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
	/* The carry out of limb 9 wraps around into limb 0 scaled by 0x13 (19). */
1468 	uint64_t x94 = (x66 + (0x13 * x92));
1469 	uint32_t x95 = (uint32_t) (x94 >> 0x1a);
1470 	uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
1471 	uint32_t x97 = (x95 + x69);
1472 	uint32_t x98 = (x97 >> 0x19);
1473 	uint32_t x99 = (x97 & 0x1ffffff);
1474 	out[0] = x96;
1475 	out[1] = x99;
	/* The last carry is absorbed here without masking the sum. */
1476 	out[2] = (x98 + x72);
1477 	out[3] = x75;
1478 	out[4] = x78;
1479 	out[5] = x81;
1480 	out[6] = x84;
1481 	out[7] = x87;
1482 	out[8] = x90;
1483 	out[9] = x93;
1484 }
1485 
1486 static inline void fe_sq_tl(fe *h, const fe_loose *f)
1487 {
1488 	fe_sqr_impl(h->v, f->v);
1489 }
1490 
1491 static inline void fe_sq_tt(fe *h, const fe *f)
1492 {
1493 	fe_sqr_impl(h->v, f->v);
1494 }
1495 
1496 static inline void fe_loose_invert(fe *out, const fe_loose *z)
1497 {
1498 	fe t0;
1499 	fe t1;
1500 	fe t2;
1501 	fe t3;
1502 	int i;
1503 
1504 	fe_sq_tl(&t0, z);
1505 	fe_sq_tt(&t1, &t0);
1506 	for (i = 1; i < 2; ++i)
1507 		fe_sq_tt(&t1, &t1);
1508 	fe_mul_tlt(&t1, z, &t1);
1509 	fe_mul_ttt(&t0, &t0, &t1);
1510 	fe_sq_tt(&t2, &t0);
1511 	fe_mul_ttt(&t1, &t1, &t2);
1512 	fe_sq_tt(&t2, &t1);
1513 	for (i = 1; i < 5; ++i)
1514 		fe_sq_tt(&t2, &t2);
1515 	fe_mul_ttt(&t1, &t2, &t1);
1516 	fe_sq_tt(&t2, &t1);
1517 	for (i = 1; i < 10; ++i)
1518 		fe_sq_tt(&t2, &t2);
1519 	fe_mul_ttt(&t2, &t2, &t1);
1520 	fe_sq_tt(&t3, &t2);
1521 	for (i = 1; i < 20; ++i)
1522 		fe_sq_tt(&t3, &t3);
1523 	fe_mul_ttt(&t2, &t3, &t2);
1524 	fe_sq_tt(&t2, &t2);
1525 	for (i = 1; i < 10; ++i)
1526 		fe_sq_tt(&t2, &t2);
1527 	fe_mul_ttt(&t1, &t2, &t1);
1528 	fe_sq_tt(&t2, &t1);
1529 	for (i = 1; i < 50; ++i)
1530 		fe_sq_tt(&t2, &t2);
1531 	fe_mul_ttt(&t2, &t2, &t1);
1532 	fe_sq_tt(&t3, &t2);
1533 	for (i = 1; i < 100; ++i)
1534 		fe_sq_tt(&t3, &t3);
1535 	fe_mul_ttt(&t2, &t3, &t2);
1536 	fe_sq_tt(&t2, &t2);
1537 	for (i = 1; i < 50; ++i)
1538 		fe_sq_tt(&t2, &t2);
1539 	fe_mul_ttt(&t1, &t2, &t1);
1540 	fe_sq_tt(&t1, &t1);
1541 	for (i = 1; i < 5; ++i)
1542 		fe_sq_tt(&t1, &t1);
1543 	fe_mul_ttt(out, &t1, &t0);
1544 }
1545 
1546 static inline void fe_invert(fe *out, const fe *z)
1547 {
1548 	fe_loose l;
1549 	fe_copy_lt(&l, z);
1550 	fe_loose_invert(out, &l);
1551 }
1552 
1553 /* Replace (f,g) with (g,f) if b == 1;
1554  * replace (f,g) with (f,g) if b == 0.
1555  *
1556  * Preconditions: b in {0,1}
1557  */
1558 static inline void fe_cswap(fe *f, fe *g, unsigned int b)
1559 {
1560 	unsigned i;
1561 	b = 0 - b;
1562 	for (i = 0; i < 10; i++) {
1563 		uint32_t x = f->v[i] ^ g->v[i];
1564 		x &= b;
1565 		f->v[i] ^= x;
1566 		g->v[i] ^= x;
1567 	}
1568 }
1569 
/* NOTE: based on fiat-crypto fe_mul, specialised for in2 = (121666, 0, ..., 0).
 *
 * With every limb of in2 except the first fixed at zero, column k of the
 * product is just 121666 * in1[k] and columns 10..18 vanish, so nothing
 * is folded back before the carry chain.  The carry chain keeps 26 bits
 * in even limbs and 25 bits in odd limbs; the carry out of limb 9 is
 * multiplied by 0x13 and added to limb 0, followed by two more carry
 * steps, the last of which lands in out[2] without masking.
 *
 * All of in1 is read before out is written.
 */
static inline void fe_mul_121666_impl(uint32_t out[10], const uint32_t in1[10])
{
	uint32_t limb[10];
	uint64_t carry = 0;
	uint64_t wrapped;
	uint32_t carry0;
	uint32_t limb1;
	unsigned int i;

	for (i = 0; i < 10; i++) {
		const unsigned int width = 26 - (i & 1);
		const uint64_t acc = carry + ((uint64_t)121666 * in1[i]);

		limb[i] = (uint32_t)acc & (((uint32_t)1 << width) - 1);
		carry = acc >> width;
	}

	/* Wrap the carry out of limb 9 into limb 0, scaled by 19. */
	wrapped = (limb[0] + (0x13 * carry));
	carry0 = (uint32_t)(wrapped >> 0x1a);
	limb1 = carry0 + limb[1];

	out[0] = (uint32_t)wrapped & 0x3ffffff;
	out[1] = limb1 & 0x1ffffff;
	out[2] = (limb1 >> 0x19) + limb[2];
	for (i = 3; i < 10; i++)
		out[i] = limb[i];
}
1685 
1686 static inline void fe_mul121666(fe *h, const fe_loose *f)
1687 {
1688 	fe_mul_121666_impl(h->v, f->v);
1689 }
1690 
1691 static const uint8_t curve25519_null_point[CURVE25519_KEY_SIZE];
1692 
1693 bool curve25519(uint8_t out[CURVE25519_KEY_SIZE],
1694 		const uint8_t scalar[CURVE25519_KEY_SIZE],
1695 		const uint8_t point[CURVE25519_KEY_SIZE])
1696 {
1697 	fe x1, x2, z2, x3, z3;
1698 	fe_loose x2l, z2l, x3l;
1699 	unsigned swap = 0;
1700 	int pos;
1701 	uint8_t e[32];
1702 
1703 	memcpy(e, scalar, 32);
1704 	curve25519_clamp_secret(e);
1705 
1706 	/* The following implementation was transcribed to Coq and proven to
1707 	 * correspond to unary scalar multiplication in affine coordinates given
1708 	 * that x1 != 0 is the x coordinate of some point on the curve. It was
1709 	 * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
1710 	 * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
1711 	 * quantified over the underlying field, so it applies to Curve25519
1712 	 * itself and the quadratic twist of Curve25519. It was not proven in
1713 	 * Coq that prime-field arithmetic correctly simulates extension-field
1714 	 * arithmetic on prime-field values. The decoding of the byte array
1715 	 * representation of e was not considered.
1716 	 *
1717 	 * Specification of Montgomery curves in affine coordinates:
1718 	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
1719 	 *
1720 	 * Proof that these form a group that is isomorphic to a Weierstrass
1721 	 * curve:
1722 	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
1723 	 *
1724 	 * Coq transcription and correctness proof of the loop
1725 	 * (where scalarbits=255):
1726 	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
1727 	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
1728 	 * preconditions: 0 <= e < 2^255 (not necessarily e < order),
1729 	 * fe_invert(0) = 0
1730 	 */
1731 	fe_frombytes(&x1, point);
1732 	fe_1(&x2);
1733 	fe_0(&z2);
1734 	fe_copy(&x3, &x1);
1735 	fe_1(&z3);
1736 
1737 	for (pos = 254; pos >= 0; --pos) {
1738 		fe tmp0, tmp1;
1739 		fe_loose tmp0l, tmp1l;
1740 		/* loop invariant as of right before the test, for the case
1741 		 * where x1 != 0:
1742 		 *   pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
1743 		 *   is nonzero
1744 		 *   let r := e >> (pos+1) in the following equalities of
1745 		 *   projective points:
1746 		 *   to_xz (r*P)     === if swap then (x3, z3) else (x2, z2)
1747 		 *   to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
1748 		 *   x1 is the nonzero x coordinate of the nonzero
1749 		 *   point (r*P-(r+1)*P)
1750 		 */
1751 		unsigned b = 1 & (e[pos / 8] >> (pos & 7));
1752 		swap ^= b;
1753 		fe_cswap(&x2, &x3, swap);
1754 		fe_cswap(&z2, &z3, swap);
1755 		swap = b;
1756 		/* Coq transcription of ladderstep formula (called from
1757 		 * transcribed loop):
1758 		 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
1759 		 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
1760 		 * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
1761 		 * x1  = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
1762 		 */
1763 		fe_sub(&tmp0l, &x3, &z3);
1764 		fe_sub(&tmp1l, &x2, &z2);
1765 		fe_add(&x2l, &x2, &z2);
1766 		fe_add(&z2l, &x3, &z3);
1767 		fe_mul_tll(&z3, &tmp0l, &x2l);
1768 		fe_mul_tll(&z2, &z2l, &tmp1l);
1769 		fe_sq_tl(&tmp0, &tmp1l);
1770 		fe_sq_tl(&tmp1, &x2l);
1771 		fe_add(&x3l, &z3, &z2);
1772 		fe_sub(&z2l, &z3, &z2);
1773 		fe_mul_ttt(&x2, &tmp1, &tmp0);
1774 		fe_sub(&tmp1l, &tmp1, &tmp0);
1775 		fe_sq_tl(&z2, &z2l);
1776 		fe_mul121666(&z3, &tmp1l);
1777 		fe_sq_tl(&x3, &x3l);
1778 		fe_add(&tmp0l, &tmp0, &z3);
1779 		fe_mul_ttt(&z3, &x1, &z2);
1780 		fe_mul_tll(&z2, &tmp1l, &tmp0l);
1781 	}
1782 	/* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
1783 	 * else (x2, z2)
1784 	 */
1785 	fe_cswap(&x2, &x3, swap);
1786 	fe_cswap(&z2, &z3, swap);
1787 
1788 	fe_invert(&z2, &z2);
1789 	fe_mul_ttt(&x2, &x2, &z2);
1790 	fe_tobytes(out, &x2);
1791 
1792 	explicit_bzero(&x1, sizeof(x1));
1793 	explicit_bzero(&x2, sizeof(x2));
1794 	explicit_bzero(&z2, sizeof(z2));
1795 	explicit_bzero(&x3, sizeof(x3));
1796 	explicit_bzero(&z3, sizeof(z3));
1797 	explicit_bzero(&x2l, sizeof(x2l));
1798 	explicit_bzero(&z2l, sizeof(z2l));
1799 	explicit_bzero(&x3l, sizeof(x3l));
1800 	explicit_bzero(&e, sizeof(e));
1801 
1802 	return timingsafe_bcmp(out, curve25519_null_point, CURVE25519_KEY_SIZE) != 0;
1803 }
1804 #endif
1805 
1806 int
1807 crypto_init(void)
1808 {
1809 #ifndef COMPAT_NEED_CHACHA20POLY1305_MBUF
1810 	struct crypto_session_params csp = {
1811 		.csp_mode = CSP_MODE_AEAD,
1812 		.csp_ivlen = sizeof(uint64_t),
1813 		.csp_cipher_alg = CRYPTO_CHACHA20_POLY1305,
1814 		.csp_cipher_klen = CHACHA20POLY1305_KEY_SIZE,
1815 		.csp_flags = CSP_F_SEPARATE_AAD | CSP_F_SEPARATE_OUTPUT
1816 	};
1817 	int ret = crypto_newsession(&chacha20_poly1305_sid, &csp, CRYPTOCAP_F_SOFTWARE);
1818 	if (ret != 0)
1819 		return (ret);
1820 #endif
1821 	return (0);
1822 }
1823 
1824 void
1825 crypto_deinit(void)
1826 {
1827 #ifndef COMPAT_NEED_CHACHA20POLY1305_MBUF
1828 	crypto_freesession(chacha20_poly1305_sid);
1829 #endif
1830 }
1831