/* SPDX-License-Identifier: MIT
 *
 * Copyright (C) 2015-2021 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 * Copyright (c) 2022 The FreeBSD Foundation
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/endian.h>
#include <sys/mbuf.h>
#include <opencrypto/cryptodev.h>

#include "crypto.h"

/*
 * Session handle for the opencrypto(9)-backed ChaCha20-Poly1305 path; only
 * needed when the software mbuf implementation below is not compiled in.
 */
#ifndef COMPAT_NEED_CHACHA20POLY1305_MBUF
static crypto_session_t chacha20_poly1305_sid;
#endif

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
#ifndef noinline
#define noinline __attribute__((noinline))
#endif
#ifndef __aligned
#define __aligned(x) __attribute__((aligned(x)))
#endif
#ifndef DIV_ROUND_UP
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#endif

/* Map Linux-style endian helpers onto sys/endian.h primitives. */
#define le32_to_cpup(a) le32toh(*(a))
#define le64_to_cpup(a) le64toh(*(a))
#define cpu_to_le32(a) htole32(a)
#define cpu_to_le64(a) htole64(a)

/* Little-endian loads/stores that are safe on unaligned pointers
 * (memcpy avoids undefined behavior from misaligned access). */
static inline __unused uint32_t get_unaligned_le32(const uint8_t *a)
{
	uint32_t l;
	__builtin_memcpy(&l, a, sizeof(l));
	return le32_to_cpup(&l);
}
static inline __unused uint64_t get_unaligned_le64(const uint8_t *a)
{
	uint64_t l;
	__builtin_memcpy(&l, a, sizeof(l));
	return le64_to_cpup(&l);
}
static inline __unused void put_unaligned_le32(uint32_t s, uint8_t *d)
{
	uint32_t l = cpu_to_le32(s);
	__builtin_memcpy(d, &l, sizeof(l));
}
/* Convert a word array in place between host and little-endian order. */
static inline __unused void cpu_to_le32_array(uint32_t *buf, unsigned int words)
{
	while (words--) {
		*buf = cpu_to_le32(*buf);
		++buf;
	}
}
static inline __unused void le32_to_cpu_array(uint32_t *buf, unsigned int words)
{
	while (words--) {
		*buf = le32_to_cpup(buf);
		++buf;
	}
}
/* 32-bit rotate left/right; masking the shift keeps shift < width (no UB). */
static inline __unused uint32_t rol32(uint32_t word, unsigned int shift)
{
	return (word << (shift & 31)) | (word >> ((-shift) & 31));
}
static inline __unused uint32_t ror32(uint32_t word, unsigned int shift)
{
	return (word >> (shift & 31)) | (word << ((-shift) & 31));
}

#if defined(COMPAT_NEED_CHACHA20POLY1305) || defined(COMPAT_NEED_CHACHA20POLY1305_MBUF)
/* dst = src1 ^ src2, byte by byte; regions may alias dst == src1. */
static void xor_cpy(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, size_t len)
{
	size_t i;

	for (i = 0; i < len; ++i)
		dst[i] = src1[i] ^ src2[i];
}

/* ChaCha20 quarter round (RFC 8439, section 2.1). */
#define QUARTER_ROUND(x, a, b, c, d) ( \
	x[a] += x[b], \
	x[d] = rol32((x[d] ^ x[a]), 16), \
	x[c] += x[d], \
	x[b] = rol32((x[b] ^ x[c]), 12), \
	x[a] += x[b], \
	x[d] = rol32((x[d] ^ x[a]), 8), \
	x[c] += x[d], \
	x[b] = rol32((x[b] ^ x[c]), 7) \
)

/* Index into the 4x4 state matrix, row i, column j. */
#define C(i, j) (i * 4 + j)

#define DOUBLE_ROUND(x) ( \
	/* Column Round */ \
	QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \
	QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \
	QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \
	QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \
	/* Diagonal Round */ \
	QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \
	QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \
	QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \
	QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \
)

/* 10 double rounds = the 20 rounds of ChaCha20. */
#define TWENTY_ROUNDS(x) ( \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x), \
	DOUBLE_ROUND(x) \
)

enum chacha20_lengths {
	CHACHA20_NONCE_SIZE = 16,
	CHACHA20_KEY_SIZE = 32,
	CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(uint32_t),
	CHACHA20_BLOCK_SIZE = 64,
	CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(uint32_t),
	HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE,
	HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE
};

enum chacha20_constants { /* expand 32-byte k */
	CHACHA20_CONSTANT_EXPA = 0x61707865U,
	CHACHA20_CONSTANT_ND_3 = 0x3320646eU,
	CHACHA20_CONSTANT_2_BY = 0x79622d32U,
	CHACHA20_CONSTANT_TE_K = 0x6b206574U
};

/* 16-word ChaCha20 state: 4 constants, 8 key words, 4 counter/nonce words. */
struct chacha20_ctx {
	union {
		uint32_t state[16];
		struct {
			uint32_t constant[4];
			uint32_t key[8];
			uint32_t counter[4];
		};
	};
};

/* Initialize ChaCha20 state with a 256-bit key and 64-bit nonce; the
 * 64-bit block counter (counter[0..1]) starts at zero. */
static void chacha20_init(struct chacha20_ctx *ctx,
			  const uint8_t key[CHACHA20_KEY_SIZE],
			  const uint64_t nonce)
{
	ctx->constant[0] = CHACHA20_CONSTANT_EXPA;
	ctx->constant[1] = CHACHA20_CONSTANT_ND_3;
	ctx->constant[2] = CHACHA20_CONSTANT_2_BY;
	ctx->constant[3] = CHACHA20_CONSTANT_TE_K;
	ctx->key[0] = get_unaligned_le32(key + 0);
	ctx->key[1] = get_unaligned_le32(key + 4);
	ctx->key[2] = get_unaligned_le32(key + 8);
	ctx->key[3] = get_unaligned_le32(key + 12);
	ctx->key[4] = get_unaligned_le32(key + 16);
	ctx->key[5] = get_unaligned_le32(key + 20);
	ctx->key[6] = get_unaligned_le32(key + 24);
	ctx->key[7] = get_unaligned_le32(key + 28);
	ctx->counter[0] = 0;
	ctx->counter[1] = 0;
	ctx->counter[2] = nonce & 0xffffffffU;
	ctx->counter[3] = nonce >> 32;
}

/* Produce one 64-byte keystream block (little-endian words) and advance
 * the block counter. */
static void chacha20_block(struct chacha20_ctx *ctx, uint32_t *stream)
{
	uint32_t x[CHACHA20_BLOCK_WORDS];
	int i;

	for (i = 0; i < ARRAY_SIZE(x); ++i)
		x[i] = ctx->state[i];

	TWENTY_ROUNDS(x);

	/* Feed-forward: add the input state to the permuted state. */
	for (i = 0; i < ARRAY_SIZE(x); ++i)
		stream[i] = cpu_to_le32(x[i] + ctx->state[i]);

	ctx->counter[0] += 1;
}

/* XOR len bytes of keystream into in -> out (in may equal out).
 * Note: trailing keystream bytes of a partial final block are discarded. */
static void chacha20(struct chacha20_ctx *ctx, uint8_t *out, const uint8_t *in,
		     uint32_t len)
{
	uint32_t buf[CHACHA20_BLOCK_WORDS];

	while (len >= CHACHA20_BLOCK_SIZE) {
		chacha20_block(ctx, buf);
		xor_cpy(out, in, (uint8_t *)buf, CHACHA20_BLOCK_SIZE);
		len -= CHACHA20_BLOCK_SIZE;
		out += CHACHA20_BLOCK_SIZE;
		in += CHACHA20_BLOCK_SIZE;
	}
	if (len) {
		chacha20_block(ctx, buf);
		xor_cpy(out, in, (uint8_t *)buf, len);
	}
}

/* HChaCha20: derive a 256-bit subkey from a key and a 128-bit nonce.
 * Used by the XChaCha20 construction; output is words 0-3 and 12-15 of
 * the permuted state (no feed-forward). */
static void hchacha20(uint32_t derived_key[CHACHA20_KEY_WORDS],
		      const uint8_t nonce[HCHACHA20_NONCE_SIZE],
		      const uint8_t key[HCHACHA20_KEY_SIZE])
{
	uint32_t x[] = { CHACHA20_CONSTANT_EXPA,
			 CHACHA20_CONSTANT_ND_3,
			 CHACHA20_CONSTANT_2_BY,
			 CHACHA20_CONSTANT_TE_K,
			 get_unaligned_le32(key + 0),
			 get_unaligned_le32(key + 4),
			 get_unaligned_le32(key + 8),
			 get_unaligned_le32(key + 12),
			 get_unaligned_le32(key + 16),
			 get_unaligned_le32(key + 20),
			 get_unaligned_le32(key + 24),
			 get_unaligned_le32(key + 28),
			 get_unaligned_le32(nonce + 0),
			 get_unaligned_le32(nonce + 4),
			 get_unaligned_le32(nonce + 8),
			 get_unaligned_le32(nonce + 12)
	};

	TWENTY_ROUNDS(x);

	memcpy(derived_key + 0, x + 0, sizeof(uint32_t) * 4);
	memcpy(derived_key + 4, x + 12, sizeof(uint32_t) * 4);
}

enum poly1305_lengths {
	POLY1305_BLOCK_SIZE = 16,
	POLY1305_KEY_SIZE = 32,
	POLY1305_MAC_SIZE = 16
};

/* Poly1305 accumulator (h), clamped key part (r), and s = 5*r[1..4]
 * precomputed for the 2^130-5 modular reduction; 26-bit limbs. */
struct poly1305_internal {
	uint32_t h[5];
	uint32_t r[5];
	uint32_t s[4];
};

struct poly1305_ctx {
	struct poly1305_internal state;
	uint32_t nonce[4];
	uint8_t data[POLY1305_BLOCK_SIZE];
	size_t num;
};

/* Load and clamp r (first 16 key bytes) into 26-bit limbs; zero h. */
static void poly1305_init_core(struct poly1305_internal *st,
			       const uint8_t key[16])
{
	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
	st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff;
	st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03;
	st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff;
	st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff;
	st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff;

	/* s = 5*r */
	st->s[0] = st->r[1] * 5;
	st->s[1] = st->r[2] * 5;
	st->s[2] = st->r[3] * 5;
	st->s[3] = st->r[4] * 5;

	/* h = 0 */
	st->h[0] = 0;
	st->h[1] = 0;
	st->h[2] = 0;
	st->h[3] = 0;
	st->h[4] = 0;
}

/* Absorb full 16-byte blocks: h = (h + block) * r mod 2^130-5.
 * padbit is 1 for full blocks and 0 for the already-padded final block. */
static void poly1305_blocks_core(struct poly1305_internal *st,
				 const uint8_t *input, size_t len,
				 const uint32_t padbit)
{
	const uint32_t hibit = padbit << 24;
	uint32_t r0, r1, r2, r3, r4;
	uint32_t s1, s2, s3, s4;
	uint32_t h0, h1, h2, h3, h4;
	uint64_t d0, d1, d2, d3, d4;
	uint32_t c;

	r0 = st->r[0];
	r1 = st->r[1];
	r2 = st->r[2];
	r3 = st->r[3];
	r4 = st->r[4];

	s1 = st->s[0];
	s2 = st->s[1];
	s3 = st->s[2];
	s4 = st->s[3];

	h0 = st->h[0];
	h1 = st->h[1];
	h2 = st->h[2];
	h3 = st->h[3];
	h4 = st->h[4];

	while (len >= POLY1305_BLOCK_SIZE) {
		/* h += m[i] */
		h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff;
		h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff;
		h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff;
		h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff;
		h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit;

		/* h *= r */
		d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) +
		     ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) +
		     ((uint64_t)h4 * s1);
		d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) +
		     ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) +
		     ((uint64_t)h4 * s2);
		d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) +
		     ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) +
		     ((uint64_t)h4 * s3);
		d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) +
		     ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) +
		     ((uint64_t)h4 * s4);
		d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) +
		     ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) +
		     ((uint64_t)h4 * r0);

		/* (partial) h %= p */
		c = (uint32_t)(d0 >> 26);
		h0 = (uint32_t)d0 & 0x3ffffff;
		d1 += c;
		c = (uint32_t)(d1 >> 26);
		h1 = (uint32_t)d1 & 0x3ffffff;
		d2 += c;
		c = (uint32_t)(d2 >> 26);
		h2 = (uint32_t)d2 & 0x3ffffff;
		d3 += c;
		c = (uint32_t)(d3 >> 26);
		h3 = (uint32_t)d3 & 0x3ffffff;
		d4 += c;
		c = (uint32_t)(d4 >> 26);
		h4 = (uint32_t)d4 & 0x3ffffff;
		h0 += c * 5;
		c = (h0 >> 26);
		h0 = h0 & 0x3ffffff;
		h1 += c;

		input += POLY1305_BLOCK_SIZE;
		len -= POLY1305_BLOCK_SIZE;
	}

	st->h[0] = h0;
	st->h[1] = h1;
	st->h[2] = h2;
	st->h[3] = h3;
	st->h[4] = h4;
}

/* Finalize: fully reduce h mod 2^130-5 (constant-time select between h
 * and h - p), then output mac = (h + nonce) mod 2^128. */
static void poly1305_emit_core(struct poly1305_internal *st, uint8_t mac[16],
			       const uint32_t nonce[4])
{
	uint32_t h0, h1, h2, h3, h4, c;
	uint32_t g0, g1, g2, g3, g4;
	uint64_t f;
	uint32_t mask;

	/* fully carry h */
	h0 = st->h[0];
	h1 = st->h[1];
	h2 = st->h[2];
	h3 = st->h[3];
	h4 = st->h[4];

	c = h1 >> 26;
	h1 = h1 & 0x3ffffff;
	h2 += c;
	c = h2 >> 26;
	h2 = h2 & 0x3ffffff;
	h3 += c;
	c = h3 >> 26;
	h3 = h3 & 0x3ffffff;
	h4 += c;
	c = h4 >> 26;
	h4 = h4 & 0x3ffffff;
	h0 += c * 5;
	c = h0 >> 26;
	h0 = h0 & 0x3ffffff;
	h1 += c;

	/* compute h + -p */
	g0 = h0 + 5;
	c = g0 >> 26;
	g0 &= 0x3ffffff;
	g1 = h1 + c;
	c = g1 >> 26;
	g1 &= 0x3ffffff;
	g2 = h2 + c;
	c = g2 >> 26;
	g2 &= 0x3ffffff;
	g3 = h3 + c;
	c = g3 >> 26;
	g3 &= 0x3ffffff;
	g4 = h4 + c - (1UL << 26);

	/* select h if h < p, or h + -p if h >= p */
	mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1;
	g0 &= mask;
	g1 &= mask;
	g2 &= mask;
	g3 &= mask;
	g4 &= mask;
	mask = ~mask;

	h0 = (h0 & mask) | g0;
	h1 = (h1 & mask) | g1;
	h2 = (h2 & mask) | g2;
	h3 = (h3 & mask) | g3;
	h4 = (h4 & mask) | g4;

	/* h = h % (2^128) */
	h0 = ((h0) | (h1 << 26)) & 0xffffffff;
	h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
	h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
	h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;

	/* mac = (h + nonce) % (2^128) */
	f = (uint64_t)h0 + nonce[0];
	h0 = (uint32_t)f;
	f = (uint64_t)h1 + nonce[1] + (f >> 32);
	h1 = (uint32_t)f;
	f = (uint64_t)h2 + nonce[2] + (f >> 32);
	h2 = (uint32_t)f;
	f = (uint64_t)h3 + nonce[3] + (f >> 32);
	h3 = (uint32_t)f;

	put_unaligned_le32(h0, &mac[0]);
	put_unaligned_le32(h1, &mac[4]);
	put_unaligned_le32(h2, &mac[8]);
	put_unaligned_le32(h3, &mac[12]);
}

/* Initialize a streaming Poly1305 context: r/h from the first half of the
 * key, the final added nonce from the second half. */
static void poly1305_init(struct poly1305_ctx *ctx,
			  const uint8_t key[POLY1305_KEY_SIZE])
{
	ctx->nonce[0] = get_unaligned_le32(&key[16]);
	ctx->nonce[1] = get_unaligned_le32(&key[20]);
	ctx->nonce[2] = get_unaligned_le32(&key[24]);
	ctx->nonce[3] = get_unaligned_le32(&key[28]);

	poly1305_init_core(&ctx->state, key);

	ctx->num = 0;
}

/* Absorb len bytes, buffering any partial trailing block in ctx->data. */
static void poly1305_update(struct poly1305_ctx *ctx, const uint8_t *input,
			    size_t len)
{
	const size_t num = ctx->num;
	size_t rem;

	if (num) {
		rem = POLY1305_BLOCK_SIZE - num;
		if (len < rem) {
			memcpy(ctx->data + num, input, len);
			ctx->num = num + len;
			return;
		}
		memcpy(ctx->data + num, input, rem);
		poly1305_blocks_core(&ctx->state, ctx->data,
				     POLY1305_BLOCK_SIZE, 1);
		input += rem;
		len -= rem;
	}

	rem = len % POLY1305_BLOCK_SIZE;
	len -= rem;

	if (len >= POLY1305_BLOCK_SIZE) {
		poly1305_blocks_core(&ctx->state, input, len, 1);
		input += len;
	}

	if (rem)
		memcpy(ctx->data, input, rem);

	ctx->num = rem;
}

/* Pad the final partial block (0x01 then zeros, processed with padbit 0),
 * emit the 16-byte tag, and wipe the context. */
static void poly1305_final(struct poly1305_ctx *ctx,
			   uint8_t mac[POLY1305_MAC_SIZE])
{
	size_t num = ctx->num;

	if (num) {
		ctx->data[num++] = 1;
		while (num < POLY1305_BLOCK_SIZE)
			ctx->data[num++] = 0;
		poly1305_blocks_core(&ctx->state, ctx->data,
				     POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_core(&ctx->state, mac, ctx->nonce);

	explicit_bzero(ctx, sizeof(*ctx));
}
#endif

#ifdef COMPAT_NEED_CHACHA20POLY1305
/* Zero padding used to align AD and ciphertext to 16 bytes (RFC 8439).
 * NOTE(review): pad0 is defined only under COMPAT_NEED_CHACHA20POLY1305,
 * but chacha20poly1305_crypt_mbuf below (guarded by
 * COMPAT_NEED_CHACHA20POLY1305_MBUF) also references it — confirm the
 * guard placement if the MBUF option is ever built alone. */
static const uint8_t pad0[16] = { 0 };

/* RFC 8439 AEAD seal: dst gets src_len ciphertext bytes followed by the
 * 16-byte Poly1305 tag (dst must have room for src_len + 16). */
void
chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
			 const uint8_t *ad, const size_t ad_len,
			 const uint64_t nonce,
			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	struct poly1305_ctx poly1305_state;
	struct chacha20_ctx chacha20_state;
	union {
		uint8_t block0[POLY1305_KEY_SIZE];
		uint64_t lens[2];
	} b = { { 0 } };

	/* Block 0 of the ChaCha20 keystream is the one-time Poly1305 key. */
	chacha20_init(&chacha20_state, key, nonce);
	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
	poly1305_init(&poly1305_state, b.block0);

	poly1305_update(&poly1305_state, ad, ad_len);
	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);

	chacha20(&chacha20_state, dst, src, src_len);

	poly1305_update(&poly1305_state, dst, src_len);
	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf);

	b.lens[0] = cpu_to_le64(ad_len);
	b.lens[1] = cpu_to_le64(src_len);
	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));

	poly1305_final(&poly1305_state, dst + src_len);

	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
	explicit_bzero(&b, sizeof(b));
}

/* RFC 8439 AEAD open: verify the trailing tag in constant time and only
 * decrypt on success. Returns false on short input or tag mismatch. */
bool
chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len,
			 const uint8_t *ad, const size_t ad_len,
			 const uint64_t nonce,
			 const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	struct poly1305_ctx poly1305_state;
	struct chacha20_ctx chacha20_state;
	bool ret;
	size_t dst_len;
	union {
		uint8_t block0[POLY1305_KEY_SIZE];
		uint8_t mac[POLY1305_MAC_SIZE];
		uint64_t lens[2];
	} b = { { 0 } };

	if (src_len < POLY1305_MAC_SIZE)
		return false;

	chacha20_init(&chacha20_state, key, nonce);
	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
	poly1305_init(&poly1305_state, b.block0);

	poly1305_update(&poly1305_state, ad, ad_len);
	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf);

	dst_len = src_len - POLY1305_MAC_SIZE;
	poly1305_update(&poly1305_state, src, dst_len);
	poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf);

	b.lens[0] = cpu_to_le64(ad_len);
	b.lens[1] = cpu_to_le64(dst_len);
	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));

	poly1305_final(&poly1305_state, b.mac);

	ret = timingsafe_bcmp(b.mac, src + dst_len, POLY1305_MAC_SIZE) == 0;
	if (ret)
		chacha20(&chacha20_state, dst, src, dst_len);

	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
	explicit_bzero(&b, sizeof(b));

	return ret;
}

/* XChaCha20-Poly1305: derive a subkey via HChaCha20 from the first 16
 * nonce bytes, then run ChaCha20-Poly1305 with the last 8 as nonce. */
void
xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src,
			  const size_t src_len, const uint8_t *ad,
			  const size_t ad_len,
			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	uint32_t derived_key[CHACHA20_KEY_WORDS];

	hchacha20(derived_key, nonce, key);
	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
	chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
				 get_unaligned_le64(nonce + 16),
				 (uint8_t *)derived_key);
	explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE);
}

bool
xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src,
			  const size_t src_len, const uint8_t *ad,
			  const size_t ad_len,
			  const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE],
			  const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	bool ret;
	uint32_t derived_key[CHACHA20_KEY_WORDS];

	hchacha20(derived_key, nonce, key);
	cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key));
	ret = chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
				       get_unaligned_le64(nonce + 16),
				       (uint8_t *)derived_key);
	explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE);
	return ret;
}
#endif

#ifdef COMPAT_NEED_CHACHA20POLY1305_MBUF
/* In-place ChaCha20-Poly1305 over an mbuf chain (no AD, lens[0] = 0).
 * Encrypt: ciphers the chain and appends the 16-byte tag.
 * Decrypt: strips the tag, authenticates, then (already) deciphered data
 * is compared via the tag; returns 0 or an errno value.
 * The keystream union `b` carries leftover stream bytes across mbufs so
 * partial blocks at mbuf boundaries continue the same keystream block. */
static inline int
chacha20poly1305_crypt_mbuf(struct mbuf *m0, uint64_t nonce,
			    const uint8_t key[CHACHA20POLY1305_KEY_SIZE], bool encrypt)
{
	struct poly1305_ctx poly1305_state;
	struct chacha20_ctx chacha20_state;
	uint8_t *buf, mbuf_mac[POLY1305_MAC_SIZE];
	size_t len, leftover = 0;
	struct mbuf *m;
	int ret;
	union {
		uint32_t stream[CHACHA20_BLOCK_WORDS];
		uint8_t block0[POLY1305_KEY_SIZE];
		uint8_t mac[POLY1305_MAC_SIZE];
		uint64_t lens[2];
	} b = { { 0 } };

	if (!encrypt) {
		/* Detach the trailing tag before deciphering. */
		if (m0->m_pkthdr.len < POLY1305_MAC_SIZE)
			return EMSGSIZE;
		m_copydata(m0, m0->m_pkthdr.len - POLY1305_MAC_SIZE, POLY1305_MAC_SIZE, mbuf_mac);
		m_adj(m0, -POLY1305_MAC_SIZE);
	}

	chacha20_init(&chacha20_state, key, nonce);
	chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0));
	poly1305_init(&poly1305_state, b.block0);

	for (m = m0; m; m = m->m_next) {
		len = m->m_len;
		buf = m->m_data;

		/* MAC covers ciphertext: before ciphering when decrypting. */
		if (!encrypt)
			poly1305_update(&poly1305_state, m->m_data, m->m_len);

		if (leftover != 0) {
			size_t l = min(len, leftover);
			xor_cpy(buf, buf, ((uint8_t *)b.stream) + (CHACHA20_BLOCK_SIZE - leftover), l);
			leftover -= l;
			buf += l;
			len -= l;
		}

		while (len >= CHACHA20_BLOCK_SIZE) {
			chacha20_block(&chacha20_state, b.stream);
			xor_cpy(buf, buf, (uint8_t *)b.stream, CHACHA20_BLOCK_SIZE);
			buf += CHACHA20_BLOCK_SIZE;
			len -= CHACHA20_BLOCK_SIZE;
		}

		if (len) {
			chacha20_block(&chacha20_state, b.stream);
			xor_cpy(buf, buf, (uint8_t *)b.stream, len);
			leftover = CHACHA20_BLOCK_SIZE - len;
		}

		/* ... and after ciphering when encrypting. */
		if (encrypt)
			poly1305_update(&poly1305_state, m->m_data, m->m_len);
	}
	poly1305_update(&poly1305_state, pad0, (0x10 - m0->m_pkthdr.len) & 0xf);

	/* No additional authenticated data in the mbuf path. */
	b.lens[0] = 0;
	b.lens[1] = cpu_to_le64(m0->m_pkthdr.len);
	poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens));

	poly1305_final(&poly1305_state, b.mac);

	if (encrypt)
		ret = m_append(m0, POLY1305_MAC_SIZE, b.mac) ? 0 : ENOMEM;
	else
		ret = timingsafe_bcmp(b.mac, mbuf_mac, POLY1305_MAC_SIZE) == 0 ? 0 : EBADMSG;

	explicit_bzero(&chacha20_state, sizeof(chacha20_state));
	explicit_bzero(&b, sizeof(b));

	return ret;
}

int
chacha20poly1305_encrypt_mbuf(struct mbuf *m, const uint64_t nonce,
			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	return chacha20poly1305_crypt_mbuf(m, nonce, key, true);
}

int
chacha20poly1305_decrypt_mbuf(struct mbuf *m, const uint64_t nonce,
			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	return chacha20poly1305_crypt_mbuf(m, nonce, key, false);
}
#else
/* With CRYPTO_F_CBIMM the request completes synchronously; nothing to do. */
static int
crypto_callback(struct cryptop *crp)
{
	return (0);
}

/* opencrypto(9)-backed mbuf encrypt: reserve tag space, then dispatch a
 * synchronous encrypt+digest request against the shared session. */
int
chacha20poly1305_encrypt_mbuf(struct mbuf *m, const uint64_t nonce,
			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	static const char blank_tag[POLY1305_HASH_LEN];
	struct cryptop crp;
	int ret;

	if (!m_append(m, POLY1305_HASH_LEN, blank_tag))
		return (ENOMEM);
	crypto_initreq(&crp, chacha20_poly1305_sid);
	crp.crp_op = CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST;
	crp.crp_flags = CRYPTO_F_IV_SEPARATE | CRYPTO_F_CBIMM;
	crypto_use_mbuf(&crp, m);
	crp.crp_payload_length = m->m_pkthdr.len - POLY1305_HASH_LEN;
	crp.crp_digest_start = crp.crp_payload_length;
	le64enc(crp.crp_iv, nonce);
	crp.crp_cipher_key = key;
	crp.crp_callback = crypto_callback;
	ret = crypto_dispatch(&crp);
	crypto_destroyreq(&crp);
	return (ret);
}

/* opencrypto(9)-backed mbuf decrypt+verify; strips the tag on success. */
int
chacha20poly1305_decrypt_mbuf(struct mbuf *m, const uint64_t nonce,
			      const uint8_t key[CHACHA20POLY1305_KEY_SIZE])
{
	struct cryptop crp;
	int ret;

	if (m->m_pkthdr.len < POLY1305_HASH_LEN)
		return (EMSGSIZE);
	crypto_initreq(&crp, chacha20_poly1305_sid);
	crp.crp_op = CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST;
	crp.crp_flags = CRYPTO_F_IV_SEPARATE | CRYPTO_F_CBIMM;
	crypto_use_mbuf(&crp, m);
	crp.crp_payload_length = m->m_pkthdr.len - POLY1305_HASH_LEN;
	crp.crp_digest_start = crp.crp_payload_length;
	le64enc(crp.crp_iv, nonce);
	crp.crp_cipher_key = key;
	crp.crp_callback = crypto_callback;
	ret = crypto_dispatch(&crp);
	crypto_destroyreq(&crp);
	if (ret)
		return (ret);
	m_adj(m, -POLY1305_HASH_LEN);
	return (0);
}
#endif

#ifdef COMPAT_NEED_BLAKE2S
/* BLAKE2s IV (RFC 7693) — fractional parts of sqrt of the first 8 primes. */
static const uint32_t blake2s_iv[8] = {
	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};

/* Message word permutation schedule, one row per round. */
static const uint8_t blake2s_sigma[10][16] = {
	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
	{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
	{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
	{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
	{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
	{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
	{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
	{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
	{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
	{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
};

/* Mark the final block by setting finalization flag f[0] to all ones. */
static inline void blake2s_set_lastblock(struct blake2s_state *state)
{
	state->f[0] = -1;
}

/* Add inc to the 64-bit byte counter held in two 32-bit words. */
static inline void blake2s_increment_counter(struct blake2s_state *state,
					     const uint32_t inc)
{
	state->t[0] += inc;
	state->t[1] += (state->t[0] < inc);
}

static inline void blake2s_init_param(struct blake2s_state *state,
				      const uint32_t param)
{
	int i;

	memset(state, 0, sizeof(*state));
	for (i = 0; i < 8; ++i)
		state->h[i] = blake2s_iv[i];
	/* XOR the parameter block word (digest len, key len, fanout, depth). */
	state->h[0] ^= param;
}

/* Initialize for an unkeyed hash of outlen bytes. */
void blake2s_init(struct blake2s_state *state, const size_t outlen)
{
	blake2s_init_param(state, 0x01010000 | outlen);
	state->outlen = outlen;
}

/* Initialize for a keyed hash: the key is absorbed as a zero-padded
 * first block, then wiped from the stack. */
void blake2s_init_key(struct blake2s_state *state, const size_t outlen,
		      const uint8_t *key, const size_t keylen)
{
	uint8_t block[BLAKE2S_BLOCK_SIZE] = { 0 };

	blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen);
	state->outlen = outlen;
	memcpy(block, key, keylen);
	blake2s_update(state, block, BLAKE2S_BLOCK_SIZE);
	explicit_bzero(block, BLAKE2S_BLOCK_SIZE);
}

/* Compress nblocks consecutive 64-byte blocks, advancing the byte
 * counter by inc per block (inc < BLAKE2S_BLOCK_SIZE only for the
 * final, partially filled block). */
static inline void blake2s_compress(struct blake2s_state *state,
				    const uint8_t *block, size_t nblocks,
				    const uint32_t inc)
{
	uint32_t m[16];
	uint32_t v[16];
	int i;

	while (nblocks > 0) {
		blake2s_increment_counter(state, inc);
		memcpy(m, block, BLAKE2S_BLOCK_SIZE);
		le32_to_cpu_array(m, ARRAY_SIZE(m));
		memcpy(v, state->h, 32);
		v[ 8] = blake2s_iv[0];
		v[ 9] = blake2s_iv[1];
		v[10] = blake2s_iv[2];
		v[11] = blake2s_iv[3];
		v[12] = blake2s_iv[4] ^ state->t[0];
		v[13] = blake2s_iv[5] ^ state->t[1];
		v[14] = blake2s_iv[6] ^ state->f[0];
		v[15] = blake2s_iv[7] ^ state->f[1];

#define G(r, i, a, b, c, d) do { \
	a += b + m[blake2s_sigma[r][2 * i + 0]]; \
	d = ror32(d ^ a, 16); \
	c += d; \
	b = ror32(b ^ c, 12); \
	a += b + m[blake2s_sigma[r][2 * i + 1]]; \
	d = ror32(d ^ a, 8); \
	c += d; \
	b = ror32(b ^ c, 7); \
} while (0)

#define ROUND(r) do { \
	G(r, 0, v[0], v[ 4], v[ 8], v[12]); \
	G(r, 1, v[1], v[ 5], v[ 9], v[13]); \
	G(r, 2, v[2], v[ 6], v[10], v[14]); \
	G(r, 3, v[3], v[ 7], v[11], v[15]); \
	G(r, 4, v[0], v[ 5], v[10], v[15]); \
	G(r, 5, v[1], v[ 6], v[11], v[12]); \
	G(r, 6, v[2], v[ 7], v[ 8], v[13]); \
	G(r, 7, v[3], v[ 4], v[ 9], v[14]); \
} while (0)
		ROUND(0);
		ROUND(1);
		ROUND(2);
		ROUND(3);
		ROUND(4);
		ROUND(5);
		ROUND(6);
		ROUND(7);
		ROUND(8);
		ROUND(9);

#undef G
#undef ROUND

		for (i = 0; i < 8; ++i)
			state->h[i] ^= v[i] ^ v[i + 8];

		block += BLAKE2S_BLOCK_SIZE;
		--nblocks;
	}
}

/* Absorb inlen bytes, always keeping at least one byte buffered so the
 * final block can be compressed with the finalization flag set. */
void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen)
{
	const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;

	if (!inlen)
		return;
	if (inlen > fill) {
		memcpy(state->buf + state->buflen, in, fill);
		blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
		state->buflen = 0;
		in += fill;
		inlen -= fill;
	}
	if (inlen > BLAKE2S_BLOCK_SIZE) {
		const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
		/* Hash one less (full) block than strictly possible */
		blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
		in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
		inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
	}
	memcpy(state->buf + state->buflen, in, inlen);
	state->buflen += inlen;
}

/* Finalize: pad the buffered block with zeros, compress it as the last
 * block, emit outlen little-endian bytes, and wipe the state. */
void blake2s_final(struct blake2s_state *state, uint8_t *out)
{
	blake2s_set_lastblock(state);
	memset(state->buf + state->buflen, 0,
	       BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
	blake2s_compress(state, state->buf, 1, state->buflen);
	cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
	memcpy(out, state->h, state->outlen);
	explicit_bzero(state, sizeof(*state));
}
#endif

#ifdef COMPAT_NEED_CURVE25519
/* Below here is fiat's implementation of x25519.
 *
 * Copyright (C) 2015-2016 The fiat-crypto Authors.
 * Copyright (C) 2018-2021 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 *
 * This is a machine-generated formally verified implementation of Curve25519
 * ECDH from: <https://github.com/mit-plv/fiat-crypto>. Though originally
 * machine generated, it has been tweaked to be suitable for use in the kernel.
 * It is optimized for 32-bit machines and machines that cannot work efficiently
 * with 128-bit integer types.
 */

/* fe means field element. Here the field is \Z/(2^255-19). An element t,
 * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
 * t[3]+2^102 t[4]+...+2^230 t[9].
 * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc.
 * Multiplication and carrying produce fe from fe_loose.
 */
typedef struct fe { uint32_t v[10]; } fe;

/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc
 * Addition and subtraction produce fe_loose from (fe, fe).
 */
typedef struct fe_loose { uint32_t v[10]; } fe_loose;

/* Unpack 32 little-endian bytes into ten 25/26-bit limbs. */
static inline void fe_frombytes_impl(uint32_t h[10], const uint8_t *s)
{
	/* Ignores top bit of s. */
	uint32_t a0 = get_unaligned_le32(s);
	uint32_t a1 = get_unaligned_le32(s+4);
	uint32_t a2 = get_unaligned_le32(s+8);
	uint32_t a3 = get_unaligned_le32(s+12);
	uint32_t a4 = get_unaligned_le32(s+16);
	uint32_t a5 = get_unaligned_le32(s+20);
	uint32_t a6 = get_unaligned_le32(s+24);
	uint32_t a7 = get_unaligned_le32(s+28);
	h[0] = a0&((1<<26)-1);                    /* 26 used, 32-26 left. 26 */
	h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 =  6+19 = 25 */
	h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */
	h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) +  6 = 19+ 6 = 25 */
	h[4] = (a3>> 6);                          /* (32- 6)              = 26 */
	h[5] = a4&((1<<25)-1);                    /*                        25 */
	h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 =  7+19 = 26 */
	h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */
	h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) +  6 = 20+ 6 = 26 */
	h[9] = (a7>> 6)&((1<<25)-1);              /*                        25 */
}

static inline void fe_frombytes(fe *h, const uint8_t *s)
{
	fe_frombytes_impl(h->v, s);
}

static inline uint8_t /*bool*/
addcarryx_u25(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low)
{
	/* This function extracts 25 bits of result and 1 bit of carry
	 * (26 total), so a 32-bit intermediate is sufficient.
	 */
	uint32_t x = a + b + c;
	*low = x & ((1 << 25) - 1);
	return (x >> 25) & 1;
}

static inline uint8_t /*bool*/
addcarryx_u26(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low)
{
	/* This function extracts 26 bits of result and 1 bit of carry
	 * (27 total), so a 32-bit intermediate is sufficient.
	 */
	uint32_t x = a + b + c;
	*low = x & ((1 << 26) - 1);
	return (x >> 26) & 1;
}

static inline uint8_t /*bool*/
subborrow_u25(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low)
{
	/* This function extracts 25 bits of result and 1 bit of borrow
	 * (26 total), so a 32-bit intermediate is sufficient.
	 */
	uint32_t x = a - b - c;
	*low = x & ((1 << 25) - 1);
	return x >> 31;
}

static inline uint8_t /*bool*/
subborrow_u26(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low)
{
	/* This function extracts 26 bits of result and 1 bit of borrow
	 *(27 total), so a 32-bit intermediate is sufficient.
	 */
	uint32_t x = a - b - c;
	*low = x & ((1 << 26) - 1);
	return x >> 31;
}

/* Constant-time conditional move: returns nz if t is nonzero, else z. */
static inline uint32_t cmovznz32(uint32_t t, uint32_t z, uint32_t nz)
{
	t = -!!t; /* all set if nonzero, 0 if 0 */
	return (t&nz) | ((~t)&z);
}

/* Canonicalize a field element: constant-time reduction to the unique
 * representative mod 2^255-19 (subtract p, then add it back if it
 * borrowed). Machine-generated fiat-crypto code. */
static inline void fe_freeze(uint32_t out[10], const uint32_t in1[10])
{
	const uint32_t x17 = in1[9];
	const uint32_t x18 = in1[8];
	const uint32_t x16 = in1[7];
	const uint32_t x14 = in1[6];
	const uint32_t x12 = in1[5];
	const uint32_t x10 = in1[4];
	const uint32_t x8 = in1[3];
	const uint32_t x6 = in1[2];
	const uint32_t x4 = in1[1];
	const uint32_t x2 = in1[0];
	uint32_t x20; uint8_t/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20);
	uint32_t x23; uint8_t/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23);
	uint32_t x26; uint8_t/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26);
	uint32_t x29; uint8_t/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29);
	uint32_t x32; uint8_t/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32);
	uint32_t x35; uint8_t/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35);
	uint32_t x38; uint8_t/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38);
	uint32_t x41; uint8_t/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41);
	uint32_t x44; uint8_t/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44);
	uint32_t x47; uint8_t/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47);
	uint32_t x49 = cmovznz32(x48, 0x0, 0xffffffff);
	uint32_t x50 = (x49 & 0x3ffffed);
	uint32_t x52; uint8_t/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52);
	uint32_t x54 = (x49 & 0x1ffffff);
	uint32_t x56; uint8_t/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56);
	uint32_t x58 = (x49 & 0x3ffffff);
	uint32_t x60; uint8_t/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60);
	uint32_t x62 = (x49 & 0x1ffffff);
	uint32_t x64; uint8_t/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64);
	uint32_t x66 = (x49 & 0x3ffffff);
	uint32_t x68; uint8_t/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68);
	uint32_t x70 = (x49 & 0x1ffffff);
	uint32_t x72; uint8_t/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72);
	uint32_t x74 = (x49 & 0x3ffffff);
	uint32_t x76; uint8_t/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76);
	uint32_t x78 = (x49 & 0x1ffffff);
	uint32_t x80; uint8_t/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80);
	uint32_t x82 = (x49 & 0x3ffffff);
	uint32_t x84; uint8_t/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84);
	uint32_t x86 = (x49 & 0x1ffffff);
	uint32_t x88; addcarryx_u25(x85, x47, x86, &x88);
	out[0] = x52;
	out[1] = x56;
	out[2] = x60;
	out[3] = x64;
	out[4] = x68;
	out[5] = x72;
	out[6] = x76;
	out[7] = x80;
	out[8] = x84;
	out[9] = x88;
}

/* Pack a field element into 32 little-endian bytes (canonical form). */
static inline void fe_tobytes(uint8_t s[32], const fe *f)
{
	uint32_t h[10];
	fe_freeze(h, f->v);
	s[0] = h[0] >> 0;
	s[1] = h[0] >> 8;
	s[2] = h[0] >> 16;
	s[3] = (h[0] >> 24) | (h[1] << 2);
	s[4] = h[1] >> 6;
	s[5] = h[1] >> 14;
	s[6] = (h[1] >> 22) | (h[2] << 3);
	s[7] = h[2] >> 5;
	s[8] = h[2] >> 13;
	s[9] = (h[2] >> 21) | (h[3] << 5);
	s[10] = h[3] >> 3;
	s[11] = h[3] >> 11;
	s[12] = (h[3] >> 19) | (h[4] << 6);
	s[13] = h[4] >> 2;
	s[14] = h[4] >> 10;
	s[15] = h[4] >> 18;
	s[16] = h[5] >> 0;
	s[17] = h[5] >> 8;
	s[18] = h[5] >> 16;
	s[19] = (h[5] >> 24) | (h[6] << 1);
	s[20] = h[6] >> 7;
	s[21] = h[6] >> 15;
	s[22] = (h[6] >> 23) | (h[7] << 3);
	s[23] = h[7] >> 5;
	s[24] = h[7] >> 13;
	s[25] = (h[7] >> 21) | (h[8] << 4);
	s[26] = h[8] >> 4;
	s[27] = h[8] >> 12;
	s[28] = (h[8] >> 20) | (h[9] << 6);
	s[29] = h[9] >> 2;
	s[30] = h[9] >> 10;
	s[31] = h[9] >> 18;
}

/* h = f */
static inline void fe_copy(fe
*h, const fe *f) 1144 { 1145 memmove(h, f, sizeof(uint32_t) * 10); 1146 } 1147 1148 static inline void fe_copy_lt(fe_loose *h, const fe *f) 1149 { 1150 memmove(h, f, sizeof(uint32_t) * 10); 1151 } 1152 1153 /* h = 0 */ 1154 static inline void fe_0(fe *h) 1155 { 1156 memset(h, 0, sizeof(uint32_t) * 10); 1157 } 1158 1159 /* h = 1 */ 1160 static inline void fe_1(fe *h) 1161 { 1162 memset(h, 0, sizeof(uint32_t) * 10); 1163 h->v[0] = 1; 1164 } 1165 1166 static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) 1167 { 1168 const uint32_t x20 = in1[9]; 1169 const uint32_t x21 = in1[8]; 1170 const uint32_t x19 = in1[7]; 1171 const uint32_t x17 = in1[6]; 1172 const uint32_t x15 = in1[5]; 1173 const uint32_t x13 = in1[4]; 1174 const uint32_t x11 = in1[3]; 1175 const uint32_t x9 = in1[2]; 1176 const uint32_t x7 = in1[1]; 1177 const uint32_t x5 = in1[0]; 1178 const uint32_t x38 = in2[9]; 1179 const uint32_t x39 = in2[8]; 1180 const uint32_t x37 = in2[7]; 1181 const uint32_t x35 = in2[6]; 1182 const uint32_t x33 = in2[5]; 1183 const uint32_t x31 = in2[4]; 1184 const uint32_t x29 = in2[3]; 1185 const uint32_t x27 = in2[2]; 1186 const uint32_t x25 = in2[1]; 1187 const uint32_t x23 = in2[0]; 1188 out[0] = (x5 + x23); 1189 out[1] = (x7 + x25); 1190 out[2] = (x9 + x27); 1191 out[3] = (x11 + x29); 1192 out[4] = (x13 + x31); 1193 out[5] = (x15 + x33); 1194 out[6] = (x17 + x35); 1195 out[7] = (x19 + x37); 1196 out[8] = (x21 + x39); 1197 out[9] = (x20 + x38); 1198 } 1199 1200 /* h = f + g 1201 * Can overlap h with f or g. 
 */
static inline void fe_add(fe_loose *h, const fe *f, const fe *g)
{
	fe_add_impl(h->v, f->v, g->v);
}

/* Limb-wise subtraction. Each limb of 2*p (0x7ffffda = 2*0x3ffffed,
 * then alternating 0x3fffffe/0x7fffffe) is added first so the per-limb
 * result can never underflow for reduced inputs; the result is
 * therefore loose (unreduced) but nonnegative.
 */
static void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
{
	const uint32_t x20 = in1[9];
	const uint32_t x21 = in1[8];
	const uint32_t x19 = in1[7];
	const uint32_t x17 = in1[6];
	const uint32_t x15 = in1[5];
	const uint32_t x13 = in1[4];
	const uint32_t x11 = in1[3];
	const uint32_t x9 = in1[2];
	const uint32_t x7 = in1[1];
	const uint32_t x5 = in1[0];
	const uint32_t x38 = in2[9];
	const uint32_t x39 = in2[8];
	const uint32_t x37 = in2[7];
	const uint32_t x35 = in2[6];
	const uint32_t x33 = in2[5];
	const uint32_t x31 = in2[4];
	const uint32_t x29 = in2[3];
	const uint32_t x27 = in2[2];
	const uint32_t x25 = in2[1];
	const uint32_t x23 = in2[0];
	out[0] = ((0x7ffffda + x5) - x23);
	out[1] = ((0x3fffffe + x7) - x25);
	out[2] = ((0x7fffffe + x9) - x27);
	out[3] = ((0x3fffffe + x11) - x29);
	out[4] = ((0x7fffffe + x13) - x31);
	out[5] = ((0x3fffffe + x15) - x33);
	out[6] = ((0x7fffffe + x17) - x35);
	out[7] = ((0x3fffffe + x19) - x37);
	out[8] = ((0x7fffffe + x21) - x39);
	out[9] = ((0x3fffffe + x20) - x38);
}

/* h = f - g
 * Can overlap h with f or g.
 */
static inline void fe_sub(fe_loose *h, const fe *f, const fe *g)
{
	fe_sub_impl(h->v, f->v, g->v);
}

/* Field multiplication on the ten-limb radix-2^25.5 representation
 * (fiat-crypto generated, constant-time). Schoolbook partial products
 * in 64 bits (x40..x58), then the high half is folded back in by
 * multiplying by 19 — expressed as (t << 4) + (t << 1) + t — followed
 * by a carry chain alternating 26/25-bit limbs and a final 19-weighted
 * wraparound of the top carry (0x13 * x113).
 */
static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10])
{
	const uint32_t x20 = in1[9];
	const uint32_t x21 = in1[8];
	const uint32_t x19 = in1[7];
	const uint32_t x17 = in1[6];
	const uint32_t x15 = in1[5];
	const uint32_t x13 = in1[4];
	const uint32_t x11 = in1[3];
	const uint32_t x9 = in1[2];
	const uint32_t x7 = in1[1];
	const uint32_t x5 = in1[0];
	const uint32_t x38 = in2[9];
	const uint32_t x39 = in2[8];
	const uint32_t x37 = in2[7];
	const uint32_t x35 = in2[6];
	const uint32_t x33 = in2[5];
	const uint32_t x31 = in2[4];
	const uint32_t x29 = in2[3];
	const uint32_t x27 = in2[2];
	const uint32_t x25 = in2[1];
	const uint32_t x23 = in2[0];
	uint64_t x40 = ((uint64_t)x23 * x5);
	uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5));
	uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5));
	uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5));
	uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5));
	uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5));
	uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5));
	uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5));
	uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5));
	uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5));
	uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9));
	uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9));
	uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13));
	uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13));
	uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17));
	uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17));
	uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19))));
	uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21));
	uint64_t x58 = ((uint64_t)(0x2 * x38) * x20);
	/* Fold high limbs back: multiply by 19 = 16 + 2 + 1 via shifts. */
	uint64_t x59 = (x48 + (x58 << 0x4));
	uint64_t x60 = (x59 + (x58 << 0x1));
	uint64_t x61 = (x60 + x58);
	uint64_t x62 = (x47 + (x57 << 0x4));
	uint64_t x63 = (x62 + (x57 << 0x1));
	uint64_t x64 = (x63 + x57);
	uint64_t x65 = (x46 + (x56 << 0x4));
	uint64_t x66 = (x65 + (x56 << 0x1));
	uint64_t x67 = (x66 + x56);
	uint64_t x68 = (x45 + (x55 << 0x4));
	uint64_t x69 = (x68 + (x55 << 0x1));
	uint64_t x70 = (x69 + x55);
	uint64_t x71 = (x44 + (x54 << 0x4));
	uint64_t x72 = (x71 + (x54 << 0x1));
	uint64_t x73 = (x72 + x54);
	uint64_t x74 = (x43 + (x53 << 0x4));
	uint64_t x75 = (x74 + (x53 << 0x1));
	uint64_t x76 = (x75 + x53);
	uint64_t x77 = (x42 + (x52 << 0x4));
	uint64_t x78 = (x77 + (x52 << 0x1));
	uint64_t x79 = (x78 + x52);
	uint64_t x80 = (x41 + (x51 << 0x4));
	uint64_t x81 = (x80 + (x51 << 0x1));
	uint64_t x82 = (x81 + x51);
	uint64_t x83 = (x40 + (x50 << 0x4));
	uint64_t x84 = (x83 + (x50 << 0x1));
	uint64_t x85 = (x84 + x50);
	/* Carry chain: alternate 26-bit (>> 0x1a) and 25-bit (>> 0x19) limbs. */
	uint64_t x86 = (x85 >> 0x1a);
	uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
	uint64_t x88 = (x86 + x82);
	uint64_t x89 = (x88 >> 0x19);
	uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
	uint64_t x91 = (x89 + x79);
	uint64_t x92 = (x91 >> 0x1a);
	uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
	uint64_t x94 = (x92 + x76);
	uint64_t x95 = (x94 >> 0x19);
	uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
	uint64_t x97 = (x95 + x73);
	uint64_t x98 = (x97 >> 0x1a);
	uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
	uint64_t x100 = (x98 + x70);
	uint64_t x101 = (x100 >> 0x19);
	uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
	uint64_t x103 = (x101 + x67);
	uint64_t x104 = (x103 >> 0x1a);
	uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
	uint64_t x106 = (x104 + x64);
	uint64_t x107 = (x106 >> 0x19);
	uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
	uint64_t x109 = (x107 + x61);
	uint64_t x110 = (x109 >> 0x1a);
	uint32_t x111 = ((uint32_t)x109 & 0x3ffffff);
	uint64_t x112 = (x110 + x49);
	uint64_t x113 = (x112 >> 0x19);
	uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
	/* Top carry wraps around weighted by 19 (0x13). */
	uint64_t x115 = (x87 + (0x13 * x113));
	uint32_t x116 = (uint32_t) (x115 >> 0x1a);
	uint32_t x117 = ((uint32_t)x115 & 0x3ffffff);
	uint32_t x118 = (x116 + x90);
	uint32_t x119 = (x118 >> 0x19);
	uint32_t x120 = (x118 & 0x1ffffff);
	out[0] = x117;
	out[1] = x120;
	out[2] = (x119 + x93);
	out[3] = x96;
	out[4] = x99;
	out[5] = x102;
	out[6] = x105;
	out[7] = x108;
	out[8] = x111;
	out[9] = x114;
}

/* Multiply wrappers; the t/l suffixes track tight vs. loose operands. */
static inline void fe_mul_ttt(fe *h, const fe *f, const fe *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

static inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

static inline void
fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g)
{
	fe_mul_impl(h->v, f->v, g->v);
}

/* Field squaring (fiat-crypto generated, constant-time). Same shape as
 * fe_mul_impl but with symmetric cross terms pre-doubled; reduction is
 * identical: fold by 19 via (t << 4) + (t << 1) + t, then carry chain.
 */
static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10])
{
	const uint32_t x17 = in1[9];
	const uint32_t x18 = in1[8];
	const uint32_t x16 = in1[7];
	const uint32_t x14 = in1[6];
	const uint32_t x12 = in1[5];
	const uint32_t x10 = in1[4];
	const uint32_t x8 = in1[3];
	const uint32_t x6 = in1[2];
	const uint32_t x4 = in1[1];
	const uint32_t x2 = in1[0];
	uint64_t x19 = ((uint64_t)x2 * x2);
	uint64_t x20 = ((uint64_t)(0x2 * x2) * x4);
	uint64_t x21 = (0x2 * (((uint64_t)x4 * x4) + ((uint64_t)x2 * x6)));
	uint64_t x22 = (0x2 * (((uint64_t)x4 * x6) + ((uint64_t)x2 * x8)));
	uint64_t x23 = ((((uint64_t)x6 * x6) + ((uint64_t)(0x4 * x4) * x8)) + ((uint64_t)(0x2 * x2) * x10));
	uint64_t x24 = (0x2 * ((((uint64_t)x6 * x8) + ((uint64_t)x4 * x10)) + ((uint64_t)x2 * x12)));
	uint64_t x25 = (0x2 * (((((uint64_t)x8 * x8) + ((uint64_t)x6 * x10)) + ((uint64_t)x2 * x14)) + ((uint64_t)(0x2 * x4) * x12)));
	uint64_t x26 = (0x2 * (((((uint64_t)x8 * x10) + ((uint64_t)x6 * x12)) + ((uint64_t)x4 * x14)) + ((uint64_t)x2 * x16)));
	uint64_t x27 = (((uint64_t)x10 * x10) + (0x2 * ((((uint64_t)x6 * x14) + ((uint64_t)x2 * x18)) + (0x2 * (((uint64_t)x4 * x16) + ((uint64_t)x8 * x12))))));
	uint64_t x28 = (0x2 * ((((((uint64_t)x10 * x12) + ((uint64_t)x8 * x14)) + ((uint64_t)x6 * x16)) + ((uint64_t)x4 * x18)) + ((uint64_t)x2 * x17)));
	uint64_t x29 = (0x2 * (((((uint64_t)x12 * x12) + ((uint64_t)x10 * x14)) + ((uint64_t)x6 * x18)) + (0x2 * (((uint64_t)x8 * x16) + ((uint64_t)x4 * x17)))));
	uint64_t x30 = (0x2 * (((((uint64_t)x12 * x14) + ((uint64_t)x10 * x16)) + ((uint64_t)x8 * x18)) + ((uint64_t)x6 * x17)));
	uint64_t x31 = (((uint64_t)x14 * x14) + (0x2 * (((uint64_t)x10 * x18) + (0x2 * (((uint64_t)x12 * x16) + ((uint64_t)x8 * x17))))));
	uint64_t x32 = (0x2 * ((((uint64_t)x14 * x16) + ((uint64_t)x12 * x18)) + ((uint64_t)x10 * x17)));
	uint64_t x33 = (0x2 * ((((uint64_t)x16 * x16) + ((uint64_t)x14 * x18)) + ((uint64_t)(0x2 * x12) * x17)));
	uint64_t x34 = (0x2 * (((uint64_t)x16 * x18) + ((uint64_t)x14 * x17)));
	uint64_t x35 = (((uint64_t)x18 * x18) + ((uint64_t)(0x4 * x16) * x17));
	uint64_t x36 = ((uint64_t)(0x2 * x18) * x17);
	uint64_t x37 = ((uint64_t)(0x2 * x17) * x17);
	uint64_t x38 = (x27 + (x37 << 0x4));
	uint64_t x39 = (x38 + (x37 << 0x1));
	uint64_t x40 = (x39 + x37);
	uint64_t x41 = (x26 + (x36 << 0x4));
	uint64_t x42 = (x41 + (x36 << 0x1));
	uint64_t x43 = (x42 + x36);
	uint64_t x44 = (x25 + (x35 << 0x4));
	uint64_t x45 = (x44 + (x35 << 0x1));
	uint64_t x46 = (x45 + x35);
	uint64_t x47 = (x24 + (x34 << 0x4));
	uint64_t x48 = (x47 + (x34 << 0x1));
	uint64_t x49 = (x48 + x34);
	uint64_t x50 = (x23 + (x33 << 0x4));
	uint64_t x51 = (x50 + (x33 << 0x1));
	uint64_t x52 = (x51 + x33);
	uint64_t x53 = (x22 + (x32 << 0x4));
	uint64_t x54 = (x53 + (x32 << 0x1));
	uint64_t x55 = (x54 + x32);
	uint64_t x56 = (x21 + (x31 << 0x4));
	uint64_t x57 = (x56 + (x31 << 0x1));
	uint64_t x58 = (x57 + x31);
	uint64_t x59 = (x20 + (x30 << 0x4));
	uint64_t x60 = (x59 + (x30 << 0x1));
	uint64_t x61 = (x60 + x30);
	uint64_t x62 = (x19 + (x29 << 0x4));
	uint64_t x63 = (x62 + (x29 << 0x1));
	uint64_t x64 = (x63 + x29);
	uint64_t x65 = (x64 >> 0x1a);
	uint32_t x66 = ((uint32_t)x64 & 0x3ffffff);
	uint64_t x67 = (x65 + x61);
	uint64_t x68 = (x67 >> 0x19);
	uint32_t x69 = ((uint32_t)x67 & 0x1ffffff);
	uint64_t x70 = (x68 + x58);
	uint64_t x71 = (x70 >> 0x1a);
	uint32_t x72 = ((uint32_t)x70 & 0x3ffffff);
	uint64_t x73 = (x71 + x55);
	uint64_t x74 = (x73 >> 0x19);
	uint32_t x75 = ((uint32_t)x73 & 0x1ffffff);
	uint64_t x76 = (x74 + x52);
	uint64_t x77 = (x76 >> 0x1a);
	uint32_t x78 = ((uint32_t)x76 & 0x3ffffff);
	uint64_t x79 = (x77 + x49);
	uint64_t x80 = (x79 >> 0x19);
	uint32_t x81 = ((uint32_t)x79 & 0x1ffffff);
	uint64_t x82 = (x80 + x46);
	uint64_t x83 = (x82 >> 0x1a);
	uint32_t x84 = ((uint32_t)x82 & 0x3ffffff);
	uint64_t x85 = (x83 + x43);
	uint64_t x86 = (x85 >> 0x19);
	uint32_t x87 = ((uint32_t)x85 & 0x1ffffff);
	uint64_t x88 = (x86 + x40);
	uint64_t x89 = (x88 >> 0x1a);
	uint32_t x90 = ((uint32_t)x88 & 0x3ffffff);
	uint64_t x91 = (x89 + x28);
	uint64_t x92 = (x91 >> 0x19);
	uint32_t x93 = ((uint32_t)x91 & 0x1ffffff);
	uint64_t x94 = (x66 + (0x13 * x92));
	uint32_t x95 = (uint32_t) (x94 >> 0x1a);
	uint32_t x96 = ((uint32_t)x94 & 0x3ffffff);
	uint32_t x97 = (x95 + x69);
	uint32_t x98 = (x97 >> 0x19);
	uint32_t x99 = (x97 & 0x1ffffff);
	out[0] = x96;
	out[1] = x99;
	out[2] = (x98 + x72);
	out[3] = x75;
	out[4] = x78;
	out[5] = x81;
	out[6] = x84;
	out[7] = x87;
	out[8] = x90;
	out[9] = x93;
}

/* h = f^2 for a loose input f. */
static inline void fe_sq_tl(fe *h, const fe_loose *f)
{
	fe_sqr_impl(h->v, f->v);
}

/* h = f^2 for a tight input f. */
static inline void fe_sq_tt(fe *h, const fe *f)
{
	fe_sqr_impl(h->v, f->v);
}

/* Inversion via a fixed square-and-multiply addition chain (Fermat:
 * out = z^(p-2)). The loop bounds (2, 5, 10, 20, 10, 50, 100, 50, 5
 * squarings between multiplies) are the standard Curve25519 chain;
 * being fixed, the sequence is independent of the value of z and thus
 * constant-time. Note fe_invert(0) = 0, which curve25519() relies on.
 */
static inline void fe_loose_invert(fe *out, const fe_loose *z)
{
	fe t0;
	fe t1;
	fe t2;
	fe t3;
	int i;

	fe_sq_tl(&t0, z);
	fe_sq_tt(&t1, &t0);
	for (i = 1; i < 2; ++i)
		fe_sq_tt(&t1, &t1);
	fe_mul_tlt(&t1, z, &t1);
	fe_mul_ttt(&t0, &t0, &t1);
	fe_sq_tt(&t2, &t0);
	fe_mul_ttt(&t1, &t1, &t2);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 5; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 10; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t2, &t2, &t1);
	fe_sq_tt(&t3, &t2);
	for (i = 1; i < 20; ++i)
		fe_sq_tt(&t3, &t3);
	fe_mul_ttt(&t2, &t3, &t2);
	fe_sq_tt(&t2, &t2);
	for (i = 1; i < 10; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t2, &t1);
	for (i = 1; i < 50; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t2, &t2, &t1);
	fe_sq_tt(&t3, &t2);
	for (i = 1; i < 100; ++i)
		fe_sq_tt(&t3, &t3);
	fe_mul_ttt(&t2, &t3, &t2);
	fe_sq_tt(&t2, &t2);
	for (i = 1; i < 50; ++i)
		fe_sq_tt(&t2, &t2);
	fe_mul_ttt(&t1, &t2, &t1);
	fe_sq_tt(&t1, &t1);
	for (i = 1; i < 5; ++i)
		fe_sq_tt(&t1, &t1);
	fe_mul_ttt(out, &t1, &t0);
}

/* Inversion for a tight input: widen to loose form and invert. */
static inline void fe_invert(fe *out, const fe *z)
{
	fe_loose l;
	fe_copy_lt(&l, z);
	fe_loose_invert(out, &l);
}

/* Replace (f,g) with (g,f) if b == 1;
 * replace (f,g) with (f,g) if b == 0.
 *
 * Preconditions: b in {0,1}
 *
 * Constant-time: b is stretched into an all-ones/all-zero mask and the
 * swap is done with XOR, so there are no secret-dependent branches or
 * memory accesses.
 */
static inline void fe_cswap(fe *f, fe *g, unsigned int b)
{
	unsigned i;
	b = 0 - b; /* all-ones mask when b == 1, zero when b == 0 */
	for (i = 0; i < 10; i++) {
		uint32_t x = f->v[i] ^ g->v[i];
		x &= b;
		f->v[i] ^= x;
		g->v[i] ^= x;
	}
}

/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/
static inline void fe_mul_121666_impl(uint32_t out[10], const uint32_t in1[10])
{
	const uint32_t x20 = in1[9];
	const uint32_t x21 = in1[8];
	const uint32_t x19 = in1[7];
	const uint32_t x17 = in1[6];
	const uint32_t x15 = in1[5];
	const uint32_t x13 = in1[4];
	const uint32_t x11 = in1[3];
	const uint32_t x9 = in1[2];
	const uint32_t x7 = in1[1];
	const uint32_t x5 = in1[0];
	/* Second operand is the ladder constant (A-2)/4 = 121666; the
	 * remaining limbs are zero, leaving many terms below dead for the
	 * compiler to eliminate while keeping the proven structure intact.
	 */
	const uint32_t x38 = 0;
	const uint32_t x39 = 0;
	const uint32_t x37 = 0;
	const uint32_t x35 = 0;
	const uint32_t x33 = 0;
	const uint32_t x31 = 0;
	const uint32_t x29 = 0;
	const uint32_t x27 = 0;
	const uint32_t x25 = 0;
	const uint32_t x23 = 121666;
	uint64_t x40 = ((uint64_t)x23 * x5);
	uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5));
	uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5));
	uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5));
	uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5));
	uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5));
	uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5));
	uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5));
	uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5));
	uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5));
	uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9));
	uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9));
	uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13));
	uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13));
	uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17));
	uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17));
	uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19))));
	uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21));
	uint64_t x58 = ((uint64_t)(0x2 * x38) * x20);
	uint64_t x59 = (x48 + (x58 << 0x4));
	uint64_t x60 = (x59 + (x58 << 0x1));
	uint64_t x61 = (x60 + x58);
	uint64_t x62 = (x47 + (x57 << 0x4));
	uint64_t x63 = (x62 + (x57 << 0x1));
	uint64_t x64 = (x63 + x57);
	uint64_t x65 = (x46 + (x56 << 0x4));
	uint64_t x66 = (x65 + (x56 << 0x1));
	uint64_t x67 = (x66 + x56);
	uint64_t x68 = (x45 + (x55 << 0x4));
	uint64_t x69 = (x68 + (x55 << 0x1));
	uint64_t x70 = (x69 + x55);
	uint64_t x71 = (x44 + (x54 << 0x4));
	uint64_t x72 = (x71 + (x54 << 0x1));
	uint64_t x73 = (x72 + x54);
	uint64_t x74 = (x43 + (x53 << 0x4));
	uint64_t x75 = (x74 + (x53 << 0x1));
	uint64_t x76 = (x75 + x53);
	uint64_t x77 = (x42 + (x52 << 0x4));
	uint64_t x78 = (x77 + (x52 << 0x1));
	uint64_t x79 = (x78 + x52);
	uint64_t x80 = (x41 + (x51 << 0x4));
	uint64_t x81 = (x80 + (x51 << 0x1));
	uint64_t x82 = (x81 + x51);
	uint64_t x83 = (x40 + (x50 << 0x4));
	uint64_t x84 = (x83 + (x50 << 0x1));
	uint64_t x85 = (x84 + x50);
	uint64_t x86 = (x85 >> 0x1a);
	uint32_t x87 = ((uint32_t)x85 & 0x3ffffff);
	uint64_t x88 = (x86 + x82);
	uint64_t x89 = (x88 >> 0x19);
	uint32_t x90 = ((uint32_t)x88 & 0x1ffffff);
	uint64_t x91 = (x89 + x79);
	uint64_t x92 = (x91 >> 0x1a);
	uint32_t x93 = ((uint32_t)x91 & 0x3ffffff);
	uint64_t x94 = (x92 + x76);
	uint64_t x95 = (x94 >> 0x19);
	uint32_t x96 = ((uint32_t)x94 & 0x1ffffff);
	uint64_t x97 = (x95 + x73);
	uint64_t x98 = (x97 >> 0x1a);
	uint32_t x99 = ((uint32_t)x97 & 0x3ffffff);
	uint64_t x100 = (x98 + x70);
	uint64_t x101 = (x100 >> 0x19);
	uint32_t x102 = ((uint32_t)x100 & 0x1ffffff);
	uint64_t x103 = (x101 + x67);
	uint64_t x104 = (x103 >> 0x1a);
	uint32_t x105 = ((uint32_t)x103 & 0x3ffffff);
	uint64_t x106 = (x104 + x64);
	uint64_t x107 = (x106 >> 0x19);
	uint32_t x108 = ((uint32_t)x106 & 0x1ffffff);
	uint64_t x109 = (x107 + x61);
	uint64_t x110 = (x109 >> 0x1a);
	uint32_t x111 = ((uint32_t)x109 & 0x3ffffff);
	uint64_t x112 = (x110 + x49);
	uint64_t x113 = (x112 >> 0x19);
	uint32_t x114 = ((uint32_t)x112 & 0x1ffffff);
	uint64_t x115 = (x87 + (0x13 * x113));
	uint32_t x116 = (uint32_t) (x115 >> 0x1a);
	uint32_t x117 = ((uint32_t)x115 & 0x3ffffff);
	uint32_t x118 = (x116 + x90);
	uint32_t x119 = (x118 >> 0x19);
	uint32_t x120 = (x118 & 0x1ffffff);
	out[0] = x117;
	out[1] = x120;
	out[2] = (x119 + x93);
	out[3] = x96;
	out[4] = x99;
	out[5] = x102;
	out[6] = x105;
	out[7] = x108;
	out[8] = x111;
	out[9] = x114;
}

/* h = f * 121666, used by the Montgomery ladder step. */
static inline void fe_mul121666(fe *h, const fe_loose *f)
{
	fe_mul_121666_impl(h->v, f->v);
}

/* All-zero output used to reject degenerate results below. */
static const uint8_t curve25519_null_point[CURVE25519_KEY_SIZE];

/* X25519 scalar multiplication: out = scalar * point, via a
 * constant-time Montgomery ladder over the clamped scalar. Returns
 * false when the result is the all-zero point (e.g. a low-order input
 * point), true otherwise. All intermediate state is zeroized before
 * returning.
 */
bool curve25519(uint8_t out[CURVE25519_KEY_SIZE],
		const uint8_t scalar[CURVE25519_KEY_SIZE],
		const uint8_t point[CURVE25519_KEY_SIZE])
{
	fe x1, x2, z2, x3, z3;
	fe_loose x2l, z2l, x3l;
	unsigned swap = 0;
	int pos;
	uint8_t e[32];

	memcpy(e, scalar, 32);
	curve25519_clamp_secret(e);

	/* The following implementation was transcribed to Coq and proven to
	 * correspond to unary scalar multiplication in affine coordinates given
	 * that x1 != 0 is the x coordinate of some point on the curve. It was
	 * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives
	 * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was
	 * quantified over the underlying field, so it applies to Curve25519
	 * itself and the quadratic twist of Curve25519. It was not proven in
	 * Coq that prime-field arithmetic correctly simulates extension-field
	 * arithmetic on prime-field values. The decoding of the byte array
	 * representation of e was not considered.
	 *
	 * Specification of Montgomery curves in affine coordinates:
	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27>
	 *
	 * Proof that these form a group that is isomorphic to a Weierstrass
	 * curve:
	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35>
	 *
	 * Coq transcription and correctness proof of the loop
	 * (where scalarbits=255):
	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118>
	 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278>
	 * preconditions: 0 <= e < 2^255 (not necessarily e < order),
	 * fe_invert(0) = 0
	 */
	fe_frombytes(&x1, point);
	fe_1(&x2);
	fe_0(&z2);
	fe_copy(&x3, &x1);
	fe_1(&z3);

	for (pos = 254; pos >= 0; --pos) {
		fe tmp0, tmp1;
		fe_loose tmp0l, tmp1l;
		/* loop invariant as of right before the test, for the case
		 * where x1 != 0:
		 * pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3
		 * is nonzero
		 * let r := e >> (pos+1) in the following equalities of
		 * projective points:
		 * to_xz (r*P) === if swap then (x3, z3) else (x2, z2)
		 * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3)
		 * x1 is the nonzero x coordinate of the nonzero
		 * point (r*P-(r+1)*P)
		 */
		unsigned b = 1 & (e[pos / 8] >> (pos & 7));
		swap ^= b;
		fe_cswap(&x2, &x3, swap);
		fe_cswap(&z2, &z3, swap);
		swap = b;
		/* Coq transcription of ladderstep formula (called from
		 * transcribed loop):
		 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89>
		 * <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131>
		 * x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217>
		 * x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147>
		 */
		fe_sub(&tmp0l, &x3, &z3);
		fe_sub(&tmp1l, &x2, &z2);
		fe_add(&x2l, &x2, &z2);
		fe_add(&z2l, &x3, &z3);
		fe_mul_tll(&z3, &tmp0l, &x2l);
		fe_mul_tll(&z2, &z2l, &tmp1l);
		fe_sq_tl(&tmp0, &tmp1l);
		fe_sq_tl(&tmp1, &x2l);
		fe_add(&x3l, &z3, &z2);
		fe_sub(&z2l, &z3, &z2);
		fe_mul_ttt(&x2, &tmp1, &tmp0);
		fe_sub(&tmp1l, &tmp1, &tmp0);
		fe_sq_tl(&z2, &z2l);
		fe_mul121666(&z3, &tmp1l);
		fe_sq_tl(&x3, &x3l);
		fe_add(&tmp0l, &tmp0, &z3);
		fe_mul_ttt(&z3, &x1, &z2);
		fe_mul_tll(&z2, &tmp1l, &tmp0l);
	}
	/* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3)
	 * else (x2, z2)
	 */
	fe_cswap(&x2, &x3, swap);
	fe_cswap(&z2, &z3, swap);

	/* Convert from projective (x2 : z2) to affine x = x2/z2. */
	fe_invert(&z2, &z2);
	fe_mul_ttt(&x2, &x2, &z2);
	fe_tobytes(out, &x2);

	/* Zeroize secret intermediates (explicit_bzero survives DCE). */
	explicit_bzero(&x1, sizeof(x1));
	explicit_bzero(&x2, sizeof(x2));
	explicit_bzero(&z2, sizeof(z2));
	explicit_bzero(&x3, sizeof(x3));
	explicit_bzero(&z3, sizeof(z3));
	explicit_bzero(&x2l, sizeof(x2l));
	explicit_bzero(&z2l, sizeof(z2l));
	explicit_bzero(&x3l, sizeof(x3l));
	explicit_bzero(&e, sizeof(e));

	/* Constant-time compare against zero; false means a degenerate
	 * (all-zero) shared secret that callers must reject.
	 */
	return timingsafe_bcmp(out, curve25519_null_point, CURVE25519_KEY_SIZE) != 0;
}
#endif

/* Allocate the global OCF ChaCha20-Poly1305 AEAD session when the
 * kernel provides native support (i.e. the mbuf compat shim is not
 * needed). Returns 0 on success or the crypto_newsession() error.
 */
int
crypto_init(void)
{
#ifndef COMPAT_NEED_CHACHA20POLY1305_MBUF
	struct crypto_session_params csp = {
		.csp_mode = CSP_MODE_AEAD,
		.csp_ivlen = sizeof(uint64_t),
		.csp_cipher_alg = CRYPTO_CHACHA20_POLY1305,
		.csp_cipher_klen = CHACHA20POLY1305_KEY_SIZE,
		.csp_flags = CSP_F_SEPARATE_AAD | CSP_F_SEPARATE_OUTPUT
	};
	int ret = crypto_newsession(&chacha20_poly1305_sid, &csp, CRYPTOCAP_F_SOFTWARE);
	if (ret != 0)
		return (ret);
#endif
	return (0);
}

/* Release the session allocated by crypto_init(), if any. */
void
crypto_deinit(void)
{
#ifndef COMPAT_NEED_CHACHA20POLY1305_MBUF
	crypto_freesession(chacha20_poly1305_sid);
#endif
}