/*
 * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "inner.h"

/*
 * Make a random integer of the provided size. The size is encoded.
 * The header word is untouched.
 *
 * The value words x[1] to x[len] (with len = (esize + 15) >> 4) are
 * filled from the PRNG, then each word is truncated to 15 bits. The
 * low four bits of esize give the number of bits kept in the top word
 * x[len]; a value of 0 there means that the top word keeps all of its
 * 15 bits.
 *
 *   rng     PRNG context used to obtain the random bytes
 *   x       destination integer (x[0] is neither read nor written)
 *   esize   encoded bit length of the integer to produce
 */
static void
mkrand(const br_prng_class **rng, uint16_t *x, uint32_t esize)
{
	size_t u, len;
	unsigned m;

	len = (esize + 15) >> 4;
	(*rng)->generate(rng, x + 1, len * sizeof(uint16_t));
	for (u = 1; u < len; u ++) {
		x[u] &= 0x7FFF;
	}
	m = esize & 15;
	if (m == 0) {
		x[len] &= 0x7FFF;
	} else {
		/* Keep only the m low bits of the top word. */
		x[len] &= 0x7FFF >> (15 - m);
	}
}

/*
 * This is the big-endian unsigned representation of the product of
 * all small primes from 13 to 1481.
 */
static const unsigned char SMALL_PRIMES[] = {
	0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A,
	0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7,
	0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37,
	0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5,
	0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E,
	0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6,
	0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C,
	0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40,
	0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50,
	0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7,
	0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3,
	0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E,
	0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC,
	0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08,
	0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B,
	0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22,
	0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77,
	0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E,
	0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80,
	0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8,
	0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2,
	0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC,
	0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54,
	0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74,
	0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C,
	0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD
};

/*
 * We need temporary values for at least 7 integers of the same size
 * as a factor (including header word); more space helps with performance
 * (in modular exponentiations), but we much prefer to remain under
 * 2 kilobytes in total, to save stack space. The macro TEMPS below
 * exceeds 1024 (which is a count in 16-bit words) when BR_MAX_RSA_SIZE
 * is greater than 4350 (default value is 4096, so the 2-kB limit is
 * maintained unless BR_MAX_RSA_SIZE was modified).
 */
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define TEMPS MAX(1024, 7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 29) / 15))

/*
 * Perform trial division on a candidate prime. This computes
 * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The
 * br_i15_moddiv() function will report an error if y is not invertible
 * modulo x. Returned value is 1 on success (none of the small primes
 * divides x), 0 on error (a non-trivial GCD is obtained).
 *
 * This function assumes that x is odd.
 *
 *   x   candidate prime
 *   t   temporary buffer; its first 1 + ((x[0] + 15) >> 4) words hold
 *       y, and the remainder is handed to br_i15_moddiv() as its own
 *       temporary area
 */
static uint32_t
trial_divisions(const uint16_t *x, uint16_t *t)
{
	uint16_t *y;
	uint16_t x0i;

	y = t;
	t += 1 + ((x[0] + 15) >> 4);
	x0i = br_i15_ninv15(x[1]);
	br_i15_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x);
	return br_i15_moddiv(y, y, x, x0i, t);
}

/*
 * Perform n rounds of Miller-Rabin on the candidate prime x. This
 * function assumes that x = 3 mod 4.
 *
 * Returned value is 1 on success (all rounds completed successfully),
 * 0 otherwise.
 *
 *   rng    PRNG context used to draw the random bases
 *   x      candidate prime
 *   n      number of rounds to perform
 *   t      temporary buffer
 *   tlen   size of the temporary buffer, in 16-bit words
 */
static uint32_t
miller_rabin(const br_prng_class **rng, const uint16_t *x, int n,
	uint16_t *t, size_t tlen)
{
	/*
	 * Since x = 3 mod 4, the Miller-Rabin test is simple:
	 *  - get a random base a (such that 1 < a < x-1)
	 *  - compute z = a^((x-1)/2) mod x
	 *  - if z != 1 and z != x-1, the number x is composite
	 *
	 * We generate bases 'a' randomly with a size which is
	 * one bit less than x, which ensures that a < x-1. It
	 * is not useful to verify that a > 1 because the probability
	 * that we get a value a equal to 0 or 1 is much smaller
	 * than the probability of our Miller-Rabin tests not to
	 * detect a composite, which is already quite smaller than the
	 * probability of the hardware misbehaving and returning a
	 * composite integer because of some glitch (e.g. bad RAM
	 * or ill-timed cosmic ray).
	 */
	unsigned char *xm1d2;
	size_t xlen, xm1d2_len, xm1d2_len_u16, u;
	uint32_t asize;
	unsigned cc;
	uint16_t x0i;

	/*
	 * Compute (x-1)/2 (encoded).
	 *
	 * x[0] counts 16 per full 15-bit word, so subtracting the number
	 * of full words (x[0] >> 4) gives the bit length used to size
	 * the byte buffer. The loop then shifts the byte string right by
	 * one bit, carrying the dropped low bit of each byte into the
	 * top bit of the next one; since x is odd, dropping its lowest
	 * bit yields (x-1)/2.
	 */
	xm1d2 = (unsigned char *)t;
	xm1d2_len = ((x[0] - (x[0] >> 4)) + 7) >> 3;
	br_i15_encode(xm1d2, xm1d2_len, x);
	cc = 0;
	for (u = 0; u < xm1d2_len; u ++) {
		unsigned w;

		w = xm1d2[u];
		xm1d2[u] = (unsigned char)((w >> 1) | cc);
		cc = w << 7;
	}

	/*
	 * We used some words of the provided buffer for (x-1)/2.
	 */
	xm1d2_len_u16 = (xm1d2_len + 1) >> 1;
	t += xm1d2_len_u16;
	tlen -= xm1d2_len_u16;

	xlen = (x[0] + 15) >> 4;
	/*
	 * Encoded size of a value one bit shorter than x. When the low
	 * four bits of x[0] are zero, removing one bit crosses a word
	 * boundary, which costs one extra unit in the encoded length.
	 * NOTE(review): this relies on EQ0() (from inner.h) yielding 1
	 * for a zero argument and 0 otherwise.
	 */
	asize = x[0] - 1 - EQ0(x[0] & 15);
	x0i = br_i15_ninv15(x[1]);
	while (n -- > 0) {
		uint16_t *a;
		uint32_t eq1, eqm1;

		/*
		 * Generate a random base. We don't need the base to be
		 * really uniform modulo x, so we just get a random
		 * number which is one bit shorter than x.
		 *
		 * The top word is cleared first because mkrand() is
		 * called with the shorter size and may thus leave
		 * a[xlen] unwritten.
		 */
		a = t;
		a[0] = x[0];
		a[xlen] = 0;
		mkrand(rng, a, asize);

		/*
		 * Compute a^((x-1)/2) mod x. We assume here that the
		 * function will not fail (the temporary array is large
		 * enough).
		 */
		br_i15_modpow_opt(a, xm1d2, xm1d2_len,
			x, x0i, t + 1 + xlen, tlen - 1 - xlen);

		/*
		 * We must obtain either 1 or x-1. Note that x is odd,
		 * hence x-1 differs from x only in its low word (no
		 * carry).
		 *
		 * eq1 ends up zero only if a == 1, and eqm1 ends up
		 * zero only if a == x-1.
		 */
		eq1 = a[1] ^ 1;
		eqm1 = a[1] ^ (x[1] - 1);
		for (u = 2; u <= xlen; u ++) {
			eq1 |= a[u];
			eqm1 |= a[u] ^ x[u];
		}

		if ((EQ0(eq1) | EQ0(eqm1)) == 0) {
			return 0;
		}
	}
	return 1;
}

/*
 * Create a random prime of the provided size. 'esize' is the _encoded_
 * bit length. The two top bits and the two bottom bits are set to 1.
 *
 * Candidates are drawn in a loop until one passes all of: the checks
 * modulo 3, 5, 7 and 11 (including compatibility with the public
 * exponent when it is one of these four values), trial_divisions(),
 * and miller_rabin(). The function returns only once such a candidate
 * has been found.
 *
 *   rng      PRNG context
 *   x        destination integer (header word is set to esize)
 *   esize    encoded bit length of the prime to produce
 *   pubexp   public exponent
 *   t        temporary buffer
 *   tlen     size of the temporary buffer, in 16-bit words
 */
static void
mkprime(const br_prng_class **rng, uint16_t *x, uint32_t esize,
	uint32_t pubexp, uint16_t *t, size_t tlen)
{
	size_t len;

	x[0] = esize;
	len = (esize + 15) >> 4;
	for (;;) {
		size_t u;
		uint32_t m3, m5, m7, m11;
		int rounds;

		/*
		 * Generate random bits. We force the two top bits and the
		 * two bottom bits to 1.
		 *
		 * The three cases below correspond to the top word
		 * holding 15 bits, exactly 1 bit (so the second top bit
		 * lives in the previous word), or 2 to 14 bits.
		 */
		mkrand(rng, x, esize);
		if ((esize & 15) == 0) {
			x[len] |= 0x6000;
		} else if ((esize & 15) == 1) {
			x[len] |= 0x0001;
			x[len - 1] |= 0x4000;
		} else {
			x[len] |= 0x0003 << ((esize & 15) - 2);
		}
		x[1] |= 0x0003;

		/*
		 * Trial division with low primes (3, 5, 7 and 11). We
		 * use the following properties:
		 *
		 *   2^2 = 1 mod 3
		 *   2^4 = 1 mod 5
		 *   2^3 = 1 mod 7
		 *   2^10 = 1 mod 11
		 */
		m3 = 0;
		m5 = 0;
		m7 = 0;
		m11 = 0;
		for (u = 0; u < len; u ++) {
			uint32_t w;

			w = x[1 + u];
			m3 += w << (u & 1);
			m3 = (m3 & 0xFF) + (m3 >> 8);
			m5 += w << ((4 - u) & 3);
			m5 = (m5 & 0xFF) + (m5 >> 8);
			m7 += w;
			m7 = (m7 & 0x1FF) + (m7 >> 9);
			m11 += w << (5 & -(u & 1));
			m11 = (m11 & 0x3FF) + (m11 >> 10);
		}

		/*
		 * Maximum values of m* at this point:
		 *  m3:   511
		 *  m5:   2310
		 *  m7:   510
		 *  m11:  2047
		 * We use the same properties to make further reductions.
		 */

		m3 = (m3 & 0x0F) + (m3 >> 4);      /* max: 46 */
		m3 = (m3 & 0x0F) + (m3 >> 4);      /* max: 16 */
		m3 = ((m3 * 43) >> 5) & 3;

		m5 = (m5 & 0xFF) + (m5 >> 8);      /* max: 263 */
		m5 = (m5 & 0x0F) + (m5 >> 4);      /* max: 30 */
		m5 = (m5 & 0x0F) + (m5 >> 4);      /* max: 15 */
		m5 -= 10 & -GT(m5, 9);
		m5 -= 5 & -GT(m5, 4);

		m7 = (m7 & 0x3F) + (m7 >> 6);      /* max: 69 */
		m7 = (m7 & 7) + (m7 >> 3);         /* max: 14 */
		m7 = ((m7 * 147) >> 7) & 7;

		/*
		 * 2^5 = 32 = -1 mod 11.
		 */
		m11 = (m11 & 0x1F) + 66 - (m11 >> 5);   /* max: 97 */
		m11 -= 88 & -GT(m11, 87);
		m11 -= 44 & -GT(m11, 43);
		m11 -= 22 & -GT(m11, 21);
		m11 -= 11 & -GT(m11, 10);

		/*
		 * If any of these remainders is 0, then the candidate is
		 * not prime. Also, if pubexp is 3, 5, 7 or 11, and the
		 * corresponding remainder is 1, then the candidate must
		 * be rejected, because we need e to be invertible
		 * modulo p-1. We can use simple comparisons here
		 * because they won't leak information on a candidate
		 * that we keep, only on one that we reject (and is thus
		 * not secret).
		 */
		if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) {
			continue;
		}
		if ((pubexp == 3 && m3 == 1)
			|| (pubexp == 5 && m5 == 1)
			|| (pubexp == 7 && m7 == 1)
			|| (pubexp == 11 && m11 == 1))
		{
			continue;
		}

		/*
		 * More trial divisions.
		 */
		if (!trial_divisions(x, t)) {
			continue;
		}

		/*
		 * Miller-Rabin algorithm. Since we selected a random
		 * integer, not a maliciously crafted integer, we can use
		 * relatively few rounds to lower the risk of a false
		 * positive (i.e. declaring prime a non-prime) under
		 * 2^(-80). It is not useful to lower the probability much
		 * below that, since that would be substantially below
		 * the probability of the hardware misbehaving. Sufficient
		 * numbers of rounds are extracted from the Handbook of
		 * Applied Cryptography, note 4.49 (page 149).
		 *
		 * Since we work on the encoded size (esize), we need to
		 * compare with encoded thresholds.
		 */
		if (esize < 320) {
			rounds = 12;
		} else if (esize < 480) {
			rounds = 9;
		} else if (esize < 693) {
			rounds = 6;
		} else if (esize < 906) {
			rounds = 4;
		} else if (esize < 1386) {
			rounds = 3;
		} else {
			rounds = 2;
		}

		if (miller_rabin(rng, x, rounds, t, tlen)) {
			return;
		}
	}
}

/*
 * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided
 * as parameter (with announced bit length equal to that of p). This
 * function computes d = 1/e mod p-1 (for an odd integer e). Returned
 * value is 1 on success, 0 on error (an error is reported if e is not
 * invertible modulo p-1).
 *
 * The temporary buffer (t) must have room for at least 4 integers of
 * the size of p.
 *
 *   d   receives the inverse
 *   m   the value (p-1)/2
 *   e   public exponent
 *   t   temporary buffer; its first integer-sized slot holds e, and
 *       the remainder is handed to br_i15_moddiv()
 */
static uint32_t
invert_pubexp(uint16_t *d, const uint16_t *m, uint32_t e, uint16_t *t)
{
	uint16_t *f;
	uint32_t r;

	f = t;
	t += 1 + ((m[0] + 15) >> 4);

	/*
	 * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd.
	 *
	 * d is set to 1 and f to e (split into 15-bit words), so that
	 * the modular division d/f yields the inverse of e.
	 */
	br_i15_zero(d, m[0]);
	d[1] = 1;
	br_i15_zero(f, m[0]);
	f[1] = e & 0x7FFF;
	f[2] = (e >> 15) & 0x7FFF;
	f[3] = e >> 30;
	r = br_i15_moddiv(d, f, m, br_i15_ninv15(m[1]), t);

	/*
	 * We really want d = 1/e mod p-1, with p-1 = 2m. By the CRT,
	 * the result is either the d we got, or d + m.
	 *
	 * Let's write e*d = 1 + k*m, for some integer k. Integers e
	 * and m are odd. If d is odd, then e*d is odd, which implies
	 * that k must be even; in that case, e*d = 1 + (k/2)*2m, and
	 * thus d is already fine. Conversely, if d is even, then k
	 * is odd, and we must add m to d in order to get the correct
	 * result.
	 */
	br_i15_add(d, m, (uint32_t)(1 - (d[1] & 1)));

	return r;
}

/*
 * Swap two buffers in RAM. They must be disjoint.
 *
 *   b1, b2   the two buffers to exchange
 *   len      number of bytes to swap
 */
static void
bufswap(void *b1, void *b2, size_t len)
{
	size_t u;
	unsigned char *buf1, *buf2;

	buf1 = b1;
	buf2 = b2;
	for (u = 0; u < len; u ++) {
		unsigned w;

		w = buf1[u];
		buf1[u] = buf2[u];
		buf2[u] = w;
	}
}

/*
 * see bearssl_rsa.h
 *
 * Generate an RSA key pair whose modulus has 'size' bits.
 *
 *   rng         PRNG context used to produce the prime candidates
 *   sk          receives the private key; its p, q, dp, dq and iq
 *               fields are set to point, in that order, to consecutive
 *               areas of kbuf_priv
 *   kbuf_priv   storage for the private key elements
 *   pk          receives the public key; may be NULL, in which case
 *               the public key is not produced
 *   kbuf_pub    storage for the public key elements (modulus, followed
 *               by the public exponent); used only if pk is not NULL
 *   size        modulus size in bits
 *   pubexp      public exponent; 0 selects the value 3
 *
 * Returned value is 0 if size is not between BR_MIN_RSA_SIZE and
 * BR_MAX_RSA_SIZE, or if pubexp is 1 or even. Otherwise, it is the
 * status reported by the final modular division (computation of iq).
 */
uint32_t
br_rsa_i15_keygen(const br_prng_class **rng,
	br_rsa_private_key *sk, void *kbuf_priv,
	br_rsa_public_key *pk, void *kbuf_pub,
	unsigned size, uint32_t pubexp)
{
	uint32_t esize_p, esize_q;
	size_t plen, qlen, tlen;
	uint16_t *p, *q, *t;
	uint16_t tmp[TEMPS];
	uint32_t r;

	if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) {
		return 0;
	}
	if (pubexp == 0) {
		pubexp = 3;
	} else if (pubexp == 1 || (pubexp & 1) == 0) {
		return 0;
	}

	/*
	 * p gets the larger half when the modulus size is odd. At this
	 * point esize_p and esize_q are still plain bit lengths; they
	 * are converted to encoded lengths further below.
	 */
	esize_p = (size + 1) >> 1;
	esize_q = size - esize_p;
	sk->n_bitlen = size;
	sk->p = kbuf_priv;
	sk->plen = (esize_p + 7) >> 3;
	sk->q = sk->p + sk->plen;
	sk->qlen = (esize_q + 7) >> 3;
	sk->dp = sk->q + sk->qlen;
	sk->dplen = sk->plen;
	sk->dq = sk->dp + sk->dplen;
	sk->dqlen = sk->qlen;
	sk->iq = sk->dq + sk->dqlen;
	sk->iqlen = sk->plen;

	if (pk != NULL) {
		pk->n = kbuf_pub;
		pk->nlen = (size + 7) >> 3;
		pk->e = pk->n + pk->nlen;
		pk->elen = 4;
		br_enc32be(pk->e, pubexp);
		/*
		 * Skip leading zero bytes of the encoded exponent. The
		 * loop terminates because pubexp is not zero here.
		 */
		while (*pk->e == 0) {
			pk->e ++;
			pk->elen --;
		}
	}

	/*
	 * We now switch to encoded sizes.
	 *
	 * floor((x * 17477) / (2^18)) is equal to floor(x/15) for all
	 * integers x from 0 to 23833.
	 */
	esize_p += MUL15(esize_p, 17477) >> 18;
	esize_q += MUL15(esize_q, 17477) >> 18;
	plen = (esize_p + 15) >> 4;
	qlen = (esize_q + 15) >> 4;

	/*
	 * Layout of tmp[]: p (1 + plen words), then q (1 + qlen words),
	 * then the remaining tlen words as scratch space.
	 */
	p = tmp;
	q = p + 1 + plen;
	t = q + 1 + qlen;
	tlen = ((sizeof tmp) / sizeof(uint16_t)) - (2 + plen + qlen);

	/*
	 * When looking for primes p and q, we temporarily divide
	 * candidates by 2, in order to compute the inverse of the
	 * public exponent.
	 *
	 * After the right shift, p holds (p-1)/2 (p is odd). On success,
	 * the original value is restored by doubling (adding p to
	 * itself) and setting the low bit again.
	 */

	for (;;) {
		mkprime(rng, p, esize_p, pubexp, t, tlen);
		br_i15_rshift(p, 1);
		if (invert_pubexp(t, p, pubexp, t + 1 + plen)) {
			br_i15_add(p, p, 1);
			p[1] |= 1;
			br_i15_encode(sk->p, sk->plen, p);
			br_i15_encode(sk->dp, sk->dplen, t);
			break;
		}
	}

	for (;;) {
		mkprime(rng, q, esize_q, pubexp, t, tlen);
		br_i15_rshift(q, 1);
		if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) {
			br_i15_add(q, q, 1);
			q[1] |= 1;
			br_i15_encode(sk->q, sk->qlen, q);
			br_i15_encode(sk->dq, sk->dqlen, t);
			break;
		}
	}

	/*
	 * If p and q have the same size, then it is possible that q > p
	 * (when the target modulus size is odd, we generate p with a
	 * greater bit length than q). If q > p, we want to swap p and q
	 * (and also dp and dq) for two reasons:
	 *  - The final step below (inversion of q modulo p) is easier if
	 *    p > q.
	 *  - While BearSSL's RSA code is perfectly happy with RSA keys such
	 *    that p < q, some other implementations have restrictions and
	 *    require p > q.
	 *
	 * Note that we can do a simple non-constant-time swap here,
	 * because the only information we leak here is that we insist on
	 * returning p and q such that p > q, which is not a secret.
	 *
	 * NOTE(review): br_i15_sub() is called with a control value of 0,
	 * presumably so that only the borrow is computed and p is left
	 * unmodified -- confirm against inner.h.
	 */
	if (esize_p == esize_q && br_i15_sub(p, q, 0) == 1) {
		bufswap(p, q, (1 + plen) * sizeof *p);
		bufswap(sk->p, sk->q, sk->plen);
		bufswap(sk->dp, sk->dq, sk->dplen);
	}

	/*
	 * We have produced p, q, dp and dq. We can now compute iq = 1/q mod p.
	 *
	 * We ensured that p >= q, so this is just a matter of updating the
	 * header word for q (and possibly adding an extra word).
	 *
	 * Theoretically, the call below may fail, in case we were
	 * extraordinarily unlucky, and p = q. Another failure case is if
	 * Miller-Rabin failed us _twice_, and p and q are non-prime and
	 * have a factor in common. We report the error mostly because it
	 * is cheap and we can, but in practice this never happens (or, at
	 * least, it happens way less often than hardware glitches).
	 */
	q[0] = p[0];
	if (plen > qlen) {
		/*
		 * The extra top word of q overlaps the first word of the
		 * scratch area, so the scratch area is moved up by one.
		 */
		q[plen] = 0;
		t ++;
		tlen --;
	}
	br_i15_zero(t, p[0]);
	t[1] = 1;
	r = br_i15_moddiv(t, q, p, br_i15_ninv15(p[1]), t + 1 + plen);
	br_i15_encode(sk->iq, sk->iqlen, t);

	/*
	 * Compute the public modulus too, if required.
	 */
	if (pk != NULL) {
		br_i15_zero(t, p[0]);
		br_i15_mulacc(t, p, q);
		br_i15_encode(pk->n, pk->nlen, t);
	}

	return r;
}