1 /* 2 * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* see bearssl_rsa.h */ 28 size_t 29 br_rsa_i31_compute_privexp(void *d, 30 const br_rsa_private_key *sk, uint32_t e) 31 { 32 /* 33 * We want to invert e modulo phi = (p-1)(q-1). This first 34 * requires computing phi, which is easy since we have the factors 35 * p and q in the private key structure. 36 * 37 * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer. 38 * We could invert e modulo phi/4 then patch the result to 39 * modulo phi, but this would involve assembling three modulus-wide 40 * values (phi/4, 1 and e) and calling moddiv, that requires 41 * three more temporaries, for a total of six big integers, or 42 * slightly more than 3 kB of stack space for RSA-4096. This 43 * exceeds our stack requirements. 44 * 45 * Instead, we first use one step of the extended GCD: 46 * 47 * - We compute phi = k*e + r (Euclidean division of phi by e). 48 * If public exponent e is correct, then r != 0 (e must be 49 * invertible modulo phi). We also have k != 0 since we 50 * enforce non-ridiculously-small factors. 51 * 52 * - We find small u, v such that u*e - v*r = 1 (using a 53 * binary GCD; we can arrange for u < r and v < e, i.e. all 54 * values fit on 32 bits). 55 * 56 * - Solution is: d = u + v*k 57 * This last computation is exact: since u < r and v < e, 58 * the above implies d < r + e*((phi-r)/e) = phi 59 */ 60 61 uint32_t tmp[4 * ((BR_MAX_RSA_FACTOR + 30) / 31) + 12]; 62 uint32_t *p, *q, *k, *m, *z, *phi; 63 const unsigned char *pbuf, *qbuf; 64 size_t plen, qlen, u, len, dlen; 65 uint32_t r, a, b, u0, v0, u1, v1, he, hr; 66 int i; 67 68 /* 69 * Check that e is correct. 70 */ 71 if (e < 3 || (e & 1) == 0) { 72 return 0; 73 } 74 75 /* 76 * Check lengths of p and q, and that they are both odd. 77 */ 78 pbuf = sk->p; 79 plen = sk->plen; 80 while (plen > 0 && *pbuf == 0) { 81 pbuf ++; 82 plen --; 83 } 84 if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8) 85 || (pbuf[plen - 1] & 1) != 1) 86 { 87 return 0; 88 } 89 qbuf = sk->q; 90 qlen = sk->qlen; 91 while (qlen > 0 && *qbuf == 0) { 92 qbuf ++; 93 qlen --; 94 } 95 if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8) 96 || (qbuf[qlen - 1] & 1) != 1) 97 { 98 return 0; 99 } 100 101 /* 102 * Output length is that of the modulus. 103 */ 104 dlen = (sk->n_bitlen + 7) >> 3; 105 if (d == NULL) { 106 return dlen; 107 } 108 109 p = tmp; 110 br_i31_decode(p, pbuf, plen); 111 plen = (p[0] + 31) >> 5; 112 q = p + 1 + plen; 113 br_i31_decode(q, qbuf, qlen); 114 qlen = (q[0] + 31) >> 5; 115 116 /* 117 * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that 118 * we do not need anymore). The mulacc function sets the announced 119 * bit length of t to be the sum of the announced bit lengths of 120 * p-1 and q-1, which is usually exact but may overshoot by one 1 121 * bit in some cases; we readjust it to its true length. 122 */ 123 p[1] --; 124 q[1] --; 125 phi = q + 1 + qlen; 126 br_i31_zero(phi, p[0]); 127 br_i31_mulacc(phi, p, q); 128 len = (phi[0] + 31) >> 5; 129 memmove(tmp, phi, (1 + len) * sizeof *phi); 130 phi = tmp; 131 phi[0] = br_i31_bit_length(phi + 1, len); 132 len = (phi[0] + 31) >> 5; 133 134 /* 135 * Divide phi by public exponent e. The final remainder r must be 136 * non-zero (otherwise, the key is invalid). The quotient is k, 137 * which we write over phi, since we don't need phi after that. 138 */ 139 r = 0; 140 for (u = len; u >= 1; u --) { 141 /* 142 * Upon entry, r < e, and phi[u] < 2^31; hence, 143 * hi:lo < e*2^31. Thus, the produced word k[u] 144 * must be lower than 2^31, and the new remainder r 145 * is lower than e. 146 */ 147 uint32_t hi, lo; 148 149 hi = r >> 1; 150 lo = (r << 31) + phi[u]; 151 phi[u] = br_divrem(hi, lo, e, &r); 152 } 153 if (r == 0) { 154 return 0; 155 } 156 k = phi; 157 158 /* 159 * Compute u and v such that u*e - v*r = GCD(e,r). We use 160 * a binary GCD algorithm, with 6 extra integers a, b, 161 * u0, u1, v0 and v1. Initial values are: 162 * a = e u0 = 1 v0 = 0 163 * b = r u1 = r v1 = e-1 164 * The following invariants are maintained: 165 * a = u0*e - v0*r 166 * b = u1*e - v1*r 167 * 0 < a <= e 168 * 0 < b <= r 169 * 0 <= u0 <= r 170 * 0 <= v0 <= e 171 * 0 <= u1 <= r 172 * 0 <= v1 <= e 173 * 174 * At each iteration, we reduce either a or b by one bit, and 175 * adjust u0, u1, v0 and v1 to maintain the invariants: 176 * - if a is even, then a <- a/2 177 * - otherwise, if b is even, then b <- b/2 178 * - otherwise, if a > b, then a <- (a-b)/2 179 * - otherwise, if b > a, then b <- (b-a)/2 180 * Algorithm stops when a = b. At that point, the common value 181 * is the GCD of e and r; it must be 1 (otherwise, the private 182 * key or public exponent is not valid). The (u0,v0) or (u1,v1) 183 * pairs are the solution we are looking for. 184 * 185 * Since either a or b is reduced by at least 1 bit at each 186 * iteration, 62 iterations are enough to reach the end 187 * condition. 188 * 189 * To maintain the invariants, we must compute the same operations 190 * on the u* and v* values that we do on a and b: 191 * - When a is divided by 2, u0 and v0 must be divided by 2. 192 * - When b is divided by 2, u1 and v1 must be divided by 2. 193 * - When b is subtracted from a, u1 and v1 are subtracted from 194 * u0 and v0, respectively. 195 * - When a is subtracted from b, u0 and v0 are subtracted from 196 * u1 and v1, respectively. 197 * 198 * However, we want to keep the u* and v* values in their proper 199 * ranges. The following remarks apply: 200 * 201 * - When a is divided by 2, then a is even. Therefore: 202 * 203 * * If r is odd, then u0 and v0 must have the same parity; 204 * if they are both odd, then adding r to u0 and e to v0 205 * makes them both even, and the division by 2 brings them 206 * back to the proper range. 207 * 208 * * If r is even, then u0 must be even; if v0 is odd, then 209 * adding r to u0 and e to v0 makes them both even, and the 210 * division by 2 brings them back to the proper range. 211 * 212 * Thus, all we need to do is to look at the parity of v0, 213 * and add (r,e) to (u0,v0) when v0 is odd. In order to avoid 214 * a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the 215 * division (r+1 does not overflow since r < e; and (e/2)+1 216 * is equal to (e+1)/2 since e is odd). 217 * 218 * - When we subtract b from a, three cases may occur: 219 * 220 * * u1 <= u0 and v1 <= v0: just do the subtractions 221 * 222 * * u1 > u0 and v1 > v0: compute: 223 * (u0, v0) <- (u0 + r - u1, v0 + e - v1) 224 * 225 * * u1 <= u0 and v1 > v0: compute: 226 * (u0, v0) <- (u0 + r - u1, v0 + e - v1) 227 * 228 * The fourth case (u1 > u0 and v1 <= v0) is not possible 229 * because it would contradict "b < a" (which is the reason 230 * why we subtract b from a). 231 * 232 * The tricky case is the third one: from the equations, it 233 * seems that u0 may go out of range. However, the invariants 234 * and ranges of other values imply that, in that case, the 235 * new u0 does not actually exceed the range. 236 * 237 * We can thus handle the subtraction by adding (r,e) based 238 * solely on the comparison between v0 and v1. 239 */ 240 a = e; 241 b = r; 242 u0 = 1; 243 v0 = 0; 244 u1 = r; 245 v1 = e - 1; 246 hr = (r + 1) >> 1; 247 he = (e >> 1) + 1; 248 for (i = 0; i < 62; i ++) { 249 uint32_t oa, ob, agtb, bgta; 250 uint32_t sab, sba, da, db; 251 uint32_t ctl; 252 253 oa = a & 1; /* 1 if a is odd */ 254 ob = b & 1; /* 1 if b is odd */ 255 agtb = GT(a, b); /* 1 if a > b */ 256 bgta = GT(b, a); /* 1 if b > a */ 257 258 sab = oa & ob & agtb; /* 1 if a <- a-b */ 259 sba = oa & ob & bgta; /* 1 if b <- b-a */ 260 261 /* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */ 262 ctl = GT(v1, v0); 263 a -= b & -sab; 264 u0 -= (u1 - (r & -ctl)) & -sab; 265 v0 -= (v1 - (e & -ctl)) & -sab; 266 267 /* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */ 268 ctl = GT(v0, v1); 269 b -= a & -sba; 270 u1 -= (u0 - (r & -ctl)) & -sba; 271 v1 -= (v0 - (e & -ctl)) & -sba; 272 273 da = NOT(oa) | sab; /* 1 if a <- a/2 */ 274 db = (oa & NOT(ob)) | sba; /* 1 if b <- b/2 */ 275 276 /* a <- a/2, u0 <- u0/2, v0 <- v0/2 */ 277 ctl = v0 & 1; 278 a ^= (a ^ (a >> 1)) & -da; 279 u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da; 280 v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da; 281 282 /* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */ 283 ctl = v1 & 1; 284 b ^= (b ^ (b >> 1)) & -db; 285 u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db; 286 v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db; 287 } 288 289 /* 290 * Check that the GCD is indeed 1. If not, then the key is invalid 291 * (and there's no harm in leaking that piece of information). 292 */ 293 if (a != 1) { 294 return 0; 295 } 296 297 /* 298 * Now we have u0*e - v0*r = 1. Let's compute the result as: 299 * d = u0 + v0*k 300 * We still have k in the tmp[] array, and its announced bit 301 * length is that of phi. 302 */ 303 m = k + 1 + len; 304 m[0] = (1 << 5) + 1; /* bit length is 32 bits, encoded */ 305 m[1] = v0 & 0x7FFFFFFF; 306 m[2] = v0 >> 31; 307 z = m + 3; 308 br_i31_zero(z, k[0]); 309 z[1] = u0 & 0x7FFFFFFF; 310 z[2] = u0 >> 31; 311 br_i31_mulacc(z, k, m); 312 313 /* 314 * Encode the result. 315 */ 316 br_i31_encode(d, dlen, z); 317 return dlen; 318 } 319