1*0957b409SSimon J. Gerraty /*
2*0957b409SSimon J. Gerraty * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
3*0957b409SSimon J. Gerraty *
4*0957b409SSimon J. Gerraty * Permission is hereby granted, free of charge, to any person obtaining
5*0957b409SSimon J. Gerraty * a copy of this software and associated documentation files (the
6*0957b409SSimon J. Gerraty * "Software"), to deal in the Software without restriction, including
7*0957b409SSimon J. Gerraty * without limitation the rights to use, copy, modify, merge, publish,
8*0957b409SSimon J. Gerraty * distribute, sublicense, and/or sell copies of the Software, and to
9*0957b409SSimon J. Gerraty * permit persons to whom the Software is furnished to do so, subject to
10*0957b409SSimon J. Gerraty * the following conditions:
11*0957b409SSimon J. Gerraty *
12*0957b409SSimon J. Gerraty * The above copyright notice and this permission notice shall be
13*0957b409SSimon J. Gerraty * included in all copies or substantial portions of the Software.
14*0957b409SSimon J. Gerraty *
15*0957b409SSimon J. Gerraty * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16*0957b409SSimon J. Gerraty * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17*0957b409SSimon J. Gerraty * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18*0957b409SSimon J. Gerraty * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19*0957b409SSimon J. Gerraty * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20*0957b409SSimon J. Gerraty * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21*0957b409SSimon J. Gerraty * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22*0957b409SSimon J. Gerraty * SOFTWARE.
23*0957b409SSimon J. Gerraty */
24*0957b409SSimon J. Gerraty
25*0957b409SSimon J. Gerraty #include "inner.h"
26*0957b409SSimon J. Gerraty
27*0957b409SSimon J. Gerraty /* see bearssl_rsa.h */
28*0957b409SSimon J. Gerraty size_t
br_rsa_i31_compute_privexp(void * d,const br_rsa_private_key * sk,uint32_t e)29*0957b409SSimon J. Gerraty br_rsa_i31_compute_privexp(void *d,
30*0957b409SSimon J. Gerraty const br_rsa_private_key *sk, uint32_t e)
31*0957b409SSimon J. Gerraty {
32*0957b409SSimon J. Gerraty /*
33*0957b409SSimon J. Gerraty * We want to invert e modulo phi = (p-1)(q-1). This first
34*0957b409SSimon J. Gerraty * requires computing phi, which is easy since we have the factors
35*0957b409SSimon J. Gerraty * p and q in the private key structure.
36*0957b409SSimon J. Gerraty *
37*0957b409SSimon J. Gerraty * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
38*0957b409SSimon J. Gerraty * We could invert e modulo phi/4 then patch the result to
39*0957b409SSimon J. Gerraty * modulo phi, but this would involve assembling three modulus-wide
40*0957b409SSimon J. Gerraty * values (phi/4, 1 and e) and calling moddiv, that requires
41*0957b409SSimon J. Gerraty * three more temporaries, for a total of six big integers, or
42*0957b409SSimon J. Gerraty * slightly more than 3 kB of stack space for RSA-4096. This
43*0957b409SSimon J. Gerraty * exceeds our stack requirements.
44*0957b409SSimon J. Gerraty *
45*0957b409SSimon J. Gerraty * Instead, we first use one step of the extended GCD:
46*0957b409SSimon J. Gerraty *
47*0957b409SSimon J. Gerraty * - We compute phi = k*e + r (Euclidean division of phi by e).
48*0957b409SSimon J. Gerraty * If public exponent e is correct, then r != 0 (e must be
49*0957b409SSimon J. Gerraty * invertible modulo phi). We also have k != 0 since we
50*0957b409SSimon J. Gerraty * enforce non-ridiculously-small factors.
51*0957b409SSimon J. Gerraty *
52*0957b409SSimon J. Gerraty * - We find small u, v such that u*e - v*r = 1 (using a
53*0957b409SSimon J. Gerraty * binary GCD; we can arrange for u < r and v < e, i.e. all
54*0957b409SSimon J. Gerraty * values fit on 32 bits).
55*0957b409SSimon J. Gerraty *
56*0957b409SSimon J. Gerraty * - Solution is: d = u + v*k
57*0957b409SSimon J. Gerraty * This last computation is exact: since u < r and v < e,
58*0957b409SSimon J. Gerraty * the above implies d < r + e*((phi-r)/e) = phi
59*0957b409SSimon J. Gerraty */
60*0957b409SSimon J. Gerraty
61*0957b409SSimon J. Gerraty uint32_t tmp[4 * ((BR_MAX_RSA_FACTOR + 30) / 31) + 12];
62*0957b409SSimon J. Gerraty uint32_t *p, *q, *k, *m, *z, *phi;
63*0957b409SSimon J. Gerraty const unsigned char *pbuf, *qbuf;
64*0957b409SSimon J. Gerraty size_t plen, qlen, u, len, dlen;
65*0957b409SSimon J. Gerraty uint32_t r, a, b, u0, v0, u1, v1, he, hr;
66*0957b409SSimon J. Gerraty int i;
67*0957b409SSimon J. Gerraty
68*0957b409SSimon J. Gerraty /*
69*0957b409SSimon J. Gerraty * Check that e is correct.
70*0957b409SSimon J. Gerraty */
71*0957b409SSimon J. Gerraty if (e < 3 || (e & 1) == 0) {
72*0957b409SSimon J. Gerraty return 0;
73*0957b409SSimon J. Gerraty }
74*0957b409SSimon J. Gerraty
75*0957b409SSimon J. Gerraty /*
76*0957b409SSimon J. Gerraty * Check lengths of p and q, and that they are both odd.
77*0957b409SSimon J. Gerraty */
78*0957b409SSimon J. Gerraty pbuf = sk->p;
79*0957b409SSimon J. Gerraty plen = sk->plen;
80*0957b409SSimon J. Gerraty while (plen > 0 && *pbuf == 0) {
81*0957b409SSimon J. Gerraty pbuf ++;
82*0957b409SSimon J. Gerraty plen --;
83*0957b409SSimon J. Gerraty }
84*0957b409SSimon J. Gerraty if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)
85*0957b409SSimon J. Gerraty || (pbuf[plen - 1] & 1) != 1)
86*0957b409SSimon J. Gerraty {
87*0957b409SSimon J. Gerraty return 0;
88*0957b409SSimon J. Gerraty }
89*0957b409SSimon J. Gerraty qbuf = sk->q;
90*0957b409SSimon J. Gerraty qlen = sk->qlen;
91*0957b409SSimon J. Gerraty while (qlen > 0 && *qbuf == 0) {
92*0957b409SSimon J. Gerraty qbuf ++;
93*0957b409SSimon J. Gerraty qlen --;
94*0957b409SSimon J. Gerraty }
95*0957b409SSimon J. Gerraty if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8)
96*0957b409SSimon J. Gerraty || (qbuf[qlen - 1] & 1) != 1)
97*0957b409SSimon J. Gerraty {
98*0957b409SSimon J. Gerraty return 0;
99*0957b409SSimon J. Gerraty }
100*0957b409SSimon J. Gerraty
101*0957b409SSimon J. Gerraty /*
102*0957b409SSimon J. Gerraty * Output length is that of the modulus.
103*0957b409SSimon J. Gerraty */
104*0957b409SSimon J. Gerraty dlen = (sk->n_bitlen + 7) >> 3;
105*0957b409SSimon J. Gerraty if (d == NULL) {
106*0957b409SSimon J. Gerraty return dlen;
107*0957b409SSimon J. Gerraty }
108*0957b409SSimon J. Gerraty
109*0957b409SSimon J. Gerraty p = tmp;
110*0957b409SSimon J. Gerraty br_i31_decode(p, pbuf, plen);
111*0957b409SSimon J. Gerraty plen = (p[0] + 31) >> 5;
112*0957b409SSimon J. Gerraty q = p + 1 + plen;
113*0957b409SSimon J. Gerraty br_i31_decode(q, qbuf, qlen);
114*0957b409SSimon J. Gerraty qlen = (q[0] + 31) >> 5;
115*0957b409SSimon J. Gerraty
116*0957b409SSimon J. Gerraty /*
117*0957b409SSimon J. Gerraty * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
118*0957b409SSimon J. Gerraty * we do not need anymore). The mulacc function sets the announced
119*0957b409SSimon J. Gerraty * bit length of t to be the sum of the announced bit lengths of
120*0957b409SSimon J. Gerraty * p-1 and q-1, which is usually exact but may overshoot by one 1
121*0957b409SSimon J. Gerraty * bit in some cases; we readjust it to its true length.
122*0957b409SSimon J. Gerraty */
123*0957b409SSimon J. Gerraty p[1] --;
124*0957b409SSimon J. Gerraty q[1] --;
125*0957b409SSimon J. Gerraty phi = q + 1 + qlen;
126*0957b409SSimon J. Gerraty br_i31_zero(phi, p[0]);
127*0957b409SSimon J. Gerraty br_i31_mulacc(phi, p, q);
128*0957b409SSimon J. Gerraty len = (phi[0] + 31) >> 5;
129*0957b409SSimon J. Gerraty memmove(tmp, phi, (1 + len) * sizeof *phi);
130*0957b409SSimon J. Gerraty phi = tmp;
131*0957b409SSimon J. Gerraty phi[0] = br_i31_bit_length(phi + 1, len);
132*0957b409SSimon J. Gerraty len = (phi[0] + 31) >> 5;
133*0957b409SSimon J. Gerraty
134*0957b409SSimon J. Gerraty /*
135*0957b409SSimon J. Gerraty * Divide phi by public exponent e. The final remainder r must be
136*0957b409SSimon J. Gerraty * non-zero (otherwise, the key is invalid). The quotient is k,
137*0957b409SSimon J. Gerraty * which we write over phi, since we don't need phi after that.
138*0957b409SSimon J. Gerraty */
139*0957b409SSimon J. Gerraty r = 0;
140*0957b409SSimon J. Gerraty for (u = len; u >= 1; u --) {
141*0957b409SSimon J. Gerraty /*
142*0957b409SSimon J. Gerraty * Upon entry, r < e, and phi[u] < 2^31; hence,
143*0957b409SSimon J. Gerraty * hi:lo < e*2^31. Thus, the produced word k[u]
144*0957b409SSimon J. Gerraty * must be lower than 2^31, and the new remainder r
145*0957b409SSimon J. Gerraty * is lower than e.
146*0957b409SSimon J. Gerraty */
147*0957b409SSimon J. Gerraty uint32_t hi, lo;
148*0957b409SSimon J. Gerraty
149*0957b409SSimon J. Gerraty hi = r >> 1;
150*0957b409SSimon J. Gerraty lo = (r << 31) + phi[u];
151*0957b409SSimon J. Gerraty phi[u] = br_divrem(hi, lo, e, &r);
152*0957b409SSimon J. Gerraty }
153*0957b409SSimon J. Gerraty if (r == 0) {
154*0957b409SSimon J. Gerraty return 0;
155*0957b409SSimon J. Gerraty }
156*0957b409SSimon J. Gerraty k = phi;
157*0957b409SSimon J. Gerraty
158*0957b409SSimon J. Gerraty /*
159*0957b409SSimon J. Gerraty * Compute u and v such that u*e - v*r = GCD(e,r). We use
160*0957b409SSimon J. Gerraty * a binary GCD algorithm, with 6 extra integers a, b,
161*0957b409SSimon J. Gerraty * u0, u1, v0 and v1. Initial values are:
162*0957b409SSimon J. Gerraty * a = e u0 = 1 v0 = 0
163*0957b409SSimon J. Gerraty * b = r u1 = r v1 = e-1
164*0957b409SSimon J. Gerraty * The following invariants are maintained:
165*0957b409SSimon J. Gerraty * a = u0*e - v0*r
166*0957b409SSimon J. Gerraty * b = u1*e - v1*r
167*0957b409SSimon J. Gerraty * 0 < a <= e
168*0957b409SSimon J. Gerraty * 0 < b <= r
169*0957b409SSimon J. Gerraty * 0 <= u0 <= r
170*0957b409SSimon J. Gerraty * 0 <= v0 <= e
171*0957b409SSimon J. Gerraty * 0 <= u1 <= r
172*0957b409SSimon J. Gerraty * 0 <= v1 <= e
173*0957b409SSimon J. Gerraty *
174*0957b409SSimon J. Gerraty * At each iteration, we reduce either a or b by one bit, and
175*0957b409SSimon J. Gerraty * adjust u0, u1, v0 and v1 to maintain the invariants:
176*0957b409SSimon J. Gerraty * - if a is even, then a <- a/2
177*0957b409SSimon J. Gerraty * - otherwise, if b is even, then b <- b/2
178*0957b409SSimon J. Gerraty * - otherwise, if a > b, then a <- (a-b)/2
179*0957b409SSimon J. Gerraty * - otherwise, if b > a, then b <- (b-a)/2
180*0957b409SSimon J. Gerraty * Algorithm stops when a = b. At that point, the common value
181*0957b409SSimon J. Gerraty * is the GCD of e and r; it must be 1 (otherwise, the private
182*0957b409SSimon J. Gerraty * key or public exponent is not valid). The (u0,v0) or (u1,v1)
183*0957b409SSimon J. Gerraty * pairs are the solution we are looking for.
184*0957b409SSimon J. Gerraty *
185*0957b409SSimon J. Gerraty * Since either a or b is reduced by at least 1 bit at each
186*0957b409SSimon J. Gerraty * iteration, 62 iterations are enough to reach the end
187*0957b409SSimon J. Gerraty * condition.
188*0957b409SSimon J. Gerraty *
189*0957b409SSimon J. Gerraty * To maintain the invariants, we must compute the same operations
190*0957b409SSimon J. Gerraty * on the u* and v* values that we do on a and b:
191*0957b409SSimon J. Gerraty * - When a is divided by 2, u0 and v0 must be divided by 2.
192*0957b409SSimon J. Gerraty * - When b is divided by 2, u1 and v1 must be divided by 2.
193*0957b409SSimon J. Gerraty * - When b is subtracted from a, u1 and v1 are subtracted from
194*0957b409SSimon J. Gerraty * u0 and v0, respectively.
195*0957b409SSimon J. Gerraty * - When a is subtracted from b, u0 and v0 are subtracted from
196*0957b409SSimon J. Gerraty * u1 and v1, respectively.
197*0957b409SSimon J. Gerraty *
198*0957b409SSimon J. Gerraty * However, we want to keep the u* and v* values in their proper
199*0957b409SSimon J. Gerraty * ranges. The following remarks apply:
200*0957b409SSimon J. Gerraty *
201*0957b409SSimon J. Gerraty * - When a is divided by 2, then a is even. Therefore:
202*0957b409SSimon J. Gerraty *
203*0957b409SSimon J. Gerraty * * If r is odd, then u0 and v0 must have the same parity;
204*0957b409SSimon J. Gerraty * if they are both odd, then adding r to u0 and e to v0
205*0957b409SSimon J. Gerraty * makes them both even, and the division by 2 brings them
206*0957b409SSimon J. Gerraty * back to the proper range.
207*0957b409SSimon J. Gerraty *
208*0957b409SSimon J. Gerraty * * If r is even, then u0 must be even; if v0 is odd, then
209*0957b409SSimon J. Gerraty * adding r to u0 and e to v0 makes them both even, and the
210*0957b409SSimon J. Gerraty * division by 2 brings them back to the proper range.
211*0957b409SSimon J. Gerraty *
212*0957b409SSimon J. Gerraty * Thus, all we need to do is to look at the parity of v0,
213*0957b409SSimon J. Gerraty * and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
214*0957b409SSimon J. Gerraty * a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
215*0957b409SSimon J. Gerraty * division (r+1 does not overflow since r < e; and (e/2)+1
216*0957b409SSimon J. Gerraty * is equal to (e+1)/2 since e is odd).
217*0957b409SSimon J. Gerraty *
218*0957b409SSimon J. Gerraty * - When we subtract b from a, three cases may occur:
219*0957b409SSimon J. Gerraty *
220*0957b409SSimon J. Gerraty * * u1 <= u0 and v1 <= v0: just do the subtractions
221*0957b409SSimon J. Gerraty *
222*0957b409SSimon J. Gerraty * * u1 > u0 and v1 > v0: compute:
223*0957b409SSimon J. Gerraty * (u0, v0) <- (u0 + r - u1, v0 + e - v1)
224*0957b409SSimon J. Gerraty *
225*0957b409SSimon J. Gerraty * * u1 <= u0 and v1 > v0: compute:
226*0957b409SSimon J. Gerraty * (u0, v0) <- (u0 + r - u1, v0 + e - v1)
227*0957b409SSimon J. Gerraty *
228*0957b409SSimon J. Gerraty * The fourth case (u1 > u0 and v1 <= v0) is not possible
229*0957b409SSimon J. Gerraty * because it would contradict "b < a" (which is the reason
230*0957b409SSimon J. Gerraty * why we subtract b from a).
231*0957b409SSimon J. Gerraty *
232*0957b409SSimon J. Gerraty * The tricky case is the third one: from the equations, it
233*0957b409SSimon J. Gerraty * seems that u0 may go out of range. However, the invariants
234*0957b409SSimon J. Gerraty * and ranges of other values imply that, in that case, the
235*0957b409SSimon J. Gerraty * new u0 does not actually exceed the range.
236*0957b409SSimon J. Gerraty *
237*0957b409SSimon J. Gerraty * We can thus handle the subtraction by adding (r,e) based
238*0957b409SSimon J. Gerraty * solely on the comparison between v0 and v1.
239*0957b409SSimon J. Gerraty */
240*0957b409SSimon J. Gerraty a = e;
241*0957b409SSimon J. Gerraty b = r;
242*0957b409SSimon J. Gerraty u0 = 1;
243*0957b409SSimon J. Gerraty v0 = 0;
244*0957b409SSimon J. Gerraty u1 = r;
245*0957b409SSimon J. Gerraty v1 = e - 1;
246*0957b409SSimon J. Gerraty hr = (r + 1) >> 1;
247*0957b409SSimon J. Gerraty he = (e >> 1) + 1;
248*0957b409SSimon J. Gerraty for (i = 0; i < 62; i ++) {
249*0957b409SSimon J. Gerraty uint32_t oa, ob, agtb, bgta;
250*0957b409SSimon J. Gerraty uint32_t sab, sba, da, db;
251*0957b409SSimon J. Gerraty uint32_t ctl;
252*0957b409SSimon J. Gerraty
253*0957b409SSimon J. Gerraty oa = a & 1; /* 1 if a is odd */
254*0957b409SSimon J. Gerraty ob = b & 1; /* 1 if b is odd */
255*0957b409SSimon J. Gerraty agtb = GT(a, b); /* 1 if a > b */
256*0957b409SSimon J. Gerraty bgta = GT(b, a); /* 1 if b > a */
257*0957b409SSimon J. Gerraty
258*0957b409SSimon J. Gerraty sab = oa & ob & agtb; /* 1 if a <- a-b */
259*0957b409SSimon J. Gerraty sba = oa & ob & bgta; /* 1 if b <- b-a */
260*0957b409SSimon J. Gerraty
261*0957b409SSimon J. Gerraty /* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */
262*0957b409SSimon J. Gerraty ctl = GT(v1, v0);
263*0957b409SSimon J. Gerraty a -= b & -sab;
264*0957b409SSimon J. Gerraty u0 -= (u1 - (r & -ctl)) & -sab;
265*0957b409SSimon J. Gerraty v0 -= (v1 - (e & -ctl)) & -sab;
266*0957b409SSimon J. Gerraty
267*0957b409SSimon J. Gerraty /* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
268*0957b409SSimon J. Gerraty ctl = GT(v0, v1);
269*0957b409SSimon J. Gerraty b -= a & -sba;
270*0957b409SSimon J. Gerraty u1 -= (u0 - (r & -ctl)) & -sba;
271*0957b409SSimon J. Gerraty v1 -= (v0 - (e & -ctl)) & -sba;
272*0957b409SSimon J. Gerraty
273*0957b409SSimon J. Gerraty da = NOT(oa) | sab; /* 1 if a <- a/2 */
274*0957b409SSimon J. Gerraty db = (oa & NOT(ob)) | sba; /* 1 if b <- b/2 */
275*0957b409SSimon J. Gerraty
276*0957b409SSimon J. Gerraty /* a <- a/2, u0 <- u0/2, v0 <- v0/2 */
277*0957b409SSimon J. Gerraty ctl = v0 & 1;
278*0957b409SSimon J. Gerraty a ^= (a ^ (a >> 1)) & -da;
279*0957b409SSimon J. Gerraty u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da;
280*0957b409SSimon J. Gerraty v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da;
281*0957b409SSimon J. Gerraty
282*0957b409SSimon J. Gerraty /* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
283*0957b409SSimon J. Gerraty ctl = v1 & 1;
284*0957b409SSimon J. Gerraty b ^= (b ^ (b >> 1)) & -db;
285*0957b409SSimon J. Gerraty u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db;
286*0957b409SSimon J. Gerraty v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db;
287*0957b409SSimon J. Gerraty }
288*0957b409SSimon J. Gerraty
289*0957b409SSimon J. Gerraty /*
290*0957b409SSimon J. Gerraty * Check that the GCD is indeed 1. If not, then the key is invalid
291*0957b409SSimon J. Gerraty * (and there's no harm in leaking that piece of information).
292*0957b409SSimon J. Gerraty */
293*0957b409SSimon J. Gerraty if (a != 1) {
294*0957b409SSimon J. Gerraty return 0;
295*0957b409SSimon J. Gerraty }
296*0957b409SSimon J. Gerraty
297*0957b409SSimon J. Gerraty /*
298*0957b409SSimon J. Gerraty * Now we have u0*e - v0*r = 1. Let's compute the result as:
299*0957b409SSimon J. Gerraty * d = u0 + v0*k
300*0957b409SSimon J. Gerraty * We still have k in the tmp[] array, and its announced bit
301*0957b409SSimon J. Gerraty * length is that of phi.
302*0957b409SSimon J. Gerraty */
303*0957b409SSimon J. Gerraty m = k + 1 + len;
304*0957b409SSimon J. Gerraty m[0] = (1 << 5) + 1; /* bit length is 32 bits, encoded */
305*0957b409SSimon J. Gerraty m[1] = v0 & 0x7FFFFFFF;
306*0957b409SSimon J. Gerraty m[2] = v0 >> 31;
307*0957b409SSimon J. Gerraty z = m + 3;
308*0957b409SSimon J. Gerraty br_i31_zero(z, k[0]);
309*0957b409SSimon J. Gerraty z[1] = u0 & 0x7FFFFFFF;
310*0957b409SSimon J. Gerraty z[2] = u0 >> 31;
311*0957b409SSimon J. Gerraty br_i31_mulacc(z, k, m);
312*0957b409SSimon J. Gerraty
313*0957b409SSimon J. Gerraty /*
314*0957b409SSimon J. Gerraty * Encode the result.
315*0957b409SSimon J. Gerraty */
316*0957b409SSimon J. Gerraty br_i31_encode(d, dlen, z);
317*0957b409SSimon J. Gerraty return dlen;
318*0957b409SSimon J. Gerraty }
319