/*
 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24*0957b409SSimon J. Gerraty
25*0957b409SSimon J. Gerraty #include "inner.h"
26*0957b409SSimon J. Gerraty
27*0957b409SSimon J. Gerraty #if BR_INT128 || BR_UMUL128
28*0957b409SSimon J. Gerraty
/*
 * U is 2 + ceil(BR_MAX_RSA_FACTOR / 31); presumably the number of 31-bit
 * words needed for a maximum-size RSA factor plus two extra words
 * (TODO confirm the intent of the "+ 2" against inner.h).
 *
 * TLEN is the element count of the uint64_t scratch array used by
 * br_rsa_i62_private(), which requires 6 * fwlen <= TLEN.
 */
#define U      (2 + ((BR_MAX_RSA_FACTOR + 30) / 31))
#define TLEN   (4 * U)  /* TLEN is counted in 64-bit words */
31*0957b409SSimon J. Gerraty
32*0957b409SSimon J. Gerraty /* see bearssl_rsa.h */
33*0957b409SSimon J. Gerraty uint32_t
br_rsa_i62_private(unsigned char * x,const br_rsa_private_key * sk)34*0957b409SSimon J. Gerraty br_rsa_i62_private(unsigned char *x, const br_rsa_private_key *sk)
35*0957b409SSimon J. Gerraty {
36*0957b409SSimon J. Gerraty const unsigned char *p, *q;
37*0957b409SSimon J. Gerraty size_t plen, qlen;
38*0957b409SSimon J. Gerraty size_t fwlen;
39*0957b409SSimon J. Gerraty uint32_t p0i, q0i;
40*0957b409SSimon J. Gerraty size_t xlen, u;
41*0957b409SSimon J. Gerraty uint64_t tmp[TLEN];
42*0957b409SSimon J. Gerraty long z;
43*0957b409SSimon J. Gerraty uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
44*0957b409SSimon J. Gerraty uint32_t r;
45*0957b409SSimon J. Gerraty
46*0957b409SSimon J. Gerraty /*
47*0957b409SSimon J. Gerraty * Compute the actual lengths of p and q, in bytes.
48*0957b409SSimon J. Gerraty * These lengths are not considered secret (we cannot really hide
49*0957b409SSimon J. Gerraty * them anyway in constant-time code).
50*0957b409SSimon J. Gerraty */
51*0957b409SSimon J. Gerraty p = sk->p;
52*0957b409SSimon J. Gerraty plen = sk->plen;
53*0957b409SSimon J. Gerraty while (plen > 0 && *p == 0) {
54*0957b409SSimon J. Gerraty p ++;
55*0957b409SSimon J. Gerraty plen --;
56*0957b409SSimon J. Gerraty }
57*0957b409SSimon J. Gerraty q = sk->q;
58*0957b409SSimon J. Gerraty qlen = sk->qlen;
59*0957b409SSimon J. Gerraty while (qlen > 0 && *q == 0) {
60*0957b409SSimon J. Gerraty q ++;
61*0957b409SSimon J. Gerraty qlen --;
62*0957b409SSimon J. Gerraty }
63*0957b409SSimon J. Gerraty
64*0957b409SSimon J. Gerraty /*
65*0957b409SSimon J. Gerraty * Compute the maximum factor length, in words.
66*0957b409SSimon J. Gerraty */
67*0957b409SSimon J. Gerraty z = (long)(plen > qlen ? plen : qlen) << 3;
68*0957b409SSimon J. Gerraty fwlen = 1;
69*0957b409SSimon J. Gerraty while (z > 0) {
70*0957b409SSimon J. Gerraty z -= 31;
71*0957b409SSimon J. Gerraty fwlen ++;
72*0957b409SSimon J. Gerraty }
73*0957b409SSimon J. Gerraty
74*0957b409SSimon J. Gerraty /*
75*0957b409SSimon J. Gerraty * Convert size to 62-bit words.
76*0957b409SSimon J. Gerraty */
77*0957b409SSimon J. Gerraty fwlen = (fwlen + 1) >> 1;
78*0957b409SSimon J. Gerraty
79*0957b409SSimon J. Gerraty /*
80*0957b409SSimon J. Gerraty * We need to fit at least 6 values in the stack buffer.
81*0957b409SSimon J. Gerraty */
82*0957b409SSimon J. Gerraty if (6 * fwlen > TLEN) {
83*0957b409SSimon J. Gerraty return 0;
84*0957b409SSimon J. Gerraty }
85*0957b409SSimon J. Gerraty
86*0957b409SSimon J. Gerraty /*
87*0957b409SSimon J. Gerraty * Compute signature length (in bytes).
88*0957b409SSimon J. Gerraty */
89*0957b409SSimon J. Gerraty xlen = (sk->n_bitlen + 7) >> 3;
90*0957b409SSimon J. Gerraty
91*0957b409SSimon J. Gerraty /*
92*0957b409SSimon J. Gerraty * Decode q.
93*0957b409SSimon J. Gerraty */
94*0957b409SSimon J. Gerraty mq = (uint32_t *)tmp;
95*0957b409SSimon J. Gerraty br_i31_decode(mq, q, qlen);
96*0957b409SSimon J. Gerraty
97*0957b409SSimon J. Gerraty /*
98*0957b409SSimon J. Gerraty * Decode p.
99*0957b409SSimon J. Gerraty */
100*0957b409SSimon J. Gerraty t1 = (uint32_t *)(tmp + fwlen);
101*0957b409SSimon J. Gerraty br_i31_decode(t1, p, plen);
102*0957b409SSimon J. Gerraty
103*0957b409SSimon J. Gerraty /*
104*0957b409SSimon J. Gerraty * Compute the modulus (product of the two factors), to compare
105*0957b409SSimon J. Gerraty * it with the source value. We use br_i31_mulacc(), since it's
106*0957b409SSimon J. Gerraty * already used later on.
107*0957b409SSimon J. Gerraty */
108*0957b409SSimon J. Gerraty t2 = (uint32_t *)(tmp + 2 * fwlen);
109*0957b409SSimon J. Gerraty br_i31_zero(t2, mq[0]);
110*0957b409SSimon J. Gerraty br_i31_mulacc(t2, mq, t1);
111*0957b409SSimon J. Gerraty
112*0957b409SSimon J. Gerraty /*
113*0957b409SSimon J. Gerraty * We encode the modulus into bytes, to perform the comparison
114*0957b409SSimon J. Gerraty * with bytes. We know that the product length, in bytes, is
115*0957b409SSimon J. Gerraty * exactly xlen.
116*0957b409SSimon J. Gerraty * The comparison actually computes the carry when subtracting
117*0957b409SSimon J. Gerraty * the modulus from the source value; that carry must be 1 for
118*0957b409SSimon J. Gerraty * a value in the correct range. We keep it in r, which is our
119*0957b409SSimon J. Gerraty * accumulator for the error code.
120*0957b409SSimon J. Gerraty */
121*0957b409SSimon J. Gerraty t3 = (uint32_t *)(tmp + 4 * fwlen);
122*0957b409SSimon J. Gerraty br_i31_encode(t3, xlen, t2);
123*0957b409SSimon J. Gerraty u = xlen;
124*0957b409SSimon J. Gerraty r = 0;
125*0957b409SSimon J. Gerraty while (u > 0) {
126*0957b409SSimon J. Gerraty uint32_t wn, wx;
127*0957b409SSimon J. Gerraty
128*0957b409SSimon J. Gerraty u --;
129*0957b409SSimon J. Gerraty wn = ((unsigned char *)t3)[u];
130*0957b409SSimon J. Gerraty wx = x[u];
131*0957b409SSimon J. Gerraty r = ((wx - (wn + r)) >> 8) & 1;
132*0957b409SSimon J. Gerraty }
133*0957b409SSimon J. Gerraty
134*0957b409SSimon J. Gerraty /*
135*0957b409SSimon J. Gerraty * Move the decoded p to another temporary buffer.
136*0957b409SSimon J. Gerraty */
137*0957b409SSimon J. Gerraty mp = (uint32_t *)(tmp + 2 * fwlen);
138*0957b409SSimon J. Gerraty memmove(mp, t1, 2 * fwlen * sizeof *t1);
139*0957b409SSimon J. Gerraty
140*0957b409SSimon J. Gerraty /*
141*0957b409SSimon J. Gerraty * Compute s2 = x^dq mod q.
142*0957b409SSimon J. Gerraty */
143*0957b409SSimon J. Gerraty q0i = br_i31_ninv31(mq[1]);
144*0957b409SSimon J. Gerraty s2 = (uint32_t *)(tmp + fwlen);
145*0957b409SSimon J. Gerraty br_i31_decode_reduce(s2, x, xlen, mq);
146*0957b409SSimon J. Gerraty r &= br_i62_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
147*0957b409SSimon J. Gerraty tmp + 3 * fwlen, TLEN - 3 * fwlen);
148*0957b409SSimon J. Gerraty
149*0957b409SSimon J. Gerraty /*
150*0957b409SSimon J. Gerraty * Compute s1 = x^dp mod p.
151*0957b409SSimon J. Gerraty */
152*0957b409SSimon J. Gerraty p0i = br_i31_ninv31(mp[1]);
153*0957b409SSimon J. Gerraty s1 = (uint32_t *)(tmp + 3 * fwlen);
154*0957b409SSimon J. Gerraty br_i31_decode_reduce(s1, x, xlen, mp);
155*0957b409SSimon J. Gerraty r &= br_i62_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
156*0957b409SSimon J. Gerraty tmp + 4 * fwlen, TLEN - 4 * fwlen);
157*0957b409SSimon J. Gerraty
158*0957b409SSimon J. Gerraty /*
159*0957b409SSimon J. Gerraty * Compute:
160*0957b409SSimon J. Gerraty * h = (s1 - s2)*(1/q) mod p
161*0957b409SSimon J. Gerraty * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
162*0957b409SSimon J. Gerraty * unclear about whether p may be lower than q (some existing,
163*0957b409SSimon J. Gerraty * widely deployed implementations of RSA don't tolerate p < q),
164*0957b409SSimon J. Gerraty * but we want to support that occurrence, so we need to use the
165*0957b409SSimon J. Gerraty * reduction function.
166*0957b409SSimon J. Gerraty *
167*0957b409SSimon J. Gerraty * Since we use br_i31_decode_reduce() for iq (purportedly, the
168*0957b409SSimon J. Gerraty * inverse of q modulo p), we also tolerate improperly large
169*0957b409SSimon J. Gerraty * values for this parameter.
170*0957b409SSimon J. Gerraty */
171*0957b409SSimon J. Gerraty t1 = (uint32_t *)(tmp + 4 * fwlen);
172*0957b409SSimon J. Gerraty t2 = (uint32_t *)(tmp + 5 * fwlen);
173*0957b409SSimon J. Gerraty br_i31_reduce(t2, s2, mp);
174*0957b409SSimon J. Gerraty br_i31_add(s1, mp, br_i31_sub(s1, t2, 1));
175*0957b409SSimon J. Gerraty br_i31_to_monty(s1, mp);
176*0957b409SSimon J. Gerraty br_i31_decode_reduce(t1, sk->iq, sk->iqlen, mp);
177*0957b409SSimon J. Gerraty br_i31_montymul(t2, s1, t1, mp, p0i);
178*0957b409SSimon J. Gerraty
179*0957b409SSimon J. Gerraty /*
180*0957b409SSimon J. Gerraty * h is now in t2. We compute the final result:
181*0957b409SSimon J. Gerraty * s = s2 + q*h
182*0957b409SSimon J. Gerraty * All these operations are non-modular.
183*0957b409SSimon J. Gerraty *
184*0957b409SSimon J. Gerraty * We need mq, s2 and t2. We use the t3 buffer as destination.
185*0957b409SSimon J. Gerraty * The buffers mp, s1 and t1 are no longer needed, so we can
186*0957b409SSimon J. Gerraty * reuse them for t3. Moreover, the first step of the computation
187*0957b409SSimon J. Gerraty * is to copy s2 into t3, after which s2 is not needed. Right
188*0957b409SSimon J. Gerraty * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5.
189*0957b409SSimon J. Gerraty * Therefore, we have ample room for t3 by simply using s2.
190*0957b409SSimon J. Gerraty */
191*0957b409SSimon J. Gerraty t3 = s2;
192*0957b409SSimon J. Gerraty br_i31_mulacc(t3, mq, t2);
193*0957b409SSimon J. Gerraty
194*0957b409SSimon J. Gerraty /*
195*0957b409SSimon J. Gerraty * Encode the result. Since we already checked the value of xlen,
196*0957b409SSimon J. Gerraty * we can just use it right away.
197*0957b409SSimon J. Gerraty */
198*0957b409SSimon J. Gerraty br_i31_encode(x, xlen, t3);
199*0957b409SSimon J. Gerraty
200*0957b409SSimon J. Gerraty /*
201*0957b409SSimon J. Gerraty * The only error conditions remaining at that point are invalid
202*0957b409SSimon J. Gerraty * values for p and q (even integers).
203*0957b409SSimon J. Gerraty */
204*0957b409SSimon J. Gerraty return p0i & q0i & r;
205*0957b409SSimon J. Gerraty }
206*0957b409SSimon J. Gerraty
207*0957b409SSimon J. Gerraty /* see bearssl_rsa.h */
208*0957b409SSimon J. Gerraty br_rsa_private
br_rsa_i62_private_get(void)209*0957b409SSimon J. Gerraty br_rsa_i62_private_get(void)
210*0957b409SSimon J. Gerraty {
211*0957b409SSimon J. Gerraty return &br_rsa_i62_private;
212*0957b409SSimon J. Gerraty }
213*0957b409SSimon J. Gerraty
214*0957b409SSimon J. Gerraty #else
215*0957b409SSimon J. Gerraty
216*0957b409SSimon J. Gerraty /* see bearssl_rsa.h */
217*0957b409SSimon J. Gerraty br_rsa_private
br_rsa_i62_private_get(void)218*0957b409SSimon J. Gerraty br_rsa_i62_private_get(void)
219*0957b409SSimon J. Gerraty {
220*0957b409SSimon J. Gerraty return 0;
221*0957b409SSimon J. Gerraty }
222*0957b409SSimon J. Gerraty
223*0957b409SSimon J. Gerraty #endif
224