1*0957b409SSimon J. Gerraty /* 2*0957b409SSimon J. Gerraty * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3*0957b409SSimon J. Gerraty * 4*0957b409SSimon J. Gerraty * Permission is hereby granted, free of charge, to any person obtaining 5*0957b409SSimon J. Gerraty * a copy of this software and associated documentation files (the 6*0957b409SSimon J. Gerraty * "Software"), to deal in the Software without restriction, including 7*0957b409SSimon J. Gerraty * without limitation the rights to use, copy, modify, merge, publish, 8*0957b409SSimon J. Gerraty * distribute, sublicense, and/or sell copies of the Software, and to 9*0957b409SSimon J. Gerraty * permit persons to whom the Software is furnished to do so, subject to 10*0957b409SSimon J. Gerraty * the following conditions: 11*0957b409SSimon J. Gerraty * 12*0957b409SSimon J. Gerraty * The above copyright notice and this permission notice shall be 13*0957b409SSimon J. Gerraty * included in all copies or substantial portions of the Software. 14*0957b409SSimon J. Gerraty * 15*0957b409SSimon J. Gerraty * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16*0957b409SSimon J. Gerraty * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17*0957b409SSimon J. Gerraty * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18*0957b409SSimon J. Gerraty * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19*0957b409SSimon J. Gerraty * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20*0957b409SSimon J. Gerraty * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21*0957b409SSimon J. Gerraty * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22*0957b409SSimon J. Gerraty * SOFTWARE. 23*0957b409SSimon J. Gerraty */ 24*0957b409SSimon J. Gerraty 25*0957b409SSimon J. Gerraty #include "inner.h" 26*0957b409SSimon J. Gerraty 27*0957b409SSimon J. Gerraty #define U (2 + ((BR_MAX_RSA_FACTOR + 14) / 15)) 28*0957b409SSimon J. Gerraty #define TLEN (8 * U) 29*0957b409SSimon J. Gerraty 30*0957b409SSimon J. Gerraty /* see bearssl_rsa.h */ 31*0957b409SSimon J. Gerraty uint32_t 32*0957b409SSimon J. Gerraty br_rsa_i15_private(unsigned char *x, const br_rsa_private_key *sk) 33*0957b409SSimon J. Gerraty { 34*0957b409SSimon J. Gerraty const unsigned char *p, *q; 35*0957b409SSimon J. Gerraty size_t plen, qlen; 36*0957b409SSimon J. Gerraty size_t fwlen; 37*0957b409SSimon J. Gerraty uint16_t p0i, q0i; 38*0957b409SSimon J. Gerraty size_t xlen, u; 39*0957b409SSimon J. Gerraty uint16_t tmp[1 + TLEN]; 40*0957b409SSimon J. Gerraty long z; 41*0957b409SSimon J. Gerraty uint16_t *mp, *mq, *s1, *s2, *t1, *t2, *t3; 42*0957b409SSimon J. Gerraty uint32_t r; 43*0957b409SSimon J. Gerraty 44*0957b409SSimon J. Gerraty /* 45*0957b409SSimon J. Gerraty * Compute the actual lengths of p and q, in bytes. 46*0957b409SSimon J. Gerraty * These lengths are not considered secret (we cannot really hide 47*0957b409SSimon J. Gerraty * them anyway in constant-time code). 48*0957b409SSimon J. Gerraty */ 49*0957b409SSimon J. Gerraty p = sk->p; 50*0957b409SSimon J. Gerraty plen = sk->plen; 51*0957b409SSimon J. Gerraty while (plen > 0 && *p == 0) { 52*0957b409SSimon J. Gerraty p ++; 53*0957b409SSimon J. Gerraty plen --; 54*0957b409SSimon J. Gerraty } 55*0957b409SSimon J. Gerraty q = sk->q; 56*0957b409SSimon J. Gerraty qlen = sk->qlen; 57*0957b409SSimon J. Gerraty while (qlen > 0 && *q == 0) { 58*0957b409SSimon J. Gerraty q ++; 59*0957b409SSimon J. Gerraty qlen --; 60*0957b409SSimon J. Gerraty } 61*0957b409SSimon J. Gerraty 62*0957b409SSimon J. Gerraty /* 63*0957b409SSimon J. Gerraty * Compute the maximum factor length, in words. 64*0957b409SSimon J. Gerraty */ 65*0957b409SSimon J. Gerraty z = (long)(plen > qlen ? plen : qlen) << 3; 66*0957b409SSimon J. Gerraty fwlen = 1; 67*0957b409SSimon J. Gerraty while (z > 0) { 68*0957b409SSimon J. Gerraty z -= 15; 69*0957b409SSimon J. Gerraty fwlen ++; 70*0957b409SSimon J. Gerraty } 71*0957b409SSimon J. Gerraty /* 72*0957b409SSimon J. Gerraty * Round up the word length to an even number. 73*0957b409SSimon J. Gerraty */ 74*0957b409SSimon J. Gerraty fwlen += (fwlen & 1); 75*0957b409SSimon J. Gerraty 76*0957b409SSimon J. Gerraty /* 77*0957b409SSimon J. Gerraty * We need to fit at least 6 values in the stack buffer. 78*0957b409SSimon J. Gerraty */ 79*0957b409SSimon J. Gerraty if (6 * fwlen > TLEN) { 80*0957b409SSimon J. Gerraty return 0; 81*0957b409SSimon J. Gerraty } 82*0957b409SSimon J. Gerraty 83*0957b409SSimon J. Gerraty /* 84*0957b409SSimon J. Gerraty * Compute signature length (in bytes). 85*0957b409SSimon J. Gerraty */ 86*0957b409SSimon J. Gerraty xlen = (sk->n_bitlen + 7) >> 3; 87*0957b409SSimon J. Gerraty 88*0957b409SSimon J. Gerraty /* 89*0957b409SSimon J. Gerraty * Ensure 32-bit alignment for value words. 90*0957b409SSimon J. Gerraty */ 91*0957b409SSimon J. Gerraty mq = tmp; 92*0957b409SSimon J. Gerraty if (((uintptr_t)mq & 2) == 0) { 93*0957b409SSimon J. Gerraty mq ++; 94*0957b409SSimon J. Gerraty } 95*0957b409SSimon J. Gerraty 96*0957b409SSimon J. Gerraty /* 97*0957b409SSimon J. Gerraty * Decode q. 98*0957b409SSimon J. Gerraty */ 99*0957b409SSimon J. Gerraty br_i15_decode(mq, q, qlen); 100*0957b409SSimon J. Gerraty 101*0957b409SSimon J. Gerraty /* 102*0957b409SSimon J. Gerraty * Decode p. 103*0957b409SSimon J. Gerraty */ 104*0957b409SSimon J. Gerraty t1 = mq + fwlen; 105*0957b409SSimon J. Gerraty br_i15_decode(t1, p, plen); 106*0957b409SSimon J. Gerraty 107*0957b409SSimon J. Gerraty /* 108*0957b409SSimon J. Gerraty * Compute the modulus (product of the two factors), to compare 109*0957b409SSimon J. Gerraty * it with the source value. We use br_i15_mulacc(), since it's 110*0957b409SSimon J. Gerraty * already used later on. 111*0957b409SSimon J. Gerraty */ 112*0957b409SSimon J. Gerraty t2 = mq + 2 * fwlen; 113*0957b409SSimon J. Gerraty br_i15_zero(t2, mq[0]); 114*0957b409SSimon J. Gerraty br_i15_mulacc(t2, mq, t1); 115*0957b409SSimon J. Gerraty 116*0957b409SSimon J. Gerraty /* 117*0957b409SSimon J. Gerraty * We encode the modulus into bytes, to perform the comparison 118*0957b409SSimon J. Gerraty * with bytes. We know that the product length, in bytes, is 119*0957b409SSimon J. Gerraty * exactly xlen. 120*0957b409SSimon J. Gerraty * The comparison actually computes the carry when subtracting 121*0957b409SSimon J. Gerraty * the modulus from the source value; that carry must be 1 for 122*0957b409SSimon J. Gerraty * a value in the correct range. We keep it in r, which is our 123*0957b409SSimon J. Gerraty * accumulator for the error code. 124*0957b409SSimon J. Gerraty */ 125*0957b409SSimon J. Gerraty t3 = mq + 4 * fwlen; 126*0957b409SSimon J. Gerraty br_i15_encode(t3, xlen, t2); 127*0957b409SSimon J. Gerraty u = xlen; 128*0957b409SSimon J. Gerraty r = 0; 129*0957b409SSimon J. Gerraty while (u > 0) { 130*0957b409SSimon J. Gerraty uint32_t wn, wx; 131*0957b409SSimon J. Gerraty 132*0957b409SSimon J. Gerraty u --; 133*0957b409SSimon J. Gerraty wn = ((unsigned char *)t3)[u]; 134*0957b409SSimon J. Gerraty wx = x[u]; 135*0957b409SSimon J. Gerraty r = ((wx - (wn + r)) >> 8) & 1; 136*0957b409SSimon J. Gerraty } 137*0957b409SSimon J. Gerraty 138*0957b409SSimon J. Gerraty /* 139*0957b409SSimon J. Gerraty * Move the decoded p to another temporary buffer. 140*0957b409SSimon J. Gerraty */ 141*0957b409SSimon J. Gerraty mp = mq + 2 * fwlen; 142*0957b409SSimon J. Gerraty memmove(mp, t1, fwlen * sizeof *t1); 143*0957b409SSimon J. Gerraty 144*0957b409SSimon J. Gerraty /* 145*0957b409SSimon J. Gerraty * Compute s2 = x^dq mod q. 146*0957b409SSimon J. Gerraty */ 147*0957b409SSimon J. Gerraty q0i = br_i15_ninv15(mq[1]); 148*0957b409SSimon J. Gerraty s2 = mq + fwlen; 149*0957b409SSimon J. Gerraty br_i15_decode_reduce(s2, x, xlen, mq); 150*0957b409SSimon J. Gerraty r &= br_i15_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i, 151*0957b409SSimon J. Gerraty mq + 3 * fwlen, TLEN - 3 * fwlen); 152*0957b409SSimon J. Gerraty 153*0957b409SSimon J. Gerraty /* 154*0957b409SSimon J. Gerraty * Compute s1 = x^dq mod q. 155*0957b409SSimon J. Gerraty */ 156*0957b409SSimon J. Gerraty p0i = br_i15_ninv15(mp[1]); 157*0957b409SSimon J. Gerraty s1 = mq + 3 * fwlen; 158*0957b409SSimon J. Gerraty br_i15_decode_reduce(s1, x, xlen, mp); 159*0957b409SSimon J. Gerraty r &= br_i15_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i, 160*0957b409SSimon J. Gerraty mq + 4 * fwlen, TLEN - 4 * fwlen); 161*0957b409SSimon J. Gerraty 162*0957b409SSimon J. Gerraty /* 163*0957b409SSimon J. Gerraty * Compute: 164*0957b409SSimon J. Gerraty * h = (s1 - s2)*(1/q) mod p 165*0957b409SSimon J. Gerraty * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is 166*0957b409SSimon J. Gerraty * unclear about whether p may be lower than q (some existing, 167*0957b409SSimon J. Gerraty * widely deployed implementations of RSA don't tolerate p < q), 168*0957b409SSimon J. Gerraty * but we want to support that occurrence, so we need to use the 169*0957b409SSimon J. Gerraty * reduction function. 170*0957b409SSimon J. Gerraty * 171*0957b409SSimon J. Gerraty * Since we use br_i15_decode_reduce() for iq (purportedly, the 172*0957b409SSimon J. Gerraty * inverse of q modulo p), we also tolerate improperly large 173*0957b409SSimon J. Gerraty * values for this parameter. 174*0957b409SSimon J. Gerraty */ 175*0957b409SSimon J. Gerraty t1 = mq + 4 * fwlen; 176*0957b409SSimon J. Gerraty t2 = mq + 5 * fwlen; 177*0957b409SSimon J. Gerraty br_i15_reduce(t2, s2, mp); 178*0957b409SSimon J. Gerraty br_i15_add(s1, mp, br_i15_sub(s1, t2, 1)); 179*0957b409SSimon J. Gerraty br_i15_to_monty(s1, mp); 180*0957b409SSimon J. Gerraty br_i15_decode_reduce(t1, sk->iq, sk->iqlen, mp); 181*0957b409SSimon J. Gerraty br_i15_montymul(t2, s1, t1, mp, p0i); 182*0957b409SSimon J. Gerraty 183*0957b409SSimon J. Gerraty /* 184*0957b409SSimon J. Gerraty * h is now in t2. We compute the final result: 185*0957b409SSimon J. Gerraty * s = s2 + q*h 186*0957b409SSimon J. Gerraty * All these operations are non-modular. 187*0957b409SSimon J. Gerraty * 188*0957b409SSimon J. Gerraty * We need mq, s2 and t2. We use the t3 buffer as destination. 189*0957b409SSimon J. Gerraty * The buffers mp, s1 and t1 are no longer needed, so we can 190*0957b409SSimon J. Gerraty * reuse them for t3. Moreover, the first step of the computation 191*0957b409SSimon J. Gerraty * is to copy s2 into t3, after which s2 is not needed. Right 192*0957b409SSimon J. Gerraty * now, mq is in slot 0, s2 is in slot 1, and t2 in slot 5. 193*0957b409SSimon J. Gerraty * Therefore, we have ample room for t3 by simply using s2. 194*0957b409SSimon J. Gerraty */ 195*0957b409SSimon J. Gerraty t3 = s2; 196*0957b409SSimon J. Gerraty br_i15_mulacc(t3, mq, t2); 197*0957b409SSimon J. Gerraty 198*0957b409SSimon J. Gerraty /* 199*0957b409SSimon J. Gerraty * Encode the result. Since we already checked the value of xlen, 200*0957b409SSimon J. Gerraty * we can just use it right away. 201*0957b409SSimon J. Gerraty */ 202*0957b409SSimon J. Gerraty br_i15_encode(x, xlen, t3); 203*0957b409SSimon J. Gerraty 204*0957b409SSimon J. Gerraty /* 205*0957b409SSimon J. Gerraty * The only error conditions remaining at that point are invalid 206*0957b409SSimon J. Gerraty * values for p and q (even integers). 207*0957b409SSimon J. Gerraty */ 208*0957b409SSimon J. Gerraty return p0i & q0i & r; 209*0957b409SSimon J. Gerraty } 210