1 /* 2 * Copyright (C) 2017 - This file is part of libecc project 3 * 4 * Authors: 5 * Ryad BENADJILA <ryadbenadjila@gmail.com> 6 * Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr> 7 * Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr> 8 * 9 * Contributors: 10 * Nicolas VIVET <nicolas.vivet@ssi.gouv.fr> 11 * Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr> 12 * 13 * This software is licensed under a dual BSD and GPL v2 license. 14 * See LICENSE file at the root folder of the project. 15 */ 16 #include <libecc/nn/nn_mul_redc1.h> 17 #include <libecc/nn/nn_mul_public.h> 18 #include <libecc/nn/nn_add.h> 19 #include <libecc/nn/nn_logical.h> 20 #include <libecc/nn/nn_div_public.h> 21 #include <libecc/nn/nn_modinv.h> 22 #include <libecc/nn/nn.h> 23 24 /* 25 * Given an odd number p, compute Montgomery coefficients r, r_square 26 * as well as mpinv so that: 27 * 28 * - r = 2^p_rounded_bitlen mod (p), where 29 * p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of 30 * minimum number of words required to store p) 31 * - r_square = r^2 mod (p) 32 * - mpinv = -p^-1 mod (2^WORDSIZE). 33 * 34 * Aliasing of outputs with the input is possible since p_in is 35 * copied in local p at the beginning of the function. 36 * 37 * The function returns 0 on success, -1 on error. out parameters 'r', 38 * 'r_square' and 'mpinv' are only meaningful on success. 39 */ 40 int nn_compute_redc1_coefs(nn_t r, nn_t r_square, nn_src_t p_in, word_t *mpinv) 41 { 42 bitcnt_t p_rounded_bitlen; 43 nn p, tmp_nn1, tmp_nn2; 44 word_t _mpinv; 45 int ret, isodd; 46 p.magic = tmp_nn1.magic = tmp_nn2.magic = WORD(0); 47 48 ret = nn_check_initialized(p_in); EG(ret, err); 49 ret = nn_init(&p, 0); EG(ret, err); 50 ret = nn_copy(&p, p_in); EG(ret, err); 51 MUST_HAVE((mpinv != NULL), ret, err); 52 53 /* 54 * In order for our reciprocal division routines to work, it is 55 * expected that the bit length (including leading zeroes) of 56 * input prime p is >= 2 * wlen where wlen is the number of bits 57 * of a word size. 58 */ 59 if (p.wlen < 2) { 60 ret = nn_set_wlen(&p, 2); EG(ret, err); 61 } 62 63 ret = nn_init(r, 0); EG(ret, err); 64 ret = nn_init(r_square, 0); EG(ret, err); 65 ret = nn_init(&tmp_nn1, 0); EG(ret, err); 66 ret = nn_init(&tmp_nn2, 0); EG(ret, err); 67 68 /* p_rounded_bitlen = bitlen of p rounded to word size */ 69 p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen); 70 71 /* _mpinv = 2^wlen - (modinv(prime, 2^wlen)) */ 72 ret = nn_set_wlen(&tmp_nn1, 2); EG(ret, err); 73 tmp_nn1.val[1] = WORD(1); 74 ret = nn_copy(&tmp_nn2, &tmp_nn1); EG(ret, err); 75 ret = nn_modinv_2exp(&tmp_nn1, &p, WORD_BITS, &isodd); EG(ret, err); 76 ret = nn_sub(&tmp_nn1, &tmp_nn2, &tmp_nn1); EG(ret, err); 77 _mpinv = tmp_nn1.val[0]; 78 79 /* r = (0x1 << p_rounded_bitlen) (p) */ 80 ret = nn_one(r); EG(ret, err); 81 ret = nn_lshift(r, r, p_rounded_bitlen); EG(ret, err); 82 ret = nn_mod(r, r, &p); EG(ret, err); 83 84 /* 85 * r_square = (0x1 << (2*p_rounded_bitlen)) (p) 86 * We are supposed to handle NN numbers of size at least two times 87 * the biggest prime we use. Thus, we should be able to compute r_square 88 * with a multiplication followed by a reduction. (NB: we cannot use our 89 * Montgomery primitives at this point since we are computing its 90 * constants!) 91 */ 92 /* Check we have indeed enough space for our r_square computation */ 93 MUST_HAVE(!(NN_MAX_BIT_LEN < (2 * p_rounded_bitlen)), ret, err); 94 95 ret = nn_sqr(r_square, r); EG(ret, err); 96 ret = nn_mod(r_square, r_square, &p); EG(ret, err); 97 98 (*mpinv) = _mpinv; 99 100 err: 101 nn_uninit(&p); 102 nn_uninit(&tmp_nn1); 103 nn_uninit(&tmp_nn2); 104 105 return ret; 106 } 107 108 /* 109 * Perform Montgomery multiplication, that is usual multiplication 110 * followed by reduction modulo p. 111 * 112 * Inputs are supposed to be < p (i.e. taken modulo p). 113 * 114 * This uses the CIOS algorithm from Koc et al. 115 * 116 * The p input is the modulo number of the Montgomery multiplication, 117 * and mpinv is -p^(-1) mod (2^WORDSIZE). 118 * 119 * The function does not check input parameters are initialized. This 120 * MUST be done by the caller. 121 * 122 * The function returns 0 on success, -1 on error. 123 */ 124 ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p, 125 word_t mpinv) 126 { 127 word_t prod_high, prod_low, carry, acc, m; 128 unsigned int i, j, len, len_mul; 129 /* a and b inputs such that len(b) <= len(a) */ 130 nn_src_t a, b; 131 int ret, cmp; 132 u8 old_wlen; 133 134 /* 135 * These comparisons are input hypothesis and does not "break" 136 * the following computation. However performance loss exists 137 * when this check is always done, this is why we use our 138 * SHOULD_HAVE primitive. 139 */ 140 SHOULD_HAVE((!nn_cmp(in1, p, &cmp)) && (cmp < 0), ret, err); 141 SHOULD_HAVE((!nn_cmp(in2, p, &cmp)) && (cmp < 0), ret, err); 142 143 ret = nn_init(out, 0); EG(ret, err); 144 145 /* Check which one of in1 or in2 is the biggest */ 146 a = (in1->wlen <= in2->wlen) ? in2 : in1; 147 b = (in1->wlen <= in2->wlen) ? in1 : in2; 148 149 /* 150 * The inputs might have been reduced due to trimming 151 * because of leading zeroes. It is important for our 152 * Montgomery algorithm to work on sizes consistent with 153 * out prime p real bit size. Thus, we expand the output 154 * size to the size of p. 155 */ 156 ret = nn_set_wlen(out, p->wlen); EG(ret, err); 157 158 len = out->wlen; 159 len_mul = b->wlen; 160 /* 161 * We extend out to store carries. We first check that we 162 * do not have an overflow on the NN size. 163 */ 164 MUST_HAVE(((WORD_BITS * (out->wlen + 1)) <= NN_MAX_BIT_LEN), ret, err); 165 old_wlen = out->wlen; 166 out->wlen = (u8)(out->wlen + 1); 167 168 /* 169 * This can be skipped if the first iteration of the for loop 170 * is separated. 171 */ 172 for (i = 0; i < out->wlen; i++) { 173 out->val[i] = 0; 174 } 175 for (i = 0; i < len; i++) { 176 carry = WORD(0); 177 for (j = 0; j < len_mul; j++) { 178 WORD_MUL(prod_high, prod_low, a->val[i], b->val[j]); 179 prod_low = (word_t)(prod_low + carry); 180 prod_high = (word_t)(prod_high + (prod_low < carry)); 181 out->val[j] = (word_t)(out->val[j] + prod_low); 182 carry = (word_t)(prod_high + (out->val[j] < prod_low)); 183 } 184 for (; j < len; j++) { 185 out->val[j] = (word_t)(out->val[j] + carry); 186 carry = (word_t)(out->val[j] < carry); 187 } 188 out->val[j] = (word_t)(out->val[j] + carry); 189 acc = (word_t)(out->val[j] < carry); 190 191 m = (word_t)(out->val[0] * mpinv); 192 WORD_MUL(prod_high, prod_low, m, p->val[0]); 193 prod_low = (word_t)(prod_low + out->val[0]); 194 carry = (word_t)(prod_high + (prod_low < out->val[0])); 195 for (j = 1; j < len; j++) { 196 WORD_MUL(prod_high, prod_low, m, p->val[j]); 197 prod_low = (word_t)(prod_low + carry); 198 prod_high = (word_t)(prod_high + (prod_low < carry)); 199 out->val[j - 1] = (word_t)(prod_low + out->val[j]); 200 carry = (word_t)(prod_high + (out->val[j - 1] < prod_low)); 201 } 202 out->val[j - 1] = (word_t)(carry + out->val[j]); 203 carry = (word_t)(out->val[j - 1] < out->val[j]); 204 out->val[j] = (word_t)(acc + carry); 205 } 206 /* 207 * Note that at this stage the msw of out is either 0 or 1. 208 * If out > p we need to subtract p from out. 209 */ 210 ret = nn_cmp(out, p, &cmp); EG(ret, err); 211 ret = nn_cnd_sub(cmp >= 0, out, out, p); EG(ret, err); 212 MUST_HAVE((!nn_cmp(out, p, &cmp)) && (cmp < 0), ret, err); 213 /* We restore out wlen. */ 214 out->wlen = old_wlen; 215 216 err: 217 return ret; 218 } 219 220 /* 221 * Wrapper for previous function handling aliasing of one of the input 222 * paramter with out, through a copy. The function does not check 223 * input parameters are initialized. This MUST be done by the caller. 224 */ 225 ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1_aliased(nn_t out, nn_src_t in1, nn_src_t in2, 226 nn_src_t p, word_t mpinv) 227 { 228 nn out_cpy; 229 int ret; 230 out_cpy.magic = WORD(0); 231 232 ret = _nn_mul_redc1(&out_cpy, in1, in2, p, mpinv); EG(ret, err); 233 ret = nn_init(out, out_cpy.wlen); EG(ret, err); 234 ret = nn_copy(out, &out_cpy); 235 236 err: 237 nn_uninit(&out_cpy); 238 239 return ret; 240 } 241 242 /* 243 * Public version, handling possible aliasing of out parameter with 244 * one of the input parameters. 245 */ 246 int nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p, 247 word_t mpinv) 248 { 249 int ret; 250 251 ret = nn_check_initialized(in1); EG(ret, err); 252 ret = nn_check_initialized(in2); EG(ret, err); 253 ret = nn_check_initialized(p); EG(ret, err); 254 255 /* Handle possible output aliasing */ 256 if ((out == in1) || (out == in2) || (out == p)) { 257 ret = _nn_mul_redc1_aliased(out, in1, in2, p, mpinv); 258 } else { 259 ret = _nn_mul_redc1(out, in1, in2, p, mpinv); 260 } 261 262 err: 263 return ret; 264 } 265 266 /* 267 * Compute in1 * in2 mod p where in1 and in2 are numbers < p. 268 * When p is an odd number, the function redcifies in1 and in2 269 * parameters, does the computation and then unredcifies the 270 * result. When p is an even number, we use an unoptimized mul 271 * then mod operations sequence. 272 * 273 * From a mathematical standpoint, the computation is equivalent 274 * to performing: 275 * 276 * nn_mul(&tmp2, in1, in2); 277 * nn_mod(&out, &tmp2, q); 278 * 279 * but the modular reduction is done progressively during 280 * Montgomery reduction when p is odd (which brings more efficiency). 281 * 282 * Inputs are supposed to be < p (i.e. taken modulo p). 283 * 284 * The function returns 0 on success, -1 on error. 285 */ 286 int nn_mod_mul(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p_in) 287 { 288 nn r_square, p; 289 nn in1_tmp, in2_tmp; 290 word_t mpinv; 291 int ret, isodd; 292 r_square.magic = in1_tmp.magic = in2_tmp.magic = p.magic = WORD(0); 293 294 /* When p_in is even, we cannot work with Montgomery multiplication */ 295 ret = nn_isodd(p_in, &isodd); EG(ret, err); 296 if(!isodd){ 297 /* When p_in is even, we fallback to less efficient mul then mod */ 298 ret = nn_mul(out, in1, in2); EG(ret, err); 299 ret = nn_mod(out, out, p_in); EG(ret, err); 300 } 301 else{ 302 /* Here, p_in is odd and we can use redcification */ 303 ret = nn_copy(&p, p_in); EG(ret, err); 304 305 /* 306 * In order for our reciprocal division routines to work, it is 307 * expected that the bit length (including leading zeroes) of 308 * input prime p is >= 2 * wlen where wlen is the number of bits 309 * of a word size. 310 */ 311 if (p.wlen < 2) { 312 ret = nn_set_wlen(&p, 2); EG(ret, err); 313 } 314 315 /* Compute Mongtomery coefs. 316 * NOTE: in1_tmp holds a dummy value here after the operation. 317 */ 318 ret = nn_compute_redc1_coefs(&in1_tmp, &r_square, &p, &mpinv); EG(ret, err); 319 320 /* redcify in1 and in2 */ 321 ret = nn_mul_redc1(&in1_tmp, in1, &r_square, &p, mpinv); EG(ret, err); 322 ret = nn_mul_redc1(&in2_tmp, in2, &r_square, &p, mpinv); EG(ret, err); 323 324 /* Compute in1 * in2 mod p in montgomery world. 325 * NOTE: r_square holds the result after the operation. 326 */ 327 ret = nn_mul_redc1(&r_square, &in1_tmp, &in2_tmp, &p, mpinv); EG(ret, err); 328 329 /* Come back to real world by unredcifying result */ 330 ret = nn_init(&in1_tmp, 0); EG(ret, err); 331 ret = nn_one(&in1_tmp); EG(ret, err); 332 ret = nn_mul_redc1(out, &r_square, &in1_tmp, &p, mpinv); EG(ret, err); 333 } 334 335 err: 336 nn_uninit(&p); 337 nn_uninit(&r_square); 338 nn_uninit(&in1_tmp); 339 nn_uninit(&in2_tmp); 340 341 return ret; 342 } 343