1 /*
2 * Copyright (C) 2017 - This file is part of libecc project
3 *
4 * Authors:
5 * Ryad BENADJILA <ryadbenadjila@gmail.com>
6 * Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr>
7 * Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr>
8 *
9 * Contributors:
10 * Nicolas VIVET <nicolas.vivet@ssi.gouv.fr>
11 * Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr>
12 *
13 * This software is licensed under a dual BSD and GPL v2 license.
14 * See LICENSE file at the root folder of the project.
15 */
16 #include <libecc/nn/nn_mul_redc1.h>
17 #include <libecc/nn/nn_mul_public.h>
18 #include <libecc/nn/nn_add.h>
19 #include <libecc/nn/nn_logical.h>
20 #include <libecc/nn/nn_div_public.h>
21 #include <libecc/nn/nn_modinv.h>
22 #include <libecc/nn/nn.h>
23
24 /*
25 * Given an odd number p, compute Montgomery coefficients r, r_square
26 * as well as mpinv so that:
27 *
28 * - r = 2^p_rounded_bitlen mod (p), where
29 * p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of
30 * minimum number of words required to store p)
31 * - r_square = r^2 mod (p)
32 * - mpinv = -p^-1 mod (2^WORDSIZE).
33 *
34 * Aliasing of outputs with the input is possible since p_in is
35 * copied in local p at the beginning of the function.
36 *
37 * The function returns 0 on success, -1 on error. out parameters 'r',
38 * 'r_square' and 'mpinv' are only meaningful on success.
39 */
nn_compute_redc1_coefs(nn_t r,nn_t r_square,nn_src_t p_in,word_t * mpinv)40 int nn_compute_redc1_coefs(nn_t r, nn_t r_square, nn_src_t p_in, word_t *mpinv)
41 {
42 bitcnt_t p_rounded_bitlen;
43 nn p, tmp_nn1, tmp_nn2;
44 word_t _mpinv;
45 int ret, isodd;
46 p.magic = tmp_nn1.magic = tmp_nn2.magic = WORD(0);
47
48 ret = nn_check_initialized(p_in); EG(ret, err);
49 ret = nn_init(&p, 0); EG(ret, err);
50 ret = nn_copy(&p, p_in); EG(ret, err);
51 MUST_HAVE((mpinv != NULL), ret, err);
52
53 /*
54 * In order for our reciprocal division routines to work, it is
55 * expected that the bit length (including leading zeroes) of
56 * input prime p is >= 2 * wlen where wlen is the number of bits
57 * of a word size.
58 */
59 if (p.wlen < 2) {
60 ret = nn_set_wlen(&p, 2); EG(ret, err);
61 }
62
63 ret = nn_init(r, 0); EG(ret, err);
64 ret = nn_init(r_square, 0); EG(ret, err);
65 ret = nn_init(&tmp_nn1, 0); EG(ret, err);
66 ret = nn_init(&tmp_nn2, 0); EG(ret, err);
67
68 /* p_rounded_bitlen = bitlen of p rounded to word size */
69 p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen);
70
71 /* _mpinv = 2^wlen - (modinv(prime, 2^wlen)) */
72 ret = nn_set_wlen(&tmp_nn1, 2); EG(ret, err);
73 tmp_nn1.val[1] = WORD(1);
74 ret = nn_copy(&tmp_nn2, &tmp_nn1); EG(ret, err);
75 ret = nn_modinv_2exp(&tmp_nn1, &p, WORD_BITS, &isodd); EG(ret, err);
76 ret = nn_sub(&tmp_nn1, &tmp_nn2, &tmp_nn1); EG(ret, err);
77 _mpinv = tmp_nn1.val[0];
78
79 /* r = (0x1 << p_rounded_bitlen) (p) */
80 ret = nn_one(r); EG(ret, err);
81 ret = nn_lshift(r, r, p_rounded_bitlen); EG(ret, err);
82 ret = nn_mod(r, r, &p); EG(ret, err);
83
84 /*
85 * r_square = (0x1 << (2*p_rounded_bitlen)) (p)
86 * We are supposed to handle NN numbers of size at least two times
87 * the biggest prime we use. Thus, we should be able to compute r_square
88 * with a multiplication followed by a reduction. (NB: we cannot use our
89 * Montgomery primitives at this point since we are computing its
90 * constants!)
91 */
92 /* Check we have indeed enough space for our r_square computation */
93 MUST_HAVE(!(NN_MAX_BIT_LEN < (2 * p_rounded_bitlen)), ret, err);
94
95 ret = nn_sqr(r_square, r); EG(ret, err);
96 ret = nn_mod(r_square, r_square, &p); EG(ret, err);
97
98 (*mpinv) = _mpinv;
99
100 err:
101 nn_uninit(&p);
102 nn_uninit(&tmp_nn1);
103 nn_uninit(&tmp_nn2);
104
105 return ret;
106 }
107
108 /*
109 * Perform Montgomery multiplication, that is usual multiplication
110 * followed by reduction modulo p.
111 *
112 * Inputs are supposed to be < p (i.e. taken modulo p).
113 *
114 * This uses the CIOS algorithm from Koc et al.
115 *
116 * The p input is the modulo number of the Montgomery multiplication,
117 * and mpinv is -p^(-1) mod (2^WORDSIZE).
118 *
119 * The function does not check input parameters are initialized. This
120 * MUST be done by the caller.
121 *
122 * The function returns 0 on success, -1 on error.
123 */
_nn_mul_redc1(nn_t out,nn_src_t in1,nn_src_t in2,nn_src_t p,word_t mpinv)124 ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p,
125 word_t mpinv)
126 {
127 word_t prod_high, prod_low, carry, acc, m;
128 unsigned int i, j, len, len_mul;
129 /* a and b inputs such that len(b) <= len(a) */
130 nn_src_t a, b;
131 int ret, cmp;
132 u8 old_wlen;
133
134 /*
135 * These comparisons are input hypothesis and does not "break"
136 * the following computation. However performance loss exists
137 * when this check is always done, this is why we use our
138 * SHOULD_HAVE primitive.
139 */
140 SHOULD_HAVE((!nn_cmp(in1, p, &cmp)) && (cmp < 0), ret, err);
141 SHOULD_HAVE((!nn_cmp(in2, p, &cmp)) && (cmp < 0), ret, err);
142
143 ret = nn_init(out, 0); EG(ret, err);
144
145 /* Check which one of in1 or in2 is the biggest */
146 a = (in1->wlen <= in2->wlen) ? in2 : in1;
147 b = (in1->wlen <= in2->wlen) ? in1 : in2;
148
149 /*
150 * The inputs might have been reduced due to trimming
151 * because of leading zeroes. It is important for our
152 * Montgomery algorithm to work on sizes consistent with
153 * out prime p real bit size. Thus, we expand the output
154 * size to the size of p.
155 */
156 ret = nn_set_wlen(out, p->wlen); EG(ret, err);
157
158 len = out->wlen;
159 len_mul = b->wlen;
160 /*
161 * We extend out to store carries. We first check that we
162 * do not have an overflow on the NN size.
163 */
164 MUST_HAVE(((WORD_BITS * (out->wlen + 1)) <= NN_MAX_BIT_LEN), ret, err);
165 old_wlen = out->wlen;
166 out->wlen = (u8)(out->wlen + 1);
167
168 /*
169 * This can be skipped if the first iteration of the for loop
170 * is separated.
171 */
172 for (i = 0; i < out->wlen; i++) {
173 out->val[i] = 0;
174 }
175 for (i = 0; i < len; i++) {
176 carry = WORD(0);
177 for (j = 0; j < len_mul; j++) {
178 WORD_MUL(prod_high, prod_low, a->val[i], b->val[j]);
179 prod_low = (word_t)(prod_low + carry);
180 prod_high = (word_t)(prod_high + (prod_low < carry));
181 out->val[j] = (word_t)(out->val[j] + prod_low);
182 carry = (word_t)(prod_high + (out->val[j] < prod_low));
183 }
184 for (; j < len; j++) {
185 out->val[j] = (word_t)(out->val[j] + carry);
186 carry = (word_t)(out->val[j] < carry);
187 }
188 out->val[j] = (word_t)(out->val[j] + carry);
189 acc = (word_t)(out->val[j] < carry);
190
191 m = (word_t)(out->val[0] * mpinv);
192 WORD_MUL(prod_high, prod_low, m, p->val[0]);
193 prod_low = (word_t)(prod_low + out->val[0]);
194 carry = (word_t)(prod_high + (prod_low < out->val[0]));
195 for (j = 1; j < len; j++) {
196 WORD_MUL(prod_high, prod_low, m, p->val[j]);
197 prod_low = (word_t)(prod_low + carry);
198 prod_high = (word_t)(prod_high + (prod_low < carry));
199 out->val[j - 1] = (word_t)(prod_low + out->val[j]);
200 carry = (word_t)(prod_high + (out->val[j - 1] < prod_low));
201 }
202 out->val[j - 1] = (word_t)(carry + out->val[j]);
203 carry = (word_t)(out->val[j - 1] < out->val[j]);
204 out->val[j] = (word_t)(acc + carry);
205 }
206 /*
207 * Note that at this stage the msw of out is either 0 or 1.
208 * If out > p we need to subtract p from out.
209 */
210 ret = nn_cmp(out, p, &cmp); EG(ret, err);
211 ret = nn_cnd_sub(cmp >= 0, out, out, p); EG(ret, err);
212 MUST_HAVE((!nn_cmp(out, p, &cmp)) && (cmp < 0), ret, err);
213 /* We restore out wlen. */
214 out->wlen = old_wlen;
215
216 err:
217 return ret;
218 }
219
220 /*
221 * Wrapper for previous function handling aliasing of one of the input
222 * paramter with out, through a copy. The function does not check
223 * input parameters are initialized. This MUST be done by the caller.
224 */
_nn_mul_redc1_aliased(nn_t out,nn_src_t in1,nn_src_t in2,nn_src_t p,word_t mpinv)225 ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1_aliased(nn_t out, nn_src_t in1, nn_src_t in2,
226 nn_src_t p, word_t mpinv)
227 {
228 nn out_cpy;
229 int ret;
230 out_cpy.magic = WORD(0);
231
232 ret = _nn_mul_redc1(&out_cpy, in1, in2, p, mpinv); EG(ret, err);
233 ret = nn_init(out, out_cpy.wlen); EG(ret, err);
234 ret = nn_copy(out, &out_cpy);
235
236 err:
237 nn_uninit(&out_cpy);
238
239 return ret;
240 }
241
242 /*
243 * Public version, handling possible aliasing of out parameter with
244 * one of the input parameters.
245 */
nn_mul_redc1(nn_t out,nn_src_t in1,nn_src_t in2,nn_src_t p,word_t mpinv)246 int nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p,
247 word_t mpinv)
248 {
249 int ret;
250
251 ret = nn_check_initialized(in1); EG(ret, err);
252 ret = nn_check_initialized(in2); EG(ret, err);
253 ret = nn_check_initialized(p); EG(ret, err);
254
255 /* Handle possible output aliasing */
256 if ((out == in1) || (out == in2) || (out == p)) {
257 ret = _nn_mul_redc1_aliased(out, in1, in2, p, mpinv);
258 } else {
259 ret = _nn_mul_redc1(out, in1, in2, p, mpinv);
260 }
261
262 err:
263 return ret;
264 }
265
266 /*
267 * Compute in1 * in2 mod p where in1 and in2 are numbers < p.
268 * When p is an odd number, the function redcifies in1 and in2
269 * parameters, does the computation and then unredcifies the
270 * result. When p is an even number, we use an unoptimized mul
271 * then mod operations sequence.
272 *
273 * From a mathematical standpoint, the computation is equivalent
274 * to performing:
275 *
276 * nn_mul(&tmp2, in1, in2);
277 * nn_mod(&out, &tmp2, q);
278 *
279 * but the modular reduction is done progressively during
280 * Montgomery reduction when p is odd (which brings more efficiency).
281 *
282 * Inputs are supposed to be < p (i.e. taken modulo p).
283 *
284 * The function returns 0 on success, -1 on error.
285 */
nn_mod_mul(nn_t out,nn_src_t in1,nn_src_t in2,nn_src_t p_in)286 int nn_mod_mul(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p_in)
287 {
288 nn r_square, p;
289 nn in1_tmp, in2_tmp;
290 word_t mpinv;
291 int ret, isodd;
292 r_square.magic = in1_tmp.magic = in2_tmp.magic = p.magic = WORD(0);
293
294 /* When p_in is even, we cannot work with Montgomery multiplication */
295 ret = nn_isodd(p_in, &isodd); EG(ret, err);
296 if(!isodd){
297 /* When p_in is even, we fallback to less efficient mul then mod */
298 ret = nn_mul(out, in1, in2); EG(ret, err);
299 ret = nn_mod(out, out, p_in); EG(ret, err);
300 }
301 else{
302 /* Here, p_in is odd and we can use redcification */
303 ret = nn_copy(&p, p_in); EG(ret, err);
304
305 /*
306 * In order for our reciprocal division routines to work, it is
307 * expected that the bit length (including leading zeroes) of
308 * input prime p is >= 2 * wlen where wlen is the number of bits
309 * of a word size.
310 */
311 if (p.wlen < 2) {
312 ret = nn_set_wlen(&p, 2); EG(ret, err);
313 }
314
315 /* Compute Mongtomery coefs.
316 * NOTE: in1_tmp holds a dummy value here after the operation.
317 */
318 ret = nn_compute_redc1_coefs(&in1_tmp, &r_square, &p, &mpinv); EG(ret, err);
319
320 /* redcify in1 and in2 */
321 ret = nn_mul_redc1(&in1_tmp, in1, &r_square, &p, mpinv); EG(ret, err);
322 ret = nn_mul_redc1(&in2_tmp, in2, &r_square, &p, mpinv); EG(ret, err);
323
324 /* Compute in1 * in2 mod p in montgomery world.
325 * NOTE: r_square holds the result after the operation.
326 */
327 ret = nn_mul_redc1(&r_square, &in1_tmp, &in2_tmp, &p, mpinv); EG(ret, err);
328
329 /* Come back to real world by unredcifying result */
330 ret = nn_init(&in1_tmp, 0); EG(ret, err);
331 ret = nn_one(&in1_tmp); EG(ret, err);
332 ret = nn_mul_redc1(out, &r_square, &in1_tmp, &p, mpinv); EG(ret, err);
333 }
334
335 err:
336 nn_uninit(&p);
337 nn_uninit(&r_square);
338 nn_uninit(&in1_tmp);
339 nn_uninit(&in2_tmp);
340
341 return ret;
342 }
343