1*f0865ec9SKyle Evans /*
2*f0865ec9SKyle Evans * Copyright (C) 2017 - This file is part of libecc project
3*f0865ec9SKyle Evans *
4*f0865ec9SKyle Evans * Authors:
5*f0865ec9SKyle Evans * Ryad BENADJILA <ryadbenadjila@gmail.com>
6*f0865ec9SKyle Evans * Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr>
7*f0865ec9SKyle Evans * Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr>
8*f0865ec9SKyle Evans *
9*f0865ec9SKyle Evans * Contributors:
10*f0865ec9SKyle Evans * Nicolas VIVET <nicolas.vivet@ssi.gouv.fr>
11*f0865ec9SKyle Evans * Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr>
12*f0865ec9SKyle Evans *
13*f0865ec9SKyle Evans * This software is licensed under a dual BSD and GPL v2 license.
14*f0865ec9SKyle Evans * See LICENSE file at the root folder of the project.
15*f0865ec9SKyle Evans */
16*f0865ec9SKyle Evans #include <libecc/nn/nn_mul_redc1.h>
17*f0865ec9SKyle Evans #include <libecc/nn/nn_mul_public.h>
18*f0865ec9SKyle Evans #include <libecc/nn/nn_add.h>
19*f0865ec9SKyle Evans #include <libecc/nn/nn_logical.h>
20*f0865ec9SKyle Evans #include <libecc/nn/nn_div_public.h>
21*f0865ec9SKyle Evans #include <libecc/nn/nn_modinv.h>
22*f0865ec9SKyle Evans #include <libecc/nn/nn.h>
23*f0865ec9SKyle Evans
24*f0865ec9SKyle Evans /*
25*f0865ec9SKyle Evans * Given an odd number p, compute Montgomery coefficients r, r_square
26*f0865ec9SKyle Evans * as well as mpinv so that:
27*f0865ec9SKyle Evans *
28*f0865ec9SKyle Evans * - r = 2^p_rounded_bitlen mod (p), where
29*f0865ec9SKyle Evans * p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of
30*f0865ec9SKyle Evans * minimum number of words required to store p)
31*f0865ec9SKyle Evans * - r_square = r^2 mod (p)
32*f0865ec9SKyle Evans * - mpinv = -p^-1 mod (2^WORDSIZE).
33*f0865ec9SKyle Evans *
34*f0865ec9SKyle Evans * Aliasing of outputs with the input is possible since p_in is
35*f0865ec9SKyle Evans * copied in local p at the beginning of the function.
36*f0865ec9SKyle Evans *
37*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error. out parameters 'r',
38*f0865ec9SKyle Evans * 'r_square' and 'mpinv' are only meaningful on success.
39*f0865ec9SKyle Evans */
nn_compute_redc1_coefs(nn_t r,nn_t r_square,nn_src_t p_in,word_t * mpinv)40*f0865ec9SKyle Evans int nn_compute_redc1_coefs(nn_t r, nn_t r_square, nn_src_t p_in, word_t *mpinv)
41*f0865ec9SKyle Evans {
42*f0865ec9SKyle Evans bitcnt_t p_rounded_bitlen;
43*f0865ec9SKyle Evans nn p, tmp_nn1, tmp_nn2;
44*f0865ec9SKyle Evans word_t _mpinv;
45*f0865ec9SKyle Evans int ret, isodd;
46*f0865ec9SKyle Evans p.magic = tmp_nn1.magic = tmp_nn2.magic = WORD(0);
47*f0865ec9SKyle Evans
48*f0865ec9SKyle Evans ret = nn_check_initialized(p_in); EG(ret, err);
49*f0865ec9SKyle Evans ret = nn_init(&p, 0); EG(ret, err);
50*f0865ec9SKyle Evans ret = nn_copy(&p, p_in); EG(ret, err);
51*f0865ec9SKyle Evans MUST_HAVE((mpinv != NULL), ret, err);
52*f0865ec9SKyle Evans
53*f0865ec9SKyle Evans /*
54*f0865ec9SKyle Evans * In order for our reciprocal division routines to work, it is
55*f0865ec9SKyle Evans * expected that the bit length (including leading zeroes) of
56*f0865ec9SKyle Evans * input prime p is >= 2 * wlen where wlen is the number of bits
57*f0865ec9SKyle Evans * of a word size.
58*f0865ec9SKyle Evans */
59*f0865ec9SKyle Evans if (p.wlen < 2) {
60*f0865ec9SKyle Evans ret = nn_set_wlen(&p, 2); EG(ret, err);
61*f0865ec9SKyle Evans }
62*f0865ec9SKyle Evans
63*f0865ec9SKyle Evans ret = nn_init(r, 0); EG(ret, err);
64*f0865ec9SKyle Evans ret = nn_init(r_square, 0); EG(ret, err);
65*f0865ec9SKyle Evans ret = nn_init(&tmp_nn1, 0); EG(ret, err);
66*f0865ec9SKyle Evans ret = nn_init(&tmp_nn2, 0); EG(ret, err);
67*f0865ec9SKyle Evans
68*f0865ec9SKyle Evans /* p_rounded_bitlen = bitlen of p rounded to word size */
69*f0865ec9SKyle Evans p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen);
70*f0865ec9SKyle Evans
71*f0865ec9SKyle Evans /* _mpinv = 2^wlen - (modinv(prime, 2^wlen)) */
72*f0865ec9SKyle Evans ret = nn_set_wlen(&tmp_nn1, 2); EG(ret, err);
73*f0865ec9SKyle Evans tmp_nn1.val[1] = WORD(1);
74*f0865ec9SKyle Evans ret = nn_copy(&tmp_nn2, &tmp_nn1); EG(ret, err);
75*f0865ec9SKyle Evans ret = nn_modinv_2exp(&tmp_nn1, &p, WORD_BITS, &isodd); EG(ret, err);
76*f0865ec9SKyle Evans ret = nn_sub(&tmp_nn1, &tmp_nn2, &tmp_nn1); EG(ret, err);
77*f0865ec9SKyle Evans _mpinv = tmp_nn1.val[0];
78*f0865ec9SKyle Evans
79*f0865ec9SKyle Evans /* r = (0x1 << p_rounded_bitlen) (p) */
80*f0865ec9SKyle Evans ret = nn_one(r); EG(ret, err);
81*f0865ec9SKyle Evans ret = nn_lshift(r, r, p_rounded_bitlen); EG(ret, err);
82*f0865ec9SKyle Evans ret = nn_mod(r, r, &p); EG(ret, err);
83*f0865ec9SKyle Evans
84*f0865ec9SKyle Evans /*
85*f0865ec9SKyle Evans * r_square = (0x1 << (2*p_rounded_bitlen)) (p)
86*f0865ec9SKyle Evans * We are supposed to handle NN numbers of size at least two times
87*f0865ec9SKyle Evans * the biggest prime we use. Thus, we should be able to compute r_square
88*f0865ec9SKyle Evans * with a multiplication followed by a reduction. (NB: we cannot use our
89*f0865ec9SKyle Evans * Montgomery primitives at this point since we are computing its
90*f0865ec9SKyle Evans * constants!)
91*f0865ec9SKyle Evans */
92*f0865ec9SKyle Evans /* Check we have indeed enough space for our r_square computation */
93*f0865ec9SKyle Evans MUST_HAVE(!(NN_MAX_BIT_LEN < (2 * p_rounded_bitlen)), ret, err);
94*f0865ec9SKyle Evans
95*f0865ec9SKyle Evans ret = nn_sqr(r_square, r); EG(ret, err);
96*f0865ec9SKyle Evans ret = nn_mod(r_square, r_square, &p); EG(ret, err);
97*f0865ec9SKyle Evans
98*f0865ec9SKyle Evans (*mpinv) = _mpinv;
99*f0865ec9SKyle Evans
100*f0865ec9SKyle Evans err:
101*f0865ec9SKyle Evans nn_uninit(&p);
102*f0865ec9SKyle Evans nn_uninit(&tmp_nn1);
103*f0865ec9SKyle Evans nn_uninit(&tmp_nn2);
104*f0865ec9SKyle Evans
105*f0865ec9SKyle Evans return ret;
106*f0865ec9SKyle Evans }
107*f0865ec9SKyle Evans
108*f0865ec9SKyle Evans /*
109*f0865ec9SKyle Evans * Perform Montgomery multiplication, that is usual multiplication
110*f0865ec9SKyle Evans * followed by reduction modulo p.
111*f0865ec9SKyle Evans *
112*f0865ec9SKyle Evans * Inputs are supposed to be < p (i.e. taken modulo p).
113*f0865ec9SKyle Evans *
114*f0865ec9SKyle Evans * This uses the CIOS algorithm from Koc et al.
115*f0865ec9SKyle Evans *
116*f0865ec9SKyle Evans * The p input is the modulo number of the Montgomery multiplication,
117*f0865ec9SKyle Evans * and mpinv is -p^(-1) mod (2^WORDSIZE).
118*f0865ec9SKyle Evans *
119*f0865ec9SKyle Evans * The function does not check input parameters are initialized. This
120*f0865ec9SKyle Evans * MUST be done by the caller.
121*f0865ec9SKyle Evans *
122*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error.
123*f0865ec9SKyle Evans */
_nn_mul_redc1(nn_t out,nn_src_t in1,nn_src_t in2,nn_src_t p,word_t mpinv)124*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p,
125*f0865ec9SKyle Evans word_t mpinv)
126*f0865ec9SKyle Evans {
127*f0865ec9SKyle Evans word_t prod_high, prod_low, carry, acc, m;
128*f0865ec9SKyle Evans unsigned int i, j, len, len_mul;
129*f0865ec9SKyle Evans /* a and b inputs such that len(b) <= len(a) */
130*f0865ec9SKyle Evans nn_src_t a, b;
131*f0865ec9SKyle Evans int ret, cmp;
132*f0865ec9SKyle Evans u8 old_wlen;
133*f0865ec9SKyle Evans
134*f0865ec9SKyle Evans /*
135*f0865ec9SKyle Evans * These comparisons are input hypothesis and does not "break"
136*f0865ec9SKyle Evans * the following computation. However performance loss exists
137*f0865ec9SKyle Evans * when this check is always done, this is why we use our
138*f0865ec9SKyle Evans * SHOULD_HAVE primitive.
139*f0865ec9SKyle Evans */
140*f0865ec9SKyle Evans SHOULD_HAVE((!nn_cmp(in1, p, &cmp)) && (cmp < 0), ret, err);
141*f0865ec9SKyle Evans SHOULD_HAVE((!nn_cmp(in2, p, &cmp)) && (cmp < 0), ret, err);
142*f0865ec9SKyle Evans
143*f0865ec9SKyle Evans ret = nn_init(out, 0); EG(ret, err);
144*f0865ec9SKyle Evans
145*f0865ec9SKyle Evans /* Check which one of in1 or in2 is the biggest */
146*f0865ec9SKyle Evans a = (in1->wlen <= in2->wlen) ? in2 : in1;
147*f0865ec9SKyle Evans b = (in1->wlen <= in2->wlen) ? in1 : in2;
148*f0865ec9SKyle Evans
149*f0865ec9SKyle Evans /*
150*f0865ec9SKyle Evans * The inputs might have been reduced due to trimming
151*f0865ec9SKyle Evans * because of leading zeroes. It is important for our
152*f0865ec9SKyle Evans * Montgomery algorithm to work on sizes consistent with
153*f0865ec9SKyle Evans * out prime p real bit size. Thus, we expand the output
154*f0865ec9SKyle Evans * size to the size of p.
155*f0865ec9SKyle Evans */
156*f0865ec9SKyle Evans ret = nn_set_wlen(out, p->wlen); EG(ret, err);
157*f0865ec9SKyle Evans
158*f0865ec9SKyle Evans len = out->wlen;
159*f0865ec9SKyle Evans len_mul = b->wlen;
160*f0865ec9SKyle Evans /*
161*f0865ec9SKyle Evans * We extend out to store carries. We first check that we
162*f0865ec9SKyle Evans * do not have an overflow on the NN size.
163*f0865ec9SKyle Evans */
164*f0865ec9SKyle Evans MUST_HAVE(((WORD_BITS * (out->wlen + 1)) <= NN_MAX_BIT_LEN), ret, err);
165*f0865ec9SKyle Evans old_wlen = out->wlen;
166*f0865ec9SKyle Evans out->wlen = (u8)(out->wlen + 1);
167*f0865ec9SKyle Evans
168*f0865ec9SKyle Evans /*
169*f0865ec9SKyle Evans * This can be skipped if the first iteration of the for loop
170*f0865ec9SKyle Evans * is separated.
171*f0865ec9SKyle Evans */
172*f0865ec9SKyle Evans for (i = 0; i < out->wlen; i++) {
173*f0865ec9SKyle Evans out->val[i] = 0;
174*f0865ec9SKyle Evans }
175*f0865ec9SKyle Evans for (i = 0; i < len; i++) {
176*f0865ec9SKyle Evans carry = WORD(0);
177*f0865ec9SKyle Evans for (j = 0; j < len_mul; j++) {
178*f0865ec9SKyle Evans WORD_MUL(prod_high, prod_low, a->val[i], b->val[j]);
179*f0865ec9SKyle Evans prod_low = (word_t)(prod_low + carry);
180*f0865ec9SKyle Evans prod_high = (word_t)(prod_high + (prod_low < carry));
181*f0865ec9SKyle Evans out->val[j] = (word_t)(out->val[j] + prod_low);
182*f0865ec9SKyle Evans carry = (word_t)(prod_high + (out->val[j] < prod_low));
183*f0865ec9SKyle Evans }
184*f0865ec9SKyle Evans for (; j < len; j++) {
185*f0865ec9SKyle Evans out->val[j] = (word_t)(out->val[j] + carry);
186*f0865ec9SKyle Evans carry = (word_t)(out->val[j] < carry);
187*f0865ec9SKyle Evans }
188*f0865ec9SKyle Evans out->val[j] = (word_t)(out->val[j] + carry);
189*f0865ec9SKyle Evans acc = (word_t)(out->val[j] < carry);
190*f0865ec9SKyle Evans
191*f0865ec9SKyle Evans m = (word_t)(out->val[0] * mpinv);
192*f0865ec9SKyle Evans WORD_MUL(prod_high, prod_low, m, p->val[0]);
193*f0865ec9SKyle Evans prod_low = (word_t)(prod_low + out->val[0]);
194*f0865ec9SKyle Evans carry = (word_t)(prod_high + (prod_low < out->val[0]));
195*f0865ec9SKyle Evans for (j = 1; j < len; j++) {
196*f0865ec9SKyle Evans WORD_MUL(prod_high, prod_low, m, p->val[j]);
197*f0865ec9SKyle Evans prod_low = (word_t)(prod_low + carry);
198*f0865ec9SKyle Evans prod_high = (word_t)(prod_high + (prod_low < carry));
199*f0865ec9SKyle Evans out->val[j - 1] = (word_t)(prod_low + out->val[j]);
200*f0865ec9SKyle Evans carry = (word_t)(prod_high + (out->val[j - 1] < prod_low));
201*f0865ec9SKyle Evans }
202*f0865ec9SKyle Evans out->val[j - 1] = (word_t)(carry + out->val[j]);
203*f0865ec9SKyle Evans carry = (word_t)(out->val[j - 1] < out->val[j]);
204*f0865ec9SKyle Evans out->val[j] = (word_t)(acc + carry);
205*f0865ec9SKyle Evans }
206*f0865ec9SKyle Evans /*
207*f0865ec9SKyle Evans * Note that at this stage the msw of out is either 0 or 1.
208*f0865ec9SKyle Evans * If out > p we need to subtract p from out.
209*f0865ec9SKyle Evans */
210*f0865ec9SKyle Evans ret = nn_cmp(out, p, &cmp); EG(ret, err);
211*f0865ec9SKyle Evans ret = nn_cnd_sub(cmp >= 0, out, out, p); EG(ret, err);
212*f0865ec9SKyle Evans MUST_HAVE((!nn_cmp(out, p, &cmp)) && (cmp < 0), ret, err);
213*f0865ec9SKyle Evans /* We restore out wlen. */
214*f0865ec9SKyle Evans out->wlen = old_wlen;
215*f0865ec9SKyle Evans
216*f0865ec9SKyle Evans err:
217*f0865ec9SKyle Evans return ret;
218*f0865ec9SKyle Evans }
219*f0865ec9SKyle Evans
220*f0865ec9SKyle Evans /*
221*f0865ec9SKyle Evans * Wrapper for previous function handling aliasing of one of the input
222*f0865ec9SKyle Evans * paramter with out, through a copy. The function does not check
223*f0865ec9SKyle Evans * input parameters are initialized. This MUST be done by the caller.
224*f0865ec9SKyle Evans */
_nn_mul_redc1_aliased(nn_t out,nn_src_t in1,nn_src_t in2,nn_src_t p,word_t mpinv)225*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_mul_redc1_aliased(nn_t out, nn_src_t in1, nn_src_t in2,
226*f0865ec9SKyle Evans nn_src_t p, word_t mpinv)
227*f0865ec9SKyle Evans {
228*f0865ec9SKyle Evans nn out_cpy;
229*f0865ec9SKyle Evans int ret;
230*f0865ec9SKyle Evans out_cpy.magic = WORD(0);
231*f0865ec9SKyle Evans
232*f0865ec9SKyle Evans ret = _nn_mul_redc1(&out_cpy, in1, in2, p, mpinv); EG(ret, err);
233*f0865ec9SKyle Evans ret = nn_init(out, out_cpy.wlen); EG(ret, err);
234*f0865ec9SKyle Evans ret = nn_copy(out, &out_cpy);
235*f0865ec9SKyle Evans
236*f0865ec9SKyle Evans err:
237*f0865ec9SKyle Evans nn_uninit(&out_cpy);
238*f0865ec9SKyle Evans
239*f0865ec9SKyle Evans return ret;
240*f0865ec9SKyle Evans }
241*f0865ec9SKyle Evans
242*f0865ec9SKyle Evans /*
243*f0865ec9SKyle Evans * Public version, handling possible aliasing of out parameter with
244*f0865ec9SKyle Evans * one of the input parameters.
245*f0865ec9SKyle Evans */
nn_mul_redc1(nn_t out,nn_src_t in1,nn_src_t in2,nn_src_t p,word_t mpinv)246*f0865ec9SKyle Evans int nn_mul_redc1(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p,
247*f0865ec9SKyle Evans word_t mpinv)
248*f0865ec9SKyle Evans {
249*f0865ec9SKyle Evans int ret;
250*f0865ec9SKyle Evans
251*f0865ec9SKyle Evans ret = nn_check_initialized(in1); EG(ret, err);
252*f0865ec9SKyle Evans ret = nn_check_initialized(in2); EG(ret, err);
253*f0865ec9SKyle Evans ret = nn_check_initialized(p); EG(ret, err);
254*f0865ec9SKyle Evans
255*f0865ec9SKyle Evans /* Handle possible output aliasing */
256*f0865ec9SKyle Evans if ((out == in1) || (out == in2) || (out == p)) {
257*f0865ec9SKyle Evans ret = _nn_mul_redc1_aliased(out, in1, in2, p, mpinv);
258*f0865ec9SKyle Evans } else {
259*f0865ec9SKyle Evans ret = _nn_mul_redc1(out, in1, in2, p, mpinv);
260*f0865ec9SKyle Evans }
261*f0865ec9SKyle Evans
262*f0865ec9SKyle Evans err:
263*f0865ec9SKyle Evans return ret;
264*f0865ec9SKyle Evans }
265*f0865ec9SKyle Evans
266*f0865ec9SKyle Evans /*
267*f0865ec9SKyle Evans * Compute in1 * in2 mod p where in1 and in2 are numbers < p.
268*f0865ec9SKyle Evans * When p is an odd number, the function redcifies in1 and in2
269*f0865ec9SKyle Evans * parameters, does the computation and then unredcifies the
270*f0865ec9SKyle Evans * result. When p is an even number, we use an unoptimized mul
271*f0865ec9SKyle Evans * then mod operations sequence.
272*f0865ec9SKyle Evans *
273*f0865ec9SKyle Evans * From a mathematical standpoint, the computation is equivalent
274*f0865ec9SKyle Evans * to performing:
275*f0865ec9SKyle Evans *
276*f0865ec9SKyle Evans * nn_mul(&tmp2, in1, in2);
277*f0865ec9SKyle Evans * nn_mod(&out, &tmp2, q);
278*f0865ec9SKyle Evans *
279*f0865ec9SKyle Evans * but the modular reduction is done progressively during
280*f0865ec9SKyle Evans * Montgomery reduction when p is odd (which brings more efficiency).
281*f0865ec9SKyle Evans *
282*f0865ec9SKyle Evans * Inputs are supposed to be < p (i.e. taken modulo p).
283*f0865ec9SKyle Evans *
284*f0865ec9SKyle Evans * The function returns 0 on success, -1 on error.
285*f0865ec9SKyle Evans */
nn_mod_mul(nn_t out,nn_src_t in1,nn_src_t in2,nn_src_t p_in)286*f0865ec9SKyle Evans int nn_mod_mul(nn_t out, nn_src_t in1, nn_src_t in2, nn_src_t p_in)
287*f0865ec9SKyle Evans {
288*f0865ec9SKyle Evans nn r_square, p;
289*f0865ec9SKyle Evans nn in1_tmp, in2_tmp;
290*f0865ec9SKyle Evans word_t mpinv;
291*f0865ec9SKyle Evans int ret, isodd;
292*f0865ec9SKyle Evans r_square.magic = in1_tmp.magic = in2_tmp.magic = p.magic = WORD(0);
293*f0865ec9SKyle Evans
294*f0865ec9SKyle Evans /* When p_in is even, we cannot work with Montgomery multiplication */
295*f0865ec9SKyle Evans ret = nn_isodd(p_in, &isodd); EG(ret, err);
296*f0865ec9SKyle Evans if(!isodd){
297*f0865ec9SKyle Evans /* When p_in is even, we fallback to less efficient mul then mod */
298*f0865ec9SKyle Evans ret = nn_mul(out, in1, in2); EG(ret, err);
299*f0865ec9SKyle Evans ret = nn_mod(out, out, p_in); EG(ret, err);
300*f0865ec9SKyle Evans }
301*f0865ec9SKyle Evans else{
302*f0865ec9SKyle Evans /* Here, p_in is odd and we can use redcification */
303*f0865ec9SKyle Evans ret = nn_copy(&p, p_in); EG(ret, err);
304*f0865ec9SKyle Evans
305*f0865ec9SKyle Evans /*
306*f0865ec9SKyle Evans * In order for our reciprocal division routines to work, it is
307*f0865ec9SKyle Evans * expected that the bit length (including leading zeroes) of
308*f0865ec9SKyle Evans * input prime p is >= 2 * wlen where wlen is the number of bits
309*f0865ec9SKyle Evans * of a word size.
310*f0865ec9SKyle Evans */
311*f0865ec9SKyle Evans if (p.wlen < 2) {
312*f0865ec9SKyle Evans ret = nn_set_wlen(&p, 2); EG(ret, err);
313*f0865ec9SKyle Evans }
314*f0865ec9SKyle Evans
315*f0865ec9SKyle Evans /* Compute Mongtomery coefs.
316*f0865ec9SKyle Evans * NOTE: in1_tmp holds a dummy value here after the operation.
317*f0865ec9SKyle Evans */
318*f0865ec9SKyle Evans ret = nn_compute_redc1_coefs(&in1_tmp, &r_square, &p, &mpinv); EG(ret, err);
319*f0865ec9SKyle Evans
320*f0865ec9SKyle Evans /* redcify in1 and in2 */
321*f0865ec9SKyle Evans ret = nn_mul_redc1(&in1_tmp, in1, &r_square, &p, mpinv); EG(ret, err);
322*f0865ec9SKyle Evans ret = nn_mul_redc1(&in2_tmp, in2, &r_square, &p, mpinv); EG(ret, err);
323*f0865ec9SKyle Evans
324*f0865ec9SKyle Evans /* Compute in1 * in2 mod p in montgomery world.
325*f0865ec9SKyle Evans * NOTE: r_square holds the result after the operation.
326*f0865ec9SKyle Evans */
327*f0865ec9SKyle Evans ret = nn_mul_redc1(&r_square, &in1_tmp, &in2_tmp, &p, mpinv); EG(ret, err);
328*f0865ec9SKyle Evans
329*f0865ec9SKyle Evans /* Come back to real world by unredcifying result */
330*f0865ec9SKyle Evans ret = nn_init(&in1_tmp, 0); EG(ret, err);
331*f0865ec9SKyle Evans ret = nn_one(&in1_tmp); EG(ret, err);
332*f0865ec9SKyle Evans ret = nn_mul_redc1(out, &r_square, &in1_tmp, &p, mpinv); EG(ret, err);
333*f0865ec9SKyle Evans }
334*f0865ec9SKyle Evans
335*f0865ec9SKyle Evans err:
336*f0865ec9SKyle Evans nn_uninit(&p);
337*f0865ec9SKyle Evans nn_uninit(&r_square);
338*f0865ec9SKyle Evans nn_uninit(&in1_tmp);
339*f0865ec9SKyle Evans nn_uninit(&in2_tmp);
340*f0865ec9SKyle Evans
341*f0865ec9SKyle Evans return ret;
342*f0865ec9SKyle Evans }
343