xref: /freebsd/crypto/libecc/src/nn/nn_div.c (revision f0865ec9906d5a18fa2a3b61381f22ce16e606ad)
1*f0865ec9SKyle Evans /*
2*f0865ec9SKyle Evans  *  Copyright (C) 2017 - This file is part of libecc project
3*f0865ec9SKyle Evans  *
4*f0865ec9SKyle Evans  *  Authors:
5*f0865ec9SKyle Evans  *      Ryad BENADJILA <ryadbenadjila@gmail.com>
6*f0865ec9SKyle Evans  *      Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr>
7*f0865ec9SKyle Evans  *      Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr>
8*f0865ec9SKyle Evans  *
9*f0865ec9SKyle Evans  *  Contributors:
10*f0865ec9SKyle Evans  *      Nicolas VIVET <nicolas.vivet@ssi.gouv.fr>
11*f0865ec9SKyle Evans  *      Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr>
12*f0865ec9SKyle Evans  *
13*f0865ec9SKyle Evans  *  This software is licensed under a dual BSD and GPL v2 license.
14*f0865ec9SKyle Evans  *  See LICENSE file at the root folder of the project.
15*f0865ec9SKyle Evans  */
16*f0865ec9SKyle Evans #include <libecc/nn/nn_mul_public.h>
17*f0865ec9SKyle Evans #include <libecc/nn/nn_logical.h>
18*f0865ec9SKyle Evans #include <libecc/nn/nn_add.h>
19*f0865ec9SKyle Evans #include <libecc/nn/nn.h>
20*f0865ec9SKyle Evans /* Use internal API header */
21*f0865ec9SKyle Evans #include "nn_div.h"
22*f0865ec9SKyle Evans 
23*f0865ec9SKyle Evans /*
24*f0865ec9SKyle Evans  * Some helper functions to perform operations on an arbitrary part
25*f0865ec9SKyle Evans  * of a multiprecision number.
26*f0865ec9SKyle Evans  * This is exactly the same code as for operations on the least significant
27*f0865ec9SKyle Evans  * part of a multiprecision number except for the starting point in the
28*f0865ec9SKyle Evans  * array representing it.
29*f0865ec9SKyle Evans  * Done in *constant time*.
30*f0865ec9SKyle Evans  *
31*f0865ec9SKyle Evans  * Operations producing an output are in place.
32*f0865ec9SKyle Evans  */
33*f0865ec9SKyle Evans 
34*f0865ec9SKyle Evans /*
35*f0865ec9SKyle Evans  * Compare all the bits of in2 with the same number of bits in in1 starting at
36*f0865ec9SKyle Evans  * 'shift' position in in1. in1 must be long enough for that purpose, i.e.
37*f0865ec9SKyle Evans  * in1->wlen >= (in2->wlen + shift). The comparison value is provided in
38*f0865ec9SKyle Evans  * 'cmp' parameter. The function returns 0 on success, -1 on error.
39*f0865ec9SKyle Evans  *
40*f0865ec9SKyle Evans  * The function is an internal helper; it expects initialized nn in1 and
41*f0865ec9SKyle Evans  * in2: it does not verify that.
42*f0865ec9SKyle Evans  */
_nn_cmp_shift(nn_src_t in1,nn_src_t in2,u8 shift,int * cmp)43*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_cmp_shift(nn_src_t in1, nn_src_t in2, u8 shift, int *cmp)
44*f0865ec9SKyle Evans {
45*f0865ec9SKyle Evans 	int ret, mask, tmp;
46*f0865ec9SKyle Evans 	u8 i;
47*f0865ec9SKyle Evans 
48*f0865ec9SKyle Evans 	MUST_HAVE((in1->wlen >= (in2->wlen + shift)), ret, err);
49*f0865ec9SKyle Evans 	MUST_HAVE((cmp != NULL), ret, err);
50*f0865ec9SKyle Evans 
51*f0865ec9SKyle Evans 	tmp = 0;
52*f0865ec9SKyle Evans 	for (i = in2->wlen; i > 0; i--) {
53*f0865ec9SKyle Evans 		mask = (!(tmp & 0x1));
54*f0865ec9SKyle Evans 		tmp += ((in1->val[shift + i - 1] > in2->val[i - 1]) & mask);
55*f0865ec9SKyle Evans 		tmp -= ((in1->val[shift + i - 1] < in2->val[i - 1]) & mask);
56*f0865ec9SKyle Evans 	}
57*f0865ec9SKyle Evans 	(*cmp) = tmp;
58*f0865ec9SKyle Evans 	ret = 0;
59*f0865ec9SKyle Evans 
60*f0865ec9SKyle Evans err:
61*f0865ec9SKyle Evans 	return ret;
62*f0865ec9SKyle Evans }
63*f0865ec9SKyle Evans 
64*f0865ec9SKyle Evans /*
65*f0865ec9SKyle Evans  * Conditionally subtract a shifted version of in from out, i.e.:
66*f0865ec9SKyle Evans  *   - if cnd == 1, out <- out - (in << shift)
67*f0865ec9SKyle Evans  *   - if cnd == 0, out <- out
68*f0865ec9SKyle Evans  * The function returns 0 on success, -1 on error. On success, 'borrow'
69*f0865ec9SKyle Evans  * provides the possible borrow resulting from the subtraction. 'borrow'
70*f0865ec9SKyle Evans  * is not meaningful on error.
71*f0865ec9SKyle Evans  *
72*f0865ec9SKyle Evans  * The function is an internal helper; it expects initialized nn out and
73*f0865ec9SKyle Evans  * in: it does not verify that.
74*f0865ec9SKyle Evans  */
_nn_cnd_sub_shift(int cnd,nn_t out,nn_src_t in,u8 shift,word_t * borrow)75*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_cnd_sub_shift(int cnd, nn_t out, nn_src_t in,
76*f0865ec9SKyle Evans 			    u8 shift, word_t *borrow)
77*f0865ec9SKyle Evans {
78*f0865ec9SKyle Evans 	word_t tmp, borrow1, borrow2, _borrow = WORD(0);
79*f0865ec9SKyle Evans 	word_t mask = WORD_MASK_IFNOTZERO(cnd);
80*f0865ec9SKyle Evans 	int ret;
81*f0865ec9SKyle Evans 	u8 i;
82*f0865ec9SKyle Evans 
83*f0865ec9SKyle Evans 	MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err);
84*f0865ec9SKyle Evans 	MUST_HAVE((borrow != NULL), ret, err);
85*f0865ec9SKyle Evans 
86*f0865ec9SKyle Evans 	/*
87*f0865ec9SKyle Evans 	 *  Perform subtraction one word at a time,
88*f0865ec9SKyle Evans 	 *  propagating the borrow.
89*f0865ec9SKyle Evans 	 */
90*f0865ec9SKyle Evans 	for (i = 0; i < in->wlen; i++) {
91*f0865ec9SKyle Evans 		tmp = (word_t)(out->val[shift + i] - (in->val[i] & mask));
92*f0865ec9SKyle Evans 		borrow1 = (word_t)(tmp > out->val[shift + i]);
93*f0865ec9SKyle Evans 		out->val[shift + i] = (word_t)(tmp - _borrow);
94*f0865ec9SKyle Evans 		borrow2 = (word_t)(out->val[shift + i] > tmp);
95*f0865ec9SKyle Evans 		/* There is at most one borrow going out. */
96*f0865ec9SKyle Evans 		_borrow = (word_t)(borrow1 | borrow2);
97*f0865ec9SKyle Evans 	}
98*f0865ec9SKyle Evans 
99*f0865ec9SKyle Evans 	(*borrow) = _borrow;
100*f0865ec9SKyle Evans 	ret = 0;
101*f0865ec9SKyle Evans 
102*f0865ec9SKyle Evans err:
103*f0865ec9SKyle Evans 	return ret;
104*f0865ec9SKyle Evans }
105*f0865ec9SKyle Evans 
106*f0865ec9SKyle Evans /*
107*f0865ec9SKyle Evans  * Subtract a shifted version of 'in' multiplied by 'w' from 'out' and return
108*f0865ec9SKyle Evans  * borrow. The function returns 0 on success, -1 on error. 'borrow' is
109*f0865ec9SKyle Evans  * meaningful only on success.
110*f0865ec9SKyle Evans  *
111*f0865ec9SKyle Evans  * The function is an internal helper; it expects initialized nn out and
112*f0865ec9SKyle Evans  * in: it does not verify that.
113*f0865ec9SKyle Evans  */
_nn_submul_word_shift(nn_t out,nn_src_t in,word_t w,u8 shift,word_t * borrow)114*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_submul_word_shift(nn_t out, nn_src_t in, word_t w, u8 shift,
115*f0865ec9SKyle Evans 				word_t *borrow)
116*f0865ec9SKyle Evans {
117*f0865ec9SKyle Evans 	word_t _borrow = WORD(0), prod_high, prod_low, tmp;
118*f0865ec9SKyle Evans 	int ret;
119*f0865ec9SKyle Evans 	u8 i;
120*f0865ec9SKyle Evans 
121*f0865ec9SKyle Evans 	MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err);
122*f0865ec9SKyle Evans 	MUST_HAVE((borrow != NULL), ret, err);
123*f0865ec9SKyle Evans 
124*f0865ec9SKyle Evans 	for (i = 0; i < in->wlen; i++) {
125*f0865ec9SKyle Evans 		/*
126*f0865ec9SKyle Evans 		 * Compute the result of the multiplication of
127*f0865ec9SKyle Evans 		 * two words.
128*f0865ec9SKyle Evans 		 */
129*f0865ec9SKyle Evans 		WORD_MUL(prod_high, prod_low, in->val[i], w);
130*f0865ec9SKyle Evans 
131*f0865ec9SKyle Evans 		/*
132*f0865ec9SKyle Evans 		 * And add previous borrow.
133*f0865ec9SKyle Evans 		 */
134*f0865ec9SKyle Evans 		prod_low = (word_t)(prod_low + _borrow);
135*f0865ec9SKyle Evans 		prod_high = (word_t)(prod_high + (prod_low < _borrow));
136*f0865ec9SKyle Evans 
137*f0865ec9SKyle Evans 		/*
138*f0865ec9SKyle Evans 		 * Subtract computed word at current position in result.
139*f0865ec9SKyle Evans 		 */
140*f0865ec9SKyle Evans 		tmp = (word_t)(out->val[shift + i] - prod_low);
141*f0865ec9SKyle Evans 		_borrow = (word_t)(prod_high + (tmp > out->val[shift + i]));
142*f0865ec9SKyle Evans 		out->val[shift + i] = tmp;
143*f0865ec9SKyle Evans 	}
144*f0865ec9SKyle Evans 
145*f0865ec9SKyle Evans 	(*borrow) = _borrow;
146*f0865ec9SKyle Evans 	ret = 0;
147*f0865ec9SKyle Evans 
148*f0865ec9SKyle Evans err:
149*f0865ec9SKyle Evans 	return ret;
150*f0865ec9SKyle Evans }
151*f0865ec9SKyle Evans 
152*f0865ec9SKyle Evans /*
153*f0865ec9SKyle Evans  * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b'
154*f0865ec9SKyle Evans  * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized on
155*f0865ec9SKyle Evans  * return. * Computation are performed in *constant time*, only depending on
156*f0865ec9SKyle Evans  * the lengths of 'a' and 'b', but not on the values of 'a' and 'b'.
157*f0865ec9SKyle Evans  *
158*f0865ec9SKyle Evans  * This uses the above function to perform arithmetic on arbitrary parts
159*f0865ec9SKyle Evans  * of multiprecision numbers.
160*f0865ec9SKyle Evans  *
161*f0865ec9SKyle Evans  * The algorithm used is schoolbook division:
162*f0865ec9SKyle Evans  * + the quotient is computed word by word,
163*f0865ec9SKyle Evans  * + a small division of the MSW is performed to obtain an
164*f0865ec9SKyle Evans  *   approximation of the MSW of the quotient,
165*f0865ec9SKyle Evans  * + the approximation is corrected to obtain the correct
166*f0865ec9SKyle Evans  *   multiprecision MSW of the quotient,
167*f0865ec9SKyle Evans  * + the corresponding product is subtracted from the dividend,
168*f0865ec9SKyle Evans  * + the same procedure is used for the following word of the quotient.
169*f0865ec9SKyle Evans  *
170*f0865ec9SKyle Evans  * It is assumed that:
171*f0865ec9SKyle Evans  * + b is normalized: the MSB of its MSW is 1,
172*f0865ec9SKyle Evans  * + the most significant part of a is smaller than b,
173*f0865ec9SKyle Evans  * + a precomputed reciprocal
174*f0865ec9SKyle Evans  *     v = floor(B^3/(d+1)) - B
175*f0865ec9SKyle Evans  *   where d is the MSW of the (normalized) divisor
176*f0865ec9SKyle Evans  *   is given to perform the small 3-by-2 division.
177*f0865ec9SKyle Evans  * + using this reciprocal, the approximated quotient is always
178*f0865ec9SKyle Evans  *   too small and at most one multiprecision correction is needed.
179*f0865ec9SKyle Evans  *
180*f0865ec9SKyle Evans  * It returns 0 on sucess, -1 on error.
181*f0865ec9SKyle Evans  *
182*f0865ec9SKyle Evans  * CAUTION:
183*f0865ec9SKyle Evans  *
184*f0865ec9SKyle Evans  * - The function is expected to be used ONLY by the generic version
185*f0865ec9SKyle Evans  *   nn_divrem_normalized() defined later in the file.
186*f0865ec9SKyle Evans  * - All parameters must have been initialized. Unlike exported/public
187*f0865ec9SKyle Evans  *   functions, this internal helper does not verify that nn parameters
188*f0865ec9SKyle Evans  *   have been initialized. Again, this is expected from the caller
189*f0865ec9SKyle Evans  *   (nn_divrem_normalized()).
190*f0865ec9SKyle Evans  * - The function does not support aliasing of 'b' or 'q'. See
191*f0865ec9SKyle Evans  *   _nn_divrem_normalized_aliased() for such a wrapper.
192*f0865ec9SKyle Evans  *
193*f0865ec9SKyle Evans  */
_nn_divrem_normalized(nn_t q,nn_t r,nn_src_t a,nn_src_t b,word_t v)194*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized(nn_t q, nn_t r,
195*f0865ec9SKyle Evans 				 nn_src_t a, nn_src_t b, word_t v)
196*f0865ec9SKyle Evans {
197*f0865ec9SKyle Evans 	word_t borrow, qstar, qh, ql, rh, rl; /* for 3-by-2 div. */
198*f0865ec9SKyle Evans 	int _small, cmp, ret;
199*f0865ec9SKyle Evans 	u8 i;
200*f0865ec9SKyle Evans 
201*f0865ec9SKyle Evans 	MUST_HAVE(!(b->wlen <= 0), ret, err);
202*f0865ec9SKyle Evans 	MUST_HAVE(!(a->wlen <= b->wlen), ret, err);
203*f0865ec9SKyle Evans 	MUST_HAVE(!(!((b->val[b->wlen - 1] >> (WORD_BITS - 1)) == WORD(1))), ret, err);
204*f0865ec9SKyle Evans 	MUST_HAVE(!_nn_cmp_shift(a, b, (u8)(a->wlen - b->wlen), &cmp) && (cmp < 0), ret, err);
205*f0865ec9SKyle Evans 
206*f0865ec9SKyle Evans 	/* Handle trivial aliasing for a and r */
207*f0865ec9SKyle Evans 	if (r != a) {
208*f0865ec9SKyle Evans 		ret = nn_set_wlen(r, a->wlen); EG(ret, err);
209*f0865ec9SKyle Evans 		ret = nn_copy(r, a); EG(ret, err);
210*f0865ec9SKyle Evans 	}
211*f0865ec9SKyle Evans 
212*f0865ec9SKyle Evans 	ret = nn_set_wlen(q, (u8)(r->wlen - b->wlen)); EG(ret, err);
213*f0865ec9SKyle Evans 
214*f0865ec9SKyle Evans 	/*
215*f0865ec9SKyle Evans 	 * Compute subsequent words of the quotient one by one.
216*f0865ec9SKyle Evans 	 * Perform approximate 3-by-2 division using the precomputed
217*f0865ec9SKyle Evans 	 * reciprocal and correct afterward.
218*f0865ec9SKyle Evans 	 */
219*f0865ec9SKyle Evans 	for (i = r->wlen; i > b->wlen; i--) {
220*f0865ec9SKyle Evans 		u8 shift = (u8)(i - b->wlen - 1);
221*f0865ec9SKyle Evans 
222*f0865ec9SKyle Evans 		/*
223*f0865ec9SKyle Evans 		 * Perform 3-by-2 approximate division:
224*f0865ec9SKyle Evans 		 * <qstar, qh, ql> = <rh, rl> * (v + B)
225*f0865ec9SKyle Evans 		 * We are only interested in qstar.
226*f0865ec9SKyle Evans 		 */
227*f0865ec9SKyle Evans 		rh = r->val[i - 1];
228*f0865ec9SKyle Evans 		rl = r->val[i - 2];
229*f0865ec9SKyle Evans 		/* Perform 2-by-1 multiplication. */
230*f0865ec9SKyle Evans 		WORD_MUL(qh, ql, rl, v);
231*f0865ec9SKyle Evans 		WORD_MUL(qstar, ql, rh, v);
232*f0865ec9SKyle Evans 		/* And propagate carries. */
233*f0865ec9SKyle Evans 		qh = (word_t)(qh + ql);
234*f0865ec9SKyle Evans 		qstar = (word_t)(qstar + (qh < ql));
235*f0865ec9SKyle Evans 		qh = (word_t)(qh + rl);
236*f0865ec9SKyle Evans 		rh = (word_t)(rh + (qh < rl));
237*f0865ec9SKyle Evans 		qstar = (word_t)(qstar + rh);
238*f0865ec9SKyle Evans 
239*f0865ec9SKyle Evans 		/*
240*f0865ec9SKyle Evans 		 * Compute approximate quotient times divisor
241*f0865ec9SKyle Evans 		 * and subtract it from remainder:
242*f0865ec9SKyle Evans 		 * r = r - (b*qstar << B^shift)
243*f0865ec9SKyle Evans 		 */
244*f0865ec9SKyle Evans 		ret = _nn_submul_word_shift(r, b, qstar, shift, &borrow); EG(ret, err);
245*f0865ec9SKyle Evans 
246*f0865ec9SKyle Evans 		/* Check the approximate quotient was indeed not too large. */
247*f0865ec9SKyle Evans 		MUST_HAVE(!(r->val[i - 1] < borrow), ret, err);
248*f0865ec9SKyle Evans 		r->val[i - 1] = (word_t)(r->val[i - 1] - borrow);
249*f0865ec9SKyle Evans 
250*f0865ec9SKyle Evans 		/*
251*f0865ec9SKyle Evans 		 * Check whether the approximate quotient was too small or not.
252*f0865ec9SKyle Evans 		 * At most one multiprecision correction is needed.
253*f0865ec9SKyle Evans 		 */
254*f0865ec9SKyle Evans 		ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err);
255*f0865ec9SKyle Evans 		_small = ((!!(r->val[i - 1])) | (cmp >= 0));
256*f0865ec9SKyle Evans 		/* Perform conditional multiprecision correction. */
257*f0865ec9SKyle Evans 		ret = _nn_cnd_sub_shift(_small, r, b, shift, &borrow); EG(ret, err);
258*f0865ec9SKyle Evans 		MUST_HAVE(!(r->val[i - 1] != borrow), ret, err);
259*f0865ec9SKyle Evans 		r->val[i - 1] = (word_t)(r->val[i - 1] - borrow);
260*f0865ec9SKyle Evans 		/*
261*f0865ec9SKyle Evans 		 * Adjust the quotient if it was too small and set it in the
262*f0865ec9SKyle Evans 		 * multiprecision array.
263*f0865ec9SKyle Evans 		 */
264*f0865ec9SKyle Evans 		qstar = (word_t)(qstar + (word_t)_small);
265*f0865ec9SKyle Evans 		q->val[shift] = qstar;
266*f0865ec9SKyle Evans 		/*
267*f0865ec9SKyle Evans 		 * Check that the MSW of remainder was cancelled out and that
268*f0865ec9SKyle Evans 		 * we could not increase the quotient anymore.
269*f0865ec9SKyle Evans 		 */
270*f0865ec9SKyle Evans 		MUST_HAVE(!(r->val[r->wlen - 1] != WORD(0)), ret, err);
271*f0865ec9SKyle Evans 
272*f0865ec9SKyle Evans 		ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err);
273*f0865ec9SKyle Evans 		MUST_HAVE(!(cmp >= 0), ret, err);
274*f0865ec9SKyle Evans 
275*f0865ec9SKyle Evans 		ret = nn_set_wlen(r, (u8)(r->wlen - 1)); EG(ret, err);
276*f0865ec9SKyle Evans 	}
277*f0865ec9SKyle Evans 
278*f0865ec9SKyle Evans err:
279*f0865ec9SKyle Evans 	return ret;
280*f0865ec9SKyle Evans }
281*f0865ec9SKyle Evans 
282*f0865ec9SKyle Evans /*
283*f0865ec9SKyle Evans  * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b'
284*f0865ec9SKyle Evans  * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized.
285*f0865ec9SKyle Evans  * Compared to _nn_divrem_normalized(), this internal version
286*f0865ec9SKyle Evans  * explicitly handle the case where 'b' and 'r' point to the same nn (i.e. 'r'
287*f0865ec9SKyle Evans  * result is stored in 'b' on success), hence the removal of 'r' parameter from
288*f0865ec9SKyle Evans  * function prototype compared to _nn_divrem_normalized().
289*f0865ec9SKyle Evans  *
290*f0865ec9SKyle Evans  * The computation is performed in *constant time*, see documentation of
291*f0865ec9SKyle Evans  * _nn_divrem_normalized().
292*f0865ec9SKyle Evans  *
293*f0865ec9SKyle Evans  * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the
294*f0865ec9SKyle Evans  * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than
295*f0865ec9SKyle Evans  * 'b'.
296*f0865ec9SKyle Evans  *
297*f0865ec9SKyle Evans  * The function returns 0 on success, -1 on error.
298*f0865ec9SKyle Evans  *
299*f0865ec9SKyle Evans  * CAUTION:
300*f0865ec9SKyle Evans  *
301*f0865ec9SKyle Evans  * - The function is expected to be used ONLY by the generic version
302*f0865ec9SKyle Evans  *   nn_divrem_normalized() defined later in the file.
303*f0865ec9SKyle Evans  * - All parameters must have been initialized. Unlike exported/public
304*f0865ec9SKyle Evans  *   functions, this internal helper does not verify that nn parameters
305*f0865ec9SKyle Evans  *   have been initialized. Again, this is expected from the caller
306*f0865ec9SKyle Evans  *   (nn_divrem_normalized()).
307*f0865ec9SKyle Evans  * - The function does not support aliasing of 'a' or 'q'.
308*f0865ec9SKyle Evans  *
309*f0865ec9SKyle Evans  */
_nn_divrem_normalized_aliased(nn_t q,nn_src_t a,nn_t b,word_t v)310*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized_aliased(nn_t q, nn_src_t a, nn_t b, word_t v)
311*f0865ec9SKyle Evans {
312*f0865ec9SKyle Evans 	int ret;
313*f0865ec9SKyle Evans 	nn r;
314*f0865ec9SKyle Evans 	r.magic = WORD(0);
315*f0865ec9SKyle Evans 
316*f0865ec9SKyle Evans 	ret = nn_init(&r, 0); EG(ret, err);
317*f0865ec9SKyle Evans 	ret = _nn_divrem_normalized(q, &r, a, b, v); EG(ret, err);
318*f0865ec9SKyle Evans 	ret = nn_copy(b, &r); EG(ret, err);
319*f0865ec9SKyle Evans 
320*f0865ec9SKyle Evans err:
321*f0865ec9SKyle Evans 	nn_uninit(&r);
322*f0865ec9SKyle Evans 	return ret;
323*f0865ec9SKyle Evans }
324*f0865ec9SKyle Evans 
325*f0865ec9SKyle Evans /*
326*f0865ec9SKyle Evans  * Compute quotient and remainder of Euclidean division, and do not normalize
327*f0865ec9SKyle Evans  * them. Done in *constant time*, see documentation of _nn_divrem_normalized().
328*f0865ec9SKyle Evans  *
329*f0865ec9SKyle Evans  * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the
330*f0865ec9SKyle Evans  * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than
331*f0865ec9SKyle Evans  * 'b'.
332*f0865ec9SKyle Evans  *
333*f0865ec9SKyle Evans  * Aliasing is supported for 'r' only (with 'b'), i.e. 'r' and 'b' can point
334*f0865ec9SKyle Evans  * to the same nn.
335*f0865ec9SKyle Evans  *
336*f0865ec9SKyle Evans  * The function returns 0 on success, -1 on error.
337*f0865ec9SKyle Evans  */
nn_divrem_normalized(nn_t q,nn_t r,nn_src_t a,nn_src_t b,word_t v)338*f0865ec9SKyle Evans int nn_divrem_normalized(nn_t q, nn_t r, nn_src_t a, nn_src_t b, word_t v)
339*f0865ec9SKyle Evans {
340*f0865ec9SKyle Evans 	int ret;
341*f0865ec9SKyle Evans 
342*f0865ec9SKyle Evans 	ret = nn_check_initialized(a); EG(ret, err);
343*f0865ec9SKyle Evans 	ret = nn_check_initialized(q); EG(ret, err);
344*f0865ec9SKyle Evans 	ret = nn_check_initialized(r); EG(ret, err);
345*f0865ec9SKyle Evans 
346*f0865ec9SKyle Evans 	/* Unsupported aliasings */
347*f0865ec9SKyle Evans 	MUST_HAVE((q != r) && (q != a) && (q != b), ret, err);
348*f0865ec9SKyle Evans 
349*f0865ec9SKyle Evans 	if (r == b) {
350*f0865ec9SKyle Evans 		ret = _nn_divrem_normalized_aliased(q, a, r, v);
351*f0865ec9SKyle Evans 	} else {
352*f0865ec9SKyle Evans 		ret = nn_check_initialized(b); EG(ret, err);
353*f0865ec9SKyle Evans 		ret = _nn_divrem_normalized(q, r, a, b, v);
354*f0865ec9SKyle Evans 	}
355*f0865ec9SKyle Evans 
356*f0865ec9SKyle Evans err:
357*f0865ec9SKyle Evans 	return ret;
358*f0865ec9SKyle Evans }
359*f0865ec9SKyle Evans 
360*f0865ec9SKyle Evans /*
361*f0865ec9SKyle Evans  * Compute remainder only and do not normalize it.
362*f0865ec9SKyle Evans  * Constant time, see documentation of _nn_divrem_normalized.
363*f0865ec9SKyle Evans  *
364*f0865ec9SKyle Evans  * Support aliasing of inputs and outputs.
365*f0865ec9SKyle Evans  *
366*f0865ec9SKyle Evans  * The function returns 0 on success, -1 on error.
367*f0865ec9SKyle Evans  */
nn_mod_normalized(nn_t r,nn_src_t a,nn_src_t b,word_t v)368*f0865ec9SKyle Evans int nn_mod_normalized(nn_t r, nn_src_t a, nn_src_t b, word_t v)
369*f0865ec9SKyle Evans {
370*f0865ec9SKyle Evans 	int ret;
371*f0865ec9SKyle Evans 	nn q;
372*f0865ec9SKyle Evans 	q.magic = WORD(0);
373*f0865ec9SKyle Evans 
374*f0865ec9SKyle Evans 	ret = nn_init(&q, 0); EG(ret, err);
375*f0865ec9SKyle Evans 	ret = nn_divrem_normalized(&q, r, a, b, v);
376*f0865ec9SKyle Evans 
377*f0865ec9SKyle Evans err:
378*f0865ec9SKyle Evans 	nn_uninit(&q);
379*f0865ec9SKyle Evans 	return ret;
380*f0865ec9SKyle Evans }
381*f0865ec9SKyle Evans 
382*f0865ec9SKyle Evans /*
383*f0865ec9SKyle Evans  * Compute quotient and remainder of Euclidean division,
384*f0865ec9SKyle Evans  * and do not normalize them.
385*f0865ec9SKyle Evans  * Done in *constant time*,
386*f0865ec9SKyle Evans  * only depending on the lengths of 'a' and 'b' and the value of 'cnt',
387*f0865ec9SKyle Evans  * but not on the values of 'a' and 'b'.
388*f0865ec9SKyle Evans  *
389*f0865ec9SKyle Evans  * Assume that b has been normalized by a 'cnt' bit shift,
390*f0865ec9SKyle Evans  * that v is the reciprocal of the MSW of 'b',
391*f0865ec9SKyle Evans  * but a is not shifted yet.
392*f0865ec9SKyle Evans  * Useful when multiple multiplication by the same b are performed,
393*f0865ec9SKyle Evans  * e.g. at the fp level.
394*f0865ec9SKyle Evans  *
395*f0865ec9SKyle Evans  * All outputs MUST have been initialized. The function does not support
396*f0865ec9SKyle Evans  * aliasing of 'b' or 'q'. It returns 0 on success, -1 on error.
397*f0865ec9SKyle Evans  *
398*f0865ec9SKyle Evans  * CAUTION:
399*f0865ec9SKyle Evans  *
400*f0865ec9SKyle Evans  * - The function is expected to be used ONLY by the generic version
401*f0865ec9SKyle Evans  *   nn_divrem_normalized() defined later in the file.
402*f0865ec9SKyle Evans  * - All parameters must have been initialized. Unlike exported/public
403*f0865ec9SKyle Evans  *   functions, this internal helper does not verify that
404*f0865ec9SKyle Evans  *   have been initialized. Again, this is expected from the caller
405*f0865ec9SKyle Evans  *   (nn_divrem_unshifted()).
406*f0865ec9SKyle Evans  * - The function does not support aliasing. See
407*f0865ec9SKyle Evans  *   _nn_divrem_unshifted_aliased() for such a wrapper.
408*f0865ec9SKyle Evans  */
_nn_divrem_unshifted(nn_t q,nn_t r,nn_src_t a,nn_src_t b_norm,word_t v,bitcnt_t cnt)409*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b_norm,
410*f0865ec9SKyle Evans 				word_t v, bitcnt_t cnt)
411*f0865ec9SKyle Evans {
412*f0865ec9SKyle Evans 	nn a_shift;
413*f0865ec9SKyle Evans 	u8 new_wlen, b_wlen;
414*f0865ec9SKyle Evans 	int larger, ret, cmp;
415*f0865ec9SKyle Evans 	word_t borrow;
416*f0865ec9SKyle Evans 	a_shift.magic = WORD(0);
417*f0865ec9SKyle Evans 
418*f0865ec9SKyle Evans 	/* Avoid overflow */
419*f0865ec9SKyle Evans 	MUST_HAVE(((a->wlen + BIT_LEN_WORDS(cnt)) < NN_MAX_WORD_LEN), ret, err);
420*f0865ec9SKyle Evans 
421*f0865ec9SKyle Evans 	/* We now know that new_wlen will fit in an u8 */
422*f0865ec9SKyle Evans 	new_wlen = (u8)(a->wlen + (u8)BIT_LEN_WORDS(cnt));
423*f0865ec9SKyle Evans 
424*f0865ec9SKyle Evans 	b_wlen = b_norm->wlen;
425*f0865ec9SKyle Evans 	if (new_wlen < b_wlen) { /* trivial case */
426*f0865ec9SKyle Evans 		ret = nn_copy(r, a); EG(ret, err);
427*f0865ec9SKyle Evans 		ret = nn_zero(q);
428*f0865ec9SKyle Evans 		goto err;
429*f0865ec9SKyle Evans 	}
430*f0865ec9SKyle Evans 
431*f0865ec9SKyle Evans 	/* Shift a. */
432*f0865ec9SKyle Evans 	ret = nn_init(&a_shift, (u16)(new_wlen * WORD_BYTES)); EG(ret, err);
433*f0865ec9SKyle Evans 	ret = nn_set_wlen(&a_shift, new_wlen); EG(ret, err);
434*f0865ec9SKyle Evans 	ret = nn_lshift_fixedlen(&a_shift, a, cnt); EG(ret, err);
435*f0865ec9SKyle Evans 	ret = nn_set_wlen(r, new_wlen); EG(ret, err);
436*f0865ec9SKyle Evans 
437*f0865ec9SKyle Evans 	if (new_wlen == b_wlen) {
438*f0865ec9SKyle Evans 		/* Ensure that a is smaller than b. */
439*f0865ec9SKyle Evans 		ret = nn_cmp(&a_shift, b_norm, &cmp); EG(ret, err);
440*f0865ec9SKyle Evans 		larger = (cmp >= 0);
441*f0865ec9SKyle Evans 		ret = nn_cnd_sub(larger, r, &a_shift, b_norm); EG(ret, err);
442*f0865ec9SKyle Evans 		MUST_HAVE(((!nn_cmp(r, b_norm, &cmp)) && (cmp < 0)), ret, err);
443*f0865ec9SKyle Evans 
444*f0865ec9SKyle Evans 		/* Set MSW of quotient. */
445*f0865ec9SKyle Evans 		ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err);
446*f0865ec9SKyle Evans 		q->val[new_wlen - b_wlen] = (word_t) larger;
447*f0865ec9SKyle Evans 		/* And we are done as the quotient is 0 or 1. */
448*f0865ec9SKyle Evans 	} else if (new_wlen > b_wlen) {
449*f0865ec9SKyle Evans 		/* Ensure that most significant part of a is smaller than b. */
450*f0865ec9SKyle Evans 		ret = _nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp); EG(ret, err);
451*f0865ec9SKyle Evans 		larger = (cmp >= 0);
452*f0865ec9SKyle Evans 		ret = _nn_cnd_sub_shift(larger, &a_shift, b_norm, (u8)(new_wlen - b_wlen), &borrow); EG(ret, err);
453*f0865ec9SKyle Evans 		MUST_HAVE(((!_nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp)) && (cmp < 0)), ret, err);
454*f0865ec9SKyle Evans 
455*f0865ec9SKyle Evans 		/*
456*f0865ec9SKyle Evans 		 * Perform division with MSP of a smaller than b. This ensures
457*f0865ec9SKyle Evans 		 * that the quotient is of length a_len - b_len.
458*f0865ec9SKyle Evans 		 */
459*f0865ec9SKyle Evans 		ret = nn_divrem_normalized(q, r, &a_shift, b_norm, v); EG(ret, err);
460*f0865ec9SKyle Evans 
461*f0865ec9SKyle Evans 		/* Set MSW of quotient. */
462*f0865ec9SKyle Evans 		ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err);
463*f0865ec9SKyle Evans 		q->val[new_wlen - b_wlen] = (word_t) larger;
464*f0865ec9SKyle Evans 	} /* else a is smaller than b... treated above. */
465*f0865ec9SKyle Evans 
466*f0865ec9SKyle Evans 	ret = nn_rshift_fixedlen(r, r, cnt); EG(ret, err);
467*f0865ec9SKyle Evans 	ret = nn_set_wlen(r, b_wlen);
468*f0865ec9SKyle Evans 
469*f0865ec9SKyle Evans err:
470*f0865ec9SKyle Evans 	nn_uninit(&a_shift);
471*f0865ec9SKyle Evans 
472*f0865ec9SKyle Evans 	return ret;
473*f0865ec9SKyle Evans }
474*f0865ec9SKyle Evans 
475*f0865ec9SKyle Evans /*
476*f0865ec9SKyle Evans  * Same as previous but handling aliasing of 'r' with 'b_norm', i.e. on success,
477*f0865ec9SKyle Evans  * result 'r' is passed through 'b_norm'.
478*f0865ec9SKyle Evans  *
479*f0865ec9SKyle Evans  * CAUTION:
480*f0865ec9SKyle Evans  *
481*f0865ec9SKyle Evans  * - The function is expected to be used ONLY by the generic version
482*f0865ec9SKyle Evans  *   nn_divrem_normalized() defined later in the file.
483*f0865ec9SKyle Evans  * - All parameter must have been initialized. Unlike exported/public
484*f0865ec9SKyle Evans  *   functions, this internal helper does not verify that nn parameters
485*f0865ec9SKyle Evans  *   have been initialized. Again, this is expected from the caller
486*f0865ec9SKyle Evans  *   (nn_divrem_unshifted()).
487*f0865ec9SKyle Evans  */
_nn_divrem_unshifted_aliased(nn_t q,nn_src_t a,nn_t b_norm,word_t v,bitcnt_t cnt)488*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted_aliased(nn_t q, nn_src_t a, nn_t b_norm,
489*f0865ec9SKyle Evans 					word_t v, bitcnt_t cnt)
490*f0865ec9SKyle Evans {
491*f0865ec9SKyle Evans 	int ret;
492*f0865ec9SKyle Evans 	nn r;
493*f0865ec9SKyle Evans 	r.magic = WORD(0);
494*f0865ec9SKyle Evans 
495*f0865ec9SKyle Evans 	ret = nn_init(&r, 0); EG(ret, err);
496*f0865ec9SKyle Evans 	ret = _nn_divrem_unshifted(q, &r, a, b_norm, v, cnt); EG(ret, err);
497*f0865ec9SKyle Evans 	ret = nn_copy(b_norm, &r); EG(ret, err);
498*f0865ec9SKyle Evans 
499*f0865ec9SKyle Evans err:
500*f0865ec9SKyle Evans 	nn_uninit(&r);
501*f0865ec9SKyle Evans 	return ret;
502*f0865ec9SKyle Evans }
503*f0865ec9SKyle Evans 
504*f0865ec9SKyle Evans /*
505*f0865ec9SKyle Evans  * Compute quotient and remainder and do not normalize them.
506*f0865ec9SKyle Evans  * Constant time, see documentation of _nn_divrem_unshifted().
507*f0865ec9SKyle Evans  *
508*f0865ec9SKyle Evans  * Alias-supporting version of _nn_divrem_unshifted for 'r' only.
509*f0865ec9SKyle Evans  *
510*f0865ec9SKyle Evans  * The function returns 0 on success, -1 on error.
511*f0865ec9SKyle Evans  */
nn_divrem_unshifted(nn_t q,nn_t r,nn_src_t a,nn_src_t b,word_t v,bitcnt_t cnt)512*f0865ec9SKyle Evans int nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b,
513*f0865ec9SKyle Evans 			word_t v, bitcnt_t cnt)
514*f0865ec9SKyle Evans {
515*f0865ec9SKyle Evans 	int ret;
516*f0865ec9SKyle Evans 
517*f0865ec9SKyle Evans 	ret = nn_check_initialized(a); EG(ret, err);
518*f0865ec9SKyle Evans 	ret = nn_check_initialized(q); EG(ret, err);
519*f0865ec9SKyle Evans 	ret = nn_check_initialized(r); EG(ret, err);
520*f0865ec9SKyle Evans 
521*f0865ec9SKyle Evans 	/* Unsupported aliasings */
522*f0865ec9SKyle Evans 	MUST_HAVE((q != r) && (q != a) && (q != b), ret, err);
523*f0865ec9SKyle Evans 
524*f0865ec9SKyle Evans 	if (r == b) {
525*f0865ec9SKyle Evans 		ret = _nn_divrem_unshifted_aliased(q, a, r, v, cnt);
526*f0865ec9SKyle Evans 	} else {
527*f0865ec9SKyle Evans 		ret = nn_check_initialized(b); EG(ret, err);
528*f0865ec9SKyle Evans 		ret = _nn_divrem_unshifted(q, r, a, b, v, cnt);
529*f0865ec9SKyle Evans 	}
530*f0865ec9SKyle Evans 
531*f0865ec9SKyle Evans err:
532*f0865ec9SKyle Evans 	return ret;
533*f0865ec9SKyle Evans }
534*f0865ec9SKyle Evans 
535*f0865ec9SKyle Evans /*
536*f0865ec9SKyle Evans  * Compute remainder only and do not normalize it.
537*f0865ec9SKyle Evans  * Constant time, see documentation of _nn_divrem_unshifted.
538*f0865ec9SKyle Evans  *
539*f0865ec9SKyle Evans  * Aliasing of inputs and outputs is possible.
540*f0865ec9SKyle Evans  *
541*f0865ec9SKyle Evans  * The function returns 0 on success, -1 on error.
542*f0865ec9SKyle Evans  */
nn_mod_unshifted(nn_t r,nn_src_t a,nn_src_t b,word_t v,bitcnt_t cnt)543*f0865ec9SKyle Evans int nn_mod_unshifted(nn_t r, nn_src_t a, nn_src_t b, word_t v, bitcnt_t cnt)
544*f0865ec9SKyle Evans {
545*f0865ec9SKyle Evans 	nn q;
546*f0865ec9SKyle Evans 	int ret;
547*f0865ec9SKyle Evans 	q.magic = WORD(0);
548*f0865ec9SKyle Evans 
549*f0865ec9SKyle Evans 	ret = nn_init(&q, 0); EG(ret, err);
550*f0865ec9SKyle Evans 	ret = nn_divrem_unshifted(&q, r, a, b, v, cnt);
551*f0865ec9SKyle Evans 
552*f0865ec9SKyle Evans err:
553*f0865ec9SKyle Evans 	nn_uninit(&q);
554*f0865ec9SKyle Evans 
555*f0865ec9SKyle Evans 	return ret;
556*f0865ec9SKyle Evans }
557*f0865ec9SKyle Evans 
558*f0865ec9SKyle Evans /*
559*f0865ec9SKyle Evans  * Helper functions for arithmetic in 2-by-1 division
560*f0865ec9SKyle Evans  * used in the reciprocal computation.
561*f0865ec9SKyle Evans  *
562*f0865ec9SKyle Evans  * These are variations of the nn multiprecision functions
563*f0865ec9SKyle Evans  * acting on arrays of fixed length, in place,
564*f0865ec9SKyle Evans  * and returning carry/borrow.
565*f0865ec9SKyle Evans  *
566*f0865ec9SKyle Evans  * Done in constant time.
567*f0865ec9SKyle Evans  */
568*f0865ec9SKyle Evans 
569*f0865ec9SKyle Evans /*
570*f0865ec9SKyle Evans  * Comparison of two limbs numbers. Internal helper.
571*f0865ec9SKyle Evans  * Checks left to the caller
572*f0865ec9SKyle Evans  */
_wcmp_22(word_t a[2],word_t b[2])573*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _wcmp_22(word_t a[2], word_t b[2])
574*f0865ec9SKyle Evans {
575*f0865ec9SKyle Evans 	int mask, ret = 0;
576*f0865ec9SKyle Evans 	ret += (a[1] > b[1]);
577*f0865ec9SKyle Evans 	ret -= (a[1] < b[1]);
578*f0865ec9SKyle Evans 	mask = !(ret & 0x1);
579*f0865ec9SKyle Evans 	ret += ((a[0] > b[0]) & mask);
580*f0865ec9SKyle Evans 	ret -= ((a[0] < b[0]) & mask);
581*f0865ec9SKyle Evans 	return ret;
582*f0865ec9SKyle Evans }
583*f0865ec9SKyle Evans 
584*f0865ec9SKyle Evans /*
585*f0865ec9SKyle Evans  * Addition of two limbs numbers with carry returned. Internal helper.
586*f0865ec9SKyle Evans  * Checks left to the caller.
587*f0865ec9SKyle Evans  */
_wadd_22(word_t a[2],word_t b[2])588*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static word_t _wadd_22(word_t a[2], word_t b[2])
589*f0865ec9SKyle Evans {
590*f0865ec9SKyle Evans 	word_t carry;
591*f0865ec9SKyle Evans 	a[0]  = (word_t)(a[0] + b[0]);
592*f0865ec9SKyle Evans 	carry = (word_t)(a[0] < b[0]);
593*f0865ec9SKyle Evans 	a[1]  = (word_t)(a[1] + carry);
594*f0865ec9SKyle Evans 	carry = (word_t)(a[1] < carry);
595*f0865ec9SKyle Evans 	a[1]  = (word_t)(a[1] + b[1]);
596*f0865ec9SKyle Evans 	carry = (word_t)(carry | (a[1] < b[1]));
597*f0865ec9SKyle Evans 	return carry;
598*f0865ec9SKyle Evans }
599*f0865ec9SKyle Evans 
600*f0865ec9SKyle Evans /*
601*f0865ec9SKyle Evans  * Subtraction of two limbs numbers with borrow returned. Internal helper.
602*f0865ec9SKyle Evans  * Checks left to the caller.
603*f0865ec9SKyle Evans  */
_wsub_22(word_t a[2],word_t b[2])604*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static word_t _wsub_22(word_t a[2], word_t b[2])
605*f0865ec9SKyle Evans {
606*f0865ec9SKyle Evans 	word_t borrow, tmp;
607*f0865ec9SKyle Evans 	tmp    = (word_t)(a[0] - b[0]);
608*f0865ec9SKyle Evans 	borrow = (word_t)(tmp > a[0]);
609*f0865ec9SKyle Evans 	a[0]   = tmp;
610*f0865ec9SKyle Evans 	tmp    = (word_t)(a[1] - borrow);
611*f0865ec9SKyle Evans 	borrow = (word_t)(tmp > a[1]);
612*f0865ec9SKyle Evans 	a[1]   = (word_t)(tmp - b[1]);
613*f0865ec9SKyle Evans 	borrow = (word_t)(borrow | (a[1] > tmp));
614*f0865ec9SKyle Evans 	return borrow;
615*f0865ec9SKyle Evans }
616*f0865ec9SKyle Evans 
617*f0865ec9SKyle Evans /*
618*f0865ec9SKyle Evans  * Helper macros for conditional subtraction in 2-by-1 division
619*f0865ec9SKyle Evans  * used in the reciprocal computation.
620*f0865ec9SKyle Evans  *
621*f0865ec9SKyle Evans  * Done in constant time.
622*f0865ec9SKyle Evans  */
623*f0865ec9SKyle Evans 
624*f0865ec9SKyle Evans /* Conditional subtraction of a one limb number from a two limbs number. */
625*f0865ec9SKyle Evans #define WORD_CND_SUB_21(cnd, ah, al, b) do {				\
626*f0865ec9SKyle Evans 		word_t tmp, mask;					\
627*f0865ec9SKyle Evans 		mask = WORD_MASK_IFNOTZERO((cnd));			\
628*f0865ec9SKyle Evans 		tmp  = (word_t)((al) - ((b) & mask));			\
629*f0865ec9SKyle Evans 		(ah) = (word_t)((ah) - (tmp > (al)));			\
630*f0865ec9SKyle Evans 		(al) = tmp;						\
631*f0865ec9SKyle Evans 	} while (0)
632*f0865ec9SKyle Evans /* Conditional subtraction of a two limbs number from a two limbs number. */
633*f0865ec9SKyle Evans #define WORD_CND_SUB_22(cnd, ah, al, bh, bl) do {			\
634*f0865ec9SKyle Evans 		word_t tmp, mask;					\
635*f0865ec9SKyle Evans 		mask = WORD_MASK_IFNOTZERO((cnd));			\
636*f0865ec9SKyle Evans 		tmp  = (word_t)((al) - ((bl) & mask));			\
637*f0865ec9SKyle Evans 		(ah) = (word_t)((ah) - (tmp > (al)));			\
638*f0865ec9SKyle Evans 		(al) = tmp;						\
639*f0865ec9SKyle Evans 		(ah) = (word_t)((ah) - ((bh) & mask));			\
640*f0865ec9SKyle Evans 	} while (0)
641*f0865ec9SKyle Evans 
642*f0865ec9SKyle Evans /*
643*f0865ec9SKyle Evans  * divide two words by a normalized word using schoolbook division on half
644*f0865ec9SKyle Evans  * words. This is only used below in the reciprocal computation. No checks
645*f0865ec9SKyle Evans  * are performed on inputs. This is expected to be done by the caller.
646*f0865ec9SKyle Evans  *
647*f0865ec9SKyle Evans  * Returns 0 on success, -1 on error.
648*f0865ec9SKyle Evans  */
_word_divrem(word_t * q,word_t * r,word_t ah,word_t al,word_t b)649*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _word_divrem(word_t *q, word_t *r, word_t ah, word_t al, word_t b)
650*f0865ec9SKyle Evans {
651*f0865ec9SKyle Evans 	word_t bh, bl, qh, ql, rm, rhl[2], phl[2];
652*f0865ec9SKyle Evans 	int larger, ret;
653*f0865ec9SKyle Evans 	u8 j;
654*f0865ec9SKyle Evans 
655*f0865ec9SKyle Evans 	MUST_HAVE((WRSHIFT((b), (WORD_BITS - 1)) == WORD(1)), ret, err);
656*f0865ec9SKyle Evans 	bh = WRSHIFT((b), HWORD_BITS);
657*f0865ec9SKyle Evans 	bl = WLSHIFT((b), HWORD_BITS);
658*f0865ec9SKyle Evans 	rhl[1] = ah;
659*f0865ec9SKyle Evans 	rhl[0] = al;
660*f0865ec9SKyle Evans 
661*f0865ec9SKyle Evans 	/*
662*f0865ec9SKyle Evans 	 * Compute high part of the quotient. We know from
663*f0865ec9SKyle Evans 	 * MUST_HAVE() check above that bh (a word_t) is not 0
664*f0865ec9SKyle Evans 	 */
665*f0865ec9SKyle Evans 
666*f0865ec9SKyle Evans 	KNOWN_FACT(bh != 0, ret, err);
667*f0865ec9SKyle Evans 	qh = (rhl[1] / bh);
668*f0865ec9SKyle Evans 	qh = WORD_MIN(qh, HWORD_MASK);
669*f0865ec9SKyle Evans 	WORD_MUL(phl[1], phl[0], qh, (b));
670*f0865ec9SKyle Evans 	phl[1] = (WLSHIFT(phl[1], HWORD_BITS) |
671*f0865ec9SKyle Evans 		  WRSHIFT(phl[0], HWORD_BITS));
672*f0865ec9SKyle Evans 	phl[0] = WLSHIFT(phl[0], HWORD_BITS);
673*f0865ec9SKyle Evans 
674*f0865ec9SKyle Evans 	for (j = 0; j < 2; j++) {
675*f0865ec9SKyle Evans 		larger = (_wcmp_22(phl, rhl) > 0);
676*f0865ec9SKyle Evans 		qh = (word_t)(qh - (word_t)larger);
677*f0865ec9SKyle Evans 		WORD_CND_SUB_22(larger, phl[1], phl[0], bh, bl);
678*f0865ec9SKyle Evans 	}
679*f0865ec9SKyle Evans 
680*f0865ec9SKyle Evans 	ret = (_wcmp_22(phl, rhl) > 0);
681*f0865ec9SKyle Evans 	MUST_HAVE(!(ret), ret, err);
682*f0865ec9SKyle Evans 	IGNORE_RET_VAL(_wsub_22(rhl, phl));
683*f0865ec9SKyle Evans 	MUST_HAVE((WRSHIFT(rhl[1], HWORD_BITS) == 0), ret, err);
684*f0865ec9SKyle Evans 
685*f0865ec9SKyle Evans 	/* Compute low part of the quotient. */
686*f0865ec9SKyle Evans 	rm = (WLSHIFT(rhl[1], HWORD_BITS) |
687*f0865ec9SKyle Evans 	      WRSHIFT(rhl[0], HWORD_BITS));
688*f0865ec9SKyle Evans 	ql = (rm / bh);
689*f0865ec9SKyle Evans 	ql = WORD_MIN(ql, HWORD_MASK);
690*f0865ec9SKyle Evans 	WORD_MUL(phl[1], phl[0], ql, (b));
691*f0865ec9SKyle Evans 
692*f0865ec9SKyle Evans 	for (j = 0; j < 2; j++) {
693*f0865ec9SKyle Evans 		larger = (_wcmp_22(phl, rhl) > 0);
694*f0865ec9SKyle Evans 		ql = (word_t) (ql - (word_t)larger);
695*f0865ec9SKyle Evans 		WORD_CND_SUB_21(larger, phl[1], phl[0], (b));
696*f0865ec9SKyle Evans 	}
697*f0865ec9SKyle Evans 
698*f0865ec9SKyle Evans 	ret = _wcmp_22(phl, rhl) > 0;
699*f0865ec9SKyle Evans 	MUST_HAVE(!(ret), ret, err);
700*f0865ec9SKyle Evans 	IGNORE_RET_VAL(_wsub_22(rhl, phl));
701*f0865ec9SKyle Evans 	/* Set outputs. */
702*f0865ec9SKyle Evans 	MUST_HAVE((rhl[1] == WORD(0)), ret, err);
703*f0865ec9SKyle Evans 	MUST_HAVE(!(rhl[0] >= (b)), ret, err);
704*f0865ec9SKyle Evans 	(*q) = (WLSHIFT(qh, HWORD_BITS) | ql);
705*f0865ec9SKyle Evans 	(*r) = rhl[0];
706*f0865ec9SKyle Evans 	MUST_HAVE(!((word_t) ((*q)*(b) + (*r)) != (al)), ret, err);
707*f0865ec9SKyle Evans 	ret = 0;
708*f0865ec9SKyle Evans 
709*f0865ec9SKyle Evans err:
710*f0865ec9SKyle Evans 	return ret;
711*f0865ec9SKyle Evans }
712*f0865ec9SKyle Evans 
713*f0865ec9SKyle Evans /*
714*f0865ec9SKyle Evans  * Compute the reciprocal of d as
715*f0865ec9SKyle Evans  *	floor(B^3/(d+1)) - B
716*f0865ec9SKyle Evans  * which is used to perform approximate small division using a multiplication.
717*f0865ec9SKyle Evans  *
718*f0865ec9SKyle Evans  * No attempt was made to make it constant time. Indeed, such values are usually
719*f0865ec9SKyle Evans  * precomputed in contexts where constant time is wanted, e.g. in the fp layer.
720*f0865ec9SKyle Evans  *
721*f0865ec9SKyle Evans  * Returns 0 on success, -1 on error.
722*f0865ec9SKyle Evans  */
wreciprocal(word_t dh,word_t dl,word_t * reciprocal)723*f0865ec9SKyle Evans int wreciprocal(word_t dh, word_t dl, word_t *reciprocal)
724*f0865ec9SKyle Evans {
725*f0865ec9SKyle Evans 	word_t q, carry, r[2], t[2];
726*f0865ec9SKyle Evans 	int ret;
727*f0865ec9SKyle Evans 
728*f0865ec9SKyle Evans 	MUST_HAVE((reciprocal != NULL), ret, err);
729*f0865ec9SKyle Evans 
730*f0865ec9SKyle Evans 	if (((word_t)(dh + WORD(1)) == WORD(0)) &&
731*f0865ec9SKyle Evans 	    ((word_t)(dl + WORD(1)) == WORD(0))) {
732*f0865ec9SKyle Evans 		(*reciprocal) = WORD(0);
733*f0865ec9SKyle Evans 		ret = 0;
734*f0865ec9SKyle Evans 		goto err;
735*f0865ec9SKyle Evans 	}
736*f0865ec9SKyle Evans 
737*f0865ec9SKyle Evans 	if ((word_t)(dh + WORD(1)) == WORD(0)) {
738*f0865ec9SKyle Evans 		q = (word_t)(~dh);
739*f0865ec9SKyle Evans 		r[1] = (word_t)(~dl);
740*f0865ec9SKyle Evans 	} else {
741*f0865ec9SKyle Evans 		t[1] = (word_t)(~dh);
742*f0865ec9SKyle Evans 		t[0] = (word_t)(~dl);
743*f0865ec9SKyle Evans 		ret = _word_divrem(&q, r+1, t[1], t[0],
744*f0865ec9SKyle Evans 				   (word_t)(dh + WORD(1))); EG(ret, err);
745*f0865ec9SKyle Evans 	}
746*f0865ec9SKyle Evans 
747*f0865ec9SKyle Evans 	if ((word_t)(dl + WORD(1)) == WORD(0)) {
748*f0865ec9SKyle Evans 		(*reciprocal) = q;
749*f0865ec9SKyle Evans 		ret = 0;
750*f0865ec9SKyle Evans 		goto err;
751*f0865ec9SKyle Evans 	}
752*f0865ec9SKyle Evans 
753*f0865ec9SKyle Evans 	r[0] = WORD(0);
754*f0865ec9SKyle Evans 
755*f0865ec9SKyle Evans 	WORD_MUL(t[1], t[0], q, (word_t)~dl);
756*f0865ec9SKyle Evans 	carry = _wadd_22(r, t);
757*f0865ec9SKyle Evans 
758*f0865ec9SKyle Evans 	t[0] = (word_t)(dl + WORD(1));
759*f0865ec9SKyle Evans 	t[1] = dh;
760*f0865ec9SKyle Evans 	while (carry || (_wcmp_22(r, t) >= 0)) {
761*f0865ec9SKyle Evans 		q++;
762*f0865ec9SKyle Evans 		carry = (word_t)(carry - _wsub_22(r, t));
763*f0865ec9SKyle Evans 	}
764*f0865ec9SKyle Evans 
765*f0865ec9SKyle Evans 	(*reciprocal) = q;
766*f0865ec9SKyle Evans 	ret = 0;
767*f0865ec9SKyle Evans 
768*f0865ec9SKyle Evans err:
769*f0865ec9SKyle Evans 	return ret;
770*f0865ec9SKyle Evans }
771*f0865ec9SKyle Evans 
772*f0865ec9SKyle Evans /*
773*f0865ec9SKyle Evans  * Given an odd number p, compute division coefficients p_normalized,
774*f0865ec9SKyle Evans  * p_shift and p_reciprocal so that:
775*f0865ec9SKyle Evans  *	- p_shift = p_rounded_bitlen - bitsizeof(p), where
776*f0865ec9SKyle Evans  *          o p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of
777*f0865ec9SKyle Evans  *            minimum number of words required to store p) and
778*f0865ec9SKyle Evans  *          o p_bitlen is the real bit size of p
779*f0865ec9SKyle Evans  *	- p_normalized = p << p_shift
780*f0865ec9SKyle Evans  *	- p_reciprocal = B^3 / ((p_normalized >> (pbitlen - 2*WORDSIZE)) + 1) - B
781*f0865ec9SKyle Evans  *	  with B = 2^WORDSIZE
782*f0865ec9SKyle Evans  *
783*f0865ec9SKyle Evans  * These coefficients are useful for the optimized shifted variants of NN
784*f0865ec9SKyle Evans  * division and modular functions. Because we have two word_t outputs
785*f0865ec9SKyle Evans  * (p_shift and p_reciprocal), these are passed through word_t pointers.
786*f0865ec9SKyle Evans  * Aliasing of outputs with the input is possible since p_in is copied in
787*f0865ec9SKyle Evans  * local p at the beginning of the function.
788*f0865ec9SKyle Evans  *
789*f0865ec9SKyle Evans  * The function does not support aliasing.
790*f0865ec9SKyle Evans  *
791*f0865ec9SKyle Evans  * The function returns 0 on success, -1 on error.
792*f0865ec9SKyle Evans  */
nn_compute_div_coefs(nn_t p_normalized,word_t * p_shift,word_t * p_reciprocal,nn_src_t p_in)793*f0865ec9SKyle Evans int nn_compute_div_coefs(nn_t p_normalized, word_t *p_shift,
794*f0865ec9SKyle Evans 			  word_t *p_reciprocal, nn_src_t p_in)
795*f0865ec9SKyle Evans {
796*f0865ec9SKyle Evans 	bitcnt_t p_rounded_bitlen, p_bitlen;
797*f0865ec9SKyle Evans 	nn p, tmp_nn;
798*f0865ec9SKyle Evans 	int ret;
799*f0865ec9SKyle Evans 	p.magic = tmp_nn.magic = WORD(0);
800*f0865ec9SKyle Evans 
801*f0865ec9SKyle Evans 	ret = nn_check_initialized(p_in); EG(ret, err);
802*f0865ec9SKyle Evans 
803*f0865ec9SKyle Evans 	MUST_HAVE((p_shift != NULL), ret, err);
804*f0865ec9SKyle Evans 	MUST_HAVE((p_reciprocal != NULL), ret, err);
805*f0865ec9SKyle Evans 
806*f0865ec9SKyle Evans 	/* Unsupported aliasing */
807*f0865ec9SKyle Evans 	MUST_HAVE((p_normalized != p_in), ret, err);
808*f0865ec9SKyle Evans 
809*f0865ec9SKyle Evans 	ret = nn_init(&p, 0); EG(ret, err);
810*f0865ec9SKyle Evans 	ret = nn_copy(&p, p_in); EG(ret, err);
811*f0865ec9SKyle Evans 
812*f0865ec9SKyle Evans 	/*
813*f0865ec9SKyle Evans 	 * In order for our reciprocal division routines to work, it is expected
814*f0865ec9SKyle Evans 	 * that the bit length (including leading zeroes) of input prime
815*f0865ec9SKyle Evans 	 * p is >= 2 * wlen where wlen is the number of bits of a word size.
816*f0865ec9SKyle Evans 	 */
817*f0865ec9SKyle Evans 	if (p.wlen < 2) {
818*f0865ec9SKyle Evans 		ret = nn_set_wlen(&p, 2); EG(ret, err);
819*f0865ec9SKyle Evans 	}
820*f0865ec9SKyle Evans 
821*f0865ec9SKyle Evans 	ret = nn_init(p_normalized, 0); EG(ret, err);
822*f0865ec9SKyle Evans 	ret = nn_init(&tmp_nn, 0); EG(ret, err);
823*f0865ec9SKyle Evans 
824*f0865ec9SKyle Evans 	/* p_rounded_bitlen = bitlen of p rounded to word size */
825*f0865ec9SKyle Evans 	p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen);
826*f0865ec9SKyle Evans 
827*f0865ec9SKyle Evans 	/* p_shift */
828*f0865ec9SKyle Evans 	ret = nn_bitlen(&p, &p_bitlen); EG(ret, err);
829*f0865ec9SKyle Evans 	(*p_shift) = (word_t)(p_rounded_bitlen - p_bitlen);
830*f0865ec9SKyle Evans 
831*f0865ec9SKyle Evans 	/* p_normalized = p << pshift */
832*f0865ec9SKyle Evans 	ret = nn_lshift(p_normalized, &p, (bitcnt_t)(*p_shift)); EG(ret, err);
833*f0865ec9SKyle Evans 
834*f0865ec9SKyle Evans 	/* Sanity check to protect the p_reciprocal computation */
835*f0865ec9SKyle Evans 	MUST_HAVE((p_rounded_bitlen >= (2 * WORDSIZE)), ret, err);
836*f0865ec9SKyle Evans 
837*f0865ec9SKyle Evans 	/*
838*f0865ec9SKyle Evans 	 * p_reciprocal = B^3 / ((p_normalized >> (p_rounded_bitlen - 2 * wlen)) + 1) - B
839*f0865ec9SKyle Evans 	 * where B = 2^wlen where wlen = word size in bits. We use our NN
840*f0865ec9SKyle Evans 	 * helper to compute it.
841*f0865ec9SKyle Evans 	 */
842*f0865ec9SKyle Evans 	ret = nn_rshift(&tmp_nn, p_normalized, (bitcnt_t)(p_rounded_bitlen - (2 * WORDSIZE))); EG(ret, err);
843*f0865ec9SKyle Evans 	ret = wreciprocal(tmp_nn.val[1], tmp_nn.val[0], p_reciprocal);
844*f0865ec9SKyle Evans 
845*f0865ec9SKyle Evans err:
846*f0865ec9SKyle Evans 	nn_uninit(&p);
847*f0865ec9SKyle Evans 	nn_uninit(&tmp_nn);
848*f0865ec9SKyle Evans 
849*f0865ec9SKyle Evans 	return ret;
850*f0865ec9SKyle Evans }
851*f0865ec9SKyle Evans 
852*f0865ec9SKyle Evans /*
853*f0865ec9SKyle Evans  * Compute quotient remainder of Euclidean division.
854*f0865ec9SKyle Evans  *
855*f0865ec9SKyle Evans  * This function is a wrapper to normalize the divisor, i.e. shift it so that
856*f0865ec9SKyle Evans  * the MSB of its MSW is set, and precompute the reciprocal of this MSW to be
857*f0865ec9SKyle Evans  * used to perform small divisions using multiplications during the long
858*f0865ec9SKyle Evans  * schoolbook division. It uses the helper functions/macros above.
859*f0865ec9SKyle Evans  *
860*f0865ec9SKyle Evans  * This is NOT constant time with regards to the word length of a and b,
861*f0865ec9SKyle Evans  * but also the actual bitlength of b as we need to normalize b at the
862*f0865ec9SKyle Evans  * bit level.
863*f0865ec9SKyle Evans  * Moreover the precomputation of the reciprocal is not constant time at all.
864*f0865ec9SKyle Evans  *
865*f0865ec9SKyle Evans  * r need not be initialized, the function does it for the the caller.
866*f0865ec9SKyle Evans  *
867*f0865ec9SKyle Evans  * This function does not support aliasing. This is an internal helper, which
868*f0865ec9SKyle Evans  * expects caller to check parameters.
869*f0865ec9SKyle Evans  *
870*f0865ec9SKyle Evans  * It returns 0 on sucess, -1 on error.
871*f0865ec9SKyle Evans  */
_nn_divrem(nn_t q,nn_t r,nn_src_t a,nn_src_t b)872*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
873*f0865ec9SKyle Evans {
874*f0865ec9SKyle Evans 	nn b_large, b_normalized;
875*f0865ec9SKyle Evans 	bitcnt_t cnt;
876*f0865ec9SKyle Evans 	word_t v;
877*f0865ec9SKyle Evans 	nn_src_t ptr = b;
878*f0865ec9SKyle Evans 	int ret, iszero;
879*f0865ec9SKyle Evans 	b_large.magic = b_normalized.magic = WORD(0);
880*f0865ec9SKyle Evans 
881*f0865ec9SKyle Evans 	ret = nn_init(r, 0); EG(ret, err);
882*f0865ec9SKyle Evans 	ret = nn_init(q, 0); EG(ret, err);
883*f0865ec9SKyle Evans 	ret = nn_init(&b_large, 0); EG(ret, err);
884*f0865ec9SKyle Evans 
885*f0865ec9SKyle Evans 	MUST_HAVE(!nn_iszero(b, &iszero) && !iszero, ret, err);
886*f0865ec9SKyle Evans 
887*f0865ec9SKyle Evans 	if (b->wlen == 1) {
888*f0865ec9SKyle Evans 		ret = nn_copy(&b_large, b); EG(ret, err);
889*f0865ec9SKyle Evans 
890*f0865ec9SKyle Evans 		/* Expand our big number with zeroes */
891*f0865ec9SKyle Evans 		ret = nn_set_wlen(&b_large, 2); EG(ret, err);
892*f0865ec9SKyle Evans 
893*f0865ec9SKyle Evans 		/*
894*f0865ec9SKyle Evans 		 * This cast could seem inappropriate, but we are
895*f0865ec9SKyle Evans 		 * sure here that we won't touch ptr since it is only
896*f0865ec9SKyle Evans 		 * given as a const parameter to sub functions.
897*f0865ec9SKyle Evans 		 */
898*f0865ec9SKyle Evans 		ptr = (nn_src_t) &b_large;
899*f0865ec9SKyle Evans 	}
900*f0865ec9SKyle Evans 
901*f0865ec9SKyle Evans 	/* After this, we only handle >= 2 words big numbers */
902*f0865ec9SKyle Evans 	MUST_HAVE(!(ptr->wlen < 2), ret, err);
903*f0865ec9SKyle Evans 
904*f0865ec9SKyle Evans 	ret = nn_init(&b_normalized, (u16)((ptr->wlen) * WORD_BYTES)); EG(ret, err);
905*f0865ec9SKyle Evans 	ret = nn_clz(ptr, &cnt); EG(ret, err);
906*f0865ec9SKyle Evans 	ret = nn_lshift_fixedlen(&b_normalized, ptr, cnt); EG(ret, err);
907*f0865ec9SKyle Evans 	ret = wreciprocal(b_normalized.val[ptr->wlen - 1],
908*f0865ec9SKyle Evans 			  b_normalized.val[ptr->wlen - 2],
909*f0865ec9SKyle Evans 			  &v); /* Not constant time. */ EG(ret, err);
910*f0865ec9SKyle Evans 
911*f0865ec9SKyle Evans 	ret = _nn_divrem_unshifted(q, r, a, &b_normalized, v, cnt);
912*f0865ec9SKyle Evans 
913*f0865ec9SKyle Evans err:
914*f0865ec9SKyle Evans 	nn_uninit(&b_normalized);
915*f0865ec9SKyle Evans 	nn_uninit(&b_large);
916*f0865ec9SKyle Evans 
917*f0865ec9SKyle Evans 	return ret;
918*f0865ec9SKyle Evans }
919*f0865ec9SKyle Evans 
920*f0865ec9SKyle Evans /*
921*f0865ec9SKyle Evans  * Returns 0 on succes, -1 on error. Internal helper. Checks on params
922*f0865ec9SKyle Evans  * expected from the caller.
923*f0865ec9SKyle Evans  */
__nn_divrem_notrim_alias(nn_t q,nn_t r,nn_src_t a,nn_src_t b)924*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int __nn_divrem_notrim_alias(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
925*f0865ec9SKyle Evans {
926*f0865ec9SKyle Evans 	nn a_cpy, b_cpy;
927*f0865ec9SKyle Evans 	int ret;
928*f0865ec9SKyle Evans 	a_cpy.magic = b_cpy.magic = WORD(0);
929*f0865ec9SKyle Evans 
930*f0865ec9SKyle Evans 	ret = nn_init(&a_cpy, 0); EG(ret, err);
931*f0865ec9SKyle Evans 	ret = nn_init(&b_cpy, 0); EG(ret, err);
932*f0865ec9SKyle Evans 	ret = nn_copy(&a_cpy, a); EG(ret, err);
933*f0865ec9SKyle Evans 	ret = nn_copy(&b_cpy, b); EG(ret, err);
934*f0865ec9SKyle Evans 	ret = _nn_divrem(q, r, &a_cpy, &b_cpy);
935*f0865ec9SKyle Evans 
936*f0865ec9SKyle Evans err:
937*f0865ec9SKyle Evans 	nn_uninit(&b_cpy);
938*f0865ec9SKyle Evans 	nn_uninit(&a_cpy);
939*f0865ec9SKyle Evans 
940*f0865ec9SKyle Evans 	return ret;
941*f0865ec9SKyle Evans }
942*f0865ec9SKyle Evans 
943*f0865ec9SKyle Evans 
944*f0865ec9SKyle Evans 
945*f0865ec9SKyle Evans /*
946*f0865ec9SKyle Evans  * Compute quotient and remainder and normalize them.
947*f0865ec9SKyle Evans  * Not constant time, see documentation of _nn_divrem.
948*f0865ec9SKyle Evans  *
949*f0865ec9SKyle Evans  * Aliased version of _nn_divrem. Returns 0 on success,
950*f0865ec9SKyle Evans  * -1 on error.
951*f0865ec9SKyle Evans  */
nn_divrem_notrim(nn_t q,nn_t r,nn_src_t a,nn_src_t b)952*f0865ec9SKyle Evans int nn_divrem_notrim(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
953*f0865ec9SKyle Evans {
954*f0865ec9SKyle Evans 	int ret;
955*f0865ec9SKyle Evans 
956*f0865ec9SKyle Evans 	/* _nn_divrem initializes q and r */
957*f0865ec9SKyle Evans 	ret = nn_check_initialized(a); EG(ret, err);
958*f0865ec9SKyle Evans 	ret = nn_check_initialized(b); EG(ret, err);
959*f0865ec9SKyle Evans 	MUST_HAVE(((q != NULL) && (r != NULL)), ret, err);
960*f0865ec9SKyle Evans 
961*f0865ec9SKyle Evans 	/*
962*f0865ec9SKyle Evans 	 * Handle aliasing whenever any of the inputs is
963*f0865ec9SKyle Evans 	 * used as an output.
964*f0865ec9SKyle Evans 	 */
965*f0865ec9SKyle Evans 	if ((a == q) || (a == r) || (b == q) || (b == r)) {
966*f0865ec9SKyle Evans 		ret = __nn_divrem_notrim_alias(q, r, a, b);
967*f0865ec9SKyle Evans 	} else {
968*f0865ec9SKyle Evans 		ret = _nn_divrem(q, r, a, b);
969*f0865ec9SKyle Evans 	}
970*f0865ec9SKyle Evans 
971*f0865ec9SKyle Evans err:
972*f0865ec9SKyle Evans 	return ret;
973*f0865ec9SKyle Evans }
974*f0865ec9SKyle Evans 
975*f0865ec9SKyle Evans /*
976*f0865ec9SKyle Evans  * Compute quotient and remainder and normalize them.
977*f0865ec9SKyle Evans  * Not constant time, see documentation of _nn_divrem().
978*f0865ec9SKyle Evans  * Returns 0 on success, -1 on error.
979*f0865ec9SKyle Evans  *
980*f0865ec9SKyle Evans  * Aliasing is supported.
981*f0865ec9SKyle Evans  */
nn_divrem(nn_t q,nn_t r,nn_src_t a,nn_src_t b)982*f0865ec9SKyle Evans int nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
983*f0865ec9SKyle Evans {
984*f0865ec9SKyle Evans 	int ret;
985*f0865ec9SKyle Evans 
986*f0865ec9SKyle Evans 	ret = nn_divrem_notrim(q, r, a, b); EG(ret, err);
987*f0865ec9SKyle Evans 
988*f0865ec9SKyle Evans 	/* Normalize (trim) the quotient and rest to avoid size overflow */
989*f0865ec9SKyle Evans 	ret = nn_normalize(q); EG(ret, err);
990*f0865ec9SKyle Evans 	ret = nn_normalize(r);
991*f0865ec9SKyle Evans 
992*f0865ec9SKyle Evans err:
993*f0865ec9SKyle Evans 	return ret;
994*f0865ec9SKyle Evans }
995*f0865ec9SKyle Evans 
996*f0865ec9SKyle Evans /*
997*f0865ec9SKyle Evans  * Compute remainder only and do not normalize it. Not constant time, see
998*f0865ec9SKyle Evans  * documentation of _nn_divrem(). Returns 0 on success, -1 on error.
999*f0865ec9SKyle Evans  *
1000*f0865ec9SKyle Evans  * Aliasing is supported.
1001*f0865ec9SKyle Evans  */
nn_mod_notrim(nn_t r,nn_src_t a,nn_src_t b)1002*f0865ec9SKyle Evans int nn_mod_notrim(nn_t r, nn_src_t a, nn_src_t b)
1003*f0865ec9SKyle Evans {
1004*f0865ec9SKyle Evans 	int ret;
1005*f0865ec9SKyle Evans 	nn q;
1006*f0865ec9SKyle Evans 	q.magic = WORD(0);
1007*f0865ec9SKyle Evans 
1008*f0865ec9SKyle Evans 	/* nn_divrem() will init q. */
1009*f0865ec9SKyle Evans 	ret = nn_divrem_notrim(&q, r, a, b);
1010*f0865ec9SKyle Evans 
1011*f0865ec9SKyle Evans 	nn_uninit(&q);
1012*f0865ec9SKyle Evans 
1013*f0865ec9SKyle Evans 	return ret;
1014*f0865ec9SKyle Evans }
1015*f0865ec9SKyle Evans 
1016*f0865ec9SKyle Evans /*
1017*f0865ec9SKyle Evans  * Compute remainder only and normalize it. Not constant time, see
1018*f0865ec9SKyle Evans  * documentation of _nn_divrem(). r is initialized by the function.
1019*f0865ec9SKyle Evans  * Returns 0 on success, -1 on error.
1020*f0865ec9SKyle Evans  *
1021*f0865ec9SKyle Evans  * Aliasing is supported.
1022*f0865ec9SKyle Evans  */
nn_mod(nn_t r,nn_src_t a,nn_src_t b)1023*f0865ec9SKyle Evans int nn_mod(nn_t r, nn_src_t a, nn_src_t b)
1024*f0865ec9SKyle Evans {
1025*f0865ec9SKyle Evans 	int ret;
1026*f0865ec9SKyle Evans 	nn q;
1027*f0865ec9SKyle Evans 	q.magic = WORD(0);
1028*f0865ec9SKyle Evans 
1029*f0865ec9SKyle Evans 	/* nn_divrem will init q. */
1030*f0865ec9SKyle Evans 	ret = nn_divrem(&q, r, a, b);
1031*f0865ec9SKyle Evans 
1032*f0865ec9SKyle Evans 	nn_uninit(&q);
1033*f0865ec9SKyle Evans 
1034*f0865ec9SKyle Evans 	return ret;
1035*f0865ec9SKyle Evans }
1036*f0865ec9SKyle Evans 
1037*f0865ec9SKyle Evans /*
1038*f0865ec9SKyle Evans  * Below follow gcd and xgcd non constant time functions for the user ease.
1039*f0865ec9SKyle Evans  */
1040*f0865ec9SKyle Evans 
1041*f0865ec9SKyle Evans /*
1042*f0865ec9SKyle Evans  * Unaliased version of xgcd, and we suppose that a >= b. Badly non-constant
1043*f0865ec9SKyle Evans  * time per the algorithm used. The function returns 0 on success, -1 on
1044*f0865ec9SKyle Evans  * error. internal helper: expect caller to check parameters.
1045*f0865ec9SKyle Evans  */
_nn_xgcd(nn_t g,nn_t u,nn_t v,nn_src_t a,nn_src_t b,int * sign)1046*f0865ec9SKyle Evans ATTRIBUTE_WARN_UNUSED_RET static int _nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b,
1047*f0865ec9SKyle Evans 		    int *sign)
1048*f0865ec9SKyle Evans {
1049*f0865ec9SKyle Evans 	nn_t c, d, q, r, u1, v1, u2, v2;
1050*f0865ec9SKyle Evans 	nn scratch[8];
1051*f0865ec9SKyle Evans 	int ret, swap, iszero;
1052*f0865ec9SKyle Evans 	u8 i;
1053*f0865ec9SKyle Evans 
1054*f0865ec9SKyle Evans 	for (i = 0; i < 8; i++){
1055*f0865ec9SKyle Evans 		scratch[i].magic = WORD(0);
1056*f0865ec9SKyle Evans 	}
1057*f0865ec9SKyle Evans 
1058*f0865ec9SKyle Evans 	/*
1059*f0865ec9SKyle Evans 	 * Maintain:
1060*f0865ec9SKyle Evans 	 * |u1 v1| |c| = |a|
1061*f0865ec9SKyle Evans 	 * |u2 v2| |d|   |b|
1062*f0865ec9SKyle Evans 	 * u1, v1, u2, v2 >= 0
1063*f0865ec9SKyle Evans 	 * c >= d
1064*f0865ec9SKyle Evans 	 *
1065*f0865ec9SKyle Evans 	 * Initially:
1066*f0865ec9SKyle Evans 	 * |1  0 | |a| = |a|
1067*f0865ec9SKyle Evans 	 * |0  1 | |b|   |b|
1068*f0865ec9SKyle Evans 	 *
1069*f0865ec9SKyle Evans 	 * At each iteration:
1070*f0865ec9SKyle Evans 	 * c >= d
1071*f0865ec9SKyle Evans 	 * c = q*d + r
1072*f0865ec9SKyle Evans 	 * |u1 v1| = |q*u1+v1 u1|
1073*f0865ec9SKyle Evans 	 * |u2 v2|   |q*u2+v2 u2|
1074*f0865ec9SKyle Evans 	 *
1075*f0865ec9SKyle Evans 	 * Finally, after i steps:
1076*f0865ec9SKyle Evans 	 * |u1 v1| |g| = |a|
1077*f0865ec9SKyle Evans 	 * |u2 v2| |0| = |b|
1078*f0865ec9SKyle Evans 	 *
1079*f0865ec9SKyle Evans 	 * Inverting the matrix:
1080*f0865ec9SKyle Evans 	 * |g| = (-1)^i | v2 -v1| |a|
1081*f0865ec9SKyle Evans 	 * |0|          |-u2  u1| |b|
1082*f0865ec9SKyle Evans 	 */
1083*f0865ec9SKyle Evans 
1084*f0865ec9SKyle Evans 	/*
1085*f0865ec9SKyle Evans 	 * Initialization.
1086*f0865ec9SKyle Evans 	 */
1087*f0865ec9SKyle Evans 	ret = nn_init(g, 0); EG(ret, err);
1088*f0865ec9SKyle Evans 	ret = nn_init(u, 0); EG(ret, err);
1089*f0865ec9SKyle Evans 	ret = nn_init(v, 0); EG(ret, err);
1090*f0865ec9SKyle Evans 	ret = nn_iszero(b, &iszero); EG(ret, err);
1091*f0865ec9SKyle Evans 	if (iszero) {
1092*f0865ec9SKyle Evans 		/* gcd(0, a) = a, and 1*a + 0*b = a */
1093*f0865ec9SKyle Evans 		ret = nn_copy(g, a); EG(ret, err);
1094*f0865ec9SKyle Evans 		ret = nn_one(u); EG(ret, err);
1095*f0865ec9SKyle Evans 		ret = nn_zero(v); EG(ret, err);
1096*f0865ec9SKyle Evans 		(*sign) = 1;
1097*f0865ec9SKyle Evans 		goto err;
1098*f0865ec9SKyle Evans 	}
1099*f0865ec9SKyle Evans 
1100*f0865ec9SKyle Evans 	for (i = 0; i < 8; i++){
1101*f0865ec9SKyle Evans 		ret = nn_init(&scratch[i], 0); EG(ret, err);
1102*f0865ec9SKyle Evans 	}
1103*f0865ec9SKyle Evans 
1104*f0865ec9SKyle Evans 	u1 = &(scratch[0]);
1105*f0865ec9SKyle Evans 	v1 = &(scratch[1]);
1106*f0865ec9SKyle Evans 	u2 = &(scratch[2]);
1107*f0865ec9SKyle Evans 	v2 = &(scratch[3]);
1108*f0865ec9SKyle Evans 	ret = nn_one(u1); EG(ret, err);
1109*f0865ec9SKyle Evans 	ret = nn_zero(v1); EG(ret, err);
1110*f0865ec9SKyle Evans 	ret = nn_zero(u2); EG(ret, err);
1111*f0865ec9SKyle Evans 	ret = nn_one(v2); EG(ret, err);
1112*f0865ec9SKyle Evans 	c = &(scratch[4]);
1113*f0865ec9SKyle Evans 	d = &(scratch[5]);
1114*f0865ec9SKyle Evans 	ret = nn_copy(c, a); EG(ret, err); /* Copy could be skipped. */
1115*f0865ec9SKyle Evans 	ret = nn_copy(d, b); EG(ret, err); /* Copy could be skipped. */
1116*f0865ec9SKyle Evans 	q = &(scratch[6]);
1117*f0865ec9SKyle Evans 	r = &(scratch[7]);
1118*f0865ec9SKyle Evans 	swap = 0;
1119*f0865ec9SKyle Evans 
1120*f0865ec9SKyle Evans 	/*
1121*f0865ec9SKyle Evans 	 * Loop.
1122*f0865ec9SKyle Evans 	 */
1123*f0865ec9SKyle Evans 	ret = nn_iszero(d, &iszero); EG(ret, err);
1124*f0865ec9SKyle Evans 	while (!iszero) {
1125*f0865ec9SKyle Evans 		ret = nn_divrem(q, r, c, d); EG(ret, err);
1126*f0865ec9SKyle Evans 		ret = nn_normalize(q); EG(ret, err);
1127*f0865ec9SKyle Evans 		ret = nn_normalize(r); EG(ret, err);
1128*f0865ec9SKyle Evans 		ret = nn_copy(c, r); EG(ret, err);
1129*f0865ec9SKyle Evans 		ret = nn_mul(r, q, u1); EG(ret, err);
1130*f0865ec9SKyle Evans 		ret = nn_normalize(r); EG(ret, err);
1131*f0865ec9SKyle Evans 		ret = nn_add(v1, v1, r); EG(ret, err);
1132*f0865ec9SKyle Evans 		ret = nn_mul(r, q, u2); EG(ret, err);
1133*f0865ec9SKyle Evans 		ret = nn_normalize(r); EG(ret, err);
1134*f0865ec9SKyle Evans 		ret = nn_add(v2, v2, r); EG(ret, err);
1135*f0865ec9SKyle Evans 		ret = nn_normalize(v1); EG(ret, err);
1136*f0865ec9SKyle Evans 		ret = nn_normalize(v2); EG(ret, err);
1137*f0865ec9SKyle Evans 		swap = 1;
1138*f0865ec9SKyle Evans 		ret = nn_iszero(c, &iszero); EG(ret, err);
1139*f0865ec9SKyle Evans 		if (iszero) {
1140*f0865ec9SKyle Evans 			break;
1141*f0865ec9SKyle Evans 		}
1142*f0865ec9SKyle Evans 		ret = nn_divrem(q, r, d, c); EG(ret, err);
1143*f0865ec9SKyle Evans 		ret = nn_normalize(q); EG(ret, err);
1144*f0865ec9SKyle Evans 		ret = nn_normalize(r); EG(ret, err);
1145*f0865ec9SKyle Evans 		ret = nn_copy(d, r); EG(ret, err);
1146*f0865ec9SKyle Evans 		ret = nn_mul(r, q, v1); EG(ret, err);
1147*f0865ec9SKyle Evans 		ret = nn_normalize(r); EG(ret, err);
1148*f0865ec9SKyle Evans 		ret = nn_add(u1, u1, r); EG(ret, err);
1149*f0865ec9SKyle Evans 		ret = nn_mul(r, q, v2); EG(ret, err);
1150*f0865ec9SKyle Evans 		ret = nn_normalize(r); EG(ret, err);
1151*f0865ec9SKyle Evans 		ret = nn_add(u2, u2, r); EG(ret, err);
1152*f0865ec9SKyle Evans 		ret = nn_normalize(u1); EG(ret, err);
1153*f0865ec9SKyle Evans 		ret = nn_normalize(u2); EG(ret, err);
1154*f0865ec9SKyle Evans 		swap = 0;
1155*f0865ec9SKyle Evans 
1156*f0865ec9SKyle Evans 		/* refresh loop condition */
1157*f0865ec9SKyle Evans 		ret = nn_iszero(d, &iszero); EG(ret, err);
1158*f0865ec9SKyle Evans 	}
1159*f0865ec9SKyle Evans 
1160*f0865ec9SKyle Evans 	/* Copies could be skipped. */
1161*f0865ec9SKyle Evans 	if (swap) {
1162*f0865ec9SKyle Evans 		ret = nn_copy(g, d); EG(ret, err);
1163*f0865ec9SKyle Evans 		ret = nn_copy(u, u2); EG(ret, err);
1164*f0865ec9SKyle Evans 		ret = nn_copy(v, u1); EG(ret, err);
1165*f0865ec9SKyle Evans 	} else {
1166*f0865ec9SKyle Evans 		ret = nn_copy(g, c); EG(ret, err);
1167*f0865ec9SKyle Evans 		ret = nn_copy(u, v2); EG(ret, err);
1168*f0865ec9SKyle Evans 		ret = nn_copy(v, v1); EG(ret, err);
1169*f0865ec9SKyle Evans 	}
1170*f0865ec9SKyle Evans 
1171*f0865ec9SKyle Evans 	/* swap = -1 means u <= 0; = 1 means v <= 0 */
1172*f0865ec9SKyle Evans 	(*sign) = swap ? -1 : 1;
1173*f0865ec9SKyle Evans 	ret = 0;
1174*f0865ec9SKyle Evans 
1175*f0865ec9SKyle Evans err:
1176*f0865ec9SKyle Evans 	/*
1177*f0865ec9SKyle Evans 	 * We uninit scratch elements in all cases, i.e. whether or not
1178*f0865ec9SKyle Evans 	 * we return an error or not.
1179*f0865ec9SKyle Evans 	 */
1180*f0865ec9SKyle Evans 	for (i = 0; i < 8; i++){
1181*f0865ec9SKyle Evans 		nn_uninit(&scratch[i]);
1182*f0865ec9SKyle Evans 	}
1183*f0865ec9SKyle Evans 	/* Unitialize output in case of error */
1184*f0865ec9SKyle Evans 	if (ret){
1185*f0865ec9SKyle Evans 		nn_uninit(v);
1186*f0865ec9SKyle Evans 		nn_uninit(u);
1187*f0865ec9SKyle Evans 		nn_uninit(g);
1188*f0865ec9SKyle Evans 	}
1189*f0865ec9SKyle Evans 
1190*f0865ec9SKyle Evans 	return ret;
1191*f0865ec9SKyle Evans }
1192*f0865ec9SKyle Evans 
1193*f0865ec9SKyle Evans /*
1194*f0865ec9SKyle Evans  * Aliased version of xgcd, and no assumption on a and b. Not constant time at
1195*f0865ec9SKyle Evans  * all. returns 0 on success, -1 on error. XXX document 'sign'
1196*f0865ec9SKyle Evans  *
1197*f0865ec9SKyle Evans  * Aliasing is supported.
1198*f0865ec9SKyle Evans  */
nn_xgcd(nn_t g,nn_t u,nn_t v,nn_src_t a,nn_src_t b,int * sign)1199*f0865ec9SKyle Evans int nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b, int *sign)
1200*f0865ec9SKyle Evans {
1201*f0865ec9SKyle Evans 	/* Handle aliasing
1202*f0865ec9SKyle Evans 	 * Note: in order to properly handle aliasing, we accept to lose
1203*f0865ec9SKyle Evans 	 * some "space" on the stack with copies.
1204*f0865ec9SKyle Evans 	 */
1205*f0865ec9SKyle Evans 	nn a_cpy, b_cpy;
1206*f0865ec9SKyle Evans 	nn_src_t a_, b_;
1207*f0865ec9SKyle Evans 	int ret, cmp, _sign;
1208*f0865ec9SKyle Evans 	a_cpy.magic = b_cpy.magic = WORD(0);
1209*f0865ec9SKyle Evans 
1210*f0865ec9SKyle Evans 	/* The internal _nn_xgcd function initializes g, u and v */
1211*f0865ec9SKyle Evans 	ret = nn_check_initialized(a); EG(ret, err);
1212*f0865ec9SKyle Evans 	ret = nn_check_initialized(b); EG(ret, err);
1213*f0865ec9SKyle Evans 	MUST_HAVE((sign != NULL), ret, err);
1214*f0865ec9SKyle Evans 
1215*f0865ec9SKyle Evans 	ret = nn_init(&b_cpy, 0); EG(ret, err);
1216*f0865ec9SKyle Evans 
1217*f0865ec9SKyle Evans 	/* Aliasing of a */
1218*f0865ec9SKyle Evans 	if ((g == a) || (u == a) || (v == a)){
1219*f0865ec9SKyle Evans 		ret = nn_copy(&a_cpy, a); EG(ret, err);
1220*f0865ec9SKyle Evans 		a_ = &a_cpy;
1221*f0865ec9SKyle Evans 	} else {
1222*f0865ec9SKyle Evans 		a_ = a;
1223*f0865ec9SKyle Evans 	}
1224*f0865ec9SKyle Evans 	/* Aliasing of b */
1225*f0865ec9SKyle Evans 	if ((g == b) || (u == b) || (v == b)) {
1226*f0865ec9SKyle Evans 		ret = nn_copy(&b_cpy, b); EG(ret, err);
1227*f0865ec9SKyle Evans 		b_ = &b_cpy;
1228*f0865ec9SKyle Evans 	} else {
1229*f0865ec9SKyle Evans 		b_ = b;
1230*f0865ec9SKyle Evans 	}
1231*f0865ec9SKyle Evans 
1232*f0865ec9SKyle Evans 	ret = nn_cmp(a_, b_, &cmp); EG(ret, err);
1233*f0865ec9SKyle Evans 	if (cmp < 0) {
1234*f0865ec9SKyle Evans 		/* If a < b, swap the inputs */
1235*f0865ec9SKyle Evans 		ret = _nn_xgcd(g, v, u, b_, a_, &_sign); EG(ret, err);
1236*f0865ec9SKyle Evans 		(*sign) = -(_sign);
1237*f0865ec9SKyle Evans 	} else {
1238*f0865ec9SKyle Evans 		ret = _nn_xgcd(g, u, v, a_, b_, &_sign); EG(ret, err);
1239*f0865ec9SKyle Evans 		(*sign) = _sign;
1240*f0865ec9SKyle Evans 	}
1241*f0865ec9SKyle Evans 
1242*f0865ec9SKyle Evans err:
1243*f0865ec9SKyle Evans 	nn_uninit(&b_cpy);
1244*f0865ec9SKyle Evans 	nn_uninit(&a_cpy);
1245*f0865ec9SKyle Evans 
1246*f0865ec9SKyle Evans 	return ret;
1247*f0865ec9SKyle Evans }
1248*f0865ec9SKyle Evans 
1249*f0865ec9SKyle Evans /*
1250*f0865ec9SKyle Evans  * Compute g = gcd(a, b). Internally use the xgcd and drop u and v.
1251*f0865ec9SKyle Evans  * Not constant time at all. Returns 0 on success, -1 on error.
1252*f0865ec9SKyle Evans  * XXX document 'sign'.
1253*f0865ec9SKyle Evans  *
1254*f0865ec9SKyle Evans  * Aliasing is supported.
1255*f0865ec9SKyle Evans  */
nn_gcd(nn_t g,nn_src_t a,nn_src_t b,int * sign)1256*f0865ec9SKyle Evans int nn_gcd(nn_t g, nn_src_t a, nn_src_t b, int *sign)
1257*f0865ec9SKyle Evans {
1258*f0865ec9SKyle Evans 	nn u, v;
1259*f0865ec9SKyle Evans 	int ret;
1260*f0865ec9SKyle Evans 	u.magic = v.magic = WORD(0);
1261*f0865ec9SKyle Evans 
1262*f0865ec9SKyle Evans 	/* nn_xgcd will initialize g, u and v and
1263*f0865ec9SKyle Evans 	 * check if a and b are indeed initialized.
1264*f0865ec9SKyle Evans 	 */
1265*f0865ec9SKyle Evans 	ret = nn_xgcd(g, &u, &v, a, b, sign);
1266*f0865ec9SKyle Evans 
1267*f0865ec9SKyle Evans 	nn_uninit(&u);
1268*f0865ec9SKyle Evans 	nn_uninit(&v);
1269*f0865ec9SKyle Evans 
1270*f0865ec9SKyle Evans 	return ret;
1271*f0865ec9SKyle Evans }
1272