xref: /freebsd/crypto/libecc/src/nn/nn_div.c (revision dd21556857e8d40f66bf5ad54754d9d52669ebf7)
1 /*
2  *  Copyright (C) 2017 - This file is part of libecc project
3  *
4  *  Authors:
5  *      Ryad BENADJILA <ryadbenadjila@gmail.com>
6  *      Arnaud EBALARD <arnaud.ebalard@ssi.gouv.fr>
7  *      Jean-Pierre FLORI <jean-pierre.flori@ssi.gouv.fr>
8  *
9  *  Contributors:
10  *      Nicolas VIVET <nicolas.vivet@ssi.gouv.fr>
11  *      Karim KHALFALLAH <karim.khalfallah@ssi.gouv.fr>
12  *
13  *  This software is licensed under a dual BSD and GPL v2 license.
14  *  See LICENSE file at the root folder of the project.
15  */
16 #include <libecc/nn/nn_mul_public.h>
17 #include <libecc/nn/nn_logical.h>
18 #include <libecc/nn/nn_add.h>
19 #include <libecc/nn/nn.h>
20 /* Use internal API header */
21 #include "nn_div.h"
22 
23 /*
24  * Some helper functions to perform operations on an arbitrary part
25  * of a multiprecision number.
26  * This is exactly the same code as for operations on the least significant
27  * part of a multiprecision number except for the starting point in the
28  * array representing it.
29  * Done in *constant time*.
30  *
31  * Operations producing an output are in place.
32  */
33 
34 /*
35  * Compare all the bits of in2 with the same number of bits in in1 starting at
36  * 'shift' position in in1. in1 must be long enough for that purpose, i.e.
37  * in1->wlen >= (in2->wlen + shift). The comparison value is provided in
38  * 'cmp' parameter. The function returns 0 on success, -1 on error.
39  *
40  * The function is an internal helper; it expects initialized nn in1 and
41  * in2: it does not verify that.
42  */
43 ATTRIBUTE_WARN_UNUSED_RET static int _nn_cmp_shift(nn_src_t in1, nn_src_t in2, u8 shift, int *cmp)
44 {
45 	int ret, mask, tmp;
46 	u8 i;
47 
48 	MUST_HAVE((in1->wlen >= (in2->wlen + shift)), ret, err);
49 	MUST_HAVE((cmp != NULL), ret, err);
50 
51 	tmp = 0;
52 	for (i = in2->wlen; i > 0; i--) {
53 		mask = (!(tmp & 0x1));
54 		tmp += ((in1->val[shift + i - 1] > in2->val[i - 1]) & mask);
55 		tmp -= ((in1->val[shift + i - 1] < in2->val[i - 1]) & mask);
56 	}
57 	(*cmp) = tmp;
58 	ret = 0;
59 
60 err:
61 	return ret;
62 }
63 
64 /*
65  * Conditionally subtract a shifted version of in from out, i.e.:
66  *   - if cnd == 1, out <- out - (in << shift)
67  *   - if cnd == 0, out <- out
68  * The function returns 0 on success, -1 on error. On success, 'borrow'
69  * provides the possible borrow resulting from the subtraction. 'borrow'
70  * is not meaningful on error.
71  *
72  * The function is an internal helper; it expects initialized nn out and
73  * in: it does not verify that.
74  */
75 ATTRIBUTE_WARN_UNUSED_RET static int _nn_cnd_sub_shift(int cnd, nn_t out, nn_src_t in,
76 			    u8 shift, word_t *borrow)
77 {
78 	word_t tmp, borrow1, borrow2, _borrow = WORD(0);
79 	word_t mask = WORD_MASK_IFNOTZERO(cnd);
80 	int ret;
81 	u8 i;
82 
83 	MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err);
84 	MUST_HAVE((borrow != NULL), ret, err);
85 
86 	/*
87 	 *  Perform subtraction one word at a time,
88 	 *  propagating the borrow.
89 	 */
90 	for (i = 0; i < in->wlen; i++) {
91 		tmp = (word_t)(out->val[shift + i] - (in->val[i] & mask));
92 		borrow1 = (word_t)(tmp > out->val[shift + i]);
93 		out->val[shift + i] = (word_t)(tmp - _borrow);
94 		borrow2 = (word_t)(out->val[shift + i] > tmp);
95 		/* There is at most one borrow going out. */
96 		_borrow = (word_t)(borrow1 | borrow2);
97 	}
98 
99 	(*borrow) = _borrow;
100 	ret = 0;
101 
102 err:
103 	return ret;
104 }
105 
106 /*
107  * Subtract a shifted version of 'in' multiplied by 'w' from 'out' and return
108  * borrow. The function returns 0 on success, -1 on error. 'borrow' is
109  * meaningful only on success.
110  *
111  * The function is an internal helper; it expects initialized nn out and
112  * in: it does not verify that.
113  */
114 ATTRIBUTE_WARN_UNUSED_RET static int _nn_submul_word_shift(nn_t out, nn_src_t in, word_t w, u8 shift,
115 				word_t *borrow)
116 {
117 	word_t _borrow = WORD(0), prod_high, prod_low, tmp;
118 	int ret;
119 	u8 i;
120 
121 	MUST_HAVE((out->wlen >= (in->wlen + shift)), ret, err);
122 	MUST_HAVE((borrow != NULL), ret, err);
123 
124 	for (i = 0; i < in->wlen; i++) {
125 		/*
126 		 * Compute the result of the multiplication of
127 		 * two words.
128 		 */
129 		WORD_MUL(prod_high, prod_low, in->val[i], w);
130 
131 		/*
132 		 * And add previous borrow.
133 		 */
134 		prod_low = (word_t)(prod_low + _borrow);
135 		prod_high = (word_t)(prod_high + (prod_low < _borrow));
136 
137 		/*
138 		 * Subtract computed word at current position in result.
139 		 */
140 		tmp = (word_t)(out->val[shift + i] - prod_low);
141 		_borrow = (word_t)(prod_high + (tmp > out->val[shift + i]));
142 		out->val[shift + i] = tmp;
143 	}
144 
145 	(*borrow) = _borrow;
146 	ret = 0;
147 
148 err:
149 	return ret;
150 }
151 
152 /*
153  * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b'
154  * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized on
155  * return. * Computation are performed in *constant time*, only depending on
156  * the lengths of 'a' and 'b', but not on the values of 'a' and 'b'.
157  *
158  * This uses the above function to perform arithmetic on arbitrary parts
159  * of multiprecision numbers.
160  *
161  * The algorithm used is schoolbook division:
162  * + the quotient is computed word by word,
163  * + a small division of the MSW is performed to obtain an
164  *   approximation of the MSW of the quotient,
165  * + the approximation is corrected to obtain the correct
166  *   multiprecision MSW of the quotient,
167  * + the corresponding product is subtracted from the dividend,
168  * + the same procedure is used for the following word of the quotient.
169  *
170  * It is assumed that:
171  * + b is normalized: the MSB of its MSW is 1,
172  * + the most significant part of a is smaller than b,
173  * + a precomputed reciprocal
174  *     v = floor(B^3/(d+1)) - B
175  *   where d is the MSW of the (normalized) divisor
176  *   is given to perform the small 3-by-2 division.
177  * + using this reciprocal, the approximated quotient is always
178  *   too small and at most one multiprecision correction is needed.
179  *
180  * It returns 0 on sucess, -1 on error.
181  *
182  * CAUTION:
183  *
184  * - The function is expected to be used ONLY by the generic version
185  *   nn_divrem_normalized() defined later in the file.
186  * - All parameters must have been initialized. Unlike exported/public
187  *   functions, this internal helper does not verify that nn parameters
188  *   have been initialized. Again, this is expected from the caller
189  *   (nn_divrem_normalized()).
190  * - The function does not support aliasing of 'b' or 'q'. See
191  *   _nn_divrem_normalized_aliased() for such a wrapper.
192  *
193  */
194 ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized(nn_t q, nn_t r,
195 				 nn_src_t a, nn_src_t b, word_t v)
196 {
197 	word_t borrow, qstar, qh, ql, rh, rl; /* for 3-by-2 div. */
198 	int _small, cmp, ret;
199 	u8 i;
200 
201 	MUST_HAVE(!(b->wlen <= 0), ret, err);
202 	MUST_HAVE(!(a->wlen <= b->wlen), ret, err);
203 	MUST_HAVE(!(!((b->val[b->wlen - 1] >> (WORD_BITS - 1)) == WORD(1))), ret, err);
204 	MUST_HAVE(!_nn_cmp_shift(a, b, (u8)(a->wlen - b->wlen), &cmp) && (cmp < 0), ret, err);
205 
206 	/* Handle trivial aliasing for a and r */
207 	if (r != a) {
208 		ret = nn_set_wlen(r, a->wlen); EG(ret, err);
209 		ret = nn_copy(r, a); EG(ret, err);
210 	}
211 
212 	ret = nn_set_wlen(q, (u8)(r->wlen - b->wlen)); EG(ret, err);
213 
214 	/*
215 	 * Compute subsequent words of the quotient one by one.
216 	 * Perform approximate 3-by-2 division using the precomputed
217 	 * reciprocal and correct afterward.
218 	 */
219 	for (i = r->wlen; i > b->wlen; i--) {
220 		u8 shift = (u8)(i - b->wlen - 1);
221 
222 		/*
223 		 * Perform 3-by-2 approximate division:
224 		 * <qstar, qh, ql> = <rh, rl> * (v + B)
225 		 * We are only interested in qstar.
226 		 */
227 		rh = r->val[i - 1];
228 		rl = r->val[i - 2];
229 		/* Perform 2-by-1 multiplication. */
230 		WORD_MUL(qh, ql, rl, v);
231 		WORD_MUL(qstar, ql, rh, v);
232 		/* And propagate carries. */
233 		qh = (word_t)(qh + ql);
234 		qstar = (word_t)(qstar + (qh < ql));
235 		qh = (word_t)(qh + rl);
236 		rh = (word_t)(rh + (qh < rl));
237 		qstar = (word_t)(qstar + rh);
238 
239 		/*
240 		 * Compute approximate quotient times divisor
241 		 * and subtract it from remainder:
242 		 * r = r - (b*qstar << B^shift)
243 		 */
244 		ret = _nn_submul_word_shift(r, b, qstar, shift, &borrow); EG(ret, err);
245 
246 		/* Check the approximate quotient was indeed not too large. */
247 		MUST_HAVE(!(r->val[i - 1] < borrow), ret, err);
248 		r->val[i - 1] = (word_t)(r->val[i - 1] - borrow);
249 
250 		/*
251 		 * Check whether the approximate quotient was too small or not.
252 		 * At most one multiprecision correction is needed.
253 		 */
254 		ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err);
255 		_small = ((!!(r->val[i - 1])) | (cmp >= 0));
256 		/* Perform conditional multiprecision correction. */
257 		ret = _nn_cnd_sub_shift(_small, r, b, shift, &borrow); EG(ret, err);
258 		MUST_HAVE(!(r->val[i - 1] != borrow), ret, err);
259 		r->val[i - 1] = (word_t)(r->val[i - 1] - borrow);
260 		/*
261 		 * Adjust the quotient if it was too small and set it in the
262 		 * multiprecision array.
263 		 */
264 		qstar = (word_t)(qstar + (word_t)_small);
265 		q->val[shift] = qstar;
266 		/*
267 		 * Check that the MSW of remainder was cancelled out and that
268 		 * we could not increase the quotient anymore.
269 		 */
270 		MUST_HAVE(!(r->val[r->wlen - 1] != WORD(0)), ret, err);
271 
272 		ret = _nn_cmp_shift(r, b, shift, &cmp); EG(ret, err);
273 		MUST_HAVE(!(cmp >= 0), ret, err);
274 
275 		ret = nn_set_wlen(r, (u8)(r->wlen - 1)); EG(ret, err);
276 	}
277 
278 err:
279 	return ret;
280 }
281 
282 /*
283  * Compute quotient 'q' and remainder 'r' of Euclidean division of 'a' by 'b'
284  * (i.e. q and r such that a = b*q + r). 'q' and 'r' are not normalized.
285  * Compared to _nn_divrem_normalized(), this internal version
286  * explicitly handle the case where 'b' and 'r' point to the same nn (i.e. 'r'
287  * result is stored in 'b' on success), hence the removal of 'r' parameter from
288  * function prototype compared to _nn_divrem_normalized().
289  *
290  * The computation is performed in *constant time*, see documentation of
291  * _nn_divrem_normalized().
292  *
293  * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the
294  * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than
295  * 'b'.
296  *
297  * The function returns 0 on success, -1 on error.
298  *
299  * CAUTION:
300  *
301  * - The function is expected to be used ONLY by the generic version
302  *   nn_divrem_normalized() defined later in the file.
303  * - All parameters must have been initialized. Unlike exported/public
304  *   functions, this internal helper does not verify that nn parameters
305  *   have been initialized. Again, this is expected from the caller
306  *   (nn_divrem_normalized()).
307  * - The function does not support aliasing of 'a' or 'q'.
308  *
309  */
310 ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_normalized_aliased(nn_t q, nn_src_t a, nn_t b, word_t v)
311 {
312 	int ret;
313 	nn r;
314 	r.magic = WORD(0);
315 
316 	ret = nn_init(&r, 0); EG(ret, err);
317 	ret = _nn_divrem_normalized(q, &r, a, b, v); EG(ret, err);
318 	ret = nn_copy(b, &r); EG(ret, err);
319 
320 err:
321 	nn_uninit(&r);
322 	return ret;
323 }
324 
325 /*
326  * Compute quotient and remainder of Euclidean division, and do not normalize
327  * them. Done in *constant time*, see documentation of _nn_divrem_normalized().
328  *
329  * Assume that 'b' is normalized (the MSB of its MSW is set), that 'v' is the
330  * reciprocal of the MSW of 'b' and that the high part of 'a' is smaller than
331  * 'b'.
332  *
333  * Aliasing is supported for 'r' only (with 'b'), i.e. 'r' and 'b' can point
334  * to the same nn.
335  *
336  * The function returns 0 on success, -1 on error.
337  */
338 int nn_divrem_normalized(nn_t q, nn_t r, nn_src_t a, nn_src_t b, word_t v)
339 {
340 	int ret;
341 
342 	ret = nn_check_initialized(a); EG(ret, err);
343 	ret = nn_check_initialized(q); EG(ret, err);
344 	ret = nn_check_initialized(r); EG(ret, err);
345 
346 	/* Unsupported aliasings */
347 	MUST_HAVE((q != r) && (q != a) && (q != b), ret, err);
348 
349 	if (r == b) {
350 		ret = _nn_divrem_normalized_aliased(q, a, r, v);
351 	} else {
352 		ret = nn_check_initialized(b); EG(ret, err);
353 		ret = _nn_divrem_normalized(q, r, a, b, v);
354 	}
355 
356 err:
357 	return ret;
358 }
359 
360 /*
361  * Compute remainder only and do not normalize it.
362  * Constant time, see documentation of _nn_divrem_normalized.
363  *
364  * Support aliasing of inputs and outputs.
365  *
366  * The function returns 0 on success, -1 on error.
367  */
368 int nn_mod_normalized(nn_t r, nn_src_t a, nn_src_t b, word_t v)
369 {
370 	int ret;
371 	nn q;
372 	q.magic = WORD(0);
373 
374 	ret = nn_init(&q, 0); EG(ret, err);
375 	ret = nn_divrem_normalized(&q, r, a, b, v);
376 
377 err:
378 	nn_uninit(&q);
379 	return ret;
380 }
381 
382 /*
383  * Compute quotient and remainder of Euclidean division,
384  * and do not normalize them.
385  * Done in *constant time*,
386  * only depending on the lengths of 'a' and 'b' and the value of 'cnt',
387  * but not on the values of 'a' and 'b'.
388  *
389  * Assume that b has been normalized by a 'cnt' bit shift,
390  * that v is the reciprocal of the MSW of 'b',
391  * but a is not shifted yet.
392  * Useful when multiple multiplication by the same b are performed,
393  * e.g. at the fp level.
394  *
395  * All outputs MUST have been initialized. The function does not support
396  * aliasing of 'b' or 'q'. It returns 0 on success, -1 on error.
397  *
398  * CAUTION:
399  *
400  * - The function is expected to be used ONLY by the generic version
401  *   nn_divrem_normalized() defined later in the file.
402  * - All parameters must have been initialized. Unlike exported/public
403  *   functions, this internal helper does not verify that
404  *   have been initialized. Again, this is expected from the caller
405  *   (nn_divrem_unshifted()).
406  * - The function does not support aliasing. See
407  *   _nn_divrem_unshifted_aliased() for such a wrapper.
408  */
409 ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b_norm,
410 				word_t v, bitcnt_t cnt)
411 {
412 	nn a_shift;
413 	u8 new_wlen, b_wlen;
414 	int larger, ret, cmp;
415 	word_t borrow;
416 	a_shift.magic = WORD(0);
417 
418 	/* Avoid overflow */
419 	MUST_HAVE(((a->wlen + BIT_LEN_WORDS(cnt)) < NN_MAX_WORD_LEN), ret, err);
420 
421 	/* We now know that new_wlen will fit in an u8 */
422 	new_wlen = (u8)(a->wlen + (u8)BIT_LEN_WORDS(cnt));
423 
424 	b_wlen = b_norm->wlen;
425 	if (new_wlen < b_wlen) { /* trivial case */
426 		ret = nn_copy(r, a); EG(ret, err);
427 		ret = nn_zero(q);
428 		goto err;
429 	}
430 
431 	/* Shift a. */
432 	ret = nn_init(&a_shift, (u16)(new_wlen * WORD_BYTES)); EG(ret, err);
433 	ret = nn_set_wlen(&a_shift, new_wlen); EG(ret, err);
434 	ret = nn_lshift_fixedlen(&a_shift, a, cnt); EG(ret, err);
435 	ret = nn_set_wlen(r, new_wlen); EG(ret, err);
436 
437 	if (new_wlen == b_wlen) {
438 		/* Ensure that a is smaller than b. */
439 		ret = nn_cmp(&a_shift, b_norm, &cmp); EG(ret, err);
440 		larger = (cmp >= 0);
441 		ret = nn_cnd_sub(larger, r, &a_shift, b_norm); EG(ret, err);
442 		MUST_HAVE(((!nn_cmp(r, b_norm, &cmp)) && (cmp < 0)), ret, err);
443 
444 		/* Set MSW of quotient. */
445 		ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err);
446 		q->val[new_wlen - b_wlen] = (word_t) larger;
447 		/* And we are done as the quotient is 0 or 1. */
448 	} else if (new_wlen > b_wlen) {
449 		/* Ensure that most significant part of a is smaller than b. */
450 		ret = _nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp); EG(ret, err);
451 		larger = (cmp >= 0);
452 		ret = _nn_cnd_sub_shift(larger, &a_shift, b_norm, (u8)(new_wlen - b_wlen), &borrow); EG(ret, err);
453 		MUST_HAVE(((!_nn_cmp_shift(&a_shift, b_norm, (u8)(new_wlen - b_wlen), &cmp)) && (cmp < 0)), ret, err);
454 
455 		/*
456 		 * Perform division with MSP of a smaller than b. This ensures
457 		 * that the quotient is of length a_len - b_len.
458 		 */
459 		ret = nn_divrem_normalized(q, r, &a_shift, b_norm, v); EG(ret, err);
460 
461 		/* Set MSW of quotient. */
462 		ret = nn_set_wlen(q, (u8)(new_wlen - b_wlen + 1)); EG(ret, err);
463 		q->val[new_wlen - b_wlen] = (word_t) larger;
464 	} /* else a is smaller than b... treated above. */
465 
466 	ret = nn_rshift_fixedlen(r, r, cnt); EG(ret, err);
467 	ret = nn_set_wlen(r, b_wlen);
468 
469 err:
470 	nn_uninit(&a_shift);
471 
472 	return ret;
473 }
474 
475 /*
476  * Same as previous but handling aliasing of 'r' with 'b_norm', i.e. on success,
477  * result 'r' is passed through 'b_norm'.
478  *
479  * CAUTION:
480  *
481  * - The function is expected to be used ONLY by the generic version
482  *   nn_divrem_normalized() defined later in the file.
483  * - All parameter must have been initialized. Unlike exported/public
484  *   functions, this internal helper does not verify that nn parameters
485  *   have been initialized. Again, this is expected from the caller
486  *   (nn_divrem_unshifted()).
487  */
488 ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem_unshifted_aliased(nn_t q, nn_src_t a, nn_t b_norm,
489 					word_t v, bitcnt_t cnt)
490 {
491 	int ret;
492 	nn r;
493 	r.magic = WORD(0);
494 
495 	ret = nn_init(&r, 0); EG(ret, err);
496 	ret = _nn_divrem_unshifted(q, &r, a, b_norm, v, cnt); EG(ret, err);
497 	ret = nn_copy(b_norm, &r); EG(ret, err);
498 
499 err:
500 	nn_uninit(&r);
501 	return ret;
502 }
503 
504 /*
505  * Compute quotient and remainder and do not normalize them.
506  * Constant time, see documentation of _nn_divrem_unshifted().
507  *
508  * Alias-supporting version of _nn_divrem_unshifted for 'r' only.
509  *
510  * The function returns 0 on success, -1 on error.
511  */
512 int nn_divrem_unshifted(nn_t q, nn_t r, nn_src_t a, nn_src_t b,
513 			word_t v, bitcnt_t cnt)
514 {
515 	int ret;
516 
517 	ret = nn_check_initialized(a); EG(ret, err);
518 	ret = nn_check_initialized(q); EG(ret, err);
519 	ret = nn_check_initialized(r); EG(ret, err);
520 
521 	/* Unsupported aliasings */
522 	MUST_HAVE((q != r) && (q != a) && (q != b), ret, err);
523 
524 	if (r == b) {
525 		ret = _nn_divrem_unshifted_aliased(q, a, r, v, cnt);
526 	} else {
527 		ret = nn_check_initialized(b); EG(ret, err);
528 		ret = _nn_divrem_unshifted(q, r, a, b, v, cnt);
529 	}
530 
531 err:
532 	return ret;
533 }
534 
535 /*
536  * Compute remainder only and do not normalize it.
537  * Constant time, see documentation of _nn_divrem_unshifted.
538  *
539  * Aliasing of inputs and outputs is possible.
540  *
541  * The function returns 0 on success, -1 on error.
542  */
543 int nn_mod_unshifted(nn_t r, nn_src_t a, nn_src_t b, word_t v, bitcnt_t cnt)
544 {
545 	nn q;
546 	int ret;
547 	q.magic = WORD(0);
548 
549 	ret = nn_init(&q, 0); EG(ret, err);
550 	ret = nn_divrem_unshifted(&q, r, a, b, v, cnt);
551 
552 err:
553 	nn_uninit(&q);
554 
555 	return ret;
556 }
557 
558 /*
559  * Helper functions for arithmetic in 2-by-1 division
560  * used in the reciprocal computation.
561  *
562  * These are variations of the nn multiprecision functions
563  * acting on arrays of fixed length, in place,
564  * and returning carry/borrow.
565  *
566  * Done in constant time.
567  */
568 
569 /*
570  * Comparison of two limbs numbers. Internal helper.
571  * Checks left to the caller
572  */
573 ATTRIBUTE_WARN_UNUSED_RET static int _wcmp_22(word_t a[2], word_t b[2])
574 {
575 	int mask, ret = 0;
576 	ret += (a[1] > b[1]);
577 	ret -= (a[1] < b[1]);
578 	mask = !(ret & 0x1);
579 	ret += ((a[0] > b[0]) & mask);
580 	ret -= ((a[0] < b[0]) & mask);
581 	return ret;
582 }
583 
584 /*
585  * Addition of two limbs numbers with carry returned. Internal helper.
586  * Checks left to the caller.
587  */
588 ATTRIBUTE_WARN_UNUSED_RET static word_t _wadd_22(word_t a[2], word_t b[2])
589 {
590 	word_t carry;
591 	a[0]  = (word_t)(a[0] + b[0]);
592 	carry = (word_t)(a[0] < b[0]);
593 	a[1]  = (word_t)(a[1] + carry);
594 	carry = (word_t)(a[1] < carry);
595 	a[1]  = (word_t)(a[1] + b[1]);
596 	carry = (word_t)(carry | (a[1] < b[1]));
597 	return carry;
598 }
599 
600 /*
601  * Subtraction of two limbs numbers with borrow returned. Internal helper.
602  * Checks left to the caller.
603  */
604 ATTRIBUTE_WARN_UNUSED_RET static word_t _wsub_22(word_t a[2], word_t b[2])
605 {
606 	word_t borrow, tmp;
607 	tmp    = (word_t)(a[0] - b[0]);
608 	borrow = (word_t)(tmp > a[0]);
609 	a[0]   = tmp;
610 	tmp    = (word_t)(a[1] - borrow);
611 	borrow = (word_t)(tmp > a[1]);
612 	a[1]   = (word_t)(tmp - b[1]);
613 	borrow = (word_t)(borrow | (a[1] > tmp));
614 	return borrow;
615 }
616 
617 /*
618  * Helper macros for conditional subtraction in 2-by-1 division
619  * used in the reciprocal computation.
620  *
621  * Done in constant time.
622  */
623 
624 /* Conditional subtraction of a one limb number from a two limbs number. */
625 #define WORD_CND_SUB_21(cnd, ah, al, b) do {				\
626 		word_t tmp, mask;					\
627 		mask = WORD_MASK_IFNOTZERO((cnd));			\
628 		tmp  = (word_t)((al) - ((b) & mask));			\
629 		(ah) = (word_t)((ah) - (tmp > (al)));			\
630 		(al) = tmp;						\
631 	} while (0)
632 /* Conditional subtraction of a two limbs number from a two limbs number. */
633 #define WORD_CND_SUB_22(cnd, ah, al, bh, bl) do {			\
634 		word_t tmp, mask;					\
635 		mask = WORD_MASK_IFNOTZERO((cnd));			\
636 		tmp  = (word_t)((al) - ((bl) & mask));			\
637 		(ah) = (word_t)((ah) - (tmp > (al)));			\
638 		(al) = tmp;						\
639 		(ah) = (word_t)((ah) - ((bh) & mask));			\
640 	} while (0)
641 
642 /*
643  * divide two words by a normalized word using schoolbook division on half
644  * words. This is only used below in the reciprocal computation. No checks
645  * are performed on inputs. This is expected to be done by the caller.
646  *
647  * Returns 0 on success, -1 on error.
648  */
649 ATTRIBUTE_WARN_UNUSED_RET static int _word_divrem(word_t *q, word_t *r, word_t ah, word_t al, word_t b)
650 {
651 	word_t bh, bl, qh, ql, rm, rhl[2], phl[2];
652 	int larger, ret;
653 	u8 j;
654 
655 	MUST_HAVE((WRSHIFT((b), (WORD_BITS - 1)) == WORD(1)), ret, err);
656 	bh = WRSHIFT((b), HWORD_BITS);
657 	bl = WLSHIFT((b), HWORD_BITS);
658 	rhl[1] = ah;
659 	rhl[0] = al;
660 
661 	/*
662 	 * Compute high part of the quotient. We know from
663 	 * MUST_HAVE() check above that bh (a word_t) is not 0
664 	 */
665 
666 	KNOWN_FACT(bh != 0, ret, err);
667 	qh = (rhl[1] / bh);
668 	qh = WORD_MIN(qh, HWORD_MASK);
669 	WORD_MUL(phl[1], phl[0], qh, (b));
670 	phl[1] = (WLSHIFT(phl[1], HWORD_BITS) |
671 		  WRSHIFT(phl[0], HWORD_BITS));
672 	phl[0] = WLSHIFT(phl[0], HWORD_BITS);
673 
674 	for (j = 0; j < 2; j++) {
675 		larger = (_wcmp_22(phl, rhl) > 0);
676 		qh = (word_t)(qh - (word_t)larger);
677 		WORD_CND_SUB_22(larger, phl[1], phl[0], bh, bl);
678 	}
679 
680 	ret = (_wcmp_22(phl, rhl) > 0);
681 	MUST_HAVE(!(ret), ret, err);
682 	IGNORE_RET_VAL(_wsub_22(rhl, phl));
683 	MUST_HAVE((WRSHIFT(rhl[1], HWORD_BITS) == 0), ret, err);
684 
685 	/* Compute low part of the quotient. */
686 	rm = (WLSHIFT(rhl[1], HWORD_BITS) |
687 	      WRSHIFT(rhl[0], HWORD_BITS));
688 	ql = (rm / bh);
689 	ql = WORD_MIN(ql, HWORD_MASK);
690 	WORD_MUL(phl[1], phl[0], ql, (b));
691 
692 	for (j = 0; j < 2; j++) {
693 		larger = (_wcmp_22(phl, rhl) > 0);
694 		ql = (word_t) (ql - (word_t)larger);
695 		WORD_CND_SUB_21(larger, phl[1], phl[0], (b));
696 	}
697 
698 	ret = _wcmp_22(phl, rhl) > 0;
699 	MUST_HAVE(!(ret), ret, err);
700 	IGNORE_RET_VAL(_wsub_22(rhl, phl));
701 	/* Set outputs. */
702 	MUST_HAVE((rhl[1] == WORD(0)), ret, err);
703 	MUST_HAVE(!(rhl[0] >= (b)), ret, err);
704 	(*q) = (WLSHIFT(qh, HWORD_BITS) | ql);
705 	(*r) = rhl[0];
706 	MUST_HAVE(!((word_t) ((*q)*(b) + (*r)) != (al)), ret, err);
707 	ret = 0;
708 
709 err:
710 	return ret;
711 }
712 
713 /*
714  * Compute the reciprocal of d as
715  *	floor(B^3/(d+1)) - B
716  * which is used to perform approximate small division using a multiplication.
717  *
718  * No attempt was made to make it constant time. Indeed, such values are usually
719  * precomputed in contexts where constant time is wanted, e.g. in the fp layer.
720  *
721  * Returns 0 on success, -1 on error.
722  */
723 int wreciprocal(word_t dh, word_t dl, word_t *reciprocal)
724 {
725 	word_t q, carry, r[2], t[2];
726 	int ret;
727 
728 	MUST_HAVE((reciprocal != NULL), ret, err);
729 
730 	if (((word_t)(dh + WORD(1)) == WORD(0)) &&
731 	    ((word_t)(dl + WORD(1)) == WORD(0))) {
732 		(*reciprocal) = WORD(0);
733 		ret = 0;
734 		goto err;
735 	}
736 
737 	if ((word_t)(dh + WORD(1)) == WORD(0)) {
738 		q = (word_t)(~dh);
739 		r[1] = (word_t)(~dl);
740 	} else {
741 		t[1] = (word_t)(~dh);
742 		t[0] = (word_t)(~dl);
743 		ret = _word_divrem(&q, r+1, t[1], t[0],
744 				   (word_t)(dh + WORD(1))); EG(ret, err);
745 	}
746 
747 	if ((word_t)(dl + WORD(1)) == WORD(0)) {
748 		(*reciprocal) = q;
749 		ret = 0;
750 		goto err;
751 	}
752 
753 	r[0] = WORD(0);
754 
755 	WORD_MUL(t[1], t[0], q, (word_t)~dl);
756 	carry = _wadd_22(r, t);
757 
758 	t[0] = (word_t)(dl + WORD(1));
759 	t[1] = dh;
760 	while (carry || (_wcmp_22(r, t) >= 0)) {
761 		q++;
762 		carry = (word_t)(carry - _wsub_22(r, t));
763 	}
764 
765 	(*reciprocal) = q;
766 	ret = 0;
767 
768 err:
769 	return ret;
770 }
771 
772 /*
773  * Given an odd number p, compute division coefficients p_normalized,
774  * p_shift and p_reciprocal so that:
775  *	- p_shift = p_rounded_bitlen - bitsizeof(p), where
776  *          o p_rounded_bitlen = BIT_LEN_WORDS(p) (i.e. bit length of
777  *            minimum number of words required to store p) and
778  *          o p_bitlen is the real bit size of p
779  *	- p_normalized = p << p_shift
780  *	- p_reciprocal = B^3 / ((p_normalized >> (pbitlen - 2*WORDSIZE)) + 1) - B
781  *	  with B = 2^WORDSIZE
782  *
783  * These coefficients are useful for the optimized shifted variants of NN
784  * division and modular functions. Because we have two word_t outputs
785  * (p_shift and p_reciprocal), these are passed through word_t pointers.
786  * Aliasing of outputs with the input is possible since p_in is copied in
787  * local p at the beginning of the function.
788  *
789  * The function does not support aliasing.
790  *
791  * The function returns 0 on success, -1 on error.
792  */
793 int nn_compute_div_coefs(nn_t p_normalized, word_t *p_shift,
794 			  word_t *p_reciprocal, nn_src_t p_in)
795 {
796 	bitcnt_t p_rounded_bitlen, p_bitlen;
797 	nn p, tmp_nn;
798 	int ret;
799 	p.magic = tmp_nn.magic = WORD(0);
800 
801 	ret = nn_check_initialized(p_in); EG(ret, err);
802 
803 	MUST_HAVE((p_shift != NULL), ret, err);
804 	MUST_HAVE((p_reciprocal != NULL), ret, err);
805 
806 	/* Unsupported aliasing */
807 	MUST_HAVE((p_normalized != p_in), ret, err);
808 
809 	ret = nn_init(&p, 0); EG(ret, err);
810 	ret = nn_copy(&p, p_in); EG(ret, err);
811 
812 	/*
813 	 * In order for our reciprocal division routines to work, it is expected
814 	 * that the bit length (including leading zeroes) of input prime
815 	 * p is >= 2 * wlen where wlen is the number of bits of a word size.
816 	 */
817 	if (p.wlen < 2) {
818 		ret = nn_set_wlen(&p, 2); EG(ret, err);
819 	}
820 
821 	ret = nn_init(p_normalized, 0); EG(ret, err);
822 	ret = nn_init(&tmp_nn, 0); EG(ret, err);
823 
824 	/* p_rounded_bitlen = bitlen of p rounded to word size */
825 	p_rounded_bitlen = (bitcnt_t)(WORD_BITS * p.wlen);
826 
827 	/* p_shift */
828 	ret = nn_bitlen(&p, &p_bitlen); EG(ret, err);
829 	(*p_shift) = (word_t)(p_rounded_bitlen - p_bitlen);
830 
831 	/* p_normalized = p << pshift */
832 	ret = nn_lshift(p_normalized, &p, (bitcnt_t)(*p_shift)); EG(ret, err);
833 
834 	/* Sanity check to protect the p_reciprocal computation */
835 	MUST_HAVE((p_rounded_bitlen >= (2 * WORDSIZE)), ret, err);
836 
837 	/*
838 	 * p_reciprocal = B^3 / ((p_normalized >> (p_rounded_bitlen - 2 * wlen)) + 1) - B
839 	 * where B = 2^wlen where wlen = word size in bits. We use our NN
840 	 * helper to compute it.
841 	 */
842 	ret = nn_rshift(&tmp_nn, p_normalized, (bitcnt_t)(p_rounded_bitlen - (2 * WORDSIZE))); EG(ret, err);
843 	ret = wreciprocal(tmp_nn.val[1], tmp_nn.val[0], p_reciprocal);
844 
845 err:
846 	nn_uninit(&p);
847 	nn_uninit(&tmp_nn);
848 
849 	return ret;
850 }
851 
852 /*
853  * Compute quotient remainder of Euclidean division.
854  *
855  * This function is a wrapper to normalize the divisor, i.e. shift it so that
856  * the MSB of its MSW is set, and precompute the reciprocal of this MSW to be
857  * used to perform small divisions using multiplications during the long
858  * schoolbook division. It uses the helper functions/macros above.
859  *
860  * This is NOT constant time with regards to the word length of a and b,
861  * but also the actual bitlength of b as we need to normalize b at the
862  * bit level.
863  * Moreover the precomputation of the reciprocal is not constant time at all.
864  *
865  * r need not be initialized, the function does it for the the caller.
866  *
867  * This function does not support aliasing. This is an internal helper, which
868  * expects caller to check parameters.
869  *
870  * It returns 0 on sucess, -1 on error.
871  */
872 ATTRIBUTE_WARN_UNUSED_RET static int _nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
873 {
874 	nn b_large, b_normalized;
875 	bitcnt_t cnt;
876 	word_t v;
877 	nn_src_t ptr = b;
878 	int ret, iszero;
879 	b_large.magic = b_normalized.magic = WORD(0);
880 
881 	ret = nn_init(r, 0); EG(ret, err);
882 	ret = nn_init(q, 0); EG(ret, err);
883 	ret = nn_init(&b_large, 0); EG(ret, err);
884 
885 	MUST_HAVE(!nn_iszero(b, &iszero) && !iszero, ret, err);
886 
887 	if (b->wlen == 1) {
888 		ret = nn_copy(&b_large, b); EG(ret, err);
889 
890 		/* Expand our big number with zeroes */
891 		ret = nn_set_wlen(&b_large, 2); EG(ret, err);
892 
893 		/*
894 		 * This cast could seem inappropriate, but we are
895 		 * sure here that we won't touch ptr since it is only
896 		 * given as a const parameter to sub functions.
897 		 */
898 		ptr = (nn_src_t) &b_large;
899 	}
900 
901 	/* After this, we only handle >= 2 words big numbers */
902 	MUST_HAVE(!(ptr->wlen < 2), ret, err);
903 
904 	ret = nn_init(&b_normalized, (u16)((ptr->wlen) * WORD_BYTES)); EG(ret, err);
905 	ret = nn_clz(ptr, &cnt); EG(ret, err);
906 	ret = nn_lshift_fixedlen(&b_normalized, ptr, cnt); EG(ret, err);
907 	ret = wreciprocal(b_normalized.val[ptr->wlen - 1],
908 			  b_normalized.val[ptr->wlen - 2],
909 			  &v); /* Not constant time. */ EG(ret, err);
910 
911 	ret = _nn_divrem_unshifted(q, r, a, &b_normalized, v, cnt);
912 
913 err:
914 	nn_uninit(&b_normalized);
915 	nn_uninit(&b_large);
916 
917 	return ret;
918 }
919 
920 /*
921  * Returns 0 on succes, -1 on error. Internal helper. Checks on params
922  * expected from the caller.
923  */
924 ATTRIBUTE_WARN_UNUSED_RET static int __nn_divrem_notrim_alias(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
925 {
926 	nn a_cpy, b_cpy;
927 	int ret;
928 	a_cpy.magic = b_cpy.magic = WORD(0);
929 
930 	ret = nn_init(&a_cpy, 0); EG(ret, err);
931 	ret = nn_init(&b_cpy, 0); EG(ret, err);
932 	ret = nn_copy(&a_cpy, a); EG(ret, err);
933 	ret = nn_copy(&b_cpy, b); EG(ret, err);
934 	ret = _nn_divrem(q, r, &a_cpy, &b_cpy);
935 
936 err:
937 	nn_uninit(&b_cpy);
938 	nn_uninit(&a_cpy);
939 
940 	return ret;
941 }
942 
943 
944 
945 /*
946  * Compute quotient and remainder and normalize them.
947  * Not constant time, see documentation of _nn_divrem.
948  *
949  * Aliased version of _nn_divrem. Returns 0 on success,
950  * -1 on error.
951  */
952 int nn_divrem_notrim(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
953 {
954 	int ret;
955 
956 	/* _nn_divrem initializes q and r */
957 	ret = nn_check_initialized(a); EG(ret, err);
958 	ret = nn_check_initialized(b); EG(ret, err);
959 	MUST_HAVE(((q != NULL) && (r != NULL)), ret, err);
960 
961 	/*
962 	 * Handle aliasing whenever any of the inputs is
963 	 * used as an output.
964 	 */
965 	if ((a == q) || (a == r) || (b == q) || (b == r)) {
966 		ret = __nn_divrem_notrim_alias(q, r, a, b);
967 	} else {
968 		ret = _nn_divrem(q, r, a, b);
969 	}
970 
971 err:
972 	return ret;
973 }
974 
975 /*
976  * Compute quotient and remainder and normalize them.
977  * Not constant time, see documentation of _nn_divrem().
978  * Returns 0 on success, -1 on error.
979  *
980  * Aliasing is supported.
981  */
982 int nn_divrem(nn_t q, nn_t r, nn_src_t a, nn_src_t b)
983 {
984 	int ret;
985 
986 	ret = nn_divrem_notrim(q, r, a, b); EG(ret, err);
987 
988 	/* Normalize (trim) the quotient and rest to avoid size overflow */
989 	ret = nn_normalize(q); EG(ret, err);
990 	ret = nn_normalize(r);
991 
992 err:
993 	return ret;
994 }
995 
996 /*
997  * Compute remainder only and do not normalize it. Not constant time, see
998  * documentation of _nn_divrem(). Returns 0 on success, -1 on error.
999  *
1000  * Aliasing is supported.
1001  */
1002 int nn_mod_notrim(nn_t r, nn_src_t a, nn_src_t b)
1003 {
1004 	int ret;
1005 	nn q;
1006 	q.magic = WORD(0);
1007 
1008 	/* nn_divrem() will init q. */
1009 	ret = nn_divrem_notrim(&q, r, a, b);
1010 
1011 	nn_uninit(&q);
1012 
1013 	return ret;
1014 }
1015 
1016 /*
1017  * Compute remainder only and normalize it. Not constant time, see
1018  * documentation of _nn_divrem(). r is initialized by the function.
1019  * Returns 0 on success, -1 on error.
1020  *
1021  * Aliasing is supported.
1022  */
1023 int nn_mod(nn_t r, nn_src_t a, nn_src_t b)
1024 {
1025 	int ret;
1026 	nn q;
1027 	q.magic = WORD(0);
1028 
1029 	/* nn_divrem will init q. */
1030 	ret = nn_divrem(&q, r, a, b);
1031 
1032 	nn_uninit(&q);
1033 
1034 	return ret;
1035 }
1036 
1037 /*
1038  * Below follow gcd and xgcd non constant time functions for the user ease.
1039  */
1040 
1041 /*
1042  * Unaliased version of xgcd, and we suppose that a >= b. Badly non-constant
1043  * time per the algorithm used. The function returns 0 on success, -1 on
1044  * error. internal helper: expect caller to check parameters.
1045  */
1046 ATTRIBUTE_WARN_UNUSED_RET static int _nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b,
1047 		    int *sign)
1048 {
1049 	nn_t c, d, q, r, u1, v1, u2, v2;
1050 	nn scratch[8];
1051 	int ret, swap, iszero;
1052 	u8 i;
1053 
1054 	for (i = 0; i < 8; i++){
1055 		scratch[i].magic = WORD(0);
1056 	}
1057 
1058 	/*
1059 	 * Maintain:
1060 	 * |u1 v1| |c| = |a|
1061 	 * |u2 v2| |d|   |b|
1062 	 * u1, v1, u2, v2 >= 0
1063 	 * c >= d
1064 	 *
1065 	 * Initially:
1066 	 * |1  0 | |a| = |a|
1067 	 * |0  1 | |b|   |b|
1068 	 *
1069 	 * At each iteration:
1070 	 * c >= d
1071 	 * c = q*d + r
1072 	 * |u1 v1| = |q*u1+v1 u1|
1073 	 * |u2 v2|   |q*u2+v2 u2|
1074 	 *
1075 	 * Finally, after i steps:
1076 	 * |u1 v1| |g| = |a|
1077 	 * |u2 v2| |0| = |b|
1078 	 *
1079 	 * Inverting the matrix:
1080 	 * |g| = (-1)^i | v2 -v1| |a|
1081 	 * |0|          |-u2  u1| |b|
1082 	 */
1083 
1084 	/*
1085 	 * Initialization.
1086 	 */
1087 	ret = nn_init(g, 0); EG(ret, err);
1088 	ret = nn_init(u, 0); EG(ret, err);
1089 	ret = nn_init(v, 0); EG(ret, err);
1090 	ret = nn_iszero(b, &iszero); EG(ret, err);
1091 	if (iszero) {
1092 		/* gcd(0, a) = a, and 1*a + 0*b = a */
1093 		ret = nn_copy(g, a); EG(ret, err);
1094 		ret = nn_one(u); EG(ret, err);
1095 		ret = nn_zero(v); EG(ret, err);
1096 		(*sign) = 1;
1097 		goto err;
1098 	}
1099 
1100 	for (i = 0; i < 8; i++){
1101 		ret = nn_init(&scratch[i], 0); EG(ret, err);
1102 	}
1103 
1104 	u1 = &(scratch[0]);
1105 	v1 = &(scratch[1]);
1106 	u2 = &(scratch[2]);
1107 	v2 = &(scratch[3]);
1108 	ret = nn_one(u1); EG(ret, err);
1109 	ret = nn_zero(v1); EG(ret, err);
1110 	ret = nn_zero(u2); EG(ret, err);
1111 	ret = nn_one(v2); EG(ret, err);
1112 	c = &(scratch[4]);
1113 	d = &(scratch[5]);
1114 	ret = nn_copy(c, a); EG(ret, err); /* Copy could be skipped. */
1115 	ret = nn_copy(d, b); EG(ret, err); /* Copy could be skipped. */
1116 	q = &(scratch[6]);
1117 	r = &(scratch[7]);
1118 	swap = 0;
1119 
1120 	/*
1121 	 * Loop.
1122 	 */
1123 	ret = nn_iszero(d, &iszero); EG(ret, err);
1124 	while (!iszero) {
1125 		ret = nn_divrem(q, r, c, d); EG(ret, err);
1126 		ret = nn_normalize(q); EG(ret, err);
1127 		ret = nn_normalize(r); EG(ret, err);
1128 		ret = nn_copy(c, r); EG(ret, err);
1129 		ret = nn_mul(r, q, u1); EG(ret, err);
1130 		ret = nn_normalize(r); EG(ret, err);
1131 		ret = nn_add(v1, v1, r); EG(ret, err);
1132 		ret = nn_mul(r, q, u2); EG(ret, err);
1133 		ret = nn_normalize(r); EG(ret, err);
1134 		ret = nn_add(v2, v2, r); EG(ret, err);
1135 		ret = nn_normalize(v1); EG(ret, err);
1136 		ret = nn_normalize(v2); EG(ret, err);
1137 		swap = 1;
1138 		ret = nn_iszero(c, &iszero); EG(ret, err);
1139 		if (iszero) {
1140 			break;
1141 		}
1142 		ret = nn_divrem(q, r, d, c); EG(ret, err);
1143 		ret = nn_normalize(q); EG(ret, err);
1144 		ret = nn_normalize(r); EG(ret, err);
1145 		ret = nn_copy(d, r); EG(ret, err);
1146 		ret = nn_mul(r, q, v1); EG(ret, err);
1147 		ret = nn_normalize(r); EG(ret, err);
1148 		ret = nn_add(u1, u1, r); EG(ret, err);
1149 		ret = nn_mul(r, q, v2); EG(ret, err);
1150 		ret = nn_normalize(r); EG(ret, err);
1151 		ret = nn_add(u2, u2, r); EG(ret, err);
1152 		ret = nn_normalize(u1); EG(ret, err);
1153 		ret = nn_normalize(u2); EG(ret, err);
1154 		swap = 0;
1155 
1156 		/* refresh loop condition */
1157 		ret = nn_iszero(d, &iszero); EG(ret, err);
1158 	}
1159 
1160 	/* Copies could be skipped. */
1161 	if (swap) {
1162 		ret = nn_copy(g, d); EG(ret, err);
1163 		ret = nn_copy(u, u2); EG(ret, err);
1164 		ret = nn_copy(v, u1); EG(ret, err);
1165 	} else {
1166 		ret = nn_copy(g, c); EG(ret, err);
1167 		ret = nn_copy(u, v2); EG(ret, err);
1168 		ret = nn_copy(v, v1); EG(ret, err);
1169 	}
1170 
1171 	/* swap = -1 means u <= 0; = 1 means v <= 0 */
1172 	(*sign) = swap ? -1 : 1;
1173 	ret = 0;
1174 
1175 err:
1176 	/*
1177 	 * We uninit scratch elements in all cases, i.e. whether or not
1178 	 * we return an error or not.
1179 	 */
1180 	for (i = 0; i < 8; i++){
1181 		nn_uninit(&scratch[i]);
1182 	}
1183 	/* Unitialize output in case of error */
1184 	if (ret){
1185 		nn_uninit(v);
1186 		nn_uninit(u);
1187 		nn_uninit(g);
1188 	}
1189 
1190 	return ret;
1191 }
1192 
1193 /*
1194  * Aliased version of xgcd, and no assumption on a and b. Not constant time at
1195  * all. returns 0 on success, -1 on error. XXX document 'sign'
1196  *
1197  * Aliasing is supported.
1198  */
1199 int nn_xgcd(nn_t g, nn_t u, nn_t v, nn_src_t a, nn_src_t b, int *sign)
1200 {
1201 	/* Handle aliasing
1202 	 * Note: in order to properly handle aliasing, we accept to lose
1203 	 * some "space" on the stack with copies.
1204 	 */
1205 	nn a_cpy, b_cpy;
1206 	nn_src_t a_, b_;
1207 	int ret, cmp, _sign;
1208 	a_cpy.magic = b_cpy.magic = WORD(0);
1209 
1210 	/* The internal _nn_xgcd function initializes g, u and v */
1211 	ret = nn_check_initialized(a); EG(ret, err);
1212 	ret = nn_check_initialized(b); EG(ret, err);
1213 	MUST_HAVE((sign != NULL), ret, err);
1214 
1215 	ret = nn_init(&b_cpy, 0); EG(ret, err);
1216 
1217 	/* Aliasing of a */
1218 	if ((g == a) || (u == a) || (v == a)){
1219 		ret = nn_copy(&a_cpy, a); EG(ret, err);
1220 		a_ = &a_cpy;
1221 	} else {
1222 		a_ = a;
1223 	}
1224 	/* Aliasing of b */
1225 	if ((g == b) || (u == b) || (v == b)) {
1226 		ret = nn_copy(&b_cpy, b); EG(ret, err);
1227 		b_ = &b_cpy;
1228 	} else {
1229 		b_ = b;
1230 	}
1231 
1232 	ret = nn_cmp(a_, b_, &cmp); EG(ret, err);
1233 	if (cmp < 0) {
1234 		/* If a < b, swap the inputs */
1235 		ret = _nn_xgcd(g, v, u, b_, a_, &_sign); EG(ret, err);
1236 		(*sign) = -(_sign);
1237 	} else {
1238 		ret = _nn_xgcd(g, u, v, a_, b_, &_sign); EG(ret, err);
1239 		(*sign) = _sign;
1240 	}
1241 
1242 err:
1243 	nn_uninit(&b_cpy);
1244 	nn_uninit(&a_cpy);
1245 
1246 	return ret;
1247 }
1248 
1249 /*
1250  * Compute g = gcd(a, b). Internally use the xgcd and drop u and v.
1251  * Not constant time at all. Returns 0 on success, -1 on error.
1252  * XXX document 'sign'.
1253  *
1254  * Aliasing is supported.
1255  */
1256 int nn_gcd(nn_t g, nn_src_t a, nn_src_t b, int *sign)
1257 {
1258 	nn u, v;
1259 	int ret;
1260 	u.magic = v.magic = WORD(0);
1261 
1262 	/* nn_xgcd will initialize g, u and v and
1263 	 * check if a and b are indeed initialized.
1264 	 */
1265 	ret = nn_xgcd(g, &u, &v, a, b, sign);
1266 
1267 	nn_uninit(&u);
1268 	nn_uninit(&v);
1269 
1270 	return ret;
1271 }
1272