xref: /titanic_41/usr/src/common/mpi/mpi.c (revision cde2885fdf538266ee2a3b08dee2d5075ce8fa2b)
1 /*
2  *  mpi.c
3  *
4  *  Arbitrary precision integer arithmetic library
5  *
6  * ***** BEGIN LICENSE BLOCK *****
7  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
8  *
9  * The contents of this file are subject to the Mozilla Public License Version
10  * 1.1 (the "License"); you may not use this file except in compliance with
11  * the License. You may obtain a copy of the License at
12  * http://www.mozilla.org/MPL/
13  *
14  * Software distributed under the License is distributed on an "AS IS" basis,
15  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
16  * for the specific language governing rights and limitations under the
17  * License.
18  *
19  * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
20  *
21  * The Initial Developer of the Original Code is
22  * Michael J. Fromberger.
23  * Portions created by the Initial Developer are Copyright (C) 1998
24  * the Initial Developer. All Rights Reserved.
25  *
26  * Contributor(s):
27  *   Netscape Communications Corporation
28  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
29  *
30  * Alternatively, the contents of this file may be used under the terms of
31  * either the GNU General Public License Version 2 or later (the "GPL"), or
32  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
33  * in which case the provisions of the GPL or the LGPL are applicable instead
34  * of those above. If you wish to allow use of your version of this file only
35  * under the terms of either the GPL or the LGPL, and not to allow others to
36  * use your version of this file under the terms of the MPL, indicate your
37  * decision by deleting the provisions above and replace them with the notice
38  * and other provisions required by the GPL or the LGPL. If you do not delete
39  * the provisions above, a recipient may use your version of this file under
40  * the terms of any one of the MPL, the GPL or the LGPL.
41  *
42  * ***** END LICENSE BLOCK ***** */
43 /*
44  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
45  * Use is subject to license terms.
46  *
47  * Sun elects to use this software under the MPL license.
48  */
49 
50 #pragma ident	"%Z%%M%	%I%	%E% SMI"
51 
52 /* $Id: mpi.c,v 1.45 2006/09/29 20:12:21 alexei.volkov.bugs%sun.com Exp $ */
53 
54 #include "mpi-priv.h"
55 #if defined(OSF1)
56 #include <c_asm.h>
57 #endif
58 
59 #if MP_LOGTAB
60 /*
61   A table of the logs of 2 for various bases (the 0 and 1 entries of
62   this table are meaningless and should not be referenced).
63 
64   This table is used to compute output lengths for the mp_toradix()
65   function.  Since a number n in radix r takes up about log_r(n)
66   digits, we estimate the output size by taking the least integer
67   greater than log_r(n), where:
68 
69   log_r(n) = log_2(n) * log_r(2)
70 
71   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
72   which are the output bases supported.
73  */
74 #include "logtab.h"
75 #endif
76 
77 /* {{{ Constant strings */
78 
79 /* Constant strings returned by mp_strerror() */
80 static const char *mp_err_string[] = {
81   "unknown result code",     /* say what?            */
82   "boolean true",            /* MP_OKAY, MP_YES      */
83   "boolean false",           /* MP_NO                */
84   "out of memory",           /* MP_MEM               */
85   "argument out of range",   /* MP_RANGE             */
86   "invalid input parameter", /* MP_BADARG            */
87   "result is undefined"      /* MP_UNDEF             */
88 };
89 
90 /* Value to digit maps for radix conversion   */
91 
92 /* s_dmap_1 - standard digits and letters */
93 static const char *s_dmap_1 =
94   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
95 
96 /* }}} */
97 
98 unsigned long mp_allocs;
99 unsigned long mp_frees;
100 unsigned long mp_copies;
101 
102 /* {{{ Default precision manipulation */
103 
104 /* Default precision for newly created mp_int's      */
105 static mp_size s_mp_defprec = MP_DEFPREC;
106 
107 mp_size mp_get_prec(void)
108 {
109   return s_mp_defprec;
110 
111 } /* end mp_get_prec() */
112 
113 void         mp_set_prec(mp_size prec)
114 {
115   if(prec == 0)
116     s_mp_defprec = MP_DEFPREC;
117   else
118     s_mp_defprec = prec;
119 
120 } /* end mp_set_prec() */
121 
122 /* }}} */
123 
124 /*------------------------------------------------------------------------*/
125 /* {{{ mp_init(mp, kmflag) */
126 
127 /*
128   mp_init(mp, kmflag)
129 
130   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
131   MP_MEM if memory could not be allocated for the structure.
132  */
133 
134 mp_err mp_init(mp_int *mp, int kmflag)
135 {
136   return mp_init_size(mp, s_mp_defprec, kmflag);
137 
138 } /* end mp_init() */
139 
140 /* }}} */
141 
142 /* {{{ mp_init_size(mp, prec, kmflag) */
143 
144 /*
145   mp_init_size(mp, prec, kmflag)
146 
147   Initialize a new zero-valued mp_int with at least the given
148   precision; returns MP_OKAY if successful, or MP_MEM if memory could
149   not be allocated for the structure.
150  */
151 
152 mp_err mp_init_size(mp_int *mp, mp_size prec, int kmflag)
153 {
154   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
155 
156   prec = MP_ROUNDUP(prec, s_mp_defprec);
157   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit), kmflag)) == NULL)
158     return MP_MEM;
159 
160   SIGN(mp) = ZPOS;
161   USED(mp) = 1;
162   ALLOC(mp) = prec;
163 
164   return MP_OKAY;
165 
166 } /* end mp_init_size() */
167 
168 /* }}} */
169 
170 /* {{{ mp_init_copy(mp, from) */
171 
172 /*
173   mp_init_copy(mp, from)
174 
175   Initialize mp as an exact copy of from.  Returns MP_OKAY if
176   successful, MP_MEM if memory could not be allocated for the new
177   structure.
178  */
179 
180 mp_err mp_init_copy(mp_int *mp, const mp_int *from)
181 {
182   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
183 
184   if(mp == from)
185     return MP_OKAY;
186 
187   if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
188     return MP_MEM;
189 
190   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
191   USED(mp) = USED(from);
192   ALLOC(mp) = ALLOC(from);
193   SIGN(mp) = SIGN(from);
194   FLAG(mp) = FLAG(from);
195 
196   return MP_OKAY;
197 
198 } /* end mp_init_copy() */
199 
200 /* }}} */
201 
202 /* {{{ mp_copy(from, to) */
203 
204 /*
205   mp_copy(from, to)
206 
207   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
208   'to' has already been initialized (if not, use mp_init_copy()
209   instead). If 'from' and 'to' are identical, nothing happens.
210  */
211 
212 mp_err mp_copy(const mp_int *from, mp_int *to)
213 {
214   ARGCHK(from != NULL && to != NULL, MP_BADARG);
215 
216   if(from == to)
217     return MP_OKAY;
218 
219   ++mp_copies;
220   { /* copy */
221     mp_digit   *tmp;
222 
223     /*
224       If the allocated buffer in 'to' already has enough space to hold
225       all the used digits of 'from', we'll re-use it to avoid hitting
226       the memory allocater more than necessary; otherwise, we'd have
227       to grow anyway, so we just allocate a hunk and make the copy as
228       usual
229      */
230     if(ALLOC(to) >= USED(from)) {
231       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
232       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
233 
234     } else {
235       if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
236 	return MP_MEM;
237 
238       s_mp_copy(DIGITS(from), tmp, USED(from));
239 
240       if(DIGITS(to) != NULL) {
241 #if MP_CRYPTO
242 	s_mp_setz(DIGITS(to), ALLOC(to));
243 #endif
244 	s_mp_free(DIGITS(to), ALLOC(to));
245       }
246 
247       DIGITS(to) = tmp;
248       ALLOC(to) = ALLOC(from);
249     }
250 
251     /* Copy the precision and sign from the original */
252     USED(to) = USED(from);
253     SIGN(to) = SIGN(from);
254   } /* end copy */
255 
256   return MP_OKAY;
257 
258 } /* end mp_copy() */
259 
260 /* }}} */
261 
262 /* {{{ mp_exch(mp1, mp2) */
263 
264 /*
265   mp_exch(mp1, mp2)
266 
267   Exchange mp1 and mp2 without allocating any intermediate memory
268   (well, unless you count the stack space needed for this call and the
269   locals it creates...).  This cannot fail.
270  */
271 
272 void mp_exch(mp_int *mp1, mp_int *mp2)
273 {
274 #if MP_ARGCHK == 2
275   assert(mp1 != NULL && mp2 != NULL);
276 #else
277   if(mp1 == NULL || mp2 == NULL)
278     return;
279 #endif
280 
281   s_mp_exch(mp1, mp2);
282 
283 } /* end mp_exch() */
284 
285 /* }}} */
286 
287 /* {{{ mp_clear(mp) */
288 
289 /*
290   mp_clear(mp)
291 
292   Release the storage used by an mp_int, and void its fields so that
293   if someone calls mp_clear() again for the same int later, we won't
294   get tollchocked.
295  */
296 
297 void   mp_clear(mp_int *mp)
298 {
299   if(mp == NULL)
300     return;
301 
302   if(DIGITS(mp) != NULL) {
303 #if MP_CRYPTO
304     s_mp_setz(DIGITS(mp), ALLOC(mp));
305 #endif
306     s_mp_free(DIGITS(mp), ALLOC(mp));
307     DIGITS(mp) = NULL;
308   }
309 
310   USED(mp) = 0;
311   ALLOC(mp) = 0;
312 
313 } /* end mp_clear() */
314 
315 /* }}} */
316 
317 /* {{{ mp_zero(mp) */
318 
319 /*
320   mp_zero(mp)
321 
322   Set mp to zero.  Does not change the allocated size of the structure,
323   and therefore cannot fail (except on a bad argument, which we ignore)
324  */
325 void   mp_zero(mp_int *mp)
326 {
327   if(mp == NULL)
328     return;
329 
330   s_mp_setz(DIGITS(mp), ALLOC(mp));
331   USED(mp) = 1;
332   SIGN(mp) = ZPOS;
333 
334 } /* end mp_zero() */
335 
336 /* }}} */
337 
338 /* {{{ mp_set(mp, d) */
339 
340 void   mp_set(mp_int *mp, mp_digit d)
341 {
342   if(mp == NULL)
343     return;
344 
345   mp_zero(mp);
346   DIGIT(mp, 0) = d;
347 
348 } /* end mp_set() */
349 
350 /* }}} */
351 
352 /* {{{ mp_set_int(mp, z) */
353 
354 mp_err mp_set_int(mp_int *mp, long z)
355 {
356   int            ix;
357   unsigned long  v = labs(z);
358   mp_err         res;
359 
360   ARGCHK(mp != NULL, MP_BADARG);
361 
362   mp_zero(mp);
363   if(z == 0)
364     return MP_OKAY;  /* shortcut for zero */
365 
366   if (sizeof v <= sizeof(mp_digit)) {
367     DIGIT(mp,0) = v;
368   } else {
369     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
370       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
371 	return res;
372 
373       res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
374       if (res != MP_OKAY)
375 	return res;
376     }
377   }
378   if(z < 0)
379     SIGN(mp) = NEG;
380 
381   return MP_OKAY;
382 
383 } /* end mp_set_int() */
384 
385 /* }}} */
386 
387 /* {{{ mp_set_ulong(mp, z) */
388 
389 mp_err mp_set_ulong(mp_int *mp, unsigned long z)
390 {
391   int            ix;
392   mp_err         res;
393 
394   ARGCHK(mp != NULL, MP_BADARG);
395 
396   mp_zero(mp);
397   if(z == 0)
398     return MP_OKAY;  /* shortcut for zero */
399 
400   if (sizeof z <= sizeof(mp_digit)) {
401     DIGIT(mp,0) = z;
402   } else {
403     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
404       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
405 	return res;
406 
407       res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
408       if (res != MP_OKAY)
409 	return res;
410     }
411   }
412   return MP_OKAY;
413 } /* end mp_set_ulong() */
414 
415 /* }}} */
416 
417 /*------------------------------------------------------------------------*/
418 /* {{{ Digit arithmetic */
419 
420 /* {{{ mp_add_d(a, d, b) */
421 
422 /*
423   mp_add_d(a, d, b)
424 
425   Compute the sum b = a + d, for a single digit d.  Respects the sign of
426   its primary addend (single digits are unsigned anyway).
427  */
428 
429 mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
430 {
431   mp_int   tmp;
432   mp_err   res;
433 
434   ARGCHK(a != NULL && b != NULL, MP_BADARG);
435 
436   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
437     return res;
438 
439   if(SIGN(&tmp) == ZPOS) {
440     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
441       goto CLEANUP;
442   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
443     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
444       goto CLEANUP;
445   } else {
446     mp_neg(&tmp, &tmp);
447 
448     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
449   }
450 
451   if(s_mp_cmp_d(&tmp, 0) == 0)
452     SIGN(&tmp) = ZPOS;
453 
454   s_mp_exch(&tmp, b);
455 
456 CLEANUP:
457   mp_clear(&tmp);
458   return res;
459 
460 } /* end mp_add_d() */
461 
462 /* }}} */
463 
464 /* {{{ mp_sub_d(a, d, b) */
465 
466 /*
467   mp_sub_d(a, d, b)
468 
469   Compute the difference b = a - d, for a single digit d.  Respects the
470   sign of its subtrahend (single digits are unsigned anyway).
471  */
472 
473 mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
474 {
475   mp_int   tmp;
476   mp_err   res;
477 
478   ARGCHK(a != NULL && b != NULL, MP_BADARG);
479 
480   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
481     return res;
482 
483   if(SIGN(&tmp) == NEG) {
484     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
485       goto CLEANUP;
486   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
487     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
488       goto CLEANUP;
489   } else {
490     mp_neg(&tmp, &tmp);
491 
492     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
493     SIGN(&tmp) = NEG;
494   }
495 
496   if(s_mp_cmp_d(&tmp, 0) == 0)
497     SIGN(&tmp) = ZPOS;
498 
499   s_mp_exch(&tmp, b);
500 
501 CLEANUP:
502   mp_clear(&tmp);
503   return res;
504 
505 } /* end mp_sub_d() */
506 
507 /* }}} */
508 
509 /* {{{ mp_mul_d(a, d, b) */
510 
511 /*
512   mp_mul_d(a, d, b)
513 
514   Compute the product b = a * d, for a single digit d.  Respects the sign
515   of its multiplicand (single digits are unsigned anyway)
516  */
517 
518 mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
519 {
520   mp_err  res;
521 
522   ARGCHK(a != NULL && b != NULL, MP_BADARG);
523 
524   if(d == 0) {
525     mp_zero(b);
526     return MP_OKAY;
527   }
528 
529   if((res = mp_copy(a, b)) != MP_OKAY)
530     return res;
531 
532   res = s_mp_mul_d(b, d);
533 
534   return res;
535 
536 } /* end mp_mul_d() */
537 
538 /* }}} */
539 
540 /* {{{ mp_mul_2(a, c) */
541 
542 mp_err mp_mul_2(const mp_int *a, mp_int *c)
543 {
544   mp_err  res;
545 
546   ARGCHK(a != NULL && c != NULL, MP_BADARG);
547 
548   if((res = mp_copy(a, c)) != MP_OKAY)
549     return res;
550 
551   return s_mp_mul_2(c);
552 
553 } /* end mp_mul_2() */
554 
555 /* }}} */
556 
557 /* {{{ mp_div_d(a, d, q, r) */
558 
559 /*
560   mp_div_d(a, d, q, r)
561 
562   Compute the quotient q = a / d and remainder r = a mod d, for a
563   single digit d.  Respects the sign of its divisor (single digits are
564   unsigned anyway).
565  */
566 
567 mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
568 {
569   mp_err   res;
570   mp_int   qp;
571   mp_digit rem;
572   int      pow;
573 
574   ARGCHK(a != NULL, MP_BADARG);
575 
576   if(d == 0)
577     return MP_RANGE;
578 
579   /* Shortcut for powers of two ... */
580   if((pow = s_mp_ispow2d(d)) >= 0) {
581     mp_digit  mask;
582 
583     mask = ((mp_digit)1 << pow) - 1;
584     rem = DIGIT(a, 0) & mask;
585 
586     if(q) {
587       mp_copy(a, q);
588       s_mp_div_2d(q, pow);
589     }
590 
591     if(r)
592       *r = rem;
593 
594     return MP_OKAY;
595   }
596 
597   if((res = mp_init_copy(&qp, a)) != MP_OKAY)
598     return res;
599 
600   res = s_mp_div_d(&qp, d, &rem);
601 
602   if(s_mp_cmp_d(&qp, 0) == 0)
603     SIGN(q) = ZPOS;
604 
605   if(r)
606     *r = rem;
607 
608   if(q)
609     s_mp_exch(&qp, q);
610 
611   mp_clear(&qp);
612   return res;
613 
614 } /* end mp_div_d() */
615 
616 /* }}} */
617 
618 /* {{{ mp_div_2(a, c) */
619 
620 /*
621   mp_div_2(a, c)
622 
623   Compute c = a / 2, disregarding the remainder.
624  */
625 
626 mp_err mp_div_2(const mp_int *a, mp_int *c)
627 {
628   mp_err  res;
629 
630   ARGCHK(a != NULL && c != NULL, MP_BADARG);
631 
632   if((res = mp_copy(a, c)) != MP_OKAY)
633     return res;
634 
635   s_mp_div_2(c);
636 
637   return MP_OKAY;
638 
639 } /* end mp_div_2() */
640 
641 /* }}} */
642 
643 /* {{{ mp_expt_d(a, d, b) */
644 
645 mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
646 {
647   mp_int   s, x;
648   mp_err   res;
649 
650   ARGCHK(a != NULL && c != NULL, MP_BADARG);
651 
652   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
653     return res;
654   if((res = mp_init_copy(&x, a)) != MP_OKAY)
655     goto X;
656 
657   DIGIT(&s, 0) = 1;
658 
659   while(d != 0) {
660     if(d & 1) {
661       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
662 	goto CLEANUP;
663     }
664 
665     d /= 2;
666 
667     if((res = s_mp_sqr(&x)) != MP_OKAY)
668       goto CLEANUP;
669   }
670 
671   s_mp_exch(&s, c);
672 
673 CLEANUP:
674   mp_clear(&x);
675 X:
676   mp_clear(&s);
677 
678   return res;
679 
680 } /* end mp_expt_d() */
681 
682 /* }}} */
683 
684 /* }}} */
685 
686 /*------------------------------------------------------------------------*/
687 /* {{{ Full arithmetic */
688 
689 /* {{{ mp_abs(a, b) */
690 
691 /*
692   mp_abs(a, b)
693 
694   Compute b = |a|.  'a' and 'b' may be identical.
695  */
696 
697 mp_err mp_abs(const mp_int *a, mp_int *b)
698 {
699   mp_err   res;
700 
701   ARGCHK(a != NULL && b != NULL, MP_BADARG);
702 
703   if((res = mp_copy(a, b)) != MP_OKAY)
704     return res;
705 
706   SIGN(b) = ZPOS;
707 
708   return MP_OKAY;
709 
710 } /* end mp_abs() */
711 
712 /* }}} */
713 
714 /* {{{ mp_neg(a, b) */
715 
716 /*
717   mp_neg(a, b)
718 
719   Compute b = -a.  'a' and 'b' may be identical.
720  */
721 
722 mp_err mp_neg(const mp_int *a, mp_int *b)
723 {
724   mp_err   res;
725 
726   ARGCHK(a != NULL && b != NULL, MP_BADARG);
727 
728   if((res = mp_copy(a, b)) != MP_OKAY)
729     return res;
730 
731   if(s_mp_cmp_d(b, 0) == MP_EQ)
732     SIGN(b) = ZPOS;
733   else
734     SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
735 
736   return MP_OKAY;
737 
738 } /* end mp_neg() */
739 
740 /* }}} */
741 
742 /* {{{ mp_add(a, b, c) */
743 
744 /*
745   mp_add(a, b, c)
746 
747   Compute c = a + b.  All parameters may be identical.
748  */
749 
750 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
751 {
752   mp_err  res;
753 
754   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
755 
756   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
757     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
758   } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
759     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
760   } else {                          /* different sign: |a|  < |b|   */
761     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
762   }
763 
764   if (s_mp_cmp_d(c, 0) == MP_EQ)
765     SIGN(c) = ZPOS;
766 
767 CLEANUP:
768   return res;
769 
770 } /* end mp_add() */
771 
772 /* }}} */
773 
774 /* {{{ mp_sub(a, b, c) */
775 
776 /*
777   mp_sub(a, b, c)
778 
779   Compute c = a - b.  All parameters may be identical.
780  */
781 
782 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
783 {
784   mp_err  res;
785   int     magDiff;
786 
787   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
788 
789   if (a == b) {
790     mp_zero(c);
791     return MP_OKAY;
792   }
793 
794   if (MP_SIGN(a) != MP_SIGN(b)) {
795     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
796   } else if (!(magDiff = s_mp_cmp(a, b))) {
797     mp_zero(c);
798     res = MP_OKAY;
799   } else if (magDiff > 0) {
800     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
801   } else {
802     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
803     MP_SIGN(c) = !MP_SIGN(a);
804   }
805 
806   if (s_mp_cmp_d(c, 0) == MP_EQ)
807     MP_SIGN(c) = MP_ZPOS;
808 
809 CLEANUP:
810   return res;
811 
812 } /* end mp_sub() */
813 
814 /* }}} */
815 
816 /* {{{ mp_mul(a, b, c) */
817 
818 /*
819   mp_mul(a, b, c)
820 
821   Compute c = a * b.  All parameters may be identical.
822  */
823 mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
824 {
825   mp_digit *pb;
826   mp_int   tmp;
827   mp_err   res;
828   mp_size  ib;
829   mp_size  useda, usedb;
830 
831   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
832 
833   if (a == c) {
834     if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
835       return res;
836     if (a == b)
837       b = &tmp;
838     a = &tmp;
839   } else if (b == c) {
840     if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
841       return res;
842     b = &tmp;
843   } else {
844     MP_DIGITS(&tmp) = 0;
845   }
846 
847   if (MP_USED(a) < MP_USED(b)) {
848     const mp_int *xch = b;	/* switch a and b, to do fewer outer loops */
849     b = a;
850     a = xch;
851   }
852 
853   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
854   if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
855     goto CLEANUP;
856 
857 #ifdef NSS_USE_COMBA
858   if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
859       if (MP_USED(a) == 4) {
860           s_mp_mul_comba_4(a, b, c);
861           goto CLEANUP;
862       }
863       if (MP_USED(a) == 8) {
864           s_mp_mul_comba_8(a, b, c);
865           goto CLEANUP;
866       }
867       if (MP_USED(a) == 16) {
868           s_mp_mul_comba_16(a, b, c);
869           goto CLEANUP;
870       }
871       if (MP_USED(a) == 32) {
872           s_mp_mul_comba_32(a, b, c);
873           goto CLEANUP;
874       }
875   }
876 #endif
877 
878   pb = MP_DIGITS(b);
879   s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
880 
881   /* Outer loop:  Digits of b */
882   useda = MP_USED(a);
883   usedb = MP_USED(b);
884   for (ib = 1; ib < usedb; ib++) {
885     mp_digit b_i    = *pb++;
886 
887     /* Inner product:  Digits of a */
888     if (b_i)
889       s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
890     else
891       MP_DIGIT(c, ib + useda) = b_i;
892   }
893 
894   s_mp_clamp(c);
895 
896   if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
897     SIGN(c) = ZPOS;
898   else
899     SIGN(c) = NEG;
900 
901 CLEANUP:
902   mp_clear(&tmp);
903   return res;
904 } /* end mp_mul() */
905 
906 /* }}} */
907 
908 /* {{{ mp_sqr(a, sqr) */
909 
910 #if MP_SQUARE
911 /*
912   Computes the square of a.  This can be done more
913   efficiently than a general multiplication, because many of the
914   computation steps are redundant when squaring.  The inner product
915   step is a bit more complicated, but we save a fair number of
916   iterations of the multiplication loop.
917  */
918 
919 /* sqr = a^2;   Caller provides both a and tmp; */
920 mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
921 {
922   mp_digit *pa;
923   mp_digit d;
924   mp_err   res;
925   mp_size  ix;
926   mp_int   tmp;
927   int      count;
928 
929   ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
930 
931   if (a == sqr) {
932     if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
933       return res;
934     a = &tmp;
935   } else {
936     DIGITS(&tmp) = 0;
937     res = MP_OKAY;
938   }
939 
940   ix = 2 * MP_USED(a);
941   if (ix > MP_ALLOC(sqr)) {
942     MP_USED(sqr) = 1;
943     MP_CHECKOK( s_mp_grow(sqr, ix) );
944   }
945   MP_USED(sqr) = ix;
946   MP_DIGIT(sqr, 0) = 0;
947 
948 #ifdef NSS_USE_COMBA
949   if (IS_POWER_OF_2(MP_USED(a))) {
950       if (MP_USED(a) == 4) {
951           s_mp_sqr_comba_4(a, sqr);
952           goto CLEANUP;
953       }
954       if (MP_USED(a) == 8) {
955           s_mp_sqr_comba_8(a, sqr);
956           goto CLEANUP;
957       }
958       if (MP_USED(a) == 16) {
959           s_mp_sqr_comba_16(a, sqr);
960           goto CLEANUP;
961       }
962       if (MP_USED(a) == 32) {
963           s_mp_sqr_comba_32(a, sqr);
964           goto CLEANUP;
965       }
966   }
967 #endif
968 
969   pa = MP_DIGITS(a);
970   count = MP_USED(a) - 1;
971   if (count > 0) {
972     d = *pa++;
973     s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
974     for (ix = 3; --count > 0; ix += 2) {
975       d = *pa++;
976       s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
977     } /* for(ix ...) */
978     MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
979 
980     /* now sqr *= 2 */
981     s_mp_mul_2(sqr);
982   } else {
983     MP_DIGIT(sqr, 1) = 0;
984   }
985 
986   /* now add the squares of the digits of a to sqr. */
987   s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
988 
989   SIGN(sqr) = ZPOS;
990   s_mp_clamp(sqr);
991 
992 CLEANUP:
993   mp_clear(&tmp);
994   return res;
995 
996 } /* end mp_sqr() */
997 #endif
998 
999 /* }}} */
1000 
1001 /* {{{ mp_div(a, b, q, r) */
1002 
1003 /*
1004   mp_div(a, b, q, r)
1005 
1006   Compute q = a / b and r = a mod b.  Input parameters may be re-used
1007   as output parameters.  If q or r is NULL, that portion of the
1008   computation will be discarded (although it will still be computed)
1009  */
1010 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1011 {
1012   mp_err   res;
1013   mp_int   *pQ, *pR;
1014   mp_int   qtmp, rtmp, btmp;
1015   int      cmp;
1016   mp_sign  signA;
1017   mp_sign  signB;
1018 
1019   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1020 
1021   signA = MP_SIGN(a);
1022   signB = MP_SIGN(b);
1023 
1024   if(mp_cmp_z(b) == MP_EQ)
1025     return MP_RANGE;
1026 
1027   DIGITS(&qtmp) = 0;
1028   DIGITS(&rtmp) = 0;
1029   DIGITS(&btmp) = 0;
1030 
1031   /* Set up some temporaries... */
1032   if (!r || r == a || r == b) {
1033     MP_CHECKOK( mp_init_copy(&rtmp, a) );
1034     pR = &rtmp;
1035   } else {
1036     MP_CHECKOK( mp_copy(a, r) );
1037     pR = r;
1038   }
1039 
1040   if (!q || q == a || q == b) {
1041     MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a), FLAG(a)) );
1042     pQ = &qtmp;
1043   } else {
1044     MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1045     pQ = q;
1046     mp_zero(pQ);
1047   }
1048 
1049   /*
1050     If |a| <= |b|, we can compute the solution without division;
1051     otherwise, we actually do the work required.
1052    */
1053   if ((cmp = s_mp_cmp(a, b)) <= 0) {
1054     if (cmp) {
1055       /* r was set to a above. */
1056       mp_zero(pQ);
1057     } else {
1058       mp_set(pQ, 1);
1059       mp_zero(pR);
1060     }
1061   } else {
1062     MP_CHECKOK( mp_init_copy(&btmp, b) );
1063     MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1064   }
1065 
1066   /* Compute the signs for the output  */
1067   MP_SIGN(pR) = signA;   /* Sr = Sa              */
1068   /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1069   MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1070 
1071   if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1072     SIGN(pQ) = ZPOS;
1073   if(s_mp_cmp_d(pR, 0) == MP_EQ)
1074     SIGN(pR) = ZPOS;
1075 
1076   /* Copy output, if it is needed      */
1077   if(q && q != pQ)
1078     s_mp_exch(pQ, q);
1079 
1080   if(r && r != pR)
1081     s_mp_exch(pR, r);
1082 
1083 CLEANUP:
1084   mp_clear(&btmp);
1085   mp_clear(&rtmp);
1086   mp_clear(&qtmp);
1087 
1088   return res;
1089 
1090 } /* end mp_div() */
1091 
1092 /* }}} */
1093 
1094 /* {{{ mp_div_2d(a, d, q, r) */
1095 
1096 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1097 {
1098   mp_err  res;
1099 
1100   ARGCHK(a != NULL, MP_BADARG);
1101 
1102   if(q) {
1103     if((res = mp_copy(a, q)) != MP_OKAY)
1104       return res;
1105   }
1106   if(r) {
1107     if((res = mp_copy(a, r)) != MP_OKAY)
1108       return res;
1109   }
1110   if(q) {
1111     s_mp_div_2d(q, d);
1112   }
1113   if(r) {
1114     s_mp_mod_2d(r, d);
1115   }
1116 
1117   return MP_OKAY;
1118 
1119 } /* end mp_div_2d() */
1120 
1121 /* }}} */
1122 
1123 /* {{{ mp_expt(a, b, c) */
1124 
1125 /*
1126   mp_expt(a, b, c)
1127 
1128   Compute c = a ** b, that is, raise a to the b power.  Uses a
1129   standard iterative square-and-multiply technique.
1130  */
1131 
1132 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1133 {
1134   mp_int   s, x;
1135   mp_err   res;
1136   mp_digit d;
1137   int      dig, bit;
1138 
1139   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1140 
1141   if(mp_cmp_z(b) < 0)
1142     return MP_RANGE;
1143 
1144   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1145     return res;
1146 
1147   mp_set(&s, 1);
1148 
1149   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1150     goto X;
1151 
1152   /* Loop over low-order digits in ascending order */
1153   for(dig = 0; dig < (USED(b) - 1); dig++) {
1154     d = DIGIT(b, dig);
1155 
1156     /* Loop over bits of each non-maximal digit */
1157     for(bit = 0; bit < DIGIT_BIT; bit++) {
1158       if(d & 1) {
1159 	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1160 	  goto CLEANUP;
1161       }
1162 
1163       d >>= 1;
1164 
1165       if((res = s_mp_sqr(&x)) != MP_OKAY)
1166 	goto CLEANUP;
1167     }
1168   }
1169 
1170   /* Consider now the last digit... */
1171   d = DIGIT(b, dig);
1172 
1173   while(d) {
1174     if(d & 1) {
1175       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1176 	goto CLEANUP;
1177     }
1178 
1179     d >>= 1;
1180 
1181     if((res = s_mp_sqr(&x)) != MP_OKAY)
1182       goto CLEANUP;
1183   }
1184 
1185   if(mp_iseven(b))
1186     SIGN(&s) = SIGN(a);
1187 
1188   res = mp_copy(&s, c);
1189 
1190 CLEANUP:
1191   mp_clear(&x);
1192 X:
1193   mp_clear(&s);
1194 
1195   return res;
1196 
1197 } /* end mp_expt() */
1198 
1199 /* }}} */
1200 
1201 /* {{{ mp_2expt(a, k) */
1202 
1203 /* Compute a = 2^k */
1204 
1205 mp_err mp_2expt(mp_int *a, mp_digit k)
1206 {
1207   ARGCHK(a != NULL, MP_BADARG);
1208 
1209   return s_mp_2expt(a, k);
1210 
1211 } /* end mp_2expt() */
1212 
1213 /* }}} */
1214 
1215 /* {{{ mp_mod(a, m, c) */
1216 
1217 /*
1218   mp_mod(a, m, c)
1219 
1220   Compute c = a (mod m).  Result will always be 0 <= c < m.
1221  */
1222 
1223 mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1224 {
1225   mp_err  res;
1226   int     mag;
1227 
1228   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1229 
1230   if(SIGN(m) == NEG)
1231     return MP_RANGE;
1232 
1233   /*
1234      If |a| > m, we need to divide to get the remainder and take the
1235      absolute value.
1236 
1237      If |a| < m, we don't need to do any division, just copy and adjust
1238      the sign (if a is negative).
1239 
1240      If |a| == m, we can simply set the result to zero.
1241 
1242      This order is intended to minimize the average path length of the
1243      comparison chain on common workloads -- the most frequent cases are
1244      that |a| != m, so we do those first.
1245    */
1246   if((mag = s_mp_cmp(a, m)) > 0) {
1247     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1248       return res;
1249 
1250     if(SIGN(c) == NEG) {
1251       if((res = mp_add(c, m, c)) != MP_OKAY)
1252 	return res;
1253     }
1254 
1255   } else if(mag < 0) {
1256     if((res = mp_copy(a, c)) != MP_OKAY)
1257       return res;
1258 
1259     if(mp_cmp_z(a) < 0) {
1260       if((res = mp_add(c, m, c)) != MP_OKAY)
1261 	return res;
1262 
1263     }
1264 
1265   } else {
1266     mp_zero(c);
1267 
1268   }
1269 
1270   return MP_OKAY;
1271 
1272 } /* end mp_mod() */
1273 
1274 /* }}} */
1275 
1276 /* {{{ mp_mod_d(a, d, c) */
1277 
1278 /*
1279   mp_mod_d(a, d, c)
1280 
1281   Compute c = a (mod d).  Result will always be 0 <= c < d
1282  */
1283 mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1284 {
1285   mp_err   res;
1286   mp_digit rem;
1287 
1288   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1289 
1290   if(s_mp_cmp_d(a, d) > 0) {
1291     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1292       return res;
1293 
1294   } else {
1295     if(SIGN(a) == NEG)
1296       rem = d - DIGIT(a, 0);
1297     else
1298       rem = DIGIT(a, 0);
1299   }
1300 
1301   if(c)
1302     *c = rem;
1303 
1304   return MP_OKAY;
1305 
1306 } /* end mp_mod_d() */
1307 
1308 /* }}} */
1309 
1310 /* {{{ mp_sqrt(a, b) */
1311 
1312 /*
1313   mp_sqrt(a, b)
1314 
1315   Compute the integer square root of a, and store the result in b.
1316   Uses an integer-arithmetic version of Newton's iterative linear
1317   approximation technique to determine this value; the result has the
1318   following two properties:
1319 
1320      b^2 <= a
1321      (b+1)^2 >= a
1322 
1323   It is a range error to pass a negative value.
1324  */
1325 mp_err mp_sqrt(const mp_int *a, mp_int *b)
1326 {
1327   mp_int   x, t;
1328   mp_err   res;
1329   mp_size  used;
1330 
1331   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1332 
1333   /* Cannot take square root of a negative value */
1334   if(SIGN(a) == NEG)
1335     return MP_RANGE;
1336 
1337   /* Special cases for zero and one, trivial     */
1338   if(mp_cmp_d(a, 1) <= 0)
1339     return mp_copy(a, b);
1340 
1341   /* Initialize the temporaries we'll use below  */
1342   if((res = mp_init_size(&t, USED(a), FLAG(a))) != MP_OKAY)
1343     return res;
1344 
1345   /* Compute an initial guess for the iteration as a itself */
1346   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1347     goto X;
1348 
1349   used = MP_USED(&x);
1350   if (used > 1) {
1351     s_mp_rshd(&x, used / 2);
1352   }
1353 
1354   for(;;) {
1355     /* t = (x * x) - a */
1356     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
1357     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1358        (res = mp_sub(&t, a, &t)) != MP_OKAY)
1359       goto CLEANUP;
1360 
1361     /* t = t / 2x       */
1362     s_mp_mul_2(&x);
1363     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1364       goto CLEANUP;
1365     s_mp_div_2(&x);
1366 
1367     /* Terminate the loop, if the quotient is zero */
1368     if(mp_cmp_z(&t) == MP_EQ)
1369       break;
1370 
1371     /* x = x - t       */
1372     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1373       goto CLEANUP;
1374 
1375   }
1376 
1377   /* Copy result to output parameter */
1378   mp_sub_d(&x, 1, &x);
1379   s_mp_exch(&x, b);
1380 
1381  CLEANUP:
1382   mp_clear(&x);
1383  X:
1384   mp_clear(&t);
1385 
1386   return res;
1387 
1388 } /* end mp_sqrt() */
1389 
1390 /* }}} */
1391 
1392 /* }}} */
1393 
1394 /*------------------------------------------------------------------------*/
1395 /* {{{ Modular arithmetic */
1396 
1397 #if MP_MODARITH
1398 /* {{{ mp_addmod(a, b, m, c) */
1399 
1400 /*
1401   mp_addmod(a, b, m, c)
1402 
1403   Compute c = (a + b) mod m
1404  */
1405 
1406 mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1407 {
1408   mp_err  res;
1409 
1410   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1411 
1412   if((res = mp_add(a, b, c)) != MP_OKAY)
1413     return res;
1414   if((res = mp_mod(c, m, c)) != MP_OKAY)
1415     return res;
1416 
1417   return MP_OKAY;
1418 
1419 }
1420 
1421 /* }}} */
1422 
1423 /* {{{ mp_submod(a, b, m, c) */
1424 
1425 /*
1426   mp_submod(a, b, m, c)
1427 
1428   Compute c = (a - b) mod m
1429  */
1430 
1431 mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1432 {
1433   mp_err  res;
1434 
1435   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1436 
1437   if((res = mp_sub(a, b, c)) != MP_OKAY)
1438     return res;
1439   if((res = mp_mod(c, m, c)) != MP_OKAY)
1440     return res;
1441 
1442   return MP_OKAY;
1443 
1444 }
1445 
1446 /* }}} */
1447 
1448 /* {{{ mp_mulmod(a, b, m, c) */
1449 
1450 /*
1451   mp_mulmod(a, b, m, c)
1452 
1453   Compute c = (a * b) mod m
1454  */
1455 
1456 mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1457 {
1458   mp_err  res;
1459 
1460   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1461 
1462   if((res = mp_mul(a, b, c)) != MP_OKAY)
1463     return res;
1464   if((res = mp_mod(c, m, c)) != MP_OKAY)
1465     return res;
1466 
1467   return MP_OKAY;
1468 
1469 }
1470 
1471 /* }}} */
1472 
1473 /* {{{ mp_sqrmod(a, m, c) */
1474 
1475 #if MP_SQUARE
1476 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1477 {
1478   mp_err  res;
1479 
1480   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1481 
1482   if((res = mp_sqr(a, c)) != MP_OKAY)
1483     return res;
1484   if((res = mp_mod(c, m, c)) != MP_OKAY)
1485     return res;
1486 
1487   return MP_OKAY;
1488 
1489 } /* end mp_sqrmod() */
1490 #endif
1491 
1492 /* }}} */
1493 
1494 /* {{{ s_mp_exptmod(a, b, m, c) */
1495 
1496 /*
1497   s_mp_exptmod(a, b, m, c)
1498 
1499   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1500   method with modular reductions at each step. (This is basically the
1501   same code as mp_expt(), except for the addition of the reductions)
1502 
1503   The modular reductions are done using Barrett's algorithm (see
1504   s_mp_reduce() below for details)
1505  */
1506 
1507 mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1508 {
1509   mp_int   s, x, mu;
1510   mp_err   res;
1511   mp_digit d;
1512   int      dig, bit;
1513 
1514   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1515 
1516   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1517     return MP_RANGE;
1518 
1519   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1520     return res;
1521   if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1522      (res = mp_mod(&x, m, &x)) != MP_OKAY)
1523     goto X;
1524   if((res = mp_init(&mu, FLAG(a))) != MP_OKAY)
1525     goto MU;
1526 
1527   mp_set(&s, 1);
1528 
1529   /* mu = b^2k / m */
1530   s_mp_add_d(&mu, 1);
1531   s_mp_lshd(&mu, 2 * USED(m));
1532   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1533     goto CLEANUP;
1534 
1535   /* Loop over digits of b in ascending order, except highest order */
1536   for(dig = 0; dig < (USED(b) - 1); dig++) {
1537     d = DIGIT(b, dig);
1538 
1539     /* Loop over the bits of the lower-order digits */
1540     for(bit = 0; bit < DIGIT_BIT; bit++) {
1541       if(d & 1) {
1542 	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1543 	  goto CLEANUP;
1544 	if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1545 	  goto CLEANUP;
1546       }
1547 
1548       d >>= 1;
1549 
1550       if((res = s_mp_sqr(&x)) != MP_OKAY)
1551 	goto CLEANUP;
1552       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1553 	goto CLEANUP;
1554     }
1555   }
1556 
1557   /* Now do the last digit... */
1558   d = DIGIT(b, dig);
1559 
1560   while(d) {
1561     if(d & 1) {
1562       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1563 	goto CLEANUP;
1564       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1565 	goto CLEANUP;
1566     }
1567 
1568     d >>= 1;
1569 
1570     if((res = s_mp_sqr(&x)) != MP_OKAY)
1571       goto CLEANUP;
1572     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1573       goto CLEANUP;
1574   }
1575 
1576   s_mp_exch(&s, c);
1577 
1578  CLEANUP:
1579   mp_clear(&mu);
1580  MU:
1581   mp_clear(&x);
1582  X:
1583   mp_clear(&s);
1584 
1585   return res;
1586 
1587 } /* end s_mp_exptmod() */
1588 
1589 /* }}} */
1590 
1591 /* {{{ mp_exptmod_d(a, d, m, c) */
1592 
1593 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1594 {
1595   mp_int   s, x;
1596   mp_err   res;
1597 
1598   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1599 
1600   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1601     return res;
1602   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1603     goto X;
1604 
1605   mp_set(&s, 1);
1606 
1607   while(d != 0) {
1608     if(d & 1) {
1609       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1610 	 (res = mp_mod(&s, m, &s)) != MP_OKAY)
1611 	goto CLEANUP;
1612     }
1613 
1614     d /= 2;
1615 
1616     if((res = s_mp_sqr(&x)) != MP_OKAY ||
1617        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1618       goto CLEANUP;
1619   }
1620 
1621   s_mp_exch(&s, c);
1622 
1623 CLEANUP:
1624   mp_clear(&x);
1625 X:
1626   mp_clear(&s);
1627 
1628   return res;
1629 
1630 } /* end mp_exptmod_d() */
1631 
1632 /* }}} */
1633 #endif /* if MP_MODARITH */
1634 
1635 /* }}} */
1636 
1637 /*------------------------------------------------------------------------*/
1638 /* {{{ Comparison functions */
1639 
1640 /* {{{ mp_cmp_z(a) */
1641 
1642 /*
1643   mp_cmp_z(a)
1644 
1645   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1646  */
1647 
1648 int    mp_cmp_z(const mp_int *a)
1649 {
1650   if(SIGN(a) == NEG)
1651     return MP_LT;
1652   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1653     return MP_EQ;
1654   else
1655     return MP_GT;
1656 
1657 } /* end mp_cmp_z() */
1658 
1659 /* }}} */
1660 
1661 /* {{{ mp_cmp_d(a, d) */
1662 
1663 /*
1664   mp_cmp_d(a, d)
1665 
1666   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1667  */
1668 
1669 int    mp_cmp_d(const mp_int *a, mp_digit d)
1670 {
1671   ARGCHK(a != NULL, MP_EQ);
1672 
1673   if(SIGN(a) == NEG)
1674     return MP_LT;
1675 
1676   return s_mp_cmp_d(a, d);
1677 
1678 } /* end mp_cmp_d() */
1679 
1680 /* }}} */
1681 
1682 /* {{{ mp_cmp(a, b) */
1683 
1684 int    mp_cmp(const mp_int *a, const mp_int *b)
1685 {
1686   ARGCHK(a != NULL && b != NULL, MP_EQ);
1687 
1688   if(SIGN(a) == SIGN(b)) {
1689     int  mag;
1690 
1691     if((mag = s_mp_cmp(a, b)) == MP_EQ)
1692       return MP_EQ;
1693 
1694     if(SIGN(a) == ZPOS)
1695       return mag;
1696     else
1697       return -mag;
1698 
1699   } else if(SIGN(a) == ZPOS) {
1700     return MP_GT;
1701   } else {
1702     return MP_LT;
1703   }
1704 
1705 } /* end mp_cmp() */
1706 
1707 /* }}} */
1708 
1709 /* {{{ mp_cmp_mag(a, b) */
1710 
1711 /*
1712   mp_cmp_mag(a, b)
1713 
1714   Compares |a| <=> |b|, and returns an appropriate comparison result
1715  */
1716 
1717 int    mp_cmp_mag(mp_int *a, mp_int *b)
1718 {
1719   ARGCHK(a != NULL && b != NULL, MP_EQ);
1720 
1721   return s_mp_cmp(a, b);
1722 
1723 } /* end mp_cmp_mag() */
1724 
1725 /* }}} */
1726 
1727 /* {{{ mp_cmp_int(a, z, kmflag) */
1728 
1729 /*
1730   This just converts z to an mp_int, and uses the existing comparison
1731   routines.  This is sort of inefficient, but it's not clear to me how
1732   frequently this wil get used anyway.  For small positive constants,
1733   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1734  */
1735 int    mp_cmp_int(const mp_int *a, long z, int kmflag)
1736 {
1737   mp_int  tmp;
1738   int     out;
1739 
1740   ARGCHK(a != NULL, MP_EQ);
1741 
1742   mp_init(&tmp, kmflag); mp_set_int(&tmp, z);
1743   out = mp_cmp(a, &tmp);
1744   mp_clear(&tmp);
1745 
1746   return out;
1747 
1748 } /* end mp_cmp_int() */
1749 
1750 /* }}} */
1751 
1752 /* {{{ mp_isodd(a) */
1753 
1754 /*
1755   mp_isodd(a)
1756 
1757   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1758  */
1759 int    mp_isodd(const mp_int *a)
1760 {
1761   ARGCHK(a != NULL, 0);
1762 
1763   return (int)(DIGIT(a, 0) & 1);
1764 
1765 } /* end mp_isodd() */
1766 
1767 /* }}} */
1768 
1769 /* {{{ mp_iseven(a) */
1770 
1771 int    mp_iseven(const mp_int *a)
1772 {
1773   return !mp_isodd(a);
1774 
1775 } /* end mp_iseven() */
1776 
1777 /* }}} */
1778 
1779 /* }}} */
1780 
1781 /*------------------------------------------------------------------------*/
1782 /* {{{ Number theoretic functions */
1783 
1784 #if MP_NUMTH
1785 /* {{{ mp_gcd(a, b, c) */
1786 
1787 /*
1788   Like the old mp_gcd() function, except computes the GCD using the
1789   binary algorithm due to Josef Stein in 1961 (via Knuth).
1790  */
1791 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1792 {
1793   mp_err   res;
1794   mp_int   u, v, t;
1795   mp_size  k = 0;
1796 
1797   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1798 
1799   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1800       return MP_RANGE;
1801   if(mp_cmp_z(a) == MP_EQ) {
1802     return mp_copy(b, c);
1803   } else if(mp_cmp_z(b) == MP_EQ) {
1804     return mp_copy(a, c);
1805   }
1806 
1807   if((res = mp_init(&t, FLAG(a))) != MP_OKAY)
1808     return res;
1809   if((res = mp_init_copy(&u, a)) != MP_OKAY)
1810     goto U;
1811   if((res = mp_init_copy(&v, b)) != MP_OKAY)
1812     goto V;
1813 
1814   SIGN(&u) = ZPOS;
1815   SIGN(&v) = ZPOS;
1816 
1817   /* Divide out common factors of 2 until at least 1 of a, b is even */
1818   while(mp_iseven(&u) && mp_iseven(&v)) {
1819     s_mp_div_2(&u);
1820     s_mp_div_2(&v);
1821     ++k;
1822   }
1823 
1824   /* Initialize t */
1825   if(mp_isodd(&u)) {
1826     if((res = mp_copy(&v, &t)) != MP_OKAY)
1827       goto CLEANUP;
1828 
1829     /* t = -v */
1830     if(SIGN(&v) == ZPOS)
1831       SIGN(&t) = NEG;
1832     else
1833       SIGN(&t) = ZPOS;
1834 
1835   } else {
1836     if((res = mp_copy(&u, &t)) != MP_OKAY)
1837       goto CLEANUP;
1838 
1839   }
1840 
1841   for(;;) {
1842     while(mp_iseven(&t)) {
1843       s_mp_div_2(&t);
1844     }
1845 
1846     if(mp_cmp_z(&t) == MP_GT) {
1847       if((res = mp_copy(&t, &u)) != MP_OKAY)
1848 	goto CLEANUP;
1849 
1850     } else {
1851       if((res = mp_copy(&t, &v)) != MP_OKAY)
1852 	goto CLEANUP;
1853 
1854       /* v = -t */
1855       if(SIGN(&t) == ZPOS)
1856 	SIGN(&v) = NEG;
1857       else
1858 	SIGN(&v) = ZPOS;
1859     }
1860 
1861     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1862       goto CLEANUP;
1863 
1864     if(s_mp_cmp_d(&t, 0) == MP_EQ)
1865       break;
1866   }
1867 
1868   s_mp_2expt(&v, k);       /* v = 2^k   */
1869   res = mp_mul(&u, &v, c); /* c = u * v */
1870 
1871  CLEANUP:
1872   mp_clear(&v);
1873  V:
1874   mp_clear(&u);
1875  U:
1876   mp_clear(&t);
1877 
1878   return res;
1879 
1880 } /* end mp_gcd() */
1881 
1882 /* }}} */
1883 
1884 /* {{{ mp_lcm(a, b, c) */
1885 
1886 /* We compute the least common multiple using the rule:
1887 
1888    ab = [a, b](a, b)
1889 
1890    ... by computing the product, and dividing out the gcd.
1891  */
1892 
1893 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1894 {
1895   mp_int  gcd, prod;
1896   mp_err  res;
1897 
1898   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1899 
1900   /* Set up temporaries */
1901   if((res = mp_init(&gcd, FLAG(a))) != MP_OKAY)
1902     return res;
1903   if((res = mp_init(&prod, FLAG(a))) != MP_OKAY)
1904     goto GCD;
1905 
1906   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1907     goto CLEANUP;
1908   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1909     goto CLEANUP;
1910 
1911   res = mp_div(&prod, &gcd, c, NULL);
1912 
1913  CLEANUP:
1914   mp_clear(&prod);
1915  GCD:
1916   mp_clear(&gcd);
1917 
1918   return res;
1919 
1920 } /* end mp_lcm() */
1921 
1922 /* }}} */
1923 
1924 /* {{{ mp_xgcd(a, b, g, x, y) */
1925 
1926 /*
1927   mp_xgcd(a, b, g, x, y)
1928 
1929   Compute g = (a, b) and values x and y satisfying Bezout's identity
1930   (that is, ax + by = g).  This uses the binary extended GCD algorithm
1931   based on the Stein algorithm used for mp_gcd()
1932   See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
1933  */
1934 
1935 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1936 {
1937   mp_int   gx, xc, yc, u, v, A, B, C, D;
1938   mp_int  *clean[9];
1939   mp_err   res;
1940   int      last = -1;
1941 
1942   if(mp_cmp_z(b) == 0)
1943     return MP_RANGE;
1944 
1945   /* Initialize all these variables we need */
1946   MP_CHECKOK( mp_init(&u, FLAG(a)) );
1947   clean[++last] = &u;
1948   MP_CHECKOK( mp_init(&v, FLAG(a)) );
1949   clean[++last] = &v;
1950   MP_CHECKOK( mp_init(&gx, FLAG(a)) );
1951   clean[++last] = &gx;
1952   MP_CHECKOK( mp_init(&A, FLAG(a)) );
1953   clean[++last] = &A;
1954   MP_CHECKOK( mp_init(&B, FLAG(a)) );
1955   clean[++last] = &B;
1956   MP_CHECKOK( mp_init(&C, FLAG(a)) );
1957   clean[++last] = &C;
1958   MP_CHECKOK( mp_init(&D, FLAG(a)) );
1959   clean[++last] = &D;
1960   MP_CHECKOK( mp_init_copy(&xc, a) );
1961   clean[++last] = &xc;
1962   mp_abs(&xc, &xc);
1963   MP_CHECKOK( mp_init_copy(&yc, b) );
1964   clean[++last] = &yc;
1965   mp_abs(&yc, &yc);
1966 
1967   mp_set(&gx, 1);
1968 
1969   /* Divide by two until at least one of them is odd */
1970   while(mp_iseven(&xc) && mp_iseven(&yc)) {
1971     mp_size nx = mp_trailing_zeros(&xc);
1972     mp_size ny = mp_trailing_zeros(&yc);
1973     mp_size n  = MP_MIN(nx, ny);
1974     s_mp_div_2d(&xc,n);
1975     s_mp_div_2d(&yc,n);
1976     MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1977   }
1978 
1979   mp_copy(&xc, &u);
1980   mp_copy(&yc, &v);
1981   mp_set(&A, 1); mp_set(&D, 1);
1982 
1983   /* Loop through binary GCD algorithm */
1984   do {
1985     while(mp_iseven(&u)) {
1986       s_mp_div_2(&u);
1987 
1988       if(mp_iseven(&A) && mp_iseven(&B)) {
1989 	s_mp_div_2(&A); s_mp_div_2(&B);
1990       } else {
1991 	MP_CHECKOK( mp_add(&A, &yc, &A) );
1992 	s_mp_div_2(&A);
1993 	MP_CHECKOK( mp_sub(&B, &xc, &B) );
1994 	s_mp_div_2(&B);
1995       }
1996     }
1997 
1998     while(mp_iseven(&v)) {
1999       s_mp_div_2(&v);
2000 
2001       if(mp_iseven(&C) && mp_iseven(&D)) {
2002 	s_mp_div_2(&C); s_mp_div_2(&D);
2003       } else {
2004 	MP_CHECKOK( mp_add(&C, &yc, &C) );
2005 	s_mp_div_2(&C);
2006 	MP_CHECKOK( mp_sub(&D, &xc, &D) );
2007 	s_mp_div_2(&D);
2008       }
2009     }
2010 
2011     if(mp_cmp(&u, &v) >= 0) {
2012       MP_CHECKOK( mp_sub(&u, &v, &u) );
2013       MP_CHECKOK( mp_sub(&A, &C, &A) );
2014       MP_CHECKOK( mp_sub(&B, &D, &B) );
2015     } else {
2016       MP_CHECKOK( mp_sub(&v, &u, &v) );
2017       MP_CHECKOK( mp_sub(&C, &A, &C) );
2018       MP_CHECKOK( mp_sub(&D, &B, &D) );
2019     }
2020   } while (mp_cmp_z(&u) != 0);
2021 
2022   /* copy results to output */
2023   if(x)
2024     MP_CHECKOK( mp_copy(&C, x) );
2025 
2026   if(y)
2027     MP_CHECKOK( mp_copy(&D, y) );
2028 
2029   if(g)
2030     MP_CHECKOK( mp_mul(&gx, &v, g) );
2031 
2032  CLEANUP:
2033   while(last >= 0)
2034     mp_clear(clean[last--]);
2035 
2036   return res;
2037 
2038 } /* end mp_xgcd() */
2039 
2040 /* }}} */
2041 
2042 mp_size mp_trailing_zeros(const mp_int *mp)
2043 {
2044   mp_digit d;
2045   mp_size  n = 0;
2046   int      ix;
2047 
2048   if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2049     return n;
2050 
2051   for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2052     n += MP_DIGIT_BIT;
2053   if (!d)
2054     return 0;	/* shouldn't happen, but ... */
2055 #if !defined(MP_USE_UINT_DIGIT)
2056   if (!(d & 0xffffffffU)) {
2057     d >>= 32;
2058     n  += 32;
2059   }
2060 #endif
2061   if (!(d & 0xffffU)) {
2062     d >>= 16;
2063     n  += 16;
2064   }
2065   if (!(d & 0xffU)) {
2066     d >>= 8;
2067     n  += 8;
2068   }
2069   if (!(d & 0xfU)) {
2070     d >>= 4;
2071     n  += 4;
2072   }
2073   if (!(d & 0x3U)) {
2074     d >>= 2;
2075     n  += 2;
2076   }
2077   if (!(d & 0x1U)) {
2078     d >>= 1;
2079     n  += 1;
2080   }
2081 #if MP_ARGCHK == 2
2082   assert(0 != (d & 1));
2083 #endif
2084   return n;
2085 }
2086 
2087 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2088 ** Returns k (positive) or error (negative).
2089 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2090 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2091 */
2092 mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2093 {
2094   mp_err res;
2095   mp_err k    = 0;
2096   mp_int d, f, g;
2097 
2098   ARGCHK(a && p && c, MP_BADARG);
2099 
2100   MP_DIGITS(&d) = 0;
2101   MP_DIGITS(&f) = 0;
2102   MP_DIGITS(&g) = 0;
2103   MP_CHECKOK( mp_init(&d, FLAG(a)) );
2104   MP_CHECKOK( mp_init_copy(&f, a) );	/* f = a */
2105   MP_CHECKOK( mp_init_copy(&g, p) );	/* g = p */
2106 
2107   mp_set(c, 1);
2108   mp_zero(&d);
2109 
2110   if (mp_cmp_z(&f) == 0) {
2111     res = MP_UNDEF;
2112   } else
2113   for (;;) {
2114     int diff_sign;
2115     while (mp_iseven(&f)) {
2116       mp_size n = mp_trailing_zeros(&f);
2117       if (!n) {
2118 	res = MP_UNDEF;
2119 	goto CLEANUP;
2120       }
2121       s_mp_div_2d(&f, n);
2122       MP_CHECKOK( s_mp_mul_2d(&d, n) );
2123       k += n;
2124     }
2125     if (mp_cmp_d(&f, 1) == MP_EQ) {	/* f == 1 */
2126       res = k;
2127       break;
2128     }
2129     diff_sign = mp_cmp(&f, &g);
2130     if (diff_sign < 0) {		/* f < g */
2131       s_mp_exch(&f, &g);
2132       s_mp_exch(c, &d);
2133     } else if (diff_sign == 0) {		/* f == g */
2134       res = MP_UNDEF;		/* a and p are not relatively prime */
2135       break;
2136     }
2137     if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2138       MP_CHECKOK( mp_sub(&f, &g, &f) );	/* f = f - g */
2139       MP_CHECKOK( mp_sub(c,  &d,  c) );	/* c = c - d */
2140     } else {
2141       MP_CHECKOK( mp_add(&f, &g, &f) );	/* f = f + g */
2142       MP_CHECKOK( mp_add(c,  &d,  c) );	/* c = c + d */
2143     }
2144   }
2145   if (res >= 0) {
2146     while (MP_SIGN(c) != MP_ZPOS) {
2147       MP_CHECKOK( mp_add(c, p, c) );
2148     }
2149     res = k;
2150   }
2151 
2152 CLEANUP:
2153   mp_clear(&d);
2154   mp_clear(&f);
2155   mp_clear(&g);
2156   return res;
2157 }
2158 
2159 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2160 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2161 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2162 */
2163 mp_digit  s_mp_invmod_radix(mp_digit P)
2164 {
2165   mp_digit T = P;
2166   T *= 2 - (P * T);
2167   T *= 2 - (P * T);
2168   T *= 2 - (P * T);
2169   T *= 2 - (P * T);
2170 #if !defined(MP_USE_UINT_DIGIT)
2171   T *= 2 - (P * T);
2172   T *= 2 - (P * T);
2173 #endif
2174   return T;
2175 }
2176 
2177 /* Given c, k, and prime p, where a*c == 2**k (mod p),
2178 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2179 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2180 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2181 */
2182 mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2183 {
2184   int      k_orig = k;
2185   mp_digit r;
2186   mp_size  ix;
2187   mp_err   res;
2188 
2189   if (mp_cmp_z(c) < 0) {		/* c < 0 */
2190     MP_CHECKOK( mp_add(c, p, x) );	/* x = c + p */
2191   } else {
2192     MP_CHECKOK( mp_copy(c, x) );	/* x = c */
2193   }
2194 
2195   /* make sure x is large enough */
2196   ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2197   ix = MP_MAX(ix, MP_USED(x));
2198   MP_CHECKOK( s_mp_pad(x, ix) );
2199 
2200   r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2201 
2202   for (ix = 0; k > 0; ix++) {
2203     int      j = MP_MIN(k, MP_DIGIT_BIT);
2204     mp_digit v = r * MP_DIGIT(x, ix);
2205     if (j < MP_DIGIT_BIT) {
2206       v &= ((mp_digit)1 << j) - 1;	/* v = v mod (2 ** j) */
2207     }
2208     s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2209     k -= j;
2210   }
2211   s_mp_clamp(x);
2212   s_mp_div_2d(x, k_orig);
2213   res = MP_OKAY;
2214 
2215 CLEANUP:
2216   return res;
2217 }
2218 
2219 /* compute mod inverse using Schroeppel's method, only if m is odd */
2220 mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2221 {
2222   int k;
2223   mp_err  res;
2224   mp_int  x;
2225 
2226   ARGCHK(a && m && c, MP_BADARG);
2227 
2228   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2229     return MP_RANGE;
2230   if (mp_iseven(m))
2231     return MP_UNDEF;
2232 
2233   MP_DIGITS(&x) = 0;
2234 
2235   if (a == c) {
2236     if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2237       return res;
2238     if (a == m)
2239       m = &x;
2240     a = &x;
2241   } else if (m == c) {
2242     if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2243       return res;
2244     m = &x;
2245   } else {
2246     MP_DIGITS(&x) = 0;
2247   }
2248 
2249   MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2250   k = res;
2251   MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2252 CLEANUP:
2253   mp_clear(&x);
2254   return res;
2255 }
2256 
2257 /* Known good algorithm for computing modular inverse.  But slow. */
2258 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2259 {
2260   mp_int  g, x;
2261   mp_err  res;
2262 
2263   ARGCHK(a && m && c, MP_BADARG);
2264 
2265   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2266     return MP_RANGE;
2267 
2268   MP_DIGITS(&g) = 0;
2269   MP_DIGITS(&x) = 0;
2270   MP_CHECKOK( mp_init(&x, FLAG(a)) );
2271   MP_CHECKOK( mp_init(&g, FLAG(a)) );
2272 
2273   MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2274 
2275   if (mp_cmp_d(&g, 1) != MP_EQ) {
2276     res = MP_UNDEF;
2277     goto CLEANUP;
2278   }
2279 
2280   res = mp_mod(&x, m, c);
2281   SIGN(c) = SIGN(a);
2282 
2283 CLEANUP:
2284   mp_clear(&x);
2285   mp_clear(&g);
2286 
2287   return res;
2288 }
2289 
2290 /* modular inverse where modulus is 2**k. */
2291 /* c = a**-1 mod 2**k */
2292 mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2293 {
2294   mp_err res;
2295   mp_size ix = k + 4;
2296   mp_int t0, t1, val, tmp, two2k;
2297 
2298   static const mp_digit d2 = 2;
2299   static const mp_int two = { 0, MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2300 
2301   if (mp_iseven(a))
2302     return MP_UNDEF;
2303   if (k <= MP_DIGIT_BIT) {
2304     mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2305     if (k < MP_DIGIT_BIT)
2306       i &= ((mp_digit)1 << k) - (mp_digit)1;
2307     mp_set(c, i);
2308     return MP_OKAY;
2309   }
2310   MP_DIGITS(&t0) = 0;
2311   MP_DIGITS(&t1) = 0;
2312   MP_DIGITS(&val) = 0;
2313   MP_DIGITS(&tmp) = 0;
2314   MP_DIGITS(&two2k) = 0;
2315   MP_CHECKOK( mp_init_copy(&val, a) );
2316   s_mp_mod_2d(&val, k);
2317   MP_CHECKOK( mp_init_copy(&t0, &val) );
2318   MP_CHECKOK( mp_init_copy(&t1, &t0)  );
2319   MP_CHECKOK( mp_init(&tmp, FLAG(a)) );
2320   MP_CHECKOK( mp_init(&two2k, FLAG(a)) );
2321   MP_CHECKOK( s_mp_2expt(&two2k, k) );
2322   do {
2323     MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
2324     MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2325     MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
2326     s_mp_mod_2d(&t1, k);
2327     while (MP_SIGN(&t1) != MP_ZPOS) {
2328       MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2329     }
2330     if (mp_cmp(&t1, &t0) == MP_EQ)
2331       break;
2332     MP_CHECKOK( mp_copy(&t1, &t0) );
2333   } while (--ix > 0);
2334   if (!ix) {
2335     res = MP_UNDEF;
2336   } else {
2337     mp_exch(c, &t1);
2338   }
2339 
2340 CLEANUP:
2341   mp_clear(&t0);
2342   mp_clear(&t1);
2343   mp_clear(&val);
2344   mp_clear(&tmp);
2345   mp_clear(&two2k);
2346   return res;
2347 }
2348 
2349 mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2350 {
2351   mp_err res;
2352   mp_size k;
2353   mp_int oddFactor, evenFactor;	/* factors of the modulus */
2354   mp_int oddPart, evenPart;	/* parts to combine via CRT. */
2355   mp_int C2, tmp1, tmp2;
2356 
2357   /*static const mp_digit d1 = 1; */
2358   /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2359 
2360   if ((res = s_mp_ispow2(m)) >= 0) {
2361     k = res;
2362     return s_mp_invmod_2d(a, k, c);
2363   }
2364   MP_DIGITS(&oddFactor) = 0;
2365   MP_DIGITS(&evenFactor) = 0;
2366   MP_DIGITS(&oddPart) = 0;
2367   MP_DIGITS(&evenPart) = 0;
2368   MP_DIGITS(&C2)     = 0;
2369   MP_DIGITS(&tmp1)   = 0;
2370   MP_DIGITS(&tmp2)   = 0;
2371 
2372   MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
2373   MP_CHECKOK( mp_init(&evenFactor, FLAG(m)) );
2374   MP_CHECKOK( mp_init(&oddPart, FLAG(m)) );
2375   MP_CHECKOK( mp_init(&evenPart, FLAG(m)) );
2376   MP_CHECKOK( mp_init(&C2, FLAG(m))     );
2377   MP_CHECKOK( mp_init(&tmp1, FLAG(m))   );
2378   MP_CHECKOK( mp_init(&tmp2, FLAG(m))   );
2379 
2380   k = mp_trailing_zeros(m);
2381   s_mp_div_2d(&oddFactor, k);
2382   MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2383 
2384   /* compute a**-1 mod oddFactor. */
2385   MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2386   /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2387   MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
2388 
2389   /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2390   /* let m1 = oddFactor,  v1 = oddPart,
2391    * let m2 = evenFactor, v2 = evenPart.
2392    */
2393 
2394   /* Compute C2 = m1**-1 mod m2. */
2395   MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
2396 
2397   /* compute u = (v2 - v1)*C2 mod m2 */
2398   MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
2399   MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
2400   s_mp_mod_2d(&tmp2, k);
2401   while (MP_SIGN(&tmp2) != MP_ZPOS) {
2402     MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2403   }
2404 
2405   /* compute answer = v1 + u*m1 */
2406   MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
2407   MP_CHECKOK( mp_add(&oddPart,  c,          c) );
2408   /* not sure this is necessary, but it's low cost if not. */
2409   MP_CHECKOK( mp_mod(c,         m,          c) );
2410 
2411 CLEANUP:
2412   mp_clear(&oddFactor);
2413   mp_clear(&evenFactor);
2414   mp_clear(&oddPart);
2415   mp_clear(&evenPart);
2416   mp_clear(&C2);
2417   mp_clear(&tmp1);
2418   mp_clear(&tmp2);
2419   return res;
2420 }
2421 
2422 
2423 /* {{{ mp_invmod(a, m, c) */
2424 
2425 /*
2426   mp_invmod(a, m, c)
2427 
2428   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2429   This is equivalent to the question of whether (a, m) = 1.  If not,
2430   MP_UNDEF is returned, and there is no inverse.
2431  */
2432 
2433 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2434 {
2435 
2436   ARGCHK(a && m && c, MP_BADARG);
2437 
2438   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2439     return MP_RANGE;
2440 
2441   if (mp_isodd(m)) {
2442     return s_mp_invmod_odd_m(a, m, c);
2443   }
2444   if (mp_iseven(a))
2445     return MP_UNDEF;	/* not invertable */
2446 
2447   return s_mp_invmod_even_m(a, m, c);
2448 
2449 } /* end mp_invmod() */
2450 
2451 /* }}} */
2452 #endif /* if MP_NUMTH */
2453 
2454 /* }}} */
2455 
2456 /*------------------------------------------------------------------------*/
2457 /* {{{ mp_print(mp, ofp) */
2458 
2459 #if MP_IOFUNC
2460 /*
2461   mp_print(mp, ofp)
2462 
2463   Print a textual representation of the given mp_int on the output
2464   stream 'ofp'.  Output is generated using the internal radix.
2465  */
2466 
2467 void   mp_print(mp_int *mp, FILE *ofp)
2468 {
2469   int   ix;
2470 
2471   if(mp == NULL || ofp == NULL)
2472     return;
2473 
2474   fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2475 
2476   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2477     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2478   }
2479 
2480 } /* end mp_print() */
2481 
2482 #endif /* if MP_IOFUNC */
2483 
2484 /* }}} */
2485 
2486 /*------------------------------------------------------------------------*/
2487 /* {{{ More I/O Functions */
2488 
2489 /* {{{ mp_read_raw(mp, str, len) */
2490 
2491 /*
2492    mp_read_raw(mp, str, len)
2493 
2494    Read in a raw value (base 256) into the given mp_int
2495  */
2496 
2497 mp_err  mp_read_raw(mp_int *mp, char *str, int len)
2498 {
2499   int            ix;
2500   mp_err         res;
2501   unsigned char *ustr = (unsigned char *)str;
2502 
2503   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2504 
2505   mp_zero(mp);
2506 
2507   /* Get sign from first byte */
2508   if(ustr[0])
2509     SIGN(mp) = NEG;
2510   else
2511     SIGN(mp) = ZPOS;
2512 
2513   /* Read the rest of the digits */
2514   for(ix = 1; ix < len; ix++) {
2515     if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2516       return res;
2517     if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2518       return res;
2519   }
2520 
2521   return MP_OKAY;
2522 
2523 } /* end mp_read_raw() */
2524 
2525 /* }}} */
2526 
2527 /* {{{ mp_raw_size(mp) */
2528 
2529 int    mp_raw_size(mp_int *mp)
2530 {
2531   ARGCHK(mp != NULL, 0);
2532 
2533   return (USED(mp) * sizeof(mp_digit)) + 1;
2534 
2535 } /* end mp_raw_size() */
2536 
2537 /* }}} */
2538 
2539 /* {{{ mp_toraw(mp, str) */
2540 
2541 mp_err mp_toraw(mp_int *mp, char *str)
2542 {
2543   int  ix, jx, pos = 1;
2544 
2545   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2546 
2547   str[0] = (char)SIGN(mp);
2548 
2549   /* Iterate over each digit... */
2550   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2551     mp_digit  d = DIGIT(mp, ix);
2552 
2553     /* Unpack digit bytes, high order first */
2554     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2555       str[pos++] = (char)(d >> (jx * CHAR_BIT));
2556     }
2557   }
2558 
2559   return MP_OKAY;
2560 
2561 } /* end mp_toraw() */
2562 
2563 /* }}} */
2564 
2565 /* {{{ mp_read_radix(mp, str, radix) */
2566 
2567 /*
2568   mp_read_radix(mp, str, radix)
2569 
2570   Read an integer from the given string, and set mp to the resulting
2571   value.  The input is presumed to be in base 10.  Leading non-digit
2572   characters are ignored, and the function reads until a non-digit
2573   character or the end of the string.
2574  */
2575 
2576 mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
2577 {
2578   int     ix = 0, val = 0;
2579   mp_err  res;
2580   mp_sign sig = ZPOS;
2581 
2582   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2583 	 MP_BADARG);
2584 
2585   mp_zero(mp);
2586 
2587   /* Skip leading non-digit characters until a digit or '-' or '+' */
2588   while(str[ix] &&
2589 	(s_mp_tovalue(str[ix], radix) < 0) &&
2590 	str[ix] != '-' &&
2591 	str[ix] != '+') {
2592     ++ix;
2593   }
2594 
2595   if(str[ix] == '-') {
2596     sig = NEG;
2597     ++ix;
2598   } else if(str[ix] == '+') {
2599     sig = ZPOS; /* this is the default anyway... */
2600     ++ix;
2601   }
2602 
2603   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2604     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2605       return res;
2606     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2607       return res;
2608     ++ix;
2609   }
2610 
2611   if(s_mp_cmp_d(mp, 0) == MP_EQ)
2612     SIGN(mp) = ZPOS;
2613   else
2614     SIGN(mp) = sig;
2615 
2616   return MP_OKAY;
2617 
2618 } /* end mp_read_radix() */
2619 
2620 mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2621 {
2622   int     radix = default_radix;
2623   int     cx;
2624   mp_sign sig   = ZPOS;
2625   mp_err  res;
2626 
2627   /* Skip leading non-digit characters until a digit or '-' or '+' */
2628   while ((cx = *str) != 0 &&
2629 	(s_mp_tovalue(cx, radix) < 0) &&
2630 	cx != '-' &&
2631 	cx != '+') {
2632     ++str;
2633   }
2634 
2635   if (cx == '-') {
2636     sig = NEG;
2637     ++str;
2638   } else if (cx == '+') {
2639     sig = ZPOS; /* this is the default anyway... */
2640     ++str;
2641   }
2642 
2643   if (str[0] == '0') {
2644     if ((str[1] | 0x20) == 'x') {
2645       radix = 16;
2646       str += 2;
2647     } else {
2648       radix = 8;
2649       str++;
2650     }
2651   }
2652   res = mp_read_radix(a, str, radix);
2653   if (res == MP_OKAY) {
2654     MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2655   }
2656   return res;
2657 }
2658 
2659 /* }}} */
2660 
2661 /* {{{ mp_radix_size(mp, radix) */
2662 
2663 int    mp_radix_size(mp_int *mp, int radix)
2664 {
2665   int  bits;
2666 
2667   if(!mp || radix < 2 || radix > MAX_RADIX)
2668     return 0;
2669 
2670   bits = USED(mp) * DIGIT_BIT - 1;
2671 
2672   return s_mp_outlen(bits, radix);
2673 
2674 } /* end mp_radix_size() */
2675 
2676 /* }}} */
2677 
2678 /* {{{ mp_toradix(mp, str, radix) */
2679 
2680 mp_err mp_toradix(mp_int *mp, char *str, int radix)
2681 {
2682   int  ix, pos = 0;
2683 
2684   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2685   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2686 
2687   if(mp_cmp_z(mp) == MP_EQ) {
2688     str[0] = '0';
2689     str[1] = '\0';
2690   } else {
2691     mp_err   res;
2692     mp_int   tmp;
2693     mp_sign  sgn;
2694     mp_digit rem, rdx = (mp_digit)radix;
2695     char     ch;
2696 
2697     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2698       return res;
2699 
2700     /* Save sign for later, and take absolute value */
2701     sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2702 
2703     /* Generate output digits in reverse order      */
2704     while(mp_cmp_z(&tmp) != 0) {
2705       if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2706 	mp_clear(&tmp);
2707 	return res;
2708       }
2709 
2710       /* Generate digits, use capital letters */
2711       ch = s_mp_todigit(rem, radix, 0);
2712 
2713       str[pos++] = ch;
2714     }
2715 
2716     /* Add - sign if original value was negative */
2717     if(sgn == NEG)
2718       str[pos++] = '-';
2719 
2720     /* Add trailing NUL to end the string        */
2721     str[pos--] = '\0';
2722 
2723     /* Reverse the digits and sign indicator     */
2724     ix = 0;
2725     while(ix < pos) {
2726       char tmp = str[ix];
2727 
2728       str[ix] = str[pos];
2729       str[pos] = tmp;
2730       ++ix;
2731       --pos;
2732     }
2733 
2734     mp_clear(&tmp);
2735   }
2736 
2737   return MP_OKAY;
2738 
2739 } /* end mp_toradix() */
2740 
2741 /* }}} */
2742 
2743 /* {{{ mp_tovalue(ch, r) */
2744 
2745 int    mp_tovalue(char ch, int r)
2746 {
2747   return s_mp_tovalue(ch, r);
2748 
2749 } /* end mp_tovalue() */
2750 
2751 /* }}} */
2752 
2753 /* }}} */
2754 
2755 /* {{{ mp_strerror(ec) */
2756 
2757 /*
2758   mp_strerror(ec)
2759 
2760   Return a string describing the meaning of error code 'ec'.  The
2761   string returned is allocated in static memory, so the caller should
2762   not attempt to modify or free the memory associated with this
2763   string.
2764  */
2765 const char  *mp_strerror(mp_err ec)
2766 {
2767   int   aec = (ec < 0) ? -ec : ec;
2768 
2769   /* Code values are negative, so the senses of these comparisons
2770      are accurate */
2771   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2772     return mp_err_string[0];  /* unknown error code */
2773   } else {
2774     return mp_err_string[aec + 1];
2775   }
2776 
2777 } /* end mp_strerror() */
2778 
2779 /* }}} */
2780 
2781 /*========================================================================*/
2782 /*------------------------------------------------------------------------*/
2783 /* Static function definitions (internal use only)                        */
2784 
2785 /* {{{ Memory management */
2786 
2787 /* {{{ s_mp_grow(mp, min) */
2788 
2789 /* Make sure there are at least 'min' digits allocated to mp              */
2790 mp_err   s_mp_grow(mp_int *mp, mp_size min)
2791 {
2792   if(min > ALLOC(mp)) {
2793     mp_digit   *tmp;
2794 
2795     /* Set min to next nearest default precision block size */
2796     min = MP_ROUNDUP(min, s_mp_defprec);
2797 
2798     if((tmp = s_mp_alloc(min, sizeof(mp_digit), FLAG(mp))) == NULL)
2799       return MP_MEM;
2800 
2801     s_mp_copy(DIGITS(mp), tmp, USED(mp));
2802 
2803 #if MP_CRYPTO
2804     s_mp_setz(DIGITS(mp), ALLOC(mp));
2805 #endif
2806     s_mp_free(DIGITS(mp), ALLOC(mp));
2807     DIGITS(mp) = tmp;
2808     ALLOC(mp) = min;
2809   }
2810 
2811   return MP_OKAY;
2812 
2813 } /* end s_mp_grow() */
2814 
2815 /* }}} */
2816 
2817 /* {{{ s_mp_pad(mp, min) */
2818 
2819 /* Make sure the used size of mp is at least 'min', growing if needed     */
2820 mp_err   s_mp_pad(mp_int *mp, mp_size min)
2821 {
2822   if(min > USED(mp)) {
2823     mp_err  res;
2824 
2825     /* Make sure there is room to increase precision  */
2826     if (min > ALLOC(mp)) {
2827       if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2828 	return res;
2829     } else {
2830       s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2831     }
2832 
2833     /* Increase precision; should already be 0-filled */
2834     USED(mp) = min;
2835   }
2836 
2837   return MP_OKAY;
2838 
2839 } /* end s_mp_pad() */
2840 
2841 /* }}} */
2842 
2843 /* {{{ s_mp_setz(dp, count) */
2844 
2845 #if MP_MACRO == 0
2846 /* Set 'count' digits pointed to by dp to be zeroes                       */
2847 void s_mp_setz(mp_digit *dp, mp_size count)
2848 {
2849 #if MP_MEMSET == 0
2850   int  ix;
2851 
2852   for(ix = 0; ix < count; ix++)
2853     dp[ix] = 0;
2854 #else
2855   memset(dp, 0, count * sizeof(mp_digit));
2856 #endif
2857 
2858 } /* end s_mp_setz() */
2859 #endif
2860 
2861 /* }}} */
2862 
2863 /* {{{ s_mp_copy(sp, dp, count) */
2864 
2865 #if MP_MACRO == 0
2866 /* Copy 'count' digits from sp to dp                                      */
2867 void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2868 {
2869 #if MP_MEMCPY == 0
2870   int  ix;
2871 
2872   for(ix = 0; ix < count; ix++)
2873     dp[ix] = sp[ix];
2874 #else
2875   memcpy(dp, sp, count * sizeof(mp_digit));
2876 #endif
2877 
2878 } /* end s_mp_copy() */
2879 #endif
2880 
2881 /* }}} */
2882 
2883 /* {{{ s_mp_alloc(nb, ni, kmflag) */
2884 
2885 #if MP_MACRO == 0
2886 /* Allocate ni records of nb bytes each, and return a pointer to that     */
2887 void    *s_mp_alloc(size_t nb, size_t ni, int kmflag)
2888 {
2889   mp_int *mp;
2890   ++mp_allocs;
2891 #ifdef _KERNEL
2892   mp = kmem_zalloc(nb * ni, kmflag);
2893   if (mp != NULL)
2894     FLAG(mp) = kmflag;
2895   return (mp);
2896 #else
2897   return calloc(nb, ni);
2898 #endif
2899 
2900 } /* end s_mp_alloc() */
2901 #endif
2902 
2903 /* }}} */
2904 
2905 /* {{{ s_mp_free(ptr) */
2906 
2907 #if MP_MACRO == 0
2908 /* Free the memory pointed to by ptr                                      */
2909 void     s_mp_free(void *ptr, mp_size alloc)
2910 {
2911   if(ptr) {
2912     ++mp_frees;
2913 #ifdef _KERNEL
2914     kmem_free(ptr, alloc * sizeof (mp_digit));
2915 #else
2916     free(ptr);
2917 #endif
2918   }
2919 } /* end s_mp_free() */
2920 #endif
2921 
2922 /* }}} */
2923 
2924 /* {{{ s_mp_clamp(mp) */
2925 
2926 #if MP_MACRO == 0
2927 /* Remove leading zeroes from the given value                             */
2928 void     s_mp_clamp(mp_int *mp)
2929 {
2930   mp_size used = MP_USED(mp);
2931   while (used > 1 && DIGIT(mp, used - 1) == 0)
2932     --used;
2933   MP_USED(mp) = used;
2934 } /* end s_mp_clamp() */
2935 #endif
2936 
2937 /* }}} */
2938 
2939 /* {{{ s_mp_exch(a, b) */
2940 
2941 /* Exchange the data for a and b; (b, a) = (a, b)                         */
2942 void     s_mp_exch(mp_int *a, mp_int *b)
2943 {
2944   mp_int   tmp;
2945 
2946   tmp = *a;
2947   *a = *b;
2948   *b = tmp;
2949 
2950 } /* end s_mp_exch() */
2951 
2952 /* }}} */
2953 
2954 /* }}} */
2955 
2956 /* {{{ Arithmetic helpers */
2957 
2958 /* {{{ s_mp_lshd(mp, p) */
2959 
2960 /*
2961    Shift mp leftward by p digits, growing if needed, and zero-filling
2962    the in-shifted digits at the right end.  This is a convenient
2963    alternative to multiplication by powers of the radix
2964    The value of USED(mp) must already have been set to the value for
2965    the shifted result.
2966  */
2967 
2968 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
2969 {
2970   mp_err  res;
2971   mp_size pos;
2972   int     ix;
2973 
2974   if(p == 0)
2975     return MP_OKAY;
2976 
2977   if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2978     return MP_OKAY;
2979 
2980   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2981     return res;
2982 
2983   pos = USED(mp) - 1;
2984 
2985   /* Shift all the significant figures over as needed */
2986   for(ix = pos - p; ix >= 0; ix--)
2987     DIGIT(mp, ix + p) = DIGIT(mp, ix);
2988 
2989   /* Fill the bottom digits with zeroes */
2990   for(ix = 0; ix < p; ix++)
2991     DIGIT(mp, ix) = 0;
2992 
2993   return MP_OKAY;
2994 
2995 } /* end s_mp_lshd() */
2996 
2997 /* }}} */
2998 
2999 /* {{{ s_mp_mul_2d(mp, d) */
3000 
3001 /*
3002   Multiply the integer by 2^d, where d is a number of bits.  This
3003   amounts to a bitwise shift of the value.
3004  */
3005 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
3006 {
3007   mp_err   res;
3008   mp_digit dshift, bshift;
3009   mp_digit mask;
3010 
3011   ARGCHK(mp != NULL,  MP_BADARG);
3012 
3013   dshift = d / MP_DIGIT_BIT;
3014   bshift = d % MP_DIGIT_BIT;
3015   /* bits to be shifted out of the top word */
3016   mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
3017   mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
3018 
3019   if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
3020     return res;
3021 
3022   if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3023     return res;
3024 
3025   if (bshift) {
3026     mp_digit *pa = MP_DIGITS(mp);
3027     mp_digit *alim = pa + MP_USED(mp);
3028     mp_digit  prev = 0;
3029 
3030     for (pa += dshift; pa < alim; ) {
3031       mp_digit x = *pa;
3032       *pa++ = (x << bshift) | prev;
3033       prev = x >> (DIGIT_BIT - bshift);
3034     }
3035   }
3036 
3037   s_mp_clamp(mp);
3038   return MP_OKAY;
3039 } /* end s_mp_mul_2d() */
3040 
3041 /* {{{ s_mp_rshd(mp, p) */
3042 
3043 /*
3044    Shift mp rightward by p digits.  Maintains the invariant that
3045    digits above the precision are all zero.  Digits shifted off the
3046    end are lost.  Cannot fail.
3047  */
3048 
3049 void     s_mp_rshd(mp_int *mp, mp_size p)
3050 {
3051   mp_size  ix;
3052   mp_digit *src, *dst;
3053 
3054   if(p == 0)
3055     return;
3056 
3057   /* Shortcut when all digits are to be shifted off */
3058   if(p >= USED(mp)) {
3059     s_mp_setz(DIGITS(mp), ALLOC(mp));
3060     USED(mp) = 1;
3061     SIGN(mp) = ZPOS;
3062     return;
3063   }
3064 
3065   /* Shift all the significant figures over as needed */
3066   dst = MP_DIGITS(mp);
3067   src = dst + p;
3068   for (ix = USED(mp) - p; ix > 0; ix--)
3069     *dst++ = *src++;
3070 
3071   MP_USED(mp) -= p;
3072   /* Fill the top digits with zeroes */
3073   while (p-- > 0)
3074     *dst++ = 0;
3075 
3076 #if 0
3077   /* Strip off any leading zeroes    */
3078   s_mp_clamp(mp);
3079 #endif
3080 
3081 } /* end s_mp_rshd() */
3082 
3083 /* }}} */
3084 
3085 /* {{{ s_mp_div_2(mp) */
3086 
3087 /* Divide by two -- take advantage of radix properties to do it fast      */
3088 void     s_mp_div_2(mp_int *mp)
3089 {
3090   s_mp_div_2d(mp, 1);
3091 
3092 } /* end s_mp_div_2() */
3093 
3094 /* }}} */
3095 
3096 /* {{{ s_mp_mul_2(mp) */
3097 
3098 mp_err s_mp_mul_2(mp_int *mp)
3099 {
3100   mp_digit *pd;
3101   int      ix, used;
3102   mp_digit kin = 0;
3103 
3104   /* Shift digits leftward by 1 bit */
3105   used = MP_USED(mp);
3106   pd = MP_DIGITS(mp);
3107   for (ix = 0; ix < used; ix++) {
3108     mp_digit d = *pd;
3109     *pd++ = (d << 1) | kin;
3110     kin = (d >> (DIGIT_BIT - 1));
3111   }
3112 
3113   /* Deal with rollover from last digit */
3114   if (kin) {
3115     if (ix >= ALLOC(mp)) {
3116       mp_err res;
3117       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3118 	return res;
3119     }
3120 
3121     DIGIT(mp, ix) = kin;
3122     USED(mp) += 1;
3123   }
3124 
3125   return MP_OKAY;
3126 
3127 } /* end s_mp_mul_2() */
3128 
3129 /* }}} */
3130 
3131 /* {{{ s_mp_mod_2d(mp, d) */
3132 
3133 /*
3134   Remainder the integer by 2^d, where d is a number of bits.  This
3135   amounts to a bitwise AND of the value, and does not require the full
3136   division code
3137  */
3138 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
3139 {
3140   mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3141   mp_size  ix;
3142   mp_digit dmask;
3143 
3144   if(ndig >= USED(mp))
3145     return;
3146 
3147   /* Flush all the bits above 2^d in its digit */
3148   dmask = ((mp_digit)1 << nbit) - 1;
3149   DIGIT(mp, ndig) &= dmask;
3150 
3151   /* Flush all digits above the one with 2^d in it */
3152   for(ix = ndig + 1; ix < USED(mp); ix++)
3153     DIGIT(mp, ix) = 0;
3154 
3155   s_mp_clamp(mp);
3156 
3157 } /* end s_mp_mod_2d() */
3158 
3159 /* }}} */
3160 
3161 /* {{{ s_mp_div_2d(mp, d) */
3162 
3163 /*
3164   Divide the integer by 2^d, where d is a number of bits.  This
3165   amounts to a bitwise shift of the value, and does not require the
3166   full division code (used in Barrett reduction, see below)
3167  */
3168 void     s_mp_div_2d(mp_int *mp, mp_digit d)
3169 {
3170   int       ix;
3171   mp_digit  save, next, mask;
3172 
3173   s_mp_rshd(mp, d / DIGIT_BIT);
3174   d %= DIGIT_BIT;
3175   if (d) {
3176     mask = ((mp_digit)1 << d) - 1;
3177     save = 0;
3178     for(ix = USED(mp) - 1; ix >= 0; ix--) {
3179       next = DIGIT(mp, ix) & mask;
3180       DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3181       save = next;
3182     }
3183   }
3184   s_mp_clamp(mp);
3185 
3186 } /* end s_mp_div_2d() */
3187 
3188 /* }}} */
3189 
3190 /* {{{ s_mp_norm(a, b, *d) */
3191 
3192 /*
3193   s_mp_norm(a, b, *d)
3194 
3195   Normalize a and b for division, where b is the divisor.  In order
3196   that we might make good guesses for quotient digits, we want the
3197   leading digit of b to be at least half the radix, which we
3198   accomplish by multiplying a and b by a power of 2.  The exponent
3199   (shift count) is placed in *pd, so that the remainder can be shifted
3200   back at the end of the division process.
3201  */
3202 
3203 mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3204 {
3205   mp_digit  d;
3206   mp_digit  mask;
3207   mp_digit  b_msd;
3208   mp_err    res    = MP_OKAY;
3209 
3210   d = 0;
3211   mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);	/* mask is msb of digit */
3212   b_msd = DIGIT(b, USED(b) - 1);
3213   while (!(b_msd & mask)) {
3214     b_msd <<= 1;
3215     ++d;
3216   }
3217 
3218   if (d) {
3219     MP_CHECKOK( s_mp_mul_2d(a, d) );
3220     MP_CHECKOK( s_mp_mul_2d(b, d) );
3221   }
3222 
3223   *pd = d;
3224 CLEANUP:
3225   return res;
3226 
3227 } /* end s_mp_norm() */
3228 
3229 /* }}} */
3230 
3231 /* }}} */
3232 
3233 /* {{{ Primitive digit arithmetic */
3234 
3235 /* {{{ s_mp_add_d(mp, d) */
3236 
3237 /* Add d to |mp| in place                                                 */
3238 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
3239 {
3240 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3241   mp_word   w, k = 0;
3242   mp_size   ix = 1;
3243 
3244   w = (mp_word)DIGIT(mp, 0) + d;
3245   DIGIT(mp, 0) = ACCUM(w);
3246   k = CARRYOUT(w);
3247 
3248   while(ix < USED(mp) && k) {
3249     w = (mp_word)DIGIT(mp, ix) + k;
3250     DIGIT(mp, ix) = ACCUM(w);
3251     k = CARRYOUT(w);
3252     ++ix;
3253   }
3254 
3255   if(k != 0) {
3256     mp_err  res;
3257 
3258     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3259       return res;
3260 
3261     DIGIT(mp, ix) = (mp_digit)k;
3262   }
3263 
3264   return MP_OKAY;
3265 #else
3266   mp_digit * pmp = MP_DIGITS(mp);
3267   mp_digit sum, mp_i, carry = 0;
3268   mp_err   res = MP_OKAY;
3269   int used = (int)MP_USED(mp);
3270 
3271   mp_i = *pmp;
3272   *pmp++ = sum = d + mp_i;
3273   carry = (sum < d);
3274   while (carry && --used > 0) {
3275     mp_i = *pmp;
3276     *pmp++ = sum = carry + mp_i;
3277     carry = !sum;
3278   }
3279   if (carry && !used) {
3280     /* mp is growing */
3281     used = MP_USED(mp);
3282     MP_CHECKOK( s_mp_pad(mp, used + 1) );
3283     MP_DIGIT(mp, used) = carry;
3284   }
3285 CLEANUP:
3286   return res;
3287 #endif
3288 } /* end s_mp_add_d() */
3289 
3290 /* }}} */
3291 
3292 /* {{{ s_mp_sub_d(mp, d) */
3293 
3294 /* Subtract d from |mp| in place, assumes |mp| > d                        */
3295 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
3296 {
3297 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3298   mp_word   w, b = 0;
3299   mp_size   ix = 1;
3300 
3301   /* Compute initial subtraction    */
3302   w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3303   b = CARRYOUT(w) ? 0 : 1;
3304   DIGIT(mp, 0) = ACCUM(w);
3305 
3306   /* Propagate borrows leftward     */
3307   while(b && ix < USED(mp)) {
3308     w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3309     b = CARRYOUT(w) ? 0 : 1;
3310     DIGIT(mp, ix) = ACCUM(w);
3311     ++ix;
3312   }
3313 
3314   /* Remove leading zeroes          */
3315   s_mp_clamp(mp);
3316 
3317   /* If we have a borrow out, it's a violation of the input invariant */
3318   if(b)
3319     return MP_RANGE;
3320   else
3321     return MP_OKAY;
3322 #else
3323   mp_digit *pmp = MP_DIGITS(mp);
3324   mp_digit mp_i, diff, borrow;
3325   mp_size  used = MP_USED(mp);
3326 
3327   mp_i = *pmp;
3328   *pmp++ = diff = mp_i - d;
3329   borrow = (diff > mp_i);
3330   while (borrow && --used) {
3331     mp_i = *pmp;
3332     *pmp++ = diff = mp_i - borrow;
3333     borrow = (diff > mp_i);
3334   }
3335   s_mp_clamp(mp);
3336   return (borrow && !used) ? MP_RANGE : MP_OKAY;
3337 #endif
3338 } /* end s_mp_sub_d() */
3339 
3340 /* }}} */
3341 
3342 /* {{{ s_mp_mul_d(a, d) */
3343 
3344 /* Compute a = a * d, single digit multiplication                         */
3345 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
3346 {
3347   mp_err  res;
3348   mp_size used;
3349   int     pow;
3350 
3351   if (!d) {
3352     mp_zero(a);
3353     return MP_OKAY;
3354   }
3355   if (d == 1)
3356     return MP_OKAY;
3357   if (0 <= (pow = s_mp_ispow2d(d))) {
3358     return s_mp_mul_2d(a, (mp_digit)pow);
3359   }
3360 
3361   used = MP_USED(a);
3362   MP_CHECKOK( s_mp_pad(a, used + 1) );
3363 
3364   s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3365 
3366   s_mp_clamp(a);
3367 
3368 CLEANUP:
3369   return res;
3370 
3371 } /* end s_mp_mul_d() */
3372 
3373 /* }}} */
3374 
3375 /* {{{ s_mp_div_d(mp, d, r) */
3376 
3377 /*
3378   s_mp_div_d(mp, d, r)
3379 
3380   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3381   single digit d.  If r is null, the remainder will be discarded.
3382  */
3383 
3384 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3385 {
3386 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3387   mp_word   w = 0, q;
3388 #else
3389   mp_digit  w, q;
3390 #endif
3391   int       ix;
3392   mp_err    res;
3393   mp_int    quot;
3394   mp_int    rem;
3395 
3396   if(d == 0)
3397     return MP_RANGE;
3398   if (d == 1) {
3399     if (r)
3400       *r = 0;
3401     return MP_OKAY;
3402   }
3403   /* could check for power of 2 here, but mp_div_d does that. */
3404   if (MP_USED(mp) == 1) {
3405     mp_digit n   = MP_DIGIT(mp,0);
3406     mp_digit rem;
3407 
3408     q   = n / d;
3409     rem = n % d;
3410     MP_DIGIT(mp,0) = q;
3411     if (r)
3412       *r = rem;
3413     return MP_OKAY;
3414   }
3415 
3416   MP_DIGITS(&rem)  = 0;
3417   MP_DIGITS(&quot) = 0;
3418   /* Make room for the quotient */
3419   MP_CHECKOK( mp_init_size(&quot, USED(mp), FLAG(mp)) );
3420 
3421 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3422   for(ix = USED(mp) - 1; ix >= 0; ix--) {
3423     w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3424 
3425     if(w >= d) {
3426       q = w / d;
3427       w = w % d;
3428     } else {
3429       q = 0;
3430     }
3431 
3432     s_mp_lshd(&quot, 1);
3433     DIGIT(&quot, 0) = (mp_digit)q;
3434   }
3435 #else
3436   {
3437     mp_digit p;
3438 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3439     mp_digit norm;
3440 #endif
3441 
3442     MP_CHECKOK( mp_init_copy(&rem, mp) );
3443 
3444 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3445     MP_DIGIT(&quot, 0) = d;
3446     MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3447     if (norm)
3448       d <<= norm;
3449     MP_DIGIT(&quot, 0) = 0;
3450 #endif
3451 
3452     p = 0;
3453     for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3454       w = DIGIT(&rem, ix);
3455 
3456       if (p) {
3457         MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3458       } else if (w >= d) {
3459 	q = w / d;
3460 	w = w % d;
3461       } else {
3462 	q = 0;
3463       }
3464 
3465       MP_CHECKOK( s_mp_lshd(&quot, 1) );
3466       DIGIT(&quot, 0) = q;
3467       p = w;
3468     }
3469 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3470     if (norm)
3471       w >>= norm;
3472 #endif
3473   }
3474 #endif
3475 
3476   /* Deliver the remainder, if desired */
3477   if(r)
3478     *r = (mp_digit)w;
3479 
3480   s_mp_clamp(&quot);
3481   mp_exch(&quot, mp);
3482 CLEANUP:
3483   mp_clear(&quot);
3484   mp_clear(&rem);
3485 
3486   return res;
3487 } /* end s_mp_div_d() */
3488 
3489 /* }}} */
3490 
3491 
3492 /* }}} */
3493 
3494 /* {{{ Primitive full arithmetic */
3495 
3496 /* {{{ s_mp_add(a, b) */
3497 
3498 /* Compute a = |a| + |b|                                                  */
3499 mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
3500 {
3501 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3502   mp_word   w = 0;
3503 #else
3504   mp_digit  d, sum, carry = 0;
3505 #endif
3506   mp_digit *pa, *pb;
3507   mp_size   ix;
3508   mp_size   used;
3509   mp_err    res;
3510 
3511   /* Make sure a has enough precision for the output value */
3512   if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3513     return res;
3514 
3515   /*
3516     Add up all digits up to the precision of b.  If b had initially
3517     the same precision as a, or greater, we took care of it by the
3518     padding step above, so there is no problem.  If b had initially
3519     less precision, we'll have to make sure the carry out is duly
3520     propagated upward among the higher-order digits of the sum.
3521    */
3522   pa = MP_DIGITS(a);
3523   pb = MP_DIGITS(b);
3524   used = MP_USED(b);
3525   for(ix = 0; ix < used; ix++) {
3526 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3527     w = w + *pa + *pb++;
3528     *pa++ = ACCUM(w);
3529     w = CARRYOUT(w);
3530 #else
3531     d = *pa;
3532     sum = d + *pb++;
3533     d = (sum < d);			/* detect overflow */
3534     *pa++ = sum += carry;
3535     carry = d + (sum < carry);		/* detect overflow */
3536 #endif
3537   }
3538 
3539   /* If we run out of 'b' digits before we're actually done, make
3540      sure the carries get propagated upward...
3541    */
3542   used = MP_USED(a);
3543 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3544   while (w && ix < used) {
3545     w = w + *pa;
3546     *pa++ = ACCUM(w);
3547     w = CARRYOUT(w);
3548     ++ix;
3549   }
3550 #else
3551   while (carry && ix < used) {
3552     sum = carry + *pa;
3553     *pa++ = sum;
3554     carry = !sum;
3555     ++ix;
3556   }
3557 #endif
3558 
3559   /* If there's an overall carry out, increase precision and include
3560      it.  We could have done this initially, but why touch the memory
3561      allocator unless we're sure we have to?
3562    */
3563 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3564   if (w) {
3565     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3566       return res;
3567 
3568     DIGIT(a, ix) = (mp_digit)w;
3569   }
3570 #else
3571   if (carry) {
3572     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3573       return res;
3574 
3575     DIGIT(a, used) = carry;
3576   }
3577 #endif
3578 
3579   return MP_OKAY;
3580 } /* end s_mp_add() */
3581 
3582 /* }}} */
3583 
3584 /* Compute c = |a| + |b|         */ /* magnitude addition      */
3585 mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3586 {
3587   mp_digit *pa, *pb, *pc;
3588 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3589   mp_word   w = 0;
3590 #else
3591   mp_digit  sum, carry = 0, d;
3592 #endif
3593   mp_size   ix;
3594   mp_size   used;
3595   mp_err    res;
3596 
3597   MP_SIGN(c) = MP_SIGN(a);
3598   if (MP_USED(a) < MP_USED(b)) {
3599     const mp_int *xch = a;
3600     a = b;
3601     b = xch;
3602   }
3603 
3604   /* Make sure a has enough precision for the output value */
3605   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3606     return res;
3607 
3608   /*
3609     Add up all digits up to the precision of b.  If b had initially
3610     the same precision as a, or greater, we took care of it by the
3611     exchange step above, so there is no problem.  If b had initially
3612     less precision, we'll have to make sure the carry out is duly
3613     propagated upward among the higher-order digits of the sum.
3614    */
3615   pa = MP_DIGITS(a);
3616   pb = MP_DIGITS(b);
3617   pc = MP_DIGITS(c);
3618   used = MP_USED(b);
3619   for (ix = 0; ix < used; ix++) {
3620 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3621     w = w + *pa++ + *pb++;
3622     *pc++ = ACCUM(w);
3623     w = CARRYOUT(w);
3624 #else
3625     d = *pa++;
3626     sum = d + *pb++;
3627     d = (sum < d);			/* detect overflow */
3628     *pc++ = sum += carry;
3629     carry = d + (sum < carry);		/* detect overflow */
3630 #endif
3631   }
3632 
3633   /* If we run out of 'b' digits before we're actually done, make
3634      sure the carries get propagated upward...
3635    */
3636   for (used = MP_USED(a); ix < used; ++ix) {
3637 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3638     w = w + *pa++;
3639     *pc++ = ACCUM(w);
3640     w = CARRYOUT(w);
3641 #else
3642     *pc++ = sum = carry + *pa++;
3643     carry = (sum < carry);
3644 #endif
3645   }
3646 
3647   /* If there's an overall carry out, increase precision and include
3648      it.  We could have done this initially, but why touch the memory
3649      allocator unless we're sure we have to?
3650    */
3651 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3652   if (w) {
3653     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3654       return res;
3655 
3656     DIGIT(c, used) = (mp_digit)w;
3657     ++used;
3658   }
3659 #else
3660   if (carry) {
3661     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3662       return res;
3663 
3664     DIGIT(c, used) = carry;
3665     ++used;
3666   }
3667 #endif
3668   MP_USED(c) = used;
3669   return MP_OKAY;
3670 }
3671 /* {{{ s_mp_add_offset(a, b, offset) */
3672 
3673 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3674 mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3675 {
3676 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3677   mp_word   w, k = 0;
3678 #else
3679   mp_digit  d, sum, carry = 0;
3680 #endif
3681   mp_size   ib;
3682   mp_size   ia;
3683   mp_size   lim;
3684   mp_err    res;
3685 
3686   /* Make sure a has enough precision for the output value */
3687   lim = MP_USED(b) + offset;
3688   if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3689     return res;
3690 
3691   /*
3692     Add up all digits up to the precision of b.  If b had initially
3693     the same precision as a, or greater, we took care of it by the
3694     padding step above, so there is no problem.  If b had initially
3695     less precision, we'll have to make sure the carry out is duly
3696     propagated upward among the higher-order digits of the sum.
3697    */
3698   lim = USED(b);
3699   for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3700 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3701     w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3702     DIGIT(a, ia) = ACCUM(w);
3703     k = CARRYOUT(w);
3704 #else
3705     d = MP_DIGIT(a, ia);
3706     sum = d + MP_DIGIT(b, ib);
3707     d = (sum < d);
3708     MP_DIGIT(a,ia) = sum += carry;
3709     carry = d + (sum < carry);
3710 #endif
3711   }
3712 
3713   /* If we run out of 'b' digits before we're actually done, make
3714      sure the carries get propagated upward...
3715    */
3716 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3717   for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3718     w = (mp_word)DIGIT(a, ia) + k;
3719     DIGIT(a, ia) = ACCUM(w);
3720     k = CARRYOUT(w);
3721   }
3722 #else
3723   for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3724     d = MP_DIGIT(a, ia);
3725     MP_DIGIT(a,ia) = sum = d + carry;
3726     carry = (sum < d);
3727   }
3728 #endif
3729 
3730   /* If there's an overall carry out, increase precision and include
3731      it.  We could have done this initially, but why touch the memory
3732      allocator unless we're sure we have to?
3733    */
3734 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3735   if(k) {
3736     if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3737       return res;
3738 
3739     DIGIT(a, ia) = (mp_digit)k;
3740   }
3741 #else
3742   if (carry) {
3743     if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3744       return res;
3745 
3746     DIGIT(a, lim) = carry;
3747   }
3748 #endif
3749   s_mp_clamp(a);
3750 
3751   return MP_OKAY;
3752 
3753 } /* end s_mp_add_offset() */
3754 
3755 /* }}} */
3756 
3757 /* {{{ s_mp_sub(a, b) */
3758 
3759 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3760 mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
3761 {
3762   mp_digit *pa, *pb, *limit;
3763 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3764   mp_sword  w = 0;
3765 #else
3766   mp_digit  d, diff, borrow = 0;
3767 #endif
3768 
3769   /*
3770     Subtract and propagate borrow.  Up to the precision of b, this
3771     accounts for the digits of b; after that, we just make sure the
3772     carries get to the right place.  This saves having to pad b out to
3773     the precision of a just to make the loops work right...
3774    */
3775   pa = MP_DIGITS(a);
3776   pb = MP_DIGITS(b);
3777   limit = pb + MP_USED(b);
3778   while (pb < limit) {
3779 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3780     w = w + *pa - *pb++;
3781     *pa++ = ACCUM(w);
3782     w >>= MP_DIGIT_BIT;
3783 #else
3784     d = *pa;
3785     diff = d - *pb++;
3786     d = (diff > d);				/* detect borrow */
3787     if (borrow && --diff == MP_DIGIT_MAX)
3788       ++d;
3789     *pa++ = diff;
3790     borrow = d;
3791 #endif
3792   }
3793   limit = MP_DIGITS(a) + MP_USED(a);
3794 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3795   while (w && pa < limit) {
3796     w = w + *pa;
3797     *pa++ = ACCUM(w);
3798     w >>= MP_DIGIT_BIT;
3799   }
3800 #else
3801   while (borrow && pa < limit) {
3802     d = *pa;
3803     *pa++ = diff = d - borrow;
3804     borrow = (diff > d);
3805   }
3806 #endif
3807 
3808   /* Clobber any leading zeroes we created    */
3809   s_mp_clamp(a);
3810 
3811   /*
3812      If there was a borrow out, then |b| > |a| in violation
3813      of our input invariant.  We've already done the work,
3814      but we'll at least complain about it...
3815    */
3816 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3817   return w ? MP_RANGE : MP_OKAY;
3818 #else
3819   return borrow ? MP_RANGE : MP_OKAY;
3820 #endif
3821 } /* end s_mp_sub() */
3822 
3823 /* }}} */
3824 
3825 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
3826 mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3827 {
3828   mp_digit *pa, *pb, *pc;
3829 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3830   mp_sword  w = 0;
3831 #else
3832   mp_digit  d, diff, borrow = 0;
3833 #endif
3834   int       ix, limit;
3835   mp_err    res;
3836 
3837   MP_SIGN(c) = MP_SIGN(a);
3838 
3839   /* Make sure a has enough precision for the output value */
3840   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3841     return res;
3842 
3843   /*
3844     Subtract and propagate borrow.  Up to the precision of b, this
3845     accounts for the digits of b; after that, we just make sure the
3846     carries get to the right place.  This saves having to pad b out to
3847     the precision of a just to make the loops work right...
3848    */
3849   pa = MP_DIGITS(a);
3850   pb = MP_DIGITS(b);
3851   pc = MP_DIGITS(c);
3852   limit = MP_USED(b);
3853   for (ix = 0; ix < limit; ++ix) {
3854 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3855     w = w + *pa++ - *pb++;
3856     *pc++ = ACCUM(w);
3857     w >>= MP_DIGIT_BIT;
3858 #else
3859     d = *pa++;
3860     diff = d - *pb++;
3861     d = (diff > d);
3862     if (borrow && --diff == MP_DIGIT_MAX)
3863       ++d;
3864     *pc++ = diff;
3865     borrow = d;
3866 #endif
3867   }
3868   for (limit = MP_USED(a); ix < limit; ++ix) {
3869 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3870     w = w + *pa++;
3871     *pc++ = ACCUM(w);
3872     w >>= MP_DIGIT_BIT;
3873 #else
3874     d = *pa++;
3875     *pc++ = diff = d - borrow;
3876     borrow = (diff > d);
3877 #endif
3878   }
3879 
3880   /* Clobber any leading zeroes we created    */
3881   MP_USED(c) = ix;
3882   s_mp_clamp(c);
3883 
3884   /*
3885      If there was a borrow out, then |b| > |a| in violation
3886      of our input invariant.  We've already done the work,
3887      but we'll at least complain about it...
3888    */
3889 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3890   return w ? MP_RANGE : MP_OKAY;
3891 #else
3892   return borrow ? MP_RANGE : MP_OKAY;
3893 #endif
3894 }
3895 /* {{{ s_mp_mul(a, b) */
3896 
3897 /* Compute a = |a| * |b|                                                  */
3898 mp_err   s_mp_mul(mp_int *a, const mp_int *b)
3899 {
3900   return mp_mul(a, b, a);
3901 } /* end s_mp_mul() */
3902 
3903 /* }}} */
3904 
3905 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3906 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3907 #define MP_MUL_DxD(a, b, Phi, Plo) \
3908   { unsigned long long product = (unsigned long long)a * b; \
3909     Plo = (mp_digit)product; \
3910     Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3911 #elif defined(OSF1)
3912 #define MP_MUL_DxD(a, b, Phi, Plo) \
3913   { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3914     Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3915 #else
3916 #define MP_MUL_DxD(a, b, Phi, Plo) \
3917   { mp_digit a0b1, a1b0; \
3918     Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3919     Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3920     a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3921     a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3922     a1b0 += a0b1; \
3923     Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3924     if (a1b0 < a0b1)  \
3925       Phi += MP_HALF_RADIX; \
3926     a1b0 <<= MP_HALF_DIGIT_BIT; \
3927     Plo += a1b0; \
3928     if (Plo < a1b0) \
3929       ++Phi; \
3930   }
3931 #endif
3932 
3933 #if !defined(MP_ASSEMBLY_MULTIPLY)
3934 /* c = a * b */
3935 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3936 {
3937 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3938   mp_digit   d = 0;
3939 
3940   /* Inner product:  Digits of a */
3941   while (a_len--) {
3942     mp_word w = ((mp_word)b * *a++) + d;
3943     *c++ = ACCUM(w);
3944     d = CARRYOUT(w);
3945   }
3946   *c = d;
3947 #else
3948   mp_digit carry = 0;
3949   while (a_len--) {
3950     mp_digit a_i = *a++;
3951     mp_digit a0b0, a1b1;
3952 
3953     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3954 
3955     a0b0 += carry;
3956     if (a0b0 < carry)
3957       ++a1b1;
3958     *c++ = a0b0;
3959     carry = a1b1;
3960   }
3961   *c = carry;
3962 #endif
3963 }
3964 
3965 /* c += a * b */
3966 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3967 			      mp_digit *c)
3968 {
3969 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3970   mp_digit   d = 0;
3971 
3972   /* Inner product:  Digits of a */
3973   while (a_len--) {
3974     mp_word w = ((mp_word)b * *a++) + *c + d;
3975     *c++ = ACCUM(w);
3976     d = CARRYOUT(w);
3977   }
3978   *c = d;
3979 #else
3980   mp_digit carry = 0;
3981   while (a_len--) {
3982     mp_digit a_i = *a++;
3983     mp_digit a0b0, a1b1;
3984 
3985     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3986 
3987     a0b0 += carry;
3988     if (a0b0 < carry)
3989       ++a1b1;
3990     a0b0 += a_i = *c;
3991     if (a0b0 < a_i)
3992       ++a1b1;
3993     *c++ = a0b0;
3994     carry = a1b1;
3995   }
3996   *c = carry;
3997 #endif
3998 }
3999 
4000 /* Presently, this is only used by the Montgomery arithmetic code. */
4001 /* c += a * b */
4002 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
4003 {
4004 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4005   mp_digit   d = 0;
4006 
4007   /* Inner product:  Digits of a */
4008   while (a_len--) {
4009     mp_word w = ((mp_word)b * *a++) + *c + d;
4010     *c++ = ACCUM(w);
4011     d = CARRYOUT(w);
4012   }
4013 
4014   while (d) {
4015     mp_word w = (mp_word)*c + d;
4016     *c++ = ACCUM(w);
4017     d = CARRYOUT(w);
4018   }
4019 #else
4020   mp_digit carry = 0;
4021   while (a_len--) {
4022     mp_digit a_i = *a++;
4023     mp_digit a0b0, a1b1;
4024 
4025     MP_MUL_DxD(a_i, b, a1b1, a0b0);
4026 
4027     a0b0 += carry;
4028     if (a0b0 < carry)
4029       ++a1b1;
4030 
4031     a0b0 += a_i = *c;
4032     if (a0b0 < a_i)
4033       ++a1b1;
4034 
4035     *c++ = a0b0;
4036     carry = a1b1;
4037   }
4038   while (carry) {
4039     mp_digit c_i = *c;
4040     carry += c_i;
4041     *c++ = carry;
4042     carry = carry < c_i;
4043   }
4044 #endif
4045 }
4046 #endif
4047 
4048 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4049 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4050 #define MP_SQR_D(a, Phi, Plo) \
4051   { unsigned long long square = (unsigned long long)a * a; \
4052     Plo = (mp_digit)square; \
4053     Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4054 #elif defined(OSF1)
4055 #define MP_SQR_D(a, Phi, Plo) \
4056   { Plo = asm ("mulq  %a0, %a0, %v0", a);\
4057     Phi = asm ("umulh %a0, %a0, %v0", a); }
4058 #else
4059 #define MP_SQR_D(a, Phi, Plo) \
4060   { mp_digit Pmid; \
4061     Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
4062     Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4063     Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4064     Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
4065     Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
4066     Plo += Pmid;  \
4067     if (Plo < Pmid)  \
4068       ++Phi;  \
4069   }
4070 #endif
4071 
4072 #if !defined(MP_ASSEMBLY_SQUARE)
4073 /* Add the squares of the digits of a to the digits of b. */
4074 void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4075 {
4076 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4077   mp_word  w;
4078   mp_digit d;
4079   mp_size  ix;
4080 
4081   w  = 0;
4082 #define ADD_SQUARE(n) \
4083     d = pa[n]; \
4084     w += (d * (mp_word)d) + ps[2*n]; \
4085     ps[2*n] = ACCUM(w); \
4086     w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4087     ps[2*n+1] = ACCUM(w); \
4088     w = (w >> DIGIT_BIT)
4089 
4090   for (ix = a_len; ix >= 4; ix -= 4) {
4091     ADD_SQUARE(0);
4092     ADD_SQUARE(1);
4093     ADD_SQUARE(2);
4094     ADD_SQUARE(3);
4095     pa += 4;
4096     ps += 8;
4097   }
4098   if (ix) {
4099     ps += 2*ix;
4100     pa += ix;
4101     switch (ix) {
4102     case 3: ADD_SQUARE(-3); /* FALLTHRU */
4103     case 2: ADD_SQUARE(-2); /* FALLTHRU */
4104     case 1: ADD_SQUARE(-1); /* FALLTHRU */
4105     case 0: break;
4106     }
4107   }
4108   while (w) {
4109     w += *ps;
4110     *ps++ = ACCUM(w);
4111     w = (w >> DIGIT_BIT);
4112   }
4113 #else
4114   mp_digit carry = 0;
4115   while (a_len--) {
4116     mp_digit a_i = *pa++;
4117     mp_digit a0a0, a1a1;
4118 
4119     MP_SQR_D(a_i, a1a1, a0a0);
4120 
4121     /* here a1a1 and a0a0 constitute a_i ** 2 */
4122     a0a0 += carry;
4123     if (a0a0 < carry)
4124       ++a1a1;
4125 
4126     /* now add to ps */
4127     a0a0 += a_i = *ps;
4128     if (a0a0 < a_i)
4129       ++a1a1;
4130     *ps++ = a0a0;
4131     a1a1 += a_i = *ps;
4132     carry = (a1a1 < a_i);
4133     *ps++ = a1a1;
4134   }
4135   while (carry) {
4136     mp_digit s_i = *ps;
4137     carry += s_i;
4138     *ps++ = carry;
4139     carry = carry < s_i;
4140   }
4141 #endif
4142 }
4143 #endif
4144 
4145 #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4146 && !defined(MP_ASSEMBLY_DIV_2DX1D)
4147 /*
4148 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4149 ** so its high bit is 1.   This code is from NSPR.
4150 */
4151 mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4152 		       mp_digit *qp, mp_digit *rp)
4153 {
4154     mp_digit d1, d0, q1, q0;
4155     mp_digit r1, r0, m;
4156 
4157     d1 = divisor >> MP_HALF_DIGIT_BIT;
4158     d0 = divisor & MP_HALF_DIGIT_MAX;
4159     r1 = Nhi % d1;
4160     q1 = Nhi / d1;
4161     m = q1 * d0;
4162     r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4163     if (r1 < m) {
4164         q1--, r1 += divisor;
4165         if (r1 >= divisor && r1 < m) {
4166 	    q1--, r1 += divisor;
4167 	}
4168     }
4169     r1 -= m;
4170     r0 = r1 % d1;
4171     q0 = r1 / d1;
4172     m = q0 * d0;
4173     r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4174     if (r0 < m) {
4175         q0--, r0 += divisor;
4176         if (r0 >= divisor && r0 < m) {
4177 	    q0--, r0 += divisor;
4178 	}
4179     }
4180     if (qp)
4181 	*qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4182     if (rp)
4183 	*rp = r0 - m;
4184     return MP_OKAY;
4185 }
4186 #endif
4187 
4188 #if MP_SQUARE
4189 /* {{{ s_mp_sqr(a) */
4190 
4191 mp_err   s_mp_sqr(mp_int *a)
4192 {
4193   mp_err   res;
4194   mp_int   tmp;
4195 
4196   if((res = mp_init_size(&tmp, 2 * USED(a), FLAG(a))) != MP_OKAY)
4197     return res;
4198   res = mp_sqr(a, &tmp);
4199   if (res == MP_OKAY) {
4200     s_mp_exch(&tmp, a);
4201   }
4202   mp_clear(&tmp);
4203   return res;
4204 }
4205 
4206 /* }}} */
4207 #endif
4208 
4209 /* {{{ s_mp_div(a, b) */
4210 
4211 /*
4212   s_mp_div(a, b)
4213 
4214   Compute a = a / b and b = a mod b.  Assumes b > a.
4215  */
4216 
4217 mp_err   s_mp_div(mp_int *rem, 	/* i: dividend, o: remainder */
4218                   mp_int *div, 	/* i: divisor                */
4219 		  mp_int *quot)	/* i: 0;        o: quotient  */
4220 {
4221   mp_int   part, t;
4222 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4223   mp_word  q_msd;
4224 #else
4225   mp_digit q_msd;
4226 #endif
4227   mp_err   res;
4228   mp_digit d;
4229   mp_digit div_msd;
4230   int      ix;
4231 
4232   if(mp_cmp_z(div) == 0)
4233     return MP_RANGE;
4234 
4235   /* Shortcut if divisor is power of two */
4236   if((ix = s_mp_ispow2(div)) >= 0) {
4237     MP_CHECKOK( mp_copy(rem, quot) );
4238     s_mp_div_2d(quot, (mp_digit)ix);
4239     s_mp_mod_2d(rem,  (mp_digit)ix);
4240 
4241     return MP_OKAY;
4242   }
4243 
4244   DIGITS(&t) = 0;
4245   MP_SIGN(rem) = ZPOS;
4246   MP_SIGN(div) = ZPOS;
4247 
4248   /* A working temporary for division     */
4249   MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem), FLAG(rem)));
4250 
4251   /* Normalize to optimize guessing       */
4252   MP_CHECKOK( s_mp_norm(rem, div, &d) );
4253 
4254   part = *rem;
4255 
4256   /* Perform the division itself...woo!   */
4257   MP_USED(quot) = MP_ALLOC(quot);
4258 
4259   /* Find a partial substring of rem which is at least div */
4260   /* If we didn't find one, we're finished dividing    */
4261   while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4262     int i;
4263     int unusedRem;
4264 
4265     unusedRem = MP_USED(rem) - MP_USED(div);
4266     MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4267     MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
4268     MP_USED(&part)   = MP_USED(div);
4269     if (s_mp_cmp(&part, div) < 0) {
4270       -- unusedRem;
4271 #if MP_ARGCHK == 2
4272       assert(unusedRem >= 0);
4273 #endif
4274       -- MP_DIGITS(&part);
4275       ++ MP_USED(&part);
4276       ++ MP_ALLOC(&part);
4277     }
4278 
4279     /* Compute a guess for the next quotient digit       */
4280     q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4281     div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4282     if (q_msd >= div_msd) {
4283       q_msd = 1;
4284     } else if (MP_USED(&part) > 1) {
4285 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4286       q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4287       q_msd /= div_msd;
4288       if (q_msd == RADIX)
4289         --q_msd;
4290 #else
4291       mp_digit r;
4292       MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4293 				  div_msd, &q_msd, &r) );
4294 #endif
4295     } else {
4296       q_msd = 0;
4297     }
4298 #if MP_ARGCHK == 2
4299     assert(q_msd > 0); /* This case should never occur any more. */
4300 #endif
4301     if (q_msd <= 0)
4302       break;
4303 
4304     /* See what that multiplies out to                   */
4305     mp_copy(div, &t);
4306     MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4307 
4308     /*
4309        If it's too big, back it off.  We should not have to do this
4310        more than once, or, in rare cases, twice.  Knuth describes a
4311        method by which this could be reduced to a maximum of once, but
4312        I didn't implement that here.
4313      * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4314      */
4315     for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4316       --q_msd;
4317       s_mp_sub(&t, div);	/* t -= div */
4318     }
4319     if (i < 0) {
4320       res = MP_RANGE;
4321       goto CLEANUP;
4322     }
4323 
4324     /* At this point, q_msd should be the right next digit   */
4325     MP_CHECKOK( s_mp_sub(&part, &t) );	/* part -= t */
4326     s_mp_clamp(rem);
4327 
4328     /*
4329       Include the digit in the quotient.  We allocated enough memory
4330       for any quotient we could ever possibly get, so we should not
4331       have to check for failures here
4332      */
4333     MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4334   }
4335 
4336   /* Denormalize remainder                */
4337   if (d) {
4338     s_mp_div_2d(rem, d);
4339   }
4340 
4341   s_mp_clamp(quot);
4342 
4343 CLEANUP:
4344   mp_clear(&t);
4345 
4346   return res;
4347 
4348 } /* end s_mp_div() */
4349 
4350 
4351 /* }}} */
4352 
4353 /* {{{ s_mp_2expt(a, k) */
4354 
4355 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
4356 {
4357   mp_err    res;
4358   mp_size   dig, bit;
4359 
4360   dig = k / DIGIT_BIT;
4361   bit = k % DIGIT_BIT;
4362 
4363   mp_zero(a);
4364   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4365     return res;
4366 
4367   DIGIT(a, dig) |= ((mp_digit)1 << bit);
4368 
4369   return MP_OKAY;
4370 
4371 } /* end s_mp_2expt() */
4372 
4373 /* }}} */
4374 
4375 /* {{{ s_mp_reduce(x, m, mu) */
4376 
4377 /*
4378   Compute Barrett reduction, x (mod m), given a precomputed value for
4379   mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4380   faster than straight division, when many reductions by the same
4381   value of m are required (such as in modular exponentiation).  This
4382   can nearly halve the time required to do modular exponentiation,
4383   as compared to using the full integer divide to reduce.
4384 
4385   This algorithm was derived from the _Handbook of Applied
4386   Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4387   pp. 603-604.
4388  */
4389 
4390 mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4391 {
4392   mp_int   q;
4393   mp_err   res;
4394 
4395   if((res = mp_init_copy(&q, x)) != MP_OKAY)
4396     return res;
4397 
4398   s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
4399   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
4400   s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
4401 
4402   /* x = x mod b^(k+1), quick (no division) */
4403   s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4404 
4405   /* q = q * m mod b^(k+1), quick (no division) */
4406   s_mp_mul(&q, m);
4407   s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4408 
4409   /* x = x - q */
4410   if((res = mp_sub(x, &q, x)) != MP_OKAY)
4411     goto CLEANUP;
4412 
4413   /* If x < 0, add b^(k+1) to it */
4414   if(mp_cmp_z(x) < 0) {
4415     mp_set(&q, 1);
4416     if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4417       goto CLEANUP;
4418     if((res = mp_add(x, &q, x)) != MP_OKAY)
4419       goto CLEANUP;
4420   }
4421 
4422   /* Back off if it's too big */
4423   while(mp_cmp(x, m) >= 0) {
4424     if((res = s_mp_sub(x, m)) != MP_OKAY)
4425       break;
4426   }
4427 
4428  CLEANUP:
4429   mp_clear(&q);
4430 
4431   return res;
4432 
4433 } /* end s_mp_reduce() */
4434 
4435 /* }}} */
4436 
4437 /* }}} */
4438 
4439 /* {{{ Primitive comparisons */
4440 
4441 /* {{{ s_mp_cmp(a, b) */
4442 
4443 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4444 int      s_mp_cmp(const mp_int *a, const mp_int *b)
4445 {
4446   mp_size used_a = MP_USED(a);
4447   {
4448     mp_size used_b = MP_USED(b);
4449 
4450     if (used_a > used_b)
4451       goto IS_GT;
4452     if (used_a < used_b)
4453       goto IS_LT;
4454   }
4455   {
4456     mp_digit *pa, *pb;
4457     mp_digit da = 0, db = 0;
4458 
4459 #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4460 
4461     pa = MP_DIGITS(a) + used_a;
4462     pb = MP_DIGITS(b) + used_a;
4463     while (used_a >= 4) {
4464       pa     -= 4;
4465       pb     -= 4;
4466       used_a -= 4;
4467       CMP_AB(3);
4468       CMP_AB(2);
4469       CMP_AB(1);
4470       CMP_AB(0);
4471     }
4472     while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4473       /* do nothing */;
4474 done:
4475     if (da > db)
4476       goto IS_GT;
4477     if (da < db)
4478       goto IS_LT;
4479   }
4480   return MP_EQ;
4481 IS_LT:
4482   return MP_LT;
4483 IS_GT:
4484   return MP_GT;
4485 } /* end s_mp_cmp() */
4486 
4487 /* }}} */
4488 
4489 /* {{{ s_mp_cmp_d(a, d) */
4490 
4491 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4492 int      s_mp_cmp_d(const mp_int *a, mp_digit d)
4493 {
4494   if(USED(a) > 1)
4495     return MP_GT;
4496 
4497   if(DIGIT(a, 0) < d)
4498     return MP_LT;
4499   else if(DIGIT(a, 0) > d)
4500     return MP_GT;
4501   else
4502     return MP_EQ;
4503 
4504 } /* end s_mp_cmp_d() */
4505 
4506 /* }}} */
4507 
4508 /* {{{ s_mp_ispow2(v) */
4509 
4510 /*
4511   Returns -1 if the value is not a power of two; otherwise, it returns
4512   k such that v = 2^k, i.e. lg(v).
4513  */
4514 int      s_mp_ispow2(const mp_int *v)
4515 {
4516   mp_digit d;
4517   int      extra = 0, ix;
4518 
4519   ix = MP_USED(v) - 1;
4520   d = MP_DIGIT(v, ix); /* most significant digit of v */
4521 
4522   extra = s_mp_ispow2d(d);
4523   if (extra < 0 || ix == 0)
4524     return extra;
4525 
4526   while (--ix >= 0) {
4527     if (DIGIT(v, ix) != 0)
4528       return -1; /* not a power of two */
4529     extra += MP_DIGIT_BIT;
4530   }
4531 
4532   return extra;
4533 
4534 } /* end s_mp_ispow2() */
4535 
4536 /* }}} */
4537 
4538 /* {{{ s_mp_ispow2d(d) */
4539 
4540 int      s_mp_ispow2d(mp_digit d)
4541 {
4542   if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4543     int pow = 0;
4544 #if defined (MP_USE_UINT_DIGIT)
4545     if (d & 0xffff0000U)
4546       pow += 16;
4547     if (d & 0xff00ff00U)
4548       pow += 8;
4549     if (d & 0xf0f0f0f0U)
4550       pow += 4;
4551     if (d & 0xccccccccU)
4552       pow += 2;
4553     if (d & 0xaaaaaaaaU)
4554       pow += 1;
4555 #elif defined(MP_USE_LONG_LONG_DIGIT)
4556     if (d & 0xffffffff00000000ULL)
4557       pow += 32;
4558     if (d & 0xffff0000ffff0000ULL)
4559       pow += 16;
4560     if (d & 0xff00ff00ff00ff00ULL)
4561       pow += 8;
4562     if (d & 0xf0f0f0f0f0f0f0f0ULL)
4563       pow += 4;
4564     if (d & 0xccccccccccccccccULL)
4565       pow += 2;
4566     if (d & 0xaaaaaaaaaaaaaaaaULL)
4567       pow += 1;
4568 #elif defined(MP_USE_LONG_DIGIT)
4569     if (d & 0xffffffff00000000UL)
4570       pow += 32;
4571     if (d & 0xffff0000ffff0000UL)
4572       pow += 16;
4573     if (d & 0xff00ff00ff00ff00UL)
4574       pow += 8;
4575     if (d & 0xf0f0f0f0f0f0f0f0UL)
4576       pow += 4;
4577     if (d & 0xccccccccccccccccUL)
4578       pow += 2;
4579     if (d & 0xaaaaaaaaaaaaaaaaUL)
4580       pow += 1;
4581 #else
4582 #error "unknown type for mp_digit"
4583 #endif
4584     return pow;
4585   }
4586   return -1;
4587 
4588 } /* end s_mp_ispow2d() */
4589 
4590 /* }}} */
4591 
4592 /* }}} */
4593 
4594 /* {{{ Primitive I/O helpers */
4595 
4596 /* {{{ s_mp_tovalue(ch, r) */
4597 
4598 /*
4599   Convert the given character to its digit value, in the given radix.
4600   If the given character is not understood in the given radix, -1 is
4601   returned.  Otherwise the digit's numeric value is returned.
4602 
4603   The results will be odd if you use a radix < 2 or > 62, you are
4604   expected to know what you're up to.
4605  */
4606 int      s_mp_tovalue(char ch, int r)
4607 {
4608   int    val, xch;
4609 
4610   if(r > 36)
4611     xch = ch;
4612   else
4613     xch = toupper(ch);
4614 
4615   if(isdigit(xch))
4616     val = xch - '0';
4617   else if(isupper(xch))
4618     val = xch - 'A' + 10;
4619   else if(islower(xch))
4620     val = xch - 'a' + 36;
4621   else if(xch == '+')
4622     val = 62;
4623   else if(xch == '/')
4624     val = 63;
4625   else
4626     return -1;
4627 
4628   if(val < 0 || val >= r)
4629     return -1;
4630 
4631   return val;
4632 
4633 } /* end s_mp_tovalue() */
4634 
4635 /* }}} */
4636 
4637 /* {{{ s_mp_todigit(val, r, low) */
4638 
4639 /*
4640   Convert val to a radix-r digit, if possible.  If val is out of range
4641   for r, returns zero.  Otherwise, returns an ASCII character denoting
4642   the value in the given radix.
4643 
4644   The results may be odd if you use a radix < 2 or > 64, you are
4645   expected to know what you're doing.
4646  */
4647 
4648 char     s_mp_todigit(mp_digit val, int r, int low)
4649 {
4650   char   ch;
4651 
4652   if(val >= r)
4653     return 0;
4654 
4655   ch = s_dmap_1[val];
4656 
4657   if(r <= 36 && low)
4658     ch = tolower(ch);
4659 
4660   return ch;
4661 
4662 } /* end s_mp_todigit() */
4663 
4664 /* }}} */
4665 
4666 /* {{{ s_mp_outlen(bits, radix) */
4667 
4668 /*
4669    Return an estimate for how long a string is needed to hold a radix
4670    r representation of a number with 'bits' significant bits, plus an
4671    extra for a zero terminator (assuming C style strings here)
4672  */
4673 int      s_mp_outlen(int bits, int r)
4674 {
4675   return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4676 
4677 } /* end s_mp_outlen() */
4678 
4679 /* }}} */
4680 
4681 /* }}} */
4682 
4683 /* {{{ mp_read_unsigned_octets(mp, str, len) */
4684 /* mp_read_unsigned_octets(mp, str, len)
4685    Read in a raw value (base 256) into the given mp_int
4686    No sign bit, number is positive.  Leading zeros ignored.
4687  */
4688 
4689 mp_err
4690 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4691 {
4692   int            count;
4693   mp_err         res;
4694   mp_digit       d;
4695 
4696   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4697 
4698   mp_zero(mp);
4699 
4700   count = len % sizeof(mp_digit);
4701   if (count) {
4702     for (d = 0; count-- > 0; --len) {
4703       d = (d << 8) | *str++;
4704     }
4705     MP_DIGIT(mp, 0) = d;
4706   }
4707 
4708   /* Read the rest of the digits */
4709   for(; len > 0; len -= sizeof(mp_digit)) {
4710     for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4711       d = (d << 8) | *str++;
4712     }
4713     if (MP_EQ == mp_cmp_z(mp)) {
4714       if (!d)
4715 	continue;
4716     } else {
4717       if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4718 	return res;
4719     }
4720     MP_DIGIT(mp, 0) = d;
4721   }
4722   return MP_OKAY;
4723 } /* end mp_read_unsigned_octets() */
4724 /* }}} */
4725 
4726 /* {{{ mp_unsigned_octet_size(mp) */
4727 int
4728 mp_unsigned_octet_size(const mp_int *mp)
4729 {
4730   int  bytes;
4731   int  ix;
4732   mp_digit  d = 0;
4733 
4734   ARGCHK(mp != NULL, MP_BADARG);
4735   ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4736 
4737   bytes = (USED(mp) * sizeof(mp_digit));
4738 
4739   /* subtract leading zeros. */
4740   /* Iterate over each digit... */
4741   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4742     d = DIGIT(mp, ix);
4743     if (d)
4744 	break;
4745     bytes -= sizeof(d);
4746   }
4747   if (!bytes)
4748     return 1;
4749 
4750   /* Have MSD, check digit bytes, high order first */
4751   for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4752     unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4753     if (x)
4754 	break;
4755     --bytes;
4756   }
4757   return bytes;
4758 } /* end mp_unsigned_octet_size() */
4759 /* }}} */
4760 
4761 /* {{{ mp_to_unsigned_octets(mp, str) */
4762 /* output a buffer of big endian octets no longer than specified. */
4763 mp_err
4764 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4765 {
4766   int  ix, pos = 0;
4767   int  bytes;
4768 
4769   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4770 
4771   bytes = mp_unsigned_octet_size(mp);
4772   ARGCHK(bytes <= maxlen, MP_BADARG);
4773 
4774   /* Iterate over each digit... */
4775   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4776     mp_digit  d = DIGIT(mp, ix);
4777     int       jx;
4778 
4779     /* Unpack digit bytes, high order first */
4780     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4781       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4782       if (!pos && !x)	/* suppress leading zeros */
4783 	continue;
4784       str[pos++] = x;
4785     }
4786   }
4787   if (!pos)
4788     str[pos++] = 0;
4789   return pos;
4790 } /* end mp_to_unsigned_octets() */
4791 /* }}} */
4792 
4793 /* {{{ mp_to_signed_octets(mp, str) */
4794 /* output a buffer of big endian octets no longer than specified. */
4795 mp_err
4796 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4797 {
4798   int  ix, pos = 0;
4799   int  bytes;
4800 
4801   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4802 
4803   bytes = mp_unsigned_octet_size(mp);
4804   ARGCHK(bytes <= maxlen, MP_BADARG);
4805 
4806   /* Iterate over each digit... */
4807   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4808     mp_digit  d = DIGIT(mp, ix);
4809     int       jx;
4810 
4811     /* Unpack digit bytes, high order first */
4812     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4813       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4814       if (!pos) {
4815 	if (!x)		/* suppress leading zeros */
4816 	  continue;
4817 	if (x & 0x80) { /* add one leading zero to make output positive.  */
4818 	  ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4819 	  if (bytes + 1 > maxlen)
4820 	    return MP_BADARG;
4821 	  str[pos++] = 0;
4822 	}
4823       }
4824       str[pos++] = x;
4825     }
4826   }
4827   if (!pos)
4828     str[pos++] = 0;
4829   return pos;
4830 } /* end mp_to_signed_octets() */
4831 /* }}} */
4832 
4833 /* {{{ mp_to_fixlen_octets(mp, str) */
4834 /* output a buffer of big endian octets exactly as long as requested. */
4835 mp_err
4836 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4837 {
4838   int  ix, pos = 0;
4839   int  bytes;
4840 
4841   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4842 
4843   bytes = mp_unsigned_octet_size(mp);
4844   ARGCHK(bytes <= length, MP_BADARG);
4845 
4846   /* place any needed leading zeros */
4847   for (;length > bytes; --length) {
4848 	*str++ = 0;
4849   }
4850 
4851   /* Iterate over each digit... */
4852   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4853     mp_digit  d = DIGIT(mp, ix);
4854     int       jx;
4855 
4856     /* Unpack digit bytes, high order first */
4857     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4858       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4859       if (!pos && !x)	/* suppress leading zeros */
4860 	continue;
4861       str[pos++] = x;
4862     }
4863   }
4864   if (!pos)
4865     str[pos++] = 0;
4866   return MP_OKAY;
4867 } /* end mp_to_fixlen_octets() */
4868 /* }}} */
4869 
4870 
4871 /*------------------------------------------------------------------------*/
4872 /* HERE THERE BE DRAGONS                                                  */
4873