xref: /illumos-gate/usr/src/common/mpi/mpi.c (revision e77c795bcbe51aebd7579fe13cbf2a6d56eca47f)
1 /* BEGIN CSTYLED */
2 /*
3  *  mpi.c
4  *
5  *  Arbitrary precision integer arithmetic library
6  *
7  * ***** BEGIN LICENSE BLOCK *****
8  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
9  *
10  * The contents of this file are subject to the Mozilla Public License Version
11  * 1.1 (the "License"); you may not use this file except in compliance with
12  * the License. You may obtain a copy of the License at
13  * http://www.mozilla.org/MPL/
14  *
15  * Software distributed under the License is distributed on an "AS IS" basis,
16  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
17  * for the specific language governing rights and limitations under the
18  * License.
19  *
20  * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
21  *
22  * The Initial Developer of the Original Code is
23  * Michael J. Fromberger.
24  * Portions created by the Initial Developer are Copyright (C) 1998
25  * the Initial Developer. All Rights Reserved.
26  *
27  * Contributor(s):
28  *   Netscape Communications Corporation
29  *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
30  *
31  * Alternatively, the contents of this file may be used under the terms of
32  * either the GNU General Public License Version 2 or later (the "GPL"), or
33  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34  * in which case the provisions of the GPL or the LGPL are applicable instead
35  * of those above. If you wish to allow use of your version of this file only
36  * under the terms of either the GPL or the LGPL, and not to allow others to
37  * use your version of this file under the terms of the MPL, indicate your
38  * decision by deleting the provisions above and replace them with the notice
39  * and other provisions required by the GPL or the LGPL. If you do not delete
40  * the provisions above, a recipient may use your version of this file under
41  * the terms of any one of the MPL, the GPL or the LGPL.
42  *
43  * ***** END LICENSE BLOCK ***** */
44 /*
45  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
46  *
47  * Sun elects to use this software under the MPL license.
48  */
49 
50 /* $Id: mpi.c,v 1.45 2006/09/29 20:12:21 alexei.volkov.bugs%sun.com Exp $ */
51 
52 #include "mpi-priv.h"
53 #if defined(OSF1)
54 #include <c_asm.h>
55 #endif
56 
57 #if MP_LOGTAB
58 /*
59   A table of the logs of 2 for various bases (the 0 and 1 entries of
60   this table are meaningless and should not be referenced).
61 
62   This table is used to compute output lengths for the mp_toradix()
63   function.  Since a number n in radix r takes up about log_r(n)
64   digits, we estimate the output size by taking the least integer
65   greater than log_r(n), where:
66 
67   log_r(n) = log_2(n) * log_r(2)
68 
69   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
70   which are the output bases supported.
71  */
72 #include "logtab.h"
73 #endif
74 
75 /* {{{ Constant strings */
76 
77 /* Constant strings returned by mp_strerror() */
78 static const char *mp_err_string[] = {
79   "unknown result code",     /* say what?            */
80   "boolean true",            /* MP_OKAY, MP_YES      */
81   "boolean false",           /* MP_NO                */
82   "out of memory",           /* MP_MEM               */
83   "argument out of range",   /* MP_RANGE             */
84   "invalid input parameter", /* MP_BADARG            */
85   "result is undefined"      /* MP_UNDEF             */
86 };
87 
88 /* Value to digit maps for radix conversion   */
89 
90 /* s_dmap_1 - standard digits and letters */
91 static const char *s_dmap_1 =
92   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
93 
94 /* }}} */
95 
96 unsigned long mp_allocs;
97 unsigned long mp_frees;
98 unsigned long mp_copies;
99 
100 /* {{{ Default precision manipulation */
101 
102 /* Default precision for newly created mp_int's      */
103 static mp_size s_mp_defprec = MP_DEFPREC;
104 
105 mp_size mp_get_prec(void)
106 {
107   return s_mp_defprec;
108 
109 } /* end mp_get_prec() */
110 
111 void         mp_set_prec(mp_size prec)
112 {
113   if(prec == 0)
114     s_mp_defprec = MP_DEFPREC;
115   else
116     s_mp_defprec = prec;
117 
118 } /* end mp_set_prec() */
119 
120 /* }}} */
121 
122 /*------------------------------------------------------------------------*/
123 /* {{{ mp_init(mp, kmflag) */
124 
125 /*
126   mp_init(mp, kmflag)
127 
128   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
129   MP_MEM if memory could not be allocated for the structure.
130  */
131 
132 mp_err mp_init(mp_int *mp, int kmflag)
133 {
134   return mp_init_size(mp, s_mp_defprec, kmflag);
135 
136 } /* end mp_init() */
137 
138 /* }}} */
139 
140 /* {{{ mp_init_size(mp, prec, kmflag) */
141 
142 /*
143   mp_init_size(mp, prec, kmflag)
144 
145   Initialize a new zero-valued mp_int with at least the given
146   precision; returns MP_OKAY if successful, or MP_MEM if memory could
147   not be allocated for the structure.
148  */
149 
150 mp_err mp_init_size(mp_int *mp, mp_size prec, int kmflag)
151 {
152   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
153 
154   prec = MP_ROUNDUP(prec, s_mp_defprec);
155   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit), kmflag)) == NULL)
156     return MP_MEM;
157 
158   SIGN(mp) = ZPOS;
159   USED(mp) = 1;
160   ALLOC(mp) = prec;
161   FLAG(mp) = kmflag;
162 
163   return MP_OKAY;
164 
165 } /* end mp_init_size() */
166 
167 /* }}} */
168 
169 /* {{{ mp_init_copy(mp, from) */
170 
171 /*
172   mp_init_copy(mp, from)
173 
174   Initialize mp as an exact copy of from.  Returns MP_OKAY if
175   successful, MP_MEM if memory could not be allocated for the new
176   structure.
177  */
178 
179 mp_err mp_init_copy(mp_int *mp, const mp_int *from)
180 {
181   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
182 
183   if(mp == from)
184     return MP_OKAY;
185 
186   if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
187     return MP_MEM;
188 
189   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
190   USED(mp) = USED(from);
191   ALLOC(mp) = ALLOC(from);
192   SIGN(mp) = SIGN(from);
193   FLAG(mp) = FLAG(from);
194 
195   return MP_OKAY;
196 
197 } /* end mp_init_copy() */
198 
199 /* }}} */
200 
201 /* {{{ mp_copy(from, to) */
202 
203 /*
204   mp_copy(from, to)
205 
206   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
207   'to' has already been initialized (if not, use mp_init_copy()
208   instead). If 'from' and 'to' are identical, nothing happens.
209  */
210 
211 mp_err mp_copy(const mp_int *from, mp_int *to)
212 {
213   ARGCHK(from != NULL && to != NULL, MP_BADARG);
214 
215   if(from == to)
216     return MP_OKAY;
217 
218   ++mp_copies;
219   { /* copy */
220     mp_digit   *tmp;
221 
222     /*
223       If the allocated buffer in 'to' already has enough space to hold
224       all the used digits of 'from', we'll re-use it to avoid hitting
225       the memory allocater more than necessary; otherwise, we'd have
226       to grow anyway, so we just allocate a hunk and make the copy as
227       usual
228      */
229     if(ALLOC(to) >= USED(from)) {
230       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
231       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
232 
233     } else {
234       if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
235 	return MP_MEM;
236 
237       s_mp_copy(DIGITS(from), tmp, USED(from));
238 
239       if(DIGITS(to) != NULL) {
240 #if MP_CRYPTO
241 	s_mp_setz(DIGITS(to), ALLOC(to));
242 #endif
243 	s_mp_free(DIGITS(to), ALLOC(to));
244       }
245 
246       DIGITS(to) = tmp;
247       ALLOC(to) = ALLOC(from);
248     }
249 
250     /* Copy the precision and sign from the original */
251     USED(to) = USED(from);
252     SIGN(to) = SIGN(from);
253     FLAG(to) = FLAG(from);
254   } /* end copy */
255 
256   return MP_OKAY;
257 
258 } /* end mp_copy() */
259 
260 /* }}} */
261 
262 /* {{{ mp_exch(mp1, mp2) */
263 
264 /*
265   mp_exch(mp1, mp2)
266 
267   Exchange mp1 and mp2 without allocating any intermediate memory
268   (well, unless you count the stack space needed for this call and the
269   locals it creates...).  This cannot fail.
270  */
271 
272 void mp_exch(mp_int *mp1, mp_int *mp2)
273 {
274 #if MP_ARGCHK == 2
275   assert(mp1 != NULL && mp2 != NULL);
276 #else
277   if(mp1 == NULL || mp2 == NULL)
278     return;
279 #endif
280 
281   s_mp_exch(mp1, mp2);
282 
283 } /* end mp_exch() */
284 
285 /* }}} */
286 
287 /* {{{ mp_clear(mp) */
288 
289 /*
290   mp_clear(mp)
291 
292   Release the storage used by an mp_int, and void its fields so that
293   if someone calls mp_clear() again for the same int later, we won't
294   get tollchocked.
295  */
296 
297 void   mp_clear(mp_int *mp)
298 {
299   if(mp == NULL)
300     return;
301 
302   if(DIGITS(mp) != NULL) {
303 #if MP_CRYPTO
304     s_mp_setz(DIGITS(mp), ALLOC(mp));
305 #endif
306     s_mp_free(DIGITS(mp), ALLOC(mp));
307     DIGITS(mp) = NULL;
308   }
309 
310   USED(mp) = 0;
311   ALLOC(mp) = 0;
312 
313 } /* end mp_clear() */
314 
315 /* }}} */
316 
317 /* {{{ mp_zero(mp) */
318 
319 /*
320   mp_zero(mp)
321 
322   Set mp to zero.  Does not change the allocated size of the structure,
323   and therefore cannot fail (except on a bad argument, which we ignore)
324  */
325 void   mp_zero(mp_int *mp)
326 {
327   if(mp == NULL)
328     return;
329 
330   s_mp_setz(DIGITS(mp), ALLOC(mp));
331   USED(mp) = 1;
332   SIGN(mp) = ZPOS;
333 
334 } /* end mp_zero() */
335 
336 /* }}} */
337 
338 /* {{{ mp_set(mp, d) */
339 
340 void   mp_set(mp_int *mp, mp_digit d)
341 {
342   if(mp == NULL)
343     return;
344 
345   mp_zero(mp);
346   DIGIT(mp, 0) = d;
347 
348 } /* end mp_set() */
349 
350 /* }}} */
351 
352 /* {{{ mp_set_int(mp, z) */
353 
354 mp_err mp_set_int(mp_int *mp, long z)
355 {
356   int            ix;
357   unsigned long  v = labs(z);
358   mp_err         res;
359 
360   ARGCHK(mp != NULL, MP_BADARG);
361 
362   mp_zero(mp);
363   if(z == 0)
364     return MP_OKAY;  /* shortcut for zero */
365 
366   if (sizeof v <= sizeof(mp_digit)) {
367     DIGIT(mp,0) = v;
368   } else {
369     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
370       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
371 	return res;
372 
373       res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
374       if (res != MP_OKAY)
375 	return res;
376     }
377   }
378   if(z < 0)
379     SIGN(mp) = NEG;
380 
381   return MP_OKAY;
382 
383 } /* end mp_set_int() */
384 
385 /* }}} */
386 
387 /* {{{ mp_set_ulong(mp, z) */
388 
389 mp_err mp_set_ulong(mp_int *mp, unsigned long z)
390 {
391   int            ix;
392   mp_err         res;
393 
394   ARGCHK(mp != NULL, MP_BADARG);
395 
396   mp_zero(mp);
397   if(z == 0)
398     return MP_OKAY;  /* shortcut for zero */
399 
400   if (sizeof z <= sizeof(mp_digit)) {
401     DIGIT(mp,0) = z;
402   } else {
403     for (ix = sizeof(long) - 1; ix >= 0; ix--) {
404       if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
405 	return res;
406 
407       res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
408       if (res != MP_OKAY)
409 	return res;
410     }
411   }
412   return MP_OKAY;
413 } /* end mp_set_ulong() */
414 
415 /* }}} */
416 
417 /*------------------------------------------------------------------------*/
418 /* {{{ Digit arithmetic */
419 
420 /* {{{ mp_add_d(a, d, b) */
421 
422 /*
423   mp_add_d(a, d, b)
424 
425   Compute the sum b = a + d, for a single digit d.  Respects the sign of
426   its primary addend (single digits are unsigned anyway).
427  */
428 
429 mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
430 {
431   mp_int   tmp;
432   mp_err   res;
433 
434   ARGCHK(a != NULL && b != NULL, MP_BADARG);
435 
436   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
437     return res;
438 
439   if(SIGN(&tmp) == ZPOS) {
440     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
441       goto CLEANUP;
442   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
443     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
444       goto CLEANUP;
445   } else {
446     mp_neg(&tmp, &tmp);
447 
448     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
449   }
450 
451   if(s_mp_cmp_d(&tmp, 0) == 0)
452     SIGN(&tmp) = ZPOS;
453 
454   s_mp_exch(&tmp, b);
455 
456 CLEANUP:
457   mp_clear(&tmp);
458   return res;
459 
460 } /* end mp_add_d() */
461 
462 /* }}} */
463 
464 /* {{{ mp_sub_d(a, d, b) */
465 
466 /*
467   mp_sub_d(a, d, b)
468 
469   Compute the difference b = a - d, for a single digit d.  Respects the
470   sign of its subtrahend (single digits are unsigned anyway).
471  */
472 
473 mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
474 {
475   mp_int   tmp;
476   mp_err   res;
477 
478   ARGCHK(a != NULL && b != NULL, MP_BADARG);
479 
480   if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
481     return res;
482 
483   if(SIGN(&tmp) == NEG) {
484     if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
485       goto CLEANUP;
486   } else if(s_mp_cmp_d(&tmp, d) >= 0) {
487     if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
488       goto CLEANUP;
489   } else {
490     mp_neg(&tmp, &tmp);
491 
492     DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
493     SIGN(&tmp) = NEG;
494   }
495 
496   if(s_mp_cmp_d(&tmp, 0) == 0)
497     SIGN(&tmp) = ZPOS;
498 
499   s_mp_exch(&tmp, b);
500 
501 CLEANUP:
502   mp_clear(&tmp);
503   return res;
504 
505 } /* end mp_sub_d() */
506 
507 /* }}} */
508 
509 /* {{{ mp_mul_d(a, d, b) */
510 
511 /*
512   mp_mul_d(a, d, b)
513 
514   Compute the product b = a * d, for a single digit d.  Respects the sign
515   of its multiplicand (single digits are unsigned anyway)
516  */
517 
518 mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
519 {
520   mp_err  res;
521 
522   ARGCHK(a != NULL && b != NULL, MP_BADARG);
523 
524   if(d == 0) {
525     mp_zero(b);
526     return MP_OKAY;
527   }
528 
529   if((res = mp_copy(a, b)) != MP_OKAY)
530     return res;
531 
532   res = s_mp_mul_d(b, d);
533 
534   return res;
535 
536 } /* end mp_mul_d() */
537 
538 /* }}} */
539 
540 /* {{{ mp_mul_2(a, c) */
541 
542 mp_err mp_mul_2(const mp_int *a, mp_int *c)
543 {
544   mp_err  res;
545 
546   ARGCHK(a != NULL && c != NULL, MP_BADARG);
547 
548   if((res = mp_copy(a, c)) != MP_OKAY)
549     return res;
550 
551   return s_mp_mul_2(c);
552 
553 } /* end mp_mul_2() */
554 
555 /* }}} */
556 
557 /* {{{ mp_div_d(a, d, q, r) */
558 
559 /*
560   mp_div_d(a, d, q, r)
561 
562   Compute the quotient q = a / d and remainder r = a mod d, for a
563   single digit d.  Respects the sign of its divisor (single digits are
564   unsigned anyway).
565  */
566 
567 mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
568 {
569   mp_err   res;
570   mp_int   qp;
571   mp_digit rem;
572   int      pow;
573 
574   ARGCHK(a != NULL, MP_BADARG);
575 
576   if(d == 0)
577     return MP_RANGE;
578 
579   /* Shortcut for powers of two ... */
580   if((pow = s_mp_ispow2d(d)) >= 0) {
581     mp_digit  mask;
582 
583     mask = ((mp_digit)1 << pow) - 1;
584     rem = DIGIT(a, 0) & mask;
585 
586     if(q) {
587       mp_copy(a, q);
588       s_mp_div_2d(q, pow);
589     }
590 
591     if(r)
592       *r = rem;
593 
594     return MP_OKAY;
595   }
596 
597   if((res = mp_init_copy(&qp, a)) != MP_OKAY)
598     return res;
599 
600   res = s_mp_div_d(&qp, d, &rem);
601 
602   if(s_mp_cmp_d(&qp, 0) == 0)
603     SIGN(q) = ZPOS;
604 
605   if(r)
606     *r = rem;
607 
608   if(q)
609     s_mp_exch(&qp, q);
610 
611   mp_clear(&qp);
612   return res;
613 
614 } /* end mp_div_d() */
615 
616 /* }}} */
617 
618 /* {{{ mp_div_2(a, c) */
619 
620 /*
621   mp_div_2(a, c)
622 
623   Compute c = a / 2, disregarding the remainder.
624  */
625 
626 mp_err mp_div_2(const mp_int *a, mp_int *c)
627 {
628   mp_err  res;
629 
630   ARGCHK(a != NULL && c != NULL, MP_BADARG);
631 
632   if((res = mp_copy(a, c)) != MP_OKAY)
633     return res;
634 
635   s_mp_div_2(c);
636 
637   return MP_OKAY;
638 
639 } /* end mp_div_2() */
640 
641 /* }}} */
642 
643 /* {{{ mp_expt_d(a, d, b) */
644 
645 mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
646 {
647   mp_int   s, x;
648   mp_err   res;
649 
650   ARGCHK(a != NULL && c != NULL, MP_BADARG);
651 
652   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
653     return res;
654   if((res = mp_init_copy(&x, a)) != MP_OKAY)
655     goto X;
656 
657   DIGIT(&s, 0) = 1;
658 
659   while(d != 0) {
660     if(d & 1) {
661       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
662 	goto CLEANUP;
663     }
664 
665     d /= 2;
666 
667     if((res = s_mp_sqr(&x)) != MP_OKAY)
668       goto CLEANUP;
669   }
670 
671   s_mp_exch(&s, c);
672 
673 CLEANUP:
674   mp_clear(&x);
675 X:
676   mp_clear(&s);
677 
678   return res;
679 
680 } /* end mp_expt_d() */
681 
682 /* }}} */
683 
684 /* }}} */
685 
686 /*------------------------------------------------------------------------*/
687 /* {{{ Full arithmetic */
688 
689 /* {{{ mp_abs(a, b) */
690 
691 /*
692   mp_abs(a, b)
693 
694   Compute b = |a|.  'a' and 'b' may be identical.
695  */
696 
697 mp_err mp_abs(const mp_int *a, mp_int *b)
698 {
699   mp_err   res;
700 
701   ARGCHK(a != NULL && b != NULL, MP_BADARG);
702 
703   if((res = mp_copy(a, b)) != MP_OKAY)
704     return res;
705 
706   SIGN(b) = ZPOS;
707 
708   return MP_OKAY;
709 
710 } /* end mp_abs() */
711 
712 /* }}} */
713 
714 /* {{{ mp_neg(a, b) */
715 
716 /*
717   mp_neg(a, b)
718 
719   Compute b = -a.  'a' and 'b' may be identical.
720  */
721 
722 mp_err mp_neg(const mp_int *a, mp_int *b)
723 {
724   mp_err   res;
725 
726   ARGCHK(a != NULL && b != NULL, MP_BADARG);
727 
728   if((res = mp_copy(a, b)) != MP_OKAY)
729     return res;
730 
731   if(s_mp_cmp_d(b, 0) == MP_EQ)
732     SIGN(b) = ZPOS;
733   else
734     SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
735 
736   return MP_OKAY;
737 
738 } /* end mp_neg() */
739 
740 /* }}} */
741 
742 /* {{{ mp_add(a, b, c) */
743 
744 /*
745   mp_add(a, b, c)
746 
747   Compute c = a + b.  All parameters may be identical.
748  */
749 
750 mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
751 {
752   mp_err  res;
753 
754   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
755 
756   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
757     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
758   } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
759     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
760   } else {                          /* different sign: |a|  < |b|   */
761     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
762   }
763 
764   if (s_mp_cmp_d(c, 0) == MP_EQ)
765     SIGN(c) = ZPOS;
766 
767 CLEANUP:
768   return res;
769 
770 } /* end mp_add() */
771 
772 /* }}} */
773 
774 /* {{{ mp_sub(a, b, c) */
775 
776 /*
777   mp_sub(a, b, c)
778 
779   Compute c = a - b.  All parameters may be identical.
780  */
781 
782 mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
783 {
784   mp_err  res;
785   int     magDiff;
786 
787   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
788 
789   if (a == b) {
790     mp_zero(c);
791     return MP_OKAY;
792   }
793 
794   if (MP_SIGN(a) != MP_SIGN(b)) {
795     MP_CHECKOK( s_mp_add_3arg(a, b, c) );
796   } else if (!(magDiff = s_mp_cmp(a, b))) {
797     mp_zero(c);
798     res = MP_OKAY;
799   } else if (magDiff > 0) {
800     MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
801   } else {
802     MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
803     MP_SIGN(c) = !MP_SIGN(a);
804   }
805 
806   if (s_mp_cmp_d(c, 0) == MP_EQ)
807     MP_SIGN(c) = MP_ZPOS;
808 
809 CLEANUP:
810   return res;
811 
812 } /* end mp_sub() */
813 
814 /* }}} */
815 
816 /* {{{ mp_mul(a, b, c) */
817 
818 /*
819   mp_mul(a, b, c)
820 
821   Compute c = a * b.  All parameters may be identical.
822  */
823 mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
824 {
825   mp_digit *pb;
826   mp_int   tmp;
827   mp_err   res;
828   mp_size  ib;
829   mp_size  useda, usedb;
830 
831   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
832 
833   if (a == c) {
834     if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
835       return res;
836     if (a == b)
837       b = &tmp;
838     a = &tmp;
839   } else if (b == c) {
840     if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
841       return res;
842     b = &tmp;
843   } else {
844     MP_DIGITS(&tmp) = 0;
845   }
846 
847   if (MP_USED(a) < MP_USED(b)) {
848     const mp_int *xch = b;	/* switch a and b, to do fewer outer loops */
849     b = a;
850     a = xch;
851   }
852 
853   MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
854   if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
855     goto CLEANUP;
856 
857 #ifdef NSS_USE_COMBA
858   if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
859       if (MP_USED(a) == 4) {
860           s_mp_mul_comba_4(a, b, c);
861           goto CLEANUP;
862       }
863       if (MP_USED(a) == 8) {
864           s_mp_mul_comba_8(a, b, c);
865           goto CLEANUP;
866       }
867       if (MP_USED(a) == 16) {
868           s_mp_mul_comba_16(a, b, c);
869           goto CLEANUP;
870       }
871       if (MP_USED(a) == 32) {
872           s_mp_mul_comba_32(a, b, c);
873           goto CLEANUP;
874       }
875   }
876 #endif
877 
878   pb = MP_DIGITS(b);
879   s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
880 
881   /* Outer loop:  Digits of b */
882   useda = MP_USED(a);
883   usedb = MP_USED(b);
884   for (ib = 1; ib < usedb; ib++) {
885     mp_digit b_i    = *pb++;
886 
887     /* Inner product:  Digits of a */
888     if (b_i)
889       s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
890     else
891       MP_DIGIT(c, ib + useda) = b_i;
892   }
893 
894   s_mp_clamp(c);
895 
896   if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
897     SIGN(c) = ZPOS;
898   else
899     SIGN(c) = NEG;
900 
901 CLEANUP:
902   mp_clear(&tmp);
903   return res;
904 } /* end mp_mul() */
905 
906 /* }}} */
907 
908 /* {{{ mp_sqr(a, sqr) */
909 
910 #if MP_SQUARE
911 /*
912   Computes the square of a.  This can be done more
913   efficiently than a general multiplication, because many of the
914   computation steps are redundant when squaring.  The inner product
915   step is a bit more complicated, but we save a fair number of
916   iterations of the multiplication loop.
917  */
918 
919 /* sqr = a^2;   Caller provides both a and tmp; */
920 mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
921 {
922   mp_digit *pa;
923   mp_digit d;
924   mp_err   res;
925   mp_size  ix;
926   mp_int   tmp;
927   int      count;
928 
929   ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
930 
931   if (a == sqr) {
932     if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
933       return res;
934     a = &tmp;
935   } else {
936     DIGITS(&tmp) = 0;
937     res = MP_OKAY;
938   }
939 
940   ix = 2 * MP_USED(a);
941   if (ix > MP_ALLOC(sqr)) {
942     MP_USED(sqr) = 1;
943     MP_CHECKOK( s_mp_grow(sqr, ix) );
944   }
945   MP_USED(sqr) = ix;
946   MP_DIGIT(sqr, 0) = 0;
947 
948 #ifdef NSS_USE_COMBA
949   if (IS_POWER_OF_2(MP_USED(a))) {
950       if (MP_USED(a) == 4) {
951           s_mp_sqr_comba_4(a, sqr);
952           goto CLEANUP;
953       }
954       if (MP_USED(a) == 8) {
955           s_mp_sqr_comba_8(a, sqr);
956           goto CLEANUP;
957       }
958       if (MP_USED(a) == 16) {
959           s_mp_sqr_comba_16(a, sqr);
960           goto CLEANUP;
961       }
962       if (MP_USED(a) == 32) {
963           s_mp_sqr_comba_32(a, sqr);
964           goto CLEANUP;
965       }
966   }
967 #endif
968 
969   pa = MP_DIGITS(a);
970   count = MP_USED(a) - 1;
971   if (count > 0) {
972     d = *pa++;
973     s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
974     for (ix = 3; --count > 0; ix += 2) {
975       d = *pa++;
976       s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
977     } /* for(ix ...) */
978     MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
979 
980     /* now sqr *= 2 */
981     s_mp_mul_2(sqr);
982   } else {
983     MP_DIGIT(sqr, 1) = 0;
984   }
985 
986   /* now add the squares of the digits of a to sqr. */
987   s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
988 
989   SIGN(sqr) = ZPOS;
990   s_mp_clamp(sqr);
991 
992 CLEANUP:
993   mp_clear(&tmp);
994   return res;
995 
996 } /* end mp_sqr() */
997 #endif
998 
999 /* }}} */
1000 
1001 /* {{{ mp_div(a, b, q, r) */
1002 
1003 /*
1004   mp_div(a, b, q, r)
1005 
1006   Compute q = a / b and r = a mod b.  Input parameters may be re-used
1007   as output parameters.  If q or r is NULL, that portion of the
1008   computation will be discarded (although it will still be computed)
1009  */
1010 mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1011 {
1012   mp_err   res;
1013   mp_int   *pQ, *pR;
1014   mp_int   qtmp, rtmp, btmp;
1015   int      cmp;
1016   mp_sign  signA;
1017   mp_sign  signB;
1018 
1019   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1020 
1021   signA = MP_SIGN(a);
1022   signB = MP_SIGN(b);
1023 
1024   if(mp_cmp_z(b) == MP_EQ)
1025     return MP_RANGE;
1026 
1027   DIGITS(&qtmp) = 0;
1028   DIGITS(&rtmp) = 0;
1029   DIGITS(&btmp) = 0;
1030 
1031   /* Set up some temporaries... */
1032   if (!r || r == a || r == b) {
1033     MP_CHECKOK( mp_init_copy(&rtmp, a) );
1034     pR = &rtmp;
1035   } else {
1036     MP_CHECKOK( mp_copy(a, r) );
1037     pR = r;
1038   }
1039 
1040   if (!q || q == a || q == b) {
1041     MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a), FLAG(a)) );
1042     pQ = &qtmp;
1043   } else {
1044     MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1045     pQ = q;
1046     mp_zero(pQ);
1047   }
1048 
1049   /*
1050     If |a| <= |b|, we can compute the solution without division;
1051     otherwise, we actually do the work required.
1052    */
1053   if ((cmp = s_mp_cmp(a, b)) <= 0) {
1054     if (cmp) {
1055       /* r was set to a above. */
1056       mp_zero(pQ);
1057     } else {
1058       mp_set(pQ, 1);
1059       mp_zero(pR);
1060     }
1061   } else {
1062     MP_CHECKOK( mp_init_copy(&btmp, b) );
1063     MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1064   }
1065 
1066   /* Compute the signs for the output  */
1067   MP_SIGN(pR) = signA;   /* Sr = Sa              */
1068   /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1069   MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1070 
1071   if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1072     SIGN(pQ) = ZPOS;
1073   if(s_mp_cmp_d(pR, 0) == MP_EQ)
1074     SIGN(pR) = ZPOS;
1075 
1076   /* Copy output, if it is needed      */
1077   if(q && q != pQ)
1078     s_mp_exch(pQ, q);
1079 
1080   if(r && r != pR)
1081     s_mp_exch(pR, r);
1082 
1083 CLEANUP:
1084   mp_clear(&btmp);
1085   mp_clear(&rtmp);
1086   mp_clear(&qtmp);
1087 
1088   return res;
1089 
1090 } /* end mp_div() */
1091 
1092 /* }}} */
1093 
1094 /* {{{ mp_div_2d(a, d, q, r) */
1095 
1096 mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1097 {
1098   mp_err  res;
1099 
1100   ARGCHK(a != NULL, MP_BADARG);
1101 
1102   if(q) {
1103     if((res = mp_copy(a, q)) != MP_OKAY)
1104       return res;
1105   }
1106   if(r) {
1107     if((res = mp_copy(a, r)) != MP_OKAY)
1108       return res;
1109   }
1110   if(q) {
1111     s_mp_div_2d(q, d);
1112   }
1113   if(r) {
1114     s_mp_mod_2d(r, d);
1115   }
1116 
1117   return MP_OKAY;
1118 
1119 } /* end mp_div_2d() */
1120 
1121 /* }}} */
1122 
1123 /* {{{ mp_expt(a, b, c) */
1124 
1125 /*
1126   mp_expt(a, b, c)
1127 
1128   Compute c = a ** b, that is, raise a to the b power.  Uses a
1129   standard iterative square-and-multiply technique.
1130  */
1131 
1132 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1133 {
1134   mp_int   s, x;
1135   mp_err   res;
1136   mp_digit d;
1137   int      dig, bit;
1138 
1139   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1140 
1141   if(mp_cmp_z(b) < 0)
1142     return MP_RANGE;
1143 
1144   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1145     return res;
1146 
1147   mp_set(&s, 1);
1148 
1149   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1150     goto X;
1151 
1152   /* Loop over low-order digits in ascending order */
1153   for(dig = 0; dig < (USED(b) - 1); dig++) {
1154     d = DIGIT(b, dig);
1155 
1156     /* Loop over bits of each non-maximal digit */
1157     for(bit = 0; bit < DIGIT_BIT; bit++) {
1158       if(d & 1) {
1159 	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1160 	  goto CLEANUP;
1161       }
1162 
1163       d >>= 1;
1164 
1165       if((res = s_mp_sqr(&x)) != MP_OKAY)
1166 	goto CLEANUP;
1167     }
1168   }
1169 
1170   /* Consider now the last digit... */
1171   d = DIGIT(b, dig);
1172 
1173   while(d) {
1174     if(d & 1) {
1175       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1176 	goto CLEANUP;
1177     }
1178 
1179     d >>= 1;
1180 
1181     if((res = s_mp_sqr(&x)) != MP_OKAY)
1182       goto CLEANUP;
1183   }
1184 
1185   if(mp_iseven(b))
1186     SIGN(&s) = SIGN(a);
1187 
1188   res = mp_copy(&s, c);
1189 
1190 CLEANUP:
1191   mp_clear(&x);
1192 X:
1193   mp_clear(&s);
1194 
1195   return res;
1196 
1197 } /* end mp_expt() */
1198 
1199 /* }}} */
1200 
1201 /* {{{ mp_2expt(a, k) */
1202 
1203 /* Compute a = 2^k */
1204 
1205 mp_err mp_2expt(mp_int *a, mp_digit k)
1206 {
1207   ARGCHK(a != NULL, MP_BADARG);
1208 
1209   return s_mp_2expt(a, k);
1210 
1211 } /* end mp_2expt() */
1212 
1213 /* }}} */
1214 
1215 /* {{{ mp_mod(a, m, c) */
1216 
1217 /*
1218   mp_mod(a, m, c)
1219 
1220   Compute c = a (mod m).  Result will always be 0 <= c < m.
1221  */
1222 
1223 mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1224 {
1225   mp_err  res;
1226   int     mag;
1227 
1228   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1229 
1230   if(SIGN(m) == NEG)
1231     return MP_RANGE;
1232 
1233   /*
1234      If |a| > m, we need to divide to get the remainder and take the
1235      absolute value.
1236 
1237      If |a| < m, we don't need to do any division, just copy and adjust
1238      the sign (if a is negative).
1239 
1240      If |a| == m, we can simply set the result to zero.
1241 
1242      This order is intended to minimize the average path length of the
1243      comparison chain on common workloads -- the most frequent cases are
1244      that |a| != m, so we do those first.
1245    */
1246   if((mag = s_mp_cmp(a, m)) > 0) {
1247     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1248       return res;
1249 
1250     if(SIGN(c) == NEG) {
1251       if((res = mp_add(c, m, c)) != MP_OKAY)
1252 	return res;
1253     }
1254 
1255   } else if(mag < 0) {
1256     if((res = mp_copy(a, c)) != MP_OKAY)
1257       return res;
1258 
1259     if(mp_cmp_z(a) < 0) {
1260       if((res = mp_add(c, m, c)) != MP_OKAY)
1261 	return res;
1262 
1263     }
1264 
1265   } else {
1266     mp_zero(c);
1267 
1268   }
1269 
1270   return MP_OKAY;
1271 
1272 } /* end mp_mod() */
1273 
1274 /* }}} */
1275 
1276 /* {{{ mp_mod_d(a, d, c) */
1277 
1278 /*
1279   mp_mod_d(a, d, c)
1280 
1281   Compute c = a (mod d).  Result will always be 0 <= c < d
1282  */
1283 mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1284 {
1285   mp_err   res;
1286   mp_digit rem;
1287 
1288   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1289 
1290   if(s_mp_cmp_d(a, d) > 0) {
1291     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1292       return res;
1293 
1294   } else {
1295     if(SIGN(a) == NEG)
1296       rem = d - DIGIT(a, 0);
1297     else
1298       rem = DIGIT(a, 0);
1299   }
1300 
1301   if(c)
1302     *c = rem;
1303 
1304   return MP_OKAY;
1305 
1306 } /* end mp_mod_d() */
1307 
1308 /* }}} */
1309 
1310 /* {{{ mp_sqrt(a, b) */
1311 
1312 /*
1313   mp_sqrt(a, b)
1314 
1315   Compute the integer square root of a, and store the result in b.
1316   Uses an integer-arithmetic version of Newton's iterative linear
1317   approximation technique to determine this value; the result has the
1318   following two properties:
1319 
1320      b^2 <= a
1321      (b+1)^2 >= a
1322 
1323   It is a range error to pass a negative value.
1324  */
1325 mp_err mp_sqrt(const mp_int *a, mp_int *b)
1326 {
1327   mp_int   x, t;
1328   mp_err   res;
1329   mp_size  used;
1330 
1331   ARGCHK(a != NULL && b != NULL, MP_BADARG);
1332 
1333   /* Cannot take square root of a negative value */
1334   if(SIGN(a) == NEG)
1335     return MP_RANGE;
1336 
1337   /* Special cases for zero and one, trivial     */
1338   if(mp_cmp_d(a, 1) <= 0)
1339     return mp_copy(a, b);
1340 
1341   /* Initialize the temporaries we'll use below  */
1342   if((res = mp_init_size(&t, USED(a), FLAG(a))) != MP_OKAY)
1343     return res;
1344 
1345   /* Compute an initial guess for the iteration as a itself */
1346   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1347     goto X;
1348 
1349   used = MP_USED(&x);
1350   if (used > 1) {
1351     s_mp_rshd(&x, used / 2);
1352   }
1353 
1354   for(;;) {
1355     /* t = (x * x) - a */
1356     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
1357     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1358        (res = mp_sub(&t, a, &t)) != MP_OKAY)
1359       goto CLEANUP;
1360 
1361     /* t = t / 2x       */
1362     s_mp_mul_2(&x);
1363     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1364       goto CLEANUP;
1365     s_mp_div_2(&x);
1366 
1367     /* Terminate the loop, if the quotient is zero */
1368     if(mp_cmp_z(&t) == MP_EQ)
1369       break;
1370 
1371     /* x = x - t       */
1372     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1373       goto CLEANUP;
1374 
1375   }
1376 
1377   /* Copy result to output parameter */
1378   mp_sub_d(&x, 1, &x);
1379   s_mp_exch(&x, b);
1380 
1381  CLEANUP:
1382   mp_clear(&x);
1383  X:
1384   mp_clear(&t);
1385 
1386   return res;
1387 
1388 } /* end mp_sqrt() */
1389 
1390 /* }}} */
1391 
1392 /* }}} */
1393 
1394 /*------------------------------------------------------------------------*/
1395 /* {{{ Modular arithmetic */
1396 
1397 #if MP_MODARITH
1398 /* {{{ mp_addmod(a, b, m, c) */
1399 
1400 /*
1401   mp_addmod(a, b, m, c)
1402 
1403   Compute c = (a + b) mod m
1404  */
1405 
1406 mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1407 {
1408   mp_err  res;
1409 
1410   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1411 
1412   if((res = mp_add(a, b, c)) != MP_OKAY)
1413     return res;
1414   if((res = mp_mod(c, m, c)) != MP_OKAY)
1415     return res;
1416 
1417   return MP_OKAY;
1418 
1419 }
1420 
1421 /* }}} */
1422 
1423 /* {{{ mp_submod(a, b, m, c) */
1424 
1425 /*
1426   mp_submod(a, b, m, c)
1427 
1428   Compute c = (a - b) mod m
1429  */
1430 
1431 mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1432 {
1433   mp_err  res;
1434 
1435   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1436 
1437   if((res = mp_sub(a, b, c)) != MP_OKAY)
1438     return res;
1439   if((res = mp_mod(c, m, c)) != MP_OKAY)
1440     return res;
1441 
1442   return MP_OKAY;
1443 
1444 }
1445 
1446 /* }}} */
1447 
1448 /* {{{ mp_mulmod(a, b, m, c) */
1449 
1450 /*
1451   mp_mulmod(a, b, m, c)
1452 
1453   Compute c = (a * b) mod m
1454  */
1455 
1456 mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1457 {
1458   mp_err  res;
1459 
1460   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1461 
1462   if((res = mp_mul(a, b, c)) != MP_OKAY)
1463     return res;
1464   if((res = mp_mod(c, m, c)) != MP_OKAY)
1465     return res;
1466 
1467   return MP_OKAY;
1468 
1469 }
1470 
1471 /* }}} */
1472 
1473 /* {{{ mp_sqrmod(a, m, c) */
1474 
1475 #if MP_SQUARE
1476 mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1477 {
1478   mp_err  res;
1479 
1480   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1481 
1482   if((res = mp_sqr(a, c)) != MP_OKAY)
1483     return res;
1484   if((res = mp_mod(c, m, c)) != MP_OKAY)
1485     return res;
1486 
1487   return MP_OKAY;
1488 
1489 } /* end mp_sqrmod() */
1490 #endif
1491 
1492 /* }}} */
1493 
1494 /* {{{ s_mp_exptmod(a, b, m, c) */
1495 
1496 /*
1497   s_mp_exptmod(a, b, m, c)
1498 
1499   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1500   method with modular reductions at each step. (This is basically the
1501   same code as mp_expt(), except for the addition of the reductions)
1502 
1503   The modular reductions are done using Barrett's algorithm (see
1504   s_mp_reduce() below for details)
1505  */
1506 
1507 mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1508 {
1509   mp_int   s, x, mu;
1510   mp_err   res;
1511   mp_digit d;
1512   int      dig, bit;
1513 
1514   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1515 
1516   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1517     return MP_RANGE;
1518 
1519   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1520     return res;
1521   if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1522      (res = mp_mod(&x, m, &x)) != MP_OKAY)
1523     goto X;
1524   if((res = mp_init(&mu, FLAG(a))) != MP_OKAY)
1525     goto MU;
1526 
1527   mp_set(&s, 1);
1528 
1529   /* mu = b^2k / m */
1530   s_mp_add_d(&mu, 1);
1531   s_mp_lshd(&mu, 2 * USED(m));
1532   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1533     goto CLEANUP;
1534 
1535   /* Loop over digits of b in ascending order, except highest order */
1536   for(dig = 0; dig < (USED(b) - 1); dig++) {
1537     d = DIGIT(b, dig);
1538 
1539     /* Loop over the bits of the lower-order digits */
1540     for(bit = 0; bit < DIGIT_BIT; bit++) {
1541       if(d & 1) {
1542 	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1543 	  goto CLEANUP;
1544 	if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1545 	  goto CLEANUP;
1546       }
1547 
1548       d >>= 1;
1549 
1550       if((res = s_mp_sqr(&x)) != MP_OKAY)
1551 	goto CLEANUP;
1552       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1553 	goto CLEANUP;
1554     }
1555   }
1556 
1557   /* Now do the last digit... */
1558   d = DIGIT(b, dig);
1559 
1560   while(d) {
1561     if(d & 1) {
1562       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1563 	goto CLEANUP;
1564       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1565 	goto CLEANUP;
1566     }
1567 
1568     d >>= 1;
1569 
1570     if((res = s_mp_sqr(&x)) != MP_OKAY)
1571       goto CLEANUP;
1572     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1573       goto CLEANUP;
1574   }
1575 
1576   s_mp_exch(&s, c);
1577 
1578  CLEANUP:
1579   mp_clear(&mu);
1580  MU:
1581   mp_clear(&x);
1582  X:
1583   mp_clear(&s);
1584 
1585   return res;
1586 
1587 } /* end s_mp_exptmod() */
1588 
1589 /* }}} */
1590 
1591 /* {{{ mp_exptmod_d(a, d, m, c) */
1592 
1593 mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1594 {
1595   mp_int   s, x;
1596   mp_err   res;
1597 
1598   ARGCHK(a != NULL && c != NULL, MP_BADARG);
1599 
1600   if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1601     return res;
1602   if((res = mp_init_copy(&x, a)) != MP_OKAY)
1603     goto X;
1604 
1605   mp_set(&s, 1);
1606 
1607   while(d != 0) {
1608     if(d & 1) {
1609       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1610 	 (res = mp_mod(&s, m, &s)) != MP_OKAY)
1611 	goto CLEANUP;
1612     }
1613 
1614     d /= 2;
1615 
1616     if((res = s_mp_sqr(&x)) != MP_OKAY ||
1617        (res = mp_mod(&x, m, &x)) != MP_OKAY)
1618       goto CLEANUP;
1619   }
1620 
1621   s_mp_exch(&s, c);
1622 
1623 CLEANUP:
1624   mp_clear(&x);
1625 X:
1626   mp_clear(&s);
1627 
1628   return res;
1629 
1630 } /* end mp_exptmod_d() */
1631 
1632 /* }}} */
1633 #endif /* if MP_MODARITH */
1634 
1635 /* }}} */
1636 
1637 /*------------------------------------------------------------------------*/
1638 /* {{{ Comparison functions */
1639 
1640 /* {{{ mp_cmp_z(a) */
1641 
1642 /*
1643   mp_cmp_z(a)
1644 
1645   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1646  */
1647 
1648 int    mp_cmp_z(const mp_int *a)
1649 {
1650   if(SIGN(a) == NEG)
1651     return MP_LT;
1652   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1653     return MP_EQ;
1654   else
1655     return MP_GT;
1656 
1657 } /* end mp_cmp_z() */
1658 
1659 /* }}} */
1660 
1661 /* {{{ mp_cmp_d(a, d) */
1662 
1663 /*
1664   mp_cmp_d(a, d)
1665 
1666   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1667  */
1668 
1669 int    mp_cmp_d(const mp_int *a, mp_digit d)
1670 {
1671   ARGCHK(a != NULL, MP_EQ);
1672 
1673   if(SIGN(a) == NEG)
1674     return MP_LT;
1675 
1676   return s_mp_cmp_d(a, d);
1677 
1678 } /* end mp_cmp_d() */
1679 
1680 /* }}} */
1681 
1682 /* {{{ mp_cmp(a, b) */
1683 
1684 int    mp_cmp(const mp_int *a, const mp_int *b)
1685 {
1686   ARGCHK(a != NULL && b != NULL, MP_EQ);
1687 
1688   if(SIGN(a) == SIGN(b)) {
1689     int  mag;
1690 
1691     if((mag = s_mp_cmp(a, b)) == MP_EQ)
1692       return MP_EQ;
1693 
1694     if(SIGN(a) == ZPOS)
1695       return mag;
1696     else
1697       return -mag;
1698 
1699   } else if(SIGN(a) == ZPOS) {
1700     return MP_GT;
1701   } else {
1702     return MP_LT;
1703   }
1704 
1705 } /* end mp_cmp() */
1706 
1707 /* }}} */
1708 
1709 /* {{{ mp_cmp_mag(a, b) */
1710 
1711 /*
1712   mp_cmp_mag(a, b)
1713 
1714   Compares |a| <=> |b|, and returns an appropriate comparison result
1715  */
1716 
1717 int    mp_cmp_mag(mp_int *a, mp_int *b)
1718 {
1719   ARGCHK(a != NULL && b != NULL, MP_EQ);
1720 
1721   return s_mp_cmp(a, b);
1722 
1723 } /* end mp_cmp_mag() */
1724 
1725 /* }}} */
1726 
1727 /* {{{ mp_cmp_int(a, z, kmflag) */
1728 
1729 /*
1730   This just converts z to an mp_int, and uses the existing comparison
1731   routines.  This is sort of inefficient, but it's not clear to me how
1732   frequently this wil get used anyway.  For small positive constants,
1733   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1734  */
1735 int    mp_cmp_int(const mp_int *a, long z, int kmflag)
1736 {
1737   mp_int  tmp;
1738   int     out;
1739 
1740   ARGCHK(a != NULL, MP_EQ);
1741 
1742   mp_init(&tmp, kmflag); mp_set_int(&tmp, z);
1743   out = mp_cmp(a, &tmp);
1744   mp_clear(&tmp);
1745 
1746   return out;
1747 
1748 } /* end mp_cmp_int() */
1749 
1750 /* }}} */
1751 
1752 /* {{{ mp_isodd(a) */
1753 
1754 /*
1755   mp_isodd(a)
1756 
1757   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1758  */
1759 int    mp_isodd(const mp_int *a)
1760 {
1761   ARGCHK(a != NULL, 0);
1762 
1763   return (int)(DIGIT(a, 0) & 1);
1764 
1765 } /* end mp_isodd() */
1766 
1767 /* }}} */
1768 
1769 /* {{{ mp_iseven(a) */
1770 
1771 int    mp_iseven(const mp_int *a)
1772 {
1773   return !mp_isodd(a);
1774 
1775 } /* end mp_iseven() */
1776 
1777 /* }}} */
1778 
1779 /* }}} */
1780 
1781 /*------------------------------------------------------------------------*/
1782 /* {{{ Number theoretic functions */
1783 
1784 #if MP_NUMTH
1785 /* {{{ mp_gcd(a, b, c) */
1786 
1787 /*
1788   Like the old mp_gcd() function, except computes the GCD using the
1789   binary algorithm due to Josef Stein in 1961 (via Knuth).
1790  */
1791 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1792 {
1793   mp_err   res;
1794   mp_int   u, v, t;
1795   mp_size  k = 0;
1796 
1797   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1798 
1799   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1800       return MP_RANGE;
1801   if(mp_cmp_z(a) == MP_EQ) {
1802     return mp_copy(b, c);
1803   } else if(mp_cmp_z(b) == MP_EQ) {
1804     return mp_copy(a, c);
1805   }
1806 
1807   if((res = mp_init(&t, FLAG(a))) != MP_OKAY)
1808     return res;
1809   if((res = mp_init_copy(&u, a)) != MP_OKAY)
1810     goto U;
1811   if((res = mp_init_copy(&v, b)) != MP_OKAY)
1812     goto V;
1813 
1814   SIGN(&u) = ZPOS;
1815   SIGN(&v) = ZPOS;
1816 
1817   /* Divide out common factors of 2 until at least 1 of a, b is even */
1818   while(mp_iseven(&u) && mp_iseven(&v)) {
1819     s_mp_div_2(&u);
1820     s_mp_div_2(&v);
1821     ++k;
1822   }
1823 
1824   /* Initialize t */
1825   if(mp_isodd(&u)) {
1826     if((res = mp_copy(&v, &t)) != MP_OKAY)
1827       goto CLEANUP;
1828 
1829     /* t = -v */
1830     if(SIGN(&v) == ZPOS)
1831       SIGN(&t) = NEG;
1832     else
1833       SIGN(&t) = ZPOS;
1834 
1835   } else {
1836     if((res = mp_copy(&u, &t)) != MP_OKAY)
1837       goto CLEANUP;
1838 
1839   }
1840 
1841   for(;;) {
1842     while(mp_iseven(&t)) {
1843       s_mp_div_2(&t);
1844     }
1845 
1846     if(mp_cmp_z(&t) == MP_GT) {
1847       if((res = mp_copy(&t, &u)) != MP_OKAY)
1848 	goto CLEANUP;
1849 
1850     } else {
1851       if((res = mp_copy(&t, &v)) != MP_OKAY)
1852 	goto CLEANUP;
1853 
1854       /* v = -t */
1855       if(SIGN(&t) == ZPOS)
1856 	SIGN(&v) = NEG;
1857       else
1858 	SIGN(&v) = ZPOS;
1859     }
1860 
1861     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1862       goto CLEANUP;
1863 
1864     if(s_mp_cmp_d(&t, 0) == MP_EQ)
1865       break;
1866   }
1867 
1868   s_mp_2expt(&v, k);       /* v = 2^k   */
1869   res = mp_mul(&u, &v, c); /* c = u * v */
1870 
1871  CLEANUP:
1872   mp_clear(&v);
1873  V:
1874   mp_clear(&u);
1875  U:
1876   mp_clear(&t);
1877 
1878   return res;
1879 
1880 } /* end mp_gcd() */
1881 
1882 /* }}} */
1883 
1884 /* {{{ mp_lcm(a, b, c) */
1885 
1886 /* We compute the least common multiple using the rule:
1887 
1888    ab = [a, b](a, b)
1889 
1890    ... by computing the product, and dividing out the gcd.
1891  */
1892 
1893 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1894 {
1895   mp_int  gcd, prod;
1896   mp_err  res;
1897 
1898   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1899 
1900   /* Set up temporaries */
1901   if((res = mp_init(&gcd, FLAG(a))) != MP_OKAY)
1902     return res;
1903   if((res = mp_init(&prod, FLAG(a))) != MP_OKAY)
1904     goto GCD;
1905 
1906   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1907     goto CLEANUP;
1908   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1909     goto CLEANUP;
1910 
1911   res = mp_div(&prod, &gcd, c, NULL);
1912 
1913  CLEANUP:
1914   mp_clear(&prod);
1915  GCD:
1916   mp_clear(&gcd);
1917 
1918   return res;
1919 
1920 } /* end mp_lcm() */
1921 
1922 /* }}} */
1923 
1924 /* {{{ mp_xgcd(a, b, g, x, y) */
1925 
1926 /*
1927   mp_xgcd(a, b, g, x, y)
1928 
1929   Compute g = (a, b) and values x and y satisfying Bezout's identity
1930   (that is, ax + by = g).  This uses the binary extended GCD algorithm
1931   based on the Stein algorithm used for mp_gcd()
1932   See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
1933  */
1934 
1935 mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1936 {
1937   mp_int   gx, xc, yc, u, v, A, B, C, D;
1938   mp_int  *clean[9];
1939   mp_err   res;
1940   int      last = -1;
1941 
1942   if(mp_cmp_z(b) == 0)
1943     return MP_RANGE;
1944 
1945   /* Initialize all these variables we need */
1946   MP_CHECKOK( mp_init(&u, FLAG(a)) );
1947   clean[++last] = &u;
1948   MP_CHECKOK( mp_init(&v, FLAG(a)) );
1949   clean[++last] = &v;
1950   MP_CHECKOK( mp_init(&gx, FLAG(a)) );
1951   clean[++last] = &gx;
1952   MP_CHECKOK( mp_init(&A, FLAG(a)) );
1953   clean[++last] = &A;
1954   MP_CHECKOK( mp_init(&B, FLAG(a)) );
1955   clean[++last] = &B;
1956   MP_CHECKOK( mp_init(&C, FLAG(a)) );
1957   clean[++last] = &C;
1958   MP_CHECKOK( mp_init(&D, FLAG(a)) );
1959   clean[++last] = &D;
1960   MP_CHECKOK( mp_init_copy(&xc, a) );
1961   clean[++last] = &xc;
1962   mp_abs(&xc, &xc);
1963   MP_CHECKOK( mp_init_copy(&yc, b) );
1964   clean[++last] = &yc;
1965   mp_abs(&yc, &yc);
1966 
1967   mp_set(&gx, 1);
1968 
1969   /* Divide by two until at least one of them is odd */
1970   while(mp_iseven(&xc) && mp_iseven(&yc)) {
1971     mp_size nx = mp_trailing_zeros(&xc);
1972     mp_size ny = mp_trailing_zeros(&yc);
1973     mp_size n  = MP_MIN(nx, ny);
1974     s_mp_div_2d(&xc,n);
1975     s_mp_div_2d(&yc,n);
1976     MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1977   }
1978 
1979   mp_copy(&xc, &u);
1980   mp_copy(&yc, &v);
1981   mp_set(&A, 1); mp_set(&D, 1);
1982 
1983   /* Loop through binary GCD algorithm */
1984   do {
1985     while(mp_iseven(&u)) {
1986       s_mp_div_2(&u);
1987 
1988       if(mp_iseven(&A) && mp_iseven(&B)) {
1989 	s_mp_div_2(&A); s_mp_div_2(&B);
1990       } else {
1991 	MP_CHECKOK( mp_add(&A, &yc, &A) );
1992 	s_mp_div_2(&A);
1993 	MP_CHECKOK( mp_sub(&B, &xc, &B) );
1994 	s_mp_div_2(&B);
1995       }
1996     }
1997 
1998     while(mp_iseven(&v)) {
1999       s_mp_div_2(&v);
2000 
2001       if(mp_iseven(&C) && mp_iseven(&D)) {
2002 	s_mp_div_2(&C); s_mp_div_2(&D);
2003       } else {
2004 	MP_CHECKOK( mp_add(&C, &yc, &C) );
2005 	s_mp_div_2(&C);
2006 	MP_CHECKOK( mp_sub(&D, &xc, &D) );
2007 	s_mp_div_2(&D);
2008       }
2009     }
2010 
2011     if(mp_cmp(&u, &v) >= 0) {
2012       MP_CHECKOK( mp_sub(&u, &v, &u) );
2013       MP_CHECKOK( mp_sub(&A, &C, &A) );
2014       MP_CHECKOK( mp_sub(&B, &D, &B) );
2015     } else {
2016       MP_CHECKOK( mp_sub(&v, &u, &v) );
2017       MP_CHECKOK( mp_sub(&C, &A, &C) );
2018       MP_CHECKOK( mp_sub(&D, &B, &D) );
2019     }
2020   } while (mp_cmp_z(&u) != 0);
2021 
2022   /* copy results to output */
2023   if(x)
2024     MP_CHECKOK( mp_copy(&C, x) );
2025 
2026   if(y)
2027     MP_CHECKOK( mp_copy(&D, y) );
2028 
2029   if(g)
2030     MP_CHECKOK( mp_mul(&gx, &v, g) );
2031 
2032  CLEANUP:
2033   while(last >= 0)
2034     mp_clear(clean[last--]);
2035 
2036   return res;
2037 
2038 } /* end mp_xgcd() */
2039 
2040 /* }}} */
2041 
2042 mp_size mp_trailing_zeros(const mp_int *mp)
2043 {
2044   mp_digit d;
2045   mp_size  n = 0;
2046   int      ix;
2047 
2048   if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2049     return n;
2050 
2051   for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2052     n += MP_DIGIT_BIT;
2053   if (!d)
2054     return 0;	/* shouldn't happen, but ... */
2055 #if !defined(MP_USE_UINT_DIGIT)
2056   if (!(d & 0xffffffffU)) {
2057     d >>= 32;
2058     n  += 32;
2059   }
2060 #endif
2061   if (!(d & 0xffffU)) {
2062     d >>= 16;
2063     n  += 16;
2064   }
2065   if (!(d & 0xffU)) {
2066     d >>= 8;
2067     n  += 8;
2068   }
2069   if (!(d & 0xfU)) {
2070     d >>= 4;
2071     n  += 4;
2072   }
2073   if (!(d & 0x3U)) {
2074     d >>= 2;
2075     n  += 2;
2076   }
2077   if (!(d & 0x1U)) {
2078     d >>= 1;
2079     n  += 1;
2080   }
2081 #if MP_ARGCHK == 2
2082   assert(0 != (d & 1));
2083 #endif
2084   return n;
2085 }
2086 
2087 /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2088 ** Returns k (positive) or error (negative).
2089 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2090 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2091 */
2092 mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2093 {
2094   mp_err res;
2095   mp_err k    = 0;
2096   mp_int d, f, g;
2097 
2098   ARGCHK(a && p && c, MP_BADARG);
2099 
2100   MP_DIGITS(&d) = 0;
2101   MP_DIGITS(&f) = 0;
2102   MP_DIGITS(&g) = 0;
2103   MP_CHECKOK( mp_init(&d, FLAG(a)) );
2104   MP_CHECKOK( mp_init_copy(&f, a) );	/* f = a */
2105   MP_CHECKOK( mp_init_copy(&g, p) );	/* g = p */
2106 
2107   mp_set(c, 1);
2108   mp_zero(&d);
2109 
2110   if (mp_cmp_z(&f) == 0) {
2111     res = MP_UNDEF;
2112   } else
2113   for (;;) {
2114     int diff_sign;
2115     while (mp_iseven(&f)) {
2116       mp_size n = mp_trailing_zeros(&f);
2117       if (!n) {
2118 	res = MP_UNDEF;
2119 	goto CLEANUP;
2120       }
2121       s_mp_div_2d(&f, n);
2122       MP_CHECKOK( s_mp_mul_2d(&d, n) );
2123       k += n;
2124     }
2125     if (mp_cmp_d(&f, 1) == MP_EQ) {	/* f == 1 */
2126       res = k;
2127       break;
2128     }
2129     diff_sign = mp_cmp(&f, &g);
2130     if (diff_sign < 0) {		/* f < g */
2131       s_mp_exch(&f, &g);
2132       s_mp_exch(c, &d);
2133     } else if (diff_sign == 0) {		/* f == g */
2134       res = MP_UNDEF;		/* a and p are not relatively prime */
2135       break;
2136     }
2137     if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2138       MP_CHECKOK( mp_sub(&f, &g, &f) );	/* f = f - g */
2139       MP_CHECKOK( mp_sub(c,  &d,  c) );	/* c = c - d */
2140     } else {
2141       MP_CHECKOK( mp_add(&f, &g, &f) );	/* f = f + g */
2142       MP_CHECKOK( mp_add(c,  &d,  c) );	/* c = c + d */
2143     }
2144   }
2145   if (res >= 0) {
2146     while (MP_SIGN(c) != MP_ZPOS) {
2147       MP_CHECKOK( mp_add(c, p, c) );
2148     }
2149     res = k;
2150   }
2151 
2152 CLEANUP:
2153   mp_clear(&d);
2154   mp_clear(&f);
2155   mp_clear(&g);
2156   return res;
2157 }
2158 
2159 /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2160 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2161 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2162 */
2163 mp_digit  s_mp_invmod_radix(mp_digit P)
2164 {
2165   mp_digit T = P;
2166   T *= 2 - (P * T);
2167   T *= 2 - (P * T);
2168   T *= 2 - (P * T);
2169   T *= 2 - (P * T);
2170 #if !defined(MP_USE_UINT_DIGIT)
2171   T *= 2 - (P * T);
2172   T *= 2 - (P * T);
2173 #endif
2174   return T;
2175 }
2176 
2177 /* Given c, k, and prime p, where a*c == 2**k (mod p),
2178 ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2179 ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2180 ** by Richard Schroeppel (a.k.a. Captain Nemo).
2181 */
2182 mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2183 {
2184   int      k_orig = k;
2185   mp_digit r;
2186   mp_size  ix;
2187   mp_err   res;
2188 
2189   if (mp_cmp_z(c) < 0) {		/* c < 0 */
2190     MP_CHECKOK( mp_add(c, p, x) );	/* x = c + p */
2191   } else {
2192     MP_CHECKOK( mp_copy(c, x) );	/* x = c */
2193   }
2194 
2195   /* make sure x is large enough */
2196   ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2197   ix = MP_MAX(ix, MP_USED(x));
2198   MP_CHECKOK( s_mp_pad(x, ix) );
2199 
2200   r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2201 
2202   for (ix = 0; k > 0; ix++) {
2203     int      j = MP_MIN(k, MP_DIGIT_BIT);
2204     mp_digit v = r * MP_DIGIT(x, ix);
2205     if (j < MP_DIGIT_BIT) {
2206       v &= ((mp_digit)1 << j) - 1;	/* v = v mod (2 ** j) */
2207     }
2208     s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2209     k -= j;
2210   }
2211   s_mp_clamp(x);
2212   s_mp_div_2d(x, k_orig);
2213   res = MP_OKAY;
2214 
2215 CLEANUP:
2216   return res;
2217 }
2218 
2219 /* compute mod inverse using Schroeppel's method, only if m is odd */
2220 mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2221 {
2222   int k;
2223   mp_err  res;
2224   mp_int  x;
2225 
2226   ARGCHK(a && m && c, MP_BADARG);
2227 
2228   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2229     return MP_RANGE;
2230   if (mp_iseven(m))
2231     return MP_UNDEF;
2232 
2233   MP_DIGITS(&x) = 0;
2234 
2235   if (a == c) {
2236     if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2237       return res;
2238     if (a == m)
2239       m = &x;
2240     a = &x;
2241   } else if (m == c) {
2242     if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2243       return res;
2244     m = &x;
2245   } else {
2246     MP_DIGITS(&x) = 0;
2247   }
2248 
2249   MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2250   k = res;
2251   MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2252 CLEANUP:
2253   mp_clear(&x);
2254   return res;
2255 }
2256 
2257 /* Known good algorithm for computing modular inverse.  But slow. */
2258 mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2259 {
2260   mp_int  g, x;
2261   mp_err  res;
2262 
2263   ARGCHK(a && m && c, MP_BADARG);
2264 
2265   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2266     return MP_RANGE;
2267 
2268   MP_DIGITS(&g) = 0;
2269   MP_DIGITS(&x) = 0;
2270   MP_CHECKOK( mp_init(&x, FLAG(a)) );
2271   MP_CHECKOK( mp_init(&g, FLAG(a)) );
2272 
2273   MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2274 
2275   if (mp_cmp_d(&g, 1) != MP_EQ) {
2276     res = MP_UNDEF;
2277     goto CLEANUP;
2278   }
2279 
2280   res = mp_mod(&x, m, c);
2281   SIGN(c) = SIGN(a);
2282 
2283 CLEANUP:
2284   mp_clear(&x);
2285   mp_clear(&g);
2286 
2287   return res;
2288 }
2289 
2290 /* modular inverse where modulus is 2**k. */
2291 /* c = a**-1 mod 2**k */
2292 mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2293 {
2294   mp_err res;
2295   mp_size ix = k + 4;
2296   mp_int t0, t1, val, tmp, two2k;
2297 
2298   static const mp_digit d2 = 2;
2299   static const mp_int two = { 0, MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2300 
2301   if (mp_iseven(a))
2302     return MP_UNDEF;
2303   if (k <= MP_DIGIT_BIT) {
2304     mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2305     if (k < MP_DIGIT_BIT)
2306       i &= ((mp_digit)1 << k) - (mp_digit)1;
2307     mp_set(c, i);
2308     return MP_OKAY;
2309   }
2310   MP_DIGITS(&t0) = 0;
2311   MP_DIGITS(&t1) = 0;
2312   MP_DIGITS(&val) = 0;
2313   MP_DIGITS(&tmp) = 0;
2314   MP_DIGITS(&two2k) = 0;
2315   MP_CHECKOK( mp_init_copy(&val, a) );
2316   s_mp_mod_2d(&val, k);
2317   MP_CHECKOK( mp_init_copy(&t0, &val) );
2318   MP_CHECKOK( mp_init_copy(&t1, &t0)  );
2319   MP_CHECKOK( mp_init(&tmp, FLAG(a)) );
2320   MP_CHECKOK( mp_init(&two2k, FLAG(a)) );
2321   MP_CHECKOK( s_mp_2expt(&two2k, k) );
2322   do {
2323     MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
2324     MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2325     MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
2326     s_mp_mod_2d(&t1, k);
2327     while (MP_SIGN(&t1) != MP_ZPOS) {
2328       MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2329     }
2330     if (mp_cmp(&t1, &t0) == MP_EQ)
2331       break;
2332     MP_CHECKOK( mp_copy(&t1, &t0) );
2333   } while (--ix > 0);
2334   if (!ix) {
2335     res = MP_UNDEF;
2336   } else {
2337     mp_exch(c, &t1);
2338   }
2339 
2340 CLEANUP:
2341   mp_clear(&t0);
2342   mp_clear(&t1);
2343   mp_clear(&val);
2344   mp_clear(&tmp);
2345   mp_clear(&two2k);
2346   return res;
2347 }
2348 
2349 mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2350 {
2351   mp_err res;
2352   mp_size k;
2353   mp_int oddFactor, evenFactor;	/* factors of the modulus */
2354   mp_int oddPart, evenPart;	/* parts to combine via CRT. */
2355   mp_int C2, tmp1, tmp2;
2356 
2357   /*static const mp_digit d1 = 1; */
2358   /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2359 
2360   if ((res = s_mp_ispow2(m)) >= 0) {
2361     k = res;
2362     return s_mp_invmod_2d(a, k, c);
2363   }
2364   MP_DIGITS(&oddFactor) = 0;
2365   MP_DIGITS(&evenFactor) = 0;
2366   MP_DIGITS(&oddPart) = 0;
2367   MP_DIGITS(&evenPart) = 0;
2368   MP_DIGITS(&C2)     = 0;
2369   MP_DIGITS(&tmp1)   = 0;
2370   MP_DIGITS(&tmp2)   = 0;
2371 
2372   MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
2373   MP_CHECKOK( mp_init(&evenFactor, FLAG(m)) );
2374   MP_CHECKOK( mp_init(&oddPart, FLAG(m)) );
2375   MP_CHECKOK( mp_init(&evenPart, FLAG(m)) );
2376   MP_CHECKOK( mp_init(&C2, FLAG(m))     );
2377   MP_CHECKOK( mp_init(&tmp1, FLAG(m))   );
2378   MP_CHECKOK( mp_init(&tmp2, FLAG(m))   );
2379 
2380   k = mp_trailing_zeros(m);
2381   s_mp_div_2d(&oddFactor, k);
2382   MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2383 
2384   /* compute a**-1 mod oddFactor. */
2385   MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2386   /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2387   MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
2388 
2389   /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2390   /* let m1 = oddFactor,  v1 = oddPart,
2391    * let m2 = evenFactor, v2 = evenPart.
2392    */
2393 
2394   /* Compute C2 = m1**-1 mod m2. */
2395   MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
2396 
2397   /* compute u = (v2 - v1)*C2 mod m2 */
2398   MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
2399   MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
2400   s_mp_mod_2d(&tmp2, k);
2401   while (MP_SIGN(&tmp2) != MP_ZPOS) {
2402     MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2403   }
2404 
2405   /* compute answer = v1 + u*m1 */
2406   MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
2407   MP_CHECKOK( mp_add(&oddPart,  c,          c) );
2408   /* not sure this is necessary, but it's low cost if not. */
2409   MP_CHECKOK( mp_mod(c,         m,          c) );
2410 
2411 CLEANUP:
2412   mp_clear(&oddFactor);
2413   mp_clear(&evenFactor);
2414   mp_clear(&oddPart);
2415   mp_clear(&evenPart);
2416   mp_clear(&C2);
2417   mp_clear(&tmp1);
2418   mp_clear(&tmp2);
2419   return res;
2420 }
2421 
2422 
2423 /* {{{ mp_invmod(a, m, c) */
2424 
2425 /*
2426   mp_invmod(a, m, c)
2427 
2428   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2429   This is equivalent to the question of whether (a, m) = 1.  If not,
2430   MP_UNDEF is returned, and there is no inverse.
2431  */
2432 
2433 mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2434 {
2435 
2436   ARGCHK(a && m && c, MP_BADARG);
2437 
2438   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2439     return MP_RANGE;
2440 
2441   if (mp_isodd(m)) {
2442     return s_mp_invmod_odd_m(a, m, c);
2443   }
2444   if (mp_iseven(a))
2445     return MP_UNDEF;	/* not invertable */
2446 
2447   return s_mp_invmod_even_m(a, m, c);
2448 
2449 } /* end mp_invmod() */
2450 
2451 /* }}} */
2452 #endif /* if MP_NUMTH */
2453 
2454 /* }}} */
2455 
2456 /*------------------------------------------------------------------------*/
2457 /* {{{ mp_print(mp, ofp) */
2458 
2459 #if MP_IOFUNC
2460 /*
2461   mp_print(mp, ofp)
2462 
2463   Print a textual representation of the given mp_int on the output
2464   stream 'ofp'.  Output is generated using the internal radix.
2465  */
2466 
2467 void   mp_print(mp_int *mp, FILE *ofp)
2468 {
2469   int   ix;
2470 
2471   if(mp == NULL || ofp == NULL)
2472     return;
2473 
2474   fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2475 
2476   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2477     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2478   }
2479 
2480 } /* end mp_print() */
2481 
2482 #endif /* if MP_IOFUNC */
2483 
2484 /* }}} */
2485 
2486 /*------------------------------------------------------------------------*/
2487 /* {{{ More I/O Functions */
2488 
2489 /* {{{ mp_read_raw(mp, str, len) */
2490 
2491 /*
2492    mp_read_raw(mp, str, len)
2493 
2494    Read in a raw value (base 256) into the given mp_int
2495  */
2496 
2497 mp_err  mp_read_raw(mp_int *mp, char *str, int len)
2498 {
2499   int            ix;
2500   mp_err         res;
2501   unsigned char *ustr = (unsigned char *)str;
2502 
2503   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2504 
2505   mp_zero(mp);
2506 
2507   /* Get sign from first byte */
2508   if(ustr[0])
2509     SIGN(mp) = NEG;
2510   else
2511     SIGN(mp) = ZPOS;
2512 
2513   /* Read the rest of the digits */
2514   for(ix = 1; ix < len; ix++) {
2515     if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2516       return res;
2517     if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2518       return res;
2519   }
2520 
2521   return MP_OKAY;
2522 
2523 } /* end mp_read_raw() */
2524 
2525 /* }}} */
2526 
2527 /* {{{ mp_raw_size(mp) */
2528 
2529 int    mp_raw_size(mp_int *mp)
2530 {
2531   ARGCHK(mp != NULL, 0);
2532 
2533   return (USED(mp) * sizeof(mp_digit)) + 1;
2534 
2535 } /* end mp_raw_size() */
2536 
2537 /* }}} */
2538 
2539 /* {{{ mp_toraw(mp, str) */
2540 
2541 mp_err mp_toraw(mp_int *mp, char *str)
2542 {
2543   int  ix, jx, pos = 1;
2544 
2545   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2546 
2547   str[0] = (char)SIGN(mp);
2548 
2549   /* Iterate over each digit... */
2550   for(ix = USED(mp) - 1; ix >= 0; ix--) {
2551     mp_digit  d = DIGIT(mp, ix);
2552 
2553     /* Unpack digit bytes, high order first */
2554     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2555       str[pos++] = (char)(d >> (jx * CHAR_BIT));
2556     }
2557   }
2558 
2559   return MP_OKAY;
2560 
2561 } /* end mp_toraw() */
2562 
2563 /* }}} */
2564 
2565 /* {{{ mp_read_radix(mp, str, radix) */
2566 
2567 /*
2568   mp_read_radix(mp, str, radix)
2569 
2570   Read an integer from the given string, and set mp to the resulting
2571   value.  The input is presumed to be in base 10.  Leading non-digit
2572   characters are ignored, and the function reads until a non-digit
2573   character or the end of the string.
2574  */
2575 
2576 mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
2577 {
2578   int     ix = 0, val = 0;
2579   mp_err  res;
2580   mp_sign sig = ZPOS;
2581 
2582   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2583 	 MP_BADARG);
2584 
2585   mp_zero(mp);
2586 
2587   /* Skip leading non-digit characters until a digit or '-' or '+' */
2588   while(str[ix] &&
2589 	(s_mp_tovalue(str[ix], radix) < 0) &&
2590 	str[ix] != '-' &&
2591 	str[ix] != '+') {
2592     ++ix;
2593   }
2594 
2595   if(str[ix] == '-') {
2596     sig = NEG;
2597     ++ix;
2598   } else if(str[ix] == '+') {
2599     sig = ZPOS; /* this is the default anyway... */
2600     ++ix;
2601   }
2602 
2603   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2604     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2605       return res;
2606     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2607       return res;
2608     ++ix;
2609   }
2610 
2611   if(s_mp_cmp_d(mp, 0) == MP_EQ)
2612     SIGN(mp) = ZPOS;
2613   else
2614     SIGN(mp) = sig;
2615 
2616   return MP_OKAY;
2617 
2618 } /* end mp_read_radix() */
2619 
2620 mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2621 {
2622   int     radix = default_radix;
2623   int     cx;
2624   mp_sign sig   = ZPOS;
2625   mp_err  res;
2626 
2627   /* Skip leading non-digit characters until a digit or '-' or '+' */
2628   while ((cx = *str) != 0 &&
2629 	(s_mp_tovalue(cx, radix) < 0) &&
2630 	cx != '-' &&
2631 	cx != '+') {
2632     ++str;
2633   }
2634 
2635   if (cx == '-') {
2636     sig = NEG;
2637     ++str;
2638   } else if (cx == '+') {
2639     sig = ZPOS; /* this is the default anyway... */
2640     ++str;
2641   }
2642 
2643   if (str[0] == '0') {
2644     if ((str[1] | 0x20) == 'x') {
2645       radix = 16;
2646       str += 2;
2647     } else {
2648       radix = 8;
2649       str++;
2650     }
2651   }
2652   res = mp_read_radix(a, str, radix);
2653   if (res == MP_OKAY) {
2654     MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2655   }
2656   return res;
2657 }
2658 
2659 /* }}} */
2660 
2661 /* {{{ mp_radix_size(mp, radix) */
2662 
2663 int    mp_radix_size(mp_int *mp, int radix)
2664 {
2665   int  bits;
2666 
2667   if(!mp || radix < 2 || radix > MAX_RADIX)
2668     return 0;
2669 
2670   bits = USED(mp) * DIGIT_BIT - 1;
2671 
2672   return s_mp_outlen(bits, radix);
2673 
2674 } /* end mp_radix_size() */
2675 
2676 /* }}} */
2677 
2678 /* {{{ mp_toradix(mp, str, radix) */
2679 
2680 mp_err mp_toradix(mp_int *mp, char *str, int radix)
2681 {
2682   int  ix, pos = 0;
2683 
2684   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2685   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2686 
2687   if(mp_cmp_z(mp) == MP_EQ) {
2688     str[0] = '0';
2689     str[1] = '\0';
2690   } else {
2691     mp_err   res;
2692     mp_int   tmp;
2693     mp_sign  sgn;
2694     mp_digit rem, rdx = (mp_digit)radix;
2695     char     ch;
2696 
2697     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2698       return res;
2699 
2700     /* Save sign for later, and take absolute value */
2701     sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2702 
2703     /* Generate output digits in reverse order      */
2704     while(mp_cmp_z(&tmp) != 0) {
2705       if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2706 	mp_clear(&tmp);
2707 	return res;
2708       }
2709 
2710       /* Generate digits, use capital letters */
2711       ch = s_mp_todigit(rem, radix, 0);
2712 
2713       str[pos++] = ch;
2714     }
2715 
2716     /* Add - sign if original value was negative */
2717     if(sgn == NEG)
2718       str[pos++] = '-';
2719 
2720     /* Add trailing NUL to end the string        */
2721     str[pos--] = '\0';
2722 
2723     /* Reverse the digits and sign indicator     */
2724     ix = 0;
2725     while(ix < pos) {
2726       char tmp = str[ix];
2727 
2728       str[ix] = str[pos];
2729       str[pos] = tmp;
2730       ++ix;
2731       --pos;
2732     }
2733 
2734     mp_clear(&tmp);
2735   }
2736 
2737   return MP_OKAY;
2738 
2739 } /* end mp_toradix() */
2740 
2741 /* }}} */
2742 
2743 /* {{{ mp_tovalue(ch, r) */
2744 
2745 int    mp_tovalue(char ch, int r)
2746 {
2747   return s_mp_tovalue(ch, r);
2748 
2749 } /* end mp_tovalue() */
2750 
2751 /* }}} */
2752 
2753 /* }}} */
2754 
2755 /* {{{ mp_strerror(ec) */
2756 
2757 /*
2758   mp_strerror(ec)
2759 
2760   Return a string describing the meaning of error code 'ec'.  The
2761   string returned is allocated in static memory, so the caller should
2762   not attempt to modify or free the memory associated with this
2763   string.
2764  */
2765 const char  *mp_strerror(mp_err ec)
2766 {
2767   int   aec = (ec < 0) ? -ec : ec;
2768 
2769   /* Code values are negative, so the senses of these comparisons
2770      are accurate */
2771   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2772     return mp_err_string[0];  /* unknown error code */
2773   } else {
2774     return mp_err_string[aec + 1];
2775   }
2776 
2777 } /* end mp_strerror() */
2778 
2779 /* }}} */
2780 
2781 /*========================================================================*/
2782 /*------------------------------------------------------------------------*/
2783 /* Static function definitions (internal use only)                        */
2784 
2785 /* {{{ Memory management */
2786 
2787 /* {{{ s_mp_grow(mp, min) */
2788 
2789 /* Make sure there are at least 'min' digits allocated to mp              */
2790 mp_err   s_mp_grow(mp_int *mp, mp_size min)
2791 {
2792   if(min > ALLOC(mp)) {
2793     mp_digit   *tmp;
2794 
2795     /* Set min to next nearest default precision block size */
2796     min = MP_ROUNDUP(min, s_mp_defprec);
2797 
2798     if((tmp = s_mp_alloc(min, sizeof(mp_digit), FLAG(mp))) == NULL)
2799       return MP_MEM;
2800 
2801     s_mp_copy(DIGITS(mp), tmp, USED(mp));
2802 
2803 #if MP_CRYPTO
2804     s_mp_setz(DIGITS(mp), ALLOC(mp));
2805 #endif
2806     s_mp_free(DIGITS(mp), ALLOC(mp));
2807     DIGITS(mp) = tmp;
2808     ALLOC(mp) = min;
2809   }
2810 
2811   return MP_OKAY;
2812 
2813 } /* end s_mp_grow() */
2814 
2815 /* }}} */
2816 
2817 /* {{{ s_mp_pad(mp, min) */
2818 
2819 /* Make sure the used size of mp is at least 'min', growing if needed     */
2820 mp_err   s_mp_pad(mp_int *mp, mp_size min)
2821 {
2822   if(min > USED(mp)) {
2823     mp_err  res;
2824 
2825     /* Make sure there is room to increase precision  */
2826     if (min > ALLOC(mp)) {
2827       if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2828 	return res;
2829     } else {
2830       s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2831     }
2832 
2833     /* Increase precision; should already be 0-filled */
2834     USED(mp) = min;
2835   }
2836 
2837   return MP_OKAY;
2838 
2839 } /* end s_mp_pad() */
2840 
2841 /* }}} */
2842 
2843 /* {{{ s_mp_setz(dp, count) */
2844 
2845 #if MP_MACRO == 0
2846 /* Set 'count' digits pointed to by dp to be zeroes                       */
2847 void s_mp_setz(mp_digit *dp, mp_size count)
2848 {
2849 #if MP_MEMSET == 0
2850   int  ix;
2851 
2852   for(ix = 0; ix < count; ix++)
2853     dp[ix] = 0;
2854 #else
2855   memset(dp, 0, count * sizeof(mp_digit));
2856 #endif
2857 
2858 } /* end s_mp_setz() */
2859 #endif
2860 
2861 /* }}} */
2862 
2863 /* {{{ s_mp_copy(sp, dp, count) */
2864 
2865 #if MP_MACRO == 0
2866 /* Copy 'count' digits from sp to dp                                      */
2867 void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2868 {
2869 #if MP_MEMCPY == 0
2870   int  ix;
2871 
2872   for(ix = 0; ix < count; ix++)
2873     dp[ix] = sp[ix];
2874 #else
2875   memcpy(dp, sp, count * sizeof(mp_digit));
2876 #endif
2877 
2878 } /* end s_mp_copy() */
2879 #endif
2880 
2881 /* }}} */
2882 
2883 /* {{{ s_mp_alloc(nb, ni, kmflag) */
2884 
2885 #if MP_MACRO == 0
2886 /* Allocate ni records of nb bytes each, and return a pointer to that     */
2887 void    *s_mp_alloc(size_t nb, size_t ni, int kmflag)
2888 {
2889   ++mp_allocs;
2890 #ifdef _KERNEL
2891   return kmem_zalloc(nb * ni, kmflag);
2892 #else
2893   return calloc(nb, ni);
2894 #endif
2895 
2896 } /* end s_mp_alloc() */
2897 #endif
2898 
2899 /* }}} */
2900 
2901 /* {{{ s_mp_free(ptr) */
2902 
2903 #if MP_MACRO == 0
2904 /* Free the memory pointed to by ptr                                      */
2905 void     s_mp_free(void *ptr, mp_size alloc)
2906 {
2907   if(ptr) {
2908     ++mp_frees;
2909 #ifdef _KERNEL
2910     kmem_free(ptr, alloc * sizeof (mp_digit));
2911 #else
2912     free(ptr);
2913 #endif
2914   }
2915 } /* end s_mp_free() */
2916 #endif
2917 
2918 /* }}} */
2919 
2920 /* {{{ s_mp_clamp(mp) */
2921 
2922 #if MP_MACRO == 0
2923 /* Remove leading zeroes from the given value                             */
2924 void     s_mp_clamp(mp_int *mp)
2925 {
2926   mp_size used = MP_USED(mp);
2927   while (used > 1 && DIGIT(mp, used - 1) == 0)
2928     --used;
2929   MP_USED(mp) = used;
2930 } /* end s_mp_clamp() */
2931 #endif
2932 
2933 /* }}} */
2934 
2935 /* {{{ s_mp_exch(a, b) */
2936 
2937 /* Exchange the data for a and b; (b, a) = (a, b)                         */
2938 void     s_mp_exch(mp_int *a, mp_int *b)
2939 {
2940   mp_int   tmp;
2941 
2942   tmp = *a;
2943   *a = *b;
2944   *b = tmp;
2945 
2946 } /* end s_mp_exch() */
2947 
2948 /* }}} */
2949 
2950 /* }}} */
2951 
2952 /* {{{ Arithmetic helpers */
2953 
2954 /* {{{ s_mp_lshd(mp, p) */
2955 
2956 /*
2957    Shift mp leftward by p digits, growing if needed, and zero-filling
2958    the in-shifted digits at the right end.  This is a convenient
2959    alternative to multiplication by powers of the radix
2960    The value of USED(mp) must already have been set to the value for
2961    the shifted result.
2962  */
2963 
2964 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
2965 {
2966   mp_err  res;
2967   mp_size pos;
2968   int     ix;
2969 
2970   if(p == 0)
2971     return MP_OKAY;
2972 
2973   if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2974     return MP_OKAY;
2975 
2976   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2977     return res;
2978 
2979   pos = USED(mp) - 1;
2980 
2981   /* Shift all the significant figures over as needed */
2982   for(ix = pos - p; ix >= 0; ix--)
2983     DIGIT(mp, ix + p) = DIGIT(mp, ix);
2984 
2985   /* Fill the bottom digits with zeroes */
2986   for(ix = 0; ix < p; ix++)
2987     DIGIT(mp, ix) = 0;
2988 
2989   return MP_OKAY;
2990 
2991 } /* end s_mp_lshd() */
2992 
2993 /* }}} */
2994 
2995 /* {{{ s_mp_mul_2d(mp, d) */
2996 
2997 /*
2998   Multiply the integer by 2^d, where d is a number of bits.  This
2999   amounts to a bitwise shift of the value.
3000  */
3001 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
3002 {
3003   mp_err   res;
3004   mp_digit dshift, bshift;
3005   mp_digit mask;
3006 
3007   ARGCHK(mp != NULL,  MP_BADARG);
3008 
3009   dshift = d / MP_DIGIT_BIT;
3010   bshift = d % MP_DIGIT_BIT;
3011   /* bits to be shifted out of the top word */
3012   mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
3013   mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
3014 
3015   if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
3016     return res;
3017 
3018   if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3019     return res;
3020 
3021   if (bshift) {
3022     mp_digit *pa = MP_DIGITS(mp);
3023     mp_digit *alim = pa + MP_USED(mp);
3024     mp_digit  prev = 0;
3025 
3026     for (pa += dshift; pa < alim; ) {
3027       mp_digit x = *pa;
3028       *pa++ = (x << bshift) | prev;
3029       prev = x >> (DIGIT_BIT - bshift);
3030     }
3031   }
3032 
3033   s_mp_clamp(mp);
3034   return MP_OKAY;
3035 } /* end s_mp_mul_2d() */
3036 
3037 /* {{{ s_mp_rshd(mp, p) */
3038 
3039 /*
3040    Shift mp rightward by p digits.  Maintains the invariant that
3041    digits above the precision are all zero.  Digits shifted off the
3042    end are lost.  Cannot fail.
3043  */
3044 
3045 void     s_mp_rshd(mp_int *mp, mp_size p)
3046 {
3047   mp_size  ix;
3048   mp_digit *src, *dst;
3049 
3050   if(p == 0)
3051     return;
3052 
3053   /* Shortcut when all digits are to be shifted off */
3054   if(p >= USED(mp)) {
3055     s_mp_setz(DIGITS(mp), ALLOC(mp));
3056     USED(mp) = 1;
3057     SIGN(mp) = ZPOS;
3058     return;
3059   }
3060 
3061   /* Shift all the significant figures over as needed */
3062   dst = MP_DIGITS(mp);
3063   src = dst + p;
3064   for (ix = USED(mp) - p; ix > 0; ix--)
3065     *dst++ = *src++;
3066 
3067   MP_USED(mp) -= p;
3068   /* Fill the top digits with zeroes */
3069   while (p-- > 0)
3070     *dst++ = 0;
3071 
3072 #if 0
3073   /* Strip off any leading zeroes    */
3074   s_mp_clamp(mp);
3075 #endif
3076 
3077 } /* end s_mp_rshd() */
3078 
3079 /* }}} */
3080 
3081 /* {{{ s_mp_div_2(mp) */
3082 
3083 /* Divide by two -- take advantage of radix properties to do it fast      */
3084 void     s_mp_div_2(mp_int *mp)
3085 {
3086   s_mp_div_2d(mp, 1);
3087 
3088 } /* end s_mp_div_2() */
3089 
3090 /* }}} */
3091 
3092 /* {{{ s_mp_mul_2(mp) */
3093 
3094 mp_err s_mp_mul_2(mp_int *mp)
3095 {
3096   mp_digit *pd;
3097   int      ix, used;
3098   mp_digit kin = 0;
3099 
3100   /* Shift digits leftward by 1 bit */
3101   used = MP_USED(mp);
3102   pd = MP_DIGITS(mp);
3103   for (ix = 0; ix < used; ix++) {
3104     mp_digit d = *pd;
3105     *pd++ = (d << 1) | kin;
3106     kin = (d >> (DIGIT_BIT - 1));
3107   }
3108 
3109   /* Deal with rollover from last digit */
3110   if (kin) {
3111     if (ix >= ALLOC(mp)) {
3112       mp_err res;
3113       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3114 	return res;
3115     }
3116 
3117     DIGIT(mp, ix) = kin;
3118     USED(mp) += 1;
3119   }
3120 
3121   return MP_OKAY;
3122 
3123 } /* end s_mp_mul_2() */
3124 
3125 /* }}} */
3126 
3127 /* {{{ s_mp_mod_2d(mp, d) */
3128 
3129 /*
3130   Remainder the integer by 2^d, where d is a number of bits.  This
3131   amounts to a bitwise AND of the value, and does not require the full
3132   division code
3133  */
3134 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
3135 {
3136   mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3137   mp_size  ix;
3138   mp_digit dmask;
3139 
3140   if(ndig >= USED(mp))
3141     return;
3142 
3143   /* Flush all the bits above 2^d in its digit */
3144   dmask = ((mp_digit)1 << nbit) - 1;
3145   DIGIT(mp, ndig) &= dmask;
3146 
3147   /* Flush all digits above the one with 2^d in it */
3148   for(ix = ndig + 1; ix < USED(mp); ix++)
3149     DIGIT(mp, ix) = 0;
3150 
3151   s_mp_clamp(mp);
3152 
3153 } /* end s_mp_mod_2d() */
3154 
3155 /* }}} */
3156 
3157 /* {{{ s_mp_div_2d(mp, d) */
3158 
3159 /*
3160   Divide the integer by 2^d, where d is a number of bits.  This
3161   amounts to a bitwise shift of the value, and does not require the
3162   full division code (used in Barrett reduction, see below)
3163  */
3164 void     s_mp_div_2d(mp_int *mp, mp_digit d)
3165 {
3166   int       ix;
3167   mp_digit  save, next, mask;
3168 
3169   s_mp_rshd(mp, d / DIGIT_BIT);
3170   d %= DIGIT_BIT;
3171   if (d) {
3172     mask = ((mp_digit)1 << d) - 1;
3173     save = 0;
3174     for(ix = USED(mp) - 1; ix >= 0; ix--) {
3175       next = DIGIT(mp, ix) & mask;
3176       DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3177       save = next;
3178     }
3179   }
3180   s_mp_clamp(mp);
3181 
3182 } /* end s_mp_div_2d() */
3183 
3184 /* }}} */
3185 
3186 /* {{{ s_mp_norm(a, b, *d) */
3187 
3188 /*
3189   s_mp_norm(a, b, *d)
3190 
3191   Normalize a and b for division, where b is the divisor.  In order
3192   that we might make good guesses for quotient digits, we want the
3193   leading digit of b to be at least half the radix, which we
3194   accomplish by multiplying a and b by a power of 2.  The exponent
3195   (shift count) is placed in *pd, so that the remainder can be shifted
3196   back at the end of the division process.
3197  */
3198 
3199 mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3200 {
3201   mp_digit  d;
3202   mp_digit  mask;
3203   mp_digit  b_msd;
3204   mp_err    res    = MP_OKAY;
3205 
3206   d = 0;
3207   mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);	/* mask is msb of digit */
3208   b_msd = DIGIT(b, USED(b) - 1);
3209   while (!(b_msd & mask)) {
3210     b_msd <<= 1;
3211     ++d;
3212   }
3213 
3214   if (d) {
3215     MP_CHECKOK( s_mp_mul_2d(a, d) );
3216     MP_CHECKOK( s_mp_mul_2d(b, d) );
3217   }
3218 
3219   *pd = d;
3220 CLEANUP:
3221   return res;
3222 
3223 } /* end s_mp_norm() */
3224 
3225 /* }}} */
3226 
3227 /* }}} */
3228 
3229 /* {{{ Primitive digit arithmetic */
3230 
3231 /* {{{ s_mp_add_d(mp, d) */
3232 
3233 /* Add d to |mp| in place                                                 */
3234 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
3235 {
3236 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3237   mp_word   w, k = 0;
3238   mp_size   ix = 1;
3239 
3240   w = (mp_word)DIGIT(mp, 0) + d;
3241   DIGIT(mp, 0) = ACCUM(w);
3242   k = CARRYOUT(w);
3243 
3244   while(ix < USED(mp) && k) {
3245     w = (mp_word)DIGIT(mp, ix) + k;
3246     DIGIT(mp, ix) = ACCUM(w);
3247     k = CARRYOUT(w);
3248     ++ix;
3249   }
3250 
3251   if(k != 0) {
3252     mp_err  res;
3253 
3254     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3255       return res;
3256 
3257     DIGIT(mp, ix) = (mp_digit)k;
3258   }
3259 
3260   return MP_OKAY;
3261 #else
3262   mp_digit * pmp = MP_DIGITS(mp);
3263   mp_digit sum, mp_i, carry = 0;
3264   mp_err   res = MP_OKAY;
3265   int used = (int)MP_USED(mp);
3266 
3267   mp_i = *pmp;
3268   *pmp++ = sum = d + mp_i;
3269   carry = (sum < d);
3270   while (carry && --used > 0) {
3271     mp_i = *pmp;
3272     *pmp++ = sum = carry + mp_i;
3273     carry = !sum;
3274   }
3275   if (carry && !used) {
3276     /* mp is growing */
3277     used = MP_USED(mp);
3278     MP_CHECKOK( s_mp_pad(mp, used + 1) );
3279     MP_DIGIT(mp, used) = carry;
3280   }
3281 CLEANUP:
3282   return res;
3283 #endif
3284 } /* end s_mp_add_d() */
3285 
3286 /* }}} */
3287 
3288 /* {{{ s_mp_sub_d(mp, d) */
3289 
3290 /* Subtract d from |mp| in place, assumes |mp| > d                        */
3291 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
3292 {
3293 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3294   mp_word   w, b = 0;
3295   mp_size   ix = 1;
3296 
3297   /* Compute initial subtraction    */
3298   w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3299   b = CARRYOUT(w) ? 0 : 1;
3300   DIGIT(mp, 0) = ACCUM(w);
3301 
3302   /* Propagate borrows leftward     */
3303   while(b && ix < USED(mp)) {
3304     w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3305     b = CARRYOUT(w) ? 0 : 1;
3306     DIGIT(mp, ix) = ACCUM(w);
3307     ++ix;
3308   }
3309 
3310   /* Remove leading zeroes          */
3311   s_mp_clamp(mp);
3312 
3313   /* If we have a borrow out, it's a violation of the input invariant */
3314   if(b)
3315     return MP_RANGE;
3316   else
3317     return MP_OKAY;
3318 #else
3319   mp_digit *pmp = MP_DIGITS(mp);
3320   mp_digit mp_i, diff, borrow;
3321   mp_size  used = MP_USED(mp);
3322 
3323   mp_i = *pmp;
3324   *pmp++ = diff = mp_i - d;
3325   borrow = (diff > mp_i);
3326   while (borrow && --used) {
3327     mp_i = *pmp;
3328     *pmp++ = diff = mp_i - borrow;
3329     borrow = (diff > mp_i);
3330   }
3331   s_mp_clamp(mp);
3332   return (borrow && !used) ? MP_RANGE : MP_OKAY;
3333 #endif
3334 } /* end s_mp_sub_d() */
3335 
3336 /* }}} */
3337 
3338 /* {{{ s_mp_mul_d(a, d) */
3339 
3340 /* Compute a = a * d, single digit multiplication                         */
3341 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
3342 {
3343   mp_err  res;
3344   mp_size used;
3345   int     pow;
3346 
3347   if (!d) {
3348     mp_zero(a);
3349     return MP_OKAY;
3350   }
3351   if (d == 1)
3352     return MP_OKAY;
3353   if (0 <= (pow = s_mp_ispow2d(d))) {
3354     return s_mp_mul_2d(a, (mp_digit)pow);
3355   }
3356 
3357   used = MP_USED(a);
3358   MP_CHECKOK( s_mp_pad(a, used + 1) );
3359 
3360   s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3361 
3362   s_mp_clamp(a);
3363 
3364 CLEANUP:
3365   return res;
3366 
3367 } /* end s_mp_mul_d() */
3368 
3369 /* }}} */
3370 
3371 /* {{{ s_mp_div_d(mp, d, r) */
3372 
3373 /*
3374   s_mp_div_d(mp, d, r)
3375 
3376   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3377   single digit d.  If r is null, the remainder will be discarded.
3378  */
3379 
3380 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3381 {
3382 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3383   mp_word   w = 0, q;
3384 #else
3385   mp_digit  w, q;
3386 #endif
3387   int       ix;
3388   mp_err    res;
3389   mp_int    quot;
3390   mp_int    rem;
3391 
3392   if(d == 0)
3393     return MP_RANGE;
3394   if (d == 1) {
3395     if (r)
3396       *r = 0;
3397     return MP_OKAY;
3398   }
3399   /* could check for power of 2 here, but mp_div_d does that. */
3400   if (MP_USED(mp) == 1) {
3401     mp_digit n   = MP_DIGIT(mp,0);
3402     mp_digit rem;
3403 
3404     q   = n / d;
3405     rem = n % d;
3406     MP_DIGIT(mp,0) = q;
3407     if (r)
3408       *r = rem;
3409     return MP_OKAY;
3410   }
3411 
3412   MP_DIGITS(&rem)  = 0;
3413   MP_DIGITS(&quot) = 0;
3414   /* Make room for the quotient */
3415   MP_CHECKOK( mp_init_size(&quot, USED(mp), FLAG(mp)) );
3416 
3417 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3418   for(ix = USED(mp) - 1; ix >= 0; ix--) {
3419     w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3420 
3421     if(w >= d) {
3422       q = w / d;
3423       w = w % d;
3424     } else {
3425       q = 0;
3426     }
3427 
3428     s_mp_lshd(&quot, 1);
3429     DIGIT(&quot, 0) = (mp_digit)q;
3430   }
3431 #else
3432   {
3433     mp_digit p;
3434 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3435     mp_digit norm;
3436 #endif
3437 
3438     MP_CHECKOK( mp_init_copy(&rem, mp) );
3439 
3440 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3441     MP_DIGIT(&quot, 0) = d;
3442     MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3443     if (norm)
3444       d <<= norm;
3445     MP_DIGIT(&quot, 0) = 0;
3446 #endif
3447 
3448     p = 0;
3449     for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3450       w = DIGIT(&rem, ix);
3451 
3452       if (p) {
3453         MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3454       } else if (w >= d) {
3455 	q = w / d;
3456 	w = w % d;
3457       } else {
3458 	q = 0;
3459       }
3460 
3461       MP_CHECKOK( s_mp_lshd(&quot, 1) );
3462       DIGIT(&quot, 0) = q;
3463       p = w;
3464     }
3465 #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3466     if (norm)
3467       w >>= norm;
3468 #endif
3469   }
3470 #endif
3471 
3472   /* Deliver the remainder, if desired */
3473   if(r)
3474     *r = (mp_digit)w;
3475 
3476   s_mp_clamp(&quot);
3477   mp_exch(&quot, mp);
3478 CLEANUP:
3479   mp_clear(&quot);
3480   mp_clear(&rem);
3481 
3482   return res;
3483 } /* end s_mp_div_d() */
3484 
3485 /* }}} */
3486 
3487 
3488 /* }}} */
3489 
3490 /* {{{ Primitive full arithmetic */
3491 
3492 /* {{{ s_mp_add(a, b) */
3493 
3494 /* Compute a = |a| + |b|                                                  */
3495 mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
3496 {
3497 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3498   mp_word   w = 0;
3499 #else
3500   mp_digit  d, sum, carry = 0;
3501 #endif
3502   mp_digit *pa, *pb;
3503   mp_size   ix;
3504   mp_size   used;
3505   mp_err    res;
3506 
3507   /* Make sure a has enough precision for the output value */
3508   if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3509     return res;
3510 
3511   /*
3512     Add up all digits up to the precision of b.  If b had initially
3513     the same precision as a, or greater, we took care of it by the
3514     padding step above, so there is no problem.  If b had initially
3515     less precision, we'll have to make sure the carry out is duly
3516     propagated upward among the higher-order digits of the sum.
3517    */
3518   pa = MP_DIGITS(a);
3519   pb = MP_DIGITS(b);
3520   used = MP_USED(b);
3521   for(ix = 0; ix < used; ix++) {
3522 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3523     w = w + *pa + *pb++;
3524     *pa++ = ACCUM(w);
3525     w = CARRYOUT(w);
3526 #else
3527     d = *pa;
3528     sum = d + *pb++;
3529     d = (sum < d);			/* detect overflow */
3530     *pa++ = sum += carry;
3531     carry = d + (sum < carry);		/* detect overflow */
3532 #endif
3533   }
3534 
3535   /* If we run out of 'b' digits before we're actually done, make
3536      sure the carries get propagated upward...
3537    */
3538   used = MP_USED(a);
3539 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3540   while (w && ix < used) {
3541     w = w + *pa;
3542     *pa++ = ACCUM(w);
3543     w = CARRYOUT(w);
3544     ++ix;
3545   }
3546 #else
3547   while (carry && ix < used) {
3548     sum = carry + *pa;
3549     *pa++ = sum;
3550     carry = !sum;
3551     ++ix;
3552   }
3553 #endif
3554 
3555   /* If there's an overall carry out, increase precision and include
3556      it.  We could have done this initially, but why touch the memory
3557      allocator unless we're sure we have to?
3558    */
3559 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3560   if (w) {
3561     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3562       return res;
3563 
3564     DIGIT(a, ix) = (mp_digit)w;
3565   }
3566 #else
3567   if (carry) {
3568     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3569       return res;
3570 
3571     DIGIT(a, used) = carry;
3572   }
3573 #endif
3574 
3575   return MP_OKAY;
3576 } /* end s_mp_add() */
3577 
3578 /* }}} */
3579 
3580 /* Compute c = |a| + |b|         */ /* magnitude addition      */
3581 mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3582 {
3583   mp_digit *pa, *pb, *pc;
3584 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3585   mp_word   w = 0;
3586 #else
3587   mp_digit  sum, carry = 0, d;
3588 #endif
3589   mp_size   ix;
3590   mp_size   used;
3591   mp_err    res;
3592 
3593   MP_SIGN(c) = MP_SIGN(a);
3594   if (MP_USED(a) < MP_USED(b)) {
3595     const mp_int *xch = a;
3596     a = b;
3597     b = xch;
3598   }
3599 
3600   /* Make sure a has enough precision for the output value */
3601   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3602     return res;
3603 
3604   /*
3605     Add up all digits up to the precision of b.  If b had initially
3606     the same precision as a, or greater, we took care of it by the
3607     exchange step above, so there is no problem.  If b had initially
3608     less precision, we'll have to make sure the carry out is duly
3609     propagated upward among the higher-order digits of the sum.
3610    */
3611   pa = MP_DIGITS(a);
3612   pb = MP_DIGITS(b);
3613   pc = MP_DIGITS(c);
3614   used = MP_USED(b);
3615   for (ix = 0; ix < used; ix++) {
3616 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3617     w = w + *pa++ + *pb++;
3618     *pc++ = ACCUM(w);
3619     w = CARRYOUT(w);
3620 #else
3621     d = *pa++;
3622     sum = d + *pb++;
3623     d = (sum < d);			/* detect overflow */
3624     *pc++ = sum += carry;
3625     carry = d + (sum < carry);		/* detect overflow */
3626 #endif
3627   }
3628 
3629   /* If we run out of 'b' digits before we're actually done, make
3630      sure the carries get propagated upward...
3631    */
3632   for (used = MP_USED(a); ix < used; ++ix) {
3633 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3634     w = w + *pa++;
3635     *pc++ = ACCUM(w);
3636     w = CARRYOUT(w);
3637 #else
3638     *pc++ = sum = carry + *pa++;
3639     carry = (sum < carry);
3640 #endif
3641   }
3642 
3643   /* If there's an overall carry out, increase precision and include
3644      it.  We could have done this initially, but why touch the memory
3645      allocator unless we're sure we have to?
3646    */
3647 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3648   if (w) {
3649     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3650       return res;
3651 
3652     DIGIT(c, used) = (mp_digit)w;
3653     ++used;
3654   }
3655 #else
3656   if (carry) {
3657     if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3658       return res;
3659 
3660     DIGIT(c, used) = carry;
3661     ++used;
3662   }
3663 #endif
3664   MP_USED(c) = used;
3665   return MP_OKAY;
3666 }
3667 /* {{{ s_mp_add_offset(a, b, offset) */
3668 
3669 /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3670 mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3671 {
3672 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3673   mp_word   w, k = 0;
3674 #else
3675   mp_digit  d, sum, carry = 0;
3676 #endif
3677   mp_size   ib;
3678   mp_size   ia;
3679   mp_size   lim;
3680   mp_err    res;
3681 
3682   /* Make sure a has enough precision for the output value */
3683   lim = MP_USED(b) + offset;
3684   if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3685     return res;
3686 
3687   /*
3688     Add up all digits up to the precision of b.  If b had initially
3689     the same precision as a, or greater, we took care of it by the
3690     padding step above, so there is no problem.  If b had initially
3691     less precision, we'll have to make sure the carry out is duly
3692     propagated upward among the higher-order digits of the sum.
3693    */
3694   lim = USED(b);
3695   for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3696 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3697     w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3698     DIGIT(a, ia) = ACCUM(w);
3699     k = CARRYOUT(w);
3700 #else
3701     d = MP_DIGIT(a, ia);
3702     sum = d + MP_DIGIT(b, ib);
3703     d = (sum < d);
3704     MP_DIGIT(a,ia) = sum += carry;
3705     carry = d + (sum < carry);
3706 #endif
3707   }
3708 
3709   /* If we run out of 'b' digits before we're actually done, make
3710      sure the carries get propagated upward...
3711    */
3712 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3713   for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3714     w = (mp_word)DIGIT(a, ia) + k;
3715     DIGIT(a, ia) = ACCUM(w);
3716     k = CARRYOUT(w);
3717   }
3718 #else
3719   for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3720     d = MP_DIGIT(a, ia);
3721     MP_DIGIT(a,ia) = sum = d + carry;
3722     carry = (sum < d);
3723   }
3724 #endif
3725 
3726   /* If there's an overall carry out, increase precision and include
3727      it.  We could have done this initially, but why touch the memory
3728      allocator unless we're sure we have to?
3729    */
3730 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3731   if(k) {
3732     if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3733       return res;
3734 
3735     DIGIT(a, ia) = (mp_digit)k;
3736   }
3737 #else
3738   if (carry) {
3739     if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3740       return res;
3741 
3742     DIGIT(a, lim) = carry;
3743   }
3744 #endif
3745   s_mp_clamp(a);
3746 
3747   return MP_OKAY;
3748 
3749 } /* end s_mp_add_offset() */
3750 
3751 /* }}} */
3752 
3753 /* {{{ s_mp_sub(a, b) */
3754 
3755 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3756 mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
3757 {
3758   mp_digit *pa, *pb, *limit;
3759 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3760   mp_sword  w = 0;
3761 #else
3762   mp_digit  d, diff, borrow = 0;
3763 #endif
3764 
3765   /*
3766     Subtract and propagate borrow.  Up to the precision of b, this
3767     accounts for the digits of b; after that, we just make sure the
3768     carries get to the right place.  This saves having to pad b out to
3769     the precision of a just to make the loops work right...
3770    */
3771   pa = MP_DIGITS(a);
3772   pb = MP_DIGITS(b);
3773   limit = pb + MP_USED(b);
3774   while (pb < limit) {
3775 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3776     w = w + *pa - *pb++;
3777     *pa++ = ACCUM(w);
3778     w >>= MP_DIGIT_BIT;
3779 #else
3780     d = *pa;
3781     diff = d - *pb++;
3782     d = (diff > d);				/* detect borrow */
3783     if (borrow && --diff == MP_DIGIT_MAX)
3784       ++d;
3785     *pa++ = diff;
3786     borrow = d;
3787 #endif
3788   }
3789   limit = MP_DIGITS(a) + MP_USED(a);
3790 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3791   while (w && pa < limit) {
3792     w = w + *pa;
3793     *pa++ = ACCUM(w);
3794     w >>= MP_DIGIT_BIT;
3795   }
3796 #else
3797   while (borrow && pa < limit) {
3798     d = *pa;
3799     *pa++ = diff = d - borrow;
3800     borrow = (diff > d);
3801   }
3802 #endif
3803 
3804   /* Clobber any leading zeroes we created    */
3805   s_mp_clamp(a);
3806 
3807   /*
3808      If there was a borrow out, then |b| > |a| in violation
3809      of our input invariant.  We've already done the work,
3810      but we'll at least complain about it...
3811    */
3812 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3813   return w ? MP_RANGE : MP_OKAY;
3814 #else
3815   return borrow ? MP_RANGE : MP_OKAY;
3816 #endif
3817 } /* end s_mp_sub() */
3818 
3819 /* }}} */
3820 
3821 /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
3822 mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3823 {
3824   mp_digit *pa, *pb, *pc;
3825 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3826   mp_sword  w = 0;
3827 #else
3828   mp_digit  d, diff, borrow = 0;
3829 #endif
3830   int       ix, limit;
3831   mp_err    res;
3832 
3833   MP_SIGN(c) = MP_SIGN(a);
3834 
3835   /* Make sure a has enough precision for the output value */
3836   if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3837     return res;
3838 
3839   /*
3840     Subtract and propagate borrow.  Up to the precision of b, this
3841     accounts for the digits of b; after that, we just make sure the
3842     carries get to the right place.  This saves having to pad b out to
3843     the precision of a just to make the loops work right...
3844    */
3845   pa = MP_DIGITS(a);
3846   pb = MP_DIGITS(b);
3847   pc = MP_DIGITS(c);
3848   limit = MP_USED(b);
3849   for (ix = 0; ix < limit; ++ix) {
3850 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3851     w = w + *pa++ - *pb++;
3852     *pc++ = ACCUM(w);
3853     w >>= MP_DIGIT_BIT;
3854 #else
3855     d = *pa++;
3856     diff = d - *pb++;
3857     d = (diff > d);
3858     if (borrow && --diff == MP_DIGIT_MAX)
3859       ++d;
3860     *pc++ = diff;
3861     borrow = d;
3862 #endif
3863   }
3864   for (limit = MP_USED(a); ix < limit; ++ix) {
3865 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3866     w = w + *pa++;
3867     *pc++ = ACCUM(w);
3868     w >>= MP_DIGIT_BIT;
3869 #else
3870     d = *pa++;
3871     *pc++ = diff = d - borrow;
3872     borrow = (diff > d);
3873 #endif
3874   }
3875 
3876   /* Clobber any leading zeroes we created    */
3877   MP_USED(c) = ix;
3878   s_mp_clamp(c);
3879 
3880   /*
3881      If there was a borrow out, then |b| > |a| in violation
3882      of our input invariant.  We've already done the work,
3883      but we'll at least complain about it...
3884    */
3885 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3886   return w ? MP_RANGE : MP_OKAY;
3887 #else
3888   return borrow ? MP_RANGE : MP_OKAY;
3889 #endif
3890 }
3891 /* {{{ s_mp_mul(a, b) */
3892 
3893 /* Compute a = |a| * |b|                                                  */
3894 mp_err   s_mp_mul(mp_int *a, const mp_int *b)
3895 {
3896   return mp_mul(a, b, a);
3897 } /* end s_mp_mul() */
3898 
3899 /* }}} */
3900 
3901 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3902 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3903 #define MP_MUL_DxD(a, b, Phi, Plo) \
3904   { unsigned long long product = (unsigned long long)a * b; \
3905     Plo = (mp_digit)product; \
3906     Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3907 #elif defined(OSF1)
3908 #define MP_MUL_DxD(a, b, Phi, Plo) \
3909   { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3910     Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3911 #else
3912 #define MP_MUL_DxD(a, b, Phi, Plo) \
3913   { mp_digit a0b1, a1b0; \
3914     Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3915     Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3916     a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3917     a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3918     a1b0 += a0b1; \
3919     Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3920     if (a1b0 < a0b1)  \
3921       Phi += MP_HALF_RADIX; \
3922     a1b0 <<= MP_HALF_DIGIT_BIT; \
3923     Plo += a1b0; \
3924     if (Plo < a1b0) \
3925       ++Phi; \
3926   }
3927 #endif
3928 
3929 #if !defined(MP_ASSEMBLY_MULTIPLY)
3930 /* c = a * b */
3931 void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3932 {
3933 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3934   mp_digit   d = 0;
3935 
3936   /* Inner product:  Digits of a */
3937   while (a_len--) {
3938     mp_word w = ((mp_word)b * *a++) + d;
3939     *c++ = ACCUM(w);
3940     d = CARRYOUT(w);
3941   }
3942   *c = d;
3943 #else
3944   mp_digit carry = 0;
3945   while (a_len--) {
3946     mp_digit a_i = *a++;
3947     mp_digit a0b0, a1b1;
3948 
3949     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3950 
3951     a0b0 += carry;
3952     if (a0b0 < carry)
3953       ++a1b1;
3954     *c++ = a0b0;
3955     carry = a1b1;
3956   }
3957   *c = carry;
3958 #endif
3959 }
3960 
3961 /* c += a * b */
3962 void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3963 			      mp_digit *c)
3964 {
3965 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3966   mp_digit   d = 0;
3967 
3968   /* Inner product:  Digits of a */
3969   while (a_len--) {
3970     mp_word w = ((mp_word)b * *a++) + *c + d;
3971     *c++ = ACCUM(w);
3972     d = CARRYOUT(w);
3973   }
3974   *c = d;
3975 #else
3976   mp_digit carry = 0;
3977   while (a_len--) {
3978     mp_digit a_i = *a++;
3979     mp_digit a0b0, a1b1;
3980 
3981     MP_MUL_DxD(a_i, b, a1b1, a0b0);
3982 
3983     a0b0 += carry;
3984     if (a0b0 < carry)
3985       ++a1b1;
3986     a0b0 += a_i = *c;
3987     if (a0b0 < a_i)
3988       ++a1b1;
3989     *c++ = a0b0;
3990     carry = a1b1;
3991   }
3992   *c = carry;
3993 #endif
3994 }
3995 
3996 /* Presently, this is only used by the Montgomery arithmetic code. */
3997 /* c += a * b */
3998 void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3999 {
4000 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4001   mp_digit   d = 0;
4002 
4003   /* Inner product:  Digits of a */
4004   while (a_len--) {
4005     mp_word w = ((mp_word)b * *a++) + *c + d;
4006     *c++ = ACCUM(w);
4007     d = CARRYOUT(w);
4008   }
4009 
4010   while (d) {
4011     mp_word w = (mp_word)*c + d;
4012     *c++ = ACCUM(w);
4013     d = CARRYOUT(w);
4014   }
4015 #else
4016   mp_digit carry = 0;
4017   while (a_len--) {
4018     mp_digit a_i = *a++;
4019     mp_digit a0b0, a1b1;
4020 
4021     MP_MUL_DxD(a_i, b, a1b1, a0b0);
4022 
4023     a0b0 += carry;
4024     if (a0b0 < carry)
4025       ++a1b1;
4026 
4027     a0b0 += a_i = *c;
4028     if (a0b0 < a_i)
4029       ++a1b1;
4030 
4031     *c++ = a0b0;
4032     carry = a1b1;
4033   }
4034   while (carry) {
4035     mp_digit c_i = *c;
4036     carry += c_i;
4037     *c++ = carry;
4038     carry = carry < c_i;
4039   }
4040 #endif
4041 }
4042 #endif
4043 
4044 #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4045 /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4046 #define MP_SQR_D(a, Phi, Plo) \
4047   { unsigned long long square = (unsigned long long)a * a; \
4048     Plo = (mp_digit)square; \
4049     Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4050 #elif defined(OSF1)
4051 #define MP_SQR_D(a, Phi, Plo) \
4052   { Plo = asm ("mulq  %a0, %a0, %v0", a);\
4053     Phi = asm ("umulh %a0, %a0, %v0", a); }
4054 #else
4055 #define MP_SQR_D(a, Phi, Plo) \
4056   { mp_digit Pmid; \
4057     Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
4058     Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4059     Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4060     Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
4061     Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
4062     Plo += Pmid;  \
4063     if (Plo < Pmid)  \
4064       ++Phi;  \
4065   }
4066 #endif
4067 
4068 #if !defined(MP_ASSEMBLY_SQUARE)
4069 /* Add the squares of the digits of a to the digits of b. */
4070 void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4071 {
4072 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4073   mp_word  w;
4074   mp_digit d;
4075   mp_size  ix;
4076 
4077   w  = 0;
4078 #define ADD_SQUARE(n) \
4079     d = pa[n]; \
4080     w += (d * (mp_word)d) + ps[2*n]; \
4081     ps[2*n] = ACCUM(w); \
4082     w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4083     ps[2*n+1] = ACCUM(w); \
4084     w = (w >> DIGIT_BIT)
4085 
4086   for (ix = a_len; ix >= 4; ix -= 4) {
4087     ADD_SQUARE(0);
4088     ADD_SQUARE(1);
4089     ADD_SQUARE(2);
4090     ADD_SQUARE(3);
4091     pa += 4;
4092     ps += 8;
4093   }
4094   if (ix) {
4095     ps += 2*ix;
4096     pa += ix;
4097     switch (ix) {
4098     case 3: ADD_SQUARE(-3); /* FALLTHRU */
4099     case 2: ADD_SQUARE(-2); /* FALLTHRU */
4100     case 1: ADD_SQUARE(-1); /* FALLTHRU */
4101     case 0: break;
4102     }
4103   }
4104   while (w) {
4105     w += *ps;
4106     *ps++ = ACCUM(w);
4107     w = (w >> DIGIT_BIT);
4108   }
4109 #else
4110   mp_digit carry = 0;
4111   while (a_len--) {
4112     mp_digit a_i = *pa++;
4113     mp_digit a0a0, a1a1;
4114 
4115     MP_SQR_D(a_i, a1a1, a0a0);
4116 
4117     /* here a1a1 and a0a0 constitute a_i ** 2 */
4118     a0a0 += carry;
4119     if (a0a0 < carry)
4120       ++a1a1;
4121 
4122     /* now add to ps */
4123     a0a0 += a_i = *ps;
4124     if (a0a0 < a_i)
4125       ++a1a1;
4126     *ps++ = a0a0;
4127     a1a1 += a_i = *ps;
4128     carry = (a1a1 < a_i);
4129     *ps++ = a1a1;
4130   }
4131   while (carry) {
4132     mp_digit s_i = *ps;
4133     carry += s_i;
4134     *ps++ = carry;
4135     carry = carry < s_i;
4136   }
4137 #endif
4138 }
4139 #endif
4140 
4141 #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4142 && !defined(MP_ASSEMBLY_DIV_2DX1D)
4143 /*
4144 ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4145 ** so its high bit is 1.   This code is from NSPR.
4146 */
4147 mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4148 		       mp_digit *qp, mp_digit *rp)
4149 {
4150     mp_digit d1, d0, q1, q0;
4151     mp_digit r1, r0, m;
4152 
4153     d1 = divisor >> MP_HALF_DIGIT_BIT;
4154     d0 = divisor & MP_HALF_DIGIT_MAX;
4155     r1 = Nhi % d1;
4156     q1 = Nhi / d1;
4157     m = q1 * d0;
4158     r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4159     if (r1 < m) {
4160         q1--, r1 += divisor;
4161         if (r1 >= divisor && r1 < m) {
4162 	    q1--, r1 += divisor;
4163 	}
4164     }
4165     r1 -= m;
4166     r0 = r1 % d1;
4167     q0 = r1 / d1;
4168     m = q0 * d0;
4169     r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4170     if (r0 < m) {
4171         q0--, r0 += divisor;
4172         if (r0 >= divisor && r0 < m) {
4173 	    q0--, r0 += divisor;
4174 	}
4175     }
4176     if (qp)
4177 	*qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4178     if (rp)
4179 	*rp = r0 - m;
4180     return MP_OKAY;
4181 }
4182 #endif
4183 
4184 #if MP_SQUARE
4185 /* {{{ s_mp_sqr(a) */
4186 
4187 mp_err   s_mp_sqr(mp_int *a)
4188 {
4189   mp_err   res;
4190   mp_int   tmp;
4191 
4192   if((res = mp_init_size(&tmp, 2 * USED(a), FLAG(a))) != MP_OKAY)
4193     return res;
4194   res = mp_sqr(a, &tmp);
4195   if (res == MP_OKAY) {
4196     s_mp_exch(&tmp, a);
4197   }
4198   mp_clear(&tmp);
4199   return res;
4200 }
4201 
4202 /* }}} */
4203 #endif
4204 
4205 /* {{{ s_mp_div(a, b) */
4206 
4207 /*
4208   s_mp_div(a, b)
4209 
4210   Compute a = a / b and b = a mod b.  Assumes b > a.
4211  */
4212 
4213 mp_err   s_mp_div(mp_int *rem, 	/* i: dividend, o: remainder */
4214                   mp_int *div, 	/* i: divisor                */
4215 		  mp_int *quot)	/* i: 0;        o: quotient  */
4216 {
4217   mp_int   part, t;
4218 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4219   mp_word  q_msd;
4220 #else
4221   mp_digit q_msd;
4222 #endif
4223   mp_err   res;
4224   mp_digit d;
4225   mp_digit div_msd;
4226   int      ix;
4227 
4228   if(mp_cmp_z(div) == 0)
4229     return MP_RANGE;
4230 
4231   /* Shortcut if divisor is power of two */
4232   if((ix = s_mp_ispow2(div)) >= 0) {
4233     MP_CHECKOK( mp_copy(rem, quot) );
4234     s_mp_div_2d(quot, (mp_digit)ix);
4235     s_mp_mod_2d(rem,  (mp_digit)ix);
4236 
4237     return MP_OKAY;
4238   }
4239 
4240   DIGITS(&t) = 0;
4241   MP_SIGN(rem) = ZPOS;
4242   MP_SIGN(div) = ZPOS;
4243 
4244   /* A working temporary for division     */
4245   MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem), FLAG(rem)));
4246 
4247   /* Normalize to optimize guessing       */
4248   MP_CHECKOK( s_mp_norm(rem, div, &d) );
4249 
4250   part = *rem;
4251 
4252   /* Perform the division itself...woo!   */
4253   MP_USED(quot) = MP_ALLOC(quot);
4254 
4255   /* Find a partial substring of rem which is at least div */
4256   /* If we didn't find one, we're finished dividing    */
4257   while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4258     int i;
4259     int unusedRem;
4260 
4261     unusedRem = MP_USED(rem) - MP_USED(div);
4262     MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4263     MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
4264     MP_USED(&part)   = MP_USED(div);
4265     if (s_mp_cmp(&part, div) < 0) {
4266       -- unusedRem;
4267 #if MP_ARGCHK == 2
4268       assert(unusedRem >= 0);
4269 #endif
4270       -- MP_DIGITS(&part);
4271       ++ MP_USED(&part);
4272       ++ MP_ALLOC(&part);
4273     }
4274 
4275     /* Compute a guess for the next quotient digit       */
4276     q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4277     div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4278     if (q_msd >= div_msd) {
4279       q_msd = 1;
4280     } else if (MP_USED(&part) > 1) {
4281 #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4282       q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4283       q_msd /= div_msd;
4284       if (q_msd == RADIX)
4285         --q_msd;
4286 #else
4287       mp_digit r;
4288       MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4289 				  div_msd, &q_msd, &r) );
4290 #endif
4291     } else {
4292       q_msd = 0;
4293     }
4294 #if MP_ARGCHK == 2
4295     assert(q_msd > 0); /* This case should never occur any more. */
4296 #endif
4297     if (q_msd <= 0)
4298       break;
4299 
4300     /* See what that multiplies out to                   */
4301     mp_copy(div, &t);
4302     MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4303 
4304     /*
4305        If it's too big, back it off.  We should not have to do this
4306        more than once, or, in rare cases, twice.  Knuth describes a
4307        method by which this could be reduced to a maximum of once, but
4308        I didn't implement that here.
4309      * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4310      */
4311     for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4312       --q_msd;
4313       s_mp_sub(&t, div);	/* t -= div */
4314     }
4315     if (i < 0) {
4316       res = MP_RANGE;
4317       goto CLEANUP;
4318     }
4319 
4320     /* At this point, q_msd should be the right next digit   */
4321     MP_CHECKOK( s_mp_sub(&part, &t) );	/* part -= t */
4322     s_mp_clamp(rem);
4323 
4324     /*
4325       Include the digit in the quotient.  We allocated enough memory
4326       for any quotient we could ever possibly get, so we should not
4327       have to check for failures here
4328      */
4329     MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4330   }
4331 
4332   /* Denormalize remainder                */
4333   if (d) {
4334     s_mp_div_2d(rem, d);
4335   }
4336 
4337   s_mp_clamp(quot);
4338 
4339 CLEANUP:
4340   mp_clear(&t);
4341 
4342   return res;
4343 
4344 } /* end s_mp_div() */
4345 
4346 
4347 /* }}} */
4348 
4349 /* {{{ s_mp_2expt(a, k) */
4350 
4351 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
4352 {
4353   mp_err    res;
4354   mp_size   dig, bit;
4355 
4356   dig = k / DIGIT_BIT;
4357   bit = k % DIGIT_BIT;
4358 
4359   mp_zero(a);
4360   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4361     return res;
4362 
4363   DIGIT(a, dig) |= ((mp_digit)1 << bit);
4364 
4365   return MP_OKAY;
4366 
4367 } /* end s_mp_2expt() */
4368 
4369 /* }}} */
4370 
4371 /* {{{ s_mp_reduce(x, m, mu) */
4372 
4373 /*
4374   Compute Barrett reduction, x (mod m), given a precomputed value for
4375   mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4376   faster than straight division, when many reductions by the same
4377   value of m are required (such as in modular exponentiation).  This
4378   can nearly halve the time required to do modular exponentiation,
4379   as compared to using the full integer divide to reduce.
4380 
4381   This algorithm was derived from the _Handbook of Applied
4382   Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4383   pp. 603-604.
4384  */
4385 
4386 mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4387 {
4388   mp_int   q;
4389   mp_err   res;
4390 
4391   if((res = mp_init_copy(&q, x)) != MP_OKAY)
4392     return res;
4393 
4394   s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
4395   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
4396   s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
4397 
4398   /* x = x mod b^(k+1), quick (no division) */
4399   s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4400 
4401   /* q = q * m mod b^(k+1), quick (no division) */
4402   s_mp_mul(&q, m);
4403   s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4404 
4405   /* x = x - q */
4406   if((res = mp_sub(x, &q, x)) != MP_OKAY)
4407     goto CLEANUP;
4408 
4409   /* If x < 0, add b^(k+1) to it */
4410   if(mp_cmp_z(x) < 0) {
4411     mp_set(&q, 1);
4412     if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4413       goto CLEANUP;
4414     if((res = mp_add(x, &q, x)) != MP_OKAY)
4415       goto CLEANUP;
4416   }
4417 
4418   /* Back off if it's too big */
4419   while(mp_cmp(x, m) >= 0) {
4420     if((res = s_mp_sub(x, m)) != MP_OKAY)
4421       break;
4422   }
4423 
4424  CLEANUP:
4425   mp_clear(&q);
4426 
4427   return res;
4428 
4429 } /* end s_mp_reduce() */
4430 
4431 /* }}} */
4432 
4433 /* }}} */
4434 
4435 /* {{{ Primitive comparisons */
4436 
4437 /* {{{ s_mp_cmp(a, b) */
4438 
4439 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4440 int      s_mp_cmp(const mp_int *a, const mp_int *b)
4441 {
4442   mp_size used_a = MP_USED(a);
4443   {
4444     mp_size used_b = MP_USED(b);
4445 
4446     if (used_a > used_b)
4447       goto IS_GT;
4448     if (used_a < used_b)
4449       goto IS_LT;
4450   }
4451   {
4452     mp_digit *pa, *pb;
4453     mp_digit da = 0, db = 0;
4454 
4455 #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4456 
4457     pa = MP_DIGITS(a) + used_a;
4458     pb = MP_DIGITS(b) + used_a;
4459     while (used_a >= 4) {
4460       pa     -= 4;
4461       pb     -= 4;
4462       used_a -= 4;
4463       CMP_AB(3);
4464       CMP_AB(2);
4465       CMP_AB(1);
4466       CMP_AB(0);
4467     }
4468     while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4469       /* do nothing */;
4470 done:
4471     if (da > db)
4472       goto IS_GT;
4473     if (da < db)
4474       goto IS_LT;
4475   }
4476   return MP_EQ;
4477 IS_LT:
4478   return MP_LT;
4479 IS_GT:
4480   return MP_GT;
4481 } /* end s_mp_cmp() */
4482 
4483 /* }}} */
4484 
4485 /* {{{ s_mp_cmp_d(a, d) */
4486 
4487 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4488 int      s_mp_cmp_d(const mp_int *a, mp_digit d)
4489 {
4490   if(USED(a) > 1)
4491     return MP_GT;
4492 
4493   if(DIGIT(a, 0) < d)
4494     return MP_LT;
4495   else if(DIGIT(a, 0) > d)
4496     return MP_GT;
4497   else
4498     return MP_EQ;
4499 
4500 } /* end s_mp_cmp_d() */
4501 
4502 /* }}} */
4503 
4504 /* {{{ s_mp_ispow2(v) */
4505 
4506 /*
4507   Returns -1 if the value is not a power of two; otherwise, it returns
4508   k such that v = 2^k, i.e. lg(v).
4509  */
4510 int      s_mp_ispow2(const mp_int *v)
4511 {
4512   mp_digit d;
4513   int      extra = 0, ix;
4514 
4515   ix = MP_USED(v) - 1;
4516   d = MP_DIGIT(v, ix); /* most significant digit of v */
4517 
4518   extra = s_mp_ispow2d(d);
4519   if (extra < 0 || ix == 0)
4520     return extra;
4521 
4522   while (--ix >= 0) {
4523     if (DIGIT(v, ix) != 0)
4524       return -1; /* not a power of two */
4525     extra += MP_DIGIT_BIT;
4526   }
4527 
4528   return extra;
4529 
4530 } /* end s_mp_ispow2() */
4531 
4532 /* }}} */
4533 
4534 /* {{{ s_mp_ispow2d(d) */
4535 
4536 int      s_mp_ispow2d(mp_digit d)
4537 {
4538   if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4539     int pow = 0;
4540 #if defined (MP_USE_UINT_DIGIT)
4541     if (d & 0xffff0000U)
4542       pow += 16;
4543     if (d & 0xff00ff00U)
4544       pow += 8;
4545     if (d & 0xf0f0f0f0U)
4546       pow += 4;
4547     if (d & 0xccccccccU)
4548       pow += 2;
4549     if (d & 0xaaaaaaaaU)
4550       pow += 1;
4551 #elif defined(MP_USE_LONG_LONG_DIGIT)
4552     if (d & 0xffffffff00000000ULL)
4553       pow += 32;
4554     if (d & 0xffff0000ffff0000ULL)
4555       pow += 16;
4556     if (d & 0xff00ff00ff00ff00ULL)
4557       pow += 8;
4558     if (d & 0xf0f0f0f0f0f0f0f0ULL)
4559       pow += 4;
4560     if (d & 0xccccccccccccccccULL)
4561       pow += 2;
4562     if (d & 0xaaaaaaaaaaaaaaaaULL)
4563       pow += 1;
4564 #elif defined(MP_USE_LONG_DIGIT)
4565     if (d & 0xffffffff00000000UL)
4566       pow += 32;
4567     if (d & 0xffff0000ffff0000UL)
4568       pow += 16;
4569     if (d & 0xff00ff00ff00ff00UL)
4570       pow += 8;
4571     if (d & 0xf0f0f0f0f0f0f0f0UL)
4572       pow += 4;
4573     if (d & 0xccccccccccccccccUL)
4574       pow += 2;
4575     if (d & 0xaaaaaaaaaaaaaaaaUL)
4576       pow += 1;
4577 #else
4578 #error "unknown type for mp_digit"
4579 #endif
4580     return pow;
4581   }
4582   return -1;
4583 
4584 } /* end s_mp_ispow2d() */
4585 
4586 /* }}} */
4587 
4588 /* }}} */
4589 
4590 /* {{{ Primitive I/O helpers */
4591 
4592 /* {{{ s_mp_tovalue(ch, r) */
4593 
4594 /*
4595   Convert the given character to its digit value, in the given radix.
4596   If the given character is not understood in the given radix, -1 is
4597   returned.  Otherwise the digit's numeric value is returned.
4598 
4599   The results will be odd if you use a radix < 2 or > 62, you are
4600   expected to know what you're up to.
4601  */
4602 int      s_mp_tovalue(char ch, int r)
4603 {
4604   int    val, xch;
4605 
4606   if(r > 36)
4607     xch = ch;
4608   else
4609     xch = toupper(ch);
4610 
4611   if(isdigit(xch))
4612     val = xch - '0';
4613   else if(isupper(xch))
4614     val = xch - 'A' + 10;
4615   else if(islower(xch))
4616     val = xch - 'a' + 36;
4617   else if(xch == '+')
4618     val = 62;
4619   else if(xch == '/')
4620     val = 63;
4621   else
4622     return -1;
4623 
4624   if(val < 0 || val >= r)
4625     return -1;
4626 
4627   return val;
4628 
4629 } /* end s_mp_tovalue() */
4630 
4631 /* }}} */
4632 
4633 /* {{{ s_mp_todigit(val, r, low) */
4634 
4635 /*
4636   Convert val to a radix-r digit, if possible.  If val is out of range
4637   for r, returns zero.  Otherwise, returns an ASCII character denoting
4638   the value in the given radix.
4639 
4640   The results may be odd if you use a radix < 2 or > 64, you are
4641   expected to know what you're doing.
4642  */
4643 
4644 char     s_mp_todigit(mp_digit val, int r, int low)
4645 {
4646   char   ch;
4647 
4648   if(val >= r)
4649     return 0;
4650 
4651   ch = s_dmap_1[val];
4652 
4653   if(r <= 36 && low)
4654     ch = tolower(ch);
4655 
4656   return ch;
4657 
4658 } /* end s_mp_todigit() */
4659 
4660 /* }}} */
4661 
4662 /* {{{ s_mp_outlen(bits, radix) */
4663 
4664 /*
4665    Return an estimate for how long a string is needed to hold a radix
4666    r representation of a number with 'bits' significant bits, plus an
4667    extra for a zero terminator (assuming C style strings here)
4668  */
4669 int      s_mp_outlen(int bits, int r)
4670 {
4671   return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4672 
4673 } /* end s_mp_outlen() */
4674 
4675 /* }}} */
4676 
4677 /* }}} */
4678 
4679 /* {{{ mp_read_unsigned_octets(mp, str, len) */
4680 /* mp_read_unsigned_octets(mp, str, len)
4681    Read in a raw value (base 256) into the given mp_int
4682    No sign bit, number is positive.  Leading zeros ignored.
4683  */
4684 
4685 mp_err
4686 mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4687 {
4688   int            count;
4689   mp_err         res;
4690   mp_digit       d;
4691 
4692   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4693 
4694   mp_zero(mp);
4695 
4696   count = len % sizeof(mp_digit);
4697   if (count) {
4698     for (d = 0; count-- > 0; --len) {
4699       d = (d << 8) | *str++;
4700     }
4701     MP_DIGIT(mp, 0) = d;
4702   }
4703 
4704   /* Read the rest of the digits */
4705   for(; len > 0; len -= sizeof(mp_digit)) {
4706     for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4707       d = (d << 8) | *str++;
4708     }
4709     if (MP_EQ == mp_cmp_z(mp)) {
4710       if (!d)
4711 	continue;
4712     } else {
4713       if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4714 	return res;
4715     }
4716     MP_DIGIT(mp, 0) = d;
4717   }
4718   return MP_OKAY;
4719 } /* end mp_read_unsigned_octets() */
4720 /* }}} */
4721 
4722 /* {{{ mp_unsigned_octet_size(mp) */
4723 int
4724 mp_unsigned_octet_size(const mp_int *mp)
4725 {
4726   int  bytes;
4727   int  ix;
4728   mp_digit  d = 0;
4729 
4730   ARGCHK(mp != NULL, MP_BADARG);
4731   ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4732 
4733   bytes = (USED(mp) * sizeof(mp_digit));
4734 
4735   /* subtract leading zeros. */
4736   /* Iterate over each digit... */
4737   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4738     d = DIGIT(mp, ix);
4739     if (d)
4740 	break;
4741     bytes -= sizeof(d);
4742   }
4743   if (!bytes)
4744     return 1;
4745 
4746   /* Have MSD, check digit bytes, high order first */
4747   for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4748     unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4749     if (x)
4750 	break;
4751     --bytes;
4752   }
4753   return bytes;
4754 } /* end mp_unsigned_octet_size() */
4755 /* }}} */
4756 
4757 /* {{{ mp_to_unsigned_octets(mp, str) */
4758 /* output a buffer of big endian octets no longer than specified. */
4759 mp_err
4760 mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4761 {
4762   int  ix, pos = 0;
4763   int  bytes;
4764 
4765   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4766 
4767   bytes = mp_unsigned_octet_size(mp);
4768   ARGCHK(bytes <= maxlen, MP_BADARG);
4769 
4770   /* Iterate over each digit... */
4771   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4772     mp_digit  d = DIGIT(mp, ix);
4773     int       jx;
4774 
4775     /* Unpack digit bytes, high order first */
4776     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4777       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4778       if (!pos && !x)	/* suppress leading zeros */
4779 	continue;
4780       str[pos++] = x;
4781     }
4782   }
4783   if (!pos)
4784     str[pos++] = 0;
4785   return pos;
4786 } /* end mp_to_unsigned_octets() */
4787 /* }}} */
4788 
4789 /* {{{ mp_to_signed_octets(mp, str) */
4790 /* output a buffer of big endian octets no longer than specified. */
4791 mp_err
4792 mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4793 {
4794   int  ix, pos = 0;
4795   int  bytes;
4796 
4797   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4798 
4799   bytes = mp_unsigned_octet_size(mp);
4800   ARGCHK(bytes <= maxlen, MP_BADARG);
4801 
4802   /* Iterate over each digit... */
4803   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4804     mp_digit  d = DIGIT(mp, ix);
4805     int       jx;
4806 
4807     /* Unpack digit bytes, high order first */
4808     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4809       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4810       if (!pos) {
4811 	if (!x)		/* suppress leading zeros */
4812 	  continue;
4813 	if (x & 0x80) { /* add one leading zero to make output positive.  */
4814 	  ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4815 	  if (bytes + 1 > maxlen)
4816 	    return MP_BADARG;
4817 	  str[pos++] = 0;
4818 	}
4819       }
4820       str[pos++] = x;
4821     }
4822   }
4823   if (!pos)
4824     str[pos++] = 0;
4825   return pos;
4826 } /* end mp_to_signed_octets() */
4827 /* }}} */
4828 
4829 /* {{{ mp_to_fixlen_octets(mp, str) */
4830 /* output a buffer of big endian octets exactly as long as requested. */
4831 mp_err
4832 mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4833 {
4834   int  ix, pos = 0;
4835   int  bytes;
4836 
4837   ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4838 
4839   bytes = mp_unsigned_octet_size(mp);
4840   ARGCHK(bytes <= length, MP_BADARG);
4841 
4842   /* place any needed leading zeros */
4843   for (;length > bytes; --length) {
4844 	*str++ = 0;
4845   }
4846 
4847   /* Iterate over each digit... */
4848   for(ix = USED(mp) - 1; ix >= 0; ix--) {
4849     mp_digit  d = DIGIT(mp, ix);
4850     int       jx;
4851 
4852     /* Unpack digit bytes, high order first */
4853     for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4854       unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4855       if (!pos && !x)	/* suppress leading zeros */
4856 	continue;
4857       str[pos++] = x;
4858     }
4859   }
4860   if (!pos)
4861     str[pos++] = 0;
4862   return MP_OKAY;
4863 } /* end mp_to_fixlen_octets() */
4864 /* }}} */
4865 
4866 
4867 /*------------------------------------------------------------------------*/
4868 /* HERE THERE BE DRAGONS                                                  */
4869 /* END CSTYLED */
4870