xref: /illumos-gate/usr/src/common/mpi/mpi.c (revision e9610e3e86cbfb5fd6797a438c65b493250b4219)
1  /* BEGIN CSTYLED */
2  /*
3   *  mpi.c
4   *
5   *  Arbitrary precision integer arithmetic library
6   *
7   * ***** BEGIN LICENSE BLOCK *****
8   * Version: MPL 1.1/GPL 2.0/LGPL 2.1
9   *
10   * The contents of this file are subject to the Mozilla Public License Version
11   * 1.1 (the "License"); you may not use this file except in compliance with
12   * the License. You may obtain a copy of the License at
13   * http://www.mozilla.org/MPL/
14   *
15   * Software distributed under the License is distributed on an "AS IS" basis,
16   * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
17   * for the specific language governing rights and limitations under the
18   * License.
19   *
20   * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
21   *
22   * The Initial Developer of the Original Code is
23   * Michael J. Fromberger.
24   * Portions created by the Initial Developer are Copyright (C) 1998
25   * the Initial Developer. All Rights Reserved.
26   *
27   * Contributor(s):
28   *   Netscape Communications Corporation
29   *   Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
30   *
31   * Alternatively, the contents of this file may be used under the terms of
32   * either the GNU General Public License Version 2 or later (the "GPL"), or
33   * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34   * in which case the provisions of the GPL or the LGPL are applicable instead
35   * of those above. If you wish to allow use of your version of this file only
36   * under the terms of either the GPL or the LGPL, and not to allow others to
37   * use your version of this file under the terms of the MPL, indicate your
38   * decision by deleting the provisions above and replace them with the notice
39   * and other provisions required by the GPL or the LGPL. If you do not delete
40   * the provisions above, a recipient may use your version of this file under
41   * the terms of any one of the MPL, the GPL or the LGPL.
42   *
43   * ***** END LICENSE BLOCK ***** */
44  /*
45   * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
46   *
47   * Sun elects to use this software under the MPL license.
48   */
49  
50  /* $Id: mpi.c,v 1.45 2006/09/29 20:12:21 alexei.volkov.bugs%sun.com Exp $ */
51  
52  #include "mpi-priv.h"
53  #if defined(OSF1)
54  #include <c_asm.h>
55  #endif
56  
57  #if MP_LOGTAB
58  /*
59    A table of the logs of 2 for various bases (the 0 and 1 entries of
60    this table are meaningless and should not be referenced).
61  
62    This table is used to compute output lengths for the mp_toradix()
63    function.  Since a number n in radix r takes up about log_r(n)
64    digits, we estimate the output size by taking the least integer
65    greater than log_r(n), where:
66  
67    log_r(n) = log_2(n) * log_r(2)
68  
69    This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
70    which are the output bases supported.
71   */
72  #include "logtab.h"
73  #endif
74  
75  /* {{{ Constant strings */
76  
77  /* Constant strings returned by mp_strerror() */
78  static const char *mp_err_string[] = {
79    "unknown result code",     /* say what?            */
80    "boolean true",            /* MP_OKAY, MP_YES      */
81    "boolean false",           /* MP_NO                */
82    "out of memory",           /* MP_MEM               */
83    "argument out of range",   /* MP_RANGE             */
84    "invalid input parameter", /* MP_BADARG            */
85    "result is undefined"      /* MP_UNDEF             */
86  };
87  
88  /* Value to digit maps for radix conversion   */
89  
90  /* s_dmap_1 - standard digits and letters */
91  static const char *s_dmap_1 =
92    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
93  
94  /* }}} */
95  
96  unsigned long mp_allocs;
97  unsigned long mp_frees;
98  unsigned long mp_copies;
99  
100  /* {{{ Default precision manipulation */
101  
102  /* Default precision for newly created mp_int's      */
103  static mp_size s_mp_defprec = MP_DEFPREC;
104  
105  mp_size mp_get_prec(void)
106  {
107    return s_mp_defprec;
108  
109  } /* end mp_get_prec() */
110  
111  void         mp_set_prec(mp_size prec)
112  {
113    if(prec == 0)
114      s_mp_defprec = MP_DEFPREC;
115    else
116      s_mp_defprec = prec;
117  
118  } /* end mp_set_prec() */
119  
120  /* }}} */
121  
122  /*------------------------------------------------------------------------*/
123  /* {{{ mp_init(mp, kmflag) */
124  
125  /*
126    mp_init(mp, kmflag)
127  
128    Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
129    MP_MEM if memory could not be allocated for the structure.
130   */
131  
132  mp_err mp_init(mp_int *mp, int kmflag)
133  {
134    return mp_init_size(mp, s_mp_defprec, kmflag);
135  
136  } /* end mp_init() */
137  
138  /* }}} */
139  
140  /* {{{ mp_init_size(mp, prec, kmflag) */
141  
142  /*
143    mp_init_size(mp, prec, kmflag)
144  
145    Initialize a new zero-valued mp_int with at least the given
146    precision; returns MP_OKAY if successful, or MP_MEM if memory could
147    not be allocated for the structure.
148   */
149  
150  mp_err mp_init_size(mp_int *mp, mp_size prec, int kmflag)
151  {
152    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
153  
154    prec = MP_ROUNDUP(prec, s_mp_defprec);
155    if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit), kmflag)) == NULL)
156      return MP_MEM;
157  
158    SIGN(mp) = ZPOS;
159    USED(mp) = 1;
160    ALLOC(mp) = prec;
161    FLAG(mp) = kmflag;
162  
163    return MP_OKAY;
164  
165  } /* end mp_init_size() */
166  
167  /* }}} */
168  
169  /* {{{ mp_init_copy(mp, from) */
170  
171  /*
172    mp_init_copy(mp, from)
173  
174    Initialize mp as an exact copy of from.  Returns MP_OKAY if
175    successful, MP_MEM if memory could not be allocated for the new
176    structure.
177   */
178  
179  mp_err mp_init_copy(mp_int *mp, const mp_int *from)
180  {
181    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
182  
183    if(mp == from)
184      return MP_OKAY;
185  
186    if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
187      return MP_MEM;
188  
189    s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
190    USED(mp) = USED(from);
191    ALLOC(mp) = ALLOC(from);
192    SIGN(mp) = SIGN(from);
193    FLAG(mp) = FLAG(from);
194  
195    return MP_OKAY;
196  
197  } /* end mp_init_copy() */
198  
199  /* }}} */
200  
201  /* {{{ mp_copy(from, to) */
202  
203  /*
204    mp_copy(from, to)
205  
206    Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
207    'to' has already been initialized (if not, use mp_init_copy()
208    instead). If 'from' and 'to' are identical, nothing happens.
209   */
210  
211  mp_err mp_copy(const mp_int *from, mp_int *to)
212  {
213    ARGCHK(from != NULL && to != NULL, MP_BADARG);
214  
215    if(from == to)
216      return MP_OKAY;
217  
218    ++mp_copies;
219    { /* copy */
220      mp_digit   *tmp;
221  
222      /*
223        If the allocated buffer in 'to' already has enough space to hold
224        all the used digits of 'from', we'll re-use it to avoid hitting
225        the memory allocater more than necessary; otherwise, we'd have
226        to grow anyway, so we just allocate a hunk and make the copy as
227        usual
228       */
229      if(ALLOC(to) >= USED(from)) {
230        s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
231        s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
232  
233      } else {
234        if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
235  	return MP_MEM;
236  
237        s_mp_copy(DIGITS(from), tmp, USED(from));
238  
239        if(DIGITS(to) != NULL) {
240  #if MP_CRYPTO
241  	s_mp_setz(DIGITS(to), ALLOC(to));
242  #endif
243  	s_mp_free(DIGITS(to), ALLOC(to));
244        }
245  
246        DIGITS(to) = tmp;
247        ALLOC(to) = ALLOC(from);
248      }
249  
250      /* Copy the precision and sign from the original */
251      USED(to) = USED(from);
252      SIGN(to) = SIGN(from);
253      FLAG(to) = FLAG(from);
254    } /* end copy */
255  
256    return MP_OKAY;
257  
258  } /* end mp_copy() */
259  
260  /* }}} */
261  
262  /* {{{ mp_exch(mp1, mp2) */
263  
264  /*
265    mp_exch(mp1, mp2)
266  
267    Exchange mp1 and mp2 without allocating any intermediate memory
268    (well, unless you count the stack space needed for this call and the
269    locals it creates...).  This cannot fail.
270   */
271  
272  void mp_exch(mp_int *mp1, mp_int *mp2)
273  {
274  #if MP_ARGCHK == 2
275    assert(mp1 != NULL && mp2 != NULL);
276  #else
277    if(mp1 == NULL || mp2 == NULL)
278      return;
279  #endif
280  
281    s_mp_exch(mp1, mp2);
282  
283  } /* end mp_exch() */
284  
285  /* }}} */
286  
287  /* {{{ mp_clear(mp) */
288  
289  /*
290    mp_clear(mp)
291  
292    Release the storage used by an mp_int, and void its fields so that
293    if someone calls mp_clear() again for the same int later, we won't
294    get tollchocked.
295   */
296  
297  void   mp_clear(mp_int *mp)
298  {
299    if(mp == NULL)
300      return;
301  
302    if(DIGITS(mp) != NULL) {
303  #if MP_CRYPTO
304      s_mp_setz(DIGITS(mp), ALLOC(mp));
305  #endif
306      s_mp_free(DIGITS(mp), ALLOC(mp));
307      DIGITS(mp) = NULL;
308    }
309  
310    USED(mp) = 0;
311    ALLOC(mp) = 0;
312  
313  } /* end mp_clear() */
314  
315  /* }}} */
316  
317  /* {{{ mp_zero(mp) */
318  
319  /*
320    mp_zero(mp)
321  
322    Set mp to zero.  Does not change the allocated size of the structure,
323    and therefore cannot fail (except on a bad argument, which we ignore)
324   */
325  void   mp_zero(mp_int *mp)
326  {
327    if(mp == NULL)
328      return;
329  
330    s_mp_setz(DIGITS(mp), ALLOC(mp));
331    USED(mp) = 1;
332    SIGN(mp) = ZPOS;
333  
334  } /* end mp_zero() */
335  
336  /* }}} */
337  
338  /* {{{ mp_set(mp, d) */
339  
340  void   mp_set(mp_int *mp, mp_digit d)
341  {
342    if(mp == NULL)
343      return;
344  
345    mp_zero(mp);
346    DIGIT(mp, 0) = d;
347  
348  } /* end mp_set() */
349  
350  /* }}} */
351  
352  /* {{{ mp_set_int(mp, z) */
353  
354  mp_err mp_set_int(mp_int *mp, long z)
355  {
356    int            ix;
357    unsigned long  v = labs(z);
358    mp_err         res;
359  
360    ARGCHK(mp != NULL, MP_BADARG);
361  
362    mp_zero(mp);
363    if(z == 0)
364      return MP_OKAY;  /* shortcut for zero */
365  
366    if (sizeof v <= sizeof(mp_digit)) {
367      DIGIT(mp,0) = v;
368    } else {
369      for (ix = sizeof(long) - 1; ix >= 0; ix--) {
370        if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
371  	return res;
372  
373        res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
374        if (res != MP_OKAY)
375  	return res;
376      }
377    }
378    if(z < 0)
379      SIGN(mp) = NEG;
380  
381    return MP_OKAY;
382  
383  } /* end mp_set_int() */
384  
385  /* }}} */
386  
387  /* {{{ mp_set_ulong(mp, z) */
388  
389  mp_err mp_set_ulong(mp_int *mp, unsigned long z)
390  {
391    int            ix;
392    mp_err         res;
393  
394    ARGCHK(mp != NULL, MP_BADARG);
395  
396    mp_zero(mp);
397    if(z == 0)
398      return MP_OKAY;  /* shortcut for zero */
399  
400    if (sizeof z <= sizeof(mp_digit)) {
401      DIGIT(mp,0) = z;
402    } else {
403      for (ix = sizeof(long) - 1; ix >= 0; ix--) {
404        if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
405  	return res;
406  
407        res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
408        if (res != MP_OKAY)
409  	return res;
410      }
411    }
412    return MP_OKAY;
413  } /* end mp_set_ulong() */
414  
415  /* }}} */
416  
417  /*------------------------------------------------------------------------*/
418  /* {{{ Digit arithmetic */
419  
420  /* {{{ mp_add_d(a, d, b) */
421  
422  /*
423    mp_add_d(a, d, b)
424  
425    Compute the sum b = a + d, for a single digit d.  Respects the sign of
426    its primary addend (single digits are unsigned anyway).
427   */
428  
429  mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
430  {
431    mp_int   tmp;
432    mp_err   res;
433  
434    ARGCHK(a != NULL && b != NULL, MP_BADARG);
435  
436    if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
437      return res;
438  
439    if(SIGN(&tmp) == ZPOS) {
440      if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
441        goto CLEANUP;
442    } else if(s_mp_cmp_d(&tmp, d) >= 0) {
443      if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
444        goto CLEANUP;
445    } else {
446      mp_neg(&tmp, &tmp);
447  
448      DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
449    }
450  
451    if(s_mp_cmp_d(&tmp, 0) == 0)
452      SIGN(&tmp) = ZPOS;
453  
454    s_mp_exch(&tmp, b);
455  
456  CLEANUP:
457    mp_clear(&tmp);
458    return res;
459  
460  } /* end mp_add_d() */
461  
462  /* }}} */
463  
464  /* {{{ mp_sub_d(a, d, b) */
465  
466  /*
467    mp_sub_d(a, d, b)
468  
469    Compute the difference b = a - d, for a single digit d.  Respects the
470    sign of its subtrahend (single digits are unsigned anyway).
471   */
472  
473  mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
474  {
475    mp_int   tmp;
476    mp_err   res;
477  
478    ARGCHK(a != NULL && b != NULL, MP_BADARG);
479  
480    if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
481      return res;
482  
483    if(SIGN(&tmp) == NEG) {
484      if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
485        goto CLEANUP;
486    } else if(s_mp_cmp_d(&tmp, d) >= 0) {
487      if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
488        goto CLEANUP;
489    } else {
490      mp_neg(&tmp, &tmp);
491  
492      DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
493      SIGN(&tmp) = NEG;
494    }
495  
496    if(s_mp_cmp_d(&tmp, 0) == 0)
497      SIGN(&tmp) = ZPOS;
498  
499    s_mp_exch(&tmp, b);
500  
501  CLEANUP:
502    mp_clear(&tmp);
503    return res;
504  
505  } /* end mp_sub_d() */
506  
507  /* }}} */
508  
509  /* {{{ mp_mul_d(a, d, b) */
510  
511  /*
512    mp_mul_d(a, d, b)
513  
514    Compute the product b = a * d, for a single digit d.  Respects the sign
515    of its multiplicand (single digits are unsigned anyway)
516   */
517  
518  mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
519  {
520    mp_err  res;
521  
522    ARGCHK(a != NULL && b != NULL, MP_BADARG);
523  
524    if(d == 0) {
525      mp_zero(b);
526      return MP_OKAY;
527    }
528  
529    if((res = mp_copy(a, b)) != MP_OKAY)
530      return res;
531  
532    res = s_mp_mul_d(b, d);
533  
534    return res;
535  
536  } /* end mp_mul_d() */
537  
538  /* }}} */
539  
540  /* {{{ mp_mul_2(a, c) */
541  
542  mp_err mp_mul_2(const mp_int *a, mp_int *c)
543  {
544    mp_err  res;
545  
546    ARGCHK(a != NULL && c != NULL, MP_BADARG);
547  
548    if((res = mp_copy(a, c)) != MP_OKAY)
549      return res;
550  
551    return s_mp_mul_2(c);
552  
553  } /* end mp_mul_2() */
554  
555  /* }}} */
556  
557  /* {{{ mp_div_d(a, d, q, r) */
558  
559  /*
560    mp_div_d(a, d, q, r)
561  
562    Compute the quotient q = a / d and remainder r = a mod d, for a
563    single digit d.  Respects the sign of its divisor (single digits are
564    unsigned anyway).
565   */
566  
567  mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
568  {
569    mp_err   res;
570    mp_int   qp;
571    mp_digit rem;
572    int      pow;
573  
574    ARGCHK(a != NULL, MP_BADARG);
575  
576    if(d == 0)
577      return MP_RANGE;
578  
579    /* Shortcut for powers of two ... */
580    if((pow = s_mp_ispow2d(d)) >= 0) {
581      mp_digit  mask;
582  
583      mask = ((mp_digit)1 << pow) - 1;
584      rem = DIGIT(a, 0) & mask;
585  
586      if(q) {
587        mp_copy(a, q);
588        s_mp_div_2d(q, pow);
589      }
590  
591      if(r)
592        *r = rem;
593  
594      return MP_OKAY;
595    }
596  
597    if((res = mp_init_copy(&qp, a)) != MP_OKAY)
598      return res;
599  
600    res = s_mp_div_d(&qp, d, &rem);
601  
602    if(s_mp_cmp_d(&qp, 0) == 0)
603      SIGN(q) = ZPOS;
604  
605    if(r)
606      *r = rem;
607  
608    if(q)
609      s_mp_exch(&qp, q);
610  
611    mp_clear(&qp);
612    return res;
613  
614  } /* end mp_div_d() */
615  
616  /* }}} */
617  
618  /* {{{ mp_div_2(a, c) */
619  
620  /*
621    mp_div_2(a, c)
622  
623    Compute c = a / 2, disregarding the remainder.
624   */
625  
626  mp_err mp_div_2(const mp_int *a, mp_int *c)
627  {
628    mp_err  res;
629  
630    ARGCHK(a != NULL && c != NULL, MP_BADARG);
631  
632    if((res = mp_copy(a, c)) != MP_OKAY)
633      return res;
634  
635    s_mp_div_2(c);
636  
637    return MP_OKAY;
638  
639  } /* end mp_div_2() */
640  
641  /* }}} */
642  
643  /* {{{ mp_expt_d(a, d, b) */
644  
645  mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
646  {
647    mp_int   s, x;
648    mp_err   res;
649  
650    ARGCHK(a != NULL && c != NULL, MP_BADARG);
651  
652    if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
653      return res;
654    if((res = mp_init_copy(&x, a)) != MP_OKAY)
655      goto X;
656  
657    DIGIT(&s, 0) = 1;
658  
659    while(d != 0) {
660      if(d & 1) {
661        if((res = s_mp_mul(&s, &x)) != MP_OKAY)
662  	goto CLEANUP;
663      }
664  
665      d /= 2;
666  
667      if((res = s_mp_sqr(&x)) != MP_OKAY)
668        goto CLEANUP;
669    }
670  
671    s_mp_exch(&s, c);
672  
673  CLEANUP:
674    mp_clear(&x);
675  X:
676    mp_clear(&s);
677  
678    return res;
679  
680  } /* end mp_expt_d() */
681  
682  /* }}} */
683  
684  /* }}} */
685  
686  /*------------------------------------------------------------------------*/
687  /* {{{ Full arithmetic */
688  
689  /* {{{ mp_abs(a, b) */
690  
691  /*
692    mp_abs(a, b)
693  
694    Compute b = |a|.  'a' and 'b' may be identical.
695   */
696  
697  mp_err mp_abs(const mp_int *a, mp_int *b)
698  {
699    mp_err   res;
700  
701    ARGCHK(a != NULL && b != NULL, MP_BADARG);
702  
703    if((res = mp_copy(a, b)) != MP_OKAY)
704      return res;
705  
706    SIGN(b) = ZPOS;
707  
708    return MP_OKAY;
709  
710  } /* end mp_abs() */
711  
712  /* }}} */
713  
714  /* {{{ mp_neg(a, b) */
715  
716  /*
717    mp_neg(a, b)
718  
719    Compute b = -a.  'a' and 'b' may be identical.
720   */
721  
722  mp_err mp_neg(const mp_int *a, mp_int *b)
723  {
724    mp_err   res;
725  
726    ARGCHK(a != NULL && b != NULL, MP_BADARG);
727  
728    if((res = mp_copy(a, b)) != MP_OKAY)
729      return res;
730  
731    if(s_mp_cmp_d(b, 0) == MP_EQ)
732      SIGN(b) = ZPOS;
733    else
734      SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
735  
736    return MP_OKAY;
737  
738  } /* end mp_neg() */
739  
740  /* }}} */
741  
742  /* {{{ mp_add(a, b, c) */
743  
744  /*
745    mp_add(a, b, c)
746  
747    Compute c = a + b.  All parameters may be identical.
748   */
749  
750  mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
751  {
752    mp_err  res;
753  
754    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
755  
756    if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
757      MP_CHECKOK( s_mp_add_3arg(a, b, c) );
758    } else if(s_mp_cmp(a, b) >= 0) {  /* different sign: |a| >= |b|   */
759      MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
760    } else {                          /* different sign: |a|  < |b|   */
761      MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
762    }
763  
764    if (s_mp_cmp_d(c, 0) == MP_EQ)
765      SIGN(c) = ZPOS;
766  
767  CLEANUP:
768    return res;
769  
770  } /* end mp_add() */
771  
772  /* }}} */
773  
774  /* {{{ mp_sub(a, b, c) */
775  
776  /*
777    mp_sub(a, b, c)
778  
779    Compute c = a - b.  All parameters may be identical.
780   */
781  
782  mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
783  {
784    mp_err  res;
785    int     magDiff;
786  
787    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
788  
789    if (a == b) {
790      mp_zero(c);
791      return MP_OKAY;
792    }
793  
794    if (MP_SIGN(a) != MP_SIGN(b)) {
795      MP_CHECKOK( s_mp_add_3arg(a, b, c) );
796    } else if (!(magDiff = s_mp_cmp(a, b))) {
797      mp_zero(c);
798      res = MP_OKAY;
799    } else if (magDiff > 0) {
800      MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
801    } else {
802      MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
803      MP_SIGN(c) = !MP_SIGN(a);
804    }
805  
806    if (s_mp_cmp_d(c, 0) == MP_EQ)
807      MP_SIGN(c) = MP_ZPOS;
808  
809  CLEANUP:
810    return res;
811  
812  } /* end mp_sub() */
813  
814  /* }}} */
815  
816  /* {{{ mp_mul(a, b, c) */
817  
818  /*
819    mp_mul(a, b, c)
820  
821    Compute c = a * b.  All parameters may be identical.
822   */
823  mp_err   mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
824  {
825    mp_digit *pb;
826    mp_int   tmp;
827    mp_err   res;
828    mp_size  ib;
829    mp_size  useda, usedb;
830  
831    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
832  
833    if (a == c) {
834      if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
835        return res;
836      if (a == b)
837        b = &tmp;
838      a = &tmp;
839    } else if (b == c) {
840      if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
841        return res;
842      b = &tmp;
843    } else {
844      MP_DIGITS(&tmp) = 0;
845    }
846  
847    if (MP_USED(a) < MP_USED(b)) {
848      const mp_int *xch = b;	/* switch a and b, to do fewer outer loops */
849      b = a;
850      a = xch;
851    }
852  
853    MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
854    if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
855      goto CLEANUP;
856  
857  #ifdef NSS_USE_COMBA
858    if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
859        if (MP_USED(a) == 4) {
860            s_mp_mul_comba_4(a, b, c);
861            goto CLEANUP;
862        }
863        if (MP_USED(a) == 8) {
864            s_mp_mul_comba_8(a, b, c);
865            goto CLEANUP;
866        }
867        if (MP_USED(a) == 16) {
868            s_mp_mul_comba_16(a, b, c);
869            goto CLEANUP;
870        }
871        if (MP_USED(a) == 32) {
872            s_mp_mul_comba_32(a, b, c);
873            goto CLEANUP;
874        }
875    }
876  #endif
877  
878    pb = MP_DIGITS(b);
879    s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
880  
881    /* Outer loop:  Digits of b */
882    useda = MP_USED(a);
883    usedb = MP_USED(b);
884    for (ib = 1; ib < usedb; ib++) {
885      mp_digit b_i    = *pb++;
886  
887      /* Inner product:  Digits of a */
888      if (b_i)
889        s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
890      else
891        MP_DIGIT(c, ib + useda) = b_i;
892    }
893  
894    s_mp_clamp(c);
895  
896    if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
897      SIGN(c) = ZPOS;
898    else
899      SIGN(c) = NEG;
900  
901  CLEANUP:
902    mp_clear(&tmp);
903    return res;
904  } /* end mp_mul() */
905  
906  /* }}} */
907  
908  /* {{{ mp_sqr(a, sqr) */
909  
910  #if MP_SQUARE
911  /*
912    Computes the square of a.  This can be done more
913    efficiently than a general multiplication, because many of the
914    computation steps are redundant when squaring.  The inner product
915    step is a bit more complicated, but we save a fair number of
916    iterations of the multiplication loop.
917   */
918  
919  /* sqr = a^2;   Caller provides both a and tmp; */
920  mp_err   mp_sqr(const mp_int *a, mp_int *sqr)
921  {
922    mp_digit *pa;
923    mp_digit d;
924    mp_err   res;
925    mp_size  ix;
926    mp_int   tmp;
927    int      count;
928  
929    ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
930  
931    if (a == sqr) {
932      if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
933        return res;
934      a = &tmp;
935    } else {
936      DIGITS(&tmp) = 0;
937      res = MP_OKAY;
938    }
939  
940    ix = 2 * MP_USED(a);
941    if (ix > MP_ALLOC(sqr)) {
942      MP_USED(sqr) = 1;
943      MP_CHECKOK( s_mp_grow(sqr, ix) );
944    }
945    MP_USED(sqr) = ix;
946    MP_DIGIT(sqr, 0) = 0;
947  
948  #ifdef NSS_USE_COMBA
949    if (IS_POWER_OF_2(MP_USED(a))) {
950        if (MP_USED(a) == 4) {
951            s_mp_sqr_comba_4(a, sqr);
952            goto CLEANUP;
953        }
954        if (MP_USED(a) == 8) {
955            s_mp_sqr_comba_8(a, sqr);
956            goto CLEANUP;
957        }
958        if (MP_USED(a) == 16) {
959            s_mp_sqr_comba_16(a, sqr);
960            goto CLEANUP;
961        }
962        if (MP_USED(a) == 32) {
963            s_mp_sqr_comba_32(a, sqr);
964            goto CLEANUP;
965        }
966    }
967  #endif
968  
969    pa = MP_DIGITS(a);
970    count = MP_USED(a) - 1;
971    if (count > 0) {
972      d = *pa++;
973      s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
974      for (ix = 3; --count > 0; ix += 2) {
975        d = *pa++;
976        s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
977      } /* for(ix ...) */
978      MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
979  
980      /* now sqr *= 2 */
981      s_mp_mul_2(sqr);
982    } else {
983      MP_DIGIT(sqr, 1) = 0;
984    }
985  
986    /* now add the squares of the digits of a to sqr. */
987    s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
988  
989    SIGN(sqr) = ZPOS;
990    s_mp_clamp(sqr);
991  
992  CLEANUP:
993    mp_clear(&tmp);
994    return res;
995  
996  } /* end mp_sqr() */
997  #endif
998  
999  /* }}} */
1000  
1001  /* {{{ mp_div(a, b, q, r) */
1002  
1003  /*
1004    mp_div(a, b, q, r)
1005  
1006    Compute q = a / b and r = a mod b.  Input parameters may be re-used
1007    as output parameters.  If q or r is NULL, that portion of the
1008    computation will be discarded (although it will still be computed)
1009   */
1010  mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1011  {
1012    mp_err   res;
1013    mp_int   *pQ, *pR;
1014    mp_int   qtmp, rtmp, btmp;
1015    int      cmp;
1016    mp_sign  signA;
1017    mp_sign  signB;
1018  
1019    ARGCHK(a != NULL && b != NULL, MP_BADARG);
1020  
1021    signA = MP_SIGN(a);
1022    signB = MP_SIGN(b);
1023  
1024    if(mp_cmp_z(b) == MP_EQ)
1025      return MP_RANGE;
1026  
1027    DIGITS(&qtmp) = 0;
1028    DIGITS(&rtmp) = 0;
1029    DIGITS(&btmp) = 0;
1030  
1031    /* Set up some temporaries... */
1032    if (!r || r == a || r == b) {
1033      MP_CHECKOK( mp_init_copy(&rtmp, a) );
1034      pR = &rtmp;
1035    } else {
1036      MP_CHECKOK( mp_copy(a, r) );
1037      pR = r;
1038    }
1039  
1040    if (!q || q == a || q == b) {
1041      MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a), FLAG(a)) );
1042      pQ = &qtmp;
1043    } else {
1044      MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1045      pQ = q;
1046      mp_zero(pQ);
1047    }
1048  
1049    /*
1050      If |a| <= |b|, we can compute the solution without division;
1051      otherwise, we actually do the work required.
1052     */
1053    if ((cmp = s_mp_cmp(a, b)) <= 0) {
1054      if (cmp) {
1055        /* r was set to a above. */
1056        mp_zero(pQ);
1057      } else {
1058        mp_set(pQ, 1);
1059        mp_zero(pR);
1060      }
1061    } else {
1062      MP_CHECKOK( mp_init_copy(&btmp, b) );
1063      MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1064    }
1065  
1066    /* Compute the signs for the output  */
1067    MP_SIGN(pR) = signA;   /* Sr = Sa              */
1068    /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1069    MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1070  
1071    if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1072      SIGN(pQ) = ZPOS;
1073    if(s_mp_cmp_d(pR, 0) == MP_EQ)
1074      SIGN(pR) = ZPOS;
1075  
1076    /* Copy output, if it is needed      */
1077    if(q && q != pQ)
1078      s_mp_exch(pQ, q);
1079  
1080    if(r && r != pR)
1081      s_mp_exch(pR, r);
1082  
1083  CLEANUP:
1084    mp_clear(&btmp);
1085    mp_clear(&rtmp);
1086    mp_clear(&qtmp);
1087  
1088    return res;
1089  
1090  } /* end mp_div() */
1091  
1092  /* }}} */
1093  
1094  /* {{{ mp_div_2d(a, d, q, r) */
1095  
1096  mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1097  {
1098    mp_err  res;
1099  
1100    ARGCHK(a != NULL, MP_BADARG);
1101  
1102    if(q) {
1103      if((res = mp_copy(a, q)) != MP_OKAY)
1104        return res;
1105    }
1106    if(r) {
1107      if((res = mp_copy(a, r)) != MP_OKAY)
1108        return res;
1109    }
1110    if(q) {
1111      s_mp_div_2d(q, d);
1112    }
1113    if(r) {
1114      s_mp_mod_2d(r, d);
1115    }
1116  
1117    return MP_OKAY;
1118  
1119  } /* end mp_div_2d() */
1120  
1121  /* }}} */
1122  
1123  /* {{{ mp_expt(a, b, c) */
1124  
1125  /*
1126    mp_expt(a, b, c)
1127  
1128    Compute c = a ** b, that is, raise a to the b power.  Uses a
1129    standard iterative square-and-multiply technique.
1130   */
1131  
1132  mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1133  {
1134    mp_int   s, x;
1135    mp_err   res;
1136    mp_digit d;
1137    int      dig, bit;
1138  
1139    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1140  
1141    if(mp_cmp_z(b) < 0)
1142      return MP_RANGE;
1143  
1144    if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1145      return res;
1146  
1147    mp_set(&s, 1);
1148  
1149    if((res = mp_init_copy(&x, a)) != MP_OKAY)
1150      goto X;
1151  
1152    /* Loop over low-order digits in ascending order */
1153    for(dig = 0; dig < (USED(b) - 1); dig++) {
1154      d = DIGIT(b, dig);
1155  
1156      /* Loop over bits of each non-maximal digit */
1157      for(bit = 0; bit < DIGIT_BIT; bit++) {
1158        if(d & 1) {
1159  	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1160  	  goto CLEANUP;
1161        }
1162  
1163        d >>= 1;
1164  
1165        if((res = s_mp_sqr(&x)) != MP_OKAY)
1166  	goto CLEANUP;
1167      }
1168    }
1169  
1170    /* Consider now the last digit... */
1171    d = DIGIT(b, dig);
1172  
1173    while(d) {
1174      if(d & 1) {
1175        if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1176  	goto CLEANUP;
1177      }
1178  
1179      d >>= 1;
1180  
1181      if((res = s_mp_sqr(&x)) != MP_OKAY)
1182        goto CLEANUP;
1183    }
1184  
1185    if(mp_iseven(b))
1186      SIGN(&s) = SIGN(a);
1187  
1188    res = mp_copy(&s, c);
1189  
1190  CLEANUP:
1191    mp_clear(&x);
1192  X:
1193    mp_clear(&s);
1194  
1195    return res;
1196  
1197  } /* end mp_expt() */
1198  
1199  /* }}} */
1200  
1201  /* {{{ mp_2expt(a, k) */
1202  
1203  /* Compute a = 2^k */
1204  
1205  mp_err mp_2expt(mp_int *a, mp_digit k)
1206  {
1207    ARGCHK(a != NULL, MP_BADARG);
1208  
1209    return s_mp_2expt(a, k);
1210  
1211  } /* end mp_2expt() */
1212  
1213  /* }}} */
1214  
1215  /* {{{ mp_mod(a, m, c) */
1216  
1217  /*
1218    mp_mod(a, m, c)
1219  
1220    Compute c = a (mod m).  Result will always be 0 <= c < m.
1221   */
1222  
1223  mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1224  {
1225    mp_err  res;
1226    int     mag;
1227  
1228    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1229  
1230    if(SIGN(m) == NEG)
1231      return MP_RANGE;
1232  
1233    /*
1234       If |a| > m, we need to divide to get the remainder and take the
1235       absolute value.
1236  
1237       If |a| < m, we don't need to do any division, just copy and adjust
1238       the sign (if a is negative).
1239  
1240       If |a| == m, we can simply set the result to zero.
1241  
1242       This order is intended to minimize the average path length of the
1243       comparison chain on common workloads -- the most frequent cases are
1244       that |a| != m, so we do those first.
1245     */
1246    if((mag = s_mp_cmp(a, m)) > 0) {
1247      if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1248        return res;
1249  
1250      if(SIGN(c) == NEG) {
1251        if((res = mp_add(c, m, c)) != MP_OKAY)
1252  	return res;
1253      }
1254  
1255    } else if(mag < 0) {
1256      if((res = mp_copy(a, c)) != MP_OKAY)
1257        return res;
1258  
1259      if(mp_cmp_z(a) < 0) {
1260        if((res = mp_add(c, m, c)) != MP_OKAY)
1261  	return res;
1262  
1263      }
1264  
1265    } else {
1266      mp_zero(c);
1267  
1268    }
1269  
1270    return MP_OKAY;
1271  
1272  } /* end mp_mod() */
1273  
1274  /* }}} */
1275  
1276  /* {{{ mp_mod_d(a, d, c) */
1277  
1278  /*
1279    mp_mod_d(a, d, c)
1280  
1281    Compute c = a (mod d).  Result will always be 0 <= c < d
1282   */
1283  mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1284  {
1285    mp_err   res;
1286    mp_digit rem;
1287  
1288    ARGCHK(a != NULL && c != NULL, MP_BADARG);
1289  
1290    if(s_mp_cmp_d(a, d) > 0) {
1291      if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1292        return res;
1293  
1294    } else {
1295      if(SIGN(a) == NEG)
1296        rem = d - DIGIT(a, 0);
1297      else
1298        rem = DIGIT(a, 0);
1299    }
1300  
1301    if(c)
1302      *c = rem;
1303  
1304    return MP_OKAY;
1305  
1306  } /* end mp_mod_d() */
1307  
1308  /* }}} */
1309  
1310  /* {{{ mp_sqrt(a, b) */
1311  
1312  /*
1313    mp_sqrt(a, b)
1314  
1315    Compute the integer square root of a, and store the result in b.
1316    Uses an integer-arithmetic version of Newton's iterative linear
1317    approximation technique to determine this value; the result has the
1318    following two properties:
1319  
1320       b^2 <= a
1321       (b+1)^2 >= a
1322  
1323    It is a range error to pass a negative value.
1324   */
1325  mp_err mp_sqrt(const mp_int *a, mp_int *b)
1326  {
1327    mp_int   x, t;
1328    mp_err   res;
1329    mp_size  used;
1330  
1331    ARGCHK(a != NULL && b != NULL, MP_BADARG);
1332  
1333    /* Cannot take square root of a negative value */
1334    if(SIGN(a) == NEG)
1335      return MP_RANGE;
1336  
1337    /* Special cases for zero and one, trivial     */
1338    if(mp_cmp_d(a, 1) <= 0)
1339      return mp_copy(a, b);
1340  
1341    /* Initialize the temporaries we'll use below  */
1342    if((res = mp_init_size(&t, USED(a), FLAG(a))) != MP_OKAY)
1343      return res;
1344  
1345    /* Compute an initial guess for the iteration as a itself */
1346    if((res = mp_init_copy(&x, a)) != MP_OKAY)
1347      goto X;
1348  
1349    used = MP_USED(&x);
1350    if (used > 1) {
1351      s_mp_rshd(&x, used / 2);
1352    }
1353  
1354    for(;;) {
1355      /* t = (x * x) - a */
1356      mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
1357      if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1358         (res = mp_sub(&t, a, &t)) != MP_OKAY)
1359        goto CLEANUP;
1360  
1361      /* t = t / 2x       */
1362      s_mp_mul_2(&x);
1363      if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1364        goto CLEANUP;
1365      s_mp_div_2(&x);
1366  
1367      /* Terminate the loop, if the quotient is zero */
1368      if(mp_cmp_z(&t) == MP_EQ)
1369        break;
1370  
1371      /* x = x - t       */
1372      if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1373        goto CLEANUP;
1374  
1375    }
1376  
1377    /* Copy result to output parameter */
1378    mp_sub_d(&x, 1, &x);
1379    s_mp_exch(&x, b);
1380  
1381   CLEANUP:
1382    mp_clear(&x);
1383   X:
1384    mp_clear(&t);
1385  
1386    return res;
1387  
1388  } /* end mp_sqrt() */
1389  
1390  /* }}} */
1391  
1392  /* }}} */
1393  
1394  /*------------------------------------------------------------------------*/
1395  /* {{{ Modular arithmetic */
1396  
1397  #if MP_MODARITH
1398  /* {{{ mp_addmod(a, b, m, c) */
1399  
1400  /*
1401    mp_addmod(a, b, m, c)
1402  
1403    Compute c = (a + b) mod m
1404   */
1405  
1406  mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1407  {
1408    mp_err  res;
1409  
1410    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1411  
1412    if((res = mp_add(a, b, c)) != MP_OKAY)
1413      return res;
1414    if((res = mp_mod(c, m, c)) != MP_OKAY)
1415      return res;
1416  
1417    return MP_OKAY;
1418  
1419  }
1420  
1421  /* }}} */
1422  
1423  /* {{{ mp_submod(a, b, m, c) */
1424  
1425  /*
1426    mp_submod(a, b, m, c)
1427  
1428    Compute c = (a - b) mod m
1429   */
1430  
1431  mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1432  {
1433    mp_err  res;
1434  
1435    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1436  
1437    if((res = mp_sub(a, b, c)) != MP_OKAY)
1438      return res;
1439    if((res = mp_mod(c, m, c)) != MP_OKAY)
1440      return res;
1441  
1442    return MP_OKAY;
1443  
1444  }
1445  
1446  /* }}} */
1447  
1448  /* {{{ mp_mulmod(a, b, m, c) */
1449  
1450  /*
1451    mp_mulmod(a, b, m, c)
1452  
1453    Compute c = (a * b) mod m
1454   */
1455  
1456  mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1457  {
1458    mp_err  res;
1459  
1460    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1461  
1462    if((res = mp_mul(a, b, c)) != MP_OKAY)
1463      return res;
1464    if((res = mp_mod(c, m, c)) != MP_OKAY)
1465      return res;
1466  
1467    return MP_OKAY;
1468  
1469  }
1470  
1471  /* }}} */
1472  
1473  /* {{{ mp_sqrmod(a, m, c) */
1474  
1475  #if MP_SQUARE
1476  mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1477  {
1478    mp_err  res;
1479  
1480    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1481  
1482    if((res = mp_sqr(a, c)) != MP_OKAY)
1483      return res;
1484    if((res = mp_mod(c, m, c)) != MP_OKAY)
1485      return res;
1486  
1487    return MP_OKAY;
1488  
1489  } /* end mp_sqrmod() */
1490  #endif
1491  
1492  /* }}} */
1493  
1494  /* {{{ s_mp_exptmod(a, b, m, c) */
1495  
1496  /*
1497    s_mp_exptmod(a, b, m, c)
1498  
1499    Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
1500    method with modular reductions at each step. (This is basically the
1501    same code as mp_expt(), except for the addition of the reductions)
1502  
1503    The modular reductions are done using Barrett's algorithm (see
1504    s_mp_reduce() below for details)
1505   */
1506  
1507  mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1508  {
1509    mp_int   s, x, mu;
1510    mp_err   res;
1511    mp_digit d;
1512    int      dig, bit;
1513  
1514    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1515  
1516    if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1517      return MP_RANGE;
1518  
1519    if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1520      return res;
1521    if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1522       (res = mp_mod(&x, m, &x)) != MP_OKAY)
1523      goto X;
1524    if((res = mp_init(&mu, FLAG(a))) != MP_OKAY)
1525      goto MU;
1526  
1527    mp_set(&s, 1);
1528  
1529    /* mu = b^2k / m */
1530    s_mp_add_d(&mu, 1);
1531    s_mp_lshd(&mu, 2 * USED(m));
1532    if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1533      goto CLEANUP;
1534  
1535    /* Loop over digits of b in ascending order, except highest order */
1536    for(dig = 0; dig < (USED(b) - 1); dig++) {
1537      d = DIGIT(b, dig);
1538  
1539      /* Loop over the bits of the lower-order digits */
1540      for(bit = 0; bit < DIGIT_BIT; bit++) {
1541        if(d & 1) {
1542  	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1543  	  goto CLEANUP;
1544  	if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1545  	  goto CLEANUP;
1546        }
1547  
1548        d >>= 1;
1549  
1550        if((res = s_mp_sqr(&x)) != MP_OKAY)
1551  	goto CLEANUP;
1552        if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1553  	goto CLEANUP;
1554      }
1555    }
1556  
1557    /* Now do the last digit... */
1558    d = DIGIT(b, dig);
1559  
1560    while(d) {
1561      if(d & 1) {
1562        if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1563  	goto CLEANUP;
1564        if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1565  	goto CLEANUP;
1566      }
1567  
1568      d >>= 1;
1569  
1570      if((res = s_mp_sqr(&x)) != MP_OKAY)
1571        goto CLEANUP;
1572      if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1573        goto CLEANUP;
1574    }
1575  
1576    s_mp_exch(&s, c);
1577  
1578   CLEANUP:
1579    mp_clear(&mu);
1580   MU:
1581    mp_clear(&x);
1582   X:
1583    mp_clear(&s);
1584  
1585    return res;
1586  
1587  } /* end s_mp_exptmod() */
1588  
1589  /* }}} */
1590  
1591  /* {{{ mp_exptmod_d(a, d, m, c) */
1592  
1593  mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1594  {
1595    mp_int   s, x;
1596    mp_err   res;
1597  
1598    ARGCHK(a != NULL && c != NULL, MP_BADARG);
1599  
1600    if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1601      return res;
1602    if((res = mp_init_copy(&x, a)) != MP_OKAY)
1603      goto X;
1604  
1605    mp_set(&s, 1);
1606  
1607    while(d != 0) {
1608      if(d & 1) {
1609        if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1610  	 (res = mp_mod(&s, m, &s)) != MP_OKAY)
1611  	goto CLEANUP;
1612      }
1613  
1614      d /= 2;
1615  
1616      if((res = s_mp_sqr(&x)) != MP_OKAY ||
1617         (res = mp_mod(&x, m, &x)) != MP_OKAY)
1618        goto CLEANUP;
1619    }
1620  
1621    s_mp_exch(&s, c);
1622  
1623  CLEANUP:
1624    mp_clear(&x);
1625  X:
1626    mp_clear(&s);
1627  
1628    return res;
1629  
1630  } /* end mp_exptmod_d() */
1631  
1632  /* }}} */
1633  #endif /* if MP_MODARITH */
1634  
1635  /* }}} */
1636  
1637  /*------------------------------------------------------------------------*/
1638  /* {{{ Comparison functions */
1639  
1640  /* {{{ mp_cmp_z(a) */
1641  
1642  /*
1643    mp_cmp_z(a)
1644  
1645    Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
1646   */
1647  
1648  int    mp_cmp_z(const mp_int *a)
1649  {
1650    if(SIGN(a) == NEG)
1651      return MP_LT;
1652    else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1653      return MP_EQ;
1654    else
1655      return MP_GT;
1656  
1657  } /* end mp_cmp_z() */
1658  
1659  /* }}} */
1660  
1661  /* {{{ mp_cmp_d(a, d) */
1662  
1663  /*
1664    mp_cmp_d(a, d)
1665  
1666    Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
1667   */
1668  
1669  int    mp_cmp_d(const mp_int *a, mp_digit d)
1670  {
1671    ARGCHK(a != NULL, MP_EQ);
1672  
1673    if(SIGN(a) == NEG)
1674      return MP_LT;
1675  
1676    return s_mp_cmp_d(a, d);
1677  
1678  } /* end mp_cmp_d() */
1679  
1680  /* }}} */
1681  
1682  /* {{{ mp_cmp(a, b) */
1683  
1684  int    mp_cmp(const mp_int *a, const mp_int *b)
1685  {
1686    ARGCHK(a != NULL && b != NULL, MP_EQ);
1687  
1688    if(SIGN(a) == SIGN(b)) {
1689      int  mag;
1690  
1691      if((mag = s_mp_cmp(a, b)) == MP_EQ)
1692        return MP_EQ;
1693  
1694      if(SIGN(a) == ZPOS)
1695        return mag;
1696      else
1697        return -mag;
1698  
1699    } else if(SIGN(a) == ZPOS) {
1700      return MP_GT;
1701    } else {
1702      return MP_LT;
1703    }
1704  
1705  } /* end mp_cmp() */
1706  
1707  /* }}} */
1708  
1709  /* {{{ mp_cmp_mag(a, b) */
1710  
1711  /*
1712    mp_cmp_mag(a, b)
1713  
1714    Compares |a| <=> |b|, and returns an appropriate comparison result
1715   */
1716  
1717  int    mp_cmp_mag(mp_int *a, mp_int *b)
1718  {
1719    ARGCHK(a != NULL && b != NULL, MP_EQ);
1720  
1721    return s_mp_cmp(a, b);
1722  
1723  } /* end mp_cmp_mag() */
1724  
1725  /* }}} */
1726  
1727  /* {{{ mp_cmp_int(a, z, kmflag) */
1728  
1729  /*
1730    This just converts z to an mp_int, and uses the existing comparison
1731    routines.  This is sort of inefficient, but it's not clear to me how
1732    frequently this wil get used anyway.  For small positive constants,
1733    you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1734   */
1735  int    mp_cmp_int(const mp_int *a, long z, int kmflag)
1736  {
1737    mp_int  tmp;
1738    int     out;
1739  
1740    ARGCHK(a != NULL, MP_EQ);
1741  
1742    mp_init(&tmp, kmflag); mp_set_int(&tmp, z);
1743    out = mp_cmp(a, &tmp);
1744    mp_clear(&tmp);
1745  
1746    return out;
1747  
1748  } /* end mp_cmp_int() */
1749  
1750  /* }}} */
1751  
1752  /* {{{ mp_isodd(a) */
1753  
1754  /*
1755    mp_isodd(a)
1756  
1757    Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1758   */
1759  int    mp_isodd(const mp_int *a)
1760  {
1761    ARGCHK(a != NULL, 0);
1762  
1763    return (int)(DIGIT(a, 0) & 1);
1764  
1765  } /* end mp_isodd() */
1766  
1767  /* }}} */
1768  
1769  /* {{{ mp_iseven(a) */
1770  
1771  int    mp_iseven(const mp_int *a)
1772  {
1773    return !mp_isodd(a);
1774  
1775  } /* end mp_iseven() */
1776  
1777  /* }}} */
1778  
1779  /* }}} */
1780  
1781  /*------------------------------------------------------------------------*/
1782  /* {{{ Number theoretic functions */
1783  
1784  #if MP_NUMTH
1785  /* {{{ mp_gcd(a, b, c) */
1786  
1787  /*
1788    Like the old mp_gcd() function, except computes the GCD using the
1789    binary algorithm due to Josef Stein in 1961 (via Knuth).
1790   */
1791  mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1792  {
1793    mp_err   res;
1794    mp_int   u, v, t;
1795    mp_size  k = 0;
1796  
1797    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1798  
1799    if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1800        return MP_RANGE;
1801    if(mp_cmp_z(a) == MP_EQ) {
1802      return mp_copy(b, c);
1803    } else if(mp_cmp_z(b) == MP_EQ) {
1804      return mp_copy(a, c);
1805    }
1806  
1807    if((res = mp_init(&t, FLAG(a))) != MP_OKAY)
1808      return res;
1809    if((res = mp_init_copy(&u, a)) != MP_OKAY)
1810      goto U;
1811    if((res = mp_init_copy(&v, b)) != MP_OKAY)
1812      goto V;
1813  
1814    SIGN(&u) = ZPOS;
1815    SIGN(&v) = ZPOS;
1816  
1817    /* Divide out common factors of 2 until at least 1 of a, b is even */
1818    while(mp_iseven(&u) && mp_iseven(&v)) {
1819      s_mp_div_2(&u);
1820      s_mp_div_2(&v);
1821      ++k;
1822    }
1823  
1824    /* Initialize t */
1825    if(mp_isodd(&u)) {
1826      if((res = mp_copy(&v, &t)) != MP_OKAY)
1827        goto CLEANUP;
1828  
1829      /* t = -v */
1830      if(SIGN(&v) == ZPOS)
1831        SIGN(&t) = NEG;
1832      else
1833        SIGN(&t) = ZPOS;
1834  
1835    } else {
1836      if((res = mp_copy(&u, &t)) != MP_OKAY)
1837        goto CLEANUP;
1838  
1839    }
1840  
1841    for(;;) {
1842      while(mp_iseven(&t)) {
1843        s_mp_div_2(&t);
1844      }
1845  
1846      if(mp_cmp_z(&t) == MP_GT) {
1847        if((res = mp_copy(&t, &u)) != MP_OKAY)
1848  	goto CLEANUP;
1849  
1850      } else {
1851        if((res = mp_copy(&t, &v)) != MP_OKAY)
1852  	goto CLEANUP;
1853  
1854        /* v = -t */
1855        if(SIGN(&t) == ZPOS)
1856  	SIGN(&v) = NEG;
1857        else
1858  	SIGN(&v) = ZPOS;
1859      }
1860  
1861      if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1862        goto CLEANUP;
1863  
1864      if(s_mp_cmp_d(&t, 0) == MP_EQ)
1865        break;
1866    }
1867  
1868    s_mp_2expt(&v, k);       /* v = 2^k   */
1869    res = mp_mul(&u, &v, c); /* c = u * v */
1870  
1871   CLEANUP:
1872    mp_clear(&v);
1873   V:
1874    mp_clear(&u);
1875   U:
1876    mp_clear(&t);
1877  
1878    return res;
1879  
1880  } /* end mp_gcd() */
1881  
1882  /* }}} */
1883  
1884  /* {{{ mp_lcm(a, b, c) */
1885  
1886  /* We compute the least common multiple using the rule:
1887  
1888     ab = [a, b](a, b)
1889  
1890     ... by computing the product, and dividing out the gcd.
1891   */
1892  
1893  mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1894  {
1895    mp_int  gcd, prod;
1896    mp_err  res;
1897  
1898    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1899  
1900    /* Set up temporaries */
1901    if((res = mp_init(&gcd, FLAG(a))) != MP_OKAY)
1902      return res;
1903    if((res = mp_init(&prod, FLAG(a))) != MP_OKAY)
1904      goto GCD;
1905  
1906    if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1907      goto CLEANUP;
1908    if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1909      goto CLEANUP;
1910  
1911    res = mp_div(&prod, &gcd, c, NULL);
1912  
1913   CLEANUP:
1914    mp_clear(&prod);
1915   GCD:
1916    mp_clear(&gcd);
1917  
1918    return res;
1919  
1920  } /* end mp_lcm() */
1921  
1922  /* }}} */
1923  
1924  /* {{{ mp_xgcd(a, b, g, x, y) */
1925  
1926  /*
1927    mp_xgcd(a, b, g, x, y)
1928  
1929    Compute g = (a, b) and values x and y satisfying Bezout's identity
1930    (that is, ax + by = g).  This uses the binary extended GCD algorithm
1931    based on the Stein algorithm used for mp_gcd()
1932    See algorithm 14.61 in Handbook of Applied Cryptogrpahy.
1933   */
1934  
1935  mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1936  {
1937    mp_int   gx, xc, yc, u, v, A, B, C, D;
1938    mp_int  *clean[9];
1939    mp_err   res;
1940    int      last = -1;
1941  
1942    if(mp_cmp_z(b) == 0)
1943      return MP_RANGE;
1944  
1945    /* Initialize all these variables we need */
1946    MP_CHECKOK( mp_init(&u, FLAG(a)) );
1947    clean[++last] = &u;
1948    MP_CHECKOK( mp_init(&v, FLAG(a)) );
1949    clean[++last] = &v;
1950    MP_CHECKOK( mp_init(&gx, FLAG(a)) );
1951    clean[++last] = &gx;
1952    MP_CHECKOK( mp_init(&A, FLAG(a)) );
1953    clean[++last] = &A;
1954    MP_CHECKOK( mp_init(&B, FLAG(a)) );
1955    clean[++last] = &B;
1956    MP_CHECKOK( mp_init(&C, FLAG(a)) );
1957    clean[++last] = &C;
1958    MP_CHECKOK( mp_init(&D, FLAG(a)) );
1959    clean[++last] = &D;
1960    MP_CHECKOK( mp_init_copy(&xc, a) );
1961    clean[++last] = &xc;
1962    mp_abs(&xc, &xc);
1963    MP_CHECKOK( mp_init_copy(&yc, b) );
1964    clean[++last] = &yc;
1965    mp_abs(&yc, &yc);
1966  
1967    mp_set(&gx, 1);
1968  
1969    /* Divide by two until at least one of them is odd */
1970    while(mp_iseven(&xc) && mp_iseven(&yc)) {
1971      mp_size nx = mp_trailing_zeros(&xc);
1972      mp_size ny = mp_trailing_zeros(&yc);
1973      mp_size n  = MP_MIN(nx, ny);
1974      s_mp_div_2d(&xc,n);
1975      s_mp_div_2d(&yc,n);
1976      MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1977    }
1978  
1979    mp_copy(&xc, &u);
1980    mp_copy(&yc, &v);
1981    mp_set(&A, 1); mp_set(&D, 1);
1982  
1983    /* Loop through binary GCD algorithm */
1984    do {
1985      while(mp_iseven(&u)) {
1986        s_mp_div_2(&u);
1987  
1988        if(mp_iseven(&A) && mp_iseven(&B)) {
1989  	s_mp_div_2(&A); s_mp_div_2(&B);
1990        } else {
1991  	MP_CHECKOK( mp_add(&A, &yc, &A) );
1992  	s_mp_div_2(&A);
1993  	MP_CHECKOK( mp_sub(&B, &xc, &B) );
1994  	s_mp_div_2(&B);
1995        }
1996      }
1997  
1998      while(mp_iseven(&v)) {
1999        s_mp_div_2(&v);
2000  
2001        if(mp_iseven(&C) && mp_iseven(&D)) {
2002  	s_mp_div_2(&C); s_mp_div_2(&D);
2003        } else {
2004  	MP_CHECKOK( mp_add(&C, &yc, &C) );
2005  	s_mp_div_2(&C);
2006  	MP_CHECKOK( mp_sub(&D, &xc, &D) );
2007  	s_mp_div_2(&D);
2008        }
2009      }
2010  
2011      if(mp_cmp(&u, &v) >= 0) {
2012        MP_CHECKOK( mp_sub(&u, &v, &u) );
2013        MP_CHECKOK( mp_sub(&A, &C, &A) );
2014        MP_CHECKOK( mp_sub(&B, &D, &B) );
2015      } else {
2016        MP_CHECKOK( mp_sub(&v, &u, &v) );
2017        MP_CHECKOK( mp_sub(&C, &A, &C) );
2018        MP_CHECKOK( mp_sub(&D, &B, &D) );
2019      }
2020    } while (mp_cmp_z(&u) != 0);
2021  
2022    /* copy results to output */
2023    if(x)
2024      MP_CHECKOK( mp_copy(&C, x) );
2025  
2026    if(y)
2027      MP_CHECKOK( mp_copy(&D, y) );
2028  
2029    if(g)
2030      MP_CHECKOK( mp_mul(&gx, &v, g) );
2031  
2032   CLEANUP:
2033    while(last >= 0)
2034      mp_clear(clean[last--]);
2035  
2036    return res;
2037  
2038  } /* end mp_xgcd() */
2039  
2040  /* }}} */
2041  
2042  mp_size mp_trailing_zeros(const mp_int *mp)
2043  {
2044    mp_digit d;
2045    mp_size  n = 0;
2046    int      ix;
2047  
2048    if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2049      return n;
2050  
2051    for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2052      n += MP_DIGIT_BIT;
2053    if (!d)
2054      return 0;	/* shouldn't happen, but ... */
2055  #if !defined(MP_USE_UINT_DIGIT)
2056    if (!(d & 0xffffffffU)) {
2057      d >>= 32;
2058      n  += 32;
2059    }
2060  #endif
2061    if (!(d & 0xffffU)) {
2062      d >>= 16;
2063      n  += 16;
2064    }
2065    if (!(d & 0xffU)) {
2066      d >>= 8;
2067      n  += 8;
2068    }
2069    if (!(d & 0xfU)) {
2070      d >>= 4;
2071      n  += 4;
2072    }
2073    if (!(d & 0x3U)) {
2074      d >>= 2;
2075      n  += 2;
2076    }
2077    if (!(d & 0x1U)) {
2078      d >>= 1;
2079      n  += 1;
2080    }
2081  #if MP_ARGCHK == 2
2082    assert(0 != (d & 1));
2083  #endif
2084    return n;
2085  }
2086  
2087  /* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2088  ** Returns k (positive) or error (negative).
2089  ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2090  ** by Richard Schroeppel (a.k.a. Captain Nemo).
2091  */
2092  mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2093  {
2094    mp_err res;
2095    mp_err k    = 0;
2096    mp_int d, f, g;
2097  
2098    ARGCHK(a && p && c, MP_BADARG);
2099  
2100    MP_DIGITS(&d) = 0;
2101    MP_DIGITS(&f) = 0;
2102    MP_DIGITS(&g) = 0;
2103    MP_CHECKOK( mp_init(&d, FLAG(a)) );
2104    MP_CHECKOK( mp_init_copy(&f, a) );	/* f = a */
2105    MP_CHECKOK( mp_init_copy(&g, p) );	/* g = p */
2106  
2107    mp_set(c, 1);
2108    mp_zero(&d);
2109  
2110    if (mp_cmp_z(&f) == 0) {
2111      res = MP_UNDEF;
2112    } else
2113    for (;;) {
2114      int diff_sign;
2115      while (mp_iseven(&f)) {
2116        mp_size n = mp_trailing_zeros(&f);
2117        if (!n) {
2118  	res = MP_UNDEF;
2119  	goto CLEANUP;
2120        }
2121        s_mp_div_2d(&f, n);
2122        MP_CHECKOK( s_mp_mul_2d(&d, n) );
2123        k += n;
2124      }
2125      if (mp_cmp_d(&f, 1) == MP_EQ) {	/* f == 1 */
2126        res = k;
2127        break;
2128      }
2129      diff_sign = mp_cmp(&f, &g);
2130      if (diff_sign < 0) {		/* f < g */
2131        s_mp_exch(&f, &g);
2132        s_mp_exch(c, &d);
2133      } else if (diff_sign == 0) {		/* f == g */
2134        res = MP_UNDEF;		/* a and p are not relatively prime */
2135        break;
2136      }
2137      if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2138        MP_CHECKOK( mp_sub(&f, &g, &f) );	/* f = f - g */
2139        MP_CHECKOK( mp_sub(c,  &d,  c) );	/* c = c - d */
2140      } else {
2141        MP_CHECKOK( mp_add(&f, &g, &f) );	/* f = f + g */
2142        MP_CHECKOK( mp_add(c,  &d,  c) );	/* c = c + d */
2143      }
2144    }
2145    if (res >= 0) {
2146      while (MP_SIGN(c) != MP_ZPOS) {
2147        MP_CHECKOK( mp_add(c, p, c) );
2148      }
2149      res = k;
2150    }
2151  
2152  CLEANUP:
2153    mp_clear(&d);
2154    mp_clear(&f);
2155    mp_clear(&g);
2156    return res;
2157  }
2158  
2159  /* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
2160  ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2161  ** by Richard Schroeppel (a.k.a. Captain Nemo).
2162  */
2163  mp_digit  s_mp_invmod_radix(mp_digit P)
2164  {
2165    mp_digit T = P;
2166    T *= 2 - (P * T);
2167    T *= 2 - (P * T);
2168    T *= 2 - (P * T);
2169    T *= 2 - (P * T);
2170  #if !defined(MP_USE_UINT_DIGIT)
2171    T *= 2 - (P * T);
2172    T *= 2 - (P * T);
2173  #endif
2174    return T;
2175  }
2176  
2177  /* Given c, k, and prime p, where a*c == 2**k (mod p),
2178  ** Compute x = (a ** -1) mod p.  This is similar to Montgomery reduction.
2179  ** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2180  ** by Richard Schroeppel (a.k.a. Captain Nemo).
2181  */
2182  mp_err  s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2183  {
2184    int      k_orig = k;
2185    mp_digit r;
2186    mp_size  ix;
2187    mp_err   res;
2188  
2189    if (mp_cmp_z(c) < 0) {		/* c < 0 */
2190      MP_CHECKOK( mp_add(c, p, x) );	/* x = c + p */
2191    } else {
2192      MP_CHECKOK( mp_copy(c, x) );	/* x = c */
2193    }
2194  
2195    /* make sure x is large enough */
2196    ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2197    ix = MP_MAX(ix, MP_USED(x));
2198    MP_CHECKOK( s_mp_pad(x, ix) );
2199  
2200    r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2201  
2202    for (ix = 0; k > 0; ix++) {
2203      int      j = MP_MIN(k, MP_DIGIT_BIT);
2204      mp_digit v = r * MP_DIGIT(x, ix);
2205      if (j < MP_DIGIT_BIT) {
2206        v &= ((mp_digit)1 << j) - 1;	/* v = v mod (2 ** j) */
2207      }
2208      s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2209      k -= j;
2210    }
2211    s_mp_clamp(x);
2212    s_mp_div_2d(x, k_orig);
2213    res = MP_OKAY;
2214  
2215  CLEANUP:
2216    return res;
2217  }
2218  
2219  /* compute mod inverse using Schroeppel's method, only if m is odd */
2220  mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2221  {
2222    int k;
2223    mp_err  res;
2224    mp_int  x;
2225  
2226    ARGCHK(a && m && c, MP_BADARG);
2227  
2228    if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2229      return MP_RANGE;
2230    if (mp_iseven(m))
2231      return MP_UNDEF;
2232  
2233    MP_DIGITS(&x) = 0;
2234  
2235    if (a == c) {
2236      if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2237        return res;
2238      if (a == m)
2239        m = &x;
2240      a = &x;
2241    } else if (m == c) {
2242      if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2243        return res;
2244      m = &x;
2245    } else {
2246      MP_DIGITS(&x) = 0;
2247    }
2248  
2249    MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2250    k = res;
2251    MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2252  CLEANUP:
2253    mp_clear(&x);
2254    return res;
2255  }
2256  
2257  /* Known good algorithm for computing modular inverse.  But slow. */
2258  mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2259  {
2260    mp_int  g, x;
2261    mp_err  res;
2262  
2263    ARGCHK(a && m && c, MP_BADARG);
2264  
2265    if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2266      return MP_RANGE;
2267  
2268    MP_DIGITS(&g) = 0;
2269    MP_DIGITS(&x) = 0;
2270    MP_CHECKOK( mp_init(&x, FLAG(a)) );
2271    MP_CHECKOK( mp_init(&g, FLAG(a)) );
2272  
2273    MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2274  
2275    if (mp_cmp_d(&g, 1) != MP_EQ) {
2276      res = MP_UNDEF;
2277      goto CLEANUP;
2278    }
2279  
2280    res = mp_mod(&x, m, c);
2281    SIGN(c) = SIGN(a);
2282  
2283  CLEANUP:
2284    mp_clear(&x);
2285    mp_clear(&g);
2286  
2287    return res;
2288  }
2289  
2290  /* modular inverse where modulus is 2**k. */
2291  /* c = a**-1 mod 2**k */
2292  mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2293  {
2294    mp_err res;
2295    mp_size ix = k + 4;
2296    mp_int t0, t1, val, tmp, two2k;
2297  
2298    static const mp_digit d2 = 2;
2299    static const mp_int two = { 0, MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2300  
2301    if (mp_iseven(a))
2302      return MP_UNDEF;
2303    if (k <= MP_DIGIT_BIT) {
2304      mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2305      if (k < MP_DIGIT_BIT)
2306        i &= ((mp_digit)1 << k) - (mp_digit)1;
2307      mp_set(c, i);
2308      return MP_OKAY;
2309    }
2310    MP_DIGITS(&t0) = 0;
2311    MP_DIGITS(&t1) = 0;
2312    MP_DIGITS(&val) = 0;
2313    MP_DIGITS(&tmp) = 0;
2314    MP_DIGITS(&two2k) = 0;
2315    MP_CHECKOK( mp_init_copy(&val, a) );
2316    s_mp_mod_2d(&val, k);
2317    MP_CHECKOK( mp_init_copy(&t0, &val) );
2318    MP_CHECKOK( mp_init_copy(&t1, &t0)  );
2319    MP_CHECKOK( mp_init(&tmp, FLAG(a)) );
2320    MP_CHECKOK( mp_init(&two2k, FLAG(a)) );
2321    MP_CHECKOK( s_mp_2expt(&two2k, k) );
2322    do {
2323      MP_CHECKOK( mp_mul(&val, &t1, &tmp)  );
2324      MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2325      MP_CHECKOK( mp_mul(&t1, &tmp, &t1)   );
2326      s_mp_mod_2d(&t1, k);
2327      while (MP_SIGN(&t1) != MP_ZPOS) {
2328        MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2329      }
2330      if (mp_cmp(&t1, &t0) == MP_EQ)
2331        break;
2332      MP_CHECKOK( mp_copy(&t1, &t0) );
2333    } while (--ix > 0);
2334    if (!ix) {
2335      res = MP_UNDEF;
2336    } else {
2337      mp_exch(c, &t1);
2338    }
2339  
2340  CLEANUP:
2341    mp_clear(&t0);
2342    mp_clear(&t1);
2343    mp_clear(&val);
2344    mp_clear(&tmp);
2345    mp_clear(&two2k);
2346    return res;
2347  }
2348  
2349  mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2350  {
2351    mp_err res;
2352    mp_size k;
2353    mp_int oddFactor, evenFactor;	/* factors of the modulus */
2354    mp_int oddPart, evenPart;	/* parts to combine via CRT. */
2355    mp_int C2, tmp1, tmp2;
2356  
2357    /*static const mp_digit d1 = 1; */
2358    /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2359  
2360    if ((res = s_mp_ispow2(m)) >= 0) {
2361      k = res;
2362      return s_mp_invmod_2d(a, k, c);
2363    }
2364    MP_DIGITS(&oddFactor) = 0;
2365    MP_DIGITS(&evenFactor) = 0;
2366    MP_DIGITS(&oddPart) = 0;
2367    MP_DIGITS(&evenPart) = 0;
2368    MP_DIGITS(&C2)     = 0;
2369    MP_DIGITS(&tmp1)   = 0;
2370    MP_DIGITS(&tmp2)   = 0;
2371  
2372    MP_CHECKOK( mp_init_copy(&oddFactor, m) );    /* oddFactor = m */
2373    MP_CHECKOK( mp_init(&evenFactor, FLAG(m)) );
2374    MP_CHECKOK( mp_init(&oddPart, FLAG(m)) );
2375    MP_CHECKOK( mp_init(&evenPart, FLAG(m)) );
2376    MP_CHECKOK( mp_init(&C2, FLAG(m))     );
2377    MP_CHECKOK( mp_init(&tmp1, FLAG(m))   );
2378    MP_CHECKOK( mp_init(&tmp2, FLAG(m))   );
2379  
2380    k = mp_trailing_zeros(m);
2381    s_mp_div_2d(&oddFactor, k);
2382    MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2383  
2384    /* compute a**-1 mod oddFactor. */
2385    MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2386    /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2387    MP_CHECKOK( s_mp_invmod_2d(   a,       k,    &evenPart) );
2388  
2389    /* Use Chinese Remainer theorem to compute a**-1 mod m. */
2390    /* let m1 = oddFactor,  v1 = oddPart,
2391     * let m2 = evenFactor, v2 = evenPart.
2392     */
2393  
2394    /* Compute C2 = m1**-1 mod m2. */
2395    MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k,    &C2) );
2396  
2397    /* compute u = (v2 - v1)*C2 mod m2 */
2398    MP_CHECKOK( mp_sub(&evenPart, &oddPart,   &tmp1) );
2399    MP_CHECKOK( mp_mul(&tmp1,     &C2,        &tmp2) );
2400    s_mp_mod_2d(&tmp2, k);
2401    while (MP_SIGN(&tmp2) != MP_ZPOS) {
2402      MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2403    }
2404  
2405    /* compute answer = v1 + u*m1 */
2406    MP_CHECKOK( mp_mul(&tmp2,     &oddFactor, c) );
2407    MP_CHECKOK( mp_add(&oddPart,  c,          c) );
2408    /* not sure this is necessary, but it's low cost if not. */
2409    MP_CHECKOK( mp_mod(c,         m,          c) );
2410  
2411  CLEANUP:
2412    mp_clear(&oddFactor);
2413    mp_clear(&evenFactor);
2414    mp_clear(&oddPart);
2415    mp_clear(&evenPart);
2416    mp_clear(&C2);
2417    mp_clear(&tmp1);
2418    mp_clear(&tmp2);
2419    return res;
2420  }
2421  
2422  
2423  /* {{{ mp_invmod(a, m, c) */
2424  
2425  /*
2426    mp_invmod(a, m, c)
2427  
2428    Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
2429    This is equivalent to the question of whether (a, m) = 1.  If not,
2430    MP_UNDEF is returned, and there is no inverse.
2431   */
2432  
2433  mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2434  {
2435  
2436    ARGCHK(a && m && c, MP_BADARG);
2437  
2438    if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2439      return MP_RANGE;
2440  
2441    if (mp_isodd(m)) {
2442      return s_mp_invmod_odd_m(a, m, c);
2443    }
2444    if (mp_iseven(a))
2445      return MP_UNDEF;	/* not invertable */
2446  
2447    return s_mp_invmod_even_m(a, m, c);
2448  
2449  } /* end mp_invmod() */
2450  
2451  /* }}} */
2452  #endif /* if MP_NUMTH */
2453  
2454  /* }}} */
2455  
2456  /*------------------------------------------------------------------------*/
2457  /* {{{ mp_print(mp, ofp) */
2458  
2459  #if MP_IOFUNC
2460  /*
2461    mp_print(mp, ofp)
2462  
2463    Print a textual representation of the given mp_int on the output
2464    stream 'ofp'.  Output is generated using the internal radix.
2465   */
2466  
2467  void   mp_print(mp_int *mp, FILE *ofp)
2468  {
2469    int   ix;
2470  
2471    if(mp == NULL || ofp == NULL)
2472      return;
2473  
2474    fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2475  
2476    for(ix = USED(mp) - 1; ix >= 0; ix--) {
2477      fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2478    }
2479  
2480  } /* end mp_print() */
2481  
2482  #endif /* if MP_IOFUNC */
2483  
2484  /* }}} */
2485  
2486  /*------------------------------------------------------------------------*/
2487  /* {{{ More I/O Functions */
2488  
2489  /* {{{ mp_read_raw(mp, str, len) */
2490  
2491  /*
2492     mp_read_raw(mp, str, len)
2493  
2494     Read in a raw value (base 256) into the given mp_int
2495   */
2496  
2497  mp_err  mp_read_raw(mp_int *mp, char *str, int len)
2498  {
2499    int            ix;
2500    mp_err         res;
2501    unsigned char *ustr = (unsigned char *)str;
2502  
2503    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2504  
2505    mp_zero(mp);
2506  
2507    /* Get sign from first byte */
2508    if(ustr[0])
2509      SIGN(mp) = NEG;
2510    else
2511      SIGN(mp) = ZPOS;
2512  
2513    /* Read the rest of the digits */
2514    for(ix = 1; ix < len; ix++) {
2515      if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2516        return res;
2517      if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2518        return res;
2519    }
2520  
2521    return MP_OKAY;
2522  
2523  } /* end mp_read_raw() */
2524  
2525  /* }}} */
2526  
2527  /* {{{ mp_raw_size(mp) */
2528  
2529  int    mp_raw_size(mp_int *mp)
2530  {
2531    ARGCHK(mp != NULL, 0);
2532  
2533    return (USED(mp) * sizeof(mp_digit)) + 1;
2534  
2535  } /* end mp_raw_size() */
2536  
2537  /* }}} */
2538  
2539  /* {{{ mp_toraw(mp, str) */
2540  
2541  mp_err mp_toraw(mp_int *mp, char *str)
2542  {
2543    int  ix, jx, pos = 1;
2544  
2545    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2546  
2547    str[0] = (char)SIGN(mp);
2548  
2549    /* Iterate over each digit... */
2550    for(ix = USED(mp) - 1; ix >= 0; ix--) {
2551      mp_digit  d = DIGIT(mp, ix);
2552  
2553      /* Unpack digit bytes, high order first */
2554      for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2555        str[pos++] = (char)(d >> (jx * CHAR_BIT));
2556      }
2557    }
2558  
2559    return MP_OKAY;
2560  
2561  } /* end mp_toraw() */
2562  
2563  /* }}} */
2564  
2565  /* {{{ mp_read_radix(mp, str, radix) */
2566  
2567  /*
2568    mp_read_radix(mp, str, radix)
2569  
2570    Read an integer from the given string, and set mp to the resulting
2571    value.  The input is presumed to be in base 10.  Leading non-digit
2572    characters are ignored, and the function reads until a non-digit
2573    character or the end of the string.
2574   */
2575  
2576  mp_err  mp_read_radix(mp_int *mp, const char *str, int radix)
2577  {
2578    int     ix = 0, val = 0;
2579    mp_err  res;
2580    mp_sign sig = ZPOS;
2581  
2582    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2583  	 MP_BADARG);
2584  
2585    mp_zero(mp);
2586  
2587    /* Skip leading non-digit characters until a digit or '-' or '+' */
2588    while(str[ix] &&
2589  	(s_mp_tovalue(str[ix], radix) < 0) &&
2590  	str[ix] != '-' &&
2591  	str[ix] != '+') {
2592      ++ix;
2593    }
2594  
2595    if(str[ix] == '-') {
2596      sig = NEG;
2597      ++ix;
2598    } else if(str[ix] == '+') {
2599      sig = ZPOS; /* this is the default anyway... */
2600      ++ix;
2601    }
2602  
2603    while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2604      if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2605        return res;
2606      if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2607        return res;
2608      ++ix;
2609    }
2610  
2611    if(s_mp_cmp_d(mp, 0) == MP_EQ)
2612      SIGN(mp) = ZPOS;
2613    else
2614      SIGN(mp) = sig;
2615  
2616    return MP_OKAY;
2617  
2618  } /* end mp_read_radix() */
2619  
2620  mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2621  {
2622    int     radix = default_radix;
2623    int     cx;
2624    mp_sign sig   = ZPOS;
2625    mp_err  res;
2626  
2627    /* Skip leading non-digit characters until a digit or '-' or '+' */
2628    while ((cx = *str) != 0 &&
2629  	(s_mp_tovalue(cx, radix) < 0) &&
2630  	cx != '-' &&
2631  	cx != '+') {
2632      ++str;
2633    }
2634  
2635    if (cx == '-') {
2636      sig = NEG;
2637      ++str;
2638    } else if (cx == '+') {
2639      sig = ZPOS; /* this is the default anyway... */
2640      ++str;
2641    }
2642  
2643    if (str[0] == '0') {
2644      if ((str[1] | 0x20) == 'x') {
2645        radix = 16;
2646        str += 2;
2647      } else {
2648        radix = 8;
2649        str++;
2650      }
2651    }
2652    res = mp_read_radix(a, str, radix);
2653    if (res == MP_OKAY) {
2654      MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2655    }
2656    return res;
2657  }
2658  
2659  /* }}} */
2660  
2661  /* {{{ mp_radix_size(mp, radix) */
2662  
2663  int    mp_radix_size(mp_int *mp, int radix)
2664  {
2665    int  bits;
2666  
2667    if(!mp || radix < 2 || radix > MAX_RADIX)
2668      return 0;
2669  
2670    bits = USED(mp) * DIGIT_BIT - 1;
2671  
2672    return s_mp_outlen(bits, radix);
2673  
2674  } /* end mp_radix_size() */
2675  
2676  /* }}} */
2677  
2678  /* {{{ mp_toradix(mp, str, radix) */
2679  
2680  mp_err mp_toradix(mp_int *mp, char *str, int radix)
2681  {
2682    int  ix, pos = 0;
2683  
2684    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2685    ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2686  
2687    if(mp_cmp_z(mp) == MP_EQ) {
2688      str[0] = '0';
2689      str[1] = '\0';
2690    } else {
2691      mp_err   res;
2692      mp_int   tmp;
2693      mp_sign  sgn;
2694      mp_digit rem, rdx = (mp_digit)radix;
2695      char     ch;
2696  
2697      if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2698        return res;
2699  
2700      /* Save sign for later, and take absolute value */
2701      sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2702  
2703      /* Generate output digits in reverse order      */
2704      while(mp_cmp_z(&tmp) != 0) {
2705        if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2706  	mp_clear(&tmp);
2707  	return res;
2708        }
2709  
2710        /* Generate digits, use capital letters */
2711        ch = s_mp_todigit(rem, radix, 0);
2712  
2713        str[pos++] = ch;
2714      }
2715  
2716      /* Add - sign if original value was negative */
2717      if(sgn == NEG)
2718        str[pos++] = '-';
2719  
2720      /* Add trailing NUL to end the string        */
2721      str[pos--] = '\0';
2722  
2723      /* Reverse the digits and sign indicator     */
2724      ix = 0;
2725      while(ix < pos) {
2726        char tmp = str[ix];
2727  
2728        str[ix] = str[pos];
2729        str[pos] = tmp;
2730        ++ix;
2731        --pos;
2732      }
2733  
2734      mp_clear(&tmp);
2735    }
2736  
2737    return MP_OKAY;
2738  
2739  } /* end mp_toradix() */
2740  
2741  /* }}} */
2742  
2743  /* {{{ mp_tovalue(ch, r) */
2744  
2745  int    mp_tovalue(char ch, int r)
2746  {
2747    return s_mp_tovalue(ch, r);
2748  
2749  } /* end mp_tovalue() */
2750  
2751  /* }}} */
2752  
2753  /* }}} */
2754  
2755  /* {{{ mp_strerror(ec) */
2756  
2757  /*
2758    mp_strerror(ec)
2759  
2760    Return a string describing the meaning of error code 'ec'.  The
2761    string returned is allocated in static memory, so the caller should
2762    not attempt to modify or free the memory associated with this
2763    string.
2764   */
2765  const char  *mp_strerror(mp_err ec)
2766  {
2767    int   aec = (ec < 0) ? -ec : ec;
2768  
2769    /* Code values are negative, so the senses of these comparisons
2770       are accurate */
2771    if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2772      return mp_err_string[0];  /* unknown error code */
2773    } else {
2774      return mp_err_string[aec + 1];
2775    }
2776  
2777  } /* end mp_strerror() */
2778  
2779  /* }}} */
2780  
2781  /*========================================================================*/
2782  /*------------------------------------------------------------------------*/
2783  /* Static function definitions (internal use only)                        */
2784  
2785  /* {{{ Memory management */
2786  
2787  /* {{{ s_mp_grow(mp, min) */
2788  
2789  /* Make sure there are at least 'min' digits allocated to mp              */
2790  mp_err   s_mp_grow(mp_int *mp, mp_size min)
2791  {
2792    if(min > ALLOC(mp)) {
2793      mp_digit   *tmp;
2794  
2795      /* Set min to next nearest default precision block size */
2796      min = MP_ROUNDUP(min, s_mp_defprec);
2797  
2798      if((tmp = s_mp_alloc(min, sizeof(mp_digit), FLAG(mp))) == NULL)
2799        return MP_MEM;
2800  
2801      s_mp_copy(DIGITS(mp), tmp, USED(mp));
2802  
2803  #if MP_CRYPTO
2804      s_mp_setz(DIGITS(mp), ALLOC(mp));
2805  #endif
2806      s_mp_free(DIGITS(mp), ALLOC(mp));
2807      DIGITS(mp) = tmp;
2808      ALLOC(mp) = min;
2809    }
2810  
2811    return MP_OKAY;
2812  
2813  } /* end s_mp_grow() */
2814  
2815  /* }}} */
2816  
2817  /* {{{ s_mp_pad(mp, min) */
2818  
2819  /* Make sure the used size of mp is at least 'min', growing if needed     */
2820  mp_err   s_mp_pad(mp_int *mp, mp_size min)
2821  {
2822    if(min > USED(mp)) {
2823      mp_err  res;
2824  
2825      /* Make sure there is room to increase precision  */
2826      if (min > ALLOC(mp)) {
2827        if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2828  	return res;
2829      } else {
2830        s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2831      }
2832  
2833      /* Increase precision; should already be 0-filled */
2834      USED(mp) = min;
2835    }
2836  
2837    return MP_OKAY;
2838  
2839  } /* end s_mp_pad() */
2840  
2841  /* }}} */
2842  
2843  /* {{{ s_mp_setz(dp, count) */
2844  
2845  #if MP_MACRO == 0
2846  /* Set 'count' digits pointed to by dp to be zeroes                       */
2847  void s_mp_setz(mp_digit *dp, mp_size count)
2848  {
2849  #if MP_MEMSET == 0
2850    int  ix;
2851  
2852    for(ix = 0; ix < count; ix++)
2853      dp[ix] = 0;
2854  #else
2855    memset(dp, 0, count * sizeof(mp_digit));
2856  #endif
2857  
2858  } /* end s_mp_setz() */
2859  #endif
2860  
2861  /* }}} */
2862  
2863  /* {{{ s_mp_copy(sp, dp, count) */
2864  
2865  #if MP_MACRO == 0
2866  /* Copy 'count' digits from sp to dp                                      */
2867  void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2868  {
2869  #if MP_MEMCPY == 0
2870    int  ix;
2871  
2872    for(ix = 0; ix < count; ix++)
2873      dp[ix] = sp[ix];
2874  #else
2875    memcpy(dp, sp, count * sizeof(mp_digit));
2876  #endif
2877  
2878  } /* end s_mp_copy() */
2879  #endif
2880  
2881  /* }}} */
2882  
2883  /* {{{ s_mp_alloc(nb, ni, kmflag) */
2884  
2885  #if MP_MACRO == 0
2886  /* Allocate ni records of nb bytes each, and return a pointer to that     */
2887  void    *s_mp_alloc(size_t nb, size_t ni, int kmflag)
2888  {
2889    ++mp_allocs;
2890  #ifdef _KERNEL
2891    return kmem_zalloc(nb * ni, kmflag);
2892  #else
2893    return calloc(nb, ni);
2894  #endif
2895  
2896  } /* end s_mp_alloc() */
2897  #endif
2898  
2899  /* }}} */
2900  
2901  /* {{{ s_mp_free(ptr) */
2902  
2903  #if MP_MACRO == 0
2904  /* Free the memory pointed to by ptr                                      */
2905  void     s_mp_free(void *ptr, mp_size alloc)
2906  {
2907    if(ptr) {
2908      ++mp_frees;
2909  #ifdef _KERNEL
2910      kmem_free(ptr, alloc * sizeof (mp_digit));
2911  #else
2912      free(ptr);
2913  #endif
2914    }
2915  } /* end s_mp_free() */
2916  #endif
2917  
2918  /* }}} */
2919  
2920  /* {{{ s_mp_clamp(mp) */
2921  
2922  #if MP_MACRO == 0
2923  /* Remove leading zeroes from the given value                             */
2924  void     s_mp_clamp(mp_int *mp)
2925  {
2926    mp_size used = MP_USED(mp);
2927    while (used > 1 && DIGIT(mp, used - 1) == 0)
2928      --used;
2929    MP_USED(mp) = used;
2930  } /* end s_mp_clamp() */
2931  #endif
2932  
2933  /* }}} */
2934  
2935  /* {{{ s_mp_exch(a, b) */
2936  
2937  /* Exchange the data for a and b; (b, a) = (a, b)                         */
2938  void     s_mp_exch(mp_int *a, mp_int *b)
2939  {
2940    mp_int   tmp;
2941  
2942    tmp = *a;
2943    *a = *b;
2944    *b = tmp;
2945  
2946  } /* end s_mp_exch() */
2947  
2948  /* }}} */
2949  
2950  /* }}} */
2951  
2952  /* {{{ Arithmetic helpers */
2953  
2954  /* {{{ s_mp_lshd(mp, p) */
2955  
2956  /*
2957     Shift mp leftward by p digits, growing if needed, and zero-filling
2958     the in-shifted digits at the right end.  This is a convenient
2959     alternative to multiplication by powers of the radix
2960     The value of USED(mp) must already have been set to the value for
2961     the shifted result.
2962   */
2963  
2964  mp_err   s_mp_lshd(mp_int *mp, mp_size p)
2965  {
2966    mp_err  res;
2967    mp_size pos;
2968    int     ix;
2969  
2970    if(p == 0)
2971      return MP_OKAY;
2972  
2973    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2974      return MP_OKAY;
2975  
2976    if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2977      return res;
2978  
2979    pos = USED(mp) - 1;
2980  
2981    /* Shift all the significant figures over as needed */
2982    for(ix = pos - p; ix >= 0; ix--)
2983      DIGIT(mp, ix + p) = DIGIT(mp, ix);
2984  
2985    /* Fill the bottom digits with zeroes */
2986    for(ix = 0; ix < p; ix++)
2987      DIGIT(mp, ix) = 0;
2988  
2989    return MP_OKAY;
2990  
2991  } /* end s_mp_lshd() */
2992  
2993  /* }}} */
2994  
2995  /* {{{ s_mp_mul_2d(mp, d) */
2996  
2997  /*
2998    Multiply the integer by 2^d, where d is a number of bits.  This
2999    amounts to a bitwise shift of the value.
3000   */
3001  mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d)
3002  {
3003    mp_err   res;
3004    mp_digit dshift, bshift;
3005    mp_digit mask;
3006  
3007    ARGCHK(mp != NULL,  MP_BADARG);
3008  
3009    dshift = d / MP_DIGIT_BIT;
3010    bshift = d % MP_DIGIT_BIT;
3011    /* bits to be shifted out of the top word */
3012    mask   = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
3013    mask  &= MP_DIGIT(mp, MP_USED(mp) - 1);
3014  
3015    if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
3016      return res;
3017  
3018    if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3019      return res;
3020  
3021    if (bshift) {
3022      mp_digit *pa = MP_DIGITS(mp);
3023      mp_digit *alim = pa + MP_USED(mp);
3024      mp_digit  prev = 0;
3025  
3026      for (pa += dshift; pa < alim; ) {
3027        mp_digit x = *pa;
3028        *pa++ = (x << bshift) | prev;
3029        prev = x >> (DIGIT_BIT - bshift);
3030      }
3031    }
3032  
3033    s_mp_clamp(mp);
3034    return MP_OKAY;
3035  } /* end s_mp_mul_2d() */
3036  
3037  /* {{{ s_mp_rshd(mp, p) */
3038  
3039  /*
3040     Shift mp rightward by p digits.  Maintains the invariant that
3041     digits above the precision are all zero.  Digits shifted off the
3042     end are lost.  Cannot fail.
3043   */
3044  
3045  void     s_mp_rshd(mp_int *mp, mp_size p)
3046  {
3047    mp_size  ix;
3048    mp_digit *src, *dst;
3049  
3050    if(p == 0)
3051      return;
3052  
3053    /* Shortcut when all digits are to be shifted off */
3054    if(p >= USED(mp)) {
3055      s_mp_setz(DIGITS(mp), ALLOC(mp));
3056      USED(mp) = 1;
3057      SIGN(mp) = ZPOS;
3058      return;
3059    }
3060  
3061    /* Shift all the significant figures over as needed */
3062    dst = MP_DIGITS(mp);
3063    src = dst + p;
3064    for (ix = USED(mp) - p; ix > 0; ix--)
3065      *dst++ = *src++;
3066  
3067    MP_USED(mp) -= p;
3068    /* Fill the top digits with zeroes */
3069    while (p-- > 0)
3070      *dst++ = 0;
3071  
3072  #if 0
3073    /* Strip off any leading zeroes    */
3074    s_mp_clamp(mp);
3075  #endif
3076  
3077  } /* end s_mp_rshd() */
3078  
3079  /* }}} */
3080  
3081  /* {{{ s_mp_div_2(mp) */
3082  
3083  /* Divide by two -- take advantage of radix properties to do it fast      */
3084  void     s_mp_div_2(mp_int *mp)
3085  {
3086    s_mp_div_2d(mp, 1);
3087  
3088  } /* end s_mp_div_2() */
3089  
3090  /* }}} */
3091  
3092  /* {{{ s_mp_mul_2(mp) */
3093  
3094  mp_err s_mp_mul_2(mp_int *mp)
3095  {
3096    mp_digit *pd;
3097    int      ix, used;
3098    mp_digit kin = 0;
3099  
3100    /* Shift digits leftward by 1 bit */
3101    used = MP_USED(mp);
3102    pd = MP_DIGITS(mp);
3103    for (ix = 0; ix < used; ix++) {
3104      mp_digit d = *pd;
3105      *pd++ = (d << 1) | kin;
3106      kin = (d >> (DIGIT_BIT - 1));
3107    }
3108  
3109    /* Deal with rollover from last digit */
3110    if (kin) {
3111      if (ix >= ALLOC(mp)) {
3112        mp_err res;
3113        if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3114  	return res;
3115      }
3116  
3117      DIGIT(mp, ix) = kin;
3118      USED(mp) += 1;
3119    }
3120  
3121    return MP_OKAY;
3122  
3123  } /* end s_mp_mul_2() */
3124  
3125  /* }}} */
3126  
3127  /* {{{ s_mp_mod_2d(mp, d) */
3128  
3129  /*
3130    Remainder the integer by 2^d, where d is a number of bits.  This
3131    amounts to a bitwise AND of the value, and does not require the full
3132    division code
3133   */
3134  void     s_mp_mod_2d(mp_int *mp, mp_digit d)
3135  {
3136    mp_size  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3137    mp_size  ix;
3138    mp_digit dmask;
3139  
3140    if(ndig >= USED(mp))
3141      return;
3142  
3143    /* Flush all the bits above 2^d in its digit */
3144    dmask = ((mp_digit)1 << nbit) - 1;
3145    DIGIT(mp, ndig) &= dmask;
3146  
3147    /* Flush all digits above the one with 2^d in it */
3148    for(ix = ndig + 1; ix < USED(mp); ix++)
3149      DIGIT(mp, ix) = 0;
3150  
3151    s_mp_clamp(mp);
3152  
3153  } /* end s_mp_mod_2d() */
3154  
3155  /* }}} */
3156  
3157  /* {{{ s_mp_div_2d(mp, d) */
3158  
3159  /*
3160    Divide the integer by 2^d, where d is a number of bits.  This
3161    amounts to a bitwise shift of the value, and does not require the
3162    full division code (used in Barrett reduction, see below)
3163   */
3164  void     s_mp_div_2d(mp_int *mp, mp_digit d)
3165  {
3166    int       ix;
3167    mp_digit  save, next, mask;
3168  
3169    s_mp_rshd(mp, d / DIGIT_BIT);
3170    d %= DIGIT_BIT;
3171    if (d) {
3172      mask = ((mp_digit)1 << d) - 1;
3173      save = 0;
3174      for(ix = USED(mp) - 1; ix >= 0; ix--) {
3175        next = DIGIT(mp, ix) & mask;
3176        DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3177        save = next;
3178      }
3179    }
3180    s_mp_clamp(mp);
3181  
3182  } /* end s_mp_div_2d() */
3183  
3184  /* }}} */
3185  
3186  /* {{{ s_mp_norm(a, b, *d) */
3187  
3188  /*
3189    s_mp_norm(a, b, *d)
3190  
3191    Normalize a and b for division, where b is the divisor.  In order
3192    that we might make good guesses for quotient digits, we want the
3193    leading digit of b to be at least half the radix, which we
3194    accomplish by multiplying a and b by a power of 2.  The exponent
3195    (shift count) is placed in *pd, so that the remainder can be shifted
3196    back at the end of the division process.
3197   */
3198  
3199  mp_err   s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3200  {
3201    mp_digit  d;
3202    mp_digit  mask;
3203    mp_digit  b_msd;
3204    mp_err    res    = MP_OKAY;
3205  
3206    d = 0;
3207    mask  = DIGIT_MAX & ~(DIGIT_MAX >> 1);	/* mask is msb of digit */
3208    b_msd = DIGIT(b, USED(b) - 1);
3209    while (!(b_msd & mask)) {
3210      b_msd <<= 1;
3211      ++d;
3212    }
3213  
3214    if (d) {
3215      MP_CHECKOK( s_mp_mul_2d(a, d) );
3216      MP_CHECKOK( s_mp_mul_2d(b, d) );
3217    }
3218  
3219    *pd = d;
3220  CLEANUP:
3221    return res;
3222  
3223  } /* end s_mp_norm() */
3224  
3225  /* }}} */
3226  
3227  /* }}} */
3228  
3229  /* {{{ Primitive digit arithmetic */
3230  
3231  /* {{{ s_mp_add_d(mp, d) */
3232  
3233  /* Add d to |mp| in place                                                 */
3234  mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
3235  {
3236  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3237    mp_word   w, k = 0;
3238    mp_size   ix = 1;
3239  
3240    w = (mp_word)DIGIT(mp, 0) + d;
3241    DIGIT(mp, 0) = ACCUM(w);
3242    k = CARRYOUT(w);
3243  
3244    while(ix < USED(mp) && k) {
3245      w = (mp_word)DIGIT(mp, ix) + k;
3246      DIGIT(mp, ix) = ACCUM(w);
3247      k = CARRYOUT(w);
3248      ++ix;
3249    }
3250  
3251    if(k != 0) {
3252      mp_err  res;
3253  
3254      if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3255        return res;
3256  
3257      DIGIT(mp, ix) = (mp_digit)k;
3258    }
3259  
3260    return MP_OKAY;
3261  #else
3262    mp_digit * pmp = MP_DIGITS(mp);
3263    mp_digit sum, mp_i, carry = 0;
3264    mp_err   res = MP_OKAY;
3265    int used = (int)MP_USED(mp);
3266  
3267    mp_i = *pmp;
3268    *pmp++ = sum = d + mp_i;
3269    carry = (sum < d);
3270    while (carry && --used > 0) {
3271      mp_i = *pmp;
3272      *pmp++ = sum = carry + mp_i;
3273      carry = !sum;
3274    }
3275    if (carry && !used) {
3276      /* mp is growing */
3277      used = MP_USED(mp);
3278      MP_CHECKOK( s_mp_pad(mp, used + 1) );
3279      MP_DIGIT(mp, used) = carry;
3280    }
3281  CLEANUP:
3282    return res;
3283  #endif
3284  } /* end s_mp_add_d() */
3285  
3286  /* }}} */
3287  
3288  /* {{{ s_mp_sub_d(mp, d) */
3289  
3290  /* Subtract d from |mp| in place, assumes |mp| > d                        */
3291  mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
3292  {
3293  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3294    mp_word   w, b = 0;
3295    mp_size   ix = 1;
3296  
3297    /* Compute initial subtraction    */
3298    w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3299    b = CARRYOUT(w) ? 0 : 1;
3300    DIGIT(mp, 0) = ACCUM(w);
3301  
3302    /* Propagate borrows leftward     */
3303    while(b && ix < USED(mp)) {
3304      w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3305      b = CARRYOUT(w) ? 0 : 1;
3306      DIGIT(mp, ix) = ACCUM(w);
3307      ++ix;
3308    }
3309  
3310    /* Remove leading zeroes          */
3311    s_mp_clamp(mp);
3312  
3313    /* If we have a borrow out, it's a violation of the input invariant */
3314    if(b)
3315      return MP_RANGE;
3316    else
3317      return MP_OKAY;
3318  #else
3319    mp_digit *pmp = MP_DIGITS(mp);
3320    mp_digit mp_i, diff, borrow;
3321    mp_size  used = MP_USED(mp);
3322  
3323    mp_i = *pmp;
3324    *pmp++ = diff = mp_i - d;
3325    borrow = (diff > mp_i);
3326    while (borrow && --used) {
3327      mp_i = *pmp;
3328      *pmp++ = diff = mp_i - borrow;
3329      borrow = (diff > mp_i);
3330    }
3331    s_mp_clamp(mp);
3332    return (borrow && !used) ? MP_RANGE : MP_OKAY;
3333  #endif
3334  } /* end s_mp_sub_d() */
3335  
3336  /* }}} */
3337  
3338  /* {{{ s_mp_mul_d(a, d) */
3339  
3340  /* Compute a = a * d, single digit multiplication                         */
3341  mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
3342  {
3343    mp_err  res;
3344    mp_size used;
3345    int     pow;
3346  
3347    if (!d) {
3348      mp_zero(a);
3349      return MP_OKAY;
3350    }
3351    if (d == 1)
3352      return MP_OKAY;
3353    if (0 <= (pow = s_mp_ispow2d(d))) {
3354      return s_mp_mul_2d(a, (mp_digit)pow);
3355    }
3356  
3357    used = MP_USED(a);
3358    MP_CHECKOK( s_mp_pad(a, used + 1) );
3359  
3360    s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3361  
3362    s_mp_clamp(a);
3363  
3364  CLEANUP:
3365    return res;
3366  
3367  } /* end s_mp_mul_d() */
3368  
3369  /* }}} */
3370  
3371  /* {{{ s_mp_div_d(mp, d, r) */
3372  
3373  /*
3374    s_mp_div_d(mp, d, r)
3375  
3376    Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3377    single digit d.  If r is null, the remainder will be discarded.
3378   */
3379  
3380  mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3381  {
3382  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3383    mp_word   w = 0, q;
3384  #else
3385    mp_digit  w, q;
3386  #endif
3387    int       ix;
3388    mp_err    res;
3389    mp_int    quot;
3390    mp_int    rem;
3391  
3392    if(d == 0)
3393      return MP_RANGE;
3394    if (d == 1) {
3395      if (r)
3396        *r = 0;
3397      return MP_OKAY;
3398    }
3399    /* could check for power of 2 here, but mp_div_d does that. */
3400    if (MP_USED(mp) == 1) {
3401      mp_digit n   = MP_DIGIT(mp,0);
3402      mp_digit rem;
3403  
3404      q   = n / d;
3405      rem = n % d;
3406      MP_DIGIT(mp,0) = q;
3407      if (r)
3408        *r = rem;
3409      return MP_OKAY;
3410    }
3411  
3412    MP_DIGITS(&rem)  = 0;
3413    MP_DIGITS(&quot) = 0;
3414    /* Make room for the quotient */
3415    MP_CHECKOK( mp_init_size(&quot, USED(mp), FLAG(mp)) );
3416  
3417  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3418    for(ix = USED(mp) - 1; ix >= 0; ix--) {
3419      w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3420  
3421      if(w >= d) {
3422        q = w / d;
3423        w = w % d;
3424      } else {
3425        q = 0;
3426      }
3427  
3428      s_mp_lshd(&quot, 1);
3429      DIGIT(&quot, 0) = (mp_digit)q;
3430    }
3431  #else
3432    {
3433      mp_digit p;
3434  #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3435      mp_digit norm;
3436  #endif
3437  
3438      MP_CHECKOK( mp_init_copy(&rem, mp) );
3439  
3440  #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3441      MP_DIGIT(&quot, 0) = d;
3442      MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3443      if (norm)
3444        d <<= norm;
3445      MP_DIGIT(&quot, 0) = 0;
3446  #endif
3447  
3448      p = 0;
3449      for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3450        w = DIGIT(&rem, ix);
3451  
3452        if (p) {
3453          MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3454        } else if (w >= d) {
3455  	q = w / d;
3456  	w = w % d;
3457        } else {
3458  	q = 0;
3459        }
3460  
3461        MP_CHECKOK( s_mp_lshd(&quot, 1) );
3462        DIGIT(&quot, 0) = q;
3463        p = w;
3464      }
3465  #if !defined(MP_ASSEMBLY_DIV_2DX1D)
3466      if (norm)
3467        w >>= norm;
3468  #endif
3469    }
3470  #endif
3471  
3472    /* Deliver the remainder, if desired */
3473    if(r)
3474      *r = (mp_digit)w;
3475  
3476    s_mp_clamp(&quot);
3477    mp_exch(&quot, mp);
3478  CLEANUP:
3479    mp_clear(&quot);
3480    mp_clear(&rem);
3481  
3482    return res;
3483  } /* end s_mp_div_d() */
3484  
3485  /* }}} */
3486  
3487  
3488  /* }}} */
3489  
3490  /* {{{ Primitive full arithmetic */
3491  
3492  /* {{{ s_mp_add(a, b) */
3493  
3494  /* Compute a = |a| + |b|                                                  */
3495  mp_err   s_mp_add(mp_int *a, const mp_int *b)  /* magnitude addition      */
3496  {
3497  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3498    mp_word   w = 0;
3499  #else
3500    mp_digit  d, sum, carry = 0;
3501  #endif
3502    mp_digit *pa, *pb;
3503    mp_size   ix;
3504    mp_size   used;
3505    mp_err    res;
3506  
3507    /* Make sure a has enough precision for the output value */
3508    if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3509      return res;
3510  
3511    /*
3512      Add up all digits up to the precision of b.  If b had initially
3513      the same precision as a, or greater, we took care of it by the
3514      padding step above, so there is no problem.  If b had initially
3515      less precision, we'll have to make sure the carry out is duly
3516      propagated upward among the higher-order digits of the sum.
3517     */
3518    pa = MP_DIGITS(a);
3519    pb = MP_DIGITS(b);
3520    used = MP_USED(b);
3521    for(ix = 0; ix < used; ix++) {
3522  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3523      w = w + *pa + *pb++;
3524      *pa++ = ACCUM(w);
3525      w = CARRYOUT(w);
3526  #else
3527      d = *pa;
3528      sum = d + *pb++;
3529      d = (sum < d);			/* detect overflow */
3530      *pa++ = sum += carry;
3531      carry = d + (sum < carry);		/* detect overflow */
3532  #endif
3533    }
3534  
3535    /* If we run out of 'b' digits before we're actually done, make
3536       sure the carries get propagated upward...
3537     */
3538    used = MP_USED(a);
3539  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3540    while (w && ix < used) {
3541      w = w + *pa;
3542      *pa++ = ACCUM(w);
3543      w = CARRYOUT(w);
3544      ++ix;
3545    }
3546  #else
3547    while (carry && ix < used) {
3548      sum = carry + *pa;
3549      *pa++ = sum;
3550      carry = !sum;
3551      ++ix;
3552    }
3553  #endif
3554  
3555    /* If there's an overall carry out, increase precision and include
3556       it.  We could have done this initially, but why touch the memory
3557       allocator unless we're sure we have to?
3558     */
3559  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3560    if (w) {
3561      if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3562        return res;
3563  
3564      DIGIT(a, ix) = (mp_digit)w;
3565    }
3566  #else
3567    if (carry) {
3568      if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3569        return res;
3570  
3571      DIGIT(a, used) = carry;
3572    }
3573  #endif
3574  
3575    return MP_OKAY;
3576  } /* end s_mp_add() */
3577  
3578  /* }}} */
3579  
3580  /* Compute c = |a| + |b|         */ /* magnitude addition      */
3581  mp_err   s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3582  {
3583    mp_digit *pa, *pb, *pc;
3584  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3585    mp_word   w = 0;
3586  #else
3587    mp_digit  sum, carry = 0, d;
3588  #endif
3589    mp_size   ix;
3590    mp_size   used;
3591    mp_err    res;
3592  
3593    MP_SIGN(c) = MP_SIGN(a);
3594    if (MP_USED(a) < MP_USED(b)) {
3595      const mp_int *xch = a;
3596      a = b;
3597      b = xch;
3598    }
3599  
3600    /* Make sure a has enough precision for the output value */
3601    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3602      return res;
3603  
3604    /*
3605      Add up all digits up to the precision of b.  If b had initially
3606      the same precision as a, or greater, we took care of it by the
3607      exchange step above, so there is no problem.  If b had initially
3608      less precision, we'll have to make sure the carry out is duly
3609      propagated upward among the higher-order digits of the sum.
3610     */
3611    pa = MP_DIGITS(a);
3612    pb = MP_DIGITS(b);
3613    pc = MP_DIGITS(c);
3614    used = MP_USED(b);
3615    for (ix = 0; ix < used; ix++) {
3616  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3617      w = w + *pa++ + *pb++;
3618      *pc++ = ACCUM(w);
3619      w = CARRYOUT(w);
3620  #else
3621      d = *pa++;
3622      sum = d + *pb++;
3623      d = (sum < d);			/* detect overflow */
3624      *pc++ = sum += carry;
3625      carry = d + (sum < carry);		/* detect overflow */
3626  #endif
3627    }
3628  
3629    /* If we run out of 'b' digits before we're actually done, make
3630       sure the carries get propagated upward...
3631     */
3632    for (used = MP_USED(a); ix < used; ++ix) {
3633  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3634      w = w + *pa++;
3635      *pc++ = ACCUM(w);
3636      w = CARRYOUT(w);
3637  #else
3638      *pc++ = sum = carry + *pa++;
3639      carry = (sum < carry);
3640  #endif
3641    }
3642  
3643    /* If there's an overall carry out, increase precision and include
3644       it.  We could have done this initially, but why touch the memory
3645       allocator unless we're sure we have to?
3646     */
3647  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3648    if (w) {
3649      if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3650        return res;
3651  
3652      DIGIT(c, used) = (mp_digit)w;
3653      ++used;
3654    }
3655  #else
3656    if (carry) {
3657      if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3658        return res;
3659  
3660      DIGIT(c, used) = carry;
3661      ++used;
3662    }
3663  #endif
3664    MP_USED(c) = used;
3665    return MP_OKAY;
3666  }
3667  /* {{{ s_mp_add_offset(a, b, offset) */
3668  
3669  /* Compute a = |a| + ( |b| * (RADIX ** offset) )             */
3670  mp_err   s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3671  {
3672  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3673    mp_word   w, k = 0;
3674  #else
3675    mp_digit  d, sum, carry = 0;
3676  #endif
3677    mp_size   ib;
3678    mp_size   ia;
3679    mp_size   lim;
3680    mp_err    res;
3681  
3682    /* Make sure a has enough precision for the output value */
3683    lim = MP_USED(b) + offset;
3684    if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3685      return res;
3686  
3687    /*
3688      Add up all digits up to the precision of b.  If b had initially
3689      the same precision as a, or greater, we took care of it by the
3690      padding step above, so there is no problem.  If b had initially
3691      less precision, we'll have to make sure the carry out is duly
3692      propagated upward among the higher-order digits of the sum.
3693     */
3694    lim = USED(b);
3695    for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3696  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3697      w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3698      DIGIT(a, ia) = ACCUM(w);
3699      k = CARRYOUT(w);
3700  #else
3701      d = MP_DIGIT(a, ia);
3702      sum = d + MP_DIGIT(b, ib);
3703      d = (sum < d);
3704      MP_DIGIT(a,ia) = sum += carry;
3705      carry = d + (sum < carry);
3706  #endif
3707    }
3708  
3709    /* If we run out of 'b' digits before we're actually done, make
3710       sure the carries get propagated upward...
3711     */
3712  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3713    for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3714      w = (mp_word)DIGIT(a, ia) + k;
3715      DIGIT(a, ia) = ACCUM(w);
3716      k = CARRYOUT(w);
3717    }
3718  #else
3719    for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3720      d = MP_DIGIT(a, ia);
3721      MP_DIGIT(a,ia) = sum = d + carry;
3722      carry = (sum < d);
3723    }
3724  #endif
3725  
3726    /* If there's an overall carry out, increase precision and include
3727       it.  We could have done this initially, but why touch the memory
3728       allocator unless we're sure we have to?
3729     */
3730  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3731    if(k) {
3732      if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3733        return res;
3734  
3735      DIGIT(a, ia) = (mp_digit)k;
3736    }
3737  #else
3738    if (carry) {
3739      if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3740        return res;
3741  
3742      DIGIT(a, lim) = carry;
3743    }
3744  #endif
3745    s_mp_clamp(a);
3746  
3747    return MP_OKAY;
3748  
3749  } /* end s_mp_add_offset() */
3750  
3751  /* }}} */
3752  
3753  /* {{{ s_mp_sub(a, b) */
3754  
3755  /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
3756  mp_err   s_mp_sub(mp_int *a, const mp_int *b)  /* magnitude subtract      */
3757  {
3758    mp_digit *pa, *pb, *limit;
3759  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3760    mp_sword  w = 0;
3761  #else
3762    mp_digit  d, diff, borrow = 0;
3763  #endif
3764  
3765    /*
3766      Subtract and propagate borrow.  Up to the precision of b, this
3767      accounts for the digits of b; after that, we just make sure the
3768      carries get to the right place.  This saves having to pad b out to
3769      the precision of a just to make the loops work right...
3770     */
3771    pa = MP_DIGITS(a);
3772    pb = MP_DIGITS(b);
3773    limit = pb + MP_USED(b);
3774    while (pb < limit) {
3775  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3776      w = w + *pa - *pb++;
3777      *pa++ = ACCUM(w);
3778      w >>= MP_DIGIT_BIT;
3779  #else
3780      d = *pa;
3781      diff = d - *pb++;
3782      d = (diff > d);				/* detect borrow */
3783      if (borrow && --diff == MP_DIGIT_MAX)
3784        ++d;
3785      *pa++ = diff;
3786      borrow = d;
3787  #endif
3788    }
3789    limit = MP_DIGITS(a) + MP_USED(a);
3790  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3791    while (w && pa < limit) {
3792      w = w + *pa;
3793      *pa++ = ACCUM(w);
3794      w >>= MP_DIGIT_BIT;
3795    }
3796  #else
3797    while (borrow && pa < limit) {
3798      d = *pa;
3799      *pa++ = diff = d - borrow;
3800      borrow = (diff > d);
3801    }
3802  #endif
3803  
3804    /* Clobber any leading zeroes we created    */
3805    s_mp_clamp(a);
3806  
3807    /*
3808       If there was a borrow out, then |b| > |a| in violation
3809       of our input invariant.  We've already done the work,
3810       but we'll at least complain about it...
3811     */
3812  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3813    return w ? MP_RANGE : MP_OKAY;
3814  #else
3815    return borrow ? MP_RANGE : MP_OKAY;
3816  #endif
3817  } /* end s_mp_sub() */
3818  
3819  /* }}} */
3820  
3821  /* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract      */
3822  mp_err   s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3823  {
3824    mp_digit *pa, *pb, *pc;
3825  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3826    mp_sword  w = 0;
3827  #else
3828    mp_digit  d, diff, borrow = 0;
3829  #endif
3830    int       ix, limit;
3831    mp_err    res;
3832  
3833    MP_SIGN(c) = MP_SIGN(a);
3834  
3835    /* Make sure a has enough precision for the output value */
3836    if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3837      return res;
3838  
3839    /*
3840      Subtract and propagate borrow.  Up to the precision of b, this
3841      accounts for the digits of b; after that, we just make sure the
3842      carries get to the right place.  This saves having to pad b out to
3843      the precision of a just to make the loops work right...
3844     */
3845    pa = MP_DIGITS(a);
3846    pb = MP_DIGITS(b);
3847    pc = MP_DIGITS(c);
3848    limit = MP_USED(b);
3849    for (ix = 0; ix < limit; ++ix) {
3850  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3851      w = w + *pa++ - *pb++;
3852      *pc++ = ACCUM(w);
3853      w >>= MP_DIGIT_BIT;
3854  #else
3855      d = *pa++;
3856      diff = d - *pb++;
3857      d = (diff > d);
3858      if (borrow && --diff == MP_DIGIT_MAX)
3859        ++d;
3860      *pc++ = diff;
3861      borrow = d;
3862  #endif
3863    }
3864    for (limit = MP_USED(a); ix < limit; ++ix) {
3865  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3866      w = w + *pa++;
3867      *pc++ = ACCUM(w);
3868      w >>= MP_DIGIT_BIT;
3869  #else
3870      d = *pa++;
3871      *pc++ = diff = d - borrow;
3872      borrow = (diff > d);
3873  #endif
3874    }
3875  
3876    /* Clobber any leading zeroes we created    */
3877    MP_USED(c) = ix;
3878    s_mp_clamp(c);
3879  
3880    /*
3881       If there was a borrow out, then |b| > |a| in violation
3882       of our input invariant.  We've already done the work,
3883       but we'll at least complain about it...
3884     */
3885  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3886    return w ? MP_RANGE : MP_OKAY;
3887  #else
3888    return borrow ? MP_RANGE : MP_OKAY;
3889  #endif
3890  }
3891  /* {{{ s_mp_mul(a, b) */
3892  
3893  /* Compute a = |a| * |b|                                                  */
3894  mp_err   s_mp_mul(mp_int *a, const mp_int *b)
3895  {
3896    return mp_mul(a, b, a);
3897  } /* end s_mp_mul() */
3898  
3899  /* }}} */
3900  
3901  #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3902  /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3903  #define MP_MUL_DxD(a, b, Phi, Plo) \
3904    { unsigned long long product = (unsigned long long)a * b; \
3905      Plo = (mp_digit)product; \
3906      Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3907  #elif defined(OSF1)
3908  #define MP_MUL_DxD(a, b, Phi, Plo) \
3909    { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3910      Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3911  #else
3912  #define MP_MUL_DxD(a, b, Phi, Plo) \
3913    { mp_digit a0b1, a1b0; \
3914      Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3915      Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3916      a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3917      a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3918      a1b0 += a0b1; \
3919      Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3920      if (a1b0 < a0b1)  \
3921        Phi += MP_HALF_RADIX; \
3922      a1b0 <<= MP_HALF_DIGIT_BIT; \
3923      Plo += a1b0; \
3924      if (Plo < a1b0) \
3925        ++Phi; \
3926    }
3927  #endif
3928  
3929  #if !defined(MP_ASSEMBLY_MULTIPLY)
3930  /* c = a * b */
3931  void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3932  {
3933  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3934    mp_digit   d = 0;
3935  
3936    /* Inner product:  Digits of a */
3937    while (a_len--) {
3938      mp_word w = ((mp_word)b * *a++) + d;
3939      *c++ = ACCUM(w);
3940      d = CARRYOUT(w);
3941    }
3942    *c = d;
3943  #else
3944    mp_digit carry = 0;
3945    while (a_len--) {
3946      mp_digit a_i = *a++;
3947      mp_digit a0b0, a1b1;
3948  
3949      MP_MUL_DxD(a_i, b, a1b1, a0b0);
3950  
3951      a0b0 += carry;
3952      if (a0b0 < carry)
3953        ++a1b1;
3954      *c++ = a0b0;
3955      carry = a1b1;
3956    }
3957    *c = carry;
3958  #endif
3959  }
3960  
3961  /* c += a * b */
3962  void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3963  			      mp_digit *c)
3964  {
3965  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3966    mp_digit   d = 0;
3967  
3968    /* Inner product:  Digits of a */
3969    while (a_len--) {
3970      mp_word w = ((mp_word)b * *a++) + *c + d;
3971      *c++ = ACCUM(w);
3972      d = CARRYOUT(w);
3973    }
3974    *c = d;
3975  #else
3976    mp_digit carry = 0;
3977    while (a_len--) {
3978      mp_digit a_i = *a++;
3979      mp_digit a0b0, a1b1;
3980  
3981      MP_MUL_DxD(a_i, b, a1b1, a0b0);
3982  
3983      a0b0 += carry;
3984      if (a0b0 < carry)
3985        ++a1b1;
3986      a0b0 += a_i = *c;
3987      if (a0b0 < a_i)
3988        ++a1b1;
3989      *c++ = a0b0;
3990      carry = a1b1;
3991    }
3992    *c = carry;
3993  #endif
3994  }
3995  
3996  /* Presently, this is only used by the Montgomery arithmetic code. */
3997  /* c += a * b */
3998  void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3999  {
4000  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4001    mp_digit   d = 0;
4002  
4003    /* Inner product:  Digits of a */
4004    while (a_len--) {
4005      mp_word w = ((mp_word)b * *a++) + *c + d;
4006      *c++ = ACCUM(w);
4007      d = CARRYOUT(w);
4008    }
4009  
4010    while (d) {
4011      mp_word w = (mp_word)*c + d;
4012      *c++ = ACCUM(w);
4013      d = CARRYOUT(w);
4014    }
4015  #else
4016    mp_digit carry = 0;
4017    while (a_len--) {
4018      mp_digit a_i = *a++;
4019      mp_digit a0b0, a1b1;
4020  
4021      MP_MUL_DxD(a_i, b, a1b1, a0b0);
4022  
4023      a0b0 += carry;
4024      if (a0b0 < carry)
4025        ++a1b1;
4026  
4027      a0b0 += a_i = *c;
4028      if (a0b0 < a_i)
4029        ++a1b1;
4030  
4031      *c++ = a0b0;
4032      carry = a1b1;
4033    }
4034    while (carry) {
4035      mp_digit c_i = *c;
4036      carry += c_i;
4037      *c++ = carry;
4038      carry = carry < c_i;
4039    }
4040  #endif
4041  }
4042  #endif
4043  
4044  #if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4045  /* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4046  #define MP_SQR_D(a, Phi, Plo) \
4047    { unsigned long long square = (unsigned long long)a * a; \
4048      Plo = (mp_digit)square; \
4049      Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4050  #elif defined(OSF1)
4051  #define MP_SQR_D(a, Phi, Plo) \
4052    { Plo = asm ("mulq  %a0, %a0, %v0", a);\
4053      Phi = asm ("umulh %a0, %a0, %v0", a); }
4054  #else
4055  #define MP_SQR_D(a, Phi, Plo) \
4056    { mp_digit Pmid; \
4057      Plo  = (a  & MP_HALF_DIGIT_MAX) * (a  & MP_HALF_DIGIT_MAX); \
4058      Phi  = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4059      Pmid = (a  & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4060      Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1);  \
4061      Pmid <<= (MP_HALF_DIGIT_BIT + 1);  \
4062      Plo += Pmid;  \
4063      if (Plo < Pmid)  \
4064        ++Phi;  \
4065    }
4066  #endif
4067  
4068  #if !defined(MP_ASSEMBLY_SQUARE)
4069  /* Add the squares of the digits of a to the digits of b. */
4070  void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4071  {
4072  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4073    mp_word  w;
4074    mp_digit d;
4075    mp_size  ix;
4076  
4077    w  = 0;
4078  #define ADD_SQUARE(n) \
4079      d = pa[n]; \
4080      w += (d * (mp_word)d) + ps[2*n]; \
4081      ps[2*n] = ACCUM(w); \
4082      w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4083      ps[2*n+1] = ACCUM(w); \
4084      w = (w >> DIGIT_BIT)
4085  
4086    for (ix = a_len; ix >= 4; ix -= 4) {
4087      ADD_SQUARE(0);
4088      ADD_SQUARE(1);
4089      ADD_SQUARE(2);
4090      ADD_SQUARE(3);
4091      pa += 4;
4092      ps += 8;
4093    }
4094    if (ix) {
4095      ps += 2*ix;
4096      pa += ix;
4097      switch (ix) {
4098      case 3: ADD_SQUARE(-3); /* FALLTHRU */
4099      case 2: ADD_SQUARE(-2); /* FALLTHRU */
4100      case 1: ADD_SQUARE(-1); /* FALLTHRU */
4101      case 0: break;
4102      }
4103    }
4104    while (w) {
4105      w += *ps;
4106      *ps++ = ACCUM(w);
4107      w = (w >> DIGIT_BIT);
4108    }
4109  #else
4110    mp_digit carry = 0;
4111    while (a_len--) {
4112      mp_digit a_i = *pa++;
4113      mp_digit a0a0, a1a1;
4114  
4115      MP_SQR_D(a_i, a1a1, a0a0);
4116  
4117      /* here a1a1 and a0a0 constitute a_i ** 2 */
4118      a0a0 += carry;
4119      if (a0a0 < carry)
4120        ++a1a1;
4121  
4122      /* now add to ps */
4123      a0a0 += a_i = *ps;
4124      if (a0a0 < a_i)
4125        ++a1a1;
4126      *ps++ = a0a0;
4127      a1a1 += a_i = *ps;
4128      carry = (a1a1 < a_i);
4129      *ps++ = a1a1;
4130    }
4131    while (carry) {
4132      mp_digit s_i = *ps;
4133      carry += s_i;
4134      *ps++ = carry;
4135      carry = carry < s_i;
4136    }
4137  #endif
4138  }
4139  #endif
4140  
4141  #if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4142  && !defined(MP_ASSEMBLY_DIV_2DX1D)
4143  /*
4144  ** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4145  ** so its high bit is 1.   This code is from NSPR.
4146  */
4147  mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4148  		       mp_digit *qp, mp_digit *rp)
4149  {
4150      mp_digit d1, d0, q1, q0;
4151      mp_digit r1, r0, m;
4152  
4153      d1 = divisor >> MP_HALF_DIGIT_BIT;
4154      d0 = divisor & MP_HALF_DIGIT_MAX;
4155      r1 = Nhi % d1;
4156      q1 = Nhi / d1;
4157      m = q1 * d0;
4158      r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4159      if (r1 < m) {
4160          q1--, r1 += divisor;
4161          if (r1 >= divisor && r1 < m) {
4162  	    q1--, r1 += divisor;
4163  	}
4164      }
4165      r1 -= m;
4166      r0 = r1 % d1;
4167      q0 = r1 / d1;
4168      m = q0 * d0;
4169      r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4170      if (r0 < m) {
4171          q0--, r0 += divisor;
4172          if (r0 >= divisor && r0 < m) {
4173  	    q0--, r0 += divisor;
4174  	}
4175      }
4176      if (qp)
4177  	*qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4178      if (rp)
4179  	*rp = r0 - m;
4180      return MP_OKAY;
4181  }
4182  #endif
4183  
4184  #if MP_SQUARE
4185  /* {{{ s_mp_sqr(a) */
4186  
4187  mp_err   s_mp_sqr(mp_int *a)
4188  {
4189    mp_err   res;
4190    mp_int   tmp;
4191  
4192    if((res = mp_init_size(&tmp, 2 * USED(a), FLAG(a))) != MP_OKAY)
4193      return res;
4194    res = mp_sqr(a, &tmp);
4195    if (res == MP_OKAY) {
4196      s_mp_exch(&tmp, a);
4197    }
4198    mp_clear(&tmp);
4199    return res;
4200  }
4201  
4202  /* }}} */
4203  #endif
4204  
4205  /* {{{ s_mp_div(a, b) */
4206  
4207  /*
4208    s_mp_div(a, b)
4209  
4210    Compute a = a / b and b = a mod b.  Assumes b > a.
4211   */
4212  
4213  mp_err   s_mp_div(mp_int *rem, 	/* i: dividend, o: remainder */
4214                    mp_int *div, 	/* i: divisor                */
4215  		  mp_int *quot)	/* i: 0;        o: quotient  */
4216  {
4217    mp_int   part, t;
4218  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4219    mp_word  q_msd;
4220  #else
4221    mp_digit q_msd;
4222  #endif
4223    mp_err   res;
4224    mp_digit d;
4225    mp_digit div_msd;
4226    int      ix;
4227  
4228    if(mp_cmp_z(div) == 0)
4229      return MP_RANGE;
4230  
4231    /* Shortcut if divisor is power of two */
4232    if((ix = s_mp_ispow2(div)) >= 0) {
4233      MP_CHECKOK( mp_copy(rem, quot) );
4234      s_mp_div_2d(quot, (mp_digit)ix);
4235      s_mp_mod_2d(rem,  (mp_digit)ix);
4236  
4237      return MP_OKAY;
4238    }
4239  
4240    DIGITS(&t) = 0;
4241    MP_SIGN(rem) = ZPOS;
4242    MP_SIGN(div) = ZPOS;
4243  
4244    /* A working temporary for division     */
4245    MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem), FLAG(rem)));
4246  
4247    /* Normalize to optimize guessing       */
4248    MP_CHECKOK( s_mp_norm(rem, div, &d) );
4249  
4250    part = *rem;
4251  
4252    /* Perform the division itself...woo!   */
4253    MP_USED(quot) = MP_ALLOC(quot);
4254  
4255    /* Find a partial substring of rem which is at least div */
4256    /* If we didn't find one, we're finished dividing    */
4257    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4258      int i;
4259      int unusedRem;
4260  
4261      unusedRem = MP_USED(rem) - MP_USED(div);
4262      MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4263      MP_ALLOC(&part)  = MP_ALLOC(rem)  - unusedRem;
4264      MP_USED(&part)   = MP_USED(div);
4265      if (s_mp_cmp(&part, div) < 0) {
4266        -- unusedRem;
4267  #if MP_ARGCHK == 2
4268        assert(unusedRem >= 0);
4269  #endif
4270        -- MP_DIGITS(&part);
4271        ++ MP_USED(&part);
4272        ++ MP_ALLOC(&part);
4273      }
4274  
4275      /* Compute a guess for the next quotient digit       */
4276      q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4277      div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4278      if (q_msd >= div_msd) {
4279        q_msd = 1;
4280      } else if (MP_USED(&part) > 1) {
4281  #if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4282        q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4283        q_msd /= div_msd;
4284        if (q_msd == RADIX)
4285          --q_msd;
4286  #else
4287        mp_digit r;
4288        MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4289  				  div_msd, &q_msd, &r) );
4290  #endif
4291      } else {
4292        q_msd = 0;
4293      }
4294  #if MP_ARGCHK == 2
4295      assert(q_msd > 0); /* This case should never occur any more. */
4296  #endif
4297      if (q_msd <= 0)
4298        break;
4299  
4300      /* See what that multiplies out to                   */
4301      mp_copy(div, &t);
4302      MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4303  
4304      /*
4305         If it's too big, back it off.  We should not have to do this
4306         more than once, or, in rare cases, twice.  Knuth describes a
4307         method by which this could be reduced to a maximum of once, but
4308         I didn't implement that here.
4309       * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4310       */
4311      for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4312        --q_msd;
4313        s_mp_sub(&t, div);	/* t -= div */
4314      }
4315      if (i < 0) {
4316        res = MP_RANGE;
4317        goto CLEANUP;
4318      }
4319  
4320      /* At this point, q_msd should be the right next digit   */
4321      MP_CHECKOK( s_mp_sub(&part, &t) );	/* part -= t */
4322      s_mp_clamp(rem);
4323  
4324      /*
4325        Include the digit in the quotient.  We allocated enough memory
4326        for any quotient we could ever possibly get, so we should not
4327        have to check for failures here
4328       */
4329      MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4330    }
4331  
4332    /* Denormalize remainder                */
4333    if (d) {
4334      s_mp_div_2d(rem, d);
4335    }
4336  
4337    s_mp_clamp(quot);
4338  
4339  CLEANUP:
4340    mp_clear(&t);
4341  
4342    return res;
4343  
4344  } /* end s_mp_div() */
4345  
4346  
4347  /* }}} */
4348  
4349  /* {{{ s_mp_2expt(a, k) */
4350  
4351  mp_err   s_mp_2expt(mp_int *a, mp_digit k)
4352  {
4353    mp_err    res;
4354    mp_size   dig, bit;
4355  
4356    dig = k / DIGIT_BIT;
4357    bit = k % DIGIT_BIT;
4358  
4359    mp_zero(a);
4360    if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4361      return res;
4362  
4363    DIGIT(a, dig) |= ((mp_digit)1 << bit);
4364  
4365    return MP_OKAY;
4366  
4367  } /* end s_mp_2expt() */
4368  
4369  /* }}} */
4370  
4371  /* {{{ s_mp_reduce(x, m, mu) */
4372  
4373  /*
4374    Compute Barrett reduction, x (mod m), given a precomputed value for
4375    mu = b^2k / m, where b = RADIX and k = #digits(m).  This should be
4376    faster than straight division, when many reductions by the same
4377    value of m are required (such as in modular exponentiation).  This
4378    can nearly halve the time required to do modular exponentiation,
4379    as compared to using the full integer divide to reduce.
4380  
4381    This algorithm was derived from the _Handbook of Applied
4382    Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14,
4383    pp. 603-604.
4384   */
4385  
4386  mp_err   s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4387  {
4388    mp_int   q;
4389    mp_err   res;
4390  
4391    if((res = mp_init_copy(&q, x)) != MP_OKAY)
4392      return res;
4393  
4394    s_mp_rshd(&q, USED(m) - 1);  /* q1 = x / b^(k-1)  */
4395    s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
4396    s_mp_rshd(&q, USED(m) + 1);  /* q3 = q2 / b^(k+1) */
4397  
4398    /* x = x mod b^(k+1), quick (no division) */
4399    s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4400  
4401    /* q = q * m mod b^(k+1), quick (no division) */
4402    s_mp_mul(&q, m);
4403    s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4404  
4405    /* x = x - q */
4406    if((res = mp_sub(x, &q, x)) != MP_OKAY)
4407      goto CLEANUP;
4408  
4409    /* If x < 0, add b^(k+1) to it */
4410    if(mp_cmp_z(x) < 0) {
4411      mp_set(&q, 1);
4412      if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4413        goto CLEANUP;
4414      if((res = mp_add(x, &q, x)) != MP_OKAY)
4415        goto CLEANUP;
4416    }
4417  
4418    /* Back off if it's too big */
4419    while(mp_cmp(x, m) >= 0) {
4420      if((res = s_mp_sub(x, m)) != MP_OKAY)
4421        break;
4422    }
4423  
4424   CLEANUP:
4425    mp_clear(&q);
4426  
4427    return res;
4428  
4429  } /* end s_mp_reduce() */
4430  
4431  /* }}} */
4432  
4433  /* }}} */
4434  
4435  /* {{{ Primitive comparisons */
4436  
4437  /* {{{ s_mp_cmp(a, b) */
4438  
4439  /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
4440  int      s_mp_cmp(const mp_int *a, const mp_int *b)
4441  {
4442    mp_size used_a = MP_USED(a);
4443    {
4444      mp_size used_b = MP_USED(b);
4445  
4446      if (used_a > used_b)
4447        goto IS_GT;
4448      if (used_a < used_b)
4449        goto IS_LT;
4450    }
4451    {
4452      mp_digit *pa, *pb;
4453      mp_digit da = 0, db = 0;
4454  
4455  #define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4456  
4457      pa = MP_DIGITS(a) + used_a;
4458      pb = MP_DIGITS(b) + used_a;
4459      while (used_a >= 4) {
4460        pa     -= 4;
4461        pb     -= 4;
4462        used_a -= 4;
4463        CMP_AB(3);
4464        CMP_AB(2);
4465        CMP_AB(1);
4466        CMP_AB(0);
4467      }
4468      while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4469        /* do nothing */;
4470  done:
4471      if (da > db)
4472        goto IS_GT;
4473      if (da < db)
4474        goto IS_LT;
4475    }
4476    return MP_EQ;
4477  IS_LT:
4478    return MP_LT;
4479  IS_GT:
4480    return MP_GT;
4481  } /* end s_mp_cmp() */
4482  
4483  /* }}} */
4484  
4485  /* {{{ s_mp_cmp_d(a, d) */
4486  
4487  /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
4488  int      s_mp_cmp_d(const mp_int *a, mp_digit d)
4489  {
4490    if(USED(a) > 1)
4491      return MP_GT;
4492  
4493    if(DIGIT(a, 0) < d)
4494      return MP_LT;
4495    else if(DIGIT(a, 0) > d)
4496      return MP_GT;
4497    else
4498      return MP_EQ;
4499  
4500  } /* end s_mp_cmp_d() */
4501  
4502  /* }}} */
4503  
4504  /* {{{ s_mp_ispow2(v) */
4505  
4506  /*
4507    Returns -1 if the value is not a power of two; otherwise, it returns
4508    k such that v = 2^k, i.e. lg(v).
4509   */
4510  int      s_mp_ispow2(const mp_int *v)
4511  {
4512    mp_digit d;
4513    int      extra = 0, ix;
4514  
4515    ix = MP_USED(v) - 1;
4516    d = MP_DIGIT(v, ix); /* most significant digit of v */
4517  
4518    extra = s_mp_ispow2d(d);
4519    if (extra < 0 || ix == 0)
4520      return extra;
4521  
4522    while (--ix >= 0) {
4523      if (DIGIT(v, ix) != 0)
4524        return -1; /* not a power of two */
4525      extra += MP_DIGIT_BIT;
4526    }
4527  
4528    return extra;
4529  
4530  } /* end s_mp_ispow2() */
4531  
4532  /* }}} */
4533  
4534  /* {{{ s_mp_ispow2d(d) */
4535  
4536  int      s_mp_ispow2d(mp_digit d)
4537  {
4538    if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4539      int pow = 0;
4540  #if defined (MP_USE_UINT_DIGIT)
4541      if (d & 0xffff0000U)
4542        pow += 16;
4543      if (d & 0xff00ff00U)
4544        pow += 8;
4545      if (d & 0xf0f0f0f0U)
4546        pow += 4;
4547      if (d & 0xccccccccU)
4548        pow += 2;
4549      if (d & 0xaaaaaaaaU)
4550        pow += 1;
4551  #elif defined(MP_USE_LONG_LONG_DIGIT)
4552      if (d & 0xffffffff00000000ULL)
4553        pow += 32;
4554      if (d & 0xffff0000ffff0000ULL)
4555        pow += 16;
4556      if (d & 0xff00ff00ff00ff00ULL)
4557        pow += 8;
4558      if (d & 0xf0f0f0f0f0f0f0f0ULL)
4559        pow += 4;
4560      if (d & 0xccccccccccccccccULL)
4561        pow += 2;
4562      if (d & 0xaaaaaaaaaaaaaaaaULL)
4563        pow += 1;
4564  #elif defined(MP_USE_LONG_DIGIT)
4565      if (d & 0xffffffff00000000UL)
4566        pow += 32;
4567      if (d & 0xffff0000ffff0000UL)
4568        pow += 16;
4569      if (d & 0xff00ff00ff00ff00UL)
4570        pow += 8;
4571      if (d & 0xf0f0f0f0f0f0f0f0UL)
4572        pow += 4;
4573      if (d & 0xccccccccccccccccUL)
4574        pow += 2;
4575      if (d & 0xaaaaaaaaaaaaaaaaUL)
4576        pow += 1;
4577  #else
4578  #error "unknown type for mp_digit"
4579  #endif
4580      return pow;
4581    }
4582    return -1;
4583  
4584  } /* end s_mp_ispow2d() */
4585  
4586  /* }}} */
4587  
4588  /* }}} */
4589  
4590  /* {{{ Primitive I/O helpers */
4591  
4592  /* {{{ s_mp_tovalue(ch, r) */
4593  
4594  /*
4595    Convert the given character to its digit value, in the given radix.
4596    If the given character is not understood in the given radix, -1 is
4597    returned.  Otherwise the digit's numeric value is returned.
4598  
4599    The results will be odd if you use a radix < 2 or > 62, you are
4600    expected to know what you're up to.
4601   */
4602  int      s_mp_tovalue(char ch, int r)
4603  {
4604    int    val, xch;
4605  
4606    if(r > 36)
4607      xch = ch;
4608    else
4609      xch = toupper(ch);
4610  
4611    if(isdigit(xch))
4612      val = xch - '0';
4613    else if(isupper(xch))
4614      val = xch - 'A' + 10;
4615    else if(islower(xch))
4616      val = xch - 'a' + 36;
4617    else if(xch == '+')
4618      val = 62;
4619    else if(xch == '/')
4620      val = 63;
4621    else
4622      return -1;
4623  
4624    if(val < 0 || val >= r)
4625      return -1;
4626  
4627    return val;
4628  
4629  } /* end s_mp_tovalue() */
4630  
4631  /* }}} */
4632  
4633  /* {{{ s_mp_todigit(val, r, low) */
4634  
4635  /*
4636    Convert val to a radix-r digit, if possible.  If val is out of range
4637    for r, returns zero.  Otherwise, returns an ASCII character denoting
4638    the value in the given radix.
4639  
4640    The results may be odd if you use a radix < 2 or > 64, you are
4641    expected to know what you're doing.
4642   */
4643  
4644  char     s_mp_todigit(mp_digit val, int r, int low)
4645  {
4646    char   ch;
4647  
4648    if(val >= r)
4649      return 0;
4650  
4651    ch = s_dmap_1[val];
4652  
4653    if(r <= 36 && low)
4654      ch = tolower(ch);
4655  
4656    return ch;
4657  
4658  } /* end s_mp_todigit() */
4659  
4660  /* }}} */
4661  
4662  /* {{{ s_mp_outlen(bits, radix) */
4663  
4664  /*
4665     Return an estimate for how long a string is needed to hold a radix
4666     r representation of a number with 'bits' significant bits, plus an
4667     extra for a zero terminator (assuming C style strings here)
4668   */
4669  int      s_mp_outlen(int bits, int r)
4670  {
4671    return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4672  
4673  } /* end s_mp_outlen() */
4674  
4675  /* }}} */
4676  
4677  /* }}} */
4678  
4679  /* {{{ mp_read_unsigned_octets(mp, str, len) */
4680  /* mp_read_unsigned_octets(mp, str, len)
4681     Read in a raw value (base 256) into the given mp_int
4682     No sign bit, number is positive.  Leading zeros ignored.
4683   */
4684  
4685  mp_err
4686  mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4687  {
4688    int            count;
4689    mp_err         res;
4690    mp_digit       d;
4691  
4692    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4693  
4694    mp_zero(mp);
4695  
4696    count = len % sizeof(mp_digit);
4697    if (count) {
4698      for (d = 0; count-- > 0; --len) {
4699        d = (d << 8) | *str++;
4700      }
4701      MP_DIGIT(mp, 0) = d;
4702    }
4703  
4704    /* Read the rest of the digits */
4705    for(; len > 0; len -= sizeof(mp_digit)) {
4706      for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4707        d = (d << 8) | *str++;
4708      }
4709      if (MP_EQ == mp_cmp_z(mp)) {
4710        if (!d)
4711  	continue;
4712      } else {
4713        if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4714  	return res;
4715      }
4716      MP_DIGIT(mp, 0) = d;
4717    }
4718    return MP_OKAY;
4719  } /* end mp_read_unsigned_octets() */
4720  /* }}} */
4721  
4722  /* {{{ mp_unsigned_octet_size(mp) */
4723  int
4724  mp_unsigned_octet_size(const mp_int *mp)
4725  {
4726    int  bytes;
4727    int  ix;
4728    mp_digit  d = 0;
4729  
4730    ARGCHK(mp != NULL, MP_BADARG);
4731    ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4732  
4733    bytes = (USED(mp) * sizeof(mp_digit));
4734  
4735    /* subtract leading zeros. */
4736    /* Iterate over each digit... */
4737    for(ix = USED(mp) - 1; ix >= 0; ix--) {
4738      d = DIGIT(mp, ix);
4739      if (d)
4740  	break;
4741      bytes -= sizeof(d);
4742    }
4743    if (!bytes)
4744      return 1;
4745  
4746    /* Have MSD, check digit bytes, high order first */
4747    for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4748      unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4749      if (x)
4750  	break;
4751      --bytes;
4752    }
4753    return bytes;
4754  } /* end mp_unsigned_octet_size() */
4755  /* }}} */
4756  
4757  /* {{{ mp_to_unsigned_octets(mp, str) */
4758  /* output a buffer of big endian octets no longer than specified. */
4759  mp_err
4760  mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4761  {
4762    int  ix, pos = 0;
4763    int  bytes;
4764  
4765    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4766  
4767    bytes = mp_unsigned_octet_size(mp);
4768    ARGCHK(bytes <= maxlen, MP_BADARG);
4769  
4770    /* Iterate over each digit... */
4771    for(ix = USED(mp) - 1; ix >= 0; ix--) {
4772      mp_digit  d = DIGIT(mp, ix);
4773      int       jx;
4774  
4775      /* Unpack digit bytes, high order first */
4776      for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4777        unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4778        if (!pos && !x)	/* suppress leading zeros */
4779  	continue;
4780        str[pos++] = x;
4781      }
4782    }
4783    if (!pos)
4784      str[pos++] = 0;
4785    return pos;
4786  } /* end mp_to_unsigned_octets() */
4787  /* }}} */
4788  
4789  /* {{{ mp_to_signed_octets(mp, str) */
4790  /* output a buffer of big endian octets no longer than specified. */
4791  mp_err
4792  mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4793  {
4794    int  ix, pos = 0;
4795    int  bytes;
4796  
4797    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4798  
4799    bytes = mp_unsigned_octet_size(mp);
4800    ARGCHK(bytes <= maxlen, MP_BADARG);
4801  
4802    /* Iterate over each digit... */
4803    for(ix = USED(mp) - 1; ix >= 0; ix--) {
4804      mp_digit  d = DIGIT(mp, ix);
4805      int       jx;
4806  
4807      /* Unpack digit bytes, high order first */
4808      for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4809        unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4810        if (!pos) {
4811  	if (!x)		/* suppress leading zeros */
4812  	  continue;
4813  	if (x & 0x80) { /* add one leading zero to make output positive.  */
4814  	  ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4815  	  if (bytes + 1 > maxlen)
4816  	    return MP_BADARG;
4817  	  str[pos++] = 0;
4818  	}
4819        }
4820        str[pos++] = x;
4821      }
4822    }
4823    if (!pos)
4824      str[pos++] = 0;
4825    return pos;
4826  } /* end mp_to_signed_octets() */
4827  /* }}} */
4828  
4829  /* {{{ mp_to_fixlen_octets(mp, str) */
4830  /* output a buffer of big endian octets exactly as long as requested. */
4831  mp_err
4832  mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4833  {
4834    int  ix, pos = 0;
4835    int  bytes;
4836  
4837    ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4838  
4839    bytes = mp_unsigned_octet_size(mp);
4840    ARGCHK(bytes <= length, MP_BADARG);
4841  
4842    /* place any needed leading zeros */
4843    for (;length > bytes; --length) {
4844  	*str++ = 0;
4845    }
4846  
4847    /* Iterate over each digit... */
4848    for(ix = USED(mp) - 1; ix >= 0; ix--) {
4849      mp_digit  d = DIGIT(mp, ix);
4850      int       jx;
4851  
4852      /* Unpack digit bytes, high order first */
4853      for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4854        unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4855        if (!pos && !x)	/* suppress leading zeros */
4856  	continue;
4857        str[pos++] = x;
4858      }
4859    }
4860    if (!pos)
4861      str[pos++] = 0;
4862    return MP_OKAY;
4863  } /* end mp_to_fixlen_octets() */
4864  /* }}} */
4865  
4866  
4867  /*------------------------------------------------------------------------*/
4868  /* HERE THERE BE DRAGONS                                                  */
4869  /* END CSTYLED */
4870