xref: /titanic_50/usr/src/common/bignum/bignumimpl.c (revision 90f050286227cf4c4f8aa425555d04723d331d48)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #define	big_div_pos_fast big_div_pos
30 
31 #include "bignum.h"
32 
33 /*
34  * Configuration guide
35  * -------------------
36  *
37  * There are 4 preprocessor symbols used to configure the bignum
38  * implementation.  This file contains no logic to configure based on
39  * processor; we leave that to the Makefiles to specify.
40  *
41  * USE_FLOATING_POINT
42  *   Meaning: There is support for a fast floating-point implementation of
43  *   Montgomery multiply.
44  *
45  * PSR_MUL
46  *   Meaning: There are processor-specific versions of the low level
47  *   functions to implement big_mul.  Those functions are: big_mul_set_vec,
48  *   big_mul_add_vec, big_mul_vec, and big_sqr_vec.  PSR_MUL implies support
49  *   for all 4 functions.  You cannot pick and choose which subset of these
50  *   functions to support; that would lead to a rat's nest of #ifdefs.
51  *
52  * HWCAP
53  *   Meaning: Call multiply support functions through a function pointer.
54  *   On x86, there are multiple implementations for differnt hardware
55  *   capabilities, such as MMX, SSE2, etc.  Tests are made at run-time, when
56  *   a function is first used.  So, the support functions are called through
57  *   a function pointer.  There is no need for that on Sparc, because there
58  *   is only one implementation; support functions are called directly.
59  *   Later, if there were some new VIS instruction, or something, and a
60  *   run-time test were needed, rather than variant kernel modules and
61  *   libraries, then HWCAP would be defined for Sparc, as well.
62  *
63  * UMUL64
64  *   Meaning: It is safe to use generic C code that assumes the existence
65  *   of a 32 x 32 --> 64 bit unsigned multiply.  If this is not defined,
66  *   then the generic code for big_mul_add_vec() must necessarily be very slow,
67  *   because it must fall back to using 16 x 16 --> 32 bit multiplication.
68  *
69  */
70 
71 
72 #ifdef	_KERNEL
73 
74 #include <sys/types.h>
75 #include <sys/kmem.h>
76 #include <sys/param.h>
77 #include <sys/sunddi.h>
78 
79 #define	big_malloc(size)	kmem_alloc(size, KM_NOSLEEP)
80 #define	big_free(ptr, size)	kmem_free(ptr, size)
81 
82 void *
83 big_realloc(void *from, size_t oldsize, size_t newsize)
84 {
85 	void *rv;
86 
87 	rv = kmem_alloc(newsize, KM_NOSLEEP);
88 	if (rv != NULL)
89 		bcopy(from, rv, oldsize);
90 	kmem_free(from, oldsize);
91 	return (rv);
92 }
93 
94 #else	/* _KERNEL */
95 
96 #include <stdlib.h>
97 #include <stdio.h>
98 
99 #ifndef MALLOC_DEBUG
100 
101 #define	big_malloc(size)	malloc(size)
102 #define	big_free(ptr, size)	free(ptr)
103 
104 #else
105 
106 void
107 big_free(void *ptr, size_t size)
108 {
109 	printf("freed %d bytes at %p\n", size, ptr);
110 	free(ptr);
111 }
112 
113 void *
114 big_malloc(size_t size)
115 {
116 	void *rv;
117 	rv = malloc(size);
118 	printf("malloced %d bytes, addr:%p\n", size, rv);
119 	return (rv);
120 }
121 #endif /* MALLOC_DEBUG */
122 
123 #define	big_realloc(x, y, z) realloc((x), (z))
124 
125 void
126 printbignum(char *aname, BIGNUM *a)
127 {
128 	int i;
129 
130 	(void) printf("\n%s\n%d\n", aname, a->sign*a->len);
131 	for (i = a->len - 1; i >= 0; i--) {
132 		(void) printf("%08x ", a->value[i]);
133 		if ((i % 8 == 0) && (i != 0))
134 		    (void) printf("\n");
135 	}
136 	(void) printf("\n");
137 }
138 
139 #endif	/* _KERNEL */
140 
141 
142 /* size in 32-bit words */
143 BIG_ERR_CODE
144 big_init(BIGNUM *number, int size)
145 {
146 	number->value = big_malloc(sizeof (uint32_t) * size);
147 	if (number->value == NULL) {
148 		return (BIG_NO_MEM);
149 	}
150 	number->size = size;
151 	number->len = 0;
152 	number->sign = 1;
153 	number->malloced = 1;
154 	return (BIG_OK);
155 }
156 
157 /* size in 32-bit words */
158 BIG_ERR_CODE
159 big_init1(BIGNUM *number, int size, uint32_t *buf, int bufsize)
160 {
161 	if ((buf == NULL) || (size > bufsize)) {
162 		number->value = big_malloc(sizeof (uint32_t) * size);
163 		if (number->value == NULL) {
164 			return (BIG_NO_MEM);
165 		}
166 		number->size = size;
167 		number->malloced = 1;
168 	} else {
169 		number->value = buf;
170 		number->size = bufsize;
171 		number->malloced = 0;
172 	}
173 		number->len = 0;
174 		number->sign = 1;
175 
176 	return (BIG_OK);
177 }
178 
179 void
180 big_finish(BIGNUM *number)
181 {
182 	if (number->malloced == 1) {
183 		big_free(number->value, sizeof (uint32_t) * number->size);
184 		number->malloced = 0;
185 	}
186 }
187 
188 /*
189  *  bn->size should be at least (len + 3) / 4
190  * converts from byte-big-endian format to bignum format (words in
191  * little endian order, but bytes within the words big endian)
192  */
193 void
194 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
195 {
196 	int		i, j, offs;
197 	uint32_t	word;
198 	uchar_t		*knwordp;
199 
200 #ifdef	_LP64
201 	/* LINTED */
202 	offs = (uint32_t)len % sizeof (uint32_t);
203 	/* LINTED */
204 	bn->len = (uint32_t)len / sizeof (uint32_t);
205 	/* LINTED */
206 	for (i = 0; i < (uint32_t)len / sizeof (uint32_t); i++) {
207 #else	/* !_LP64 */
208 	offs = len % sizeof (uint32_t);
209 	bn->len = len / sizeof (uint32_t);
210 	for (i = 0; i < len / sizeof (uint32_t); i++) {
211 #endif	/* _LP64 */
212 		knwordp = &(kn[len - sizeof (uint32_t) * (i + 1)]);
213 		word = knwordp[0];
214 		for (j = 1; j < sizeof (uint32_t); j++) {
215 			word = (word << 8)+ knwordp[j];
216 		}
217 		bn->value[i] = word;
218 	}
219 	if (offs > 0) {
220 		word = kn[0];
221 		for (i = 1; i < offs; i++) word = (word << 8) + kn[i];
222 		bn->value[bn->len++] = word;
223 	}
224 	while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) {
225 		bn->len --;
226 	}
227 }
228 
229 /*
230  * copies the least significant len bytes if
231  * len < bn->len * sizeof (uint32_t)
232  * converts from bignum format to byte-big-endian format.
233  * bignum format is words in little endian order,
234  * but bytes within words in native byte order.
235  */
236 void
237 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
238 {
239 	int		i, j, offs;
240 	uint32_t	word;
241 
242 	if (len < sizeof (uint32_t) * bn->len) {
243 #ifdef	_LP64
244 		/* LINTED */
245 		for (i = 0; i < (uint32_t)len / sizeof (uint32_t); i++) {
246 #else	/* !_LP64 */
247 		for (i = 0; i < len / sizeof (uint32_t); i++) {
248 #endif	/* _LP64 */
249 			word = bn->value[i];
250 			for (j = 0; j < sizeof (uint32_t); j++) {
251 				kn[len - sizeof (uint32_t) * i - j - 1] =
252 				    word & 0xff;
253 				word = word >> 8;
254 			}
255 		}
256 #ifdef	_LP64
257 		/* LINTED */
258 		offs = (uint32_t)len % sizeof (uint32_t);
259 #else	/* !_LP64 */
260 		offs = len % sizeof (uint32_t);
261 #endif	/* _LP64 */
262 		if (offs > 0) {
263 			word = bn->value[len / sizeof (uint32_t)];
264 #ifdef	_LP64
265 			    /* LINTED */
266 			    for (i =  (uint32_t)len % sizeof (uint32_t);
267 				i > 0; i --) {
268 #else	/* !_LP64 */
269 			    for (i = len % sizeof (uint32_t); i > 0; i --) {
270 #endif	/* _LP64 */
271 				    kn[i - 1] = word & 0xff;
272 				    word = word >> 8;
273 			    }
274 		}
275 	} else {
276 		for (i = 0; i < bn->len; i++) {
277 			word = bn->value[i];
278 			for (j = 0; j < sizeof (uint32_t); j++) {
279 				kn[len - sizeof (uint32_t) * i - j - 1] =
280 				    word & 0xff;
281 				word = word >> 8;
282 			}
283 		}
284 #ifdef	_LP64
285 		/* LINTED */
286 		for (i = 0; i < (uint32_t)len - sizeof (uint32_t) * bn->len;
287 		    i++) {
288 #else	/* !_LP64 */
289 		for (i = 0; i < len - sizeof (uint32_t) * bn->len; i++) {
290 #endif	/* _LP64 */
291 			kn[i] = 0;
292 		}
293 	}
294 }
295 
296 
297 int
298 big_bitlength(BIGNUM *a)
299 {
300 	int		l, b;
301 	uint32_t	c;
302 
303 	l = a->len - 1;
304 	while ((l > 0) && (a->value[l] == 0)) {
305 		l--;
306 	}
307 	b = sizeof (uint32_t) * 8;
308 	c = a->value[l];
309 	while ((b > 1) && ((c & 0x80000000) == 0)) {
310 		c = c << 1;
311 		b--;
312 	}
313 	return (l * (int)sizeof (uint32_t) * 8 + b);
314 }
315 
316 
317 BIG_ERR_CODE
318 big_copy(BIGNUM *dest, BIGNUM *src)
319 {
320 	uint32_t *newptr;
321 	int i, len;
322 
323 	len = src->len;
324 	while ((len > 1) && (src->value[len - 1] == 0))
325 		len--;
326 	src->len = len;
327 	if (dest->size < len) {
328 		if (dest->malloced == 1) {
329 			newptr = (uint32_t *)big_realloc(dest->value,
330 			    sizeof (uint32_t) * dest->size,
331 			    sizeof (uint32_t) * len);
332 		} else {
333 			newptr = (uint32_t *)
334 			    big_malloc(sizeof (uint32_t) * len);
335 			if (newptr != NULL) dest->malloced = 1;
336 		}
337 		if (newptr == NULL)
338 			return (BIG_NO_MEM);
339 		dest->value = newptr;
340 		dest->size = len;
341 	}
342 	dest->len = len;
343 	dest->sign = src->sign;
344 	for (i = 0; i < len; i++) dest->value[i] = src->value[i];
345 
346 	return (BIG_OK);
347 }
348 
349 
350 BIG_ERR_CODE
351 big_extend(BIGNUM *number, int size)
352 {
353 	uint32_t	*newptr;
354 	int		i;
355 
356 	if (number->size >= size)
357 		return (BIG_OK);
358 	if (number->malloced) {
359 		number->value =
360 		    big_realloc(number->value,
361 			sizeof (uint32_t) * number->size,
362 			sizeof (uint32_t) * size);
363 	} else {
364 		newptr = big_malloc(sizeof (uint32_t) * size);
365 		if (newptr != NULL) {
366 			for (i = 0; i < number->size; i++) {
367 				newptr[i] = number->value[i];
368 			}
369 		}
370 		number->value = newptr;
371 	}
372 
373 	if (number->value == NULL)
374 		return (BIG_NO_MEM);
375 
376 	number->size = size;
377 	number->malloced = 1;
378 	return (BIG_OK);
379 }
380 
381 
382 int
383 big_is_zero(BIGNUM *n)
384 {
385 	int i, result;
386 
387 	result = 1;
388 	for (i = 0; i < n->len; i++)
389 		if (n->value[i] != 0) result = 0;
390 	return (result);
391 }
392 
393 
394 BIG_ERR_CODE
395 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
396 {
397 	int i, shorter, longer;
398 	uint32_t cy, ai;
399 	uint32_t *r, *a, *b, *c;
400 	BIG_ERR_CODE err;
401 
402 	if (aa->len > bb->len) {
403 		shorter = bb->len;
404 		longer = aa->len;
405 		c = aa->value;
406 	} else {
407 		shorter = aa->len;
408 		longer = bb->len;
409 		c = bb->value;
410 	}
411 	if (result->size < longer + 1) {
412 		err = big_extend(result, longer + 1);
413 		if (err != BIG_OK)
414 			return (err);
415 	}
416 
417 	r = result->value;
418 	a = aa->value;
419 	b = bb->value;
420 	cy = 0;
421 	for (i = 0; i < shorter; i++) {
422 		ai = a[i];
423 		r[i] = ai + b[i] + cy;
424 		if (r[i] > ai) cy = 0;
425 		else if (r[i] < ai) cy = 1;
426 	}
427 	for (; i < longer; i++) {
428 		ai = c[i];
429 		r[i] = ai + cy;
430 		if (r[i] >= ai) cy = 0;
431 	}
432 	if (cy == 1) {
433 		r[i] = cy;
434 		result->len = longer + 1;
435 	} else {
436 		result->len = longer;
437 	}
438 	result->sign = 1;
439 	return (BIG_OK);
440 }
441 
442 
443 /* caller must make sure that result has at least len words allocated */
444 void
445 big_sub_vec(uint32_t *r, uint32_t *a, uint32_t *b, int len)
446 {
447 	int i;
448 	uint32_t cy, ai;
449 
450 	cy = 1;
451 	for (i = 0; i < len; i++) {
452 		ai = a[i];
453 		r[i] = ai + (~b[i]) + cy;
454 		if (r[i] > ai) cy = 0;
455 		else if (r[i] < ai) cy = 1;
456 	}
457 }
458 
459 
460 /* result=aa-bb  it is assumed that aa>=bb */
461 BIG_ERR_CODE
462 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
463 {
464 	int i, shorter;
465 	uint32_t cy, ai;
466 	uint32_t *r, *a, *b;
467 	BIG_ERR_CODE err;
468 
469 	if (aa->len > bb->len) shorter = bb->len;
470 	else shorter = aa->len;
471 	if (result->size < aa->len) {
472 		err = big_extend(result, aa->len);
473 		if (err != BIG_OK)
474 			return (err);
475 	}
476 
477 	r = result->value;
478 	a = aa->value;
479 	b = bb->value;
480 	result->len = aa->len;
481 	cy = 1;
482 	for (i = 0; i < shorter; i++) {
483 		ai = a[i];
484 		r[i] = ai + (~b[i]) + cy;
485 		if (r[i] > ai) cy = 0;
486 		else if (r[i] < ai) cy = 1;
487 	}
488 	for (; i < aa->len; i++) {
489 		ai = a[i];
490 		r[i] = ai + (~0) + cy;
491 		if (r[i] < ai) cy = 1;
492 	}
493 	result->sign = 1;
494 	if (cy == 0)
495 		return (BIG_INVALID_ARGS);
496 	else
497 		return (BIG_OK);
498 }
499 
500 
501 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
502 int
503 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
504 {
505 	int i;
506 
507 	if (aa->len > bb->len) {
508 		for (i = aa->len - 1; i > bb->len - 1; i--) {
509 			if (aa->value[i] > 0)
510 				return (1);
511 		}
512 	} else if (aa->len < bb->len) {
513 		for (i = bb->len - 1; i > aa->len - 1; i--) {
514 			if (bb->value[i] > 0)
515 				return (-1);
516 		}
517 	} else i = aa->len-1;
518 	for (; i >= 0; i--) {
519 		if (aa->value[i] > bb->value[i])
520 			return (1);
521 		else if (aa->value[i] < bb->value[i])
522 			return (-1);
523 	}
524 
525 	return (0);
526 }
527 
528 
529 BIG_ERR_CODE
530 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
531 {
532 	BIG_ERR_CODE err;
533 
534 	if ((bb->sign == -1) && (aa->sign == 1)) {
535 		if ((err = big_add_abs(result, aa, bb)) != BIG_OK)
536 			return (err);
537 		result->sign = 1;
538 	} else if ((aa->sign == -1) && (bb->sign == 1)) {
539 		if ((err = big_add_abs(result, aa, bb)) != BIG_OK)
540 			return (err);
541 		result->sign = -1;
542 	} else if ((aa->sign == 1) && (bb->sign == 1)) {
543 		if (big_cmp_abs(aa, bb) >= 0) {
544 			if ((err = big_sub_pos(result, aa, bb)) != BIG_OK)
545 				return (err);
546 			result->sign = 1;
547 		} else {
548 			if ((err = big_sub_pos(result, bb, aa)) != BIG_OK)
549 				return (err);
550 			result->sign = -1;
551 		}
552 	} else {
553 		if (big_cmp_abs(aa, bb) >= 0) {
554 			if ((err = big_sub_pos(result, aa, bb)) != BIG_OK)
555 				return (err);
556 			result->sign = -1;
557 		} else {
558 			if ((err = big_sub_pos(result, bb, aa)) != BIG_OK)
559 				return (err);
560 			result->sign = 1;
561 		}
562 	}
563 	return (BIG_OK);
564 }
565 
566 
567 
568 BIG_ERR_CODE
569 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
570 {
571 	BIG_ERR_CODE err;
572 
573 	if ((bb->sign == -1) && (aa->sign == -1)) {
574 		if ((err = big_add_abs(result, aa, bb)) != BIG_OK)
575 			return (err);
576 		result->sign = -1;
577 	} else if ((aa->sign == 1) && (bb->sign == 1)) {
578 		if ((err = big_add_abs(result, aa, bb)) != BIG_OK)
579 			return (err);
580 		result->sign = 1;
581 	} else if ((aa->sign == 1) && (bb->sign == -1)) {
582 		if (big_cmp_abs(aa, bb) >= 0) {
583 			if ((err = big_sub_pos(result, aa, bb)) != BIG_OK)
584 				return (err);
585 			result->sign = 1;
586 		} else {
587 			if ((err = big_sub_pos(result, bb, aa)) != BIG_OK)
588 				return (err);
589 			result->sign = -1;
590 		}
591 	} else {
592 		if (big_cmp_abs(aa, bb) >= 0) {
593 			if ((err = big_sub_pos(result, aa, bb)) != BIG_OK)
594 				return (err);
595 			result->sign = -1;
596 		} else {
597 			if ((err = big_sub_pos(result, bb, aa)) != BIG_OK)
598 				return (err);
599 			result->sign = 1;
600 		}
601 	}
602 	return (BIG_OK);
603 }
604 
605 
606 /* result = aa/2 aa must be positive */
607 BIG_ERR_CODE
608 big_half_pos(BIGNUM *result, BIGNUM *aa)
609 {
610 	BIG_ERR_CODE err;
611 	int i;
612 	uint32_t cy, cy1;
613 	uint32_t *a, *r;
614 
615 	if (result->size < aa->len) {
616 		err = big_extend(result, aa->len);
617 		if (err != BIG_OK)
618 			return (err);
619 	}
620 
621 	result->len = aa->len;
622 	a = aa->value;
623 	r = result->value;
624 	cy = 0;
625 	for (i = aa->len-1; i >= 0; i--) {
626 		cy1 = a[i] << 31;
627 		r[i] = (cy|(a[i] >> 1));
628 		cy = cy1;
629 	}
630 	if (r[result->len-1] == 0) result->len--;
631 	return (BIG_OK);
632 }
633 
634 /* result  =  aa*2 aa must be positive */
635 BIG_ERR_CODE
636 big_double(BIGNUM *result, BIGNUM *aa)
637 {
638 	BIG_ERR_CODE err;
639 	int i, rsize;
640 	uint32_t cy, cy1;
641 	uint32_t *a, *r;
642 
643 	if ((aa->len > 0) && ((aa->value[aa->len - 1] & 0x80000000) != 0))
644 		rsize = aa->len + 1;
645 	else rsize = aa->len;
646 
647 	if (result->size < rsize) {
648 		err = big_extend(result, rsize);
649 		if (err != BIG_OK)
650 			return (err);
651 	}
652 
653 	a = aa->value;
654 	r = result->value;
655 	if (rsize == aa->len + 1) r[rsize - 1] = 1;
656 	cy = 0;
657 	for (i = 0; i < aa->len; i++) {
658 		cy1 = a[i] >> 31;
659 		r[i] = (cy | (a[i] << 1));
660 		cy = cy1;
661 	}
662 	result->len = rsize;
663 	return (BIG_OK);
664 }
665 
666 /* returns aa mod b, aa must be nonneg, b must be a max 16-bit integer */
667 uint32_t
668 big_mod16_pos(BIGNUM *aa, uint32_t b)
669 {
670 	int i;
671 	uint32_t rem;
672 
673 	if (aa->len == 0)
674 		return (0);
675 	rem = aa->value[aa->len - 1] % b;
676 	for (i = aa->len - 2; i >= 0; i--) {
677 		rem = ((rem << 16) | (aa->value[i] >> 16)) % b;
678 		rem = ((rem << 16) | (aa->value[i] & 0xffff)) % b;
679 	}
680 	return (rem);
681 }
682 
683 
684 /*
685  * result = aa - (2^32)^lendiff * bb
686  * result->size should be at least aa->len at entry
687  * aa, bb, and result should be positive
688  */
689 void
690 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
691 {
692 	int i, lendiff;
693 	BIGNUM res1, aa1;
694 
695 	lendiff = aa->len - bb->len;
696 	res1.size = result->size - lendiff;
697 	res1.malloced = 0;
698 	res1.value = result->value + lendiff;
699 	aa1.size = aa->size - lendiff;
700 	aa1.value = aa->value + lendiff;
701 	aa1.len = bb->len;
702 	aa1.sign = 1;
703 	(void) big_sub_pos(&res1, &aa1, bb);
704 	if (result->value != aa->value) {
705 		for (i = 0; i < lendiff; i++) {
706 			result->value[i] = aa->value[i];
707 		}
708 	}
709 	result->len = aa->len;
710 }
711 
712 
713 /*
714  * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
715  *							(2^32)^lendiff * |bb|
716  * aa->len should be >= bb->len
717  */
718 int
719 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
720 {
721 	int lendiff;
722 	BIGNUM aa1;
723 
724 	lendiff = aa->len - bb->len;
725 	aa1.len = bb->len;
726 	aa1.size = aa->size - lendiff;
727 	aa1.malloced = 0;
728 	aa1.value = aa->value + lendiff;
729 	return (big_cmp_abs(&aa1, bb));
730 }
731 
732 
733 /*
734  * result = aa * b where b is a max. 16-bit positive integer.
735  * result should have enough space allocated.
736  */
737 void
738 big_mul16_low(BIGNUM *result, BIGNUM *aa, uint32_t b)
739 {
740 	int i;
741 	uint32_t t1, t2, ai, cy;
742 	uint32_t *a, *r;
743 
744 	a = aa->value;
745 	r = result->value;
746 	cy = 0;
747 	for (i = 0; i < aa->len; i++) {
748 		ai = a[i];
749 		t1 = (ai & 0xffff) * b + cy;
750 		t2 = (ai >> 16) * b + (t1 >> 16);
751 		r[i] = (t1 & 0xffff) | (t2 << 16);
752 		cy = t2 >> 16;
753 	}
754 	r[i] = cy;
755 	result->len = aa->len + 1;
756 	result->sign = aa->sign;
757 }
758 
759 
760 /*
761  * result = aa * b * 2^16 where b is a max. 16-bit positive integer.
762  * result should have enough space allocated.
763  */
764 void
765 big_mul16_high(BIGNUM *result, BIGNUM *aa, uint32_t b)
766 {
767 	int i;
768 	uint32_t t1, t2, ai, cy, ri;
769 	uint32_t *a, *r;
770 
771 	a = aa->value;
772 	r = result->value;
773 	cy = 0;
774 	ri = 0;
775 	for (i = 0; i < aa->len; i++) {
776 		ai = a[i];
777 		t1 = (ai & 0xffff) * b + cy;
778 		t2 = (ai >> 16) * b + (t1 >> 16);
779 		r[i] = (t1 << 16) + ri;
780 		ri = t2 & 0xffff;
781 		cy = t2 >> 16;
782 	}
783 	r[i] = (cy << 16) + ri;
784 	result->len = aa->len + 1;
785 	result->sign = aa->sign;
786 }
787 
788 /* it is assumed that result->size is big enough */
789 void
790 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
791 {
792 	int i;
793 	uint32_t cy, ai;
794 
795 	if (offs == 0) {
796 		if (result != aa) {
797 			(void) big_copy(result, aa);
798 		}
799 		return;
800 	}
801 	cy = 0;
802 	for (i = 0; i < aa->len; i++) {
803 		ai = aa->value[i];
804 		result->value[i] = (ai << offs) | cy;
805 		cy = ai >> (32 - offs);
806 	}
807 	if (cy != 0) {
808 		result->len = aa->len + 1;
809 		result->value[result->len - 1] = cy;
810 	} else {
811 		result->len = aa->len;
812 	}
813 	result->sign = aa->sign;
814 }
815 
816 /* it is assumed that result->size is big enough */
817 void
818 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
819 {
820 	int i;
821 	uint32_t cy, ai;
822 
823 	if (offs == 0) {
824 		if (result != aa) {
825 			(void) big_copy(result, aa);
826 		}
827 		return;
828 	}
829 	cy = aa->value[0] >> offs;
830 	for (i = 1; i < aa->len; i++) {
831 		ai = aa->value[i];
832 		result->value[i-1] = (ai << (32 - offs)) | cy;
833 		cy = ai >> offs;
834 	}
835 	result->len = aa->len;
836 	result->value[result->len - 1] = cy;
837 	result->sign = aa->sign;
838 }
839 
840 
841 /*
842  * result = aa/bb   remainder = aa mod bb
843  * it is assumed that aa and bb are positive
844  */
845 BIG_ERR_CODE
846 big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
847 {
848 	BIG_ERR_CODE err;
849 	int i, alen, blen, tlen, rlen, offs;
850 	uint32_t higha, highb, coeff;
851 	uint64_t highb64;
852 	uint32_t *a, *b;
853 	BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
854 	uint32_t tmp1value[BIGTMPSIZE];
855 	uint32_t tmp2value[BIGTMPSIZE];
856 	uint32_t tresultvalue[BIGTMPSIZE];
857 	uint32_t bblowvalue[BIGTMPSIZE];
858 	uint32_t bbhighvalue[BIGTMPSIZE];
859 
860 	a = aa->value;
861 	b = bb->value;
862 	alen = aa->len;
863 	blen = bb->len;
864 	while ((alen > 1) && (a[alen - 1] == 0)) alen = alen - 1;
865 	aa->len = alen;
866 	while ((blen > 1) && (b[blen - 1] == 0)) blen = blen - 1;
867 	bb->len = blen;
868 	if ((blen == 1) && (b[0] == 0))
869 		return (BIG_DIV_BY_0);
870 
871 	if (big_cmp_abs(aa, bb) < 0) {
872 		if ((remainder != NULL) &&
873 		    ((err = big_copy(remainder, aa)) != BIG_OK))
874 			return (err);
875 		if (result != NULL) {
876 			result->len = 1;
877 			result->sign = 1;
878 			result->value[0] = 0;
879 		}
880 		return (BIG_OK);
881 	}
882 
883 	if ((err = big_init1(&bblow, blen + 1,
884 	    bblowvalue, arraysize(bblowvalue))) != BIG_OK)
885 		return (err);
886 
887 	if ((err = big_init1(&bbhigh, blen + 1,
888 	    bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
889 		goto ret1;
890 
891 	if ((err = big_init1(&tmp1, alen + 2,
892 	    tmp1value, arraysize(tmp1value))) != BIG_OK)
893 		goto ret2;
894 
895 	if ((err = big_init1(&tmp2, blen + 2,
896 	    tmp2value, arraysize(tmp2value))) != BIG_OK)
897 		goto ret3;
898 
899 	if ((err = big_init1(&tresult, alen - blen + 2,
900 	    tresultvalue, arraysize(tresultvalue))) != BIG_OK)
901 		goto ret4;
902 
903 	offs = 0;
904 	if (blen > 1) {
905 		highb64 = (((uint64_t)(b[blen - 1])) << 32) |
906 		    ((uint64_t)(b[blen - 2]));
907 	} else {
908 		highb64 = (((uint64_t)(b[blen - 1])) << 32);
909 	}
910 	if (highb64 >= 0x1000000000000ull) {
911 		highb64 = highb64 >> 16;
912 		offs = 16;
913 	}
914 	while ((highb64 & 0x800000000000ull) == 0) {
915 		highb64 = highb64 << 1;
916 		offs++;
917 	}
918 #ifdef	_LP64
919 	/* LINTED */
920 	highb = (highb64 >> 32) & 0xffffffff;
921 #else	/* !_LP64 */
922 	highb = highb64 >> 32;
923 #endif	/* _LP64 */
924 
925 	big_shiftleft(&bblow, bb, offs);
926 	if (offs <= 15) {
927 		big_shiftleft(&bbhigh, &bblow, 16);
928 	} else {
929 		big_shiftright(&bbhigh, &bblow, 16);
930 	}
931 	if (bbhigh.value[bbhigh.len - 1] == 0) {
932 		bbhigh.len--;
933 	} else {
934 		bbhigh.value[bbhigh.len] = 0;
935 	}
936 
937 	big_shiftleft(&tmp1, aa, offs);
938 	rlen = tmp1.len - bblow.len + 1;
939 	tresult.len = rlen;
940 
941 	tmp1.len++;
942 	tlen = tmp1.len;
943 	tmp1.value[tmp1.len - 1] = 0;
944 	for (i = 0; i < rlen; i++) {
945 		higha = (tmp1.value[tlen - 1] << 16) +
946 		    (tmp1.value[tlen - 2] >> 16);
947 		coeff = higha / (highb + 1);
948 		big_mul16_high(&tmp2, &bblow, coeff);
949 		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
950 		bbhigh.len++;
951 		while (tmp1.value[tlen - 1] > 0) {
952 			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
953 			coeff++;
954 		}
955 		bbhigh.len--;
956 		tlen--;
957 		tmp1.len--;
958 		while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
959 			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
960 			coeff++;
961 		}
962 		tresult.value[rlen - i - 1] = coeff << 16;
963 		higha = tmp1.value[tlen - 1];
964 		coeff = higha / (highb + 1);
965 		big_mul16_low(&tmp2, &bblow, coeff);
966 		tmp2.len--;
967 		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
968 		while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
969 			big_sub_pos_high(&tmp1, &tmp1, &bblow);
970 			coeff++;
971 		}
972 		tresult.value[rlen - i - 1] =
973 		    tresult.value[rlen - i - 1] + coeff;
974 	}
975 
976 	big_shiftright(&tmp1, &tmp1, offs);
977 
978 	err = BIG_OK;
979 
980 	if ((remainder != NULL) &&
981 	    ((err = big_copy(remainder, &tmp1)) != BIG_OK))
982 		goto ret;
983 
984 	if (result != NULL)
985 		err = big_copy(result, &tresult);
986 
987 ret:
988 	big_finish(&tresult);
989 ret4:
990 	big_finish(&tmp1);
991 ret3:
992 	big_finish(&tmp2);
993 ret2:
994 	big_finish(&bbhigh);
995 ret1:
996 	big_finish(&bblow);
997 	return (err);
998 }
999 
1000 /*
1001  * If there is no processor-specific integer implementation of
1002  * the lower level multiply functions, then this code is provided
1003  * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1004  * big_sqr_vec().
1005  *
1006  * There are two generic implementations.  One that assumes that
1007  * there is hardware and C compiler support for a 32 x 32 --> 64
1008  * bit unsigned multiply, but otherwise is not specific to any
1009  * processor, platform, or ISA.
1010  *
1011  * The other makes very few assumptions about hardware capabilities.
1012  * It does not even assume that there is any implementation of a
1013  * 32 x 32 --> 64 bit multiply that is accessible to C code and
1014  * appropriate to use.  It falls constructs 32 x 32 --> 64 bit
1015  * multiplies from 16 x 16 --> 32 bit multiplies.
1016  *
1017  */
1018 
1019 #if !defined(PSR_MUL)
1020 
1021 #ifdef UMUL64
1022 
1023 #define	UNROLL8
1024 
1025 #define	MUL_SET_VEC_ROUND_PREFETCH(R) \
1026 	p = pf * d; \
1027 	pf = (uint64_t)a[R+1]; \
1028 	t = p + cy; \
1029 	r[R] = (uint32_t)t; \
1030 	cy = t >> 32
1031 
1032 #define	MUL_SET_VEC_ROUND_NOPREFETCH(R) \
1033 	p = pf * d; \
1034 	t = p + cy; \
1035 	r[R] = (uint32_t)t; \
1036 	cy = t >> 32
1037 
1038 #define	MUL_ADD_VEC_ROUND_PREFETCH(R) \
1039 	t = (uint64_t)r[R]; \
1040 	p = pf * d; \
1041 	pf = (uint64_t)a[R+1]; \
1042 	t = p + t + cy; \
1043 	r[R] = (uint32_t)t; \
1044 	cy = t >> 32
1045 
1046 #define	MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
1047 	t = (uint64_t)r[R]; \
1048 	p = pf * d; \
1049 	t = p + t + cy; \
1050 	r[R] = (uint32_t)t; \
1051 	cy = t >> 32
1052 
1053 #ifdef UNROLL8
1054 
1055 #define	UNROLL 8
1056 
1057 /*
1058  * r = a * b
1059  * where r and a are vectors; b is a single 32-bit digit
1060  */
1061 
1062 uint32_t
1063 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1064 {
1065 	uint64_t d, pf, p, t, cy;
1066 
1067 	if (len == 0)
1068 		return (0);
1069 	cy = 0;
1070 	d = (uint64_t)b;
1071 	pf = (uint64_t)a[0];
1072 	while (len > UNROLL) {
1073 		MUL_SET_VEC_ROUND_PREFETCH(0);
1074 		MUL_SET_VEC_ROUND_PREFETCH(1);
1075 		MUL_SET_VEC_ROUND_PREFETCH(2);
1076 		MUL_SET_VEC_ROUND_PREFETCH(3);
1077 		MUL_SET_VEC_ROUND_PREFETCH(4);
1078 		MUL_SET_VEC_ROUND_PREFETCH(5);
1079 		MUL_SET_VEC_ROUND_PREFETCH(6);
1080 		MUL_SET_VEC_ROUND_PREFETCH(7);
1081 		r += UNROLL;
1082 		a += UNROLL;
1083 		len -= UNROLL;
1084 	}
1085 	if (len == UNROLL) {
1086 		MUL_SET_VEC_ROUND_PREFETCH(0);
1087 		MUL_SET_VEC_ROUND_PREFETCH(1);
1088 		MUL_SET_VEC_ROUND_PREFETCH(2);
1089 		MUL_SET_VEC_ROUND_PREFETCH(3);
1090 		MUL_SET_VEC_ROUND_PREFETCH(4);
1091 		MUL_SET_VEC_ROUND_PREFETCH(5);
1092 		MUL_SET_VEC_ROUND_PREFETCH(6);
1093 		MUL_SET_VEC_ROUND_NOPREFETCH(7);
1094 		return ((uint32_t)cy);
1095 	}
1096 	while (len > 1) {
1097 		MUL_SET_VEC_ROUND_PREFETCH(0);
1098 		++r;
1099 		++a;
1100 		--len;
1101 	}
1102 	if (len > 0) {
1103 		MUL_SET_VEC_ROUND_NOPREFETCH(0);
1104 	}
1105 	return ((uint32_t)cy);
1106 }
1107 
1108 /*
1109  * r += a * b
1110  * where r and a are vectors; b is a single 32-bit digit
1111  */
1112 
1113 uint32_t
1114 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
1115 {
1116 	uint64_t d, pf, p, t, cy;
1117 
1118 	if (len == 0)
1119 		return (0);
1120 	cy = 0;
1121 	d = (uint64_t)b;
1122 	pf = (uint64_t)a[0];
1123 	while (len > 8) {
1124 		MUL_ADD_VEC_ROUND_PREFETCH(0);
1125 		MUL_ADD_VEC_ROUND_PREFETCH(1);
1126 		MUL_ADD_VEC_ROUND_PREFETCH(2);
1127 		MUL_ADD_VEC_ROUND_PREFETCH(3);
1128 		MUL_ADD_VEC_ROUND_PREFETCH(4);
1129 		MUL_ADD_VEC_ROUND_PREFETCH(5);
1130 		MUL_ADD_VEC_ROUND_PREFETCH(6);
1131 		MUL_ADD_VEC_ROUND_PREFETCH(7);
1132 		r += 8;
1133 		a += 8;
1134 		len -= 8;
1135 	}
1136 	if (len == 8) {
1137 		MUL_ADD_VEC_ROUND_PREFETCH(0);
1138 		MUL_ADD_VEC_ROUND_PREFETCH(1);
1139 		MUL_ADD_VEC_ROUND_PREFETCH(2);
1140 		MUL_ADD_VEC_ROUND_PREFETCH(3);
1141 		MUL_ADD_VEC_ROUND_PREFETCH(4);
1142 		MUL_ADD_VEC_ROUND_PREFETCH(5);
1143 		MUL_ADD_VEC_ROUND_PREFETCH(6);
1144 		MUL_ADD_VEC_ROUND_NOPREFETCH(7);
1145 		return ((uint32_t)cy);
1146 	}
1147 	while (len > 1) {
1148 		MUL_ADD_VEC_ROUND_PREFETCH(0);
1149 		++r;
1150 		++a;
1151 		--len;
1152 	}
1153 	if (len > 0) {
1154 		MUL_ADD_VEC_ROUND_NOPREFETCH(0);
1155 	}
1156 	return ((uint32_t)cy);
1157 }
1158 #endif /* UNROLL8 */
1159 
1160 void
1161 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1162 {
1163 	uint32_t *tr, *ta;
1164 	int tlen, row, col;
1165 	uint64_t p, s, t, t2, cy;
1166 	uint32_t d;
1167 
1168 	tr = r + 1;
1169 	ta = a;
1170 	tlen = len - 1;
1171 	tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
1172 	while (--tlen > 0) {
1173 		tr += 2;
1174 		++ta;
1175 		tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
1176 	}
1177 	s = (uint64_t)a[0];
1178 	s = s * s;
1179 	r[0] = (uint32_t)s;
1180 	cy = s >> 32;
1181 	p = ((uint64_t)r[1] << 1) + cy;
1182 	r[1] = (uint32_t)p;
1183 	cy = p >> 32;
1184 	row = 1;
1185 	col = 2;
1186 	while (row < len) {
1187 		s = (uint64_t)a[row];
1188 		s = s * s;
1189 		p = (uint64_t)r[col] << 1;
1190 		t = p + s;
1191 		d = (uint32_t)t;
1192 		t2 = (uint64_t)d + cy;
1193 		r[col] = (uint32_t)t2;
1194 		cy = (t >> 32) + (t2 >> 32);
1195 		if (row == len - 1)
1196 			break;
1197 		p = ((uint64_t)r[col+1] << 1) + cy;
1198 		r[col+1] = (uint32_t)p;
1199 		cy = p >> 32;
1200 		++row;
1201 		col += 2;
1202 	}
1203 	r[col+1] = (uint32_t)cy;
1204 }
1205 
1206 #else /* ! UMUL64 */
1207 
1208 /*
1209  * r = r + a * digit, r and a are vectors of length len
1210  * returns the carry digit
1211  */
1212 uint32_t
1213 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1214 {
1215 	uint32_t cy, cy1, retcy, dlow, dhigh;
1216 	int i;
1217 
1218 	cy1 = 0;
1219 	dlow = digit & 0xffff;
1220 	dhigh = digit >> 16;
1221 	for (i = 0; i < len; i++) {
1222 		cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
1223 		cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
1224 		r[i] = (cy & 0xffff) | (cy1 << 16);
1225 	}
1226 	retcy = cy1 >> 16;
1227 
1228 	cy1 = r[0] & 0xffff;
1229 	for (i = 0; i < len - 1; i++) {
1230 		cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
1231 		r[i] = (cy1 & 0xffff) | (cy << 16);
1232 		cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
1233 	}
1234 	cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
1235 	r[len - 1] = (cy1 & 0xffff) | (cy << 16);
1236 	retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;
1237 
1238 	return (retcy);
1239 }
1240 
1241 /*
1242  * r = a * digit, r and a are vectors of length len
1243  * returns the carry digit
1244  */
1245 uint32_t
1246 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
1247 {
1248 	return (big_mul_add_vec(r, a, len, digit));
1249 }
1250 
1251 void
1252 big_sqr_vec(uint32_t *r, uint32_t *a, int len)
1253 {
1254 	int i;
1255 
1256 	r[len] = big_mul_set_vec(r, a, len, a[0]);
1257 	for (i = 1; i < len; ++i)
1258 		r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
1259 }
1260 
1261 #endif /* UMUL64 */
1262 
1263 void
1264 big_mul_vec(uint32_t *r, uint32_t *a, int alen, uint32_t *b, int blen)
1265 {
1266 	int i;
1267 
1268 	r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1269 	for (i = 1; i < blen; ++i)
1270 		r[alen + i] = big_mul_add_vec(r+i, a, alen, b[i]);
1271 }
1272 
1273 
1274 #endif /* ! PSR_MUL */
1275 
1276 
1277 /*
1278  * result = aa * bb  result->value should be big enough to hold the result
1279  *
1280  * Implementation: Standard grammar school algorithm
1281  *
1282  */
1283 
1284 BIG_ERR_CODE
1285 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
1286 {
1287 	BIGNUM tmp1;
1288 	uint32_t tmp1value[BIGTMPSIZE];
1289 	uint32_t *r, *t, *a, *b;
1290 	BIG_ERR_CODE err;
1291 	int i, alen, blen, rsize, sign, diff;
1292 
1293 	if (aa == bb) {
1294 		diff = 0;
1295 	} else {
1296 		diff = big_cmp_abs(aa, bb);
1297 		if (diff < 0) {
1298 			BIGNUM *tt;
1299 			tt = aa;
1300 			aa = bb;
1301 			bb = tt;
1302 		}
1303 	}
1304 	a = aa->value;
1305 	b = bb->value;
1306 	alen = aa->len;
1307 	blen = bb->len;
1308 	while ((alen > 1) && (a[alen - 1] == 0)) alen--;
1309 	aa->len = alen;
1310 	while ((blen > 1) && (b[blen - 1] == 0)) blen--;
1311 	bb->len = blen;
1312 
1313 	rsize = alen + blen;
1314 	if (result->size < rsize) {
1315 		err = big_extend(result, rsize);
1316 		if (err != BIG_OK)
1317 			return (err);
1318 		/* aa or bb might be an alias to result */
1319 		a = aa->value;
1320 		b = bb->value;
1321 	}
1322 	r = result->value;
1323 
1324 	if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
1325 		result->len = 1;
1326 		result->sign = 1;
1327 		r[0] = 0;
1328 		return (BIG_OK);
1329 	}
1330 	sign = aa->sign * bb->sign;
1331 	if ((alen == 1) && (a[0] == 1)) {
1332 		for (i = 0; i < blen; i++) r[i] = b[i];
1333 		result->len = blen;
1334 		result->sign = sign;
1335 		return (BIG_OK);
1336 	}
1337 	if ((blen == 1) && (b[0] == 1)) {
1338 		for (i = 0; i < alen; i++) r[i] = a[i];
1339 		result->len = alen;
1340 		result->sign = sign;
1341 		return (BIG_OK);
1342 	}
1343 
1344 	err = big_init1(&tmp1, rsize, tmp1value, arraysize(tmp1value));
1345 	if (err != BIG_OK)
1346 		return (err);
1347 	t = tmp1.value;
1348 	for (i = 0; i < rsize; i++) t[i] = 0;
1349 
1350 	if (diff == 0 && alen > 2)
1351 		BIG_SQR_VEC(t, a, alen);
1352 	else if (blen > 0)
1353 		BIG_MUL_VEC(t, a, alen, b, blen);
1354 	if (t[rsize - 1] == 0)
1355 		--rsize;
1356 	tmp1.len = rsize;
1357 	if ((err = big_copy(result, &tmp1)) != BIG_OK)
1358 		return (err);
1359 
1360 	result->sign = sign;
1361 
1362 	if (tmp1.malloced) big_finish(&tmp1);
1363 
1364 	return (BIG_OK);
1365 }
1366 
1367 
1368 /*
1369  * caller must ensure that  a < n,  b < n  and  ret->size >=  2 * n->len + 1
1370  * and that ret is not n
1371  */
1372 BIG_ERR_CODE
1373 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, uint32_t n0)
1374 {
1375 	int i, j, nlen, needsubtract;
1376 	uint32_t *nn, *rr;
1377 	uint32_t digit, c;
1378 	BIG_ERR_CODE err;
1379 
1380 	nlen = n->len;
1381 	nn = n->value;
1382 
1383 	rr = ret->value;
1384 
1385 	if ((err = big_mul(ret, a, b)) != BIG_OK)
1386 		return (err);
1387 
1388 	rr = ret->value;
1389 	for (i = ret->len; i < 2 * nlen + 1; i++) rr[i] = 0;
1390 	for (i = 0; i < nlen; i++) {
1391 		digit = rr[i];
1392 		digit = digit * n0;
1393 
1394 		c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit);
1395 		j = i + nlen;
1396 		rr[j] += c;
1397 		while (rr[j] < c) {
1398 			rr[j + 1] += 1;
1399 			j++;
1400 			c = 1;
1401 		}
1402 	}
1403 
1404 	needsubtract = 0;
1405 	if ((rr[2 * nlen]  != 0))
1406 		needsubtract = 1;
1407 	else {
1408 		for (i = 2 * nlen - 1; i >= nlen; i--) {
1409 			if (rr[i] > nn[i - nlen]) {
1410 				needsubtract = 1;
1411 				break;
1412 			} else if (rr[i] < nn[i - nlen]) break;
1413 		}
1414 	}
1415 	if (needsubtract)
1416 		big_sub_vec(rr, rr + nlen, nn, nlen);
1417 	else {
1418 		for (i = 0; i < nlen; i++)
1419 			rr[i] = rr[i + nlen];
1420 	}
1421 	for (i = nlen - 1; (i >= 0) && (rr[i] == 0); i--);
1422 	ret->len = i+1;
1423 
1424 	return (BIG_OK);
1425 }
1426 
1427 uint32_t
1428 big_n0(uint32_t n)
1429 {
1430 	int i;
1431 	uint32_t result, tmp;
1432 
1433 	result = 0;
1434 	tmp = 0xffffffff;
1435 	for (i = 0; i < 32; i++) {
1436 		if ((tmp & 1) == 1) {
1437 			result = (result >> 1) | 0x80000000;
1438 			tmp = tmp - n;
1439 		} else  result = (result>>1);
1440 		tmp = tmp >> 1;
1441 	}
1442 
1443 	return (result);
1444 }
1445 
1446 
1447 int
1448 big_numbits(BIGNUM *n)
1449 {
1450 	int i, j;
1451 	uint32_t t;
1452 
1453 	for (i = n->len - 1; i > 0; i--)
1454 		if (n->value[i] != 0) break;
1455 	t = n->value[i];
1456 	for (j = 32; j > 0; j--) {
1457 		if ((t & 0x80000000) == 0)
1458 			t = t << 1;
1459 		else
1460 			return (32 * i + j);
1461 	}
1462 	return (0);
1463 }
1464 
1465 /* caller must make sure that a < n */
1466 BIG_ERR_CODE
1467 big_mont_rr(BIGNUM *result, BIGNUM *n)
1468 {
1469 	BIGNUM rr;
1470 	uint32_t rrvalue[BIGTMPSIZE];
1471 	int len, i;
1472 	BIG_ERR_CODE err;
1473 
1474 	rr.malloced = 0;
1475 	len = n->len;
1476 
1477 	if ((err = big_init1(&rr, 2 * len + 1,
1478 	    rrvalue, arraysize(rrvalue))) != BIG_OK)
1479 		return (err);
1480 
1481 	for (i = 0; i < 2 * len; i++) rr.value[i] = 0;
1482 	rr.value[2 * len] = 1;
1483 	rr.len = 2 * len + 1;
1484 	if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK)
1485 		goto ret;
1486 	err = big_copy(result, &rr);
1487 ret:
1488 	if (rr.malloced) big_finish(&rr);
1489 	return (err);
1490 }
1491 
1492 /* caller must make sure that a < n */
1493 BIG_ERR_CODE
1494 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, uint32_t n0, BIGNUM *n_rr)
1495 {
1496 	BIGNUM rr;
1497 	uint32_t rrvalue[BIGTMPSIZE];
1498 	int len, i;
1499 	BIG_ERR_CODE err;
1500 
1501 	rr.malloced = 0;
1502 	len = n->len;
1503 
1504 	if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
1505 	    != BIG_OK)
1506 			return (err);
1507 
1508 	if (n_rr == NULL) {
1509 		for (i = 0; i < 2 * len; i++) rr.value[i] = 0;
1510 		rr.value[2 * len] = 1;
1511 		rr.len = 2 * len + 1;
1512 		if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK)
1513 			goto ret;
1514 		n_rr = &rr;
1515 	}
1516 
1517 	if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK)
1518 		goto ret;
1519 	err = big_copy(result, &rr);
1520 ret:
1521 	if (rr.malloced) big_finish(&rr);
1522 	return (err);
1523 }
1524 
1525 
1526 #define	MAX_EXP_BIT_GROUP_SIZE 6
1527 #define	APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1528 
1529 #ifdef USE_FLOATING_POINT
1530 
1531 /*
1532  * This version makes use of floating point for performance.
1533  */
1534 static BIG_ERR_CODE
1535 _big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
1536 {
1537 	BIGNUM ma, tmp, rr;
1538 	uint32_t mavalue[BIGTMPSIZE];
1539 	uint32_t tmpvalue[BIGTMPSIZE];
1540 	uint32_t rrvalue[BIGTMPSIZE];
1541 	int i, j, k, l, m, p, bit, bitind, bitcount, nlen;
1542 	BIG_ERR_CODE err;
1543 	uint32_t n0;
1544 	double dn0;
1545 	double *dn, *dt, *d16r, *d32r;
1546 	uint32_t *nint, *prod;
1547 	double *apowers[APOWERS_MAX_SIZE];
1548 	int nbits, groupbits, apowerssize;
1549 
1550 	nbits = big_numbits(e);
1551 	if (nbits < 50) {
1552 		groupbits = 1;
1553 		apowerssize = 1;
1554 	} else {
1555 		groupbits = MAX_EXP_BIT_GROUP_SIZE;
1556 		apowerssize = 1 << (groupbits - 1);
1557 	}
1558 
1559 	if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue)))	!=
1560 	    BIG_OK)
1561 		return (err);
1562 	ma.len = 1;
1563 	ma.value[0] = 0;
1564 
1565 	if ((err = big_init1(&tmp, 2 * n->len + 1,
1566 	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
1567 		goto ret1;
1568 	tmp.len = 1;
1569 	tmp.value[0] = 0;
1570 
1571 	rr.malloced = 0;
1572 	if (n_rr == NULL) {
1573 		if ((err = big_init1(&rr, 2 * n->len + 1,
1574 		    rrvalue, arraysize(rrvalue))) != BIG_OK)
1575 			goto ret2;
1576 		if (big_mont_rr(&rr, n) != BIG_OK)
1577 			goto ret2;
1578 		n_rr = &rr;
1579 	}
1580 
1581 	n0 = big_n0(n->value[0]);
1582 
1583 	if (big_cmp_abs(a, n) > 0) {
1584 		if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK)
1585 			goto ret2;
1586 		err = big_mont_conv(&ma, &ma, n, n0, n_rr);
1587 	} else {
1588 		err = big_mont_conv(&ma, a, n, n0, n_rr);
1589 	}
1590 	if (err != BIG_OK)
1591 		goto ret3;
1592 
1593 	tmp.len = 1;
1594 	tmp.value[0] = 1;
1595 	if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK)
1596 		goto ret3;
1597 
1598 	nlen = n->len;
1599 	dn0 = (double)(n0 & 0xffff);
1600 
1601 	dn = dt = d16r = d32r = NULL;
1602 	nint = prod = NULL;
1603 	for (i = 0; i < apowerssize; i++) {
1604 		apowers[i] = NULL;
1605 	}
1606 
1607 	if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
1608 		err = BIG_NO_MEM;
1609 		goto ret;
1610 	}
1611 	if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
1612 		err = BIG_NO_MEM;
1613 		goto ret;
1614 	}
1615 	if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
1616 		err = BIG_NO_MEM;
1617 		goto ret;
1618 	}
1619 	if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
1620 		err = BIG_NO_MEM;
1621 		goto ret;
1622 	}
1623 	if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
1624 		err = BIG_NO_MEM;
1625 		goto ret;
1626 	}
1627 	if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
1628 		err = BIG_NO_MEM;
1629 		goto ret;
1630 	}
1631 	for (i = 0; i < apowerssize; i++) {
1632 		if ((apowers[i] = big_malloc((2 * nlen + 1) *
1633 		    sizeof (double))) == NULL) {
1634 			err = BIG_NO_MEM;
1635 			goto ret;
1636 		}
1637 	}
1638 
1639 	for (i = 0; i < ma.len; i++) nint[i] = ma.value[i];
1640 	for (; i < nlen; i++) nint[i] = 0;
1641 	conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);
1642 
1643 	for (i = 0; i < n->len; i++) nint[i] = n->value[i];
1644 	for (; i < nlen; i++) nint[i] = 0;
1645 	conv_i32_to_d32(dn, nint, nlen);
1646 
1647 	mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
1648 	conv_i32_to_d32(d32r, prod, nlen);
1649 	for (i = 1; i < apowerssize; i++) {
1650 		mont_mulf_noconv(prod, d32r, apowers[i - 1],
1651 		    dt, dn, nint, nlen, dn0);
1652 		conv_i32_to_d16(apowers[i], prod, nlen);
1653 	}
1654 
1655 	for (i = 0; i < tmp.len; i++) prod[i] = tmp.value[i];
1656 	for (; i < nlen + 1; i++) prod[i] = 0;
1657 
1658 	bitind = nbits % 32;
1659 	k = 0;
1660 	l = 0;
1661 	p = 0;
1662 	bitcount = 0;
1663 	for (i = nbits / 32; i >= 0; i--) {
1664 		for (j = bitind - 1; j >= 0; j--) {
1665 			bit = (e->value[i] >> j) & 1;
1666 			if ((bitcount == 0) && (bit == 0)) {
1667 				conv_i32_to_d32_and_d16(d32r, d16r,
1668 				    prod, nlen);
1669 				mont_mulf_noconv(prod, d32r, d16r,
1670 				    dt, dn, nint, nlen, dn0);
1671 			} else {
1672 				bitcount++;
1673 				p = p * 2 + bit;
1674 				if (bit == 1) {
1675 					k = k + l + 1;
1676 					l = 0;
1677 				} else {
1678 					l++;
1679 				}
1680 				if (bitcount == groupbits) {
1681 					for (m = 0; m < k; m++) {
1682 						conv_i32_to_d32_and_d16(
1683 							d32r, d16r,
1684 							prod, nlen);
1685 						mont_mulf_noconv(prod, d32r,
1686 						    d16r, dt, dn, nint,
1687 						    nlen, dn0);
1688 					}
1689 					conv_i32_to_d32(d32r, prod, nlen);
1690 					mont_mulf_noconv(prod, d32r,
1691 					    apowers[p >> (l+1)],
1692 					    dt, dn, nint, nlen, dn0);
1693 					for (m = 0; m < l; m++) {
1694 						conv_i32_to_d32_and_d16(
1695 							d32r, d16r,
1696 							prod, nlen);
1697 						mont_mulf_noconv(prod, d32r,
1698 						    d16r, dt, dn, nint,
1699 						    nlen, dn0);
1700 					}
1701 					k = 0;
1702 					l = 0;
1703 					p = 0;
1704 					bitcount = 0;
1705 				}
1706 			}
1707 		}
1708 		bitind = 32;
1709 	}
1710 
1711 	for (m = 0; m < k; m++) {
1712 		conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
1713 		mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
1714 	}
1715 	if (p != 0) {
1716 		conv_i32_to_d32(d32r, prod, nlen);
1717 		mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
1718 		    dt, dn, nint, nlen, dn0);
1719 	}
1720 	for (m = 0; m < l; m++) {
1721 		conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
1722 		mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
1723 	}
1724 
1725 	ma.value[0] = 1;
1726 	ma.len = 1;
1727 	for (i = 0; i < nlen; i++) tmp.value[i] = prod[i];
1728 	for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--);
1729 	tmp.len = i + 1;
1730 	if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK)
1731 		goto ret;
1732 	err = big_copy(result, &tmp);
1733 ret:
1734 	for (i = apowerssize - 1; i >= 0; i--) {
1735 		if (apowers[i] != NULL)
1736 			big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
1737 	}
1738 	if (d32r != NULL)
1739 		big_free(d32r, nlen * sizeof (double));
1740 	if (d16r != NULL)
1741 		big_free(d16r, (2 * nlen + 1) * sizeof (double));
1742 	if (prod != NULL)
1743 		big_free(prod, (nlen + 1) * sizeof (uint32_t));
1744 	if (nint != NULL)
1745 		big_free(nint, nlen * sizeof (uint32_t));
1746 	if (dt != NULL)
1747 		big_free(dt, (4 * nlen + 2) * sizeof (double));
1748 	if (dn != NULL)
1749 		big_free(dn, nlen * sizeof (double));
1750 
1751 ret3:
1752 	big_finish(&rr);
1753 ret2:
1754 	big_finish(&tmp);
1755 ret1:
1756 	big_finish(&ma);
1757 	return (err);
1758 
1759 }
1760 
1761 #ifdef _KERNEL
1762 
1763 #include <sys/sysmacros.h>
1764 #include <sys/regset.h>
1765 #include <sys/fpu/fpusystm.h>
1766 
1767 /* the alignment for block stores to save fp registers */
1768 #define	FPR_ALIGN	(64)
1769 
1770 extern void big_savefp(kfpu_t *);
1771 extern void big_restorefp(kfpu_t *);
1772 
1773 #endif /* _KERNEL */
1774 
1775 BIG_ERR_CODE
1776 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
1777 {
1778 #ifdef _KERNEL
1779 	BIG_ERR_CODE rv;
1780 	uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
1781 	kfpu_t *fpu;
1782 
1783 #ifdef DEBUG
1784 	if (!fpu_exists)
1785 		return (BIG_GENERAL_ERR);
1786 #endif
1787 
1788 	fpu =  (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
1789 	big_savefp(fpu);
1790 
1791 	rv = _big_modexp(result, a, e, n, n_rr);
1792 
1793 	big_restorefp(fpu);
1794 
1795 	return (rv);
1796 #else
1797 	return (_big_modexp(result, a, e, n, n_rr));
1798 #endif	/* _KERNEL */
1799 }
1800 
1801 #else /* ! USE_FLOATING_POINT */
1802 
1803 /*
1804  * This version uses strictly integer math and is safe in the kernel
1805  * for all platforms.
1806  */
1807 
1808 /*
1809  * computes a^e mod n
1810  * assumes a < n, n odd, result->value at least as long as n->value
1811  */
1812 BIG_ERR_CODE
1813 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
1814 {
1815 	BIGNUM ma, tmp, rr;
1816 	uint32_t mavalue[BIGTMPSIZE];
1817 	uint32_t tmpvalue[BIGTMPSIZE];
1818 	uint32_t rrvalue[BIGTMPSIZE];
1819 	BIGNUM apowers[APOWERS_MAX_SIZE];
1820 	int i, j, k, l, m, p,
1821 	    bit, bitind, bitcount, groupbits, apowerssize;
1822 	BIG_ERR_CODE err;
1823 	uint32_t n0;
1824 
1825 	int nbits;
1826 
1827 	nbits = big_numbits(e);
1828 	if (nbits < 50) {
1829 		groupbits = 1;
1830 		apowerssize = 1;
1831 	} else {
1832 		groupbits = MAX_EXP_BIT_GROUP_SIZE;
1833 		apowerssize = 1 << (groupbits - 1);
1834 	}
1835 
1836 	if ((err = big_init1(&ma, n->len,
1837 	    mavalue, arraysize(mavalue))) != BIG_OK)
1838 		return (err);
1839 	ma.len = 1;
1840 	ma.value[0] = 0;
1841 
1842 	if ((err = big_init1(&tmp, 2 * n->len + 1,
1843 	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
1844 		goto ret1;
1845 	tmp.len = 1;
1846 	tmp.value[0] = 1;
1847 
1848 	n0 = big_n0(n->value[0]);
1849 
1850 	rr.malloced = 0;
1851 	if (n_rr == NULL) {
1852 		if ((err = big_init1(&rr, 2 * n->len + 1,
1853 		    rrvalue, arraysize(rrvalue))) != BIG_OK)
1854 			goto ret2;
1855 
1856 		if (big_mont_rr(&rr, n) != BIG_OK)
1857 			goto ret3;
1858 		n_rr = &rr;
1859 	}
1860 
1861 	for (i = 0; i < apowerssize; i++) apowers[i].malloced = 0;
1862 	for (i = 0; i < apowerssize; i++) {
1863 		if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
1864 		    BIG_OK)
1865 			goto ret;
1866 	}
1867 
1868 	if (big_cmp_abs(a, n) > 0) {
1869 		if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK)
1870 			goto ret;
1871 		err = big_mont_conv(&ma, &ma, n, n0, n_rr);
1872 	} else {
1873 		err = big_mont_conv(&ma, a, n, n0, n_rr);
1874 	}
1875 	if (err != BIG_OK) goto ret;
1876 
1877 	(void) big_copy(&(apowers[0]), &ma);
1878 	if ((err = big_mont_mul(&tmp, &ma, &ma, n, n0)) != BIG_OK)
1879 		goto ret;
1880 	(void) big_copy(&ma, &tmp);
1881 
1882 	for (i = 1; i < apowerssize; i++) {
1883 		if ((err = big_mont_mul(&tmp, &ma,
1884 		    &(apowers[i-1]), n, n0)) != BIG_OK)
1885 			goto ret;
1886 		(void) big_copy(&apowers[i], &tmp);
1887 	}
1888 
1889 	tmp.len = 1;
1890 	tmp.value[0] = 1;
1891 	if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK)
1892 		goto ret;
1893 
1894 	bitind = nbits % 32;
1895 	k = 0;
1896 	l = 0;
1897 	p = 0;
1898 	bitcount = 0;
1899 	for (i = nbits / 32; i >= 0; i--) {
1900 		for (j = bitind - 1; j >= 0; j--) {
1901 			bit = (e->value[i] >> j) & 1;
1902 			if ((bitcount == 0) && (bit == 0)) {
1903 				if ((err = big_mont_mul(&tmp,
1904 				    &tmp, &tmp, n, n0)) != BIG_OK)
1905 					goto ret;
1906 			} else {
1907 				bitcount++;
1908 				p = p * 2 + bit;
1909 				if (bit == 1) {
1910 					k = k + l + 1;
1911 					l = 0;
1912 				} else {
1913 					l++;
1914 				}
1915 				if (bitcount == groupbits) {
1916 					for (m = 0; m < k; m++) {
1917 						if ((err = big_mont_mul(&tmp,
1918 						    &tmp, &tmp, n, n0)) !=
1919 						    BIG_OK)
1920 							goto ret;
1921 					}
1922 					if ((err = big_mont_mul(&tmp, &tmp,
1923 					    &(apowers[p >> (l + 1)]),
1924 					    n, n0)) != BIG_OK)
1925 						goto ret;
1926 					for (m = 0; m < l; m++) {
1927 						if ((err = big_mont_mul(&tmp,
1928 						    &tmp, &tmp, n, n0)) !=
1929 						    BIG_OK)
1930 							goto ret;
1931 					}
1932 					k = 0;
1933 					l = 0;
1934 					p = 0;
1935 					bitcount = 0;
1936 				}
1937 			}
1938 		}
1939 		bitind = 32;
1940 	}
1941 
1942 	for (m = 0; m < k; m++) {
1943 		if ((err = big_mont_mul(&tmp, &tmp, &tmp, n, n0)) != BIG_OK)
1944 			goto ret;
1945 	}
1946 	if (p != 0) {
1947 		if ((err = big_mont_mul(&tmp, &tmp,
1948 		    &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK)
1949 			goto ret;
1950 	}
1951 	for (m = 0; m < l; m++) {
1952 		if ((err = big_mont_mul(&tmp, &tmp, &tmp, n, n0)) != BIG_OK)
1953 			goto ret;
1954 	}
1955 
1956 	ma.value[0] = 1;
1957 	ma.len = 1;
1958 	if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK)
1959 		goto ret;
1960 	err = big_copy(result, &tmp);
1961 ret:
1962 	for (i = apowerssize - 1; i >= 0; i--) {
1963 		big_finish(&(apowers[i]));
1964 	}
1965 ret3:
1966 	if (rr.malloced) big_finish(&rr);
1967 ret2:
1968 	if (tmp.malloced) big_finish(&tmp);
1969 ret1:
1970 	if (ma.malloced) big_finish(&ma);
1971 	return (err);
1972 }
1973 
1974 #endif /* USE_FLOATING_POINT */
1975 
1976 
1977 BIG_ERR_CODE
1978 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
1979     BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
1980     BIGNUM *p_rr, BIGNUM *q_rr)
1981 {
1982 	BIGNUM ap, aq, tmp;
1983 	int alen, biglen, sign;
1984 	BIG_ERR_CODE err;
1985 
1986 	if (p->len > q->len) biglen = p->len;
1987 	else biglen = q->len;
1988 
1989 	if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK)
1990 		return (err);
1991 	if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK)
1992 		goto ret1;
1993 	if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK)
1994 		goto ret2;
1995 
1996 	/*
1997 	 * check whether a is too short - to avoid timing attacks
1998 	 */
1999 	alen = a->len;
2000 	while ((alen > p->len) && (a->value[alen - 1] == 0)) {
2001 		alen--;
2002 	}
2003 	if (alen < p->len + q->len) {
2004 		/*
2005 		 * a is too short, add p*q to it before
2006 		 * taking it modulo p and q
2007 		 * this will also affect timing, but this difference
2008 		 * does not depend on p or q, only on a
2009 		 * (in "normal" operation, this path will never be
2010 		 * taken, so it is not a performance penalty
2011 		 */
2012 		if ((err = big_mul(&tmp, p, q)) != BIG_OK)
2013 			goto ret;
2014 		if ((err = big_add(&tmp, &tmp, a)) != BIG_OK)
2015 			goto ret;
2016 		if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK)
2017 			goto ret;
2018 		if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK)
2019 			goto ret;
2020 	} else {
2021 		if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK)
2022 			goto ret;
2023 		if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK)
2024 			goto ret;
2025 	}
2026 
2027 	if ((err = big_modexp(&ap, &ap, dmodpminus1, p, p_rr)) != BIG_OK)
2028 		goto ret;
2029 	if ((err = big_modexp(&aq, &aq, dmodqminus1, q, q_rr)) != BIG_OK)
2030 		goto ret;
2031 	if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK)
2032 		goto ret;
2033 	if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK)
2034 		goto ret;
2035 	sign = tmp.sign;
2036 	tmp.sign = 1;
2037 	if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK)
2038 		goto ret;
2039 	if ((sign == -1) && (!big_is_zero(&aq))) {
2040 		(void) big_sub_pos(&aq, q, &aq);
2041 	}
2042 	if ((err = big_mul(&tmp, &aq, p)) != BIG_OK)
2043 		goto ret;
2044 	err = big_add_abs(result, &ap, &tmp);
2045 
2046 ret:
2047 	big_finish(&tmp);
2048 ret2:
2049 	big_finish(&aq);
2050 ret1:
2051 	big_finish(&ap);
2052 
2053 	return (err);
2054 }
2055 
2056 
2057 uint32_t onearr[1] = {1};
2058 BIGNUM One = {1, 1, 1, 0, onearr};
2059 
2060 uint32_t twoarr[1] = {2};
2061 BIGNUM Two = {1, 1, 1, 0, twoarr};
2062 
2063 uint32_t fourarr[1] = {4};
2064 BIGNUM Four = {1, 1, 1, 0, fourarr};
2065 
2066 BIG_ERR_CODE
2067 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2068 {
2069 	BIGNUM *high, *low, *mid, *t;
2070 	BIGNUM t1, t2, t3, prod;
2071 	uint32_t t1value[BIGTMPSIZE];
2072 	uint32_t t2value[BIGTMPSIZE];
2073 	uint32_t t3value[BIGTMPSIZE];
2074 	uint32_t prodvalue[BIGTMPSIZE];
2075 	int i, nbits, diff, nrootbits, highbits;
2076 	BIG_ERR_CODE err;
2077 
2078 	nbits = big_numbits(n);
2079 
2080 	if ((err = big_init1(&t1, n->len + 1,
2081 	    t1value, arraysize(t1value))) != BIG_OK)
2082 		return (err);
2083 	if ((err = big_init1(&t2, n->len + 1,
2084 	    t2value, arraysize(t2value))) != BIG_OK)
2085 		goto ret1;
2086 	if ((err = big_init1(&t3, n->len + 1,
2087 	    t3value, arraysize(t3value))) != BIG_OK)
2088 		goto ret2;
2089 	if ((err = big_init1(&prod, n->len + 1,
2090 	    prodvalue, arraysize(prodvalue))) != BIG_OK)
2091 		goto ret3;
2092 
2093 	nrootbits = (nbits + 1) / 2;
2094 	t1.len = t2.len = t3.len = (nrootbits - 1) / 32 + 1;
2095 	for (i = 0; i < t1.len; i++) {
2096 		t1.value[i] = 0;
2097 		t2.value[i] = 0xffffffff;
2098 	}
2099 	highbits = nrootbits - 32 * (t1.len - 1);
2100 	if (highbits == 32) {
2101 		t1.value[t1.len - 1] = 0x80000000;
2102 		t2.value[t2.len - 1] = 0xffffffff;
2103 	} else {
2104 		t1.value[t1.len - 1] = 1 << (highbits - 1);
2105 		t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
2106 	}
2107 	high = &t2;
2108 	low = &t1;
2109 	mid = &t3;
2110 
2111 	if ((err = big_mul(&prod, high, high)) != BIG_OK)
2112 		goto ret;
2113 	diff = big_cmp_abs(&prod, n);
2114 	if (diff <= 0) {
2115 		err = big_copy(result, high);
2116 		goto ret;
2117 	}
2118 
2119 	(void) big_sub_pos(mid, high, low);
2120 	while (big_cmp_abs(&One, mid) != 0) {
2121 		(void) big_add_abs(mid, high, low);
2122 		(void) big_half_pos(mid, mid);
2123 		if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
2124 			goto ret;
2125 		diff = big_cmp_abs(&prod, n);
2126 		if (diff > 0) {
2127 			t = high;
2128 			high = mid;
2129 			mid = t;
2130 		} else if (diff < 0) {
2131 			t = low;
2132 			low = mid;
2133 			mid = t;
2134 		} else {
2135 			err = big_copy(result, low);
2136 			goto ret;
2137 		}
2138 		(void) big_sub_pos(mid, high, low);
2139 	}
2140 
2141 	err = big_copy(result, low);
2142 ret:
2143 	if (prod.malloced) big_finish(&prod);
2144 ret3:
2145 	if (t3.malloced) big_finish(&t3);
2146 ret2:
2147 	if (t2.malloced) big_finish(&t2);
2148 ret1:
2149 	if (t1.malloced) big_finish(&t1);
2150 
2151 	return (err);
2152 }
2153 
2154 
2155 BIG_ERR_CODE
2156 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
2157 {
2158 	BIGNUM *t, *tmp2, *m, *n;
2159 	BIGNUM t1, t2, t3;
2160 	uint32_t t1value[BIGTMPSIZE];
2161 	uint32_t t2value[BIGTMPSIZE];
2162 	uint32_t t3value[BIGTMPSIZE];
2163 	int len, err;
2164 
2165 	if (big_is_zero(nn) ||
2166 	    (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
2167 		*jac = 0;
2168 		return (BIG_OK);
2169 	}
2170 
2171 	if (nn->len > mm->len) len = nn->len;
2172 	else len = mm->len;
2173 
2174 	if ((err = big_init1(&t1, len,
2175 	    t1value, arraysize(t1value))) != BIG_OK)
2176 		return (err);
2177 	if ((err = big_init1(&t2, len,
2178 	    t2value, arraysize(t2value))) != BIG_OK)
2179 		goto ret1;
2180 	if ((err = big_init1(&t3, len,
2181 	    t3value, arraysize(t3value))) != BIG_OK)
2182 		goto ret2;
2183 
2184 	n = &t1;
2185 	m = &t2;
2186 	tmp2 = &t3;
2187 
2188 	(void) big_copy(n, nn);
2189 	(void) big_copy(m, mm);
2190 
2191 	*jac = 1;
2192 	while (big_cmp_abs(&One, m) != 0) {
2193 		if (big_is_zero(n)) {
2194 			*jac = 0;
2195 			goto ret;
2196 		}
2197 		if ((m->value[0] & 1) == 0) {
2198 			if (((n->value[0] & 7) == 3) ||
2199 			    ((n->value[0] & 7) == 5)) *jac = -*jac;
2200 			(void) big_half_pos(m, m);
2201 		} else if ((n->value[0] & 1) == 0) {
2202 			if (((m->value[0] & 7) == 3) ||
2203 			    ((m->value[0] & 7) == 5)) *jac = -*jac;
2204 			(void) big_half_pos(n, n);
2205 		} else {
2206 			if (((m->value[0] & 3) == 3) &&
2207 			    ((n->value[0] & 3) == 3)) {
2208 				*jac = -*jac;
2209 			}
2210 			if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK)
2211 				goto ret;
2212 			t = tmp2;
2213 			tmp2 = m;
2214 			m = n;
2215 			n = t;
2216 		}
2217 	}
2218 	err = BIG_OK;
2219 
2220 ret:
2221 	if (t3.malloced) big_finish(&t3);
2222 ret2:
2223 	if (t2.malloced) big_finish(&t2);
2224 ret1:
2225 	if (t1.malloced) big_finish(&t1);
2226 
2227 	return (err);
2228 }
2229 
2230 
2231 BIG_ERR_CODE
2232 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
2233 {
2234 	int m, w, i;
2235 	uint32_t bit;
2236 	BIGNUM ki, tmp, tmp2;
2237 	uint32_t kivalue[BIGTMPSIZE];
2238 	uint32_t tmpvalue[BIGTMPSIZE];
2239 	uint32_t tmp2value[BIGTMPSIZE];
2240 	BIG_ERR_CODE err;
2241 
2242 	if (big_cmp_abs(k, &One) == 0) {
2243 		(void) big_copy(Lk, p);
2244 		(void) big_copy(Lkminus1, &Two);
2245 		return (BIG_OK);
2246 	}
2247 
2248 	if ((err = big_init1(&ki, k->len + 1,
2249 	    kivalue, arraysize(kivalue))) != BIG_OK)
2250 		return (err);
2251 
2252 	if ((err = big_init1(&tmp, 2 * n->len +1,
2253 	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2254 		goto ret1;
2255 
2256 	if ((err = big_init1(&tmp2, n->len,
2257 	    tmp2value, arraysize(tmp2value))) != BIG_OK)
2258 		goto ret2;
2259 
2260 	m = big_numbits(k);
2261 	ki.len = (m - 1) / 32 + 1;
2262 	w = (m - 1) / 32;
2263 	bit = 1 << ((m - 1) % 32);
2264 	for (i = 0; i < ki.len; i++) ki.value[i] = 0;
2265 	ki.value[ki.len - 1] = bit;
2266 	if (big_cmp_abs(k, &ki) != 0)
2267 		(void) big_double(&ki, &ki);
2268 	(void) big_sub_pos(&ki, &ki, k);
2269 
2270 	(void) big_copy(Lk, p);
2271 	(void) big_copy(Lkminus1, &Two);
2272 
2273 	for (i = 0; i < m; i++) {
2274 		if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK)
2275 			goto ret;
2276 		(void) big_add_abs(&tmp, &tmp, n);
2277 		(void) big_sub_pos(&tmp, &tmp, p);
2278 		if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK)
2279 			goto ret;
2280 
2281 		if ((ki.value[w] & bit) != 0) {
2282 			if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
2283 			    BIG_OK)
2284 				goto ret;
2285 			(void) big_add_abs(&tmp, &tmp, n);
2286 			(void) big_sub_pos(&tmp, &tmp, &Two);
2287 			if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
2288 			    BIG_OK)
2289 				goto ret;
2290 			(void) big_copy(Lk, &tmp2);
2291 		} else {
2292 			if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK)
2293 				goto ret;
2294 			(void) big_add_abs(&tmp, &tmp, n);
2295 			(void) big_sub_pos(&tmp, &tmp, &Two);
2296 			if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK)
2297 				goto ret;
2298 			(void) big_copy(Lkminus1, &tmp2);
2299 		}
2300 		bit = bit >> 1;
2301 		if (bit == 0) {
2302 			bit = 0x80000000;
2303 			w--;
2304 		}
2305 	}
2306 
2307 	err = BIG_OK;
2308 
2309 ret:
2310 	if (tmp2.malloced) big_finish(&tmp2);
2311 ret2:
2312 	if (tmp.malloced) big_finish(&tmp);
2313 ret1:
2314 	if (ki.malloced) big_finish(&ki);
2315 
2316 	return (err);
2317 }
2318 
2319 
2320 BIG_ERR_CODE
2321 big_isprime_pos(BIGNUM *n)
2322 {
2323 	BIGNUM o, nminus1, tmp, Lkminus1, Lk;
2324 	uint32_t ovalue[BIGTMPSIZE];
2325 	uint32_t nminus1value[BIGTMPSIZE];
2326 	uint32_t tmpvalue[BIGTMPSIZE];
2327 	uint32_t Lkminus1value[BIGTMPSIZE];
2328 	uint32_t Lkvalue[BIGTMPSIZE];
2329 	BIG_ERR_CODE err;
2330 	int e, i, jac;
2331 
2332 	if (big_cmp_abs(n, &One) == 0)
2333 		return (BIG_FALSE);
2334 	if (big_cmp_abs(n, &Two) == 0)
2335 		return (BIG_TRUE);
2336 	if ((n->value[0] & 1) == 0)
2337 		return (BIG_FALSE);
2338 
2339 	if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) != BIG_OK)
2340 		return (err);
2341 
2342 	if ((err = big_init1(&nminus1, n->len,
2343 	    nminus1value, arraysize(nminus1value))) != BIG_OK)
2344 		goto ret1;
2345 
2346 	if ((err = big_init1(&tmp, 2 * n->len,
2347 	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2348 		goto ret2;
2349 
2350 	if ((err = big_init1(&Lkminus1, n->len,
2351 	    Lkminus1value, arraysize(Lkminus1value))) != BIG_OK)
2352 		goto ret3;
2353 
2354 	if ((err = big_init1(&Lk, n->len,
2355 	    Lkvalue, arraysize(Lkvalue))) != BIG_OK)
2356 		goto ret4;
2357 
2358 	(void) big_sub_pos(&o, n, &One); 	/* cannot fail */
2359 	(void) big_copy(&nminus1, &o);		/* cannot fail */
2360 	e = 0;
2361 	while ((o.value[0] & 1) == 0) {
2362 		e++;
2363 		(void) big_half_pos(&o, &o);  /* cannot fail */
2364 	}
2365 	if ((err = big_modexp(&tmp, &Two, &o, n, NULL)) != BIG_OK)
2366 		goto ret;
2367 	i = 0;
2368 	while ((i < e) &&
2369 	    (big_cmp_abs(&tmp, &One) != 0) &&
2370 	    (big_cmp_abs(&tmp, &nminus1) != 0)) {
2371 		if ((err = big_modexp(&tmp, &tmp, &Two, n, NULL)) !=  BIG_OK)
2372 			goto ret;
2373 		i++;
2374 	}
2375 	if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
2376 	    ((i == 0) && (big_cmp_abs(&tmp, &One) == 0)))) {
2377 		err = BIG_FALSE;
2378 		goto ret;
2379 	}
2380 
2381 	if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK)
2382 		goto ret;
2383 	if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK)
2384 		goto ret;
2385 	if (big_cmp_abs(&tmp, n) == 0) {
2386 		err = BIG_FALSE;
2387 		goto ret;
2388 	}
2389 
2390 	(void) big_copy(&o, &Two);
2391 	do {
2392 		(void) big_add_abs(&o, &o, &One);
2393 		if ((err = big_mul(&tmp, &o, &o)) != BIG_OK)
2394 			goto ret;
2395 		(void) big_sub_pos(&tmp, &tmp, &Four);
2396 		if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK)
2397 			goto ret;
2398 	} while (jac != -1);
2399 
2400 	(void) big_add_abs(&tmp, n, &One);
2401 	if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK)
2402 		goto ret;
2403 	if ((big_cmp_abs(&Lkminus1, &o) == 0) && (big_cmp_abs(&Lk, &Two) == 0))
2404 		err = BIG_TRUE;
2405 	else err = BIG_FALSE;
2406 
2407 ret:
2408 	if (Lk.malloced) big_finish(&Lk);
2409 ret4:
2410 	if (Lkminus1.malloced) big_finish(&Lkminus1);
2411 ret3:
2412 	if (tmp.malloced) big_finish(&tmp);
2413 ret2:
2414 	if (nminus1.malloced) big_finish(&nminus1);
2415 ret1:
2416 	if (o.malloced) big_finish(&o);
2417 
2418 	return (err);
2419 }
2420 
2421 
2422 #define	SIEVESIZE 1000
2423 
2424 uint32_t smallprimes[] =
2425 {
2426 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2427 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97
2428 };
2429 
2430 
2431 BIG_ERR_CODE
2432 big_nextprime_pos(BIGNUM *result, BIGNUM *n)
2433 {
2434 	BIG_ERR_CODE err;
2435 	int sieve[SIEVESIZE];
2436 	int i;
2437 	uint32_t off, p;
2438 
2439 	if ((err = big_copy(result, n)) != BIG_OK)
2440 		return (err);
2441 	result->value[0] |= 1;
2442 	/* CONSTCOND */
2443 	while (1) {
2444 		for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2445 		for (i = 0;
2446 		    i < sizeof (smallprimes) / sizeof (uint32_t); i++) {
2447 			p = smallprimes[i];
2448 			off = big_mod16_pos(result, p);
2449 			off = p - off;
2450 			if ((off % 2) == 1) off = off + p;
2451 			off = off/2;
2452 			while (off < SIEVESIZE) {
2453 				sieve[off] = 1;
2454 				off = off + p;
2455 			}
2456 		}
2457 
2458 		for (i = 0; i < SIEVESIZE; i++) {
2459 			if (sieve[i] == 0) {
2460 				err = big_isprime_pos(result);
2461 				if (err != BIG_FALSE) {
2462 					if (err != BIG_TRUE)
2463 						return (err);
2464 					else
2465 						return (BIG_OK);
2466 				}
2467 
2468 			}
2469 			if ((err = big_add_abs(result, result, &Two)) !=
2470 			    BIG_OK)
2471 				return (err);
2472 		}
2473 	}
2474 	/* NOTREACHED */
2475 }
2476 
2477 
2478 BIG_ERR_CODE
2479 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
2480 {
2481 	BIG_ERR_CODE err;
2482 
2483 
2484 	if ((err = big_copy(result, n)) != BIG_OK)
2485 		return (err);
2486 	result->value[0] |= 1;
2487 	while ((err = big_isprime_pos(result)) != BIG_TRUE) {
2488 		if (err != BIG_FALSE)
2489 			return (err);
2490 		if ((err = big_add_abs(result, result, &Two)) != BIG_OK)
2491 			return (err);
2492 	}
2493 	return (BIG_OK);
2494 }
2495 
2496 
2497 /*
2498  * given m and e, computes the rest in the equation
2499  * gcd(m, e) = cm * m + ce * e
2500  */
2501 BIG_ERR_CODE
2502 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
2503 {
2504 	BIGNUM *xi, *ri, *riminus1, *riminus2, *t,
2505 	    *vmi, *vei, *vmiminus1, *veiminus1;
2506 	BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp;
2507 	uint32_t t1value[BIGTMPSIZE];
2508 	uint32_t t2value[BIGTMPSIZE];
2509 	uint32_t t3value[BIGTMPSIZE];
2510 	uint32_t t4value[BIGTMPSIZE];
2511 	uint32_t t5value[BIGTMPSIZE];
2512 	uint32_t t6value[BIGTMPSIZE];
2513 	uint32_t t7value[BIGTMPSIZE];
2514 	uint32_t t8value[BIGTMPSIZE];
2515 	uint32_t tmpvalue[BIGTMPSIZE];
2516 	BIG_ERR_CODE err;
2517 	int len;
2518 
2519 	if (big_cmp_abs(m, e) >= 0) len = m->len;
2520 	else len = e->len;
2521 
2522 	if ((err = big_init1(&t1, len,
2523 	    t1value, arraysize(t1value))) != BIG_OK)
2524 		return (err);
2525 	if ((err = big_init1(&t2, len,
2526 	    t2value, arraysize(t2value))) != BIG_OK)
2527 			goto ret1;
2528 	if ((err = big_init1(&t3, len,
2529 	    t3value, arraysize(t3value))) != BIG_OK)
2530 			goto ret2;
2531 	if ((err = big_init1(&t4, len,
2532 	    t4value, arraysize(t3value))) != BIG_OK)
2533 			goto ret3;
2534 	if ((err = big_init1(&t5, len,
2535 	    t5value, arraysize(t5value))) != BIG_OK)
2536 			goto ret4;
2537 	if ((err = big_init1(&t6, len,
2538 	    t6value, arraysize(t6value))) != BIG_OK)
2539 			goto ret5;
2540 	if ((err = big_init1(&t7, len,
2541 	    t7value, arraysize(t7value))) != BIG_OK)
2542 			goto ret6;
2543 	if ((err = big_init1(&t8, len,
2544 	    t8value, arraysize(t8value))) != BIG_OK)
2545 			goto ret7;
2546 
2547 	if ((err = big_init1(&tmp, 2 * len,
2548 	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
2549 		goto ret8;
2550 
2551 	ri = &t1;
2552 	ri->value[0] = 1;
2553 	ri->len = 1;
2554 	xi = &t2;
2555 	riminus1 = &t3;
2556 	riminus2 = &t4;
2557 	vmi = &t5;
2558 	vei = &t6;
2559 	vmiminus1 = &t7;
2560 	veiminus1 = &t8;
2561 
2562 	(void) big_copy(vmiminus1, &One);
2563 	(void) big_copy(vmi, &One);
2564 	(void) big_copy(veiminus1, &One);
2565 	(void) big_copy(xi, &One);
2566 	vei->len = 1;
2567 	vei->value[0] = 0;
2568 
2569 	(void) big_copy(riminus1, m);
2570 	(void) big_copy(ri, e);
2571 
2572 	while (!big_is_zero(ri)) {
2573 		t = riminus2;
2574 		riminus2 = riminus1;
2575 		riminus1 = ri;
2576 		ri = t;
2577 		if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK)
2578 			goto ret;
2579 		if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK)
2580 			goto ret;
2581 		t = vmiminus1;
2582 		vmiminus1 = vmi;
2583 		vmi = t;
2584 		if ((err = big_mul(&tmp, vei, xi)) != BIG_OK)
2585 			goto ret;
2586 		if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK)
2587 			goto ret;
2588 		t = veiminus1;
2589 		veiminus1 = vei;
2590 		vei = t;
2591 		if ((err = big_div_pos(xi, ri, riminus2, riminus1)) != BIG_OK)
2592 			goto ret;
2593 	}
2594 	if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK))
2595 		goto ret;
2596 	if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK))
2597 		goto ret;
2598 	if (ce != NULL)
2599 		err = big_copy(ce, vei);
2600 ret:
2601 	if (tmp.malloced) big_finish(&tmp);
2602 ret8:
2603 	if (t8.malloced) big_finish(&t8);
2604 ret7:
2605 	if (t7.malloced) big_finish(&t7);
2606 ret6:
2607 	if (t6.malloced) big_finish(&t6);
2608 ret5:
2609 	if (t5.malloced) big_finish(&t5);
2610 ret4:
2611 	if (t4.malloced) big_finish(&t4);
2612 ret3:
2613 	if (t3.malloced) big_finish(&t3);
2614 ret2:
2615 	if (t2.malloced) big_finish(&t2);
2616 ret1:
2617 	if (t1.malloced) big_finish(&t1);
2618 
2619 	return (err);
2620 }
2621