1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Configuration guide
28 * -------------------
29 *
30 * There are 4 preprocessor symbols used to configure the bignum
31 * implementation. This file contains no logic to configure based on
32 * processor; we leave that to the Makefiles to specify.
33 *
34 * USE_FLOATING_POINT
35 * Meaning: There is support for a fast floating-point implementation of
36 * Montgomery multiply.
37 *
38 * PSR_MUL
39 * Meaning: There are processor-specific versions of the low level
40 * functions to implement big_mul. Those functions are: big_mul_set_vec,
41 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support
42 * for all 4 functions. You cannot pick and choose which subset of these
43 * functions to support; that would lead to a rat's nest of #ifdefs.
44 *
45 * HWCAP
46 * Meaning: Call multiply support functions through a function pointer.
47 * On x86, there are multiple implementations for different hardware
48 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when
49 * a function is first used. So, the support functions are called through
50 * a function pointer. There is no need for that on Sparc, because there
51 * is only one implementation; support functions are called directly.
52 * Later, if there were some new VIS instruction, or something, and a
53 * run-time test were needed, rather than variant kernel modules and
54 * libraries, then HWCAP would be defined for Sparc, as well.
55 *
56 * UMUL64
57 * Meaning: It is safe to use generic C code that assumes the existence
58 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined,
59 * then the generic code for big_mul_add_vec() must necessarily be very slow,
60 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
61 *
62 */
63
64
65 #include <sys/types.h>
66 #include "bignum.h"
67
68 #ifdef _KERNEL
69 #include <sys/ddi.h>
70 #include <sys/mdesc.h>
71 #include <sys/crypto/common.h>
72
73 #include <sys/kmem.h>
74 #include <sys/param.h>
75 #include <sys/sunddi.h>
76
77 #else
78 #include <stdlib.h>
79 #include <stdio.h>
80 #include <assert.h>
81 #define ASSERT assert
82 #endif /* _KERNEL */
83
84 #ifdef __amd64
85 #ifdef _KERNEL
86 #include <sys/x86_archext.h> /* cpuid_getvendor() */
87 #include <sys/cpuvar.h>
88 #else
89 #include <sys/auxv.h> /* getisax() */
90 #endif /* _KERNEL */
91 #endif /* __amd64 */
92
93
94 #ifdef _LP64 /* truncate 64-bit size_t to 32-bits */
95 #define UI32(ui) ((uint32_t)ui)
96 #else /* size_t already 32-bits */
97 #define UI32(ui) (ui)
98 #endif
99
100
101 #ifdef _KERNEL
/* kernel: map the bignum allocator onto kmem; KM_NOSLEEP allocations may fail */
#define	big_malloc(size)	kmem_alloc(size, KM_NOSLEEP)
#define	big_free(ptr, size)	kmem_free(ptr, size)
104
105 /*
106 * big_realloc()
107 * Allocate memory of newsize bytes and copy oldsize bytes
108 * to the newly-allocated memory, then free the
109 * previously-allocated memory.
110 * Note: newsize must be > oldsize
111 */
112 void *
big_realloc(void * from,size_t oldsize,size_t newsize)113 big_realloc(void *from, size_t oldsize, size_t newsize)
114 {
115 void *rv;
116
117 rv = kmem_alloc(newsize, KM_NOSLEEP);
118 if (rv != NULL)
119 bcopy(from, rv, oldsize);
120 kmem_free(from, oldsize);
121 return (rv);
122 }
123
124 #else /* _KERNEL */
125
126 #ifndef MALLOC_DEBUG
127
/* userland, non-debug: thin wrappers over malloc/free (size unused by free) */
#define	big_malloc(size)	malloc(size)
#define	big_free(ptr, size)	free(ptr)
130
131 #else
132
/*
 * big_free()
 * Debug wrapper around free(3C): logs the size and address of the
 * buffer being released, then frees it.
 */
void
big_free(void *ptr, size_t size)
{
	/* %zu is the correct conversion for size_t (%d was wrong on LP64) */
	printf("freed %zu bytes at %p\n", size, ptr);
	free(ptr);
}
139
/*
 * big_malloc()
 * Debug wrapper around malloc(3C): logs the requested size and the
 * returned address.  Returns NULL on allocation failure, like malloc.
 */
void *
big_malloc(size_t size)
{
	void *rv;

	rv = malloc(size);
	/* %zu is the correct conversion for size_t (%d was wrong on LP64) */
	printf("malloced %zu bytes, addr:%p\n", size, rv);
	return (rv);
}
148 #endif /* MALLOC_DEBUG */
149
/* userland: realloc(3C) does the copy itself, so oldsize (y) is unused */
#define	big_realloc(x, y, z)	realloc((x), (z))
151
152
/*
 * printbignum()
 * Print a BIGNUM type to stdout: a label line, the signed length in
 * chunks, then the value in hex, most significant chunk first.
 */
void
printbignum(char *aname, BIGNUM *a)
{
	int i;

	/* sign * len encodes the sign in the printed length */
	(void) printf("\n%s\n%d\n", aname, a->sign*a->len);
	for (i = a->len - 1; i >= 0; i--) {
#ifdef BIGNUM_CHUNK_32
		(void) printf("%08x ", a->value[i]);
		/* line break every BITSINBYTE chunks, except after the last */
		if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
			(void) printf("\n");
		}
#else
		/* 64-bit chunks: print as two 32-bit halves, high half first */
		(void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
		    (uint32_t)((a->value[i]) & 0xffffffff));
		if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
			(void) printf("\n");
		}
#endif
	}
	(void) printf("\n");
}
179
180 #endif /* _KERNEL */
181
182
183 #ifdef __amd64
/*
 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64).
 * Cache the result, as the CPU can't change.
 *
 * Note: the userland version uses getisax() and checks for an AMD-64-only
 * feature. The kernel version uses cpuid_getvendor().
 */
static int
bignum_on_intel(void)
{
	/* -1 means "not yet determined"; safe because the answer is stable */
	static int cached_result = -1;

	if (cached_result == -1) { /* first time */
#ifdef _KERNEL
		cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
#else
		uint_t ui;

		(void) getisax(&ui, 1);
		/* AV_386_AMD_MMX is an AMD-only capability bit: absent => Intel */
		cached_result = ((ui & AV_386_AMD_MMX) == 0);
#endif	/* _KERNEL */
	}

	return (cached_result);
}
209 #endif /* __amd64 */
210
211
212 /*
213 * big_init()
214 * Initialize and allocate memory for a BIGNUM type.
215 *
216 * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
217 *
218 * Note: call big_finish() to free memory allocated by big_init().
219 *
220 * Input:
221 * number Uninitialized memory for BIGNUM
222 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
223 *
224 * Output:
225 * number Initialized BIGNUM
226 *
227 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
228 */
229 BIG_ERR_CODE
big_init(BIGNUM * number,int size)230 big_init(BIGNUM *number, int size)
231 {
232 number->value = big_malloc(BIGNUM_WORDSIZE * size);
233 if (number->value == NULL) {
234 return (BIG_NO_MEM);
235 }
236 number->size = size;
237 number->len = 0;
238 number->sign = 1;
239 number->malloced = 1;
240 return (BIG_OK);
241 }
242
243
244 /*
245 * big_init1()
246 * Initialize and, if needed, allocate memory for a BIGNUM type.
247 * Use the buffer passed, buf, if any, instad of allocating memory
248 * if it's at least "size" bytes.
249 *
250 * Note: call big_finish() to free memory allocated by big_init().
251 *
252 * Input:
253 * number Uninitialized memory for BIGNUM
254 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
255 * buf Buffer for storing a BIGNUM.
256 * If NULL, big_init1() will allocate a buffer
257 * bufsize Size, in BIG_CHUNK_SIZE_bit words, of buf
258 *
259 * Output:
260 * number Initialized BIGNUM
261 *
262 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
263 */
264 BIG_ERR_CODE
big_init1(BIGNUM * number,int size,BIG_CHUNK_TYPE * buf,int bufsize)265 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
266 {
267 if ((buf == NULL) || (size > bufsize)) {
268 number->value = big_malloc(BIGNUM_WORDSIZE * size);
269 if (number->value == NULL) {
270 return (BIG_NO_MEM);
271 }
272 number->size = size;
273 number->malloced = 1;
274 } else {
275 number->value = buf;
276 number->size = bufsize;
277 number->malloced = 0;
278 }
279 number->len = 0;
280 number->sign = 1;
281
282 return (BIG_OK);
283 }
284
285
286 /*
287 * big_finish()
288 * Free memory, if any, allocated by big_init() or big_init1().
289 */
290 void
big_finish(BIGNUM * number)291 big_finish(BIGNUM *number)
292 {
293 if (number->malloced == 1) {
294 big_free(number->value, BIGNUM_WORDSIZE * number->size);
295 number->malloced = 0;
296 }
297 }
298
299
/*
 * bn->size should be at least
 * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes
 * converts from byte-big-endian format to bignum format (words in
 * little endian order, but bytes within the words big endian)
 */
void
bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
{
	int i, j;
	uint32_t offs;
	const uint32_t slen = UI32(len);
	BIG_CHUNK_TYPE word;
	uchar_t *knwordp;

	/* an empty byte string encodes the single-chunk value 0 */
	if (slen == 0) {
		bn->len = 1;
		bn->value[0] = 0;
		return;
	}

	/* offs = leading bytes that do not fill a whole chunk */
	offs = slen % BIGNUM_WORDSIZE;
	bn->len = slen / BIGNUM_WORDSIZE;

	/* assemble full chunks, least significant chunk first */
	for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
		knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
		word = knwordp[0];
		for (j = 1; j < BIGNUM_WORDSIZE; j++) {
			word = (word << BITSINBYTE) + knwordp[j];
		}
		bn->value[i] = word;
	}
	/* fold a leading partial chunk into one more (top) word */
	if (offs > 0) {
		word = kn[0];
		for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
		bn->value[bn->len++] = word;
	}
	/* strip high-order zero chunks, but keep at least one chunk */
	while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
		bn->len --;
	}
}
341
342
/*
 * copies the least significant len bytes if
 * len < bn->len * BIGNUM_WORDSIZE
 * converts from bignum format to byte-big-endian format.
 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
 */
void
bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
{
	int i, j;
	uint32_t offs;
	const uint32_t slen = UI32(len);
	BIG_CHUNK_TYPE word;

	if (len < BIGNUM_WORDSIZE * bn->len) {
		/* truncating: emit only the low slen bytes of bn */
		for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
			word = bn->value[i];
			for (j = 0; j < BIGNUM_WORDSIZE; j++) {
				kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
				    word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
		offs = slen % BIGNUM_WORDSIZE;
		if (offs > 0) {
			/* partial top chunk: low offs bytes go to kn[0..offs-1] */
			word = bn->value[slen / BIGNUM_WORDSIZE];
			for (i = slen % BIGNUM_WORDSIZE; i > 0; i --) {
				kn[i - 1] = word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
	} else {
		/* bn fits: write all chunks, then zero-fill leading bytes */
		for (i = 0; i < bn->len; i++) {
			word = bn->value[i];
			for (j = 0; j < BIGNUM_WORDSIZE; j++) {
				kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
				    word & 0xff;
				word = word >> BITSINBYTE;
			}
		}
		for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {
			kn[i] = 0;
		}
	}
}
388
389
390 int
big_bitlength(BIGNUM * a)391 big_bitlength(BIGNUM *a)
392 {
393 int l = 0, b = 0;
394 BIG_CHUNK_TYPE c;
395
396 l = a->len - 1;
397 while ((l > 0) && (a->value[l] == 0)) {
398 l--;
399 }
400 b = BIG_CHUNK_SIZE;
401 c = a->value[l];
402 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
403 c = c << 1;
404 b--;
405 }
406
407 return (l * BIG_CHUNK_SIZE + b);
408 }
409
410
/*
 * big_copy()
 * Copy BIGNUM src to dest, allocating memory if needed.
 * Also normalizes src->len by dropping high-order zero chunks.
 */
BIG_ERR_CODE
big_copy(BIGNUM *dest, BIGNUM *src)
{
	BIG_CHUNK_TYPE *newptr;
	int i, len;

	/* normalize src: drop high-order zero chunks, keep at least one */
	len = src->len;
	while ((len > 1) && (src->value[len - 1] == 0)) {
		len--;
	}
	src->len = len;
	if (dest->size < len) {
		if (dest->malloced == 1) {
			/*
			 * NOTE(review): the kernel big_realloc() frees the
			 * old buffer even on failure, so dest->value is
			 * stale if BIG_NO_MEM is returned below — confirm
			 * callers discard dest on error.
			 */
			newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
			    BIGNUM_WORDSIZE * dest->size,
			    BIGNUM_WORDSIZE * len);
		} else {
			/* dest borrowed its buffer; switch to an owned one */
			newptr = (BIG_CHUNK_TYPE *)
			    big_malloc(BIGNUM_WORDSIZE * len);
			if (newptr != NULL) {
				dest->malloced = 1;
			}
		}
		if (newptr == NULL) {
			return (BIG_NO_MEM);
		}
		dest->value = newptr;
		dest->size = len;
	}
	dest->len = len;
	dest->sign = src->sign;
	for (i = 0; i < len; i++) {
		dest->value[i] = src->value[i];
	}

	return (BIG_OK);
}
452
453
/*
 * big_extend()
 * Allocate memory to extend BIGNUM number to size bignum chunks,
 * if not at least that size already.
 */
BIG_ERR_CODE
big_extend(BIGNUM *number, int size)
{
	BIG_CHUNK_TYPE *newptr;
	int i;

	if (number->size >= size)
		return (BIG_OK);
	if (number->malloced) {
		/*
		 * NOTE(review): on failure the kernel big_realloc() has
		 * already freed the old buffer and number->value becomes
		 * NULL below; in userland a failed realloc() leaks the old
		 * buffer because the pointer is overwritten.
		 */
		number->value = big_realloc(number->value,
		    BIGNUM_WORDSIZE * number->size,
		    BIGNUM_WORDSIZE * size);
	} else {
		/* borrowed buffer: allocate an owned one and copy contents */
		newptr = big_malloc(BIGNUM_WORDSIZE * size);
		if (newptr != NULL) {
			for (i = 0; i < number->size; i++) {
				newptr[i] = number->value[i];
			}
		}
		number->value = newptr;
	}

	if (number->value == NULL) {
		return (BIG_NO_MEM);
	}

	number->size = size;
	number->malloced = 1;
	return (BIG_OK);
}
489
490
/* returns 1 if n == 0 */
int
big_is_zero(BIGNUM *n)
{
	int i, result;

	/*
	 * Scans every chunk instead of returning at the first nonzero
	 * one, keeping the running time independent of the value —
	 * presumably deliberate in crypto code; confirm before adding
	 * an early exit.
	 */
	result = 1;
	for (i = 0; i < n->len; i++) {
		if (n->value[i] != 0) {
			result = 0;
		}
	}
	return (result);
}
505
506
/*
 * big_add_abs()
 * result = |aa| + |bb|; result->sign is set to 1.
 * Extends result if needed; returns BIG_OK or an error from big_extend().
 */
BIG_ERR_CODE
big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	int i, shorter, longer;
	BIG_CHUNK_TYPE cy, ai;
	BIG_CHUNK_TYPE *r, *a, *b, *c;
	BIG_ERR_CODE err;
	BIGNUM *longerarg;

	if (aa->len > bb->len) {
		shorter = bb->len;
		longer = aa->len;
		longerarg = aa;
	} else {
		shorter = aa->len;
		longer = bb->len;
		longerarg = bb;
	}
	/* one extra chunk for a possible final carry */
	if (result->size < longer + 1) {
		err = big_extend(result, longer + 1);
		if (err != BIG_OK) {
			return (err);
		}
	}

	r = result->value;
	a = aa->value;
	b = bb->value;
	c = longerarg->value;
	cy = 0;
	for (i = 0; i < shorter; i++) {
		ai = a[i];
		r[i] = ai + b[i] + cy;
		/*
		 * Carry detection without a wider type: r[i] > ai means no
		 * wrap; r[i] < ai means wrap.  r[i] == ai means b[i] + cy
		 * was either 0 or 2^BIG_CHUNK_SIZE, in which case the carry
		 * out equals the carry in, so cy is deliberately unchanged.
		 */
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
	/* propagate the carry through the longer operand's remaining chunks */
	for (; i < longer; i++) {
		ai = c[i];
		r[i] = ai + cy;
		if (r[i] >= ai) {
			cy = 0;
		}
	}
	if (cy == 1) {
		r[i] = cy;
		result->len = longer + 1;
	} else {
		result->len = longer;
	}
	result->sign = 1;
	return (BIG_OK);
}
562
563
/* caller must make sure that result has at least len words allocated */
void
big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
{
	int i;
	BIG_CHUNK_TYPE cy, ai;

	/*
	 * Two's-complement subtraction: r = a + ~b + cy, where cy is the
	 * complement of the borrow (1 means "no borrow", so start at 1).
	 */
	cy = 1;
	for (i = 0; i < len; i++) {
		ai = a[i];
		r[i] = ai + (~b[i]) + cy;
		/* r[i] == ai leaves cy unchanged (carry out == carry in) */
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
}
582
583
/* result=aa-bb it is assumed that aa>=bb */
BIG_ERR_CODE
big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	int i, shorter;
	BIG_CHUNK_TYPE cy = 1, ai;
	BIG_CHUNK_TYPE *r, *a, *b;
	BIG_ERR_CODE err = BIG_OK;

	if (aa->len > bb->len) {
		shorter = bb->len;
	} else {
		shorter = aa->len;
	}
	if (result->size < aa->len) {
		err = big_extend(result, aa->len);
		if (err != BIG_OK) {
			return (err);
		}
	}

	r = result->value;
	a = aa->value;
	b = bb->value;
	result->len = aa->len;
	/* cy is the complement of the borrow: 1 means "no borrow" */
	cy = 1;
	for (i = 0; i < shorter; i++) {
		ai = a[i];
		r[i] = ai + (~b[i]) + cy;
		/* r[i] == ai leaves cy unchanged (carry out == carry in) */
		if (r[i] > ai) {
			cy = 0;
		} else if (r[i] < ai) {
			cy = 1;
		}
	}
	/* bb's missing high chunks are zero: ~0 plus incoming carry */
	for (; i < aa->len; i++) {
		ai = a[i];
		r[i] = ai + (~0) + cy;
		if (r[i] < ai) {
			cy = 1;
		}
	}
	result->sign = 1;

	/* a final borrow (cy == 0) means aa < bb, violating the contract */
	if (cy == 0) {
		return (BIG_INVALID_ARGS);
	} else {
		return (BIG_OK);
	}
}
634
635
636 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */
637 int
big_cmp_abs(BIGNUM * aa,BIGNUM * bb)638 big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
639 {
640 int i;
641
642 if (aa->len > bb->len) {
643 for (i = aa->len - 1; i > bb->len - 1; i--) {
644 if (aa->value[i] > 0) {
645 return (1);
646 }
647 }
648 } else if (aa->len < bb->len) {
649 for (i = bb->len - 1; i > aa->len - 1; i--) {
650 if (bb->value[i] > 0) {
651 return (-1);
652 }
653 }
654 } else {
655 i = aa->len - 1;
656 }
657 for (; i >= 0; i--) {
658 if (aa->value[i] > bb->value[i]) {
659 return (1);
660 } else if (aa->value[i] < bb->value[i]) {
661 return (-1);
662 }
663 }
664
665 return (0);
666 }
667
668
669 BIG_ERR_CODE
big_sub(BIGNUM * result,BIGNUM * aa,BIGNUM * bb)670 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
671 {
672 BIG_ERR_CODE err;
673
674 if ((bb->sign == -1) && (aa->sign == 1)) {
675 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
676 return (err);
677 }
678 result->sign = 1;
679 } else if ((aa->sign == -1) && (bb->sign == 1)) {
680 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
681 return (err);
682 }
683 result->sign = -1;
684 } else if ((aa->sign == 1) && (bb->sign == 1)) {
685 if (big_cmp_abs(aa, bb) >= 0) {
686 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
687 return (err);
688 }
689 result->sign = 1;
690 } else {
691 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
692 return (err);
693 }
694 result->sign = -1;
695 }
696 } else {
697 if (big_cmp_abs(aa, bb) >= 0) {
698 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
699 return (err);
700 }
701 result->sign = -1;
702 } else {
703 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
704 return (err);
705 }
706 result->sign = 1;
707 }
708 }
709 return (BIG_OK);
710 }
711
712
713 BIG_ERR_CODE
big_add(BIGNUM * result,BIGNUM * aa,BIGNUM * bb)714 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
715 {
716 BIG_ERR_CODE err;
717
718 if ((bb->sign == -1) && (aa->sign == -1)) {
719 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
720 return (err);
721 }
722 result->sign = -1;
723 } else if ((aa->sign == 1) && (bb->sign == 1)) {
724 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
725 return (err);
726 }
727 result->sign = 1;
728 } else if ((aa->sign == 1) && (bb->sign == -1)) {
729 if (big_cmp_abs(aa, bb) >= 0) {
730 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
731 return (err);
732 }
733 result->sign = 1;
734 } else {
735 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
736 return (err);
737 }
738 result->sign = -1;
739 }
740 } else {
741 if (big_cmp_abs(aa, bb) >= 0) {
742 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
743 return (err);
744 }
745 result->sign = -1;
746 } else {
747 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
748 return (err);
749 }
750 result->sign = 1;
751 }
752 }
753 return (BIG_OK);
754 }
755
756
/* result = aa/2 */
BIG_ERR_CODE
big_half_pos(BIGNUM *result, BIGNUM *aa)
{
	BIG_ERR_CODE err;
	int i;
	BIG_CHUNK_TYPE cy, cy1;
	BIG_CHUNK_TYPE *a, *r;

	if (result->size < aa->len) {
		err = big_extend(result, aa->len);
		if (err != BIG_OK) {
			return (err);
		}
	}

	result->len = aa->len;
	a = aa->value;
	r = result->value;
	/* shift right one bit, top chunk first; cy carries the bit down */
	cy = 0;
	for (i = aa->len - 1; i >= 0; i--) {
		cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
		r[i] = (cy | (a[i] >> 1));
		cy = cy1;
	}
	/*
	 * Drop a now-zero top chunk.  NOTE(review): if aa is a one-chunk
	 * value <= 1 this leaves result->len == 0 — callers appear to
	 * tolerate that; confirm before reusing elsewhere.
	 */
	if (r[result->len - 1] == 0) {
		result->len--;
	}

	return (BIG_OK);
}
788
/* result = aa*2 */
BIG_ERR_CODE
big_double(BIGNUM *result, BIGNUM *aa)
{
	BIG_ERR_CODE err;
	int i, rsize;
	BIG_CHUNK_TYPE cy, cy1;
	BIG_CHUNK_TYPE *a, *r;

	/* an extra chunk is needed iff aa's top bit is set */
	if ((aa->len > 0) &&
	    ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
		rsize = aa->len + 1;
	} else {
		rsize = aa->len;
	}

	if (result->size < rsize) {
		err = big_extend(result, rsize);
		if (err != BIG_OK)
			return (err);
	}

	a = aa->value;
	r = result->value;
	/* pre-set the overflow chunk; the shifted-out top bit lands there */
	if (rsize == aa->len + 1) {
		r[rsize - 1] = 1;
	}
	/* shift left one bit, low chunk first; cy carries the bit up */
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
		r[i] = (cy | (a[i] << 1));
		cy = cy1;
	}
	result->len = rsize;
	return (BIG_OK);
}
825
826
/*
 * returns aa mod b, aa must be nonneg, b must be a max
 * (BIG_CHUNK_SIZE / 2)-bit integer
 */
static uint32_t
big_modhalf_pos(BIGNUM *aa, uint32_t b)
{
	int i;
	BIG_CHUNK_TYPE rem;

	if (aa->len == 0) {
		return (0);
	}
	/*
	 * Long division one half-chunk at a time: because b fits in half
	 * a chunk, rem < b fits in half a chunk too, so shifting rem up
	 * and OR-ing in the next half-chunk never overflows
	 * BIG_CHUNK_TYPE before the % reduces it again.
	 */
	rem = aa->value[aa->len - 1] % b;
	for (i = aa->len - 2; i >= 0; i--) {
		rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
		    (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
		rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
		    (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
	}

	return ((uint32_t)rem);
}
850
851
/*
 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
 * result->size should be at least aa->len at entry
 * aa, bb, and result should be positive
 */
void
big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	int i, lendiff;
	BIGNUM res1, aa1;

	lendiff = aa->len - bb->len;
	/* res1/aa1 are aliases for the top bb->len chunks of result/aa */
	res1.size = result->size - lendiff;
	res1.malloced = 0;
	res1.value = result->value + lendiff;
	aa1.size = aa->size - lendiff;
	aa1.value = aa->value + lendiff;
	aa1.len = bb->len;
	aa1.sign = 1;
	/* return value (possible borrow indication) deliberately ignored */
	(void) big_sub_pos(&res1, &aa1, bb);
	/* low lendiff chunks are unchanged; copy unless operating in place */
	if (result->value != aa->value) {
		for (i = 0; i < lendiff; i++) {
			result->value[i] = aa->value[i];
		}
	}
	result->len = aa->len;
}
879
880
881 /*
882 * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
883 * (2^BIG_CHUNK_SIZE)^lendiff * |bb|
884 * aa->len should be >= bb->len
885 */
886 int
big_cmp_abs_high(BIGNUM * aa,BIGNUM * bb)887 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
888 {
889 int lendiff;
890 BIGNUM aa1;
891
892 lendiff = aa->len - bb->len;
893 aa1.len = bb->len;
894 aa1.size = aa->size - lendiff;
895 aa1.malloced = 0;
896 aa1.value = aa->value + lendiff;
897 return (big_cmp_abs(&aa1, bb));
898 }
899
900
/*
 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
 * result should have enough space allocated.
 */
static void
big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
	int i;
	BIG_CHUNK_TYPE t1, t2, ai, cy;
	BIG_CHUNK_TYPE *a, *r;

	a = aa->value;
	r = result->value;
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		ai = a[i];
		/*
		 * Split ai into halves; each half-chunk * b product fits
		 * in a full chunk, so no intermediate overflow occurs.
		 */
		t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
		t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
		    (t1 >> (BIG_CHUNK_SIZE / 2));
		r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
		    (t2 << (BIG_CHUNK_SIZE / 2));
		cy = t2 >> (BIG_CHUNK_SIZE / 2);
	}
	r[i] = cy;
	result->len = aa->len + 1;
	result->sign = aa->sign;
}
928
929
/*
 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
 * (BIG_CHUNK_SIZE / 2)-bit positive integer.
 * result should have enough space allocated.
 */
static void
big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
	int i;
	BIG_CHUNK_TYPE t1, t2, ai, cy, ri;
	BIG_CHUNK_TYPE *a, *r;

	a = aa->value;
	r = result->value;
	cy = 0;
	ri = 0;
	for (i = 0; i < aa->len; i++) {
		ai = a[i];
		/* same half-word product as big_mulhalf_low ... */
		t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
		t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
		    (t1 >> (BIG_CHUNK_SIZE / 2));
		/* ... but emitted shifted left by half a chunk (ri holds
		 * the half-word deferred from the previous iteration) */
		r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri;
		ri = t2 & BIG_CHUNK_LOWHALFBITS;
		cy = t2 >> (BIG_CHUNK_SIZE / 2);
	}
	r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri;
	result->len = aa->len + 1;
	result->sign = aa->sign;
}
959
960
/* it is assumed that result->size is big enough */
void
big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
{
	int i;
	BIG_CHUNK_TYPE cy, ai;

	/*
	 * offs == 0 must be special-cased: the general path would shift
	 * by BIG_CHUNK_SIZE, which is undefined behavior in C.
	 */
	if (offs == 0) {
		if (result != aa) {
			(void) big_copy(result, aa);
		}
		return;
	}
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		ai = aa->value[i];
		result->value[i] = (ai << offs) | cy;
		cy = ai >> (BIG_CHUNK_SIZE - offs);
	}
	/* bits shifted out of the top chunk spill into a new top chunk */
	if (cy != 0) {
		result->len = aa->len + 1;
		result->value[result->len - 1] = cy;
	} else {
		result->len = aa->len;
	}
	result->sign = aa->sign;
}
988
989
/* it is assumed that result->size is big enough */
void
big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
{
	int i;
	BIG_CHUNK_TYPE cy, ai;

	/* offs == 0 would make the (BIG_CHUNK_SIZE - offs) shift UB */
	if (offs == 0) {
		if (result != aa) {
			(void) big_copy(result, aa);
		}
		return;
	}
	cy = aa->value[0] >> offs;
	for (i = 1; i < aa->len; i++) {
		ai = aa->value[i];
		result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
		cy = ai >> offs;
	}
	/* the top chunk keeps the leftover bits; len is not renormalized */
	result->len = aa->len;
	result->value[result->len - 1] = cy;
	result->sign = aa->sign;
}
1013
1014
/*
 * result = aa/bb remainder = aa mod bb
 * it is assumed that aa and bb are positive
 *
 * Classical schoolbook division computed half a chunk at a time:
 * bb is normalized (shifted left by offs) so its top half-chunk has
 * its high bit set; each quotient half-digit is then estimated from
 * the leading chunks and corrected by a few subtractions at most.
 */
BIG_ERR_CODE
big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
{
	BIG_ERR_CODE err = BIG_OK;
	int i, alen, blen, tlen, rlen, offs;
	BIG_CHUNK_TYPE higha, highb, coeff;
	BIG_CHUNK_TYPE *a, *b;
	BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE];

	a = aa->value;
	b = bb->value;
	alen = aa->len;
	blen = bb->len;
	/* normalize both operands: drop high-order zero chunks */
	while ((alen > 1) && (a[alen - 1] == 0)) {
		alen = alen - 1;
	}
	aa->len = alen;
	while ((blen > 1) && (b[blen - 1] == 0)) {
		blen = blen - 1;
	}
	bb->len = blen;
	if ((blen == 1) && (b[0] == 0)) {
		return (BIG_DIV_BY_0);
	}

	/* aa < bb: quotient is 0 and the remainder is aa itself */
	if (big_cmp_abs(aa, bb) < 0) {
		if ((remainder != NULL) &&
		    ((err = big_copy(remainder, aa)) != BIG_OK)) {
			return (err);
		}
		if (result != NULL) {
			result->len = 1;
			result->sign = 1;
			result->value[0] = 0;
		}
		return (BIG_OK);
	}

	/* temporaries live on the stack unless BIGTMPSIZE is too small */
	if ((err = big_init1(&bblow, blen + 1,
	    bblowvalue, arraysize(bblowvalue))) != BIG_OK)
		return (err);

	if ((err = big_init1(&bbhigh, blen + 1,
	    bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp1, alen + 2,
	    tmp1value, arraysize(tmp1value))) != BIG_OK)
		goto ret2;

	if ((err = big_init1(&tmp2, blen + 2,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret3;

	if ((err = big_init1(&tresult, alen - blen + 2,
	    tresultvalue, arraysize(tresultvalue))) != BIG_OK)
		goto ret4;

	/*
	 * Compute offs, the left shift that brings the high bit of the
	 * divisor's top half-chunk to 1 (the normalization step).
	 */
	offs = 0;
	highb = b[blen - 1];
	if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
		highb = highb >> (BIG_CHUNK_SIZE / 2);
		offs = (BIG_CHUNK_SIZE / 2);
	}
	while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
		highb = highb << 1;
		offs++;
	}

	/* bblow = bb << offs; bbhigh = bblow moved up by half a chunk */
	big_shiftleft(&bblow, bb, offs);

	if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
		big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
	} else {
		big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
	}
	if (bbhigh.value[bbhigh.len - 1] == 0) {
		bbhigh.len--;
	} else {
		bbhigh.value[bbhigh.len] = 0;
	}

	/* top chunk of the normalized divisor, used for digit estimation */
	highb = bblow.value[bblow.len - 1];

	/* tmp1 = normalized dividend; rlen = number of quotient chunks */
	big_shiftleft(&tmp1, aa, offs);
	rlen = tmp1.len - bblow.len + 1;
	tresult.len = rlen;

	/* append a zero guard chunk on top of the working dividend */
	tmp1.len++;
	tlen = tmp1.len;
	tmp1.value[tmp1.len - 1] = 0;
	for (i = 0; i < rlen; i++) {
		/*
		 * Estimate the high half-digit of this quotient chunk from
		 * the top chunk and a half of tmp1.  Dividing by
		 * (highb + 1) keeps the estimate from overshooting.
		 */
		higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
		    (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
		coeff = higha / (highb + 1);
		big_mulhalf_high(&tmp2, &bblow, coeff);
		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
		bbhigh.len++;
		/* correct the (at most slightly low) estimate */
		while (tmp1.value[tlen - 1] > 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
			coeff++;
		}
		bbhigh.len--;
		tlen--;
		tmp1.len--;
		while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
			coeff++;
		}
		tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
		/* now the low half-digit of the same quotient chunk */
		higha = tmp1.value[tlen - 1];
		coeff = higha / (highb + 1);
		big_mulhalf_low(&tmp2, &bblow, coeff);
		tmp2.len--;
		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
		while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bblow);
			coeff++;
		}
		tresult.value[rlen - i - 1] =
		    tresult.value[rlen - i - 1] + coeff;
	}

	/* undo the normalization shift to recover the true remainder */
	big_shiftright(&tmp1, &tmp1, offs);

	err = BIG_OK;

	if ((remainder != NULL) &&
	    ((err = big_copy(remainder, &tmp1)) != BIG_OK))
		goto ret;

	if (result != NULL)
		err = big_copy(result, &tresult);

ret:
	big_finish(&tresult);
ret4:
	big_finish(&tmp1);
ret3:
	big_finish(&tmp2);
ret2:
	big_finish(&bbhigh);
ret1:
	big_finish(&bblow);
	return (err);
}
1170
1171
1172 /*
1173 * If there is no processor-specific integer implementation of
1174 * the lower level multiply functions, then this code is provided
1175 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
1176 * big_sqr_vec().
1177 *
1178 * There are two generic implementations. One that assumes that
1179 * there is hardware and C compiler support for a 32 x 32 --> 64
1180 * bit unsigned multiply, but otherwise is not specific to any
1181 * processor, platform, or ISA.
1182 *
1183 * The other makes very few assumptions about hardware capabilities.
1184 * It does not even assume that there is any implementation of a
1185 * 32 x 32 --> 64 bit multiply that is accessible to C code and
 * appropriate to use. It instead constructs 32 x 32 --> 64 bit
1187 * multiplies from 16 x 16 --> 32 bit multiplies.
1188 *
1189 */
1190
1191 #if !defined(PSR_MUL)
1192
1193 #ifdef UMUL64
1194
1195 #if (BIG_CHUNK_SIZE == 32)
1196
1197 #define UNROLL8
1198
/*
 * One round of r[R] = a[R] * d + carry.  The PREFETCH variant also
 * loads a[R + 1] into pf for the next round; the NOPREFETCH variant
 * omits that load and is used for the final round, so the loop never
 * reads past the end of a[].
 * All four macros expect the caller's locals p, pf, t, cy, d
 * (uint64_t) and r, a (uint32_t *) to be in scope.
 */
#define	MUL_SET_VEC_ROUND_PREFETCH(R) \
	p = pf * d; \
	pf = (uint64_t)a[R + 1]; \
	t = p + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#define	MUL_SET_VEC_ROUND_NOPREFETCH(R) \
	p = pf * d; \
	t = p + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

/*
 * One round of r[R] += a[R] * d + carry, with the same prefetch
 * convention as the MUL_SET_VEC rounds above.
 */
#define	MUL_ADD_VEC_ROUND_PREFETCH(R) \
	t = (uint64_t)r[R]; \
	p = pf * d; \
	pf = (uint64_t)a[R + 1]; \
	t = p + t + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#define	MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
	t = (uint64_t)r[R]; \
	p = pf * d; \
	t = p + t + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32
1226
1227 #ifdef UNROLL8
1228
1229 #define UNROLL 8
1230
/*
 * r = a * b
 * where r and a are vectors; b is a single 32-bit digit
 * Returns the high-order carry digit of the product.
 */
uint32_t
big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
	/* these locals are shared with the MUL_SET_VEC_ROUND_* macros */
	uint64_t d, pf, p, t, cy;

	if (len == 0)
		return (0);
	cy = 0;
	d = (uint64_t)b;
	pf = (uint64_t)a[0];
	/* bulk of the vector, UNROLL digits per iteration */
	while (len > UNROLL) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		MUL_SET_VEC_ROUND_PREFETCH(1);
		MUL_SET_VEC_ROUND_PREFETCH(2);
		MUL_SET_VEC_ROUND_PREFETCH(3);
		MUL_SET_VEC_ROUND_PREFETCH(4);
		MUL_SET_VEC_ROUND_PREFETCH(5);
		MUL_SET_VEC_ROUND_PREFETCH(6);
		MUL_SET_VEC_ROUND_PREFETCH(7);
		r += UNROLL;
		a += UNROLL;
		len -= UNROLL;
	}
	/* exactly one full group left: last round must not prefetch a[len] */
	if (len == UNROLL) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		MUL_SET_VEC_ROUND_PREFETCH(1);
		MUL_SET_VEC_ROUND_PREFETCH(2);
		MUL_SET_VEC_ROUND_PREFETCH(3);
		MUL_SET_VEC_ROUND_PREFETCH(4);
		MUL_SET_VEC_ROUND_PREFETCH(5);
		MUL_SET_VEC_ROUND_PREFETCH(6);
		MUL_SET_VEC_ROUND_NOPREFETCH(7);
		return ((uint32_t)cy);
	}
	/* 1 .. UNROLL-1 digits remain */
	while (len > 1) {
		MUL_SET_VEC_ROUND_PREFETCH(0);
		++r;
		++a;
		--len;
	}
	if (len > 0) {
		MUL_SET_VEC_ROUND_NOPREFETCH(0);
	}
	return ((uint32_t)cy);
}
1281
/*
 * r += a * b
 * where r and a are vectors; b is a single 32-bit digit.
 * Returns the final carry digit.
 *
 * Structure mirrors big_mul_set_vec() above: 8x-unrolled main loop,
 * NOPREFETCH round for the last digit so a[len] is never read.
 */

uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
	uint64_t d, pf, p, t, cy;

	if (len == 0)
		return (0);
	cy = 0;
	d = (uint64_t)b;
	pf = (uint64_t)a[0];	/* prefetch the first source digit */
	while (len > 8) {
		MUL_ADD_VEC_ROUND_PREFETCH(0);
		MUL_ADD_VEC_ROUND_PREFETCH(1);
		MUL_ADD_VEC_ROUND_PREFETCH(2);
		MUL_ADD_VEC_ROUND_PREFETCH(3);
		MUL_ADD_VEC_ROUND_PREFETCH(4);
		MUL_ADD_VEC_ROUND_PREFETCH(5);
		MUL_ADD_VEC_ROUND_PREFETCH(6);
		MUL_ADD_VEC_ROUND_PREFETCH(7);
		r += 8;
		a += 8;
		len -= 8;
	}
	if (len == 8) {
		MUL_ADD_VEC_ROUND_PREFETCH(0);
		MUL_ADD_VEC_ROUND_PREFETCH(1);
		MUL_ADD_VEC_ROUND_PREFETCH(2);
		MUL_ADD_VEC_ROUND_PREFETCH(3);
		MUL_ADD_VEC_ROUND_PREFETCH(4);
		MUL_ADD_VEC_ROUND_PREFETCH(5);
		MUL_ADD_VEC_ROUND_PREFETCH(6);
		MUL_ADD_VEC_ROUND_NOPREFETCH(7);
		return ((uint32_t)cy);
	}
	while (len > 1) {
		MUL_ADD_VEC_ROUND_PREFETCH(0);
		++r;
		++a;
		--len;
	}
	if (len > 0) {
		MUL_ADD_VEC_ROUND_NOPREFETCH(0);
	}
	return ((uint32_t)cy);
}
1332 #endif /* UNROLL8 */
1333
/*
 * r = a * a, a is a vector of length len; r must hold 2 * len digits.
 *
 * Squaring shortcut: compute each cross product a[i] * a[j] (i < j)
 * only once (the big_mul_set/add_vec passes below), then double the
 * accumulated cross products and add in the diagonal squares a[i]^2,
 * propagating carries two digits at a time.
 */
void
big_sqr_vec(uint32_t *r, uint32_t *a, int len)
{
	uint32_t *tr, *ta;
	int tlen, row, col;
	uint64_t p, s, t, t2, cy;
	uint32_t d;

	/* accumulate the off-diagonal products into r[1 .. 2*len-2] */
	tr = r + 1;
	ta = a;
	tlen = len - 1;
	tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
	while (--tlen > 0) {
		tr += 2;
		++ta;
		tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
	}
	/* fold in a[0]^2 and start the doubled-carry chain */
	s = (uint64_t)a[0];
	s = s * s;
	r[0] = (uint32_t)s;
	cy = s >> 32;
	p = ((uint64_t)r[1] << 1) + cy;
	r[1] = (uint32_t)p;
	cy = p >> 32;
	row = 1;
	col = 2;
	/* for each remaining row: double r[col], add a[row]^2, carry on */
	while (row < len) {
		s = (uint64_t)a[row];
		s = s * s;
		p = (uint64_t)r[col] << 1;
		t = p + s;
		d = (uint32_t)t;
		t2 = (uint64_t)d + cy;
		r[col] = (uint32_t)t2;
		/* carry has two parts: overflow of the doubling and of +cy */
		cy = (t >> 32) + (t2 >> 32);
		if (row == len - 1)
			break;
		p = ((uint64_t)r[col + 1] << 1) + cy;
		r[col + 1] = (uint32_t)p;
		cy = p >> 32;
		++row;
		col += 2;
	}
	/* top digit of the 2*len-digit result */
	r[col + 1] = (uint32_t)cy;
}
1379
1380 #else /* BIG_CHUNK_SIZE == 64 */
1381
/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * No 128-bit multiply is assumed here:  the chunk * chunk product is
 * built from half-chunk multiplies.  Pass 1 folds in the low half of
 * digit; pass 2 folds in the high half, which lands half a chunk higher,
 * so it reads/writes digits split across adjacent chunk boundaries.
 */
BIG_CHUNK_TYPE
big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
    BIG_CHUNK_TYPE digit)
{
	BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
	int i;

	cy1 = 0;
	dlow = digit & BIG_CHUNK_LOWHALFBITS;
	dhigh = digit >> (BIG_CHUNK_SIZE / 2);
	/* pass 1: r += a * dlow */
	for (i = 0; i < len; i++) {
		cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
		    dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
		    (r[i] & BIG_CHUNK_LOWHALFBITS);
		cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
		    dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
		    (r[i] >> (BIG_CHUNK_SIZE / 2));
		r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
		    (cy1 << (BIG_CHUNK_SIZE / 2));
	}
	retcy = cy1 >> (BIG_CHUNK_SIZE / 2);

	/* pass 2: r += (a * dhigh) shifted up by half a chunk */
	cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
	for (i = 0; i < len - 1; i++) {
		cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
		    dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
		    (r[i] >> (BIG_CHUNK_SIZE / 2));
		r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
		    (cy << (BIG_CHUNK_SIZE / 2));
		cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
		    dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
		    (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
	}
	/* last chunk, then fold the leftover halves into the return carry */
	cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
	    dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
	    (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
	r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
	    (cy << (BIG_CHUNK_SIZE / 2));
	retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
	    dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;

	return (retcy);
}
1429
1430
1431 /*
1432 * r = a * digit, r and a are vectors of length len
1433 * returns the carry digit
1434 */
1435 BIG_CHUNK_TYPE
big_mul_set_vec(BIG_CHUNK_TYPE * r,BIG_CHUNK_TYPE * a,int len,BIG_CHUNK_TYPE digit)1436 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
1437 BIG_CHUNK_TYPE digit)
1438 {
1439 int i;
1440
1441 ASSERT(r != a);
1442 for (i = 0; i < len; i++) {
1443 r[i] = 0;
1444 }
1445 return (big_mul_add_vec(r, a, len, digit));
1446 }
1447
1448 void
big_sqr_vec(BIG_CHUNK_TYPE * r,BIG_CHUNK_TYPE * a,int len)1449 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
1450 {
1451 int i;
1452
1453 ASSERT(r != a);
1454 r[len] = big_mul_set_vec(r, a, len, a[0]);
1455 for (i = 1; i < len; ++i)
1456 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
1457 }
1458
1459 #endif /* BIG_CHUNK_SIZE == 32/64 */
1460
1461
1462 #else /* ! UMUL64 */
1463
1464 #if (BIG_CHUNK_SIZE != 32)
1465 #error "Don't use 64-bit chunks without defining UMUL64"
1466 #endif
1467
1468
/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * No 32 x 32 --> 64 bit multiply is assumed: each product is built from
 * 16 x 16 --> 32 bit halves.  Pass 1 folds in digit's low 16 bits;
 * pass 2 folds in the high 16 bits, which land 16 bits higher, so it
 * reads and writes digits split across adjacent word boundaries.
 */
uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
	uint32_t sum, hi, topcarry, dlo, dhi;
	int i;

	dlo = digit & 0xffff;
	dhi = digit >> 16;

	/* pass 1: r += a * dlo, 16 bits at a time */
	hi = 0;
	for (i = 0; i < len; i++) {
		sum = (hi >> 16) + dlo * (a[i] & 0xffff) + (r[i] & 0xffff);
		hi = (sum >> 16) + dlo * (a[i] >> 16) + (r[i] >> 16);
		r[i] = (sum & 0xffff) | (hi << 16);
	}
	topcarry = hi >> 16;

	/* pass 2: r += (a * dhi) << 16, digits read half-shifted */
	hi = r[0] & 0xffff;
	for (i = 0; i < len - 1; i++) {
		sum = (hi >> 16) + dhi * (a[i] & 0xffff) + (r[i] >> 16);
		r[i] = (hi & 0xffff) | (sum << 16);
		hi = (sum >> 16) + dhi * (a[i] >> 16) + (r[i + 1] & 0xffff);
	}
	sum = (hi >> 16) + dhi * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
	r[len - 1] = (hi & 0xffff) | (sum << 16);
	topcarry = (sum >> 16) + dhi * (a[len - 1] >> 16) + topcarry;

	return (topcarry);
}
1501
1502
/*
 * r = a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * Clears the destination and delegates to big_mul_add_vec().
 */
uint32_t
big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
	int idx;

	ASSERT(r != a);
	for (idx = len - 1; idx >= 0; idx--) {
		r[idx] = 0;
	}

	return (big_mul_add_vec(r, a, len, digit));
}
1519
/*
 * r = a * a, a is a vector of length len; r must hold 2 * len digits.
 *
 * Schoolbook squaring: first row initializes r, later rows accumulate
 * digit-scaled copies of a, each shifted up by one more digit.
 */
void
big_sqr_vec(uint32_t *r, uint32_t *a, int len)
{
	int row;

	ASSERT(r != a);
	r[len] = big_mul_set_vec(r, a, len, a[0]);
	for (row = 1; row < len; row++) {
		r[len + row] = big_mul_add_vec(r + row, a, len, a[row]);
	}
}
1530
1531 #endif /* UMUL64 */
1532
1533 void
big_mul_vec(BIG_CHUNK_TYPE * r,BIG_CHUNK_TYPE * a,int alen,BIG_CHUNK_TYPE * b,int blen)1534 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
1535 BIG_CHUNK_TYPE *b, int blen)
1536 {
1537 int i;
1538
1539 r[alen] = big_mul_set_vec(r, a, alen, b[0]);
1540 for (i = 1; i < blen; ++i)
1541 r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
1542 }
1543
1544
1545 #endif /* ! PSR_MUL */
1546
1547
/*
 * result = aa * bb; result->value should be big enough to hold the result.
 *
 * Implementation: Standard grammar school algorithm.
 *
 * Side effects:  aa->len and bb->len are trimmed in place to drop
 * leading zero digits.  If aa == bb (or they compare equal in absolute
 * value), the faster squaring path is used.  aa/bb may alias result.
 */
BIG_ERR_CODE
big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	BIGNUM tmp1;
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE *r, *t, *a, *b;
	BIG_ERR_CODE err;
	int i, alen, blen, rsize, sign, diff;

	/* make aa the operand with the larger absolute value */
	if (aa == bb) {
		diff = 0;
	} else {
		diff = big_cmp_abs(aa, bb);
		if (diff < 0) {
			BIGNUM *tt;
			tt = aa;
			aa = bb;
			bb = tt;
		}
	}
	a = aa->value;
	b = bb->value;
	alen = aa->len;
	blen = bb->len;
	/* trim leading zero digits (note: modifies the inputs' len) */
	while ((alen > 1) && (a[alen - 1] == 0)) {
		alen--;
	}
	aa->len = alen;
	while ((blen > 1) && (b[blen - 1] == 0)) {
		blen--;
	}
	bb->len = blen;

	rsize = alen + blen;
	ASSERT(rsize > 0);
	if (result->size < rsize) {
		err = big_extend(result, rsize);
		if (err != BIG_OK) {
			return (err);
		}
		/* aa or bb might be an alias to result */
		a = aa->value;
		b = bb->value;
	}
	r = result->value;

	/* shortcut: multiply by zero */
	if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
		result->len = 1;
		result->sign = 1;
		r[0] = 0;
		return (BIG_OK);
	}
	sign = aa->sign * bb->sign;
	/* shortcut: multiply by one (either operand) */
	if ((alen == 1) && (a[0] == 1)) {
		for (i = 0; i < blen; i++) {
			r[i] = b[i];
		}
		result->len = blen;
		result->sign = sign;
		return (BIG_OK);
	}
	if ((blen == 1) && (b[0] == 1)) {
		for (i = 0; i < alen; i++) {
			r[i] = a[i];
		}
		result->len = alen;
		result->sign = sign;
		return (BIG_OK);
	}

	/* compute into a scratch BIGNUM so aliasing with result is safe */
	if ((err = big_init1(&tmp1, rsize,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}
	t = tmp1.value;

	for (i = 0; i < rsize; i++) {
		t[i] = 0;
	}

	/* equal operands: use the squaring kernel (for alen > 2) */
	if (diff == 0 && alen > 2) {
		BIG_SQR_VEC(t, a, alen);
	} else if (blen > 0) {
		BIG_MUL_VEC(t, a, alen, b, blen);
	}

	/* drop a leading zero digit of the product, if any */
	if (t[rsize - 1] == 0) {
		tmp1.len = rsize - 1;
	} else {
		tmp1.len = rsize;
	}

	err = big_copy(result, &tmp1);

	result->sign = sign;

	big_finish(&tmp1);

	return (err);
}
1654
1655
/*
 * big_mont_mul()
 * Montgomery multiplication:  ret = a * b * R^-1 mod n, where
 * R = 2^(BIG_CHUNK_SIZE * n->len) and n0 is the precomputed constant
 * from big_n0() (the Montgomery reduction multiplier for n->value[0]).
 *
 * Caller must ensure that a < n, b < n, ret->size >= 2 * n->len + 1,
 * and that ret is not n.
 */
BIG_ERR_CODE
big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
{
	int i, j, nlen, needsubtract;
	BIG_CHUNK_TYPE *nn, *rr, *rrplusi;
	BIG_CHUNK_TYPE digit, c;
	BIG_ERR_CODE err;
#ifdef __amd64
#define	BIG_CPU_UNKNOWN	0
#define	BIG_CPU_AMD	1
#define	BIG_CPU_INTEL	2
	/* detected once per process; used to pick the pipelined path below */
	static int big_cpu = BIG_CPU_UNKNOWN;
	BIG_CHUNK_TYPE carry[BIGTMPSIZE];

	if (big_cpu == BIG_CPU_UNKNOWN) {
		big_cpu = 1 + bignum_on_intel();
	}
#endif	/* __amd64 */

	nlen = n->len;
	nn = n->value;

	rr = ret->value;

	/* full product first: ret = a * b */
	if ((err = big_mul(ret, a, b)) != BIG_OK) {
		return (err);
	}

	/* big_mul may have reallocated ret->value; re-fetch and zero-pad */
	rr = ret->value;
	for (i = ret->len; i < 2 * nlen + 1; i++) {
		rr[i] = 0;
	}

#ifdef __amd64	/* pipelining optimization for Intel 64, but not AMD64 */
	if ((big_cpu == BIG_CPU_INTEL) && (nlen <= BIGTMPSIZE)) {
		/*
		 * Perform the following in two for loops to reduce the
		 * dependency between computing the carryover bits with
		 * BIG_MUL_ADD_VEC() and adding them, thus improving pipelining.
		 */
		for (i = 0; i < nlen; i++) {
			rrplusi = rr + i;
			digit = *rrplusi * n0;
			carry[i] = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit);
		}
		for (i = 0; i < nlen; i++) {
			j = i + nlen;
			rr[j] += carry[i];
			/* rr[j] < carry[i] detects unsigned wrap: ripple up */
			while (rr[j] < carry[i]) {
				rr[++j] += 1;
				carry[i] = 1;
			}
		}
	} else
#endif	/* __amd64 */
	{	/* no pipelining optimization */
		/*
		 * REDC reduction: for each low digit, add a multiple of n
		 * chosen (via n0) to zero that digit, rippling the carry
		 * into the high half of rr.
		 */
		for (i = 0; i < nlen; i++) {
			rrplusi = rr + i;
			digit = *rrplusi * n0;
			c = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit);
			j = i + nlen;
			rr[j] += c;
			while (rr[j] < c) {
				rr[++j] += 1;
				c = 1;
			}
		}
	}

	/* if the reduced value rr[nlen .. 2*nlen] >= n, subtract n once */
	needsubtract = 0;
	if ((rr[2 * nlen] != 0))
		needsubtract = 1;
	else {
		for (i = 2 * nlen - 1; i >= nlen; i--) {
			if (rr[i] > nn[i - nlen]) {
				needsubtract = 1;
				break;
			} else if (rr[i] < nn[i - nlen]) {
				break;
			}
		}
	}
	if (needsubtract)
		big_sub_vec(rr, rr + nlen, nn, nlen);
	else {
		/* shift the result down into the low half of rr */
		for (i = 0; i < nlen; i++) {
			rr[i] = rr[i + nlen];
		}
	}

	/* Remove leading zeros, but keep at least 1 digit: */
	for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
		;
	ret->len = i + 1;

	return (BIG_OK);
}
1760
1761
1762 BIG_CHUNK_TYPE
big_n0(BIG_CHUNK_TYPE n)1763 big_n0(BIG_CHUNK_TYPE n)
1764 {
1765 int i;
1766 BIG_CHUNK_TYPE result, tmp;
1767
1768 result = 0;
1769 tmp = BIG_CHUNK_ALLBITS;
1770 for (i = 0; i < BIG_CHUNK_SIZE; i++) {
1771 if ((tmp & 1) == 1) {
1772 result = (result >> 1) | BIG_CHUNK_HIGHBIT;
1773 tmp = tmp - n;
1774 } else {
1775 result = (result >> 1);
1776 }
1777 tmp = tmp >> 1;
1778 }
1779
1780 return (result);
1781 }
1782
1783
1784 int
big_numbits(BIGNUM * n)1785 big_numbits(BIGNUM *n)
1786 {
1787 int i, j;
1788 BIG_CHUNK_TYPE t;
1789
1790 for (i = n->len - 1; i > 0; i--) {
1791 if (n->value[i] != 0) {
1792 break;
1793 }
1794 }
1795 t = n->value[i];
1796 for (j = BIG_CHUNK_SIZE; j > 0; j--) {
1797 if ((t & BIG_CHUNK_HIGHBIT) == 0) {
1798 t = t << 1;
1799 } else {
1800 return (BIG_CHUNK_SIZE * i + j);
1801 }
1802 }
1803 return (0);
1804 }
1805
1806
1807 /* caller must make sure that a < n */
1808 BIG_ERR_CODE
big_mont_rr(BIGNUM * result,BIGNUM * n)1809 big_mont_rr(BIGNUM *result, BIGNUM *n)
1810 {
1811 BIGNUM rr;
1812 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1813 int len, i;
1814 BIG_ERR_CODE err;
1815
1816 rr.malloced = 0;
1817 len = n->len;
1818
1819 if ((err = big_init1(&rr, 2 * len + 1,
1820 rrvalue, arraysize(rrvalue))) != BIG_OK) {
1821 return (err);
1822 }
1823
1824 for (i = 0; i < 2 * len; i++) {
1825 rr.value[i] = 0;
1826 }
1827 rr.value[2 * len] = 1;
1828 rr.len = 2 * len + 1;
1829 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1830 goto ret;
1831 }
1832 err = big_copy(result, &rr);
1833 ret:
1834 big_finish(&rr);
1835 return (err);
1836 }
1837
1838
1839 /* caller must make sure that a < n */
1840 BIG_ERR_CODE
big_mont_conv(BIGNUM * result,BIGNUM * a,BIGNUM * n,BIG_CHUNK_TYPE n0,BIGNUM * n_rr)1841 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
1842 BIGNUM *n_rr)
1843 {
1844 BIGNUM rr;
1845 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
1846 int len, i;
1847 BIG_ERR_CODE err;
1848
1849 rr.malloced = 0;
1850 len = n->len;
1851
1852 if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
1853 != BIG_OK) {
1854 return (err);
1855 }
1856
1857 if (n_rr == NULL) {
1858 for (i = 0; i < 2 * len; i++) {
1859 rr.value[i] = 0;
1860 }
1861 rr.value[2 * len] = 1;
1862 rr.len = 2 * len + 1;
1863 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
1864 goto ret;
1865 }
1866 n_rr = &rr;
1867 }
1868
1869 if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
1870 goto ret;
1871 }
1872 err = big_copy(result, &rr);
1873
1874 ret:
1875 big_finish(&rr);
1876 return (err);
1877 }
1878
1879
/*
 * Select which non-CRT modexp kernel serves as the software path:
 * the floating-point version when USE_FLOATING_POINT is configured,
 * otherwise the integer version.
 */
#ifdef USE_FLOATING_POINT
#define	big_modexp_ncp_float	big_modexp_ncp_sw
#else
#define	big_modexp_ncp_int	big_modexp_ncp_sw
#endif

/* window size for the sliding-window exponentiation below */
#define	MAX_EXP_BIT_GROUP_SIZE 6
/* only odd window values are precomputed, hence half of 2^groupbits */
#define	APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1))
1888
/*
 * Integer (non-floating-point) modular exponentiation kernel:
 * computes tmp = ma ^ e (in the Montgomery domain) using sliding-window
 * exponentiation with a table of odd powers of ma.
 *
 * NOTE(review): the final l-fold squaring loop writes big_mont_mul's
 * output to 'result' while continuing to square 'tmp'; this appears to
 * assume result == tmp (as big_modexp_ext() calls it) — confirm before
 * calling with distinct result/tmp.
 */
/* ARGSUSED */
static BIG_ERR_CODE
big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)

{
	BIGNUM apowers[APOWERS_MAX_SIZE];	/* odd powers of ma */
	BIGNUM tmp1;
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	int i, j, k, l, m, p;
	uint32_t bit, bitind, bitcount, groupbits, apowerssize;
	uint32_t nbits;
	BIG_ERR_CODE err;

	/* small exponents: plain square-and-multiply (window of 1 bit) */
	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}


	if ((err = big_init1(&tmp1, 2 * n->len + 1,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}

	/* clear the malloced bit to help cleanup */
	for (i = 0; i < apowerssize; i++) {
		apowers[i].malloced = 0;
	}

	for (i = 0; i < apowerssize; i++) {
		if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
		    BIG_OK) {
			goto ret;
		}
	}

	/* apowers[i] = ma^(2*i + 1): seed with ma, then step by ma^2 */
	(void) big_copy(&(apowers[0]), ma);

	if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
		goto ret;
	}
	(void) big_copy(ma, &tmp1);

	for (i = 1; i < apowerssize; i++) {
		if ((err = big_mont_mul(&tmp1, ma,
		    &(apowers[i - 1]), n, n0)) != BIG_OK) {
			goto ret;
		}
		(void) big_copy(&apowers[i], &tmp1);
	}

	/*
	 * Scan the exponent from the most significant bit down.
	 * p accumulates the current window's bits, k counts squarings
	 * owed before the table multiply, l counts trailing zero bits
	 * (squarings owed after it).
	 */
	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				/* between windows: just square */
				if ((err = big_mont_mul(tmp,
				    tmp, tmp, n, n0)) != BIG_OK) {
					goto ret;
				}
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					/* k squarings, table multiply, */
					/* then l more squarings */
					for (m = 0; m < k; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					if ((err = big_mont_mul(tmp, tmp,
					    &(apowers[p >> (l + 1)]),
					    n, n0)) != BIG_OK) {
						goto ret;
					}
					for (m = 0; m < l; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	/* flush the final, possibly partial, window */
	for (m = 0; m < k; m++) {
		if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	if (p != 0) {
		if ((err = big_mont_mul(tmp, tmp,
		    &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	for (m = 0; m < l; m++) {
		if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		big_finish(&(apowers[i]));
	}
	big_finish(&tmp1);

	return (err);
}
2022
2023
2024 #ifdef USE_FLOATING_POINT
2025
#ifdef _KERNEL

#include <sys/sysmacros.h>
#include <sys/regset.h>
#include <sys/fpu/fpusystm.h>

/* the alignment for block stores to save fp registers */
#define	FPR_ALIGN	(64)

/* save/restore the kernel FPU state around the floating-point kernel */
extern void big_savefp(kfpu_t *);
extern void big_restorefp(kfpu_t *);

#endif /* _KERNEL */
2039
/*
 * This version makes use of floating point for performance:  the
 * Montgomery multiplies are done by mont_mulf_noconv() on arrays of
 * doubles holding the number in 16- and 32-bit pieces (d16r / d32r),
 * converted to and from the 32-bit integer representation as needed.
 * Same sliding-window structure as big_modexp_ncp_int().
 */
static BIG_ERR_CODE
big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)
{

	int i, j, k, l, m, p;
	uint32_t bit, bitind, bitcount, nlen;
	double dn0;
	double *dn, *dt, *d16r, *d32r;
	uint32_t *nint, *prod;
	double *apowers[APOWERS_MAX_SIZE];
	uint32_t nbits, groupbits, apowerssize;
	BIG_ERR_CODE err = BIG_OK;

#ifdef _KERNEL
	/* over-allocate so the fpu save area can be FPR_ALIGN-aligned */
	uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
	kfpu_t *fpu;

#ifdef DEBUG
	if (!fpu_exists)
		return (BIG_GENERAL_ERR);
#endif

	fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
	big_savefp(fpu);

#endif /* _KERNEL */

	/* small exponents: window of 1 bit, no power table */
	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}

	/* the FP kernel works on 32-bit words regardless of chunk size */
	nlen = (BIG_CHUNK_SIZE / 32) * n->len;
	dn0 = (double)(n0 & 0xffff);

	dn = dt = d16r = d32r = NULL;
	nint = prod = NULL;
	for (i = 0; i < apowerssize; i++) {
		apowers[i] = NULL;
	}

	if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	for (i = 0; i < apowerssize; i++) {
		if ((apowers[i] = big_malloc((2 * nlen + 1) *
		    sizeof (double))) == NULL) {
			err = BIG_NO_MEM;
			goto ret;
		}
	}

	/* unpack ma into 32-bit words (split 64-bit chunks if needed) */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < ma->len; i++) {
		nint[i] = ma->value[i];
	}
	for (; i < nlen; i++) {
		nint[i] = 0;
	}
#else
	for (i = 0; i < ma->len; i++) {
		nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
		nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
	}
	for (i = ma->len * 2; i < nlen; i++) {
		nint[i] = 0;
	}
#endif
	conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);

	/* unpack the modulus n the same way */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < n->len; i++) {
		nint[i] = n->value[i];
	}
	for (; i < nlen; i++) {
		nint[i] = 0;
	}
#else
	for (i = 0; i < n->len; i++) {
		nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
		nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
	}
	for (i = n->len * 2; i < nlen; i++) {
		nint[i] = 0;
	}
#endif
	conv_i32_to_d32(dn, nint, nlen);

	/* build the odd-power table apowers[i] = ma^(2*i + 1) */
	mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
	conv_i32_to_d32(d32r, prod, nlen);
	for (i = 1; i < apowerssize; i++) {
		mont_mulf_noconv(prod, d32r, apowers[i - 1],
		    dt, dn, nint, nlen, dn0);
		conv_i32_to_d16(apowers[i], prod, nlen);
	}

	/* seed the accumulator (prod) from tmp */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < tmp->len; i++) {
		prod[i] = tmp->value[i];
	}
	for (; i < nlen + 1; i++) {
		prod[i] = 0;
	}
#else
	for (i = 0; i < tmp->len; i++) {
		prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
		prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
	}
	for (i = tmp->len * 2; i < nlen + 1; i++) {
		prod[i] = 0;
	}
#endif

	/*
	 * Sliding-window scan of the exponent, identical in structure to
	 * big_modexp_ncp_int(): k squarings, table multiply, l squarings.
	 */
	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				/* between windows: just square */
				conv_i32_to_d32_and_d16(d32r, d16r,
				    prod, nlen);
				mont_mulf_noconv(prod, d32r, d16r,
				    dt, dn, nint, nlen, dn0);
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					for (m = 0; m < k; m++) {
						conv_i32_to_d32_and_d16(d32r,
						    d16r, prod, nlen);
						mont_mulf_noconv(prod, d32r,
						    d16r, dt, dn, nint,
						    nlen, dn0);
					}
					conv_i32_to_d32(d32r, prod, nlen);
					mont_mulf_noconv(prod, d32r,
					    apowers[p >> (l + 1)],
					    dt, dn, nint, nlen, dn0);
					for (m = 0; m < l; m++) {
						conv_i32_to_d32_and_d16(d32r,
						    d16r, prod, nlen);
						mont_mulf_noconv(prod, d32r,
						    d16r, dt, dn, nint,
						    nlen, dn0);
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	/* flush the final, possibly partial, window */
	for (m = 0; m < k; m++) {
		conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
		mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
	}
	if (p != 0) {
		conv_i32_to_d32(d32r, prod, nlen);
		mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
		    dt, dn, nint, nlen, dn0);
	}
	for (m = 0; m < l; m++) {
		conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
		mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
	}

	/* repack prod into result and trim leading zero chunks */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < nlen; i++) {
		result->value[i] = prod[i];
	}
	for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
		;
#else
	for (i = 0; i < nlen / 2; i++) {
		result->value[i] = (uint64_t)(prod[2 * i]) +
		    (((uint64_t)(prod[2 * i + 1])) << 32);
	}
	for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
		;
#endif
	result->len = i + 1;

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		if (apowers[i] != NULL)
			big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
	}
	if (d32r != NULL) {
		big_free(d32r, nlen * sizeof (double));
	}
	if (d16r != NULL) {
		big_free(d16r, (2 * nlen + 1) * sizeof (double));
	}
	if (prod != NULL) {
		big_free(prod, (nlen + 1) * sizeof (uint32_t));
	}
	if (nint != NULL) {
		big_free(nint, nlen * sizeof (uint32_t));
	}
	if (dt != NULL) {
		big_free(dt, (4 * nlen + 2) * sizeof (double));
	}
	if (dn != NULL) {
		big_free(dn, nlen * sizeof (double));
	}

#ifdef _KERNEL
	big_restorefp(fpu);
#endif

	return (err);
}
2293
2294 #endif /* USE_FLOATING_POINT */
2295
2296
2297 BIG_ERR_CODE
big_modexp_ext(BIGNUM * result,BIGNUM * a,BIGNUM * e,BIGNUM * n,BIGNUM * n_rr,big_modexp_ncp_info_t * info)2298 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr,
2299 big_modexp_ncp_info_t *info)
2300 {
2301 BIGNUM ma, tmp, rr;
2302 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE];
2303 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
2304 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
2305 BIG_ERR_CODE err;
2306 BIG_CHUNK_TYPE n0;
2307
2308 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) !=
2309 BIG_OK) {
2310 return (err);
2311 }
2312 ma.len = 1;
2313 ma.value[0] = 0;
2314
2315 if ((err = big_init1(&tmp, 2 * n->len + 1,
2316 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
2317 goto ret1;
2318 }
2319
2320 /* clear the malloced bit to help cleanup */
2321 rr.malloced = 0;
2322
2323 if (n_rr == NULL) {
2324 if ((err = big_init1(&rr, 2 * n->len + 1,
2325 rrvalue, arraysize(rrvalue))) != BIG_OK) {
2326 goto ret2;
2327 }
2328 if (big_mont_rr(&rr, n) != BIG_OK) {
2329 goto ret;
2330 }
2331 n_rr = &rr;
2332 }
2333
2334 n0 = big_n0(n->value[0]);
2335
2336 if (big_cmp_abs(a, n) > 0) {
2337 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) {
2338 goto ret;
2339 }
2340 err = big_mont_conv(&ma, &ma, n, n0, n_rr);
2341 } else {
2342 err = big_mont_conv(&ma, a, n, n0, n_rr);
2343 }
2344 if (err != BIG_OK) {
2345 goto ret;
2346 }
2347
2348 tmp.len = 1;
2349 tmp.value[0] = 1;
2350 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) {
2351 goto ret;
2352 }
2353
2354 if ((info != NULL) && (info->func != NULL)) {
2355 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0,
2356 info->ncp, info->reqp);
2357 } else {
2358 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0);
2359 }
2360 if (err != BIG_OK) {
2361 goto ret;
2362 }
2363
2364 ma.value[0] = 1;
2365 ma.len = 1;
2366 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) {
2367 goto ret;
2368 }
2369 err = big_copy(result, &tmp);
2370
2371 ret:
2372 if (rr.malloced) {
2373 big_finish(&rr);
2374 }
2375 ret2:
2376 big_finish(&tmp);
2377 ret1:
2378 big_finish(&ma);
2379
2380 return (err);
2381 }
2382
/*
 * result = a ^ e mod n.  Convenience wrapper around big_modexp_ext()
 * with no hardware-offload info (pure software path).
 */
BIG_ERR_CODE
big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr)
{
	return (big_modexp_ext(result, a, e, n, n_rr, NULL));
}
2388
2389
/*
 * RSA-style modular exponentiation using the Chinese Remainder Theorem:
 * computes result = a ^ d mod (p * q) from the two half-size
 * exponentiations a ^ dmodpminus1 mod p and a ^ dmodqminus1 mod q,
 * recombined via pinvmodq (p^-1 mod q).  p_rr / q_rr are optional
 * precomputed Montgomery R^2 values for p and q.
 */
BIG_ERR_CODE
big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
    BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
    BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
{
	BIGNUM ap, aq, tmp;
	int alen, biglen, sign;
	BIG_ERR_CODE err;

	if (p->len > q->len) {
		biglen = p->len;
	} else {
		biglen = q->len;
	}

	if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
		goto ret2;
	}

	/*
	 * check whether a is too short - to avoid timing attacks
	 */
	alen = a->len;
	while ((alen > p->len) && (a->value[alen - 1] == 0)) {
		alen--;
	}
	if (alen < p->len + q->len) {
		/*
		 * a is too short, add p*q to it before
		 * taking it modulo p and q
		 * this will also affect timing, but this difference
		 * does not depend on p or q, only on a
		 * (in "normal" operation, this path will never be
		 * taken, so it is not a performance penalty)
		 */
		if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
			goto ret;
		}
	} else {
		if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
			goto ret;
		}
	}

	/* the two half-size exponentiations */
	if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
	    BIG_OK) {
		goto ret;
	}
	if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
	    BIG_OK) {
		goto ret;
	}
	/* Garner recombination: result = ap + p * ((aq - ap) * pinv mod q) */
	if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
		goto ret;
	}
	if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
		goto ret;
	}
	sign = tmp.sign;
	tmp.sign = 1;
	if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
		goto ret;
	}
	/* a negative remainder mod q maps to q - |remainder| */
	if ((sign == -1) && (!big_is_zero(&aq))) {
		(void) big_sub_pos(&aq, q, &aq);
	}
	if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
		goto ret;
	}
	err = big_add_abs(result, &ap, &tmp);

ret:
	big_finish(&tmp);
ret2:
	big_finish(&aq);
ret1:
	big_finish(&ap);

	return (err);
}
2488
2489
/*
 * CRT modular exponentiation, software path only:  thin wrapper around
 * big_modexp_crt_ext() with no hardware-offload info.
 */
BIG_ERR_CODE
big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
    BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
    BIGNUM *p_rr, BIGNUM *q_rr)
{
	return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
	    p, q, pinvmodq, p_rr, q_rr, NULL));
}
2498
2499
/*
 * Shared small constants.  The initializer order appears to be
 * {len, size, sign, malloced, value} — confirm against the BIGNUM
 * definition before relying on individual fields.
 */
static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
#if !defined(NO_BIG_ONE)
BIGNUM big_One = {1, 1, 1, 0, onearr};
#endif

static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
#if !defined(NO_BIG_TWO)
BIGNUM big_Two = {1, 1, 1, 0, twoarr};
#endif

static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
static BIGNUM big_Four = {1, 1, 1, 0, fourarr};
2512
2513
2514 BIG_ERR_CODE
big_sqrt_pos(BIGNUM * result,BIGNUM * n)2515 big_sqrt_pos(BIGNUM *result, BIGNUM *n)
2516 {
2517 BIGNUM *high, *low, *mid, *t;
2518 BIGNUM t1, t2, t3, prod;
2519 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
2520 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
2521 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
2522 BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE];
2523 int i, diff;
2524 uint32_t nbits, nrootbits, highbits;
2525 BIG_ERR_CODE err;
2526
2527 nbits = big_numbits(n);
2528
2529 if ((err = big_init1(&t1, n->len + 1,
2530 t1value, arraysize(t1value))) != BIG_OK)
2531 return (err);
2532 if ((err = big_init1(&t2, n->len + 1,
2533 t2value, arraysize(t2value))) != BIG_OK)
2534 goto ret1;
2535 if ((err = big_init1(&t3, n->len + 1,
2536 t3value, arraysize(t3value))) != BIG_OK)
2537 goto ret2;
2538 if ((err = big_init1(&prod, n->len + 1,
2539 prodvalue, arraysize(prodvalue))) != BIG_OK)
2540 goto ret3;
2541
2542 nrootbits = (nbits + 1) / 2;
2543 t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
2544 for (i = 0; i < t1.len; i++) {
2545 t1.value[i] = 0;
2546 t2.value[i] = BIG_CHUNK_ALLBITS;
2547 }
2548 highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
2549 if (highbits == BIG_CHUNK_SIZE) {
2550 t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
2551 t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
2552 } else {
2553 t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
2554 t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
2555 }
2556
2557 high = &t2;
2558 low = &t1;
2559 mid = &t3;
2560
2561 if ((err = big_mul(&prod, high, high)) != BIG_OK) {
2562 goto ret;
2563 }
2564 diff = big_cmp_abs(&prod, n);
2565 if (diff <= 0) {
2566 err = big_copy(result, high);
2567 goto ret;
2568 }
2569
2570 (void) big_sub_pos(mid, high, low);
2571 while (big_cmp_abs(&big_One, mid) != 0) {
2572 (void) big_add_abs(mid, high, low);
2573 (void) big_half_pos(mid, mid);
2574 if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
2575 goto ret;
2576 diff = big_cmp_abs(&prod, n);
2577 if (diff > 0) {
2578 t = high;
2579 high = mid;
2580 mid = t;
2581 } else if (diff < 0) {
2582 t = low;
2583 low = mid;
2584 mid = t;
2585 } else {
2586 err = big_copy(result, low);
2587 goto ret;
2588 }
2589 (void) big_sub_pos(mid, high, low);
2590 }
2591
2592 err = big_copy(result, low);
2593 ret:
2594 if (prod.malloced) big_finish(&prod);
2595 ret3:
2596 if (t3.malloced) big_finish(&t3);
2597 ret2:
2598 if (t2.malloced) big_finish(&t2);
2599 ret1:
2600 if (t1.malloced) big_finish(&t1);
2601
2602 return (err);
2603 }
2604
2605
/*
 * Compute the Jacobi symbol (nn / mm) of the positive bignums nn and mm
 * into *jac (-1, 0 or +1), using the binary algorithm: repeatedly strip
 * factors of two (flipping the sign per the (2/.) rule) and reduce via
 * quadratic reciprocity.  Returns BIG_OK, or an allocation error from
 * big_init1(); *jac is set to 0 when the symbol degenerates.
 */
BIG_ERR_CODE
big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
{
	BIGNUM *t, *tmp2, *m, *n;
	BIGNUM t1, t2, t3;
	BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
	int len, err;

	/* symbol is 0 when nn is 0, or when both operands are even */
	if (big_is_zero(nn) ||
	    (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
		*jac = 0;
		return (BIG_OK);
	}

	/* working copies must be able to hold the larger operand */
	if (nn->len > mm->len) {
		len = nn->len;
	} else {
		len = mm->len;
	}

	if ((err = big_init1(&t1, len,
	    t1value, arraysize(t1value))) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&t2, len,
	    t2value, arraysize(t2value))) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&t3, len,
	    t3value, arraysize(t3value))) != BIG_OK) {
		goto ret2;
	}

	n = &t1;
	m = &t2;
	tmp2 = &t3;

	(void) big_copy(n, nn);
	(void) big_copy(m, mm);

	*jac = 1;
	while (big_cmp_abs(&big_One, m) != 0) {
		if (big_is_zero(n)) {
			/* gcd > 1: symbol is 0 */
			*jac = 0;
			goto ret;
		}
		if ((m->value[0] & 1) == 0) {
			/* halve m; flip sign when n is 3 or 5 (mod 8) */
			if (((n->value[0] & 7) == 3) ||
			    ((n->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(m, m);
		} else if ((n->value[0] & 1) == 0) {
			/* halve n; flip sign when m is 3 or 5 (mod 8) */
			if (((m->value[0] & 7) == 3) ||
			    ((m->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(n, n);
		} else {
			/*
			 * Both odd: quadratic reciprocity flips the sign
			 * when both are 3 (mod 4); then reduce m mod n
			 * and swap the roles of m and n.
			 */
			if (((m->value[0] & 3) == 3) &&
			    ((n->value[0] & 3) == 3)) {
				*jac = -*jac;
			}
			if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
				goto ret;
			}
			t = tmp2;
			tmp2 = m;
			m = n;
			n = t;
		}
	}
	err = BIG_OK;

ret:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}
2689
2690
/*
 * Lucas sequence helper for the primality test below: compute
 * Lk = V_k mod n and Lkminus1 = V_(k-1) mod n, where V_0 = 2 and
 * V_1 = p (NOTE(review): the doubling identities used below match the
 * V-sequence with Q = 1 -- confirm against the Lucas test literature).
 * The bits of k are consumed most-significant first.
 */
BIG_ERR_CODE
big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
{
	int i;
	uint32_t m, w;
	BIG_CHUNK_TYPE bit;
	BIGNUM ki, tmp, tmp2;
	BIG_CHUNK_TYPE kivalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
	BIG_ERR_CODE err;

	/* base case k == 1: (V_0, V_1) = (2, p) */
	if (big_cmp_abs(k, &big_One) == 0) {
		(void) big_copy(Lk, p);
		(void) big_copy(Lkminus1, &big_Two);
		return (BIG_OK);
	}

	if ((err = big_init1(&ki, k->len + 1,
	    kivalue, arraysize(kivalue))) != BIG_OK)
		return (err);

	/* tmp holds full products, so it needs twice the modulus length */
	if ((err = big_init1(&tmp, 2 * n->len + 1,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp2, n->len,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret2;

	/*
	 * ki = 2^t - k where 2^t is the smallest power of two >= k.
	 * Its bits, scanned MSB-first in step with the loop below,
	 * select which of the two doubling rules applies at each step.
	 */
	m = big_numbits(k);
	ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
	w = (m - 1) / BIG_CHUNK_SIZE;
	bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
	for (i = 0; i < ki.len; i++) {
		ki.value[i] = 0;
	}
	ki.value[ki.len - 1] = bit;
	if (big_cmp_abs(k, &ki) != 0) {
		(void) big_double(&ki, &ki);
	}
	(void) big_sub_pos(&ki, &ki, k);

	(void) big_copy(Lk, p);
	(void) big_copy(Lkminus1, &big_Two);

	for (i = 0; i < m; i++) {
		/* tmp2 = Lk * Lkminus1 - p (mod n) */
		if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
			goto ret;
		}
		/* add n first so the subtraction cannot go negative */
		(void) big_add_abs(&tmp, &tmp, n);
		(void) big_sub_pos(&tmp, &tmp, p);
		if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
			goto ret;
		}
		if ((ki.value[w] & bit) != 0) {
			/* Lkminus1 = Lkminus1^2 - 2 (mod n); Lk = tmp2 */
			if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lk, &tmp2);
		} else {
			/* Lk = Lk^2 - 2 (mod n); Lkminus1 = tmp2 */
			if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lkminus1, &tmp2);
		}
		/* advance to the next lower bit, crossing chunk boundaries */
		bit = bit >> 1;
		if (bit == 0) {
			bit = BIG_CHUNK_HIGHBIT;
			w--;
		}
	}

	err = BIG_OK;

ret:
	if (tmp2.malloced) big_finish(&tmp2);
ret2:
	if (tmp.malloced) big_finish(&tmp);
ret1:
	if (ki.malloced) big_finish(&ki);

	return (err);
}
2787
2788
/*
 * Probabilistic primality test of a positive bignum n, optionally using
 * accelerated modexp state in info (may be NULL).  Returns BIG_TRUE,
 * BIG_FALSE, or a BIG_* error code on resource failure.
 *
 * Three stages: a base-2 Miller-Rabin round, a perfect-square check,
 * and a Lucas sequence test (via big_Jacobi_pos/big_Lucas).
 */
BIG_ERR_CODE
big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
{
	BIGNUM o, nminus1, tmp, Lkminus1, Lk;
	BIG_CHUNK_TYPE ovalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE];
	BIG_ERR_CODE err;
	int e, i, jac;

	/* trivial cases: 1 is not prime, 2 is, any other even is not */
	if (big_cmp_abs(n, &big_One) == 0) {
		return (BIG_FALSE);
	}
	if (big_cmp_abs(n, &big_Two) == 0) {
		return (BIG_TRUE);
	}
	if ((n->value[0] & 1) == 0) {
		return (BIG_FALSE);
	}

	if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
	    BIG_OK) {
		return (err);
	}

	if ((err = big_init1(&nminus1, n->len,
	    nminus1value, arraysize(nminus1value))) != BIG_OK) {
		goto ret1;
	}

	/* tmp holds squares, so it needs twice the length of n */
	if ((err = big_init1(&tmp, 2 * n->len,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		goto ret2;
	}

	if ((err = big_init1(&Lkminus1, n->len,
	    Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
		goto ret3;
	}

	if ((err = big_init1(&Lk, n->len,
	    Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
		goto ret4;
	}

	/* Miller-Rabin decomposition: n - 1 = o * 2^e with o odd */
	(void) big_sub_pos(&o, n, &big_One); /* cannot fail */
	(void) big_copy(&nminus1, &o); /* cannot fail */
	e = 0;
	while ((o.value[0] & 1) == 0) {
		e++;
		(void) big_half_pos(&o, &o); /* cannot fail */
	}
	/* tmp = 2^o mod n: the base-2 Miller-Rabin starting value */
	if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
	    BIG_OK) {
		goto ret;
	}

	/* square up to e times; a prime must reach 1 or n-1 */
	i = 0;
	while ((i < e) &&
	    (big_cmp_abs(&tmp, &big_One) != 0) &&
	    (big_cmp_abs(&tmp, &nminus1) != 0)) {
		if ((err =
		    big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
		    BIG_OK)
			goto ret;
		i++;
	}

	/* composite unless we stopped at n-1, or at 1 before squaring */
	if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
	    ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
		err = BIG_FALSE;
		goto ret;
	}

	/* reject perfect squares: sqrt(n)^2 == n means composite */
	if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
		goto ret;
	}

	if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
		goto ret;
	}
	if (big_cmp_abs(&tmp, n) == 0) {
		err = BIG_FALSE;
		goto ret;
	}

	/* find the smallest o >= 3 with Jacobi(o^2 - 4, n) == -1 */
	(void) big_copy(&o, &big_Two);
	do {
		(void) big_add_abs(&o, &o, &big_One);
		if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
			goto ret;
		}
		(void) big_sub_pos(&tmp, &tmp, &big_Four);
		if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
			goto ret;
		}
	} while (jac != -1);

	/* Lucas test with parameter o and index k = n + 1 */
	(void) big_add_abs(&tmp, n, &big_One);
	if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
		goto ret;
	}

	/* probable prime iff V_n == o and V_(n+1) == 2 (mod n) */
	if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
	    (big_cmp_abs(&Lk, &big_Two) == 0)) {
		err = BIG_TRUE;
	} else {
		err = BIG_FALSE;
	}

ret:
	if (Lk.malloced) big_finish(&Lk);
ret4:
	if (Lkminus1.malloced) big_finish(&Lkminus1);
ret3:
	if (tmp.malloced) big_finish(&tmp);
ret2:
	if (nminus1.malloced) big_finish(&nminus1);
ret1:
	if (o.malloced) big_finish(&o);

	return (err);
}
2914
2915
2916 BIG_ERR_CODE
big_isprime_pos(BIGNUM * n)2917 big_isprime_pos(BIGNUM *n)
2918 {
2919 return (big_isprime_pos_ext(n, NULL));
2920 }
2921
2922
2923 #define SIEVESIZE 1000
2924
2925
2926 BIG_ERR_CODE
big_nextprime_pos_ext(BIGNUM * result,BIGNUM * n,big_modexp_ncp_info_t * info)2927 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
2928 {
2929 static const uint32_t smallprimes[] = {
2930 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
2931 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 };
2932 BIG_ERR_CODE err;
2933 int sieve[SIEVESIZE];
2934 int i;
2935 uint32_t off, p;
2936
2937 if ((err = big_copy(result, n)) != BIG_OK) {
2938 return (err);
2939 }
2940 result->value[0] |= 1;
2941 /* CONSTCOND */
2942 while (1) {
2943 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
2944 for (i = 0;
2945 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
2946 p = smallprimes[i];
2947 off = big_modhalf_pos(result, p);
2948 off = p - off;
2949 if ((off % 2) == 1) {
2950 off = off + p;
2951 }
2952 off = off / 2;
2953 while (off < SIEVESIZE) {
2954 sieve[off] = 1;
2955 off = off + p;
2956 }
2957 }
2958
2959 for (i = 0; i < SIEVESIZE; i++) {
2960 if (sieve[i] == 0) {
2961 err = big_isprime_pos_ext(result, info);
2962 if (err != BIG_FALSE) {
2963 if (err != BIG_TRUE) {
2964 return (err);
2965 } else {
2966 goto out;
2967 }
2968 }
2969
2970 }
2971 if ((err = big_add_abs(result, result, &big_Two)) !=
2972 BIG_OK) {
2973 return (err);
2974 }
2975 }
2976 }
2977
2978 out:
2979 return (BIG_OK);
2980 }
2981
2982
2983 BIG_ERR_CODE
big_nextprime_pos(BIGNUM * result,BIGNUM * n)2984 big_nextprime_pos(BIGNUM *result, BIGNUM *n)
2985 {
2986 return (big_nextprime_pos_ext(result, n, NULL));
2987 }
2988
2989
2990 BIG_ERR_CODE
big_nextprime_pos_slow(BIGNUM * result,BIGNUM * n)2991 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
2992 {
2993 BIG_ERR_CODE err;
2994
2995
2996 if ((err = big_copy(result, n)) != BIG_OK)
2997 return (err);
2998 result->value[0] |= 1;
2999 while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
3000 if (err != BIG_FALSE)
3001 return (err);
3002 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
3003 return (err);
3004 }
3005 return (BIG_OK);
3006 }
3007
3008
3009 /*
3010 * given m and e, computes the rest in the equation
3011 * gcd(m, e) = cm * m + ce * e
3012 */
3013 BIG_ERR_CODE
big_ext_gcd_pos(BIGNUM * gcd,BIGNUM * cm,BIGNUM * ce,BIGNUM * m,BIGNUM * e)3014 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
3015 {
3016 BIGNUM *xi, *ri, *riminus1, *riminus2, *t;
3017 BIGNUM *vmi, *vei, *vmiminus1, *veiminus1;
3018 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp;
3019 BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
3020 BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
3021 BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
3022 BIG_CHUNK_TYPE t4value[BIGTMPSIZE];
3023 BIG_CHUNK_TYPE t5value[BIGTMPSIZE];
3024 BIG_CHUNK_TYPE t6value[BIGTMPSIZE];
3025 BIG_CHUNK_TYPE t7value[BIGTMPSIZE];
3026 BIG_CHUNK_TYPE t8value[BIGTMPSIZE];
3027 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
3028 BIG_ERR_CODE err;
3029 int len;
3030
3031 if (big_cmp_abs(m, e) >= 0) {
3032 len = m->len;
3033 } else {
3034 len = e->len;
3035 }
3036
3037 if ((err = big_init1(&t1, len,
3038 t1value, arraysize(t1value))) != BIG_OK) {
3039 return (err);
3040 }
3041 if ((err = big_init1(&t2, len,
3042 t2value, arraysize(t2value))) != BIG_OK) {
3043 goto ret1;
3044 }
3045 if ((err = big_init1(&t3, len,
3046 t3value, arraysize(t3value))) != BIG_OK) {
3047 goto ret2;
3048 }
3049 if ((err = big_init1(&t4, len,
3050 t4value, arraysize(t3value))) != BIG_OK) {
3051 goto ret3;
3052 }
3053 if ((err = big_init1(&t5, len,
3054 t5value, arraysize(t5value))) != BIG_OK) {
3055 goto ret4;
3056 }
3057 if ((err = big_init1(&t6, len,
3058 t6value, arraysize(t6value))) != BIG_OK) {
3059 goto ret5;
3060 }
3061 if ((err = big_init1(&t7, len,
3062 t7value, arraysize(t7value))) != BIG_OK) {
3063 goto ret6;
3064 }
3065 if ((err = big_init1(&t8, len,
3066 t8value, arraysize(t8value))) != BIG_OK) {
3067 goto ret7;
3068 }
3069
3070 if ((err = big_init1(&tmp, 2 * len,
3071 tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
3072 goto ret8;
3073 }
3074
3075 ri = &t1;
3076 ri->value[0] = 1;
3077 ri->len = 1;
3078 xi = &t2;
3079 riminus1 = &t3;
3080 riminus2 = &t4;
3081 vmi = &t5;
3082 vei = &t6;
3083 vmiminus1 = &t7;
3084 veiminus1 = &t8;
3085
3086 (void) big_copy(vmiminus1, &big_One);
3087 (void) big_copy(vmi, &big_One);
3088 (void) big_copy(veiminus1, &big_One);
3089 (void) big_copy(xi, &big_One);
3090 vei->len = 1;
3091 vei->value[0] = 0;
3092
3093 (void) big_copy(riminus1, m);
3094 (void) big_copy(ri, e);
3095
3096 while (!big_is_zero(ri)) {
3097 t = riminus2;
3098 riminus2 = riminus1;
3099 riminus1 = ri;
3100 ri = t;
3101 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
3102 goto ret;
3103 }
3104 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
3105 goto ret;
3106 }
3107 t = vmiminus1;
3108 vmiminus1 = vmi;
3109 vmi = t;
3110 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
3111 goto ret;
3112 }
3113 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
3114 goto ret;
3115 }
3116 t = veiminus1;
3117 veiminus1 = vei;
3118 vei = t;
3119 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
3120 BIG_OK) {
3121 goto ret;
3122 }
3123 }
3124 if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
3125 goto ret;
3126 }
3127 if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
3128 goto ret;
3129 }
3130 if (ce != NULL) {
3131 err = big_copy(ce, vei);
3132 }
3133 ret:
3134 if (tmp.malloced) big_finish(&tmp);
3135 ret8:
3136 if (t8.malloced) big_finish(&t8);
3137 ret7:
3138 if (t7.malloced) big_finish(&t7);
3139 ret6:
3140 if (t6.malloced) big_finish(&t6);
3141 ret5:
3142 if (t5.malloced) big_finish(&t5);
3143 ret4:
3144 if (t4.malloced) big_finish(&t4);
3145 ret3:
3146 if (t3.malloced) big_finish(&t3);
3147 ret2:
3148 if (t2.malloced) big_finish(&t2);
3149 ret1:
3150 if (t1.malloced) big_finish(&t1);
3151
3152 return (err);
3153 }
3154
/*
 * Get a rlen-bit random number in BIGNUM format.  Caller-supplied
 * (*rfunc)(void *dbuf, size_t dlen) must return 0 for success and
 * -1 for failure.  Note: (*rfunc)() takes length in bytes, not bits.
 * On success the result is exactly rlen bits with the top bit set.
 */
BIG_ERR_CODE
big_random(BIGNUM *r, size_t rlen, int (*rfunc)(void *, size_t))
{
	size_t rwords, rbytes;
	int shift;

	if (r == NULL || rlen == 0 || rfunc == NULL)
		return (BIG_INVALID_ARGS);

	/*
	 * Convert rlen bits to r->len words (32- or 64-bit), rbytes bytes
	 * and extend r if it's not big enough to hold the random number.
	 */
	rwords = BITLEN2BIGNUMLEN(rlen);
	rbytes = rwords * sizeof (BIG_CHUNK_TYPE);
	if (big_extend(r, rwords) != BIG_OK)
		return (BIG_NO_MEM);
#ifdef BIGNUM_CHUNK_32
	r->len = rwords;
#else
	/* NOTE(review): cast presumably narrows size_t to a 32-bit len field */
	r->len = (uint32_t)rwords;
#endif

	/* fill all rwords chunks with bytes from the caller's generator */
	if ((*rfunc)(r->value, rbytes) < 0)
		return (BIG_NO_RANDOM);

	/* set the top bit so the number has full bit length after shifting */
	r->value[rwords - 1] |= BIG_CHUNK_HIGHBIT;

	/*
	 * If the bit length is not a word boundary, shift the most
	 * significant word so that we have an exactly rlen-long number.
	 */
	if ((shift = rlen % BIG_CHUNK_SIZE) != 0)
		r->value[rwords - 1] >>= (BIG_CHUNK_SIZE - shift);

	r->sign = 1; /* non-negative */

	return (BIG_OK);
}
3199