1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Configuration guide 28 * ------------------- 29 * 30 * There are 4 preprocessor symbols used to configure the bignum 31 * implementation. This file contains no logic to configure based on 32 * processor; we leave that to the Makefiles to specify. 33 * 34 * USE_FLOATING_POINT 35 * Meaning: There is support for a fast floating-point implementation of 36 * Montgomery multiply. 37 * 38 * PSR_MUL 39 * Meaning: There are processor-specific versions of the low level 40 * functions to implement big_mul. Those functions are: big_mul_set_vec, 41 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support 42 * for all 4 functions. You cannot pick and choose which subset of these 43 * functions to support; that would lead to a rat's nest of #ifdefs. 44 * 45 * HWCAP 46 * Meaning: Call multiply support functions through a function pointer. 47 * On x86, there are multiple implementations for different hardware 48 * capabilities, such as MMX, SSE2, etc. 
Tests are made at run-time, when 49 * a function is first used. So, the support functions are called through 50 * a function pointer. There is no need for that on Sparc, because there 51 * is only one implementation; support functions are called directly. 52 * Later, if there were some new VIS instruction, or something, and a 53 * run-time test were needed, rather than variant kernel modules and 54 * libraries, then HWCAP would be defined for Sparc, as well. 55 * 56 * UMUL64 57 * Meaning: It is safe to use generic C code that assumes the existence 58 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined, 59 * then the generic code for big_mul_add_vec() must necessarily be very slow, 60 * because it must fall back to using 16 x 16 --> 32 bit multiplication. 61 * 62 */ 63 64 65 #include <sys/types.h> 66 #include "bignum.h" 67 68 #ifdef _KERNEL 69 #include <sys/ddi.h> 70 #include <sys/mdesc.h> 71 #include <sys/crypto/common.h> 72 73 #include <sys/kmem.h> 74 #include <sys/param.h> 75 #include <sys/sunddi.h> 76 77 #else 78 #include <stdlib.h> 79 #include <stdio.h> 80 #include <assert.h> 81 #define ASSERT assert 82 #endif /* _KERNEL */ 83 84 #ifdef __amd64 85 #ifdef _KERNEL 86 #include <sys/x86_archext.h> /* cpuid_getvendor() */ 87 #include <sys/cpuvar.h> 88 #else 89 #include <sys/auxv.h> /* getisax() */ 90 #endif /* _KERNEL */ 91 #endif /* __amd64 */ 92 93 94 #ifdef _LP64 /* truncate 64-bit size_t to 32-bits */ 95 #define UI32(ui) ((uint32_t)ui) 96 #else /* size_t already 32-bits */ 97 #define UI32(ui) (ui) 98 #endif 99 100 101 #ifdef _KERNEL 102 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP) 103 #define big_free(ptr, size) kmem_free(ptr, size) 104 105 /* 106 * big_realloc() 107 * Allocate memory of newsize bytes and copy oldsize bytes 108 * to the newly-allocated memory, then free the 109 * previously-allocated memory. 
110 * Note: newsize must be > oldsize 111 */ 112 void * 113 big_realloc(void *from, size_t oldsize, size_t newsize) 114 { 115 void *rv; 116 117 rv = kmem_alloc(newsize, KM_NOSLEEP); 118 if (rv != NULL) 119 bcopy(from, rv, oldsize); 120 kmem_free(from, oldsize); 121 return (rv); 122 } 123 124 #else /* _KERNEL */ 125 126 #ifndef MALLOC_DEBUG 127 128 #define big_malloc(size) malloc(size) 129 #define big_free(ptr, size) free(ptr) 130 131 #else 132 133 void 134 big_free(void *ptr, size_t size) 135 { 136 printf("freed %d bytes at %p\n", size, ptr); 137 free(ptr); 138 } 139 140 void * 141 big_malloc(size_t size) 142 { 143 void *rv; 144 rv = malloc(size); 145 printf("malloced %d bytes, addr:%p\n", size, rv); 146 return (rv); 147 } 148 #endif /* MALLOC_DEBUG */ 149 150 #define big_realloc(x, y, z) realloc((x), (z)) 151 152 153 /* 154 * printbignum() 155 * Print a BIGNUM type to stdout. 156 */ 157 void 158 printbignum(char *aname, BIGNUM *a) 159 { 160 int i; 161 162 (void) printf("\n%s\n%d\n", aname, a->sign*a->len); 163 for (i = a->len - 1; i >= 0; i--) { 164 #ifdef BIGNUM_CHUNK_32 165 (void) printf("%08x ", a->value[i]); 166 if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) { 167 (void) printf("\n"); 168 } 169 #else 170 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32), 171 (uint32_t)((a->value[i]) & 0xffffffff)); 172 if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */ 173 (void) printf("\n"); 174 } 175 #endif 176 } 177 (void) printf("\n"); 178 } 179 180 #endif /* _KERNEL */ 181 182 183 #ifdef __amd64 184 /* 185 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64). 186 */ 187 static int 188 bignum_on_intel(void) 189 { 190 #ifdef _KERNEL 191 return (cpuid_getvendor(CPU) == X86_VENDOR_Intel); 192 #else 193 uint_t ui; 194 (void) getisax(&ui, 1); 195 return ((ui & AV_386_AMD_MMX) == 0); 196 #endif /* _KERNEL */ 197 } 198 #endif /* __amd64 */ 199 200 201 /* 202 * big_init() 203 * Initialize and allocate memory for a BIGNUM type. 
204 * 205 * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0) 206 * 207 * Note: call big_finish() to free memory allocated by big_init(). 208 * 209 * Input: 210 * number Uninitialized memory for BIGNUM 211 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM 212 * 213 * Output: 214 * number Initialized BIGNUM 215 * 216 * Return BIG_OK on success or BIG_NO_MEM for an allocation error. 217 */ 218 BIG_ERR_CODE 219 big_init(BIGNUM *number, int size) 220 { 221 number->value = big_malloc(BIGNUM_WORDSIZE * size); 222 if (number->value == NULL) { 223 return (BIG_NO_MEM); 224 } 225 number->size = size; 226 number->len = 0; 227 number->sign = 1; 228 number->malloced = 1; 229 return (BIG_OK); 230 } 231 232 233 /* 234 * big_init1() 235 * Initialize and, if needed, allocate memory for a BIGNUM type. 236 * Use the buffer passed, buf, if any, instad of allocating memory 237 * if it's at least "size" bytes. 238 * 239 * Note: call big_finish() to free memory allocated by big_init(). 240 * 241 * Input: 242 * number Uninitialized memory for BIGNUM 243 * size Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM 244 * buf Buffer for storing a BIGNUM. 245 * If NULL, big_init1() will allocate a buffer 246 * bufsize Size, in BIG_CHUNK_SIZE_bit words, of buf 247 * 248 * Output: 249 * number Initialized BIGNUM 250 * 251 * Return BIG_OK on success or BIG_NO_MEM for an allocation error. 
252 */ 253 BIG_ERR_CODE 254 big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize) 255 { 256 if ((buf == NULL) || (size > bufsize)) { 257 number->value = big_malloc(BIGNUM_WORDSIZE * size); 258 if (number->value == NULL) { 259 return (BIG_NO_MEM); 260 } 261 number->size = size; 262 number->malloced = 1; 263 } else { 264 number->value = buf; 265 number->size = bufsize; 266 number->malloced = 0; 267 } 268 number->len = 0; 269 number->sign = 1; 270 271 return (BIG_OK); 272 } 273 274 275 /* 276 * big_finish() 277 * Free memory, if any, allocated by big_init() or big_init1(). 278 */ 279 void 280 big_finish(BIGNUM *number) 281 { 282 if (number->malloced == 1) { 283 big_free(number->value, BIGNUM_WORDSIZE * number->size); 284 number->malloced = 0; 285 } 286 } 287 288 289 /* 290 * bn->size should be at least 291 * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes 292 * converts from byte-big-endian format to bignum format (words in 293 * little endian order, but bytes within the words big endian) 294 */ 295 void 296 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len) 297 { 298 int i, j; 299 uint32_t offs; 300 const uint32_t slen = UI32(len); 301 BIG_CHUNK_TYPE word; 302 uchar_t *knwordp; 303 304 if (slen == 0) { 305 bn->len = 1; 306 bn->value[0] = 0; 307 return; 308 } 309 310 offs = slen % BIGNUM_WORDSIZE; 311 bn->len = slen / BIGNUM_WORDSIZE; 312 313 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) { 314 knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]); 315 word = knwordp[0]; 316 for (j = 1; j < BIGNUM_WORDSIZE; j++) { 317 word = (word << BITSINBYTE) + knwordp[j]; 318 } 319 bn->value[i] = word; 320 } 321 if (offs > 0) { 322 word = kn[0]; 323 for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i]; 324 bn->value[bn->len++] = word; 325 } 326 while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) { 327 bn->len --; 328 } 329 } 330 331 332 /* 333 * copies the least significant len bytes if 334 * len < bn->len * BIGNUM_WORDSIZE 335 * converts from bignum 
format to byte-big-endian format. 336 * bignum format is words of type BIG_CHUNK_TYPE in little endian order. 337 */ 338 void 339 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len) 340 { 341 int i, j; 342 uint32_t offs; 343 const uint32_t slen = UI32(len); 344 BIG_CHUNK_TYPE word; 345 346 if (len < BIGNUM_WORDSIZE * bn->len) { 347 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) { 348 word = bn->value[i]; 349 for (j = 0; j < BIGNUM_WORDSIZE; j++) { 350 kn[slen - BIGNUM_WORDSIZE * i - j - 1] = 351 word & 0xff; 352 word = word >> BITSINBYTE; 353 } 354 } 355 offs = slen % BIGNUM_WORDSIZE; 356 if (offs > 0) { 357 word = bn->value[slen / BIGNUM_WORDSIZE]; 358 for (i = slen % BIGNUM_WORDSIZE; i > 0; i --) { 359 kn[i - 1] = word & 0xff; 360 word = word >> BITSINBYTE; 361 } 362 } 363 } else { 364 for (i = 0; i < bn->len; i++) { 365 word = bn->value[i]; 366 for (j = 0; j < BIGNUM_WORDSIZE; j++) { 367 kn[slen - BIGNUM_WORDSIZE * i - j - 1] = 368 word & 0xff; 369 word = word >> BITSINBYTE; 370 } 371 } 372 for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) { 373 kn[i] = 0; 374 } 375 } 376 } 377 378 379 int 380 big_bitlength(BIGNUM *a) 381 { 382 int l = 0, b = 0; 383 BIG_CHUNK_TYPE c; 384 385 l = a->len - 1; 386 while ((l > 0) && (a->value[l] == 0)) { 387 l--; 388 } 389 b = BIG_CHUNK_SIZE; 390 c = a->value[l]; 391 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) { 392 c = c << 1; 393 b--; 394 } 395 396 return (l * BIG_CHUNK_SIZE + b); 397 } 398 399 400 /* 401 * big_copy() 402 * Copy BIGNUM src to dest, allocating memory if needed. 
403 */ 404 BIG_ERR_CODE 405 big_copy(BIGNUM *dest, BIGNUM *src) 406 { 407 BIG_CHUNK_TYPE *newptr; 408 int i, len; 409 410 len = src->len; 411 while ((len > 1) && (src->value[len - 1] == 0)) { 412 len--; 413 } 414 src->len = len; 415 if (dest->size < len) { 416 if (dest->malloced == 1) { 417 newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value, 418 BIGNUM_WORDSIZE * dest->size, 419 BIGNUM_WORDSIZE * len); 420 } else { 421 newptr = (BIG_CHUNK_TYPE *) 422 big_malloc(BIGNUM_WORDSIZE * len); 423 if (newptr != NULL) { 424 dest->malloced = 1; 425 } 426 } 427 if (newptr == NULL) { 428 return (BIG_NO_MEM); 429 } 430 dest->value = newptr; 431 dest->size = len; 432 } 433 dest->len = len; 434 dest->sign = src->sign; 435 for (i = 0; i < len; i++) { 436 dest->value[i] = src->value[i]; 437 } 438 439 return (BIG_OK); 440 } 441 442 443 /* 444 * big_extend() 445 * Allocate memory to extend BIGNUM number to size bignum chunks, 446 * if not at least that size already. 447 */ 448 BIG_ERR_CODE 449 big_extend(BIGNUM *number, int size) 450 { 451 BIG_CHUNK_TYPE *newptr; 452 int i; 453 454 if (number->size >= size) 455 return (BIG_OK); 456 if (number->malloced) { 457 number->value = big_realloc(number->value, 458 BIGNUM_WORDSIZE * number->size, 459 BIGNUM_WORDSIZE * size); 460 } else { 461 newptr = big_malloc(BIGNUM_WORDSIZE * size); 462 if (newptr != NULL) { 463 for (i = 0; i < number->size; i++) { 464 newptr[i] = number->value[i]; 465 } 466 } 467 number->value = newptr; 468 } 469 470 if (number->value == NULL) { 471 return (BIG_NO_MEM); 472 } 473 474 number->size = size; 475 number->malloced = 1; 476 return (BIG_OK); 477 } 478 479 480 /* returns 1 if n == 0 */ 481 int 482 big_is_zero(BIGNUM *n) 483 { 484 int i, result; 485 486 result = 1; 487 for (i = 0; i < n->len; i++) { 488 if (n->value[i] != 0) { 489 result = 0; 490 } 491 } 492 return (result); 493 } 494 495 496 BIG_ERR_CODE 497 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 498 { 499 int i, shorter, longer; 500 BIG_CHUNK_TYPE cy, 
ai; 501 BIG_CHUNK_TYPE *r, *a, *b, *c; 502 BIG_ERR_CODE err; 503 BIGNUM *longerarg; 504 505 if (aa->len > bb->len) { 506 shorter = bb->len; 507 longer = aa->len; 508 longerarg = aa; 509 } else { 510 shorter = aa->len; 511 longer = bb->len; 512 longerarg = bb; 513 } 514 if (result->size < longer + 1) { 515 err = big_extend(result, longer + 1); 516 if (err != BIG_OK) { 517 return (err); 518 } 519 } 520 521 r = result->value; 522 a = aa->value; 523 b = bb->value; 524 c = longerarg->value; 525 cy = 0; 526 for (i = 0; i < shorter; i++) { 527 ai = a[i]; 528 r[i] = ai + b[i] + cy; 529 if (r[i] > ai) { 530 cy = 0; 531 } else if (r[i] < ai) { 532 cy = 1; 533 } 534 } 535 for (; i < longer; i++) { 536 ai = c[i]; 537 r[i] = ai + cy; 538 if (r[i] >= ai) { 539 cy = 0; 540 } 541 } 542 if (cy == 1) { 543 r[i] = cy; 544 result->len = longer + 1; 545 } else { 546 result->len = longer; 547 } 548 result->sign = 1; 549 return (BIG_OK); 550 } 551 552 553 /* caller must make sure that result has at least len words allocated */ 554 void 555 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len) 556 { 557 int i; 558 BIG_CHUNK_TYPE cy, ai; 559 560 cy = 1; 561 for (i = 0; i < len; i++) { 562 ai = a[i]; 563 r[i] = ai + (~b[i]) + cy; 564 if (r[i] > ai) { 565 cy = 0; 566 } else if (r[i] < ai) { 567 cy = 1; 568 } 569 } 570 } 571 572 573 /* result=aa-bb it is assumed that aa>=bb */ 574 BIG_ERR_CODE 575 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 576 { 577 int i, shorter; 578 BIG_CHUNK_TYPE cy = 1, ai; 579 BIG_CHUNK_TYPE *r, *a, *b; 580 BIG_ERR_CODE err = BIG_OK; 581 582 if (aa->len > bb->len) { 583 shorter = bb->len; 584 } else { 585 shorter = aa->len; 586 } 587 if (result->size < aa->len) { 588 err = big_extend(result, aa->len); 589 if (err != BIG_OK) { 590 return (err); 591 } 592 } 593 594 r = result->value; 595 a = aa->value; 596 b = bb->value; 597 result->len = aa->len; 598 cy = 1; 599 for (i = 0; i < shorter; i++) { 600 ai = a[i]; 601 r[i] = ai + (~b[i]) + cy; 
602 if (r[i] > ai) { 603 cy = 0; 604 } else if (r[i] < ai) { 605 cy = 1; 606 } 607 } 608 for (; i < aa->len; i++) { 609 ai = a[i]; 610 r[i] = ai + (~0) + cy; 611 if (r[i] < ai) { 612 cy = 1; 613 } 614 } 615 result->sign = 1; 616 617 if (cy == 0) { 618 return (BIG_INVALID_ARGS); 619 } else { 620 return (BIG_OK); 621 } 622 } 623 624 625 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */ 626 int 627 big_cmp_abs(BIGNUM *aa, BIGNUM *bb) 628 { 629 int i; 630 631 if (aa->len > bb->len) { 632 for (i = aa->len - 1; i > bb->len - 1; i--) { 633 if (aa->value[i] > 0) { 634 return (1); 635 } 636 } 637 } else if (aa->len < bb->len) { 638 for (i = bb->len - 1; i > aa->len - 1; i--) { 639 if (bb->value[i] > 0) { 640 return (-1); 641 } 642 } 643 } else { 644 i = aa->len - 1; 645 } 646 for (; i >= 0; i--) { 647 if (aa->value[i] > bb->value[i]) { 648 return (1); 649 } else if (aa->value[i] < bb->value[i]) { 650 return (-1); 651 } 652 } 653 654 return (0); 655 } 656 657 658 BIG_ERR_CODE 659 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 660 { 661 BIG_ERR_CODE err; 662 663 if ((bb->sign == -1) && (aa->sign == 1)) { 664 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 665 return (err); 666 } 667 result->sign = 1; 668 } else if ((aa->sign == -1) && (bb->sign == 1)) { 669 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 670 return (err); 671 } 672 result->sign = -1; 673 } else if ((aa->sign == 1) && (bb->sign == 1)) { 674 if (big_cmp_abs(aa, bb) >= 0) { 675 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 676 return (err); 677 } 678 result->sign = 1; 679 } else { 680 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 681 return (err); 682 } 683 result->sign = -1; 684 } 685 } else { 686 if (big_cmp_abs(aa, bb) >= 0) { 687 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 688 return (err); 689 } 690 result->sign = -1; 691 } else { 692 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 693 return (err); 694 } 695 result->sign = 1; 696 } 697 } 698 return 
(BIG_OK); 699 } 700 701 702 BIG_ERR_CODE 703 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 704 { 705 BIG_ERR_CODE err; 706 707 if ((bb->sign == -1) && (aa->sign == -1)) { 708 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 709 return (err); 710 } 711 result->sign = -1; 712 } else if ((aa->sign == 1) && (bb->sign == 1)) { 713 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 714 return (err); 715 } 716 result->sign = 1; 717 } else if ((aa->sign == 1) && (bb->sign == -1)) { 718 if (big_cmp_abs(aa, bb) >= 0) { 719 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 720 return (err); 721 } 722 result->sign = 1; 723 } else { 724 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 725 return (err); 726 } 727 result->sign = -1; 728 } 729 } else { 730 if (big_cmp_abs(aa, bb) >= 0) { 731 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 732 return (err); 733 } 734 result->sign = -1; 735 } else { 736 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 737 return (err); 738 } 739 result->sign = 1; 740 } 741 } 742 return (BIG_OK); 743 } 744 745 746 /* result = aa/2 */ 747 BIG_ERR_CODE 748 big_half_pos(BIGNUM *result, BIGNUM *aa) 749 { 750 BIG_ERR_CODE err; 751 int i; 752 BIG_CHUNK_TYPE cy, cy1; 753 BIG_CHUNK_TYPE *a, *r; 754 755 if (result->size < aa->len) { 756 err = big_extend(result, aa->len); 757 if (err != BIG_OK) { 758 return (err); 759 } 760 } 761 762 result->len = aa->len; 763 a = aa->value; 764 r = result->value; 765 cy = 0; 766 for (i = aa->len - 1; i >= 0; i--) { 767 cy1 = a[i] << (BIG_CHUNK_SIZE - 1); 768 r[i] = (cy | (a[i] >> 1)); 769 cy = cy1; 770 } 771 if (r[result->len - 1] == 0) { 772 result->len--; 773 } 774 775 return (BIG_OK); 776 } 777 778 /* result = aa*2 */ 779 BIG_ERR_CODE 780 big_double(BIGNUM *result, BIGNUM *aa) 781 { 782 BIG_ERR_CODE err; 783 int i, rsize; 784 BIG_CHUNK_TYPE cy, cy1; 785 BIG_CHUNK_TYPE *a, *r; 786 787 if ((aa->len > 0) && 788 ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) { 789 rsize = aa->len + 1; 790 } else { 
791 rsize = aa->len; 792 } 793 794 if (result->size < rsize) { 795 err = big_extend(result, rsize); 796 if (err != BIG_OK) 797 return (err); 798 } 799 800 a = aa->value; 801 r = result->value; 802 if (rsize == aa->len + 1) { 803 r[rsize - 1] = 1; 804 } 805 cy = 0; 806 for (i = 0; i < aa->len; i++) { 807 cy1 = a[i] >> (BIG_CHUNK_SIZE - 1); 808 r[i] = (cy | (a[i] << 1)); 809 cy = cy1; 810 } 811 result->len = rsize; 812 return (BIG_OK); 813 } 814 815 816 /* 817 * returns aa mod b, aa must be nonneg, b must be a max 818 * (BIG_CHUNK_SIZE / 2)-bit integer 819 */ 820 static uint32_t 821 big_modhalf_pos(BIGNUM *aa, uint32_t b) 822 { 823 int i; 824 BIG_CHUNK_TYPE rem; 825 826 if (aa->len == 0) { 827 return (0); 828 } 829 rem = aa->value[aa->len - 1] % b; 830 for (i = aa->len - 2; i >= 0; i--) { 831 rem = ((rem << (BIG_CHUNK_SIZE / 2)) | 832 (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b; 833 rem = ((rem << (BIG_CHUNK_SIZE / 2)) | 834 (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b; 835 } 836 837 return ((uint32_t)rem); 838 } 839 840 841 /* 842 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb 843 * result->size should be at least aa->len at entry 844 * aa, bb, and result should be positive 845 */ 846 void 847 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 848 { 849 int i, lendiff; 850 BIGNUM res1, aa1; 851 852 lendiff = aa->len - bb->len; 853 res1.size = result->size - lendiff; 854 res1.malloced = 0; 855 res1.value = result->value + lendiff; 856 aa1.size = aa->size - lendiff; 857 aa1.value = aa->value + lendiff; 858 aa1.len = bb->len; 859 aa1.sign = 1; 860 (void) big_sub_pos(&res1, &aa1, bb); 861 if (result->value != aa->value) { 862 for (i = 0; i < lendiff; i++) { 863 result->value[i] = aa->value[i]; 864 } 865 } 866 result->len = aa->len; 867 } 868 869 870 /* 871 * returns 1, 0, or -1 depending on whether |aa| > , ==, or < 872 * (2^BIG_CHUNK_SIZE)^lendiff * |bb| 873 * aa->len should be >= bb->len 874 */ 875 int 876 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb) 877 { 878 int 
lendiff; 879 BIGNUM aa1; 880 881 lendiff = aa->len - bb->len; 882 aa1.len = bb->len; 883 aa1.size = aa->size - lendiff; 884 aa1.malloced = 0; 885 aa1.value = aa->value + lendiff; 886 return (big_cmp_abs(&aa1, bb)); 887 } 888 889 890 /* 891 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer. 892 * result should have enough space allocated. 893 */ 894 static void 895 big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b) 896 { 897 int i; 898 BIG_CHUNK_TYPE t1, t2, ai, cy; 899 BIG_CHUNK_TYPE *a, *r; 900 901 a = aa->value; 902 r = result->value; 903 cy = 0; 904 for (i = 0; i < aa->len; i++) { 905 ai = a[i]; 906 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy; 907 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b + 908 (t1 >> (BIG_CHUNK_SIZE / 2)); 909 r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) | 910 (t2 << (BIG_CHUNK_SIZE / 2)); 911 cy = t2 >> (BIG_CHUNK_SIZE / 2); 912 } 913 r[i] = cy; 914 result->len = aa->len + 1; 915 result->sign = aa->sign; 916 } 917 918 919 /* 920 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max. 921 * (BIG_CHUNK_SIZE / 2)-bit positive integer. 922 * result should have enough space allocated. 
923 */ 924 static void 925 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b) 926 { 927 int i; 928 BIG_CHUNK_TYPE t1, t2, ai, cy, ri; 929 BIG_CHUNK_TYPE *a, *r; 930 931 a = aa->value; 932 r = result->value; 933 cy = 0; 934 ri = 0; 935 for (i = 0; i < aa->len; i++) { 936 ai = a[i]; 937 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy; 938 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b + 939 (t1 >> (BIG_CHUNK_SIZE / 2)); 940 r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri; 941 ri = t2 & BIG_CHUNK_LOWHALFBITS; 942 cy = t2 >> (BIG_CHUNK_SIZE / 2); 943 } 944 r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri; 945 result->len = aa->len + 1; 946 result->sign = aa->sign; 947 } 948 949 950 /* it is assumed that result->size is big enough */ 951 void 952 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs) 953 { 954 int i; 955 BIG_CHUNK_TYPE cy, ai; 956 957 if (offs == 0) { 958 if (result != aa) { 959 (void) big_copy(result, aa); 960 } 961 return; 962 } 963 cy = 0; 964 for (i = 0; i < aa->len; i++) { 965 ai = aa->value[i]; 966 result->value[i] = (ai << offs) | cy; 967 cy = ai >> (BIG_CHUNK_SIZE - offs); 968 } 969 if (cy != 0) { 970 result->len = aa->len + 1; 971 result->value[result->len - 1] = cy; 972 } else { 973 result->len = aa->len; 974 } 975 result->sign = aa->sign; 976 } 977 978 979 /* it is assumed that result->size is big enough */ 980 void 981 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs) 982 { 983 int i; 984 BIG_CHUNK_TYPE cy, ai; 985 986 if (offs == 0) { 987 if (result != aa) { 988 (void) big_copy(result, aa); 989 } 990 return; 991 } 992 cy = aa->value[0] >> offs; 993 for (i = 1; i < aa->len; i++) { 994 ai = aa->value[i]; 995 result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy; 996 cy = ai >> offs; 997 } 998 result->len = aa->len; 999 result->value[result->len - 1] = cy; 1000 result->sign = aa->sign; 1001 } 1002 1003 1004 /* 1005 * result = aa/bb remainder = aa mod bb 1006 * it is assumed that aa and bb are positive 1007 */ 1008 BIG_ERR_CODE 1009 big_div_pos(BIGNUM 
*result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb) 1010 { 1011 BIG_ERR_CODE err = BIG_OK; 1012 int i, alen, blen, tlen, rlen, offs; 1013 BIG_CHUNK_TYPE higha, highb, coeff; 1014 BIG_CHUNK_TYPE *a, *b; 1015 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2; 1016 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE]; 1017 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE]; 1018 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE]; 1019 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE]; 1020 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE]; 1021 1022 a = aa->value; 1023 b = bb->value; 1024 alen = aa->len; 1025 blen = bb->len; 1026 while ((alen > 1) && (a[alen - 1] == 0)) { 1027 alen = alen - 1; 1028 } 1029 aa->len = alen; 1030 while ((blen > 1) && (b[blen - 1] == 0)) { 1031 blen = blen - 1; 1032 } 1033 bb->len = blen; 1034 if ((blen == 1) && (b[0] == 0)) { 1035 return (BIG_DIV_BY_0); 1036 } 1037 1038 if (big_cmp_abs(aa, bb) < 0) { 1039 if ((remainder != NULL) && 1040 ((err = big_copy(remainder, aa)) != BIG_OK)) { 1041 return (err); 1042 } 1043 if (result != NULL) { 1044 result->len = 1; 1045 result->sign = 1; 1046 result->value[0] = 0; 1047 } 1048 return (BIG_OK); 1049 } 1050 1051 if ((err = big_init1(&bblow, blen + 1, 1052 bblowvalue, arraysize(bblowvalue))) != BIG_OK) 1053 return (err); 1054 1055 if ((err = big_init1(&bbhigh, blen + 1, 1056 bbhighvalue, arraysize(bbhighvalue))) != BIG_OK) 1057 goto ret1; 1058 1059 if ((err = big_init1(&tmp1, alen + 2, 1060 tmp1value, arraysize(tmp1value))) != BIG_OK) 1061 goto ret2; 1062 1063 if ((err = big_init1(&tmp2, blen + 2, 1064 tmp2value, arraysize(tmp2value))) != BIG_OK) 1065 goto ret3; 1066 1067 if ((err = big_init1(&tresult, alen - blen + 2, 1068 tresultvalue, arraysize(tresultvalue))) != BIG_OK) 1069 goto ret4; 1070 1071 offs = 0; 1072 highb = b[blen - 1]; 1073 if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) { 1074 highb = highb >> (BIG_CHUNK_SIZE / 2); 1075 offs = (BIG_CHUNK_SIZE / 2); 1076 } 1077 while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) { 1078 highb = highb << 1; 1079 offs++; 1080 } 1081 
1082 big_shiftleft(&bblow, bb, offs); 1083 1084 if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) { 1085 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2); 1086 } else { 1087 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2); 1088 } 1089 if (bbhigh.value[bbhigh.len - 1] == 0) { 1090 bbhigh.len--; 1091 } else { 1092 bbhigh.value[bbhigh.len] = 0; 1093 } 1094 1095 highb = bblow.value[bblow.len - 1]; 1096 1097 big_shiftleft(&tmp1, aa, offs); 1098 rlen = tmp1.len - bblow.len + 1; 1099 tresult.len = rlen; 1100 1101 tmp1.len++; 1102 tlen = tmp1.len; 1103 tmp1.value[tmp1.len - 1] = 0; 1104 for (i = 0; i < rlen; i++) { 1105 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) + 1106 (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2)); 1107 coeff = higha / (highb + 1); 1108 big_mulhalf_high(&tmp2, &bblow, coeff); 1109 big_sub_pos_high(&tmp1, &tmp1, &tmp2); 1110 bbhigh.len++; 1111 while (tmp1.value[tlen - 1] > 0) { 1112 big_sub_pos_high(&tmp1, &tmp1, &bbhigh); 1113 coeff++; 1114 } 1115 bbhigh.len--; 1116 tlen--; 1117 tmp1.len--; 1118 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) { 1119 big_sub_pos_high(&tmp1, &tmp1, &bbhigh); 1120 coeff++; 1121 } 1122 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2); 1123 higha = tmp1.value[tlen - 1]; 1124 coeff = higha / (highb + 1); 1125 big_mulhalf_low(&tmp2, &bblow, coeff); 1126 tmp2.len--; 1127 big_sub_pos_high(&tmp1, &tmp1, &tmp2); 1128 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) { 1129 big_sub_pos_high(&tmp1, &tmp1, &bblow); 1130 coeff++; 1131 } 1132 tresult.value[rlen - i - 1] = 1133 tresult.value[rlen - i - 1] + coeff; 1134 } 1135 1136 big_shiftright(&tmp1, &tmp1, offs); 1137 1138 err = BIG_OK; 1139 1140 if ((remainder != NULL) && 1141 ((err = big_copy(remainder, &tmp1)) != BIG_OK)) 1142 goto ret; 1143 1144 if (result != NULL) 1145 err = big_copy(result, &tresult); 1146 1147 ret: 1148 big_finish(&tresult); 1149 ret4: 1150 big_finish(&tmp1); 1151 ret3: 1152 big_finish(&tmp2); 1153 ret2: 1154 big_finish(&bbhigh); 1155 ret1: 1156 
big_finish(&bblow); 1157 return (err); 1158 } 1159 1160 1161 /* 1162 * If there is no processor-specific integer implementation of 1163 * the lower level multiply functions, then this code is provided 1164 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and 1165 * big_sqr_vec(). 1166 * 1167 * There are two generic implementations. One that assumes that 1168 * there is hardware and C compiler support for a 32 x 32 --> 64 1169 * bit unsigned multiply, but otherwise is not specific to any 1170 * processor, platform, or ISA. 1171 * 1172 * The other makes very few assumptions about hardware capabilities. 1173 * It does not even assume that there is any implementation of a 1174 * 32 x 32 --> 64 bit multiply that is accessible to C code and 1175 * appropriate to use. It falls constructs 32 x 32 --> 64 bit 1176 * multiplies from 16 x 16 --> 32 bit multiplies. 1177 * 1178 */ 1179 1180 #if !defined(PSR_MUL) 1181 1182 #ifdef UMUL64 1183 1184 #if (BIG_CHUNK_SIZE == 32) 1185 1186 #define UNROLL8 1187 1188 #define MUL_SET_VEC_ROUND_PREFETCH(R) \ 1189 p = pf * d; \ 1190 pf = (uint64_t)a[R + 1]; \ 1191 t = p + cy; \ 1192 r[R] = (uint32_t)t; \ 1193 cy = t >> 32 1194 1195 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \ 1196 p = pf * d; \ 1197 t = p + cy; \ 1198 r[R] = (uint32_t)t; \ 1199 cy = t >> 32 1200 1201 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \ 1202 t = (uint64_t)r[R]; \ 1203 p = pf * d; \ 1204 pf = (uint64_t)a[R + 1]; \ 1205 t = p + t + cy; \ 1206 r[R] = (uint32_t)t; \ 1207 cy = t >> 32 1208 1209 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \ 1210 t = (uint64_t)r[R]; \ 1211 p = pf * d; \ 1212 t = p + t + cy; \ 1213 r[R] = (uint32_t)t; \ 1214 cy = t >> 32 1215 1216 #ifdef UNROLL8 1217 1218 #define UNROLL 8 1219 1220 /* 1221 * r = a * b 1222 * where r and a are vectors; b is a single 32-bit digit 1223 */ 1224 1225 uint32_t 1226 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b) 1227 { 1228 uint64_t d, pf, p, t, cy; 1229 1230 if (len == 0) 1231 return (0); 1232 cy = 0; 
1233 d = (uint64_t)b; 1234 pf = (uint64_t)a[0]; 1235 while (len > UNROLL) { 1236 MUL_SET_VEC_ROUND_PREFETCH(0); 1237 MUL_SET_VEC_ROUND_PREFETCH(1); 1238 MUL_SET_VEC_ROUND_PREFETCH(2); 1239 MUL_SET_VEC_ROUND_PREFETCH(3); 1240 MUL_SET_VEC_ROUND_PREFETCH(4); 1241 MUL_SET_VEC_ROUND_PREFETCH(5); 1242 MUL_SET_VEC_ROUND_PREFETCH(6); 1243 MUL_SET_VEC_ROUND_PREFETCH(7); 1244 r += UNROLL; 1245 a += UNROLL; 1246 len -= UNROLL; 1247 } 1248 if (len == UNROLL) { 1249 MUL_SET_VEC_ROUND_PREFETCH(0); 1250 MUL_SET_VEC_ROUND_PREFETCH(1); 1251 MUL_SET_VEC_ROUND_PREFETCH(2); 1252 MUL_SET_VEC_ROUND_PREFETCH(3); 1253 MUL_SET_VEC_ROUND_PREFETCH(4); 1254 MUL_SET_VEC_ROUND_PREFETCH(5); 1255 MUL_SET_VEC_ROUND_PREFETCH(6); 1256 MUL_SET_VEC_ROUND_NOPREFETCH(7); 1257 return ((uint32_t)cy); 1258 } 1259 while (len > 1) { 1260 MUL_SET_VEC_ROUND_PREFETCH(0); 1261 ++r; 1262 ++a; 1263 --len; 1264 } 1265 if (len > 0) { 1266 MUL_SET_VEC_ROUND_NOPREFETCH(0); 1267 } 1268 return ((uint32_t)cy); 1269 } 1270 1271 /* 1272 * r += a * b 1273 * where r and a are vectors; b is a single 32-bit digit 1274 */ 1275 1276 uint32_t 1277 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b) 1278 { 1279 uint64_t d, pf, p, t, cy; 1280 1281 if (len == 0) 1282 return (0); 1283 cy = 0; 1284 d = (uint64_t)b; 1285 pf = (uint64_t)a[0]; 1286 while (len > 8) { 1287 MUL_ADD_VEC_ROUND_PREFETCH(0); 1288 MUL_ADD_VEC_ROUND_PREFETCH(1); 1289 MUL_ADD_VEC_ROUND_PREFETCH(2); 1290 MUL_ADD_VEC_ROUND_PREFETCH(3); 1291 MUL_ADD_VEC_ROUND_PREFETCH(4); 1292 MUL_ADD_VEC_ROUND_PREFETCH(5); 1293 MUL_ADD_VEC_ROUND_PREFETCH(6); 1294 MUL_ADD_VEC_ROUND_PREFETCH(7); 1295 r += 8; 1296 a += 8; 1297 len -= 8; 1298 } 1299 if (len == 8) { 1300 MUL_ADD_VEC_ROUND_PREFETCH(0); 1301 MUL_ADD_VEC_ROUND_PREFETCH(1); 1302 MUL_ADD_VEC_ROUND_PREFETCH(2); 1303 MUL_ADD_VEC_ROUND_PREFETCH(3); 1304 MUL_ADD_VEC_ROUND_PREFETCH(4); 1305 MUL_ADD_VEC_ROUND_PREFETCH(5); 1306 MUL_ADD_VEC_ROUND_PREFETCH(6); 1307 MUL_ADD_VEC_ROUND_NOPREFETCH(7); 1308 return 
((uint32_t)cy); 1309 } 1310 while (len > 1) { 1311 MUL_ADD_VEC_ROUND_PREFETCH(0); 1312 ++r; 1313 ++a; 1314 --len; 1315 } 1316 if (len > 0) { 1317 MUL_ADD_VEC_ROUND_NOPREFETCH(0); 1318 } 1319 return ((uint32_t)cy); 1320 } 1321 #endif /* UNROLL8 */ 1322 1323 void 1324 big_sqr_vec(uint32_t *r, uint32_t *a, int len) 1325 { 1326 uint32_t *tr, *ta; 1327 int tlen, row, col; 1328 uint64_t p, s, t, t2, cy; 1329 uint32_t d; 1330 1331 tr = r + 1; 1332 ta = a; 1333 tlen = len - 1; 1334 tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]); 1335 while (--tlen > 0) { 1336 tr += 2; 1337 ++ta; 1338 tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]); 1339 } 1340 s = (uint64_t)a[0]; 1341 s = s * s; 1342 r[0] = (uint32_t)s; 1343 cy = s >> 32; 1344 p = ((uint64_t)r[1] << 1) + cy; 1345 r[1] = (uint32_t)p; 1346 cy = p >> 32; 1347 row = 1; 1348 col = 2; 1349 while (row < len) { 1350 s = (uint64_t)a[row]; 1351 s = s * s; 1352 p = (uint64_t)r[col] << 1; 1353 t = p + s; 1354 d = (uint32_t)t; 1355 t2 = (uint64_t)d + cy; 1356 r[col] = (uint32_t)t2; 1357 cy = (t >> 32) + (t2 >> 32); 1358 if (row == len - 1) 1359 break; 1360 p = ((uint64_t)r[col + 1] << 1) + cy; 1361 r[col + 1] = (uint32_t)p; 1362 cy = p >> 32; 1363 ++row; 1364 col += 2; 1365 } 1366 r[col + 1] = (uint32_t)cy; 1367 } 1368 1369 #else /* BIG_CHUNK_SIZE == 64 */ 1370 1371 /* 1372 * r = r + a * digit, r and a are vectors of length len 1373 * returns the carry digit 1374 */ 1375 BIG_CHUNK_TYPE 1376 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len, 1377 BIG_CHUNK_TYPE digit) 1378 { 1379 BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh; 1380 int i; 1381 1382 cy1 = 0; 1383 dlow = digit & BIG_CHUNK_LOWHALFBITS; 1384 dhigh = digit >> (BIG_CHUNK_SIZE / 2); 1385 for (i = 0; i < len; i++) { 1386 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) + 1387 dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) + 1388 (r[i] & BIG_CHUNK_LOWHALFBITS); 1389 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) + 1390 dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) + 1391 (r[i] >> (BIG_CHUNK_SIZE / 2)); 
1392 r[i] = (cy & BIG_CHUNK_LOWHALFBITS) | 1393 (cy1 << (BIG_CHUNK_SIZE / 2)); 1394 } 1395 retcy = cy1 >> (BIG_CHUNK_SIZE / 2); 1396 1397 cy1 = r[0] & BIG_CHUNK_LOWHALFBITS; 1398 for (i = 0; i < len - 1; i++) { 1399 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) + 1400 dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) + 1401 (r[i] >> (BIG_CHUNK_SIZE / 2)); 1402 r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) | 1403 (cy << (BIG_CHUNK_SIZE / 2)); 1404 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) + 1405 dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) + 1406 (r[i + 1] & BIG_CHUNK_LOWHALFBITS); 1407 } 1408 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) + 1409 dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) + 1410 (r[len - 1] >> (BIG_CHUNK_SIZE / 2)); 1411 r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) | 1412 (cy << (BIG_CHUNK_SIZE / 2)); 1413 retcy = (cy >> (BIG_CHUNK_SIZE / 2)) + 1414 dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy; 1415 1416 return (retcy); 1417 } 1418 1419 1420 /* 1421 * r = a * digit, r and a are vectors of length len 1422 * returns the carry digit 1423 */ 1424 BIG_CHUNK_TYPE 1425 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len, 1426 BIG_CHUNK_TYPE digit) 1427 { 1428 int i; 1429 1430 ASSERT(r != a); 1431 for (i = 0; i < len; i++) { 1432 r[i] = 0; 1433 } 1434 return (big_mul_add_vec(r, a, len, digit)); 1435 } 1436 1437 void 1438 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len) 1439 { 1440 int i; 1441 1442 ASSERT(r != a); 1443 r[len] = big_mul_set_vec(r, a, len, a[0]); 1444 for (i = 1; i < len; ++i) 1445 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]); 1446 } 1447 1448 #endif /* BIG_CHUNK_SIZE == 32/64 */ 1449 1450 1451 #else /* ! 
/*
 * big_mul_add_vec()
 * r = r + a * digit, where r and a are vectors of length len and digit is
 * a single 32-bit digit.  Returns the carry digit.
 *
 * Fallback for platforms without a 32 x 32 --> 64 multiply (no UMUL64):
 * the product is assembled from 16 x 16 --> 32 partial products in two
 * passes, one per 16-bit half of digit (the high half contributes shifted
 * up by 16 bits, which is why the second pass straddles adjacent words).
 */
uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
	uint32_t cy, cy1, retcy, dlow, dhigh;
	int i;

	/*
	 * Guard the degenerate case up front: the second pass below reads
	 * r[0] and writes r[len - 1], which would be out of bounds for
	 * len == 0.  This also matches the unrolled UMUL64 variant of this
	 * function, which explicitly returns 0 for len == 0.
	 */
	if (len == 0)
		return (0);

	cy1 = 0;
	dlow = digit & 0xffff;
	dhigh = digit >> 16;
	/* Pass 1: accumulate dlow * a[] into r[] */
	for (i = 0; i < len; i++) {
		cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
		cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
		r[i] = (cy & 0xffff) | (cy1 << 16);
	}
	retcy = cy1 >> 16;

	/* Pass 2: accumulate dhigh * a[], offset up by 16 bits */
	cy1 = r[0] & 0xffff;
	for (i = 0; i < len - 1; i++) {
		cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
		r[i] = (cy1 & 0xffff) | (cy << 16);
		cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
	}
	cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
	r[len - 1] = (cy1 & 0xffff) | (cy << 16);
	retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;

	return (retcy);
}
/*
 * big_mul()
 * result = aa * bb; result->value should be big enough to hold the result
 * (it is extended here if result->size is too small).
 *
 * Implementation: standard grammar-school algorithm via BIG_MUL_VEC, or
 * BIG_SQR_VEC when squaring (aa == bb or equal magnitudes).
 *
 * Side effects: trims trailing zero digits from aa->len and bb->len in
 * place.  result may alias aa or bb; the product is computed into a
 * temporary and copied out at the end to make that safe.
 */
BIG_ERR_CODE
big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	BIGNUM tmp1;
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE *r, *t, *a, *b;
	BIG_ERR_CODE err;
	int i, alen, blen, rsize, sign, diff;

	/* Swap so that |aa| >= |bb|; diff == 0 marks a squaring */
	if (aa == bb) {
		diff = 0;
	} else {
		diff = big_cmp_abs(aa, bb);
		if (diff < 0) {
			BIGNUM *tt;
			tt = aa;
			aa = bb;
			bb = tt;
		}
	}
	a = aa->value;
	b = bb->value;
	alen = aa->len;
	blen = bb->len;
	/* Trim high-order zero digits (keep at least one digit) */
	while ((alen > 1) && (a[alen - 1] == 0)) {
		alen--;
	}
	aa->len = alen;
	while ((blen > 1) && (b[blen - 1] == 0)) {
		blen--;
	}
	bb->len = blen;

	rsize = alen + blen;
	ASSERT(rsize > 0);
	if (result->size < rsize) {
		err = big_extend(result, rsize);
		if (err != BIG_OK) {
			return (err);
		}
		/* aa or bb might be an alias to result */
		a = aa->value;
		b = bb->value;
	}
	r = result->value;

	/* Shortcut: either operand is zero -> result is +0 */
	if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
		result->len = 1;
		result->sign = 1;
		r[0] = 0;
		return (BIG_OK);
	}
	sign = aa->sign * bb->sign;
	/* Shortcut: either operand is 1 -> copy the other (with sign) */
	if ((alen == 1) && (a[0] == 1)) {
		for (i = 0; i < blen; i++) {
			r[i] = b[i];
		}
		result->len = blen;
		result->sign = sign;
		return (BIG_OK);
	}
	if ((blen == 1) && (b[0] == 1)) {
		for (i = 0; i < alen; i++) {
			r[i] = a[i];
		}
		result->len = alen;
		result->sign = sign;
		return (BIG_OK);
	}

	/* Compute into a zeroed temporary so result may alias an operand */
	if ((err = big_init1(&tmp1, rsize,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}
	t = tmp1.value;

	for (i = 0; i < rsize; i++) {
		t[i] = 0;
	}

	/* Use the (faster) squaring path only for operands > 2 digits */
	if (diff == 0 && alen > 2) {
		BIG_SQR_VEC(t, a, alen);
	} else if (blen > 0) {
		BIG_MUL_VEC(t, a, alen, b, blen);
	}

	/* The top digit of an (alen + blen)-digit product may be zero */
	if (t[rsize - 1] == 0) {
		tmp1.len = rsize - 1;
	} else {
		tmp1.len = rsize;
	}

	err = big_copy(result, &tmp1);

	result->sign = sign;

	big_finish(&tmp1);

	return (err);
}
1691 */ 1692 for (i = 0; i < nlen; i++) { 1693 rrplusi = rr + i; 1694 digit = *rrplusi * n0; 1695 carry[i] = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit); 1696 } 1697 for (i = 0; i < nlen; i++) { 1698 j = i + nlen; 1699 rr[j] += carry[i]; 1700 while (rr[j] < carry[i]) { 1701 rr[++j] += 1; 1702 carry[i] = 1; 1703 } 1704 } 1705 } else 1706 #endif /* __amd64 */ 1707 { /* no pipelining optimization */ 1708 for (i = 0; i < nlen; i++) { 1709 rrplusi = rr + i; 1710 digit = *rrplusi * n0; 1711 c = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit); 1712 j = i + nlen; 1713 rr[j] += c; 1714 while (rr[j] < c) { 1715 rr[++j] += 1; 1716 c = 1; 1717 } 1718 } 1719 } 1720 1721 needsubtract = 0; 1722 if ((rr[2 * nlen] != 0)) 1723 needsubtract = 1; 1724 else { 1725 for (i = 2 * nlen - 1; i >= nlen; i--) { 1726 if (rr[i] > nn[i - nlen]) { 1727 needsubtract = 1; 1728 break; 1729 } else if (rr[i] < nn[i - nlen]) { 1730 break; 1731 } 1732 } 1733 } 1734 if (needsubtract) 1735 big_sub_vec(rr, rr + nlen, nn, nlen); 1736 else { 1737 for (i = 0; i < nlen; i++) { 1738 rr[i] = rr[i + nlen]; 1739 } 1740 } 1741 1742 /* Remove leading zeros, but keep at least 1 digit: */ 1743 for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--) 1744 ; 1745 ret->len = i + 1; 1746 1747 return (BIG_OK); 1748 } 1749 1750 1751 BIG_CHUNK_TYPE 1752 big_n0(BIG_CHUNK_TYPE n) 1753 { 1754 int i; 1755 BIG_CHUNK_TYPE result, tmp; 1756 1757 result = 0; 1758 tmp = BIG_CHUNK_ALLBITS; 1759 for (i = 0; i < BIG_CHUNK_SIZE; i++) { 1760 if ((tmp & 1) == 1) { 1761 result = (result >> 1) | BIG_CHUNK_HIGHBIT; 1762 tmp = tmp - n; 1763 } else { 1764 result = (result >> 1); 1765 } 1766 tmp = tmp >> 1; 1767 } 1768 1769 return (result); 1770 } 1771 1772 1773 int 1774 big_numbits(BIGNUM *n) 1775 { 1776 int i, j; 1777 BIG_CHUNK_TYPE t; 1778 1779 for (i = n->len - 1; i > 0; i--) { 1780 if (n->value[i] != 0) { 1781 break; 1782 } 1783 } 1784 t = n->value[i]; 1785 for (j = BIG_CHUNK_SIZE; j > 0; j--) { 1786 if ((t & BIG_CHUNK_HIGHBIT) == 0) { 1787 t = t << 1; 
1788 } else { 1789 return (BIG_CHUNK_SIZE * i + j); 1790 } 1791 } 1792 return (0); 1793 } 1794 1795 1796 /* caller must make sure that a < n */ 1797 BIG_ERR_CODE 1798 big_mont_rr(BIGNUM *result, BIGNUM *n) 1799 { 1800 BIGNUM rr; 1801 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE]; 1802 int len, i; 1803 BIG_ERR_CODE err; 1804 1805 rr.malloced = 0; 1806 len = n->len; 1807 1808 if ((err = big_init1(&rr, 2 * len + 1, 1809 rrvalue, arraysize(rrvalue))) != BIG_OK) { 1810 return (err); 1811 } 1812 1813 for (i = 0; i < 2 * len; i++) { 1814 rr.value[i] = 0; 1815 } 1816 rr.value[2 * len] = 1; 1817 rr.len = 2 * len + 1; 1818 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) { 1819 goto ret; 1820 } 1821 err = big_copy(result, &rr); 1822 ret: 1823 big_finish(&rr); 1824 return (err); 1825 } 1826 1827 1828 /* caller must make sure that a < n */ 1829 BIG_ERR_CODE 1830 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0, 1831 BIGNUM *n_rr) 1832 { 1833 BIGNUM rr; 1834 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE]; 1835 int len, i; 1836 BIG_ERR_CODE err; 1837 1838 rr.malloced = 0; 1839 len = n->len; 1840 1841 if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue))) 1842 != BIG_OK) { 1843 return (err); 1844 } 1845 1846 if (n_rr == NULL) { 1847 for (i = 0; i < 2 * len; i++) { 1848 rr.value[i] = 0; 1849 } 1850 rr.value[2 * len] = 1; 1851 rr.len = 2 * len + 1; 1852 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) { 1853 goto ret; 1854 } 1855 n_rr = &rr; 1856 } 1857 1858 if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) { 1859 goto ret; 1860 } 1861 err = big_copy(result, &rr); 1862 1863 ret: 1864 big_finish(&rr); 1865 return (err); 1866 } 1867 1868 1869 #ifdef USE_FLOATING_POINT 1870 #define big_modexp_ncp_float big_modexp_ncp_sw 1871 #else 1872 #define big_modexp_ncp_int big_modexp_ncp_sw 1873 #endif 1874 1875 #define MAX_EXP_BIT_GROUP_SIZE 6 1876 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1)) 1877 1878 /* ARGSUSED */ 1879 static BIG_ERR_CODE 1880 
/*
 * big_modexp_ncp_int()
 * Montgomery-form modular exponentiation: given ma (the base in
 * Montgomery form) and an initialized accumulator in tmp, repeatedly
 * applies big_mont_mul() driven by the bits of e, using a sliding-window
 * method with window size groupbits (1 for small exponents, otherwise
 * MAX_EXP_BIT_GROUP_SIZE).
 *
 * apowers[] caches odd powers of the base: apowers[0] = ma, and after ma
 * is replaced by ma^2, apowers[i] = apowers[i-1] * ma^2, i.e.
 * apowers[i] = (original ma)^(2i+1).
 *
 * Window bookkeeping: p accumulates the window bits, k counts the
 * squarings owed up to (and including) the lowest 1-bit seen so far, and
 * l counts trailing zero bits.  When a window closes, k squarings are
 * performed, one multiply by apowers[p >> (l + 1)] (the odd part of p,
 * mapped to its table slot), then l more squarings.
 *
 * NOTE(review): the final l squarings at the bottom write to `result`
 * while reading `tmp`, and `result` is never written anywhere else; this
 * only implements repeated squaring (and only produces output at all) if
 * result aliases tmp.  big_modexp_ext() always calls the ncp_sw entry
 * point with result == tmp (&tmp for both), so it behaves — confirm
 * before calling with distinct BIGNUMs.
 */
/* ARGSUSED */
static BIG_ERR_CODE
big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)

{
	BIGNUM		apowers[APOWERS_MAX_SIZE];
	BIGNUM		tmp1;
	BIG_CHUNK_TYPE	tmp1value[BIGTMPSIZE];
	int		i, j, k, l, m, p;
	uint32_t	bit, bitind, bitcount, groupbits, apowerssize;
	uint32_t	nbits;
	BIG_ERR_CODE	err;

	/* Small exponents do not amortize the apowers[] setup cost */
	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}


	if ((err = big_init1(&tmp1, 2 * n->len + 1,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}

	/* clear the malloced bit to help cleanup */
	for (i = 0; i < apowerssize; i++) {
		apowers[i].malloced = 0;
	}

	for (i = 0; i < apowerssize; i++) {
		if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
		    BIG_OK) {
			goto ret;
		}
	}

	/* apowers[0] = ma; then ma becomes ma^2 (Montgomery) */
	(void) big_copy(&(apowers[0]), ma);

	if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
		goto ret;
	}
	(void) big_copy(ma, &tmp1);

	/* apowers[i] = apowers[i-1] * ma^2 = base^(2i+1) */
	for (i = 1; i < apowerssize; i++) {
		if ((err = big_mont_mul(&tmp1, ma,
		    &(apowers[i - 1]), n, n0)) != BIG_OK) {
			goto ret;
		}
		(void) big_copy(&apowers[i], &tmp1);
	}

	/* Scan e from its most significant bit downward */
	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				/* Outside a window: plain squaring */
				if ((err = big_mont_mul(tmp,
				    tmp, tmp, n, n0)) != BIG_OK) {
					goto ret;
				}
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					/* Window full: flush it */
					for (m = 0; m < k; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					if ((err = big_mont_mul(tmp, tmp,
					    &(apowers[p >> (l + 1)]),
					    n, n0)) != BIG_OK) {
						goto ret;
					}
					for (m = 0; m < l; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	/* Flush any partially filled final window */
	for (m = 0; m < k; m++) {
		if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	if (p != 0) {
		if ((err = big_mont_mul(tmp, tmp,
		    &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	/* See NOTE(review) above: assumes result aliases tmp */
	for (m = 0; m < l; m++) {
		if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		big_finish(&(apowers[i]));
	}
	big_finish(&tmp1);

	return (err);
}
2055 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN); 2056 big_savefp(fpu); 2057 2058 #endif /* _KERNEL */ 2059 2060 nbits = big_numbits(e); 2061 if (nbits < 50) { 2062 groupbits = 1; 2063 apowerssize = 1; 2064 } else { 2065 groupbits = MAX_EXP_BIT_GROUP_SIZE; 2066 apowerssize = 1 << (groupbits - 1); 2067 } 2068 2069 nlen = (BIG_CHUNK_SIZE / 32) * n->len; 2070 dn0 = (double)(n0 & 0xffff); 2071 2072 dn = dt = d16r = d32r = NULL; 2073 nint = prod = NULL; 2074 for (i = 0; i < apowerssize; i++) { 2075 apowers[i] = NULL; 2076 } 2077 2078 if ((dn = big_malloc(nlen * sizeof (double))) == NULL) { 2079 err = BIG_NO_MEM; 2080 goto ret; 2081 } 2082 if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) { 2083 err = BIG_NO_MEM; 2084 goto ret; 2085 } 2086 if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) { 2087 err = BIG_NO_MEM; 2088 goto ret; 2089 } 2090 if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) { 2091 err = BIG_NO_MEM; 2092 goto ret; 2093 } 2094 if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) { 2095 err = BIG_NO_MEM; 2096 goto ret; 2097 } 2098 if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) { 2099 err = BIG_NO_MEM; 2100 goto ret; 2101 } 2102 for (i = 0; i < apowerssize; i++) { 2103 if ((apowers[i] = big_malloc((2 * nlen + 1) * 2104 sizeof (double))) == NULL) { 2105 err = BIG_NO_MEM; 2106 goto ret; 2107 } 2108 } 2109 2110 #if (BIG_CHUNK_SIZE == 32) 2111 for (i = 0; i < ma->len; i++) { 2112 nint[i] = ma->value[i]; 2113 } 2114 for (; i < nlen; i++) { 2115 nint[i] = 0; 2116 } 2117 #else 2118 for (i = 0; i < ma->len; i++) { 2119 nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL); 2120 nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32); 2121 } 2122 for (i = ma->len * 2; i < nlen; i++) { 2123 nint[i] = 0; 2124 } 2125 #endif 2126 conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen); 2127 2128 #if (BIG_CHUNK_SIZE == 32) 2129 for (i = 0; i < n->len; i++) { 2130 nint[i] = n->value[i]; 2131 } 2132 for (; i < 
nlen; i++) { 2133 nint[i] = 0; 2134 } 2135 #else 2136 for (i = 0; i < n->len; i++) { 2137 nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL); 2138 nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32); 2139 } 2140 for (i = n->len * 2; i < nlen; i++) { 2141 nint[i] = 0; 2142 } 2143 #endif 2144 conv_i32_to_d32(dn, nint, nlen); 2145 2146 mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0); 2147 conv_i32_to_d32(d32r, prod, nlen); 2148 for (i = 1; i < apowerssize; i++) { 2149 mont_mulf_noconv(prod, d32r, apowers[i - 1], 2150 dt, dn, nint, nlen, dn0); 2151 conv_i32_to_d16(apowers[i], prod, nlen); 2152 } 2153 2154 #if (BIG_CHUNK_SIZE == 32) 2155 for (i = 0; i < tmp->len; i++) { 2156 prod[i] = tmp->value[i]; 2157 } 2158 for (; i < nlen + 1; i++) { 2159 prod[i] = 0; 2160 } 2161 #else 2162 for (i = 0; i < tmp->len; i++) { 2163 prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL); 2164 prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32); 2165 } 2166 for (i = tmp->len * 2; i < nlen + 1; i++) { 2167 prod[i] = 0; 2168 } 2169 #endif 2170 2171 bitind = nbits % BIG_CHUNK_SIZE; 2172 k = 0; 2173 l = 0; 2174 p = 0; 2175 bitcount = 0; 2176 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) { 2177 for (j = bitind - 1; j >= 0; j--) { 2178 bit = (e->value[i] >> j) & 1; 2179 if ((bitcount == 0) && (bit == 0)) { 2180 conv_i32_to_d32_and_d16(d32r, d16r, 2181 prod, nlen); 2182 mont_mulf_noconv(prod, d32r, d16r, 2183 dt, dn, nint, nlen, dn0); 2184 } else { 2185 bitcount++; 2186 p = p * 2 + bit; 2187 if (bit == 1) { 2188 k = k + l + 1; 2189 l = 0; 2190 } else { 2191 l++; 2192 } 2193 if (bitcount == groupbits) { 2194 for (m = 0; m < k; m++) { 2195 conv_i32_to_d32_and_d16(d32r, 2196 d16r, prod, nlen); 2197 mont_mulf_noconv(prod, d32r, 2198 d16r, dt, dn, nint, 2199 nlen, dn0); 2200 } 2201 conv_i32_to_d32(d32r, prod, nlen); 2202 mont_mulf_noconv(prod, d32r, 2203 apowers[p >> (l + 1)], 2204 dt, dn, nint, nlen, dn0); 2205 for (m = 0; m < l; m++) { 2206 conv_i32_to_d32_and_d16(d32r, 2207 d16r, 
prod, nlen); 2208 mont_mulf_noconv(prod, d32r, 2209 d16r, dt, dn, nint, 2210 nlen, dn0); 2211 } 2212 k = 0; 2213 l = 0; 2214 p = 0; 2215 bitcount = 0; 2216 } 2217 } 2218 } 2219 bitind = BIG_CHUNK_SIZE; 2220 } 2221 2222 for (m = 0; m < k; m++) { 2223 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen); 2224 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0); 2225 } 2226 if (p != 0) { 2227 conv_i32_to_d32(d32r, prod, nlen); 2228 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)], 2229 dt, dn, nint, nlen, dn0); 2230 } 2231 for (m = 0; m < l; m++) { 2232 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen); 2233 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0); 2234 } 2235 2236 #if (BIG_CHUNK_SIZE == 32) 2237 for (i = 0; i < nlen; i++) { 2238 result->value[i] = prod[i]; 2239 } 2240 for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--) 2241 ; 2242 #else 2243 for (i = 0; i < nlen / 2; i++) { 2244 result->value[i] = (uint64_t)(prod[2 * i]) + 2245 (((uint64_t)(prod[2 * i + 1])) << 32); 2246 } 2247 for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--) 2248 ; 2249 #endif 2250 result->len = i + 1; 2251 2252 ret: 2253 for (i = apowerssize - 1; i >= 0; i--) { 2254 if (apowers[i] != NULL) 2255 big_free(apowers[i], (2 * nlen + 1) * sizeof (double)); 2256 } 2257 if (d32r != NULL) { 2258 big_free(d32r, nlen * sizeof (double)); 2259 } 2260 if (d16r != NULL) { 2261 big_free(d16r, (2 * nlen + 1) * sizeof (double)); 2262 } 2263 if (prod != NULL) { 2264 big_free(prod, (nlen + 1) * sizeof (uint32_t)); 2265 } 2266 if (nint != NULL) { 2267 big_free(nint, nlen * sizeof (uint32_t)); 2268 } 2269 if (dt != NULL) { 2270 big_free(dt, (4 * nlen + 2) * sizeof (double)); 2271 } 2272 if (dn != NULL) { 2273 big_free(dn, nlen * sizeof (double)); 2274 } 2275 2276 #ifdef _KERNEL 2277 big_restorefp(fpu); 2278 #endif 2279 2280 return (err); 2281 } 2282 2283 #endif /* USE_FLOATING_POINT */ 2284 2285 2286 BIG_ERR_CODE 2287 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, 
BIGNUM *n_rr, 2288 big_modexp_ncp_info_t *info) 2289 { 2290 BIGNUM ma, tmp, rr; 2291 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE]; 2292 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2293 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE]; 2294 BIG_ERR_CODE err; 2295 BIG_CHUNK_TYPE n0; 2296 2297 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) != 2298 BIG_OK) { 2299 return (err); 2300 } 2301 ma.len = 1; 2302 ma.value[0] = 0; 2303 2304 if ((err = big_init1(&tmp, 2 * n->len + 1, 2305 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 2306 goto ret1; 2307 } 2308 2309 /* clear the malloced bit to help cleanup */ 2310 rr.malloced = 0; 2311 2312 if (n_rr == NULL) { 2313 if ((err = big_init1(&rr, 2 * n->len + 1, 2314 rrvalue, arraysize(rrvalue))) != BIG_OK) { 2315 goto ret2; 2316 } 2317 if (big_mont_rr(&rr, n) != BIG_OK) { 2318 goto ret; 2319 } 2320 n_rr = &rr; 2321 } 2322 2323 n0 = big_n0(n->value[0]); 2324 2325 if (big_cmp_abs(a, n) > 0) { 2326 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) { 2327 goto ret; 2328 } 2329 err = big_mont_conv(&ma, &ma, n, n0, n_rr); 2330 } else { 2331 err = big_mont_conv(&ma, a, n, n0, n_rr); 2332 } 2333 if (err != BIG_OK) { 2334 goto ret; 2335 } 2336 2337 tmp.len = 1; 2338 tmp.value[0] = 1; 2339 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) { 2340 goto ret; 2341 } 2342 2343 if ((info != NULL) && (info->func != NULL)) { 2344 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0, 2345 info->ncp, info->reqp); 2346 } else { 2347 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0); 2348 } 2349 if (err != BIG_OK) { 2350 goto ret; 2351 } 2352 2353 ma.value[0] = 1; 2354 ma.len = 1; 2355 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) { 2356 goto ret; 2357 } 2358 err = big_copy(result, &tmp); 2359 2360 ret: 2361 if (rr.malloced) { 2362 big_finish(&rr); 2363 } 2364 ret2: 2365 big_finish(&tmp); 2366 ret1: 2367 big_finish(&ma); 2368 2369 return (err); 2370 } 2371 2372 BIG_ERR_CODE 2373 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM 
/*
 * big_modexp_crt_ext()
 * RSA-style modular exponentiation by the Chinese Remainder Theorem:
 * computes a ** d mod (p * q) from the two half-size exponentiations
 *	ap = (a mod p) ** dmodpminus1 mod p
 *	aq = (a mod q) ** dmodqminus1 mod q
 * and recombines them as
 *	result = ap + p * (((aq - ap) * pinvmodq) mod q)
 * p_rr and q_rr are optional precomputed R^2 values for the two moduli;
 * info optionally selects a hardware-assisted exponentiation core.
 */
BIG_ERR_CODE
big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1,
    BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq,
    BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info)
{
	BIGNUM		ap, aq, tmp;
	int		alen, biglen, sign;
	BIG_ERR_CODE	err;

	if (p->len > q->len) {
		biglen = p->len;
	} else {
		biglen = q->len;
	}

	if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) {
		goto ret2;
	}

	/*
	 * check whether a is too short - to avoid timing attacks
	 */
	alen = a->len;
	while ((alen > p->len) && (a->value[alen - 1] == 0)) {
		alen--;
	}
	if (alen < p->len + q->len) {
		/*
		 * a is too short, add p*q to it before
		 * taking it modulo p and q
		 * this will also affect timing, but this difference
		 * does not depend on p or q, only on a
		 * (in "normal" operation, this path will never be
		 * taken, so it is not a performance penalty
		 */
		if ((err = big_mul(&tmp, p, q)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
			goto ret;
		}
	} else {
		if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) {
			goto ret;
		}
	}

	/* The two half-size exponentiations */
	if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) !=
	    BIG_OK) {
		goto ret;
	}
	if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) !=
	    BIG_OK) {
		goto ret;
	}
	/* Recombine: h = ((aq - ap) * pinvmodq) mod q, done on |aq - ap| */
	if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) {
		goto ret;
	}
	if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) {
		goto ret;
	}
	sign = tmp.sign;
	tmp.sign = 1;
	if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) {
		goto ret;
	}
	/* If (aq - ap) was negative, fold the residue back: q - |h| */
	if ((sign == -1) && (!big_is_zero(&aq))) {
		(void) big_sub_pos(&aq, q, &aq);
	}
	/* result = ap + p * h */
	if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) {
		goto ret;
	}
	err = big_add_abs(result, &ap, &tmp);

ret:
	big_finish(&tmp);
ret2:
	big_finish(&aq);
ret1:
	big_finish(&ap);

	return (err);
}
/*
 * big_Jacobi_pos()
 * Compute the Jacobi symbol of (nn, mm) into *jac (one of -1, 0, 1),
 * using the binary algorithm: factors of two are stripped from the
 * "denominator" with the (2/n) rule (sign flips when n mod 8 is 3 or 5),
 * quadratic reciprocity flips the sign when both operands are 3 mod 4,
 * and the pair is reduced with big_div_pos and rotated.
 * NOTE(review): orientation of the symbol inferred from the caller —
 * big_isprime_pos_ext() passes (o*o - 4, n) and loops until *jac == -1.
 *
 * *jac is 0 when nn == 0 or when nn and mm are both even.
 */
BIG_ERR_CODE
big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
{
	BIGNUM		*t, *tmp2, *m, *n;
	BIGNUM		t1, t2, t3;
	BIG_CHUNK_TYPE	t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE	t3value[BIGTMPSIZE];
	int		len, err;

	/* Symbol is 0 for nn == 0, or when both arguments are even */
	if (big_is_zero(nn) ||
	    (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
		*jac = 0;
		return (BIG_OK);
	}

	if (nn->len > mm->len) {
		len = nn->len;
	} else {
		len = mm->len;
	}

	if ((err = big_init1(&t1, len,
	    t1value, arraysize(t1value))) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&t2, len,
	    t2value, arraysize(t2value))) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&t3, len,
	    t3value, arraysize(t3value))) != BIG_OK) {
		goto ret2;
	}

	/* Work on copies; the originals are not modified */
	n = &t1;
	m = &t2;
	tmp2 = &t3;

	(void) big_copy(n, nn);
	(void) big_copy(m, mm);

	*jac = 1;
	while (big_cmp_abs(&big_One, m) != 0) {
		if (big_is_zero(n)) {
			/* gcd(n, m) > 1: the symbol is 0 */
			*jac = 0;
			goto ret;
		}
		if ((m->value[0] & 1) == 0) {
			/* (2/n) rule: flip when n is 3 or 5 mod 8 */
			if (((n->value[0] & 7) == 3) ||
			    ((n->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(m, m);
		} else if ((n->value[0] & 1) == 0) {
			if (((m->value[0] & 7) == 3) ||
			    ((m->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(n, n);
		} else {
			/* Reciprocity: flip when both are 3 mod 4 */
			if (((m->value[0] & 3) == 3) &&
			    ((n->value[0] & 3) == 3)) {
				*jac = -*jac;
			}
			/* Reduce m mod n, then rotate (m, n) <- (n, m mod n) */
			if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
				goto ret;
			}
			t = tmp2;
			tmp2 = m;
			m = n;
			n = t;
		}
	}
	err = BIG_OK;

ret:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}
(big_cmp_abs(k, &big_One) == 0) { 2689 (void) big_copy(Lk, p); 2690 (void) big_copy(Lkminus1, &big_Two); 2691 return (BIG_OK); 2692 } 2693 2694 if ((err = big_init1(&ki, k->len + 1, 2695 kivalue, arraysize(kivalue))) != BIG_OK) 2696 return (err); 2697 2698 if ((err = big_init1(&tmp, 2 * n->len + 1, 2699 tmpvalue, arraysize(tmpvalue))) != BIG_OK) 2700 goto ret1; 2701 2702 if ((err = big_init1(&tmp2, n->len, 2703 tmp2value, arraysize(tmp2value))) != BIG_OK) 2704 goto ret2; 2705 2706 m = big_numbits(k); 2707 ki.len = (m - 1) / BIG_CHUNK_SIZE + 1; 2708 w = (m - 1) / BIG_CHUNK_SIZE; 2709 bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE); 2710 for (i = 0; i < ki.len; i++) { 2711 ki.value[i] = 0; 2712 } 2713 ki.value[ki.len - 1] = bit; 2714 if (big_cmp_abs(k, &ki) != 0) { 2715 (void) big_double(&ki, &ki); 2716 } 2717 (void) big_sub_pos(&ki, &ki, k); 2718 2719 (void) big_copy(Lk, p); 2720 (void) big_copy(Lkminus1, &big_Two); 2721 2722 for (i = 0; i < m; i++) { 2723 if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) { 2724 goto ret; 2725 } 2726 (void) big_add_abs(&tmp, &tmp, n); 2727 (void) big_sub_pos(&tmp, &tmp, p); 2728 if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) { 2729 goto ret; 2730 } 2731 if ((ki.value[w] & bit) != 0) { 2732 if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) != 2733 BIG_OK) { 2734 goto ret; 2735 } 2736 (void) big_add_abs(&tmp, &tmp, n); 2737 (void) big_sub_pos(&tmp, &tmp, &big_Two); 2738 if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) != 2739 BIG_OK) { 2740 goto ret; 2741 } 2742 (void) big_copy(Lk, &tmp2); 2743 } else { 2744 if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) { 2745 goto ret; 2746 } 2747 (void) big_add_abs(&tmp, &tmp, n); 2748 (void) big_sub_pos(&tmp, &tmp, &big_Two); 2749 if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) { 2750 goto ret; 2751 } 2752 (void) big_copy(Lkminus1, &tmp2); 2753 } 2754 bit = bit >> 1; 2755 if (bit == 0) { 2756 bit = BIG_CHUNK_HIGHBIT; 2757 w--; 2758 } 2759 } 2760 2761 err = BIG_OK; 2762 2763 ret: 
2764 if (tmp2.malloced) big_finish(&tmp2); 2765 ret2: 2766 if (tmp.malloced) big_finish(&tmp); 2767 ret1: 2768 if (ki.malloced) big_finish(&ki); 2769 2770 return (err); 2771 } 2772 2773 2774 BIG_ERR_CODE 2775 big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info) 2776 { 2777 BIGNUM o, nminus1, tmp, Lkminus1, Lk; 2778 BIG_CHUNK_TYPE ovalue[BIGTMPSIZE]; 2779 BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE]; 2780 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2781 BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE]; 2782 BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE]; 2783 BIG_ERR_CODE err; 2784 int e, i, jac; 2785 2786 if (big_cmp_abs(n, &big_One) == 0) { 2787 return (BIG_FALSE); 2788 } 2789 if (big_cmp_abs(n, &big_Two) == 0) { 2790 return (BIG_TRUE); 2791 } 2792 if ((n->value[0] & 1) == 0) { 2793 return (BIG_FALSE); 2794 } 2795 2796 if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) != 2797 BIG_OK) { 2798 return (err); 2799 } 2800 2801 if ((err = big_init1(&nminus1, n->len, 2802 nminus1value, arraysize(nminus1value))) != BIG_OK) { 2803 goto ret1; 2804 } 2805 2806 if ((err = big_init1(&tmp, 2 * n->len, 2807 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 2808 goto ret2; 2809 } 2810 2811 if ((err = big_init1(&Lkminus1, n->len, 2812 Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) { 2813 goto ret3; 2814 } 2815 2816 if ((err = big_init1(&Lk, n->len, 2817 Lkvalue, arraysize(Lkvalue))) != BIG_OK) { 2818 goto ret4; 2819 } 2820 2821 (void) big_sub_pos(&o, n, &big_One); /* cannot fail */ 2822 (void) big_copy(&nminus1, &o); /* cannot fail */ 2823 e = 0; 2824 while ((o.value[0] & 1) == 0) { 2825 e++; 2826 (void) big_half_pos(&o, &o); /* cannot fail */ 2827 } 2828 if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) != 2829 BIG_OK) { 2830 goto ret; 2831 } 2832 2833 i = 0; 2834 while ((i < e) && 2835 (big_cmp_abs(&tmp, &big_One) != 0) && 2836 (big_cmp_abs(&tmp, &nminus1) != 0)) { 2837 if ((err = 2838 big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) != 2839 BIG_OK) 2840 goto ret; 2841 i++; 
2842 } 2843 2844 if (!((big_cmp_abs(&tmp, &nminus1) == 0) || 2845 ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) { 2846 err = BIG_FALSE; 2847 goto ret; 2848 } 2849 2850 if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) { 2851 goto ret; 2852 } 2853 2854 if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) { 2855 goto ret; 2856 } 2857 if (big_cmp_abs(&tmp, n) == 0) { 2858 err = BIG_FALSE; 2859 goto ret; 2860 } 2861 2862 (void) big_copy(&o, &big_Two); 2863 do { 2864 (void) big_add_abs(&o, &o, &big_One); 2865 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) { 2866 goto ret; 2867 } 2868 (void) big_sub_pos(&tmp, &tmp, &big_Four); 2869 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) { 2870 goto ret; 2871 } 2872 } while (jac != -1); 2873 2874 (void) big_add_abs(&tmp, n, &big_One); 2875 if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) { 2876 goto ret; 2877 } 2878 2879 if ((big_cmp_abs(&Lkminus1, &o) == 0) && 2880 (big_cmp_abs(&Lk, &big_Two) == 0)) { 2881 err = BIG_TRUE; 2882 } else { 2883 err = BIG_FALSE; 2884 } 2885 2886 ret: 2887 if (Lk.malloced) big_finish(&Lk); 2888 ret4: 2889 if (Lkminus1.malloced) big_finish(&Lkminus1); 2890 ret3: 2891 if (tmp.malloced) big_finish(&tmp); 2892 ret2: 2893 if (nminus1.malloced) big_finish(&nminus1); 2894 ret1: 2895 if (o.malloced) big_finish(&o); 2896 2897 return (err); 2898 } 2899 2900 2901 BIG_ERR_CODE 2902 big_isprime_pos(BIGNUM *n) 2903 { 2904 return (big_isprime_pos_ext(n, NULL)); 2905 } 2906 2907 2908 #define SIEVESIZE 1000 2909 2910 2911 BIG_ERR_CODE 2912 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info) 2913 { 2914 static const uint32_t smallprimes[] = { 2915 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 2916 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 }; 2917 BIG_ERR_CODE err; 2918 int sieve[SIEVESIZE]; 2919 int i; 2920 uint32_t off, p; 2921 2922 if ((err = big_copy(result, n)) != BIG_OK) { 2923 return (err); 2924 } 2925 result->value[0] |= 1; 2926 /* CONSTCOND */ 2927 while (1) { 
2928 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0; 2929 for (i = 0; 2930 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) { 2931 p = smallprimes[i]; 2932 off = big_modhalf_pos(result, p); 2933 off = p - off; 2934 if ((off % 2) == 1) { 2935 off = off + p; 2936 } 2937 off = off / 2; 2938 while (off < SIEVESIZE) { 2939 sieve[off] = 1; 2940 off = off + p; 2941 } 2942 } 2943 2944 for (i = 0; i < SIEVESIZE; i++) { 2945 if (sieve[i] == 0) { 2946 err = big_isprime_pos_ext(result, info); 2947 if (err != BIG_FALSE) { 2948 if (err != BIG_TRUE) { 2949 return (err); 2950 } else { 2951 goto out; 2952 } 2953 } 2954 2955 } 2956 if ((err = big_add_abs(result, result, &big_Two)) != 2957 BIG_OK) { 2958 return (err); 2959 } 2960 } 2961 } 2962 2963 out: 2964 return (BIG_OK); 2965 } 2966 2967 2968 BIG_ERR_CODE 2969 big_nextprime_pos(BIGNUM *result, BIGNUM *n) 2970 { 2971 return (big_nextprime_pos_ext(result, n, NULL)); 2972 } 2973 2974 2975 BIG_ERR_CODE 2976 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n) 2977 { 2978 BIG_ERR_CODE err; 2979 2980 2981 if ((err = big_copy(result, n)) != BIG_OK) 2982 return (err); 2983 result->value[0] |= 1; 2984 while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) { 2985 if (err != BIG_FALSE) 2986 return (err); 2987 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK) 2988 return (err); 2989 } 2990 return (BIG_OK); 2991 } 2992 2993 2994 /* 2995 * given m and e, computes the rest in the equation 2996 * gcd(m, e) = cm * m + ce * e 2997 */ 2998 BIG_ERR_CODE 2999 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e) 3000 { 3001 BIGNUM *xi, *ri, *riminus1, *riminus2, *t; 3002 BIGNUM *vmi, *vei, *vmiminus1, *veiminus1; 3003 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp; 3004 BIG_CHUNK_TYPE t1value[BIGTMPSIZE]; 3005 BIG_CHUNK_TYPE t2value[BIGTMPSIZE]; 3006 BIG_CHUNK_TYPE t3value[BIGTMPSIZE]; 3007 BIG_CHUNK_TYPE t4value[BIGTMPSIZE]; 3008 BIG_CHUNK_TYPE t5value[BIGTMPSIZE]; 3009 BIG_CHUNK_TYPE t6value[BIGTMPSIZE]; 3010 
BIG_CHUNK_TYPE t7value[BIGTMPSIZE]; 3011 BIG_CHUNK_TYPE t8value[BIGTMPSIZE]; 3012 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 3013 BIG_ERR_CODE err; 3014 int len; 3015 3016 if (big_cmp_abs(m, e) >= 0) { 3017 len = m->len; 3018 } else { 3019 len = e->len; 3020 } 3021 3022 if ((err = big_init1(&t1, len, 3023 t1value, arraysize(t1value))) != BIG_OK) { 3024 return (err); 3025 } 3026 if ((err = big_init1(&t2, len, 3027 t2value, arraysize(t2value))) != BIG_OK) { 3028 goto ret1; 3029 } 3030 if ((err = big_init1(&t3, len, 3031 t3value, arraysize(t3value))) != BIG_OK) { 3032 goto ret2; 3033 } 3034 if ((err = big_init1(&t4, len, 3035 t4value, arraysize(t3value))) != BIG_OK) { 3036 goto ret3; 3037 } 3038 if ((err = big_init1(&t5, len, 3039 t5value, arraysize(t5value))) != BIG_OK) { 3040 goto ret4; 3041 } 3042 if ((err = big_init1(&t6, len, 3043 t6value, arraysize(t6value))) != BIG_OK) { 3044 goto ret5; 3045 } 3046 if ((err = big_init1(&t7, len, 3047 t7value, arraysize(t7value))) != BIG_OK) { 3048 goto ret6; 3049 } 3050 if ((err = big_init1(&t8, len, 3051 t8value, arraysize(t8value))) != BIG_OK) { 3052 goto ret7; 3053 } 3054 3055 if ((err = big_init1(&tmp, 2 * len, 3056 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 3057 goto ret8; 3058 } 3059 3060 ri = &t1; 3061 ri->value[0] = 1; 3062 ri->len = 1; 3063 xi = &t2; 3064 riminus1 = &t3; 3065 riminus2 = &t4; 3066 vmi = &t5; 3067 vei = &t6; 3068 vmiminus1 = &t7; 3069 veiminus1 = &t8; 3070 3071 (void) big_copy(vmiminus1, &big_One); 3072 (void) big_copy(vmi, &big_One); 3073 (void) big_copy(veiminus1, &big_One); 3074 (void) big_copy(xi, &big_One); 3075 vei->len = 1; 3076 vei->value[0] = 0; 3077 3078 (void) big_copy(riminus1, m); 3079 (void) big_copy(ri, e); 3080 3081 while (!big_is_zero(ri)) { 3082 t = riminus2; 3083 riminus2 = riminus1; 3084 riminus1 = ri; 3085 ri = t; 3086 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) { 3087 goto ret; 3088 } 3089 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) { 3090 goto ret; 3091 } 3092 t = 
vmiminus1; 3093 vmiminus1 = vmi; 3094 vmi = t; 3095 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) { 3096 goto ret; 3097 } 3098 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) { 3099 goto ret; 3100 } 3101 t = veiminus1; 3102 veiminus1 = vei; 3103 vei = t; 3104 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) != 3105 BIG_OK) { 3106 goto ret; 3107 } 3108 } 3109 if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) { 3110 goto ret; 3111 } 3112 if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) { 3113 goto ret; 3114 } 3115 if (ce != NULL) { 3116 err = big_copy(ce, vei); 3117 } 3118 ret: 3119 if (tmp.malloced) big_finish(&tmp); 3120 ret8: 3121 if (t8.malloced) big_finish(&t8); 3122 ret7: 3123 if (t7.malloced) big_finish(&t7); 3124 ret6: 3125 if (t6.malloced) big_finish(&t6); 3126 ret5: 3127 if (t5.malloced) big_finish(&t5); 3128 ret4: 3129 if (t4.malloced) big_finish(&t4); 3130 ret3: 3131 if (t3.malloced) big_finish(&t3); 3132 ret2: 3133 if (t2.malloced) big_finish(&t2); 3134 ret1: 3135 if (t1.malloced) big_finish(&t1); 3136 3137 return (err); 3138 } 3139