1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Configuration guide 28 * ------------------- 29 * 30 * There are 4 preprocessor symbols used to configure the bignum 31 * implementation. This file contains no logic to configure based on 32 * processor; we leave that to the Makefiles to specify. 33 * 34 * USE_FLOATING_POINT 35 * Meaning: There is support for a fast floating-point implementation of 36 * Montgomery multiply. 37 * 38 * PSR_MUL 39 * Meaning: There are processor-specific versions of the low level 40 * functions to implement big_mul. Those functions are: big_mul_set_vec, 41 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support 42 * for all 4 functions. You cannot pick and choose which subset of these 43 * functions to support; that would lead to a rat's nest of #ifdefs. 44 * 45 * HWCAP 46 * Meaning: Call multiply support functions through a function pointer. 47 * On x86, there are multiple implementations for different hardware 48 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when 49 * a function is first used. 
So, the support functions are called through 50 * a function pointer. There is no need for that on Sparc, because there 51 * is only one implementation; support functions are called directly. 52 * Later, if there were some new VIS instruction, or something, and a 53 * run-time test were needed, rather than variant kernel modules and 54 * libraries, then HWCAP would be defined for Sparc, as well. 55 * 56 * UMUL64 57 * Meaning: It is safe to use generic C code that assumes the existence 58 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined, 59 * then the generic code for big_mul_add_vec() must necessarily be very slow, 60 * because it must fall back to using 16 x 16 --> 32 bit multiplication. 61 * 62 */ 63 64 65 #include <sys/types.h> 66 #include "bignum.h" 67 68 #ifdef _KERNEL 69 #include <sys/ddi.h> 70 #include <sys/mdesc.h> 71 #include <sys/crypto/common.h> 72 73 #include <sys/kmem.h> 74 #include <sys/param.h> 75 #include <sys/sunddi.h> 76 77 #else 78 #include <stdlib.h> 79 #include <stdio.h> 80 #include <assert.h> 81 #define ASSERT assert 82 #endif /* _KERNEL */ 83 84 #ifdef __amd64 85 #ifdef _KERNEL 86 #include <sys/x86_archext.h> /* cpuid_getvendor() */ 87 #include <sys/cpuvar.h> 88 #else 89 #include <sys/auxv.h> /* getisax() */ 90 #endif /* _KERNEL */ 91 #endif /* __amd64 */ 92 93 94 #ifdef _LP64 /* truncate 64-bit size_t to 32-bits */ 95 #define UI32(ui) ((uint32_t)ui) 96 #else /* size_t already 32-bits */ 97 #define UI32(ui) (ui) 98 #endif 99 100 101 #ifdef _KERNEL 102 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP) 103 #define big_free(ptr, size) kmem_free(ptr, size) 104 105 /* 106 * big_realloc() 107 * Allocate memory of newsize bytes and copy oldsize bytes 108 * to the newly-allocated memory, then free the 109 * previously-allocated memory. 
110 * Note: newsize must be > oldsize 111 */ 112 void * 113 big_realloc(void *from, size_t oldsize, size_t newsize) 114 { 115 void *rv; 116 117 rv = kmem_alloc(newsize, KM_NOSLEEP); 118 if (rv != NULL) 119 bcopy(from, rv, oldsize); 120 kmem_free(from, oldsize); 121 return (rv); 122 } 123 124 #else /* _KERNEL */ 125 126 #ifndef MALLOC_DEBUG 127 128 #define big_malloc(size) malloc(size) 129 #define big_free(ptr, size) free(ptr) 130 131 #else 132 133 void 134 big_free(void *ptr, size_t size) 135 { 136 printf("freed %d bytes at %p\n", size, ptr); 137 free(ptr); 138 } 139 140 void * 141 big_malloc(size_t size) 142 { 143 void *rv; 144 rv = malloc(size); 145 printf("malloced %d bytes, addr:%p\n", size, rv); 146 return (rv); 147 } 148 #endif /* MALLOC_DEBUG */ 149 150 #define big_realloc(x, y, z) realloc((x), (z)) 151 152 153 /* 154 * printbignum() 155 * Print a BIGNUM type to stdout. 156 */ 157 void 158 printbignum(char *aname, BIGNUM *a) 159 { 160 int i; 161 162 (void) printf("\n%s\n%d\n", aname, a->sign*a->len); 163 for (i = a->len - 1; i >= 0; i--) { 164 #ifdef BIGNUM_CHUNK_32 165 (void) printf("%08x ", a->value[i]); 166 if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) { 167 (void) printf("\n"); 168 } 169 #else 170 (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32), 171 (uint32_t)((a->value[i]) & 0xffffffff)); 172 if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */ 173 (void) printf("\n"); 174 } 175 #endif 176 } 177 (void) printf("\n"); 178 } 179 180 #endif /* _KERNEL */ 181 182 183 #ifdef __amd64 184 /* 185 * Return 1 if executing on Intel, otherwise 0 (e.g., AMD64). 186 * Cache the result, as the CPU can't change. 187 * 188 * Note: the userland version uses getisax() and checks for an AMD-64-only 189 * feature. The kernel version uses cpuid_getvendor(). 
 */
static int
bignum_on_intel(void)
{
    /* -1 means "not yet determined"; vendor cannot change at run time */
    static int cached_result = -1;

    if (cached_result == -1) { /* first time */
#ifdef _KERNEL
        cached_result = (cpuid_getvendor(CPU) == X86_VENDOR_Intel);
#else
        uint_t ui;

        (void) getisax(&ui, 1);
        /* AMD-only MMX extension absent ==> treat as Intel */
        cached_result = ((ui & AV_386_AMD_MMX) == 0);
#endif  /* _KERNEL */
    }

    return (cached_result);
}
#endif  /* __amd64 */


/*
 * big_init()
 * Initialize and allocate memory for a BIGNUM type.
 *
 * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
 *
 * Note: call big_finish() to free memory allocated by big_init().
 *
 * Input:
 * number	Uninitialized memory for BIGNUM
 * size		Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 *
 * Output:
 * number	Initialized BIGNUM
 *
 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 */
BIG_ERR_CODE
big_init(BIGNUM *number, int size)
{
    number->value = big_malloc(BIGNUM_WORDSIZE * size);
    if (number->value == NULL) {
        return (BIG_NO_MEM);
    }
    number->size = size;
    number->len = 0;
    number->sign = 1;
    number->malloced = 1;	/* big_finish() must free value */
    return (BIG_OK);
}


/*
 * big_init1()
 * Initialize and, if needed, allocate memory for a BIGNUM type.
 * Use the buffer passed, buf, if any, instead of allocating memory
 * if it's at least "size" bytes.
 *
 * Note: call big_finish() to free memory allocated by big_init().
 *
 * Input:
 * number	Uninitialized memory for BIGNUM
 * size		Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 * buf		Buffer for storing a BIGNUM.
 *		If NULL, big_init1() will allocate a buffer
 * bufsize	Size, in BIG_CHUNK_SIZE_bit words, of buf
 *
 * Output:
 * number	Initialized BIGNUM
 *
 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 */
BIG_ERR_CODE
big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
{
    /* allocate only if no caller buffer, or caller buffer too small */
    if ((buf == NULL) || (size > bufsize)) {
        number->value = big_malloc(BIGNUM_WORDSIZE * size);
        if (number->value == NULL) {
            return (BIG_NO_MEM);
        }
        number->size = size;
        number->malloced = 1;
    } else {
        number->value = buf;
        number->size = bufsize;
        number->malloced = 0;	/* caller owns buf; big_finish() won't free */
    }
    number->len = 0;
    number->sign = 1;

    return (BIG_OK);
}


/*
 * big_finish()
 * Free memory, if any, allocated by big_init() or big_init1().
 */
void
big_finish(BIGNUM *number)
{
    if (number->malloced == 1) {
        big_free(number->value, BIGNUM_WORDSIZE * number->size);
        number->malloced = 0;	/* guard against double free */
    }
}


/*
 * bn->size should be at least
 * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE bytes
 * converts from byte-big-endian format to bignum format (words in
 * little endian order, but bytes within the words big endian)
 */
void
bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
{
    int i, j;
    uint32_t offs;
    const uint32_t slen = UI32(len);
    BIG_CHUNK_TYPE word;
    uchar_t *knwordp;

    /* empty input encodes the value zero */
    if (slen == 0) {
        bn->len = 1;
        bn->value[0] = 0;
        return;
    }

    /* offs = leftover bytes that don't fill a whole word */
    offs = slen % BIGNUM_WORDSIZE;
    bn->len = slen / BIGNUM_WORDSIZE;

    /* full words, taken from the tail (least significant bytes) first */
    for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
        knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
        word = knwordp[0];
        for (j = 1; j < BIGNUM_WORDSIZE; j++) {
            word = (word << BITSINBYTE) + knwordp[j];
        }
        bn->value[i] = word;
    }
    /* partial high-order word built from the first offs bytes */
    if (offs > 0) {
        word = kn[0];
        for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
        bn->value[bn->len++] = word;
    }
    /* normalize: strip high-order zero words (keep at least one word) */
    while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
        bn->len --;
    }
}


/*
 * copies the least significant len bytes if
 * len < bn->len * BIGNUM_WORDSIZE
 * converts from bignum
 * format to byte-big-endian format.
 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
 */
void
bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
{
    int i, j;
    uint32_t offs;
    const uint32_t slen = UI32(len);
    BIG_CHUNK_TYPE word;

    if (len < BIGNUM_WORDSIZE * bn->len) {
        /* output shorter than the number: emit only the slen low bytes */
        for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
            word = bn->value[i];
            for (j = 0; j < BIGNUM_WORDSIZE; j++) {
                kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
                    word & 0xff;
                word = word >> BITSINBYTE;
            }
        }
        offs = slen % BIGNUM_WORDSIZE;
        /* leading partial word, written into the first offs bytes */
        if (offs > 0) {
            word = bn->value[slen / BIGNUM_WORDSIZE];
            for (i = slen % BIGNUM_WORDSIZE; i > 0; i --) {
                kn[i - 1] = word & 0xff;
                word = word >> BITSINBYTE;
            }
        }
    } else {
        /* output long enough: emit the whole number, zero-pad the front */
        for (i = 0; i < bn->len; i++) {
            word = bn->value[i];
            for (j = 0; j < BIGNUM_WORDSIZE; j++) {
                kn[slen - BIGNUM_WORDSIZE * i - j - 1] =
                    word & 0xff;
                word = word >> BITSINBYTE;
            }
        }
        for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) {
            kn[i] = 0;
        }
    }
}


/*
 * big_bitlength()
 * Return the position (1-based) of the most significant set bit of a,
 * i.e. the number of significant bits.
 */
int
big_bitlength(BIGNUM *a)
{
    int l = 0, b = 0;
    BIG_CHUNK_TYPE c;

    /* find the highest nonzero word */
    l = a->len - 1;
    while ((l > 0) && (a->value[l] == 0)) {
        l--;
    }
    /* count down from BIG_CHUNK_SIZE until the top bit of c is set */
    b = BIG_CHUNK_SIZE;
    c = a->value[l];
    while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
        c = c << 1;
        b--;
    }

    return (l * BIG_CHUNK_SIZE + b);
}


/*
 * big_copy()
 * Copy BIGNUM src to dest, allocating memory if needed.
 */
BIG_ERR_CODE
big_copy(BIGNUM *dest, BIGNUM *src)
{
    BIG_CHUNK_TYPE *newptr;
    int i, len;

    /* normalize src: drop high-order zero words (keep at least one) */
    len = src->len;
    while ((len > 1) && (src->value[len - 1] == 0)) {
        len--;
    }
    src->len = len;
    if (dest->size < len) {
        if (dest->malloced == 1) {
            newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
                BIGNUM_WORDSIZE * dest->size,
                BIGNUM_WORDSIZE * len);
        } else {
            /* dest used a caller buffer; switch to heap ownership */
            newptr = (BIG_CHUNK_TYPE *)
                big_malloc(BIGNUM_WORDSIZE * len);
            if (newptr != NULL) {
                dest->malloced = 1;
            }
        }
        if (newptr == NULL) {
            return (BIG_NO_MEM);
        }
        dest->value = newptr;
        dest->size = len;
    }
    dest->len = len;
    dest->sign = src->sign;
    for (i = 0; i < len; i++) {
        dest->value[i] = src->value[i];
    }

    return (BIG_OK);
}


/*
 * big_extend()
 * Allocate memory to extend BIGNUM number to size bignum chunks,
 * if not at least that size already.
 */
BIG_ERR_CODE
big_extend(BIGNUM *number, int size)
{
    BIG_CHUNK_TYPE *newptr;
    int i;

    if (number->size >= size)
        return (BIG_OK);
    if (number->malloced) {
        /* big_realloc preserves the old contents */
        number->value = big_realloc(number->value,
            BIGNUM_WORDSIZE * number->size,
            BIGNUM_WORDSIZE * size);
    } else {
        /* caller-owned buffer: allocate and copy the existing words */
        newptr = big_malloc(BIGNUM_WORDSIZE * size);
        if (newptr != NULL) {
            for (i = 0; i < number->size; i++) {
                newptr[i] = number->value[i];
            }
        }
        number->value = newptr;
    }

    if (number->value == NULL) {
        return (BIG_NO_MEM);
    }

    number->size = size;
    number->malloced = 1;
    return (BIG_OK);
}


/* returns 1 if n == 0 */
int
big_is_zero(BIGNUM *n)
{
    int i, result;

    /* scans every word; no early exit */
    result = 1;
    for (i = 0; i < n->len; i++) {
        if (n->value[i] != 0) {
            result = 0;
        }
    }
    return (result);
}


/*
 * big_add_abs()
 * result = |aa| + |bb|; result->sign is always set to 1.
 */
BIG_ERR_CODE
big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    int i, shorter, longer;
    BIG_CHUNK_TYPE cy, ai;
    BIG_CHUNK_TYPE *r, *a, *b, *c;
    BIG_ERR_CODE err;
    BIGNUM *longerarg;

    if (aa->len > bb->len) {
        shorter = bb->len;
        longer = aa->len;
        longerarg = aa;
    } else {
        shorter = aa->len;
        longer = bb->len;
        longerarg = bb;
    }
    /* +1 word for a possible final carry */
    if (result->size < longer + 1) {
        err = big_extend(result, longer + 1);
        if (err != BIG_OK) {
            return (err);
        }
    }

    r = result->value;
    a = aa->value;
    b = bb->value;
    c = longerarg->value;
    cy = 0;
    for (i = 0; i < shorter; i++) {
        ai = a[i];
        r[i] = ai + b[i] + cy;
        /*
         * Carry detection: r > ai means no wrap; r < ai means wrap.
         * r == ai leaves cy unchanged (b[i] + cy was 0 or 2^w).
         */
        if (r[i] > ai) {
            cy = 0;
        } else if (r[i] < ai) {
            cy = 1;
        }
    }
    /* propagate the carry through the longer operand's remaining words */
    for (; i < longer; i++) {
        ai = c[i];
        r[i] = ai + cy;
        if (r[i] >= ai) {
            cy = 0;
        }
    }
    if (cy == 1) {
        r[i] = cy;
        result->len = longer + 1;
    } else {
        result->len = longer;
    }
    result->sign = 1;
    return (BIG_OK);
}


/* caller must make sure that result has at least len words allocated */
void
big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
{
    int i;
    BIG_CHUNK_TYPE cy, ai;

    /* subtraction as a + ~b + 1 (two's complement); cy starts as the +1 */
    cy = 1;
    for (i = 0; i < len; i++) {
        ai = a[i];
        r[i] = ai + (~b[i]) + cy;
        if (r[i] > ai) {
            cy = 0;
        } else if (r[i] < ai) {
            cy = 1;
        }
    }
}


/* result=aa-bb  it is assumed that aa>=bb */
BIG_ERR_CODE
big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    int i, shorter;
    BIG_CHUNK_TYPE cy = 1, ai;
    BIG_CHUNK_TYPE *r, *a, *b;
    BIG_ERR_CODE err = BIG_OK;

    if (aa->len > bb->len) {
        shorter = bb->len;
    } else {
        shorter = aa->len;
    }
    if (result->size < aa->len) {
        err = big_extend(result, aa->len);
        if (err != BIG_OK) {
            return (err);
        }
    }

    r = result->value;
    a = aa->value;
    b = bb->value;
    result->len = aa->len;
    /* a - b computed as a + ~b + 1 */
    cy = 1;
    for (i = 0; i < shorter; i++) {
        ai = a[i];
        r[i] = ai + (~b[i]) + cy;
        if (r[i] > ai) {
            cy = 0;
        } else if (r[i] < ai) {
            cy = 1;
        }
    }
    /* treat bb's missing high words as zero: ~0 + cy */
    for (; i < aa->len; i++) {
        ai = a[i];
        r[i] = ai + (~0) + cy;
        if (r[i] < ai) {
            cy = 1;
        }
    }
    result->sign = 1;

    /* a final borrow (cy == 0) means aa < bb, violating the precondition */
    if (cy == 0) {
        return (BIG_INVALID_ARGS);
    } else {
        return (BIG_OK);
    }
}


/* returns -1 if |aa| < |bb|, 0 if |aa| == |bb|, 1 if |aa| > |bb| */
int
big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
{
    int i;

    /* any nonzero word beyond the other's length decides the comparison */
    if (aa->len > bb->len) {
        for (i = aa->len - 1; i > bb->len - 1; i--) {
            if (aa->value[i] > 0) {
                return (1);
            }
        }
    } else if (aa->len < bb->len) {
        for (i = bb->len - 1; i > aa->len - 1; i--) {
            if (bb->value[i] > 0) {
                return (-1);
            }
        }
    } else {
        i = aa->len - 1;
    }
    /* compare the common words from most significant downward */
    for (; i >= 0; i--) {
        if (aa->value[i] > bb->value[i]) {
            return (1);
        } else if (aa->value[i] < bb->value[i]) {
            return (-1);
        }
    }

    return (0);
}


/*
 * big_sub()
 * result = aa - bb, with full sign handling; magnitudes are combined
 * via big_add_abs()/big_sub_pos() according to the operand signs.
 */
BIG_ERR_CODE
big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    BIG_ERR_CODE err;

    if ((bb->sign == -1) && (aa->sign == 1)) {
        /* aa - (-bb) = aa + bb */
        if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
            return (err);
        }
        result->sign = 1;
    } else if ((aa->sign == -1) && (bb->sign == 1)) {
        /* (-aa) - bb = -(aa + bb) */
        if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
            return (err);
        }
        result->sign = -1;
    } else if ((aa->sign == 1) && (bb->sign == 1)) {
        /* both positive: subtract the smaller magnitude from the larger */
        if (big_cmp_abs(aa, bb) >= 0) {
            if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
                return (err);
            }
            result->sign = 1;
        } else {
            if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
                return (err);
            }
            result->sign = -1;
        }
    } else {
        /* both negative: (-aa) - (-bb) = bb - aa */
        if (big_cmp_abs(aa, bb) >= 0) {
            if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
                return (err);
            }
            result->sign = -1;
        } else {
            if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
                return (err);
            }
            result->sign = 1;
        }
    }
    return (BIG_OK);
}


/*
 * big_add()
 * result = aa + bb, with full sign handling; magnitudes are combined
 * via big_add_abs()/big_sub_pos() according to the operand signs.
 */
BIG_ERR_CODE
big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    BIG_ERR_CODE err;

    if ((bb->sign == -1) && (aa->sign == -1)) {
        if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
            return (err);
        }
        result->sign = -1;
    } else if ((aa->sign == 1) && (bb->sign == 1)) {
        if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
            return (err);
        }
        result->sign = 1;
    } else if ((aa->sign == 1) && (bb->sign == -1)) {
        /* aa + (-bb): subtract the smaller magnitude from the larger */
        if (big_cmp_abs(aa, bb) >= 0) {
            if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
                return (err);
            }
            result->sign = 1;
        } else {
            if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
                return (err);
            }
            result->sign = -1;
        }
    } else {
        /* (-aa) + bb */
        if (big_cmp_abs(aa, bb) >= 0) {
            if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
                return (err);
            }
            result->sign = -1;
        } else {
            if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
                return (err);
            }
            result->sign = 1;
        }
    }
    return (BIG_OK);
}


/* result = aa/2 */
BIG_ERR_CODE
big_half_pos(BIGNUM *result, BIGNUM *aa)
{
    BIG_ERR_CODE err;
    int i;
    BIG_CHUNK_TYPE cy, cy1;
    BIG_CHUNK_TYPE *a, *r;

    if (result->size < aa->len) {
        err = big_extend(result, aa->len);
        if (err != BIG_OK) {
            return (err);
        }
    }

    result->len = aa->len;
    a = aa->value;
    r = result->value;
    /* shift right by one bit, carrying the dropped bit into the word below */
    cy = 0;
    for (i = aa->len - 1; i >= 0; i--) {
        cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
        r[i] = (cy | (a[i] >> 1));
        cy = cy1;
    }
    if (r[result->len - 1] == 0) {
        result->len--;
    }

    return (BIG_OK);
}

/* result = aa*2 */
BIG_ERR_CODE
big_double(BIGNUM *result, BIGNUM *aa)
{
    BIG_ERR_CODE err;
    int i, rsize;
    BIG_CHUNK_TYPE cy, cy1;
    BIG_CHUNK_TYPE *a, *r;

    /* an extra word is needed iff the top bit of aa is set */
    if ((aa->len > 0) &&
        ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
        rsize = aa->len + 1;
    } else {
        rsize = aa->len;
    }

    if (result->size < rsize) {
        err = big_extend(result, rsize);
        if (err != BIG_OK)
            return (err);
    }

    a = aa->value;
    r = result->value;
    /* if a top word was added, it holds the shifted-out high bit (1) */
    if (rsize == aa->len + 1) {
        r[rsize - 1] = 1;
    }
    cy = 0;
    for (i = 0; i < aa->len; i++) {
        cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
        r[i] = (cy | (a[i] << 1));
        cy = cy1;
    }
    result->len = rsize;
    return (BIG_OK);
}


/*
 * returns aa mod b, aa must be nonneg, b must be a max
 * (BIG_CHUNK_SIZE / 2)-bit integer
 */
static uint32_t
big_modhalf_pos(BIGNUM *aa, uint32_t b)
{
    int i;
    BIG_CHUNK_TYPE rem;

    if (aa->len == 0) {
        return (0);
    }
    /* long division, one half-word of aa at a time */
    rem = aa->value[aa->len - 1] % b;
    for (i = aa->len - 2; i >= 0; i--) {
        rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
            (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
        rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
            (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
    }

    return ((uint32_t)rem);
}


/*
 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
 * result->size should be at least aa->len at entry
 * aa, bb, and result should be positive
 */
void
big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    int i, lendiff;
    BIGNUM res1, aa1;

    /* alias the high bb->len words of aa and result, then subtract bb */
    lendiff = aa->len - bb->len;
    res1.size = result->size - lendiff;
    res1.malloced = 0;
    res1.value = result->value + lendiff;
    aa1.size = aa->size - lendiff;
    aa1.value = aa->value + lendiff;
    aa1.len = bb->len;
    aa1.sign = 1;
    (void) big_sub_pos(&res1, &aa1, bb);
    /* the untouched low words are copied only if result != aa */
    if (result->value != aa->value) {
        for (i = 0; i < lendiff; i++) {
            result->value[i] = aa->value[i];
        }
    }
    result->len = aa->len;
}


/*
 * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
 * (2^BIG_CHUNK_SIZE)^lendiff * |bb|
 * aa->len should be >= bb->len
 */
int
big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
{
    int lendiff;
    BIGNUM aa1;

    /* compare only the high bb->len words of aa against bb */
    lendiff = aa->len - bb->len;
    aa1.len = bb->len;
    aa1.size = aa->size - lendiff;
    aa1.malloced = 0;
    aa1.value = aa->value + lendiff;
    return (big_cmp_abs(&aa1, bb));
}


/*
 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
 * result should have enough space allocated.
 */
static void
big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
    int i;
    BIG_CHUNK_TYPE t1, t2, ai, cy;
    BIG_CHUNK_TYPE *a, *r;

    a = aa->value;
    r = result->value;
    cy = 0;
    /* schoolbook multiply, splitting each word into two half-words */
    for (i = 0; i < aa->len; i++) {
        ai = a[i];
        t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
        t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
            (t1 >> (BIG_CHUNK_SIZE / 2));
        r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
            (t2 << (BIG_CHUNK_SIZE / 2));
        cy = t2 >> (BIG_CHUNK_SIZE / 2);
    }
    r[i] = cy;
    result->len = aa->len + 1;
    result->sign = aa->sign;
}


/*
 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
 * (BIG_CHUNK_SIZE / 2)-bit positive integer.
 * result should have enough space allocated.
934 */ 935 static void 936 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b) 937 { 938 int i; 939 BIG_CHUNK_TYPE t1, t2, ai, cy, ri; 940 BIG_CHUNK_TYPE *a, *r; 941 942 a = aa->value; 943 r = result->value; 944 cy = 0; 945 ri = 0; 946 for (i = 0; i < aa->len; i++) { 947 ai = a[i]; 948 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy; 949 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b + 950 (t1 >> (BIG_CHUNK_SIZE / 2)); 951 r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri; 952 ri = t2 & BIG_CHUNK_LOWHALFBITS; 953 cy = t2 >> (BIG_CHUNK_SIZE / 2); 954 } 955 r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri; 956 result->len = aa->len + 1; 957 result->sign = aa->sign; 958 } 959 960 961 /* it is assumed that result->size is big enough */ 962 void 963 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs) 964 { 965 int i; 966 BIG_CHUNK_TYPE cy, ai; 967 968 if (offs == 0) { 969 if (result != aa) { 970 (void) big_copy(result, aa); 971 } 972 return; 973 } 974 cy = 0; 975 for (i = 0; i < aa->len; i++) { 976 ai = aa->value[i]; 977 result->value[i] = (ai << offs) | cy; 978 cy = ai >> (BIG_CHUNK_SIZE - offs); 979 } 980 if (cy != 0) { 981 result->len = aa->len + 1; 982 result->value[result->len - 1] = cy; 983 } else { 984 result->len = aa->len; 985 } 986 result->sign = aa->sign; 987 } 988 989 990 /* it is assumed that result->size is big enough */ 991 void 992 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs) 993 { 994 int i; 995 BIG_CHUNK_TYPE cy, ai; 996 997 if (offs == 0) { 998 if (result != aa) { 999 (void) big_copy(result, aa); 1000 } 1001 return; 1002 } 1003 cy = aa->value[0] >> offs; 1004 for (i = 1; i < aa->len; i++) { 1005 ai = aa->value[i]; 1006 result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy; 1007 cy = ai >> offs; 1008 } 1009 result->len = aa->len; 1010 result->value[result->len - 1] = cy; 1011 result->sign = aa->sign; 1012 } 1013 1014 1015 /* 1016 * result = aa/bb remainder = aa mod bb 1017 * it is assumed that aa and bb are positive 1018 */ 1019 BIG_ERR_CODE 1020 
big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb) 1021 { 1022 BIG_ERR_CODE err = BIG_OK; 1023 int i, alen, blen, tlen, rlen, offs; 1024 BIG_CHUNK_TYPE higha, highb, coeff; 1025 BIG_CHUNK_TYPE *a, *b; 1026 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2; 1027 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE]; 1028 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE]; 1029 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE]; 1030 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE]; 1031 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE]; 1032 1033 a = aa->value; 1034 b = bb->value; 1035 alen = aa->len; 1036 blen = bb->len; 1037 while ((alen > 1) && (a[alen - 1] == 0)) { 1038 alen = alen - 1; 1039 } 1040 aa->len = alen; 1041 while ((blen > 1) && (b[blen - 1] == 0)) { 1042 blen = blen - 1; 1043 } 1044 bb->len = blen; 1045 if ((blen == 1) && (b[0] == 0)) { 1046 return (BIG_DIV_BY_0); 1047 } 1048 1049 if (big_cmp_abs(aa, bb) < 0) { 1050 if ((remainder != NULL) && 1051 ((err = big_copy(remainder, aa)) != BIG_OK)) { 1052 return (err); 1053 } 1054 if (result != NULL) { 1055 result->len = 1; 1056 result->sign = 1; 1057 result->value[0] = 0; 1058 } 1059 return (BIG_OK); 1060 } 1061 1062 if ((err = big_init1(&bblow, blen + 1, 1063 bblowvalue, arraysize(bblowvalue))) != BIG_OK) 1064 return (err); 1065 1066 if ((err = big_init1(&bbhigh, blen + 1, 1067 bbhighvalue, arraysize(bbhighvalue))) != BIG_OK) 1068 goto ret1; 1069 1070 if ((err = big_init1(&tmp1, alen + 2, 1071 tmp1value, arraysize(tmp1value))) != BIG_OK) 1072 goto ret2; 1073 1074 if ((err = big_init1(&tmp2, blen + 2, 1075 tmp2value, arraysize(tmp2value))) != BIG_OK) 1076 goto ret3; 1077 1078 if ((err = big_init1(&tresult, alen - blen + 2, 1079 tresultvalue, arraysize(tresultvalue))) != BIG_OK) 1080 goto ret4; 1081 1082 offs = 0; 1083 highb = b[blen - 1]; 1084 if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) { 1085 highb = highb >> (BIG_CHUNK_SIZE / 2); 1086 offs = (BIG_CHUNK_SIZE / 2); 1087 } 1088 while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) { 1089 highb = highb << 1; 1090 
offs++; 1091 } 1092 1093 big_shiftleft(&bblow, bb, offs); 1094 1095 if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) { 1096 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2); 1097 } else { 1098 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2); 1099 } 1100 if (bbhigh.value[bbhigh.len - 1] == 0) { 1101 bbhigh.len--; 1102 } else { 1103 bbhigh.value[bbhigh.len] = 0; 1104 } 1105 1106 highb = bblow.value[bblow.len - 1]; 1107 1108 big_shiftleft(&tmp1, aa, offs); 1109 rlen = tmp1.len - bblow.len + 1; 1110 tresult.len = rlen; 1111 1112 tmp1.len++; 1113 tlen = tmp1.len; 1114 tmp1.value[tmp1.len - 1] = 0; 1115 for (i = 0; i < rlen; i++) { 1116 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) + 1117 (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2)); 1118 coeff = higha / (highb + 1); 1119 big_mulhalf_high(&tmp2, &bblow, coeff); 1120 big_sub_pos_high(&tmp1, &tmp1, &tmp2); 1121 bbhigh.len++; 1122 while (tmp1.value[tlen - 1] > 0) { 1123 big_sub_pos_high(&tmp1, &tmp1, &bbhigh); 1124 coeff++; 1125 } 1126 bbhigh.len--; 1127 tlen--; 1128 tmp1.len--; 1129 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) { 1130 big_sub_pos_high(&tmp1, &tmp1, &bbhigh); 1131 coeff++; 1132 } 1133 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2); 1134 higha = tmp1.value[tlen - 1]; 1135 coeff = higha / (highb + 1); 1136 big_mulhalf_low(&tmp2, &bblow, coeff); 1137 tmp2.len--; 1138 big_sub_pos_high(&tmp1, &tmp1, &tmp2); 1139 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) { 1140 big_sub_pos_high(&tmp1, &tmp1, &bblow); 1141 coeff++; 1142 } 1143 tresult.value[rlen - i - 1] = 1144 tresult.value[rlen - i - 1] + coeff; 1145 } 1146 1147 big_shiftright(&tmp1, &tmp1, offs); 1148 1149 err = BIG_OK; 1150 1151 if ((remainder != NULL) && 1152 ((err = big_copy(remainder, &tmp1)) != BIG_OK)) 1153 goto ret; 1154 1155 if (result != NULL) 1156 err = big_copy(result, &tresult); 1157 1158 ret: 1159 big_finish(&tresult); 1160 ret4: 1161 big_finish(&tmp1); 1162 ret3: 1163 big_finish(&tmp2); 1164 ret2: 1165 big_finish(&bbhigh); 
ret1:
    big_finish(&bblow);
    return (err);
}


/*
 * If there is no processor-specific integer implementation of
 * the lower level multiply functions, then this code is provided
 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
 * big_sqr_vec().
 *
 * There are two generic implementations. One that assumes that
 * there is hardware and C compiler support for a 32 x 32 --> 64
 * bit unsigned multiply, but otherwise is not specific to any
 * processor, platform, or ISA.
 *
 * The other makes very few assumptions about hardware capabilities.
 * It does not even assume that there is any implementation of a
 * 32 x 32 --> 64 bit multiply that is accessible to C code and
 * appropriate to use. It constructs 32 x 32 --> 64 bit
 * multiplies from 16 x 16 --> 32 bit multiplies.
 *
 */

#if !defined(PSR_MUL)

#ifdef UMUL64

#if (BIG_CHUNK_SIZE == 32)

#define UNROLL8

/*
 * One step of r = a * d: multiply, add the running carry, store the low
 * 32 bits, keep the high 32 bits as the next carry. The PREFETCH variant
 * loads a[R + 1] early to overlap the load with the multiply.
 */
#define MUL_SET_VEC_ROUND_PREFETCH(R) \
    p = pf * d; \
    pf = (uint64_t)a[R + 1]; \
    t = p + cy; \
    r[R] = (uint32_t)t; \
    cy = t >> 32

#define MUL_SET_VEC_ROUND_NOPREFETCH(R) \
    p = pf * d; \
    t = p + cy; \
    r[R] = (uint32_t)t; \
    cy = t >> 32

/*
 * One step of r += a * d; like the above but also adds the existing r[R].
 */
#define MUL_ADD_VEC_ROUND_PREFETCH(R) \
    t = (uint64_t)r[R]; \
    p = pf * d; \
    pf = (uint64_t)a[R + 1]; \
    t = p + t + cy; \
    r[R] = (uint32_t)t; \
    cy = t >> 32

#define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
    t = (uint64_t)r[R]; \
    p = pf * d; \
    t = p + t + cy; \
    r[R] = (uint32_t)t; \
    cy = t >> 32

#ifdef UNROLL8

#define UNROLL 8

/*
 * r = a * b
 * where r and a are vectors; b is a single 32-bit digit
 * Returns the final carry (the high digit of the product).
 */

uint32_t
big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
    uint64_t d, pf, p, t, cy;

    if (len == 0)
        return (0);
    cy = 0;
    d = (uint64_t)b;
    pf = (uint64_t)a[0];
    /* 8-way unrolled main loop; the last full group avoids the
     * out-of-bounds a[R + 1] prefetch by using NOPREFETCH on round 7 */
    while (len > UNROLL) {
        MUL_SET_VEC_ROUND_PREFETCH(0);
        MUL_SET_VEC_ROUND_PREFETCH(1);
        MUL_SET_VEC_ROUND_PREFETCH(2);
        MUL_SET_VEC_ROUND_PREFETCH(3);
        MUL_SET_VEC_ROUND_PREFETCH(4);
        MUL_SET_VEC_ROUND_PREFETCH(5);
        MUL_SET_VEC_ROUND_PREFETCH(6);
        MUL_SET_VEC_ROUND_PREFETCH(7);
        r += UNROLL;
        a += UNROLL;
        len -= UNROLL;
    }
    if (len == UNROLL) {
        MUL_SET_VEC_ROUND_PREFETCH(0);
        MUL_SET_VEC_ROUND_PREFETCH(1);
        MUL_SET_VEC_ROUND_PREFETCH(2);
        MUL_SET_VEC_ROUND_PREFETCH(3);
        MUL_SET_VEC_ROUND_PREFETCH(4);
        MUL_SET_VEC_ROUND_PREFETCH(5);
        MUL_SET_VEC_ROUND_PREFETCH(6);
        MUL_SET_VEC_ROUND_NOPREFETCH(7);
        return ((uint32_t)cy);
    }
    /* remaining 1..7 digits */
    while (len > 1) {
        MUL_SET_VEC_ROUND_PREFETCH(0);
        ++r;
        ++a;
        --len;
    }
    if (len > 0) {
        MUL_SET_VEC_ROUND_NOPREFETCH(0);
    }
    return ((uint32_t)cy);
}

/*
 * r += a * b
 * where r and a are vectors; b is a single 32-bit digit
 * Returns the final carry.
 */

uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
    uint64_t d, pf, p, t, cy;

    if (len == 0)
        return (0);
    cy = 0;
    d = (uint64_t)b;
    pf = (uint64_t)a[0];
    /* same unrolling structure as big_mul_set_vec() */
    while (len > 8) {
        MUL_ADD_VEC_ROUND_PREFETCH(0);
        MUL_ADD_VEC_ROUND_PREFETCH(1);
        MUL_ADD_VEC_ROUND_PREFETCH(2);
        MUL_ADD_VEC_ROUND_PREFETCH(3);
        MUL_ADD_VEC_ROUND_PREFETCH(4);
        MUL_ADD_VEC_ROUND_PREFETCH(5);
        MUL_ADD_VEC_ROUND_PREFETCH(6);
        MUL_ADD_VEC_ROUND_PREFETCH(7);
        r += 8;
        a += 8;
        len -= 8;
    }
    if (len == 8) {
        MUL_ADD_VEC_ROUND_PREFETCH(0);
        MUL_ADD_VEC_ROUND_PREFETCH(1);
        MUL_ADD_VEC_ROUND_PREFETCH(2);
        MUL_ADD_VEC_ROUND_PREFETCH(3);
        MUL_ADD_VEC_ROUND_PREFETCH(4);
        MUL_ADD_VEC_ROUND_PREFETCH(5);
        MUL_ADD_VEC_ROUND_PREFETCH(6);
        MUL_ADD_VEC_ROUND_NOPREFETCH(7);
		return ((uint32_t)cy);
	}
	while (len > 1) {
		MUL_ADD_VEC_ROUND_PREFETCH(0);
		++r;
		++a;
		--len;
	}
	if (len > 0) {
		MUL_ADD_VEC_ROUND_NOPREFETCH(0);
	}
	return ((uint32_t)cy);
}
#endif /* UNROLL8 */

/*
 * r = a * a, where a holds len 32-bit digits and r receives 2 * len digits.
 * First accumulates the off-diagonal products a[i] * a[j] (i < j) into
 * r[1..], then in a second pass doubles them and folds in the diagonal
 * squares a[i]^2 with carry propagation.
 */
void
big_sqr_vec(uint32_t *r, uint32_t *a, int len)
{
	uint32_t *tr, *ta;
	int tlen, row, col;
	uint64_t p, s, t, t2, cy;
	uint32_t d;

	tr = r + 1;
	ta = a;
	tlen = len - 1;
	tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
	while (--tlen > 0) {
		tr += 2;
		++ta;
		tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
	}
	s = (uint64_t)a[0];
	s = s * s;
	r[0] = (uint32_t)s;
	cy = s >> 32;
	p = ((uint64_t)r[1] << 1) + cy;
	r[1] = (uint32_t)p;
	cy = p >> 32;
	row = 1;
	col = 2;
	while (row < len) {
		s = (uint64_t)a[row];
		s = s * s;
		p = (uint64_t)r[col] << 1;
		t = p + s;
		d = (uint32_t)t;
		t2 = (uint64_t)d + cy;
		r[col] = (uint32_t)t2;
		/* two carry sources: the doubled sum and the running carry */
		cy = (t >> 32) + (t2 >> 32);
		if (row == len - 1)
			break;
		p = ((uint64_t)r[col + 1] << 1) + cy;
		r[col + 1] = (uint32_t)p;
		cy = p >> 32;
		++row;
		col += 2;
	}
	r[col + 1] = (uint32_t)cy;
}

#else /* BIG_CHUNK_SIZE == 64 */

/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * The multiplier digit is split into two half-chunk halves; the low and
 * high halves are applied in separate passes so that only half-width
 * products are ever needed.
 */
BIG_CHUNK_TYPE
big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
    BIG_CHUNK_TYPE digit)
{
	BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
	int i;

	cy1 = 0;
	dlow = digit & BIG_CHUNK_LOWHALFBITS;
	dhigh = digit >> (BIG_CHUNK_SIZE / 2);
	for (i = 0; i < len; i++) {
		cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
		    dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
		    (r[i] & BIG_CHUNK_LOWHALFBITS);
		cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
		    dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
		    (r[i] >> (BIG_CHUNK_SIZE / 2));
		r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
		    (cy1 << (BIG_CHUNK_SIZE / 2));
	}
	retcy = cy1 >> (BIG_CHUNK_SIZE / 2);

	/* second pass: apply the high half, shifted up by a half chunk */
	cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
	for (i = 0; i < len - 1; i++) {
		cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
		    dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
		    (r[i] >> (BIG_CHUNK_SIZE / 2));
		r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
		    (cy << (BIG_CHUNK_SIZE / 2));
		cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
		    dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
		    (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
	}
	cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
	    dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
	    (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
	r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
	    (cy << (BIG_CHUNK_SIZE / 2));
	retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
	    dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;

	return (retcy);
}


/*
 * r = a * digit, r and a are vectors of length len
 * returns the carry digit
 */
BIG_CHUNK_TYPE
big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
    BIG_CHUNK_TYPE digit)
{
	int i;

	ASSERT(r != a);
	for (i = 0; i < len; i++) {
		r[i] = 0;
	}
	return (big_mul_add_vec(r, a, len, digit));
}

/*
 * r = a * a; r receives 2 * len digits.  Simple row-by-row schoolbook
 * squaring built on the two primitives above.
 */
void
big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
{
	int i;

	ASSERT(r != a);
	r[len] = big_mul_set_vec(r, a, len, a[0]);
	for (i = 1; i < len; ++i)
		r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
}

#endif /* BIG_CHUNK_SIZE == 32/64 */


#else /* !
UMUL64 */

#if (BIG_CHUNK_SIZE != 32)
#error Don't use 64-bit chunks without defining UMUL64
#endif


/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * Fallback for targets with no 32 x 32 --> 64 multiply: builds the
 * result from 16 x 16 --> 32 bit products, applying the low and high
 * 16-bit halves of digit in two separate passes.
 */
uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
	uint32_t cy, cy1, retcy, dlow, dhigh;
	int i;

	cy1 = 0;
	dlow = digit & 0xffff;
	dhigh = digit >> 16;
	for (i = 0; i < len; i++) {
		cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
		cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
		r[i] = (cy & 0xffff) | (cy1 << 16);
	}
	retcy = cy1 >> 16;

	/* second pass: high half of digit, offset by 16 bits */
	cy1 = r[0] & 0xffff;
	for (i = 0; i < len - 1; i++) {
		cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
		r[i] = (cy1 & 0xffff) | (cy << 16);
		cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
	}
	cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
	r[len - 1] = (cy1 & 0xffff) | (cy << 16);
	retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;

	return (retcy);
}


/*
 * r = a * digit, r and a are vectors of length len
 * returns the carry digit
 */
uint32_t
big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
	int i;

	ASSERT(r != a);
	for (i = 0; i < len; i++) {
		r[i] = 0;
	}

	return (big_mul_add_vec(r, a, len, digit));
}

/*
 * r = a * a; r receives 2 * len digits.
 */
void
big_sqr_vec(uint32_t *r, uint32_t *a, int len)
{
	int i;

	ASSERT(r != a);
	r[len] = big_mul_set_vec(r, a, len, a[0]);
	for (i = 1; i < len; ++i)
		r[len + i] = big_mul_add_vec(r + i, a, len, a[i]);
}

#endif /* UMUL64 */

/*
 * r = a * b, schoolbook vector multiply; r receives alen + blen digits.
 * r must not alias a or b (big_mul_set_vec asserts r != a).
 */
void
big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen,
    BIG_CHUNK_TYPE *b, int blen)
{
	int i;

	r[alen] = big_mul_set_vec(r, a, alen, b[0]);
	for (i = 1; i < blen; ++i)
		r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]);
}


#endif /* ! PSR_MUL */


/*
 * result = aa * bb  result->value should be big enough to hold the result
 *
 * Implementation: Standard grammar school algorithm
 *
 */
BIG_ERR_CODE
big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
	BIGNUM tmp1;
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE *r, *t, *a, *b;
	BIG_ERR_CODE err;
	int i, alen, blen, rsize, sign, diff;

	/* ensure aa is the operand with the larger absolute value */
	if (aa == bb) {
		diff = 0;
	} else {
		diff = big_cmp_abs(aa, bb);
		if (diff < 0) {
			BIGNUM *tt;
			tt = aa;
			aa = bb;
			bb = tt;
		}
	}
	a = aa->value;
	b = bb->value;
	alen = aa->len;
	blen = bb->len;
	/* strip leading zero digits (but keep at least one) */
	while ((alen > 1) && (a[alen - 1] == 0)) {
		alen--;
	}
	aa->len = alen;
	while ((blen > 1) && (b[blen - 1] == 0)) {
		blen--;
	}
	bb->len = blen;

	rsize = alen + blen;
	ASSERT(rsize > 0);
	if (result->size < rsize) {
		err = big_extend(result, rsize);
		if (err != BIG_OK) {
			return (err);
		}
		/* aa or bb might be an alias to result */
		a = aa->value;
		b = bb->value;
	}
	r = result->value;

	/* shortcut: either operand is zero */
	if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) {
		result->len = 1;
		result->sign = 1;
		r[0] = 0;
		return (BIG_OK);
	}
	sign = aa->sign * bb->sign;
	/* shortcut: either operand is one */
	if ((alen == 1) && (a[0] == 1)) {
		for (i = 0; i < blen; i++) {
			r[i] = b[i];
		}
		result->len = blen;
		result->sign = sign;
		return (BIG_OK);
	}
	if ((blen == 1) && (b[0] == 1)) {
		for (i = 0; i < alen; i++) {
			r[i] = a[i];
		}
		result->len = alen;
		result->sign = sign;
		return (BIG_OK);
	}

	/* multiply into a temporary in case result aliases an operand */
	if ((err = big_init1(&tmp1, rsize,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}
	t = tmp1.value;

	for (i = 0; i
< rsize; i++) {
		t[i] = 0;
	}

	/* use the dedicated squaring path when multiplying a number by itself */
	if (diff == 0 && alen > 2) {
		BIG_SQR_VEC(t, a, alen);
	} else if (blen > 0) {
		BIG_MUL_VEC(t, a, alen, b, blen);
	}

	if (t[rsize - 1] == 0) {
		tmp1.len = rsize - 1;
	} else {
		tmp1.len = rsize;
	}

	err = big_copy(result, &tmp1);

	result->sign = sign;

	big_finish(&tmp1);

	return (err);
}


/*
 * big_mont_mul()
 * Montgomery multiplication.
 *
 * Caller must ensure that a < n, b < n, ret->size >= 2 * n->len + 1,
 * and that ret is not n.
 */
BIG_ERR_CODE
big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0)
{
	int i, j, nlen, needsubtract;
	BIG_CHUNK_TYPE *nn, *rr, *rrplusi;
	BIG_CHUNK_TYPE digit, c;
	BIG_ERR_CODE err;
#ifdef __amd64
#define	BIG_CPU_UNKNOWN	0
#define	BIG_CPU_AMD	1
#define	BIG_CPU_INTEL	2
	/* detected once on first call, then cached for the process/module */
	static int big_cpu = BIG_CPU_UNKNOWN;
	BIG_CHUNK_TYPE carry[BIGTMPSIZE];

	if (big_cpu == BIG_CPU_UNKNOWN) {
		big_cpu = 1 + bignum_on_intel();
	}
#endif /* __amd64 */

	nlen = n->len;
	nn = n->value;

	rr = ret->value;

	if ((err = big_mul(ret, a, b)) != BIG_OK) {
		return (err);
	}

	/* big_mul may have reallocated ret->value; re-read and zero-pad */
	rr = ret->value;
	for (i = ret->len; i < 2 * nlen + 1; i++) {
		rr[i] = 0;
	}

#ifdef __amd64	/* pipelining optimization for Intel 64, but not AMD64 */
	if ((big_cpu == BIG_CPU_INTEL) && (nlen <= BIGTMPSIZE)) {
		/*
		 * Perform the following in two for loops to reduce the
		 * dependency between computing the carryover bits with
		 * BIG_MUL_ADD_VEC() and adding them, thus improving pipelining.
		 */
		for (i = 0; i < nlen; i++) {
			rrplusi = rr + i;
			digit = *rrplusi * n0;
			carry[i] = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit);
		}
		for (i = 0; i < nlen; i++) {
			j = i + nlen;
			rr[j] += carry[i];
			while (rr[j] < carry[i]) {
				rr[++j] += 1;
				carry[i] = 1;
			}
		}
	} else
#endif /* __amd64 */
	{ /* no pipelining optimization */
		for (i = 0; i < nlen; i++) {
			rrplusi = rr + i;
			digit = *rrplusi * n0;
			c = BIG_MUL_ADD_VEC(rrplusi, nn, nlen, digit);
			j = i + nlen;
			rr[j] += c;
			/* propagate the carry through higher digits */
			while (rr[j] < c) {
				rr[++j] += 1;
				c = 1;
			}
		}
	}

	/* conditional final subtraction: reduce if rr >= n */
	needsubtract = 0;
	if ((rr[2 * nlen] != 0))
		needsubtract = 1;
	else {
		for (i = 2 * nlen - 1; i >= nlen; i--) {
			if (rr[i] > nn[i - nlen]) {
				needsubtract = 1;
				break;
			} else if (rr[i] < nn[i - nlen]) {
				break;
			}
		}
	}
	if (needsubtract)
		big_sub_vec(rr, rr + nlen, nn, nlen);
	else {
		for (i = 0; i < nlen; i++) {
			rr[i] = rr[i + nlen];
		}
	}

	/* Remove leading zeros, but keep at least 1 digit: */
	for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--)
		;
	ret->len = i + 1;

	return (BIG_OK);
}


/*
 * Compute the Montgomery digit n0 from the least significant chunk of the
 * modulus, bit-serially (no division).
 * NOTE(review): by its use in big_mont_mul this appears to be
 * -n^(-1) mod 2^BIG_CHUNK_SIZE -- confirm against the Montgomery reduction
 * references before relying on this description.
 */
BIG_CHUNK_TYPE
big_n0(BIG_CHUNK_TYPE n)
{
	int i;
	BIG_CHUNK_TYPE result, tmp;

	result = 0;
	tmp = BIG_CHUNK_ALLBITS;
	for (i = 0; i < BIG_CHUNK_SIZE; i++) {
		if ((tmp & 1) == 1) {
			result = (result >> 1) | BIG_CHUNK_HIGHBIT;
			tmp = tmp - n;
		} else {
			result = (result >> 1);
		}
		tmp = tmp >> 1;
	}

	return (result);
}


/*
 * Returns the number of significant bits in n, i.e. the 1-based position
 * of the highest set bit; returns 0 when n is zero.
 */
int
big_numbits(BIGNUM *n)
{
	int i, j;
	BIG_CHUNK_TYPE t;

	/* find the most significant nonzero chunk */
	for (i = n->len - 1; i > 0; i--) {
		if (n->value[i] != 0) {
			break;
		}
	}
	t = n->value[i];
	for (j = BIG_CHUNK_SIZE; j > 0; j--) {
		if ((t & BIG_CHUNK_HIGHBIT) == 0) {
			t = t << 1;
		} else {
			return (BIG_CHUNK_SIZE * i + j);
		}
	}
	return (0);
}


/*
 * Compute the Montgomery constant RR = 2^(2 * len * BIG_CHUNK_SIZE) mod n,
 * where len = n->len: builds that power of two directly and reduces it
 * modulo n.  (The previous "caller must make sure that a < n" comment was
 * a copy-paste artifact; this function takes no argument a.)
 */
BIG_ERR_CODE
big_mont_rr(BIGNUM *result, BIGNUM *n)
{
	BIGNUM rr;
	BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
	int len, i;
	BIG_ERR_CODE err;

	rr.malloced = 0;
	len = n->len;

	if ((err = big_init1(&rr, 2 * len + 1,
	    rrvalue, arraysize(rrvalue))) != BIG_OK) {
		return (err);
	}

	for (i = 0; i < 2 * len; i++) {
		rr.value[i] = 0;
	}
	rr.value[2 * len] = 1;
	rr.len = 2 * len + 1;
	if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
		goto ret;
	}
	err = big_copy(result, &rr);
ret:
	big_finish(&rr);
	return (err);
}


/*
 * Convert a into the Montgomery domain: result = a * R mod n, done as a
 * Montgomery multiply of a by RR (computed here if n_rr is NULL).
 * caller must make sure that a < n
 */
BIG_ERR_CODE
big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
    BIGNUM *n_rr)
{
	BIGNUM rr;
	BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
	int len, i;
	BIG_ERR_CODE err;

	rr.malloced = 0;
	len = n->len;

	if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
	    != BIG_OK) {
		return (err);
	}

	if (n_rr == NULL) {
		/* no precomputed RR supplied: compute it in place */
		for (i = 0; i < 2 * len; i++) {
			rr.value[i] = 0;
		}
		rr.value[2 * len] = 1;
		rr.len = 2 * len + 1;
		if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
			goto ret;
		}
		n_rr = &rr;
	}

	if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
		goto ret;
	}
	err = big_copy(result, &rr);

ret:
	big_finish(&rr);
	return (err);
}


#ifdef	USE_FLOATING_POINT
#define	big_modexp_ncp_float	big_modexp_ncp_sw
#else
#define	big_modexp_ncp_int	big_modexp_ncp_sw
#endif

#define	MAX_EXP_BIT_GROUP_SIZE 6
#define	APOWERS_MAX_SIZE	(1 << (MAX_EXP_BIT_GROUP_SIZE - 1))

/*
 * Integer (non-floating-point) Montgomery modular exponentiation:
 * tmp = ma^e in the Montgomery domain mod n.  Scans the exponent bits
 * in windows of up to MAX_EXP_BIT_GROUP_SIZE; k, l and p track the
 * pending squarings before/after the window and the accumulated window
 * value, with odd powers of ma precomputed in apowers[].
 */
/* ARGSUSED */
static BIG_ERR_CODE
big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)

{
	BIGNUM apowers[APOWERS_MAX_SIZE];
	BIGNUM tmp1;
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	int i, j, k, l, m, p;
	uint32_t bit, bitind, bitcount, groupbits, apowerssize;
	uint32_t nbits;
	BIG_ERR_CODE err;

	/* small exponents don't benefit from windowing */
	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}


	if ((err = big_init1(&tmp1, 2 * n->len + 1,
	    tmp1value, arraysize(tmp1value))) != BIG_OK) {
		return (err);
	}

	/* clear the malloced bit to help cleanup */
	for (i = 0; i < apowerssize; i++) {
		apowers[i].malloced = 0;
	}

	for (i = 0; i < apowerssize; i++) {
		if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
		    BIG_OK) {
			goto ret;
		}
	}

	/* precompute apowers[i] = ma^(2i+1) (Montgomery domain) */
	(void) big_copy(&(apowers[0]), ma);

	if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
		goto ret;
	}
	(void) big_copy(ma, &tmp1);

	for (i = 1; i < apowerssize; i++) {
		if ((err = big_mont_mul(&tmp1, ma,
		    &(apowers[i - 1]), n, n0)) != BIG_OK) {
			goto ret;
		}
		(void) big_copy(&apowers[i], &tmp1);
	}

	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				/* outside a window: plain squaring */
				if ((err = big_mont_mul(tmp,
				    tmp, tmp, n, n0)) != BIG_OK) {
					goto ret;
				}
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					/* k squarings, multiply, l squarings */
					for (m = 0; m < k; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					if ((err = big_mont_mul(tmp, tmp,
					    &(apowers[p >> (l + 1)]),
					    n, n0)) != BIG_OK) {
						goto ret;
					}
					for (m = 0; m < l; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	/* flush a partially filled final window */
	for (m = 0; m < k; m++) {
		if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	if (p != 0) {
		if ((err = big_mont_mul(tmp, tmp,
		    &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	for (m = 0; m < l; m++) {
		/*
		 * NOTE(review): this writes into result while squaring tmp;
		 * it is only equivalent to repeated squaring because the
		 * internal callers (see big_modexp_ext) pass result == tmp.
		 */
		if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		big_finish(&(apowers[i]));
	}
	big_finish(&tmp1);

	return (err);
}


#ifdef USE_FLOATING_POINT

#ifdef _KERNEL

#include <sys/sysmacros.h>
#include <sys/regset.h>
#include <sys/fpu/fpusystm.h>

/* the alignment for block stores to save fp registers */
#define	FPR_ALIGN	(64)

extern void big_savefp(kfpu_t *);
extern void big_restorefp(kfpu_t *);

#endif /* _KERNEL */

/*
 * This version makes use of floating point for performance
 */
static BIG_ERR_CODE
big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)
{

	int i, j, k, l, m, p;
	uint32_t bit, bitind, bitcount, nlen;
	double dn0;
	double *dn, *dt, *d16r, *d32r;
	uint32_t *nint, *prod;
	double *apowers[APOWERS_MAX_SIZE];
	uint32_t nbits, groupbits, apowerssize;
	BIG_ERR_CODE err = BIG_OK;

#ifdef _KERNEL
	/* FP registers must be saved/restored around FP use in the kernel */
	uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN];
	kfpu_t *fpu;

#ifdef DEBUG
	if (!fpu_exists)
		return (BIG_GENERAL_ERR);
#endif
	fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN);
	big_savefp(fpu);

#endif /* _KERNEL */

	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}

	/* nlen is the operand length in 32-bit words */
	nlen = (BIG_CHUNK_SIZE / 32) * n->len;
	/* only the low 16 bits of n0 are used by the FP reduction */
	dn0 = (double)(n0 & 0xffff);

	dn = dt = d16r = d32r = NULL;
	nint = prod = NULL;
	for (i = 0; i < apowerssize; i++) {
		apowers[i] = NULL;
	}

	if ((dn = big_malloc(nlen * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) {
		err = BIG_NO_MEM;
		goto ret;
	}
	for (i = 0; i < apowerssize; i++) {
		if ((apowers[i] = big_malloc((2 * nlen + 1) *
		    sizeof (double))) == NULL) {
			err = BIG_NO_MEM;
			goto ret;
		}
	}

	/* copy ma into nint as 32-bit words, zero-padded to nlen */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < ma->len; i++) {
		nint[i] = ma->value[i];
	}
	for (; i < nlen; i++) {
		nint[i] = 0;
	}
#else
	for (i = 0; i < ma->len; i++) {
		nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL);
		nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32);
	}
	for (i = ma->len * 2; i < nlen; i++) {
		nint[i] = 0;
	}
#endif
	conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen);

	/* copy the modulus n into nint, then convert to FP form */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < n->len; i++) {
		nint[i] = n->value[i];
	}
	for (; i < nlen; i++) {
		nint[i] = 0;
	}
#else
	for (i = 0; i < n->len; i++) {
		nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL);
		nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32);
	}
	for (i = n->len * 2; i < nlen; i++) {
		nint[i] = 0;
	}
#endif
	conv_i32_to_d32(dn, nint, nlen);

	/* precompute the odd-power table apowers[] in FP (d16) form */
	mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0);
	conv_i32_to_d32(d32r, prod, nlen);
	for (i = 1; i < apowerssize; i++) {
		mont_mulf_noconv(prod, d32r, apowers[i - 1],
		    dt, dn, nint, nlen, dn0);
		conv_i32_to_d16(apowers[i], prod, nlen);
	}

	/* seed the accumulator prod from tmp */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < tmp->len; i++) {
		prod[i] = tmp->value[i];
	}
	for (; i < nlen + 1; i++) {
		prod[i] = 0;
	}
#else
	for (i = 0; i < tmp->len; i++) {
		prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL);
		prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32);
	}
	for (i = tmp->len * 2; i < nlen + 1; i++) {
		prod[i] = 0;
	}
#endif

	/* same window scan as big_modexp_ncp_int, using FP Montgomery mult */
	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				conv_i32_to_d32_and_d16(d32r, d16r,
				    prod, nlen);
				mont_mulf_noconv(prod, d32r, d16r,
				    dt, dn, nint, nlen, dn0);
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					for (m = 0; m < k; m++) {
						conv_i32_to_d32_and_d16(d32r,
						    d16r, prod, nlen);
						mont_mulf_noconv(prod, d32r,
						    d16r, dt, dn, nint,
						    nlen, dn0);
					}
					conv_i32_to_d32(d32r, prod, nlen);
					mont_mulf_noconv(prod, d32r,
					    apowers[p >> (l + 1)],
					    dt, dn, nint, nlen, dn0);
					for (m = 0; m < l; m++) {
						conv_i32_to_d32_and_d16(d32r,
						    d16r, prod, nlen);
						mont_mulf_noconv(prod, d32r,
						    d16r, dt, dn, nint,
						    nlen, dn0);
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	/* flush a partially filled final window */
	for (m = 0; m < k; m++) {
		conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
		mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
	}
	if (p != 0) {
		conv_i32_to_d32(d32r, prod, nlen);
		mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)],
		    dt, dn, nint, nlen, dn0);
	}
	for (m = 0; m < l; m++) {
		conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen);
		mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0);
	}

	/* convert prod back to BIG_CHUNK_TYPE digits, dropping leading zeros */
#if (BIG_CHUNK_SIZE == 32)
	for (i = 0; i < nlen; i++) {
		result->value[i] = prod[i];
	}
	for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--)
		;
#else
	for (i = 0; i < nlen / 2; i++) {
		result->value[i] = (uint64_t)(prod[2 * i]) +
		    (((uint64_t)(prod[2 * i + 1])) << 32);
	}
	for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--)
		;
#endif
	result->len = i + 1;

ret:
	for (i = apowerssize - 1; i >= 0; i--) {
		if (apowers[i] != NULL)
			big_free(apowers[i], (2 * nlen + 1) * sizeof (double));
	}
	if (d32r != NULL) {
		big_free(d32r, nlen * sizeof (double));
	}
	if (d16r != NULL) {
		big_free(d16r, (2 * nlen + 1) * sizeof (double));
	}
	if (prod != NULL) {
		big_free(prod, (nlen + 1) * sizeof (uint32_t));
	}
	if (nint != NULL) {
		big_free(nint, nlen * sizeof (uint32_t));
	}
	if (dt != NULL) {
		big_free(dt, (4 * nlen + 2) * sizeof (double));
	}
	if (dn != NULL) {
		big_free(dn, nlen * sizeof (double));
	}

#ifdef _KERNEL
	big_restorefp(fpu);
#endif

	return (err);
}

#endif /* USE_FLOATING_POINT */


BIG_ERR_CODE
big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n,
BIGNUM *n_rr, 2299 big_modexp_ncp_info_t *info) 2300 { 2301 BIGNUM ma, tmp, rr; 2302 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE]; 2303 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2304 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE]; 2305 BIG_ERR_CODE err; 2306 BIG_CHUNK_TYPE n0; 2307 2308 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) != 2309 BIG_OK) { 2310 return (err); 2311 } 2312 ma.len = 1; 2313 ma.value[0] = 0; 2314 2315 if ((err = big_init1(&tmp, 2 * n->len + 1, 2316 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 2317 goto ret1; 2318 } 2319 2320 /* clear the malloced bit to help cleanup */ 2321 rr.malloced = 0; 2322 2323 if (n_rr == NULL) { 2324 if ((err = big_init1(&rr, 2 * n->len + 1, 2325 rrvalue, arraysize(rrvalue))) != BIG_OK) { 2326 goto ret2; 2327 } 2328 if (big_mont_rr(&rr, n) != BIG_OK) { 2329 goto ret; 2330 } 2331 n_rr = &rr; 2332 } 2333 2334 n0 = big_n0(n->value[0]); 2335 2336 if (big_cmp_abs(a, n) > 0) { 2337 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) { 2338 goto ret; 2339 } 2340 err = big_mont_conv(&ma, &ma, n, n0, n_rr); 2341 } else { 2342 err = big_mont_conv(&ma, a, n, n0, n_rr); 2343 } 2344 if (err != BIG_OK) { 2345 goto ret; 2346 } 2347 2348 tmp.len = 1; 2349 tmp.value[0] = 1; 2350 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) { 2351 goto ret; 2352 } 2353 2354 if ((info != NULL) && (info->func != NULL)) { 2355 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0, 2356 info->ncp, info->reqp); 2357 } else { 2358 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0); 2359 } 2360 if (err != BIG_OK) { 2361 goto ret; 2362 } 2363 2364 ma.value[0] = 1; 2365 ma.len = 1; 2366 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) { 2367 goto ret; 2368 } 2369 err = big_copy(result, &tmp); 2370 2371 ret: 2372 if (rr.malloced) { 2373 big_finish(&rr); 2374 } 2375 ret2: 2376 big_finish(&tmp); 2377 ret1: 2378 big_finish(&ma); 2379 2380 return (err); 2381 } 2382 2383 BIG_ERR_CODE 2384 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM 
*n_rr) 2385 { 2386 return (big_modexp_ext(result, a, e, n, n_rr, NULL)); 2387 } 2388 2389 2390 BIG_ERR_CODE 2391 big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1, 2392 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq, 2393 BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info) 2394 { 2395 BIGNUM ap, aq, tmp; 2396 int alen, biglen, sign; 2397 BIG_ERR_CODE err; 2398 2399 if (p->len > q->len) { 2400 biglen = p->len; 2401 } else { 2402 biglen = q->len; 2403 } 2404 2405 if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) { 2406 return (err); 2407 } 2408 if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) { 2409 goto ret1; 2410 } 2411 if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) { 2412 goto ret2; 2413 } 2414 2415 /* 2416 * check whether a is too short - to avoid timing attacks 2417 */ 2418 alen = a->len; 2419 while ((alen > p->len) && (a->value[alen - 1] == 0)) { 2420 alen--; 2421 } 2422 if (alen < p->len + q->len) { 2423 /* 2424 * a is too short, add p*q to it before 2425 * taking it modulo p and q 2426 * this will also affect timing, but this difference 2427 * does not depend on p or q, only on a 2428 * (in "normal" operation, this path will never be 2429 * taken, so it is not a performance penalty 2430 */ 2431 if ((err = big_mul(&tmp, p, q)) != BIG_OK) { 2432 goto ret; 2433 } 2434 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) { 2435 goto ret; 2436 } 2437 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) { 2438 goto ret; 2439 } 2440 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) { 2441 goto ret; 2442 } 2443 } else { 2444 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) { 2445 goto ret; 2446 } 2447 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) { 2448 goto ret; 2449 } 2450 } 2451 2452 if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) != 2453 BIG_OK) { 2454 goto ret; 2455 } 2456 if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) != 2457 BIG_OK) { 2458 goto ret; 
2459 } 2460 if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) { 2461 goto ret; 2462 } 2463 if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) { 2464 goto ret; 2465 } 2466 sign = tmp.sign; 2467 tmp.sign = 1; 2468 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) { 2469 goto ret; 2470 } 2471 if ((sign == -1) && (!big_is_zero(&aq))) { 2472 (void) big_sub_pos(&aq, q, &aq); 2473 } 2474 if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) { 2475 goto ret; 2476 } 2477 err = big_add_abs(result, &ap, &tmp); 2478 2479 ret: 2480 big_finish(&tmp); 2481 ret2: 2482 big_finish(&aq); 2483 ret1: 2484 big_finish(&ap); 2485 2486 return (err); 2487 } 2488 2489 2490 BIG_ERR_CODE 2491 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1, 2492 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq, 2493 BIGNUM *p_rr, BIGNUM *q_rr) 2494 { 2495 return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1, 2496 p, q, pinvmodq, p_rr, q_rr, NULL)); 2497 } 2498 2499 2500 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1}; 2501 BIGNUM big_One = {1, 1, 1, 0, onearr}; 2502 2503 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2}; 2504 BIGNUM big_Two = {1, 1, 1, 0, twoarr}; 2505 2506 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4}; 2507 static BIGNUM big_Four = {1, 1, 1, 0, fourarr}; 2508 2509 2510 BIG_ERR_CODE 2511 big_sqrt_pos(BIGNUM *result, BIGNUM *n) 2512 { 2513 BIGNUM *high, *low, *mid, *t; 2514 BIGNUM t1, t2, t3, prod; 2515 BIG_CHUNK_TYPE t1value[BIGTMPSIZE]; 2516 BIG_CHUNK_TYPE t2value[BIGTMPSIZE]; 2517 BIG_CHUNK_TYPE t3value[BIGTMPSIZE]; 2518 BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE]; 2519 int i, diff; 2520 uint32_t nbits, nrootbits, highbits; 2521 BIG_ERR_CODE err; 2522 2523 nbits = big_numbits(n); 2524 2525 if ((err = big_init1(&t1, n->len + 1, 2526 t1value, arraysize(t1value))) != BIG_OK) 2527 return (err); 2528 if ((err = big_init1(&t2, n->len + 1, 2529 t2value, arraysize(t2value))) != BIG_OK) 2530 goto ret1; 2531 if ((err = big_init1(&t3, n->len + 1, 2532 t3value, 
arraysize(t3value))) != BIG_OK) 2533 goto ret2; 2534 if ((err = big_init1(&prod, n->len + 1, 2535 prodvalue, arraysize(prodvalue))) != BIG_OK) 2536 goto ret3; 2537 2538 nrootbits = (nbits + 1) / 2; 2539 t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1; 2540 for (i = 0; i < t1.len; i++) { 2541 t1.value[i] = 0; 2542 t2.value[i] = BIG_CHUNK_ALLBITS; 2543 } 2544 highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1); 2545 if (highbits == BIG_CHUNK_SIZE) { 2546 t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT; 2547 t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS; 2548 } else { 2549 t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1); 2550 t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1; 2551 } 2552 2553 high = &t2; 2554 low = &t1; 2555 mid = &t3; 2556 2557 if ((err = big_mul(&prod, high, high)) != BIG_OK) { 2558 goto ret; 2559 } 2560 diff = big_cmp_abs(&prod, n); 2561 if (diff <= 0) { 2562 err = big_copy(result, high); 2563 goto ret; 2564 } 2565 2566 (void) big_sub_pos(mid, high, low); 2567 while (big_cmp_abs(&big_One, mid) != 0) { 2568 (void) big_add_abs(mid, high, low); 2569 (void) big_half_pos(mid, mid); 2570 if ((err = big_mul(&prod, mid, mid)) != BIG_OK) 2571 goto ret; 2572 diff = big_cmp_abs(&prod, n); 2573 if (diff > 0) { 2574 t = high; 2575 high = mid; 2576 mid = t; 2577 } else if (diff < 0) { 2578 t = low; 2579 low = mid; 2580 mid = t; 2581 } else { 2582 err = big_copy(result, low); 2583 goto ret; 2584 } 2585 (void) big_sub_pos(mid, high, low); 2586 } 2587 2588 err = big_copy(result, low); 2589 ret: 2590 if (prod.malloced) big_finish(&prod); 2591 ret3: 2592 if (t3.malloced) big_finish(&t3); 2593 ret2: 2594 if (t2.malloced) big_finish(&t2); 2595 ret1: 2596 if (t1.malloced) big_finish(&t1); 2597 2598 return (err); 2599 } 2600 2601 2602 BIG_ERR_CODE 2603 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm) 2604 { 2605 BIGNUM *t, *tmp2, *m, *n; 2606 BIGNUM t1, t2, t3; 2607 BIG_CHUNK_TYPE t1value[BIGTMPSIZE]; 2608 BIG_CHUNK_TYPE t2value[BIGTMPSIZE]; 2609 
BIG_CHUNK_TYPE t3value[BIGTMPSIZE]; 2610 int len, err; 2611 2612 if (big_is_zero(nn) || 2613 (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) { 2614 *jac = 0; 2615 return (BIG_OK); 2616 } 2617 2618 if (nn->len > mm->len) { 2619 len = nn->len; 2620 } else { 2621 len = mm->len; 2622 } 2623 2624 if ((err = big_init1(&t1, len, 2625 t1value, arraysize(t1value))) != BIG_OK) { 2626 return (err); 2627 } 2628 if ((err = big_init1(&t2, len, 2629 t2value, arraysize(t2value))) != BIG_OK) { 2630 goto ret1; 2631 } 2632 if ((err = big_init1(&t3, len, 2633 t3value, arraysize(t3value))) != BIG_OK) { 2634 goto ret2; 2635 } 2636 2637 n = &t1; 2638 m = &t2; 2639 tmp2 = &t3; 2640 2641 (void) big_copy(n, nn); 2642 (void) big_copy(m, mm); 2643 2644 *jac = 1; 2645 while (big_cmp_abs(&big_One, m) != 0) { 2646 if (big_is_zero(n)) { 2647 *jac = 0; 2648 goto ret; 2649 } 2650 if ((m->value[0] & 1) == 0) { 2651 if (((n->value[0] & 7) == 3) || 2652 ((n->value[0] & 7) == 5)) 2653 *jac = -*jac; 2654 (void) big_half_pos(m, m); 2655 } else if ((n->value[0] & 1) == 0) { 2656 if (((m->value[0] & 7) == 3) || 2657 ((m->value[0] & 7) == 5)) 2658 *jac = -*jac; 2659 (void) big_half_pos(n, n); 2660 } else { 2661 if (((m->value[0] & 3) == 3) && 2662 ((n->value[0] & 3) == 3)) { 2663 *jac = -*jac; 2664 } 2665 if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) { 2666 goto ret; 2667 } 2668 t = tmp2; 2669 tmp2 = m; 2670 m = n; 2671 n = t; 2672 } 2673 } 2674 err = BIG_OK; 2675 2676 ret: 2677 if (t3.malloced) big_finish(&t3); 2678 ret2: 2679 if (t2.malloced) big_finish(&t2); 2680 ret1: 2681 if (t1.malloced) big_finish(&t1); 2682 2683 return (err); 2684 } 2685 2686 2687 BIG_ERR_CODE 2688 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n) 2689 { 2690 int i; 2691 uint32_t m, w; 2692 BIG_CHUNK_TYPE bit; 2693 BIGNUM ki, tmp, tmp2; 2694 BIG_CHUNK_TYPE kivalue[BIGTMPSIZE]; 2695 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2696 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE]; 2697 BIG_ERR_CODE err; 2698 2699 if 
(big_cmp_abs(k, &big_One) == 0) {
		/* trivial case: V_1 = p, V_0 = 2 */
		(void) big_copy(Lk, p);
		(void) big_copy(Lkminus1, &big_Two);
		return (BIG_OK);
	}

	if ((err = big_init1(&ki, k->len + 1,
	    kivalue, arraysize(kivalue))) != BIG_OK)
		return (err);

	if ((err = big_init1(&tmp, 2 * n->len + 1,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp2, n->len,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret2;

	/*
	 * Encode k for the ladder: ki = 2^m - k (where m = numbits(k);
	 * ki == 0 when k is an exact power of two).  The bits of ki,
	 * read from the most significant end, select which of the two
	 * doubling steps below to apply at each iteration.
	 */
	m = big_numbits(k);
	ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
	w = (m - 1) / BIG_CHUNK_SIZE;
	bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
	for (i = 0; i < ki.len; i++) {
		ki.value[i] = 0;
	}
	ki.value[ki.len - 1] = bit;
	if (big_cmp_abs(k, &ki) != 0) {
		(void) big_double(&ki, &ki);
	}
	(void) big_sub_pos(&ki, &ki, k);

	/* start the ladder at the pair (V_0, V_1) = (2, p) */
	(void) big_copy(Lk, p);
	(void) big_copy(Lkminus1, &big_Two);

	for (i = 0; i < m; i++) {
		/*
		 * tmp2 = V_j * V_(j-1) - p mod n == V_(2j-1);
		 * n is added before subtracting p so the
		 * subtraction cannot underflow.
		 */
		if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
			goto ret;
		}
		(void) big_add_abs(&tmp, &tmp, n);
		(void) big_sub_pos(&tmp, &tmp, p);
		if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
			goto ret;
		}
		if ((ki.value[w] & bit) != 0) {
			/* step to (V_(2j-2), V_(2j-1)): square V_(j-1) */
			if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lk, &tmp2);
		} else {
			/* step to (V_(2j-1), V_(2j)): square V_j */
			if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lkminus1, &tmp2);
		}
		/* advance to the next lower bit of ki */
		bit = bit >> 1;
		if (bit == 0) {
			bit = BIG_CHUNK_HIGHBIT;
			w--;
		}
	}

	err = BIG_OK;

ret:
	if (tmp2.malloced) big_finish(&tmp2);
ret2:
	if (tmp.malloced) big_finish(&tmp);
ret1:
	if (ki.malloced) big_finish(&ki);

	return (err);
}


/*
 * big_isprime_pos_ext()
 * Probabilistic primality test, essentially Baillie-PSW style:
 * quick checks for 1, 2 and even n, a base-2 Miller-Rabin test,
 * rejection of perfect squares, then a Lucas test using the first
 * o >= 3 with Jacobi(o^2 - 4, n) == -1.
 * Returns BIG_TRUE, BIG_FALSE, or an error code from the helpers.
 */
BIG_ERR_CODE
big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
{
	BIGNUM o, nminus1, tmp, Lkminus1, Lk;
	BIG_CHUNK_TYPE ovalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE];
	BIG_ERR_CODE err;
	int e, i, jac;

	if (big_cmp_abs(n, &big_One) == 0) {
		return (BIG_FALSE);
	}
	if (big_cmp_abs(n, &big_Two) == 0) {
		return (BIG_TRUE);
	}
	if ((n->value[0] & 1) == 0) {
		return (BIG_FALSE);
	}

	if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
	    BIG_OK) {
		return (err);
	}

	if ((err = big_init1(&nminus1, n->len,
	    nminus1value, arraysize(nminus1value))) != BIG_OK) {
		goto ret1;
	}

	if ((err = big_init1(&tmp, 2 * n->len,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		goto ret2;
	}

	if ((err = big_init1(&Lkminus1, n->len,
	    Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
		goto ret3;
	}

	if ((err = big_init1(&Lk, n->len,
	    Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
		goto ret4;
	}

	/* write n - 1 = o * 2^e with o odd */
	(void) big_sub_pos(&o, n, &big_One); /* cannot fail */
	(void) big_copy(&nminus1, &o); /* cannot fail */
	e = 0;
	while ((o.value[0] & 1) == 0) {
		e++;
		(void) big_half_pos(&o, &o); /* cannot fail */
	}
	/* tmp = 2^o mod n */
	if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
	    BIG_OK) {
		goto ret;
	}

	/* square up to e times, stopping early at 1 or n - 1 (mod n) */
	i = 0;
	while ((i < e) &&
	    (big_cmp_abs(&tmp, &big_One) != 0) &&
	    (big_cmp_abs(&tmp, &nminus1) != 0)) {
		if ((err =
		    big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
		    BIG_OK)
			goto ret;
		i++;
2853 } 2854 2855 if (!((big_cmp_abs(&tmp, &nminus1) == 0) || 2856 ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) { 2857 err = BIG_FALSE; 2858 goto ret; 2859 } 2860 2861 if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) { 2862 goto ret; 2863 } 2864 2865 if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) { 2866 goto ret; 2867 } 2868 if (big_cmp_abs(&tmp, n) == 0) { 2869 err = BIG_FALSE; 2870 goto ret; 2871 } 2872 2873 (void) big_copy(&o, &big_Two); 2874 do { 2875 (void) big_add_abs(&o, &o, &big_One); 2876 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) { 2877 goto ret; 2878 } 2879 (void) big_sub_pos(&tmp, &tmp, &big_Four); 2880 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) { 2881 goto ret; 2882 } 2883 } while (jac != -1); 2884 2885 (void) big_add_abs(&tmp, n, &big_One); 2886 if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) { 2887 goto ret; 2888 } 2889 2890 if ((big_cmp_abs(&Lkminus1, &o) == 0) && 2891 (big_cmp_abs(&Lk, &big_Two) == 0)) { 2892 err = BIG_TRUE; 2893 } else { 2894 err = BIG_FALSE; 2895 } 2896 2897 ret: 2898 if (Lk.malloced) big_finish(&Lk); 2899 ret4: 2900 if (Lkminus1.malloced) big_finish(&Lkminus1); 2901 ret3: 2902 if (tmp.malloced) big_finish(&tmp); 2903 ret2: 2904 if (nminus1.malloced) big_finish(&nminus1); 2905 ret1: 2906 if (o.malloced) big_finish(&o); 2907 2908 return (err); 2909 } 2910 2911 2912 BIG_ERR_CODE 2913 big_isprime_pos(BIGNUM *n) 2914 { 2915 return (big_isprime_pos_ext(n, NULL)); 2916 } 2917 2918 2919 #define SIEVESIZE 1000 2920 2921 2922 BIG_ERR_CODE 2923 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info) 2924 { 2925 static const uint32_t smallprimes[] = { 2926 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 2927 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 }; 2928 BIG_ERR_CODE err; 2929 int sieve[SIEVESIZE]; 2930 int i; 2931 uint32_t off, p; 2932 2933 if ((err = big_copy(result, n)) != BIG_OK) { 2934 return (err); 2935 } 2936 result->value[0] |= 1; 2937 /* CONSTCOND */ 2938 while (1) { 
2939 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0; 2940 for (i = 0; 2941 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) { 2942 p = smallprimes[i]; 2943 off = big_modhalf_pos(result, p); 2944 off = p - off; 2945 if ((off % 2) == 1) { 2946 off = off + p; 2947 } 2948 off = off / 2; 2949 while (off < SIEVESIZE) { 2950 sieve[off] = 1; 2951 off = off + p; 2952 } 2953 } 2954 2955 for (i = 0; i < SIEVESIZE; i++) { 2956 if (sieve[i] == 0) { 2957 err = big_isprime_pos_ext(result, info); 2958 if (err != BIG_FALSE) { 2959 if (err != BIG_TRUE) { 2960 return (err); 2961 } else { 2962 goto out; 2963 } 2964 } 2965 2966 } 2967 if ((err = big_add_abs(result, result, &big_Two)) != 2968 BIG_OK) { 2969 return (err); 2970 } 2971 } 2972 } 2973 2974 out: 2975 return (BIG_OK); 2976 } 2977 2978 2979 BIG_ERR_CODE 2980 big_nextprime_pos(BIGNUM *result, BIGNUM *n) 2981 { 2982 return (big_nextprime_pos_ext(result, n, NULL)); 2983 } 2984 2985 2986 BIG_ERR_CODE 2987 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n) 2988 { 2989 BIG_ERR_CODE err; 2990 2991 2992 if ((err = big_copy(result, n)) != BIG_OK) 2993 return (err); 2994 result->value[0] |= 1; 2995 while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) { 2996 if (err != BIG_FALSE) 2997 return (err); 2998 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK) 2999 return (err); 3000 } 3001 return (BIG_OK); 3002 } 3003 3004 3005 /* 3006 * given m and e, computes the rest in the equation 3007 * gcd(m, e) = cm * m + ce * e 3008 */ 3009 BIG_ERR_CODE 3010 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e) 3011 { 3012 BIGNUM *xi, *ri, *riminus1, *riminus2, *t; 3013 BIGNUM *vmi, *vei, *vmiminus1, *veiminus1; 3014 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp; 3015 BIG_CHUNK_TYPE t1value[BIGTMPSIZE]; 3016 BIG_CHUNK_TYPE t2value[BIGTMPSIZE]; 3017 BIG_CHUNK_TYPE t3value[BIGTMPSIZE]; 3018 BIG_CHUNK_TYPE t4value[BIGTMPSIZE]; 3019 BIG_CHUNK_TYPE t5value[BIGTMPSIZE]; 3020 BIG_CHUNK_TYPE t6value[BIGTMPSIZE]; 3021 
BIG_CHUNK_TYPE t7value[BIGTMPSIZE]; 3022 BIG_CHUNK_TYPE t8value[BIGTMPSIZE]; 3023 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 3024 BIG_ERR_CODE err; 3025 int len; 3026 3027 if (big_cmp_abs(m, e) >= 0) { 3028 len = m->len; 3029 } else { 3030 len = e->len; 3031 } 3032 3033 if ((err = big_init1(&t1, len, 3034 t1value, arraysize(t1value))) != BIG_OK) { 3035 return (err); 3036 } 3037 if ((err = big_init1(&t2, len, 3038 t2value, arraysize(t2value))) != BIG_OK) { 3039 goto ret1; 3040 } 3041 if ((err = big_init1(&t3, len, 3042 t3value, arraysize(t3value))) != BIG_OK) { 3043 goto ret2; 3044 } 3045 if ((err = big_init1(&t4, len, 3046 t4value, arraysize(t3value))) != BIG_OK) { 3047 goto ret3; 3048 } 3049 if ((err = big_init1(&t5, len, 3050 t5value, arraysize(t5value))) != BIG_OK) { 3051 goto ret4; 3052 } 3053 if ((err = big_init1(&t6, len, 3054 t6value, arraysize(t6value))) != BIG_OK) { 3055 goto ret5; 3056 } 3057 if ((err = big_init1(&t7, len, 3058 t7value, arraysize(t7value))) != BIG_OK) { 3059 goto ret6; 3060 } 3061 if ((err = big_init1(&t8, len, 3062 t8value, arraysize(t8value))) != BIG_OK) { 3063 goto ret7; 3064 } 3065 3066 if ((err = big_init1(&tmp, 2 * len, 3067 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 3068 goto ret8; 3069 } 3070 3071 ri = &t1; 3072 ri->value[0] = 1; 3073 ri->len = 1; 3074 xi = &t2; 3075 riminus1 = &t3; 3076 riminus2 = &t4; 3077 vmi = &t5; 3078 vei = &t6; 3079 vmiminus1 = &t7; 3080 veiminus1 = &t8; 3081 3082 (void) big_copy(vmiminus1, &big_One); 3083 (void) big_copy(vmi, &big_One); 3084 (void) big_copy(veiminus1, &big_One); 3085 (void) big_copy(xi, &big_One); 3086 vei->len = 1; 3087 vei->value[0] = 0; 3088 3089 (void) big_copy(riminus1, m); 3090 (void) big_copy(ri, e); 3091 3092 while (!big_is_zero(ri)) { 3093 t = riminus2; 3094 riminus2 = riminus1; 3095 riminus1 = ri; 3096 ri = t; 3097 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) { 3098 goto ret; 3099 } 3100 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) { 3101 goto ret; 3102 } 3103 t = 
vmiminus1;
		vmiminus1 = vmi;
		vmi = t;
		/* same update for the coefficient of e */
		if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
			goto ret;
		}
		t = veiminus1;
		veiminus1 = vei;
		vei = t;
		/* xi = riminus2 / riminus1, ri = riminus2 mod riminus1 */
		if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
		    BIG_OK) {
			goto ret;
		}
	}
	/* riminus1 holds gcd(m, e); vmi and vei the cofactors */
	if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
		goto ret;
	}
	if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
		goto ret;
	}
	if (ce != NULL) {
		err = big_copy(ce, vei);
	}
ret:
	if (tmp.malloced) big_finish(&tmp);
ret8:
	if (t8.malloced) big_finish(&t8);
ret7:
	if (t7.malloced) big_finish(&t7);
ret6:
	if (t6.malloced) big_finish(&t6);
ret5:
	if (t5.malloced) big_finish(&t5);
ret4:
	if (t4.malloced) big_finish(&t4);
ret3:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}

/*
 * Get a rlen-bit random number in BIGNUM format.  Caller-supplied
 * (*rfunc)(void *dbuf, size_t dlen) must return 0 for success and
 * -1 for failure.  Note: (*rfunc)() takes length in bytes, not bits.
 * The most significant bit of the result is forced to 1, so the
 * returned number always has exactly rlen significant bits.
 */
BIG_ERR_CODE
big_random(BIGNUM *r, size_t rlen, int (*rfunc)(void *, size_t))
{
	size_t rwords, rbytes;
	int shift;

	if (r == NULL || rlen == 0 || rfunc == NULL)
		return (BIG_INVALID_ARGS);

	/*
	 * Convert rlen bits to r->len words (32- or 64-bit), rbytes bytes
	 * and extend r if it's not big enough to hold the random number.
	 */
	rwords = BITLEN2BIGNUMLEN(rlen);
	rbytes = rwords * sizeof (BIG_CHUNK_TYPE);
	if (big_extend(r, rwords) != BIG_OK)
		return (BIG_NO_MEM);
#ifdef BIGNUM_CHUNK_32
	r->len = rwords;
#else
	r->len = (uint32_t)rwords;
#endif

	if ((*rfunc)(r->value, rbytes) < 0)
		return (BIG_NO_RANDOM);

	/* force the top bit so the number is full-length after the shift */
	r->value[rwords - 1] |= BIG_CHUNK_HIGHBIT;

	/*
	 * If the bit length is not a word boundary, shift the most
	 * significant word so that we have an exactly rlen-long number.
	 */
	if ((shift = rlen % BIG_CHUNK_SIZE) != 0)
		r->value[rwords - 1] >>= (BIG_CHUNK_SIZE - shift);

	r->sign = 1;	/* non-negative */

	return (BIG_OK);
}