/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Configuration guide
 * -------------------
 *
 * There are 4 preprocessor symbols used to configure the bignum
 * implementation.  This file contains no logic to configure based on
 * processor; we leave that to the Makefiles to specify.
 *
 * USE_FLOATING_POINT
 * Meaning: There is support for a fast floating-point implementation of
 * Montgomery multiply.
 *
 * PSR_MUL
 * Meaning: There are processor-specific versions of the low level
 * functions to implement big_mul.  Those functions are: big_mul_set_vec,
 * big_mul_add_vec, big_mul_vec, and big_sqr_vec.  PSR_MUL implies support
 * for all 4 functions.  You cannot pick and choose which subset of these
 * functions to support; that would lead to a rat's nest of #ifdefs.
 *
 * HWCAP
 * Meaning: Call multiply support functions through a function pointer.
 * On x86, there are multiple implementations for different hardware
 * capabilities, such as MMX, SSE2, etc.  Tests are made at run-time, when
 * a function is first used.  So, the support functions are called through
 * a function pointer.  There is no need for that on Sparc, because there
 * is only one implementation; support functions are called directly.
 * Later, if there were some new VIS instruction, or something, and a
 * run-time test were needed, rather than variant kernel modules and
 * libraries, then HWCAP would be defined for Sparc, as well.
 *
 * UMUL64
 * Meaning: It is safe to use generic C code that assumes the existence
 * of a 32 x 32 --> 64 bit unsigned multiply.  If this is not defined,
 * then the generic code for big_mul_add_vec() must necessarily be very slow,
 * because it must fall back to using 16 x 16 --> 32 bit multiplication.
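 *
 * As a rough illustration of the UMUL64 assumption, when it is defined the
 * generic code can form a full digit product in a single expression, for
 * example
 *
 *	uint64_t p = (uint64_t)a[i] * (uint64_t)digit;
 *
 * whereas without UMUL64 an equivalent 64-bit product must be assembled
 * from four 16 x 16 --> 32 bit partial products, as the !UMUL64 code
 * later in this file does.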
 *
 */


#include <sys/types.h>
#include "bignum.h"

#ifdef	_KERNEL
#include <sys/ddi.h>
#include <sys/mdesc.h>
#include <sys/crypto/common.h>

#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/sunddi.h>

#else
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#define	ASSERT	assert
#endif	/* _KERNEL */

#ifdef	_LP64 /* truncate 64-bit size_t to 32-bits */
#define	UI32(ui)	((uint32_t)ui)
#else /* size_t already 32-bits */
#define	UI32(ui)	(ui)
#endif


#ifdef	_KERNEL
#define	big_malloc(size)	kmem_alloc(size, KM_NOSLEEP)
#define	big_free(ptr, size)	kmem_free(ptr, size)

void *
big_realloc(void *from, size_t oldsize, size_t newsize)
{
	void *rv;

	rv = kmem_alloc(newsize, KM_NOSLEEP);
	if (rv != NULL)
		bcopy(from, rv, oldsize);
	kmem_free(from, oldsize);
	return (rv);
}

#else	/* _KERNEL */

#ifndef MALLOC_DEBUG

#define	big_malloc(size)	malloc(size)
#define	big_free(ptr, size)	free(ptr)

#else

void
big_free(void *ptr, size_t size)
{
	printf("freed %zu bytes at %p\n", size, ptr);
	free(ptr);
}

void *
big_malloc(size_t size)
{
	void *rv;
	rv = malloc(size);
	printf("malloced %zu bytes, addr:%p\n", size, rv);
	return (rv);
}
#endif	/* MALLOC_DEBUG */

#define	big_realloc(x, y, z)	realloc((x), (z))


/*
 * printbignum()
 * Print a BIGNUM type to stdout.
 */
void
printbignum(char *aname, BIGNUM *a)
{
	int i;

	(void) printf("\n%s\n%d\n", aname, a->sign * a->len);
	for (i = a->len - 1; i >= 0; i--) {
#ifdef BIGNUM_CHUNK_32
		(void) printf("%08x ", a->value[i]);
		if (((i & (BITSINBYTE - 1)) == 0) && (i != 0)) {
			(void) printf("\n");
		}
#else
		(void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
		    (uint32_t)((a->value[i]) & 0xffffffff));
		if (((i & 3) == 0) && (i != 0)) { /* end of this chunk */
			(void) printf("\n");
		}
#endif
	}
	(void) printf("\n");
}

#endif	/* _KERNEL */


/*
 * big_init()
 * Initialize and allocate memory for a BIGNUM type.
 *
 * big_init(number, size) is equivalent to big_init1(number, size, NULL, 0)
 *
 * Note: call big_finish() to free memory allocated by big_init().
 *
 * Input:
 * number	Uninitialized memory for BIGNUM
 * size		Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 *
 * Output:
 * number	Initialized BIGNUM
 *
 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 */
BIG_ERR_CODE
big_init(BIGNUM *number, int size)
{
	number->value = big_malloc(BIGNUM_WORDSIZE * size);
	if (number->value == NULL) {
		return (BIG_NO_MEM);
	}
	number->size = size;
	number->len = 0;
	number->sign = 1;
	number->malloced = 1;
	return (BIG_OK);
}


/*
 * big_init1()
 * Initialize and, if needed, allocate memory for a BIGNUM type.
 * Use the buffer passed, buf, if any, instead of allocating memory,
 * if it is at least "size" words long.
 *
 * Note: call big_finish() to free memory allocated by big_init().
 *
 * Input:
 * number	Uninitialized memory for BIGNUM
 * size		Minimum size, in BIG_CHUNK_SIZE-bit words, required for BIGNUM
 * buf		Buffer for storing a BIGNUM.
 *		If NULL, big_init1() will allocate a buffer
 * bufsize	Size, in BIG_CHUNK_SIZE-bit words, of buf
 *
 * Output:
 * number	Initialized BIGNUM
 *
 * Return BIG_OK on success or BIG_NO_MEM for an allocation error.
 */
BIG_ERR_CODE
big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
{
	if ((buf == NULL) || (size > bufsize)) {
		number->value = big_malloc(BIGNUM_WORDSIZE * size);
		if (number->value == NULL) {
			return (BIG_NO_MEM);
		}
		number->size = size;
		number->malloced = 1;
	} else {
		number->value = buf;
		number->size = bufsize;
		number->malloced = 0;
	}
	number->len = 0;
	number->sign = 1;

	return (BIG_OK);
}


/*
 * big_finish()
 * Free memory, if any, allocated by big_init() or big_init1().
 */
void
big_finish(BIGNUM *number)
{
	if (number->malloced == 1) {
		big_free(number->value, BIGNUM_WORDSIZE * number->size);
		number->malloced = 0;
	}
}


/*
 * bn->size should be at least
 * (len + BIGNUM_WORDSIZE - 1) / BIGNUM_WORDSIZE words
 * converts from byte-big-endian format to bignum format (words in
 * little endian order, but bytes within the words big endian)
 */
void
bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
{
	int i, j;
	uint32_t offs;
	const uint32_t slen = UI32(len);
	BIG_CHUNK_TYPE word;
	uchar_t *knwordp;

	if (slen == 0) {
		bn->len = 1;
		bn->value[0] = 0;
		return;
	}

	offs = slen % BIGNUM_WORDSIZE;
	bn->len = slen / BIGNUM_WORDSIZE;

	for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) {
		knwordp = &(kn[slen - BIGNUM_WORDSIZE * (i + 1)]);
		word = knwordp[0];
		for (j = 1; j < BIGNUM_WORDSIZE; j++) {
			word = (word << BITSINBYTE) + knwordp[j];
		}
		bn->value[i] = word;
	}
	if (offs > 0) {
		word = kn[0];
		for (i = 1; i < offs; i++) word = (word << BITSINBYTE) + kn[i];
		bn->value[bn->len++] = word;
	}
	while ((bn->len > 1) && (bn->value[bn->len - 1] == 0)) {
		bn->len--;
	}
}


/*
 * copies the least significant len bytes if
 * len < bn->len * BIGNUM_WORDSIZE
 * converts from bignum format to byte-big-endian format.
 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
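 *
 * For example, with 32-bit chunks (BIGNUM_WORDSIZE == 4) the 5-byte
 * big-endian string {0x01, 0x02, 0x03, 0x04, 0x05} corresponds to a
 * BIGNUM with len == 2, value[0] == 0x02030405 and value[1] == 0x01;
 * bytestring2bignum() and bignum2bytestring() convert between these two
 * representations.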
302 */ 303 void 304 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len) 305 { 306 int i, j; 307 uint32_t offs; 308 const uint32_t slen = UI32(len); 309 BIG_CHUNK_TYPE word; 310 311 if (len < BIGNUM_WORDSIZE * bn->len) { 312 for (i = 0; i < slen / BIGNUM_WORDSIZE; i++) { 313 word = bn->value[i]; 314 for (j = 0; j < BIGNUM_WORDSIZE; j++) { 315 kn[slen - BIGNUM_WORDSIZE * i - j - 1] = 316 word & 0xff; 317 word = word >> BITSINBYTE; 318 } 319 } 320 offs = slen % BIGNUM_WORDSIZE; 321 if (offs > 0) { 322 word = bn->value[slen / BIGNUM_WORDSIZE]; 323 for (i = slen % BIGNUM_WORDSIZE; i > 0; i --) { 324 kn[i - 1] = word & 0xff; 325 word = word >> BITSINBYTE; 326 } 327 } 328 } else { 329 for (i = 0; i < bn->len; i++) { 330 word = bn->value[i]; 331 for (j = 0; j < BIGNUM_WORDSIZE; j++) { 332 kn[slen - BIGNUM_WORDSIZE * i - j - 1] = 333 word & 0xff; 334 word = word >> BITSINBYTE; 335 } 336 } 337 for (i = 0; i < slen - BIGNUM_WORDSIZE * bn->len; i++) { 338 kn[i] = 0; 339 } 340 } 341 } 342 343 344 int 345 big_bitlength(BIGNUM *a) 346 { 347 int l = 0, b = 0; 348 BIG_CHUNK_TYPE c; 349 350 l = a->len - 1; 351 while ((l > 0) && (a->value[l] == 0)) { 352 l--; 353 } 354 b = BIG_CHUNK_SIZE; 355 c = a->value[l]; 356 while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) { 357 c = c << 1; 358 b--; 359 } 360 361 return (l * BIG_CHUNK_SIZE + b); 362 } 363 364 365 BIG_ERR_CODE 366 big_copy(BIGNUM *dest, BIGNUM *src) 367 { 368 BIG_CHUNK_TYPE *newptr; 369 int i, len; 370 371 len = src->len; 372 while ((len > 1) && (src->value[len - 1] == 0)) { 373 len--; 374 } 375 src->len = len; 376 if (dest->size < len) { 377 if (dest->malloced == 1) { 378 newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value, 379 BIGNUM_WORDSIZE * dest->size, 380 BIGNUM_WORDSIZE * len); 381 } else { 382 newptr = (BIG_CHUNK_TYPE *) 383 big_malloc(BIGNUM_WORDSIZE * len); 384 if (newptr != NULL) { 385 dest->malloced = 1; 386 } 387 } 388 if (newptr == NULL) { 389 return (BIG_NO_MEM); 390 } 391 dest->value = newptr; 392 dest->size = len; 393 } 394 dest->len = len; 395 dest->sign = src->sign; 396 for (i = 0; i < len; i++) { 397 dest->value[i] = src->value[i]; 398 } 399 400 return (BIG_OK); 401 } 402 403 404 BIG_ERR_CODE 405 big_extend(BIGNUM *number, int size) 406 { 407 BIG_CHUNK_TYPE *newptr; 408 int i; 409 410 if (number->size >= size) 411 return (BIG_OK); 412 if (number->malloced) { 413 number->value = big_realloc(number->value, 414 BIGNUM_WORDSIZE * number->size, 415 BIGNUM_WORDSIZE * size); 416 } else { 417 newptr = big_malloc(BIGNUM_WORDSIZE * size); 418 if (newptr != NULL) { 419 for (i = 0; i < number->size; i++) { 420 newptr[i] = number->value[i]; 421 } 422 } 423 number->value = newptr; 424 } 425 426 if (number->value == NULL) { 427 return (BIG_NO_MEM); 428 } 429 430 number->size = size; 431 number->malloced = 1; 432 return (BIG_OK); 433 } 434 435 436 /* returns 1 if n == 0 */ 437 int 438 big_is_zero(BIGNUM *n) 439 { 440 int i, result; 441 442 result = 1; 443 for (i = 0; i < n->len; i++) { 444 if (n->value[i] != 0) { 445 result = 0; 446 } 447 } 448 return (result); 449 } 450 451 452 BIG_ERR_CODE 453 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 454 { 455 int i, shorter, longer; 456 BIG_CHUNK_TYPE cy, ai; 457 BIG_CHUNK_TYPE *r, *a, *b, *c; 458 BIG_ERR_CODE err; 459 BIGNUM *longerarg; 460 461 if (aa->len > bb->len) { 462 shorter = bb->len; 463 longer = aa->len; 464 longerarg = aa; 465 } else { 466 shorter = aa->len; 467 longer = bb->len; 468 longerarg = bb; 469 } 470 if (result->size < longer + 1) { 471 err = big_extend(result, longer + 1); 472 
if (err != BIG_OK) { 473 return (err); 474 } 475 } 476 477 r = result->value; 478 a = aa->value; 479 b = bb->value; 480 c = longerarg->value; 481 cy = 0; 482 for (i = 0; i < shorter; i++) { 483 ai = a[i]; 484 r[i] = ai + b[i] + cy; 485 if (r[i] > ai) { 486 cy = 0; 487 } else if (r[i] < ai) { 488 cy = 1; 489 } 490 } 491 for (; i < longer; i++) { 492 ai = c[i]; 493 r[i] = ai + cy; 494 if (r[i] >= ai) { 495 cy = 0; 496 } 497 } 498 if (cy == 1) { 499 r[i] = cy; 500 result->len = longer + 1; 501 } else { 502 result->len = longer; 503 } 504 result->sign = 1; 505 return (BIG_OK); 506 } 507 508 509 /* caller must make sure that result has at least len words allocated */ 510 void 511 big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len) 512 { 513 int i; 514 BIG_CHUNK_TYPE cy, ai; 515 516 cy = 1; 517 for (i = 0; i < len; i++) { 518 ai = a[i]; 519 r[i] = ai + (~b[i]) + cy; 520 if (r[i] > ai) { 521 cy = 0; 522 } else if (r[i] < ai) { 523 cy = 1; 524 } 525 } 526 } 527 528 529 /* result=aa-bb it is assumed that aa>=bb */ 530 BIG_ERR_CODE 531 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 532 { 533 int i, shorter; 534 BIG_CHUNK_TYPE cy = 1, ai; 535 BIG_CHUNK_TYPE *r, *a, *b; 536 BIG_ERR_CODE err = BIG_OK; 537 538 if (aa->len > bb->len) { 539 shorter = bb->len; 540 } else { 541 shorter = aa->len; 542 } 543 if (result->size < aa->len) { 544 err = big_extend(result, aa->len); 545 if (err != BIG_OK) { 546 return (err); 547 } 548 } 549 550 r = result->value; 551 a = aa->value; 552 b = bb->value; 553 result->len = aa->len; 554 cy = 1; 555 for (i = 0; i < shorter; i++) { 556 ai = a[i]; 557 r[i] = ai + (~b[i]) + cy; 558 if (r[i] > ai) { 559 cy = 0; 560 } else if (r[i] < ai) { 561 cy = 1; 562 } 563 } 564 for (; i < aa->len; i++) { 565 ai = a[i]; 566 r[i] = ai + (~0) + cy; 567 if (r[i] < ai) { 568 cy = 1; 569 } 570 } 571 result->sign = 1; 572 573 if (cy == 0) { 574 return (BIG_INVALID_ARGS); 575 } else { 576 return (BIG_OK); 577 } 578 } 579 580 581 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */ 582 int 583 big_cmp_abs(BIGNUM *aa, BIGNUM *bb) 584 { 585 int i; 586 587 if (aa->len > bb->len) { 588 for (i = aa->len - 1; i > bb->len - 1; i--) { 589 if (aa->value[i] > 0) { 590 return (1); 591 } 592 } 593 } else if (aa->len < bb->len) { 594 for (i = bb->len - 1; i > aa->len - 1; i--) { 595 if (bb->value[i] > 0) { 596 return (-1); 597 } 598 } 599 } else { 600 i = aa->len - 1; 601 } 602 for (; i >= 0; i--) { 603 if (aa->value[i] > bb->value[i]) { 604 return (1); 605 } else if (aa->value[i] < bb->value[i]) { 606 return (-1); 607 } 608 } 609 610 return (0); 611 } 612 613 614 BIG_ERR_CODE 615 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 616 { 617 BIG_ERR_CODE err; 618 619 if ((bb->sign == -1) && (aa->sign == 1)) { 620 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 621 return (err); 622 } 623 result->sign = 1; 624 } else if ((aa->sign == -1) && (bb->sign == 1)) { 625 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 626 return (err); 627 } 628 result->sign = -1; 629 } else if ((aa->sign == 1) && (bb->sign == 1)) { 630 if (big_cmp_abs(aa, bb) >= 0) { 631 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 632 return (err); 633 } 634 result->sign = 1; 635 } else { 636 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 637 return (err); 638 } 639 result->sign = -1; 640 } 641 } else { 642 if (big_cmp_abs(aa, bb) >= 0) { 643 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 644 return (err); 645 } 646 result->sign = -1; 647 } else { 648 if ((err = big_sub_pos(result, bb, 
aa)) != BIG_OK) { 649 return (err); 650 } 651 result->sign = 1; 652 } 653 } 654 return (BIG_OK); 655 } 656 657 658 BIG_ERR_CODE 659 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 660 { 661 BIG_ERR_CODE err; 662 663 if ((bb->sign == -1) && (aa->sign == -1)) { 664 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 665 return (err); 666 } 667 result->sign = -1; 668 } else if ((aa->sign == 1) && (bb->sign == 1)) { 669 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) { 670 return (err); 671 } 672 result->sign = 1; 673 } else if ((aa->sign == 1) && (bb->sign == -1)) { 674 if (big_cmp_abs(aa, bb) >= 0) { 675 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 676 return (err); 677 } 678 result->sign = 1; 679 } else { 680 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 681 return (err); 682 } 683 result->sign = -1; 684 } 685 } else { 686 if (big_cmp_abs(aa, bb) >= 0) { 687 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) { 688 return (err); 689 } 690 result->sign = -1; 691 } else { 692 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) { 693 return (err); 694 } 695 result->sign = 1; 696 } 697 } 698 return (BIG_OK); 699 } 700 701 702 /* result = aa/2 */ 703 BIG_ERR_CODE 704 big_half_pos(BIGNUM *result, BIGNUM *aa) 705 { 706 BIG_ERR_CODE err; 707 int i; 708 BIG_CHUNK_TYPE cy, cy1; 709 BIG_CHUNK_TYPE *a, *r; 710 711 if (result->size < aa->len) { 712 err = big_extend(result, aa->len); 713 if (err != BIG_OK) { 714 return (err); 715 } 716 } 717 718 result->len = aa->len; 719 a = aa->value; 720 r = result->value; 721 cy = 0; 722 for (i = aa->len - 1; i >= 0; i--) { 723 cy1 = a[i] << (BIG_CHUNK_SIZE - 1); 724 r[i] = (cy | (a[i] >> 1)); 725 cy = cy1; 726 } 727 if (r[result->len - 1] == 0) { 728 result->len--; 729 } 730 731 return (BIG_OK); 732 } 733 734 /* result = aa*2 */ 735 BIG_ERR_CODE 736 big_double(BIGNUM *result, BIGNUM *aa) 737 { 738 BIG_ERR_CODE err; 739 int i, rsize; 740 BIG_CHUNK_TYPE cy, cy1; 741 BIG_CHUNK_TYPE *a, *r; 742 743 if ((aa->len > 0) && 744 ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) { 745 rsize = aa->len + 1; 746 } else { 747 rsize = aa->len; 748 } 749 750 if (result->size < rsize) { 751 err = big_extend(result, rsize); 752 if (err != BIG_OK) 753 return (err); 754 } 755 756 a = aa->value; 757 r = result->value; 758 if (rsize == aa->len + 1) { 759 r[rsize - 1] = 1; 760 } 761 cy = 0; 762 for (i = 0; i < aa->len; i++) { 763 cy1 = a[i] >> (BIG_CHUNK_SIZE - 1); 764 r[i] = (cy | (a[i] << 1)); 765 cy = cy1; 766 } 767 result->len = rsize; 768 return (BIG_OK); 769 } 770 771 772 /* 773 * returns aa mod b, aa must be nonneg, b must be a max 774 * (BIG_CHUNK_SIZE / 2)-bit integer 775 */ 776 static uint32_t 777 big_modhalf_pos(BIGNUM *aa, uint32_t b) 778 { 779 int i; 780 BIG_CHUNK_TYPE rem; 781 782 if (aa->len == 0) { 783 return (0); 784 } 785 rem = aa->value[aa->len - 1] % b; 786 for (i = aa->len - 2; i >= 0; i--) { 787 rem = ((rem << (BIG_CHUNK_SIZE / 2)) | 788 (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b; 789 rem = ((rem << (BIG_CHUNK_SIZE / 2)) | 790 (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b; 791 } 792 793 return ((uint32_t)rem); 794 } 795 796 797 /* 798 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb 799 * result->size should be at least aa->len at entry 800 * aa, bb, and result should be positive 801 */ 802 void 803 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 804 { 805 int i, lendiff; 806 BIGNUM res1, aa1; 807 808 lendiff = aa->len - bb->len; 809 res1.size = result->size - lendiff; 810 res1.malloced = 0; 811 res1.value = result->value + lendiff; 812 
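	/*
	 * res1 is a stack BIGNUM header whose value pointer aliases the
	 * high-order words of result; aa1, set up next, aliases the
	 * high-order words of aa in the same way.  The big_sub_pos() call
	 * below therefore subtracts bb from only the top bb->len words,
	 * which gives result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb; the
	 * low lendiff words are copied through unchanged afterwards.
	 */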
aa1.size = aa->size - lendiff; 813 aa1.value = aa->value + lendiff; 814 aa1.len = bb->len; 815 aa1.sign = 1; 816 (void) big_sub_pos(&res1, &aa1, bb); 817 if (result->value != aa->value) { 818 for (i = 0; i < lendiff; i++) { 819 result->value[i] = aa->value[i]; 820 } 821 } 822 result->len = aa->len; 823 } 824 825 826 /* 827 * returns 1, 0, or -1 depending on whether |aa| > , ==, or < 828 * (2^BIG_CHUNK_SIZE)^lendiff * |bb| 829 * aa->len should be >= bb->len 830 */ 831 int 832 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb) 833 { 834 int lendiff; 835 BIGNUM aa1; 836 837 lendiff = aa->len - bb->len; 838 aa1.len = bb->len; 839 aa1.size = aa->size - lendiff; 840 aa1.malloced = 0; 841 aa1.value = aa->value + lendiff; 842 return (big_cmp_abs(&aa1, bb)); 843 } 844 845 846 /* 847 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer. 848 * result should have enough space allocated. 849 */ 850 static void 851 big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b) 852 { 853 int i; 854 BIG_CHUNK_TYPE t1, t2, ai, cy; 855 BIG_CHUNK_TYPE *a, *r; 856 857 a = aa->value; 858 r = result->value; 859 cy = 0; 860 for (i = 0; i < aa->len; i++) { 861 ai = a[i]; 862 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy; 863 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b + 864 (t1 >> (BIG_CHUNK_SIZE / 2)); 865 r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) | 866 (t2 << (BIG_CHUNK_SIZE / 2)); 867 cy = t2 >> (BIG_CHUNK_SIZE / 2); 868 } 869 r[i] = cy; 870 result->len = aa->len + 1; 871 result->sign = aa->sign; 872 } 873 874 875 /* 876 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max. 877 * (BIG_CHUNK_SIZE / 2)-bit positive integer. 878 * result should have enough space allocated. 879 */ 880 static void 881 big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b) 882 { 883 int i; 884 BIG_CHUNK_TYPE t1, t2, ai, cy, ri; 885 BIG_CHUNK_TYPE *a, *r; 886 887 a = aa->value; 888 r = result->value; 889 cy = 0; 890 ri = 0; 891 for (i = 0; i < aa->len; i++) { 892 ai = a[i]; 893 t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy; 894 t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b + 895 (t1 >> (BIG_CHUNK_SIZE / 2)); 896 r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri; 897 ri = t2 & BIG_CHUNK_LOWHALFBITS; 898 cy = t2 >> (BIG_CHUNK_SIZE / 2); 899 } 900 r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri; 901 result->len = aa->len + 1; 902 result->sign = aa->sign; 903 } 904 905 906 /* it is assumed that result->size is big enough */ 907 void 908 big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs) 909 { 910 int i; 911 BIG_CHUNK_TYPE cy, ai; 912 913 if (offs == 0) { 914 if (result != aa) { 915 (void) big_copy(result, aa); 916 } 917 return; 918 } 919 cy = 0; 920 for (i = 0; i < aa->len; i++) { 921 ai = aa->value[i]; 922 result->value[i] = (ai << offs) | cy; 923 cy = ai >> (BIG_CHUNK_SIZE - offs); 924 } 925 if (cy != 0) { 926 result->len = aa->len + 1; 927 result->value[result->len - 1] = cy; 928 } else { 929 result->len = aa->len; 930 } 931 result->sign = aa->sign; 932 } 933 934 935 /* it is assumed that result->size is big enough */ 936 void 937 big_shiftright(BIGNUM *result, BIGNUM *aa, int offs) 938 { 939 int i; 940 BIG_CHUNK_TYPE cy, ai; 941 942 if (offs == 0) { 943 if (result != aa) { 944 (void) big_copy(result, aa); 945 } 946 return; 947 } 948 cy = aa->value[0] >> offs; 949 for (i = 1; i < aa->len; i++) { 950 ai = aa->value[i]; 951 result->value[i - 1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy; 952 cy = ai >> offs; 953 } 954 result->len = aa->len; 955 result->value[result->len - 1] = cy; 956 result->sign = aa->sign; 957 } 958 959 960 /* 961 * result = aa/bb 
remainder = aa mod bb 962 * it is assumed that aa and bb are positive 963 */ 964 BIG_ERR_CODE 965 big_div_pos(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb) 966 { 967 BIG_ERR_CODE err = BIG_OK; 968 int i, alen, blen, tlen, rlen, offs; 969 BIG_CHUNK_TYPE higha, highb, coeff; 970 BIG_CHUNK_TYPE *a, *b; 971 BIGNUM bbhigh, bblow, tresult, tmp1, tmp2; 972 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE]; 973 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE]; 974 BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE]; 975 BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE]; 976 BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE]; 977 978 a = aa->value; 979 b = bb->value; 980 alen = aa->len; 981 blen = bb->len; 982 while ((alen > 1) && (a[alen - 1] == 0)) { 983 alen = alen - 1; 984 } 985 aa->len = alen; 986 while ((blen > 1) && (b[blen - 1] == 0)) { 987 blen = blen - 1; 988 } 989 bb->len = blen; 990 if ((blen == 1) && (b[0] == 0)) { 991 return (BIG_DIV_BY_0); 992 } 993 994 if (big_cmp_abs(aa, bb) < 0) { 995 if ((remainder != NULL) && 996 ((err = big_copy(remainder, aa)) != BIG_OK)) { 997 return (err); 998 } 999 if (result != NULL) { 1000 result->len = 1; 1001 result->sign = 1; 1002 result->value[0] = 0; 1003 } 1004 return (BIG_OK); 1005 } 1006 1007 if ((err = big_init1(&bblow, blen + 1, 1008 bblowvalue, arraysize(bblowvalue))) != BIG_OK) 1009 return (err); 1010 1011 if ((err = big_init1(&bbhigh, blen + 1, 1012 bbhighvalue, arraysize(bbhighvalue))) != BIG_OK) 1013 goto ret1; 1014 1015 if ((err = big_init1(&tmp1, alen + 2, 1016 tmp1value, arraysize(tmp1value))) != BIG_OK) 1017 goto ret2; 1018 1019 if ((err = big_init1(&tmp2, blen + 2, 1020 tmp2value, arraysize(tmp2value))) != BIG_OK) 1021 goto ret3; 1022 1023 if ((err = big_init1(&tresult, alen - blen + 2, 1024 tresultvalue, arraysize(tresultvalue))) != BIG_OK) 1025 goto ret4; 1026 1027 offs = 0; 1028 highb = b[blen - 1]; 1029 if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) { 1030 highb = highb >> (BIG_CHUNK_SIZE / 2); 1031 offs = (BIG_CHUNK_SIZE / 2); 1032 } 1033 while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) { 1034 highb = highb << 1; 1035 offs++; 1036 } 1037 1038 big_shiftleft(&bblow, bb, offs); 1039 1040 if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) { 1041 big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2); 1042 } else { 1043 big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2); 1044 } 1045 if (bbhigh.value[bbhigh.len - 1] == 0) { 1046 bbhigh.len--; 1047 } else { 1048 bbhigh.value[bbhigh.len] = 0; 1049 } 1050 1051 highb = bblow.value[bblow.len - 1]; 1052 1053 big_shiftleft(&tmp1, aa, offs); 1054 rlen = tmp1.len - bblow.len + 1; 1055 tresult.len = rlen; 1056 1057 tmp1.len++; 1058 tlen = tmp1.len; 1059 tmp1.value[tmp1.len - 1] = 0; 1060 for (i = 0; i < rlen; i++) { 1061 higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) + 1062 (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2)); 1063 coeff = higha / (highb + 1); 1064 big_mulhalf_high(&tmp2, &bblow, coeff); 1065 big_sub_pos_high(&tmp1, &tmp1, &tmp2); 1066 bbhigh.len++; 1067 while (tmp1.value[tlen - 1] > 0) { 1068 big_sub_pos_high(&tmp1, &tmp1, &bbhigh); 1069 coeff++; 1070 } 1071 bbhigh.len--; 1072 tlen--; 1073 tmp1.len--; 1074 while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) { 1075 big_sub_pos_high(&tmp1, &tmp1, &bbhigh); 1076 coeff++; 1077 } 1078 tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2); 1079 higha = tmp1.value[tlen - 1]; 1080 coeff = higha / (highb + 1); 1081 big_mulhalf_low(&tmp2, &bblow, coeff); 1082 tmp2.len--; 1083 big_sub_pos_high(&tmp1, &tmp1, &tmp2); 1084 while (big_cmp_abs_high(&tmp1, &bblow) >= 0) { 1085 big_sub_pos_high(&tmp1, &tmp1, 
&bblow); 1086 coeff++; 1087 } 1088 tresult.value[rlen - i - 1] = 1089 tresult.value[rlen - i - 1] + coeff; 1090 } 1091 1092 big_shiftright(&tmp1, &tmp1, offs); 1093 1094 err = BIG_OK; 1095 1096 if ((remainder != NULL) && 1097 ((err = big_copy(remainder, &tmp1)) != BIG_OK)) 1098 goto ret; 1099 1100 if (result != NULL) 1101 err = big_copy(result, &tresult); 1102 1103 ret: 1104 big_finish(&tresult); 1105 ret4: 1106 big_finish(&tmp1); 1107 ret3: 1108 big_finish(&tmp2); 1109 ret2: 1110 big_finish(&bbhigh); 1111 ret1: 1112 big_finish(&bblow); 1113 return (err); 1114 } 1115 1116 1117 /* 1118 * If there is no processor-specific integer implementation of 1119 * the lower level multiply functions, then this code is provided 1120 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and 1121 * big_sqr_vec(). 1122 * 1123 * There are two generic implementations. One that assumes that 1124 * there is hardware and C compiler support for a 32 x 32 --> 64 1125 * bit unsigned multiply, but otherwise is not specific to any 1126 * processor, platform, or ISA. 1127 * 1128 * The other makes very few assumptions about hardware capabilities. 1129 * It does not even assume that there is any implementation of a 1130 * 32 x 32 --> 64 bit multiply that is accessible to C code and 1131 * appropriate to use. It falls constructs 32 x 32 --> 64 bit 1132 * multiplies from 16 x 16 --> 32 bit multiplies. 1133 * 1134 */ 1135 1136 #if !defined(PSR_MUL) 1137 1138 #ifdef UMUL64 1139 1140 #if (BIG_CHUNK_SIZE == 32) 1141 1142 #define UNROLL8 1143 1144 #define MUL_SET_VEC_ROUND_PREFETCH(R) \ 1145 p = pf * d; \ 1146 pf = (uint64_t)a[R + 1]; \ 1147 t = p + cy; \ 1148 r[R] = (uint32_t)t; \ 1149 cy = t >> 32 1150 1151 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \ 1152 p = pf * d; \ 1153 t = p + cy; \ 1154 r[R] = (uint32_t)t; \ 1155 cy = t >> 32 1156 1157 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \ 1158 t = (uint64_t)r[R]; \ 1159 p = pf * d; \ 1160 pf = (uint64_t)a[R + 1]; \ 1161 t = p + t + cy; \ 1162 r[R] = (uint32_t)t; \ 1163 cy = t >> 32 1164 1165 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \ 1166 t = (uint64_t)r[R]; \ 1167 p = pf * d; \ 1168 t = p + t + cy; \ 1169 r[R] = (uint32_t)t; \ 1170 cy = t >> 32 1171 1172 #ifdef UNROLL8 1173 1174 #define UNROLL 8 1175 1176 /* 1177 * r = a * b 1178 * where r and a are vectors; b is a single 32-bit digit 1179 */ 1180 1181 uint32_t 1182 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b) 1183 { 1184 uint64_t d, pf, p, t, cy; 1185 1186 if (len == 0) 1187 return (0); 1188 cy = 0; 1189 d = (uint64_t)b; 1190 pf = (uint64_t)a[0]; 1191 while (len > UNROLL) { 1192 MUL_SET_VEC_ROUND_PREFETCH(0); 1193 MUL_SET_VEC_ROUND_PREFETCH(1); 1194 MUL_SET_VEC_ROUND_PREFETCH(2); 1195 MUL_SET_VEC_ROUND_PREFETCH(3); 1196 MUL_SET_VEC_ROUND_PREFETCH(4); 1197 MUL_SET_VEC_ROUND_PREFETCH(5); 1198 MUL_SET_VEC_ROUND_PREFETCH(6); 1199 MUL_SET_VEC_ROUND_PREFETCH(7); 1200 r += UNROLL; 1201 a += UNROLL; 1202 len -= UNROLL; 1203 } 1204 if (len == UNROLL) { 1205 MUL_SET_VEC_ROUND_PREFETCH(0); 1206 MUL_SET_VEC_ROUND_PREFETCH(1); 1207 MUL_SET_VEC_ROUND_PREFETCH(2); 1208 MUL_SET_VEC_ROUND_PREFETCH(3); 1209 MUL_SET_VEC_ROUND_PREFETCH(4); 1210 MUL_SET_VEC_ROUND_PREFETCH(5); 1211 MUL_SET_VEC_ROUND_PREFETCH(6); 1212 MUL_SET_VEC_ROUND_NOPREFETCH(7); 1213 return ((uint32_t)cy); 1214 } 1215 while (len > 1) { 1216 MUL_SET_VEC_ROUND_PREFETCH(0); 1217 ++r; 1218 ++a; 1219 --len; 1220 } 1221 if (len > 0) { 1222 MUL_SET_VEC_ROUND_NOPREFETCH(0); 1223 } 1224 return ((uint32_t)cy); 1225 } 1226 1227 /* 1228 * r += a * b 1229 * where r and a are 
vectors; b is a single 32-bit digit 1230 */ 1231 1232 uint32_t 1233 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b) 1234 { 1235 uint64_t d, pf, p, t, cy; 1236 1237 if (len == 0) 1238 return (0); 1239 cy = 0; 1240 d = (uint64_t)b; 1241 pf = (uint64_t)a[0]; 1242 while (len > 8) { 1243 MUL_ADD_VEC_ROUND_PREFETCH(0); 1244 MUL_ADD_VEC_ROUND_PREFETCH(1); 1245 MUL_ADD_VEC_ROUND_PREFETCH(2); 1246 MUL_ADD_VEC_ROUND_PREFETCH(3); 1247 MUL_ADD_VEC_ROUND_PREFETCH(4); 1248 MUL_ADD_VEC_ROUND_PREFETCH(5); 1249 MUL_ADD_VEC_ROUND_PREFETCH(6); 1250 MUL_ADD_VEC_ROUND_PREFETCH(7); 1251 r += 8; 1252 a += 8; 1253 len -= 8; 1254 } 1255 if (len == 8) { 1256 MUL_ADD_VEC_ROUND_PREFETCH(0); 1257 MUL_ADD_VEC_ROUND_PREFETCH(1); 1258 MUL_ADD_VEC_ROUND_PREFETCH(2); 1259 MUL_ADD_VEC_ROUND_PREFETCH(3); 1260 MUL_ADD_VEC_ROUND_PREFETCH(4); 1261 MUL_ADD_VEC_ROUND_PREFETCH(5); 1262 MUL_ADD_VEC_ROUND_PREFETCH(6); 1263 MUL_ADD_VEC_ROUND_NOPREFETCH(7); 1264 return ((uint32_t)cy); 1265 } 1266 while (len > 1) { 1267 MUL_ADD_VEC_ROUND_PREFETCH(0); 1268 ++r; 1269 ++a; 1270 --len; 1271 } 1272 if (len > 0) { 1273 MUL_ADD_VEC_ROUND_NOPREFETCH(0); 1274 } 1275 return ((uint32_t)cy); 1276 } 1277 #endif /* UNROLL8 */ 1278 1279 void 1280 big_sqr_vec(uint32_t *r, uint32_t *a, int len) 1281 { 1282 uint32_t *tr, *ta; 1283 int tlen, row, col; 1284 uint64_t p, s, t, t2, cy; 1285 uint32_t d; 1286 1287 tr = r + 1; 1288 ta = a; 1289 tlen = len - 1; 1290 tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]); 1291 while (--tlen > 0) { 1292 tr += 2; 1293 ++ta; 1294 tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]); 1295 } 1296 s = (uint64_t)a[0]; 1297 s = s * s; 1298 r[0] = (uint32_t)s; 1299 cy = s >> 32; 1300 p = ((uint64_t)r[1] << 1) + cy; 1301 r[1] = (uint32_t)p; 1302 cy = p >> 32; 1303 row = 1; 1304 col = 2; 1305 while (row < len) { 1306 s = (uint64_t)a[row]; 1307 s = s * s; 1308 p = (uint64_t)r[col] << 1; 1309 t = p + s; 1310 d = (uint32_t)t; 1311 t2 = (uint64_t)d + cy; 1312 r[col] = (uint32_t)t2; 1313 cy = (t >> 32) + (t2 >> 32); 1314 if (row == len - 1) 1315 break; 1316 p = ((uint64_t)r[col + 1] << 1) + cy; 1317 r[col + 1] = (uint32_t)p; 1318 cy = p >> 32; 1319 ++row; 1320 col += 2; 1321 } 1322 r[col + 1] = (uint32_t)cy; 1323 } 1324 1325 #else /* BIG_CHUNK_SIZE == 64 */ 1326 1327 /* 1328 * r = r + a * digit, r and a are vectors of length len 1329 * returns the carry digit 1330 */ 1331 BIG_CHUNK_TYPE 1332 big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len, 1333 BIG_CHUNK_TYPE digit) 1334 { 1335 BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh; 1336 int i; 1337 1338 cy1 = 0; 1339 dlow = digit & BIG_CHUNK_LOWHALFBITS; 1340 dhigh = digit >> (BIG_CHUNK_SIZE / 2); 1341 for (i = 0; i < len; i++) { 1342 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) + 1343 dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) + 1344 (r[i] & BIG_CHUNK_LOWHALFBITS); 1345 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) + 1346 dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) + 1347 (r[i] >> (BIG_CHUNK_SIZE / 2)); 1348 r[i] = (cy & BIG_CHUNK_LOWHALFBITS) | 1349 (cy1 << (BIG_CHUNK_SIZE / 2)); 1350 } 1351 retcy = cy1 >> (BIG_CHUNK_SIZE / 2); 1352 1353 cy1 = r[0] & BIG_CHUNK_LOWHALFBITS; 1354 for (i = 0; i < len - 1; i++) { 1355 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) + 1356 dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) + 1357 (r[i] >> (BIG_CHUNK_SIZE / 2)); 1358 r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) | 1359 (cy << (BIG_CHUNK_SIZE / 2)); 1360 cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) + 1361 dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) + 1362 (r[i + 1] & BIG_CHUNK_LOWHALFBITS); 1363 } 1364 cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) + 1365 dhigh 
* (a[len - 1] & BIG_CHUNK_LOWHALFBITS) + 1366 (r[len - 1] >> (BIG_CHUNK_SIZE / 2)); 1367 r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) | 1368 (cy << (BIG_CHUNK_SIZE / 2)); 1369 retcy = (cy >> (BIG_CHUNK_SIZE / 2)) + 1370 dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy; 1371 1372 return (retcy); 1373 } 1374 1375 1376 /* 1377 * r = a * digit, r and a are vectors of length len 1378 * returns the carry digit 1379 */ 1380 BIG_CHUNK_TYPE 1381 big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len, 1382 BIG_CHUNK_TYPE digit) 1383 { 1384 int i; 1385 1386 ASSERT(r != a); 1387 for (i = 0; i < len; i++) { 1388 r[i] = 0; 1389 } 1390 return (big_mul_add_vec(r, a, len, digit)); 1391 } 1392 1393 void 1394 big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len) 1395 { 1396 int i; 1397 1398 ASSERT(r != a); 1399 r[len] = big_mul_set_vec(r, a, len, a[0]); 1400 for (i = 1; i < len; ++i) 1401 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]); 1402 } 1403 1404 #endif /* BIG_CHUNK_SIZE == 32/64 */ 1405 1406 1407 #else /* ! UMUL64 */ 1408 1409 #if (BIG_CHUNK_SIZE != 32) 1410 #error Don't use 64-bit chunks without defining UMUL64 1411 #endif 1412 1413 1414 /* 1415 * r = r + a * digit, r and a are vectors of length len 1416 * returns the carry digit 1417 */ 1418 uint32_t 1419 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit) 1420 { 1421 uint32_t cy, cy1, retcy, dlow, dhigh; 1422 int i; 1423 1424 cy1 = 0; 1425 dlow = digit & 0xffff; 1426 dhigh = digit >> 16; 1427 for (i = 0; i < len; i++) { 1428 cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff); 1429 cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16); 1430 r[i] = (cy & 0xffff) | (cy1 << 16); 1431 } 1432 retcy = cy1 >> 16; 1433 1434 cy1 = r[0] & 0xffff; 1435 for (i = 0; i < len - 1; i++) { 1436 cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16); 1437 r[i] = (cy1 & 0xffff) | (cy << 16); 1438 cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff); 1439 } 1440 cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16); 1441 r[len - 1] = (cy1 & 0xffff) | (cy << 16); 1442 retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy; 1443 1444 return (retcy); 1445 } 1446 1447 1448 /* 1449 * r = a * digit, r and a are vectors of length len 1450 * returns the carry digit 1451 */ 1452 uint32_t 1453 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit) 1454 { 1455 int i; 1456 1457 ASSERT(r != a); 1458 for (i = 0; i < len; i++) { 1459 r[i] = 0; 1460 } 1461 1462 return (big_mul_add_vec(r, a, len, digit)); 1463 } 1464 1465 void 1466 big_sqr_vec(uint32_t *r, uint32_t *a, int len) 1467 { 1468 int i; 1469 1470 ASSERT(r != a); 1471 r[len] = big_mul_set_vec(r, a, len, a[0]); 1472 for (i = 1; i < len; ++i) 1473 r[len + i] = big_mul_add_vec(r + i, a, len, a[i]); 1474 } 1475 1476 #endif /* UMUL64 */ 1477 1478 void 1479 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen, 1480 BIG_CHUNK_TYPE *b, int blen) 1481 { 1482 int i; 1483 1484 r[alen] = big_mul_set_vec(r, a, alen, b[0]); 1485 for (i = 1; i < blen; ++i) 1486 r[alen + i] = big_mul_add_vec(r + i, a, alen, b[i]); 1487 } 1488 1489 1490 #endif /* ! 
PSR_MUL */ 1491 1492 1493 /* 1494 * result = aa * bb result->value should be big enough to hold the result 1495 * 1496 * Implementation: Standard grammar school algorithm 1497 * 1498 */ 1499 BIG_ERR_CODE 1500 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 1501 { 1502 BIGNUM tmp1; 1503 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE]; 1504 BIG_CHUNK_TYPE *r, *t, *a, *b; 1505 BIG_ERR_CODE err; 1506 int i, alen, blen, rsize, sign, diff; 1507 1508 if (aa == bb) { 1509 diff = 0; 1510 } else { 1511 diff = big_cmp_abs(aa, bb); 1512 if (diff < 0) { 1513 BIGNUM *tt; 1514 tt = aa; 1515 aa = bb; 1516 bb = tt; 1517 } 1518 } 1519 a = aa->value; 1520 b = bb->value; 1521 alen = aa->len; 1522 blen = bb->len; 1523 while ((alen > 1) && (a[alen - 1] == 0)) { 1524 alen--; 1525 } 1526 aa->len = alen; 1527 while ((blen > 1) && (b[blen - 1] == 0)) { 1528 blen--; 1529 } 1530 bb->len = blen; 1531 1532 rsize = alen + blen; 1533 ASSERT(rsize > 0); 1534 if (result->size < rsize) { 1535 err = big_extend(result, rsize); 1536 if (err != BIG_OK) { 1537 return (err); 1538 } 1539 /* aa or bb might be an alias to result */ 1540 a = aa->value; 1541 b = bb->value; 1542 } 1543 r = result->value; 1544 1545 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) { 1546 result->len = 1; 1547 result->sign = 1; 1548 r[0] = 0; 1549 return (BIG_OK); 1550 } 1551 sign = aa->sign * bb->sign; 1552 if ((alen == 1) && (a[0] == 1)) { 1553 for (i = 0; i < blen; i++) { 1554 r[i] = b[i]; 1555 } 1556 result->len = blen; 1557 result->sign = sign; 1558 return (BIG_OK); 1559 } 1560 if ((blen == 1) && (b[0] == 1)) { 1561 for (i = 0; i < alen; i++) { 1562 r[i] = a[i]; 1563 } 1564 result->len = alen; 1565 result->sign = sign; 1566 return (BIG_OK); 1567 } 1568 1569 if ((err = big_init1(&tmp1, rsize, 1570 tmp1value, arraysize(tmp1value))) != BIG_OK) { 1571 return (err); 1572 } 1573 (void) big_copy(&tmp1, aa); 1574 t = tmp1.value; 1575 1576 for (i = 0; i < rsize; i++) { 1577 t[i] = 0; 1578 } 1579 1580 if (diff == 0 && alen > 2) { 1581 BIG_SQR_VEC(t, a, alen); 1582 } else if (blen > 0) { 1583 BIG_MUL_VEC(t, a, alen, b, blen); 1584 } 1585 1586 if (t[rsize - 1] == 0) { 1587 tmp1.len = rsize - 1; 1588 } else { 1589 tmp1.len = rsize; 1590 } 1591 1592 err = big_copy(result, &tmp1); 1593 1594 result->sign = sign; 1595 1596 big_finish(&tmp1); 1597 1598 return (err); 1599 } 1600 1601 1602 /* 1603 * caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1 1604 * and that ret is not n 1605 */ 1606 BIG_ERR_CODE 1607 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0) 1608 { 1609 int i, j, nlen, needsubtract; 1610 BIG_CHUNK_TYPE *nn, *rr; 1611 BIG_CHUNK_TYPE digit, c; 1612 BIG_ERR_CODE err; 1613 1614 nlen = n->len; 1615 nn = n->value; 1616 1617 rr = ret->value; 1618 1619 if ((err = big_mul(ret, a, b)) != BIG_OK) { 1620 return (err); 1621 } 1622 1623 rr = ret->value; 1624 for (i = ret->len; i < 2 * nlen + 1; i++) { 1625 rr[i] = 0; 1626 } 1627 for (i = 0; i < nlen; i++) { 1628 digit = rr[i]; 1629 digit = digit * n0; 1630 1631 c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit); 1632 j = i + nlen; 1633 rr[j] += c; 1634 while (rr[j] < c) { 1635 rr[j + 1] += 1; 1636 j++; 1637 c = 1; 1638 } 1639 } 1640 1641 needsubtract = 0; 1642 if ((rr[2 * nlen] != 0)) 1643 needsubtract = 1; 1644 else { 1645 for (i = 2 * nlen - 1; i >= nlen; i--) { 1646 if (rr[i] > nn[i - nlen]) { 1647 needsubtract = 1; 1648 break; 1649 } else if (rr[i] < nn[i - nlen]) { 1650 break; 1651 } 1652 } 1653 } 1654 if (needsubtract) 1655 big_sub_vec(rr, rr + nlen, nn, nlen); 1656 
else { 1657 for (i = 0; i < nlen; i++) { 1658 rr[i] = rr[i + nlen]; 1659 } 1660 } 1661 1662 /* Remove leading zeros, but keep at least 1 digit: */ 1663 for (i = nlen - 1; (i > 0) && (rr[i] == 0); i--) 1664 ; 1665 ret->len = i + 1; 1666 1667 return (BIG_OK); 1668 } 1669 1670 1671 BIG_CHUNK_TYPE 1672 big_n0(BIG_CHUNK_TYPE n) 1673 { 1674 int i; 1675 BIG_CHUNK_TYPE result, tmp; 1676 1677 result = 0; 1678 tmp = BIG_CHUNK_ALLBITS; 1679 for (i = 0; i < BIG_CHUNK_SIZE; i++) { 1680 if ((tmp & 1) == 1) { 1681 result = (result >> 1) | BIG_CHUNK_HIGHBIT; 1682 tmp = tmp - n; 1683 } else { 1684 result = (result >> 1); 1685 } 1686 tmp = tmp >> 1; 1687 } 1688 1689 return (result); 1690 } 1691 1692 1693 int 1694 big_numbits(BIGNUM *n) 1695 { 1696 int i, j; 1697 BIG_CHUNK_TYPE t; 1698 1699 for (i = n->len - 1; i > 0; i--) { 1700 if (n->value[i] != 0) { 1701 break; 1702 } 1703 } 1704 t = n->value[i]; 1705 for (j = BIG_CHUNK_SIZE; j > 0; j--) { 1706 if ((t & BIG_CHUNK_HIGHBIT) == 0) { 1707 t = t << 1; 1708 } else { 1709 return (BIG_CHUNK_SIZE * i + j); 1710 } 1711 } 1712 return (0); 1713 } 1714 1715 1716 /* caller must make sure that a < n */ 1717 BIG_ERR_CODE 1718 big_mont_rr(BIGNUM *result, BIGNUM *n) 1719 { 1720 BIGNUM rr; 1721 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE]; 1722 int len, i; 1723 BIG_ERR_CODE err; 1724 1725 rr.malloced = 0; 1726 len = n->len; 1727 1728 if ((err = big_init1(&rr, 2 * len + 1, 1729 rrvalue, arraysize(rrvalue))) != BIG_OK) { 1730 return (err); 1731 } 1732 1733 for (i = 0; i < 2 * len; i++) { 1734 rr.value[i] = 0; 1735 } 1736 rr.value[2 * len] = 1; 1737 rr.len = 2 * len + 1; 1738 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) { 1739 goto ret; 1740 } 1741 err = big_copy(result, &rr); 1742 ret: 1743 big_finish(&rr); 1744 return (err); 1745 } 1746 1747 1748 /* caller must make sure that a < n */ 1749 BIG_ERR_CODE 1750 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0, 1751 BIGNUM *n_rr) 1752 { 1753 BIGNUM rr; 1754 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE]; 1755 int len, i; 1756 BIG_ERR_CODE err; 1757 1758 rr.malloced = 0; 1759 len = n->len; 1760 1761 if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue))) 1762 != BIG_OK) { 1763 return (err); 1764 } 1765 1766 if (n_rr == NULL) { 1767 for (i = 0; i < 2 * len; i++) { 1768 rr.value[i] = 0; 1769 } 1770 rr.value[2 * len] = 1; 1771 rr.len = 2 * len + 1; 1772 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) { 1773 goto ret; 1774 } 1775 n_rr = &rr; 1776 } 1777 1778 if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) { 1779 goto ret; 1780 } 1781 err = big_copy(result, &rr); 1782 1783 ret: 1784 big_finish(&rr); 1785 return (err); 1786 } 1787 1788 1789 #ifdef USE_FLOATING_POINT 1790 #define big_modexp_ncp_float big_modexp_ncp_sw 1791 #else 1792 #define big_modexp_ncp_int big_modexp_ncp_sw 1793 #endif 1794 1795 #define MAX_EXP_BIT_GROUP_SIZE 6 1796 #define APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1)) 1797 1798 /* ARGSUSED */ 1799 static BIG_ERR_CODE 1800 big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n, 1801 BIGNUM *tmp, BIG_CHUNK_TYPE n0) 1802 1803 { 1804 BIGNUM apowers[APOWERS_MAX_SIZE]; 1805 BIGNUM tmp1; 1806 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE]; 1807 int i, j, k, l, m, p; 1808 uint32_t bit, bitind, bitcount, groupbits, apowerssize; 1809 uint32_t nbits; 1810 BIG_ERR_CODE err; 1811 1812 nbits = big_numbits(e); 1813 if (nbits < 50) { 1814 groupbits = 1; 1815 apowerssize = 1; 1816 } else { 1817 groupbits = MAX_EXP_BIT_GROUP_SIZE; 1818 apowerssize = 1 << (groupbits - 1); 1819 } 1820 1821 
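	/*
	 * Windowed square-and-multiply exponentiation on Montgomery
	 * residues: apowers[i] caches ma^(2*i+1), so only the odd powers
	 * of the base are precomputed (apowerssize of them, for windows of
	 * up to groupbits bits).  While the exponent is scanned below,
	 * k counts the squarings owed up to and including the last 1-bit
	 * of the current window, l counts the trailing 0-bits, and p
	 * accumulates the window value, so apowers[p >> (l + 1)] is ma
	 * raised to the odd part of the window.
	 */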
1822 if ((err = big_init1(&tmp1, 2 * n->len + 1, 1823 tmp1value, arraysize(tmp1value))) != BIG_OK) { 1824 return (err); 1825 } 1826 1827 /* clear the malloced bit to help cleanup */ 1828 for (i = 0; i < apowerssize; i++) { 1829 apowers[i].malloced = 0; 1830 } 1831 1832 for (i = 0; i < apowerssize; i++) { 1833 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) != 1834 BIG_OK) { 1835 goto ret; 1836 } 1837 } 1838 1839 (void) big_copy(&(apowers[0]), ma); 1840 1841 if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) { 1842 goto ret; 1843 } 1844 (void) big_copy(ma, &tmp1); 1845 1846 for (i = 1; i < apowerssize; i++) { 1847 if ((err = big_mont_mul(&tmp1, ma, 1848 &(apowers[i - 1]), n, n0)) != BIG_OK) { 1849 goto ret; 1850 } 1851 (void) big_copy(&apowers[i], &tmp1); 1852 } 1853 1854 bitind = nbits % BIG_CHUNK_SIZE; 1855 k = 0; 1856 l = 0; 1857 p = 0; 1858 bitcount = 0; 1859 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) { 1860 for (j = bitind - 1; j >= 0; j--) { 1861 bit = (e->value[i] >> j) & 1; 1862 if ((bitcount == 0) && (bit == 0)) { 1863 if ((err = big_mont_mul(tmp, 1864 tmp, tmp, n, n0)) != BIG_OK) { 1865 goto ret; 1866 } 1867 } else { 1868 bitcount++; 1869 p = p * 2 + bit; 1870 if (bit == 1) { 1871 k = k + l + 1; 1872 l = 0; 1873 } else { 1874 l++; 1875 } 1876 if (bitcount == groupbits) { 1877 for (m = 0; m < k; m++) { 1878 if ((err = big_mont_mul(tmp, 1879 tmp, tmp, n, n0)) != 1880 BIG_OK) { 1881 goto ret; 1882 } 1883 } 1884 if ((err = big_mont_mul(tmp, tmp, 1885 &(apowers[p >> (l + 1)]), 1886 n, n0)) != BIG_OK) { 1887 goto ret; 1888 } 1889 for (m = 0; m < l; m++) { 1890 if ((err = big_mont_mul(tmp, 1891 tmp, tmp, n, n0)) != 1892 BIG_OK) { 1893 goto ret; 1894 } 1895 } 1896 k = 0; 1897 l = 0; 1898 p = 0; 1899 bitcount = 0; 1900 } 1901 } 1902 } 1903 bitind = BIG_CHUNK_SIZE; 1904 } 1905 1906 for (m = 0; m < k; m++) { 1907 if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) { 1908 goto ret; 1909 } 1910 } 1911 if (p != 0) { 1912 if ((err = big_mont_mul(tmp, tmp, 1913 &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) { 1914 goto ret; 1915 } 1916 } 1917 for (m = 0; m < l; m++) { 1918 if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) { 1919 goto ret; 1920 } 1921 } 1922 1923 ret: 1924 for (i = apowerssize - 1; i >= 0; i--) { 1925 big_finish(&(apowers[i])); 1926 } 1927 big_finish(&tmp1); 1928 1929 return (err); 1930 } 1931 1932 1933 #ifdef USE_FLOATING_POINT 1934 1935 #ifdef _KERNEL 1936 1937 #include <sys/sysmacros.h> 1938 #include <sys/regset.h> 1939 #include <sys/fpu/fpusystm.h> 1940 1941 /* the alignment for block stores to save fp registers */ 1942 #define FPR_ALIGN (64) 1943 1944 extern void big_savefp(kfpu_t *); 1945 extern void big_restorefp(kfpu_t *); 1946 1947 #endif /* _KERNEL */ 1948 1949 /* 1950 * This version makes use of floating point for performance 1951 */ 1952 static BIG_ERR_CODE 1953 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n, 1954 BIGNUM *tmp, BIG_CHUNK_TYPE n0) 1955 { 1956 1957 int i, j, k, l, m, p; 1958 uint32_t bit, bitind, bitcount, nlen; 1959 double dn0; 1960 double *dn, *dt, *d16r, *d32r; 1961 uint32_t *nint, *prod; 1962 double *apowers[APOWERS_MAX_SIZE]; 1963 uint32_t nbits, groupbits, apowerssize; 1964 BIG_ERR_CODE err = BIG_OK; 1965 1966 #ifdef _KERNEL 1967 uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN]; 1968 kfpu_t *fpu; 1969 1970 #ifdef DEBUG 1971 if (!fpu_exists) 1972 return (BIG_GENERAL_ERR); 1973 #endif 1974 1975 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN); 1976 big_savefp(fpu); 1977 1978 #endif /* _KERNEL */ 1979 
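	/*
	 * Floating-point variant of the windowed exponentiation in
	 * big_modexp_ncp_int(): the operands are converted into arrays of
	 * 16-bit (d16r, apowers[]) and 32-bit (d32r) digits stored as
	 * doubles, so that mont_mulf_noconv() (implemented elsewhere) can
	 * run the inner Montgomery multiplications on the FPU.
	 * conv_i32_to_d32(), conv_i32_to_d16() and conv_i32_to_d32_and_d16()
	 * perform the conversions, and the result is converted back to
	 * BIG_CHUNK_TYPE words at the end.
	 */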
1980 nbits = big_numbits(e); 1981 if (nbits < 50) { 1982 groupbits = 1; 1983 apowerssize = 1; 1984 } else { 1985 groupbits = MAX_EXP_BIT_GROUP_SIZE; 1986 apowerssize = 1 << (groupbits - 1); 1987 } 1988 1989 nlen = (BIG_CHUNK_SIZE / 32) * n->len; 1990 dn0 = (double)(n0 & 0xffff); 1991 1992 dn = dt = d16r = d32r = NULL; 1993 nint = prod = NULL; 1994 for (i = 0; i < apowerssize; i++) { 1995 apowers[i] = NULL; 1996 } 1997 1998 if ((dn = big_malloc(nlen * sizeof (double))) == NULL) { 1999 err = BIG_NO_MEM; 2000 goto ret; 2001 } 2002 if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) { 2003 err = BIG_NO_MEM; 2004 goto ret; 2005 } 2006 if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) { 2007 err = BIG_NO_MEM; 2008 goto ret; 2009 } 2010 if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) { 2011 err = BIG_NO_MEM; 2012 goto ret; 2013 } 2014 if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) { 2015 err = BIG_NO_MEM; 2016 goto ret; 2017 } 2018 if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) { 2019 err = BIG_NO_MEM; 2020 goto ret; 2021 } 2022 for (i = 0; i < apowerssize; i++) { 2023 if ((apowers[i] = big_malloc((2 * nlen + 1) * 2024 sizeof (double))) == NULL) { 2025 err = BIG_NO_MEM; 2026 goto ret; 2027 } 2028 } 2029 2030 #if (BIG_CHUNK_SIZE == 32) 2031 for (i = 0; i < ma->len; i++) { 2032 nint[i] = ma->value[i]; 2033 } 2034 for (; i < nlen; i++) { 2035 nint[i] = 0; 2036 } 2037 #else 2038 for (i = 0; i < ma->len; i++) { 2039 nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL); 2040 nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32); 2041 } 2042 for (i = ma->len * 2; i < nlen; i++) { 2043 nint[i] = 0; 2044 } 2045 #endif 2046 conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen); 2047 2048 #if (BIG_CHUNK_SIZE == 32) 2049 for (i = 0; i < n->len; i++) { 2050 nint[i] = n->value[i]; 2051 } 2052 for (; i < nlen; i++) { 2053 nint[i] = 0; 2054 } 2055 #else 2056 for (i = 0; i < n->len; i++) { 2057 nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL); 2058 nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32); 2059 } 2060 for (i = n->len * 2; i < nlen; i++) { 2061 nint[i] = 0; 2062 } 2063 #endif 2064 conv_i32_to_d32(dn, nint, nlen); 2065 2066 mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0); 2067 conv_i32_to_d32(d32r, prod, nlen); 2068 for (i = 1; i < apowerssize; i++) { 2069 mont_mulf_noconv(prod, d32r, apowers[i - 1], 2070 dt, dn, nint, nlen, dn0); 2071 conv_i32_to_d16(apowers[i], prod, nlen); 2072 } 2073 2074 #if (BIG_CHUNK_SIZE == 32) 2075 for (i = 0; i < tmp->len; i++) { 2076 prod[i] = tmp->value[i]; 2077 } 2078 for (; i < nlen + 1; i++) { 2079 prod[i] = 0; 2080 } 2081 #else 2082 for (i = 0; i < tmp->len; i++) { 2083 prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL); 2084 prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32); 2085 } 2086 for (i = tmp->len * 2; i < nlen + 1; i++) { 2087 prod[i] = 0; 2088 } 2089 #endif 2090 2091 bitind = nbits % BIG_CHUNK_SIZE; 2092 k = 0; 2093 l = 0; 2094 p = 0; 2095 bitcount = 0; 2096 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) { 2097 for (j = bitind - 1; j >= 0; j--) { 2098 bit = (e->value[i] >> j) & 1; 2099 if ((bitcount == 0) && (bit == 0)) { 2100 conv_i32_to_d32_and_d16(d32r, d16r, 2101 prod, nlen); 2102 mont_mulf_noconv(prod, d32r, d16r, 2103 dt, dn, nint, nlen, dn0); 2104 } else { 2105 bitcount++; 2106 p = p * 2 + bit; 2107 if (bit == 1) { 2108 k = k + l + 1; 2109 l = 0; 2110 } else { 2111 l++; 2112 } 2113 if (bitcount == groupbits) { 2114 for (m = 0; m < k; m++) { 2115 
conv_i32_to_d32_and_d16(d32r, 2116 d16r, prod, nlen); 2117 mont_mulf_noconv(prod, d32r, 2118 d16r, dt, dn, nint, 2119 nlen, dn0); 2120 } 2121 conv_i32_to_d32(d32r, prod, nlen); 2122 mont_mulf_noconv(prod, d32r, 2123 apowers[p >> (l + 1)], 2124 dt, dn, nint, nlen, dn0); 2125 for (m = 0; m < l; m++) { 2126 conv_i32_to_d32_and_d16(d32r, 2127 d16r, prod, nlen); 2128 mont_mulf_noconv(prod, d32r, 2129 d16r, dt, dn, nint, 2130 nlen, dn0); 2131 } 2132 k = 0; 2133 l = 0; 2134 p = 0; 2135 bitcount = 0; 2136 } 2137 } 2138 } 2139 bitind = BIG_CHUNK_SIZE; 2140 } 2141 2142 for (m = 0; m < k; m++) { 2143 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen); 2144 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0); 2145 } 2146 if (p != 0) { 2147 conv_i32_to_d32(d32r, prod, nlen); 2148 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)], 2149 dt, dn, nint, nlen, dn0); 2150 } 2151 for (m = 0; m < l; m++) { 2152 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen); 2153 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0); 2154 } 2155 2156 #if (BIG_CHUNK_SIZE == 32) 2157 for (i = 0; i < nlen; i++) { 2158 result->value[i] = prod[i]; 2159 } 2160 for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--) 2161 ; 2162 #else 2163 for (i = 0; i < nlen / 2; i++) { 2164 result->value[i] = (uint64_t)(prod[2 * i]) + 2165 (((uint64_t)(prod[2 * i + 1])) << 32); 2166 } 2167 for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--) 2168 ; 2169 #endif 2170 result->len = i + 1; 2171 2172 ret: 2173 for (i = apowerssize - 1; i >= 0; i--) { 2174 if (apowers[i] != NULL) 2175 big_free(apowers[i], (2 * nlen + 1) * sizeof (double)); 2176 } 2177 if (d32r != NULL) { 2178 big_free(d32r, nlen * sizeof (double)); 2179 } 2180 if (d16r != NULL) { 2181 big_free(d16r, (2 * nlen + 1) * sizeof (double)); 2182 } 2183 if (prod != NULL) { 2184 big_free(prod, (nlen + 1) * sizeof (uint32_t)); 2185 } 2186 if (nint != NULL) { 2187 big_free(nint, nlen * sizeof (uint32_t)); 2188 } 2189 if (dt != NULL) { 2190 big_free(dt, (4 * nlen + 2) * sizeof (double)); 2191 } 2192 if (dn != NULL) { 2193 big_free(dn, nlen * sizeof (double)); 2194 } 2195 2196 #ifdef _KERNEL 2197 big_restorefp(fpu); 2198 #endif 2199 2200 return (err); 2201 } 2202 2203 #endif /* USE_FLOATING_POINT */ 2204 2205 2206 BIG_ERR_CODE 2207 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr, 2208 big_modexp_ncp_info_t *info) 2209 { 2210 BIGNUM ma, tmp, rr; 2211 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE]; 2212 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2213 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE]; 2214 BIG_ERR_CODE err; 2215 BIG_CHUNK_TYPE n0; 2216 2217 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) != 2218 BIG_OK) { 2219 return (err); 2220 } 2221 ma.len = 1; 2222 ma.value[0] = 0; 2223 2224 if ((err = big_init1(&tmp, 2 * n->len + 1, 2225 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 2226 goto ret1; 2227 } 2228 2229 /* clear the malloced bit to help cleanup */ 2230 rr.malloced = 0; 2231 2232 if (n_rr == NULL) { 2233 if ((err = big_init1(&rr, 2 * n->len + 1, 2234 rrvalue, arraysize(rrvalue))) != BIG_OK) { 2235 goto ret2; 2236 } 2237 if (big_mont_rr(&rr, n) != BIG_OK) { 2238 goto ret; 2239 } 2240 n_rr = &rr; 2241 } 2242 2243 n0 = big_n0(n->value[0]); 2244 2245 if (big_cmp_abs(a, n) > 0) { 2246 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) { 2247 goto ret; 2248 } 2249 err = big_mont_conv(&ma, &ma, n, n0, n_rr); 2250 } else { 2251 err = big_mont_conv(&ma, a, n, n0, n_rr); 2252 } 2253 if (err != BIG_OK) { 2254 goto ret; 2255 } 2256 2257 tmp.len = 1; 2258 tmp.value[0] = 
1; 2259 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) { 2260 goto ret; 2261 } 2262 2263 if ((info != NULL) && (info->func != NULL)) { 2264 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0, 2265 info->ncp, info->reqp); 2266 } else { 2267 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0); 2268 } 2269 if (err != BIG_OK) { 2270 goto ret; 2271 } 2272 2273 ma.value[0] = 1; 2274 ma.len = 1; 2275 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) { 2276 goto ret; 2277 } 2278 err = big_copy(result, &tmp); 2279 2280 ret: 2281 if (rr.malloced) { 2282 big_finish(&rr); 2283 } 2284 ret2: 2285 big_finish(&tmp); 2286 ret1: 2287 big_finish(&ma); 2288 2289 return (err); 2290 } 2291 2292 BIG_ERR_CODE 2293 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr) 2294 { 2295 return (big_modexp_ext(result, a, e, n, n_rr, NULL)); 2296 } 2297 2298 2299 BIG_ERR_CODE 2300 big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1, 2301 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq, 2302 BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info) 2303 { 2304 BIGNUM ap, aq, tmp; 2305 int alen, biglen, sign; 2306 BIG_ERR_CODE err; 2307 2308 if (p->len > q->len) { 2309 biglen = p->len; 2310 } else { 2311 biglen = q->len; 2312 } 2313 2314 if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) { 2315 return (err); 2316 } 2317 if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) { 2318 goto ret1; 2319 } 2320 if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) { 2321 goto ret2; 2322 } 2323 2324 /* 2325 * check whether a is too short - to avoid timing attacks 2326 */ 2327 alen = a->len; 2328 while ((alen > p->len) && (a->value[alen - 1] == 0)) { 2329 alen--; 2330 } 2331 if (alen < p->len + q->len) { 2332 /* 2333 * a is too short, add p*q to it before 2334 * taking it modulo p and q 2335 * this will also affect timing, but this difference 2336 * does not depend on p or q, only on a 2337 * (in "normal" operation, this path will never be 2338 * taken, so it is not a performance penalty 2339 */ 2340 if ((err = big_mul(&tmp, p, q)) != BIG_OK) { 2341 goto ret; 2342 } 2343 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) { 2344 goto ret; 2345 } 2346 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) { 2347 goto ret; 2348 } 2349 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) { 2350 goto ret; 2351 } 2352 } else { 2353 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) { 2354 goto ret; 2355 } 2356 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) { 2357 goto ret; 2358 } 2359 } 2360 2361 if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) != 2362 BIG_OK) { 2363 goto ret; 2364 } 2365 if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) != 2366 BIG_OK) { 2367 goto ret; 2368 } 2369 if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) { 2370 goto ret; 2371 } 2372 if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) { 2373 goto ret; 2374 } 2375 sign = tmp.sign; 2376 tmp.sign = 1; 2377 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) { 2378 goto ret; 2379 } 2380 if ((sign == -1) && (!big_is_zero(&aq))) { 2381 (void) big_sub_pos(&aq, q, &aq); 2382 } 2383 if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) { 2384 goto ret; 2385 } 2386 err = big_add_abs(result, &ap, &tmp); 2387 2388 ret: 2389 big_finish(&tmp); 2390 ret2: 2391 big_finish(&aq); 2392 ret1: 2393 big_finish(&ap); 2394 2395 return (err); 2396 } 2397 2398 2399 BIG_ERR_CODE 2400 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1, 2401 BIGNUM *dmodqminus1, BIGNUM *p, 
    BIGNUM *q, BIGNUM *pinvmodq, BIGNUM *p_rr, BIGNUM *q_rr)
{
	return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1,
	    p, q, pinvmodq, p_rr, q_rr, NULL));
}


static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1};
BIGNUM big_One = {1, 1, 1, 0, onearr};

static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2};
BIGNUM big_Two = {1, 1, 1, 0, twoarr};

static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4};
static BIGNUM big_Four = {1, 1, 1, 0, fourarr};


/*
 * big_sqrt_pos()
 * Set result to the integer square root of n, i.e. the largest value
 * whose square does not exceed n, found by binary search.
 */
BIG_ERR_CODE
big_sqrt_pos(BIGNUM *result, BIGNUM *n)
{
	BIGNUM *high, *low, *mid, *t;
	BIGNUM t1, t2, t3, prod;
	BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
	BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE];
	int i, diff;
	uint32_t nbits, nrootbits, highbits;
	BIG_ERR_CODE err;

	nbits = big_numbits(n);

	if ((err = big_init1(&t1, n->len + 1,
	    t1value, arraysize(t1value))) != BIG_OK)
		return (err);
	if ((err = big_init1(&t2, n->len + 1,
	    t2value, arraysize(t2value))) != BIG_OK)
		goto ret1;
	if ((err = big_init1(&t3, n->len + 1,
	    t3value, arraysize(t3value))) != BIG_OK)
		goto ret2;
	if ((err = big_init1(&prod, n->len + 1,
	    prodvalue, arraysize(prodvalue))) != BIG_OK)
		goto ret3;

	nrootbits = (nbits + 1) / 2;
	t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1;
	for (i = 0; i < t1.len; i++) {
		t1.value[i] = 0;
		t2.value[i] = BIG_CHUNK_ALLBITS;
	}
	highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1);
	if (highbits == BIG_CHUNK_SIZE) {
		t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT;
		t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS;
	} else {
		t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1);
		t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1;
	}

	high = &t2;
	low = &t1;
	mid = &t3;

	if ((err = big_mul(&prod, high, high)) != BIG_OK) {
		goto ret;
	}
	diff = big_cmp_abs(&prod, n);
	if (diff <= 0) {
		err = big_copy(result, high);
		goto ret;
	}

	(void) big_sub_pos(mid, high, low);
	while (big_cmp_abs(&big_One, mid) != 0) {
		(void) big_add_abs(mid, high, low);
		(void) big_half_pos(mid, mid);
		if ((err = big_mul(&prod, mid, mid)) != BIG_OK)
			goto ret;
		diff = big_cmp_abs(&prod, n);
		if (diff > 0) {
			t = high;
			high = mid;
			mid = t;
		} else if (diff < 0) {
			t = low;
			low = mid;
			mid = t;
		} else {
			err = big_copy(result, low);
			goto ret;
		}
		(void) big_sub_pos(mid, high, low);
	}

	err = big_copy(result, low);
ret:
	if (prod.malloced) big_finish(&prod);
ret3:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}


/*
 * big_Jacobi_pos()
 * Compute the Jacobi symbol (nn / mm) and return it in *jac.
 * mm is expected to be odd; if nn is zero or both arguments are even,
 * *jac is set to 0.
 */
BIG_ERR_CODE
big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm)
{
	BIGNUM *t, *tmp2, *m, *n;
	BIGNUM t1, t2, t3;
	BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
	int len, err;

	if (big_is_zero(nn) ||
	    (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) {
		*jac = 0;
		return (BIG_OK);
	}

	if (nn->len > mm->len) {
		len = nn->len;
	} else {
		len = mm->len;
	}

	if ((err = big_init1(&t1, len,
	    t1value, arraysize(t1value))) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&t2, len,
	    t2value, arraysize(t2value))) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&t3, len,
	    t3value, arraysize(t3value))) != BIG_OK) {
		goto ret2;
	}

	n = &t1;
	m = &t2;
	tmp2 = &t3;

	(void) big_copy(n, nn);
	(void) big_copy(m, mm);

	*jac = 1;
	while (big_cmp_abs(&big_One, m) != 0) {
		if (big_is_zero(n)) {
			*jac = 0;
			goto ret;
		}
		if ((m->value[0] & 1) == 0) {
			if (((n->value[0] & 7) == 3) ||
			    ((n->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(m, m);
		} else if ((n->value[0] & 1) == 0) {
			if (((m->value[0] & 7) == 3) ||
			    ((m->value[0] & 7) == 5))
				*jac = -*jac;
			(void) big_half_pos(n, n);
		} else {
			if (((m->value[0] & 3) == 3) &&
			    ((n->value[0] & 3) == 3)) {
				*jac = -*jac;
			}
			if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) {
				goto ret;
			}
			t = tmp2;
			tmp2 = m;
			m = n;
			n = t;
		}
	}
	err = BIG_OK;

ret:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}


/*
 * big_Lucas()
 * Compute Lk = V(k) mod n and Lkminus1 = V(k-1) mod n, where V is the
 * Lucas sequence with parameters (p, 1).  Used by the Lucas stage of
 * big_isprime_pos_ext() below.
 */
BIG_ERR_CODE
big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n)
{
	int i;
	uint32_t m, w;
	BIG_CHUNK_TYPE bit;
	BIGNUM ki, tmp, tmp2;
	BIG_CHUNK_TYPE kivalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
	BIG_ERR_CODE err;

	if (big_cmp_abs(k, &big_One) == 0) {
		(void) big_copy(Lk, p);
		(void) big_copy(Lkminus1, &big_Two);
		return (BIG_OK);
	}

	if ((err = big_init1(&ki, k->len + 1,
	    kivalue, arraysize(kivalue))) != BIG_OK)
		return (err);

	if ((err = big_init1(&tmp, 2 * n->len + 1,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp2, n->len,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret2;

	m = big_numbits(k);
	ki.len = (m - 1) / BIG_CHUNK_SIZE + 1;
	w = (m - 1) / BIG_CHUNK_SIZE;
	bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE);
	for (i = 0; i < ki.len; i++) {
		ki.value[i] = 0;
	}
	ki.value[ki.len - 1] = bit;
	if (big_cmp_abs(k, &ki) != 0) {
		(void) big_double(&ki, &ki);
	}
	(void) big_sub_pos(&ki, &ki, k);

	(void) big_copy(Lk, p);
	(void) big_copy(Lkminus1, &big_Two);

	for (i = 0; i < m; i++) {
		if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) {
			goto ret;
		}
		(void) big_add_abs(&tmp, &tmp, n);
		(void) big_sub_pos(&tmp, &tmp, p);
		if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) {
			goto ret;
		}
		if ((ki.value[w] & bit) != 0) {
			if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) !=
			    BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lk, &tmp2);
		} else {
			if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) {
				goto ret;
			}
			(void) big_add_abs(&tmp, &tmp, n);
			(void) big_sub_pos(&tmp, &tmp, &big_Two);
			if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) {
				goto ret;
			}
			(void) big_copy(Lkminus1, &tmp2);
		}
		bit = bit >> 1;
		if (bit == 0) {
			bit = BIG_CHUNK_HIGHBIT;
			w--;
		}
	}

	err = BIG_OK;

ret:
	if (tmp2.malloced) big_finish(&tmp2);
ret2:
	if (tmp.malloced) big_finish(&tmp);
ret1:
	if (ki.malloced) big_finish(&ki);

	return (err);
}


/*
 * big_isprime_pos_ext()
 * Probabilistic primality test for a positive integer n: a base-2 strong
 * probable-prime test, a perfect-square check, and a Lucas test with a
 * parameter chosen so that the Jacobi symbol ((p*p - 4) / n) is -1.
 * Returns BIG_TRUE, BIG_FALSE, or an error code.
 * A usage sketch follows big_isprime_pos() below.
 */
BIG_ERR_CODE
big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info)
{
	BIGNUM o, nminus1, tmp, Lkminus1, Lk;
	BIG_CHUNK_TYPE ovalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
	BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE];
	BIG_ERR_CODE err;
	int e, i, jac;

	if (big_cmp_abs(n, &big_One) == 0) {
		return (BIG_FALSE);
	}
	if (big_cmp_abs(n, &big_Two) == 0) {
		return (BIG_TRUE);
	}
	if ((n->value[0] & 1) == 0) {
		return (BIG_FALSE);
	}

	if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) !=
	    BIG_OK) {
		return (err);
	}

	if ((err = big_init1(&nminus1, n->len,
	    nminus1value, arraysize(nminus1value))) != BIG_OK) {
		goto ret1;
	}

	if ((err = big_init1(&tmp, 2 * n->len,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		goto ret2;
	}

	if ((err = big_init1(&Lkminus1, n->len,
	    Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) {
		goto ret3;
	}

	if ((err = big_init1(&Lk, n->len,
	    Lkvalue, arraysize(Lkvalue))) != BIG_OK) {
		goto ret4;
	}

	(void) big_sub_pos(&o, n, &big_One);	/* cannot fail */
	(void) big_copy(&nminus1, &o);		/* cannot fail */
	e = 0;
	while ((o.value[0] & 1) == 0) {
		e++;
		(void) big_half_pos(&o, &o);	/* cannot fail */
	}
	if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) !=
	    BIG_OK) {
		goto ret;
	}

	i = 0;
	while ((i < e) &&
	    (big_cmp_abs(&tmp, &big_One) != 0) &&
	    (big_cmp_abs(&tmp, &nminus1) != 0)) {
		if ((err =
		    big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) !=
		    BIG_OK)
			goto ret;
		i++;
	}

	if (!((big_cmp_abs(&tmp, &nminus1) == 0) ||
	    ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) {
		err = BIG_FALSE;
		goto ret;
	}

	if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) {
		goto ret;
	}

	if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) {
		goto ret;
	}
	if (big_cmp_abs(&tmp, n) == 0) {
		err = BIG_FALSE;
		goto ret;
	}

	(void) big_copy(&o, &big_Two);
	do {
		(void) big_add_abs(&o, &o, &big_One);
		if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) {
			goto ret;
		}
		(void) big_sub_pos(&tmp, &tmp, &big_Four);
		if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) {
			goto ret;
		}
	} while (jac != -1);

	(void) big_add_abs(&tmp, n, &big_One);
	if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) {
		goto ret;
	}

	if ((big_cmp_abs(&Lkminus1, &o) == 0) &&
	    (big_cmp_abs(&Lk, &big_Two) == 0)) {
		err = BIG_TRUE;
	} else {
		err = BIG_FALSE;
	}

ret:
	if (Lk.malloced) big_finish(&Lk);
ret4:
	if (Lkminus1.malloced) big_finish(&Lkminus1);
ret3:
	if (tmp.malloced) big_finish(&tmp);
ret2:
	if (nminus1.malloced) big_finish(&nminus1);
ret1:
	if (o.malloced) big_finish(&o);

	return (err);
}


BIG_ERR_CODE
big_isprime_pos(BIGNUM *n)
{
	return (big_isprime_pos_ext(n, NULL));
}
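

/*
 * Usage sketch (illustrative only, not part of the original library):
 * shows how a caller can run the primality test above on a small odd
 * constant.  The BIGNUM_EXAMPLES guard and the example_isprime_small()
 * name are hypothetical and exist only for this sketch; it assumes a
 * user-space build where bignum.h declares the functions used below.
 */
#ifdef BIGNUM_EXAMPLES
static int
example_isprime_small(uint32_t candidate)
{
	BIGNUM n;
	int rv;

	/* one BIG_CHUNK_SIZE-bit word is enough for a 32-bit candidate */
	if (big_init(&n, 1) != BIG_OK)
		return (-1);
	n.value[0] = (BIG_CHUNK_TYPE)candidate;
	n.len = 1;

	/* big_isprime_pos() reports BIG_TRUE / BIG_FALSE via BIG_ERR_CODE */
	rv = (big_isprime_pos(&n) == BIG_TRUE) ? 1 : 0;

	big_finish(&n);
	return (rv);
}
#endif	/* BIGNUM_EXAMPLES */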


#define SIEVESIZE 1000


/*
 * big_nextprime_pos_ext()
 * Set result to the smallest probable prime that is >= n (n is first
 * rounded up to an odd value).  Candidates are pre-screened with a
 * small-prime sieve over windows of SIEVESIZE odd values before the
 * full primality test is applied.
 */
BIG_ERR_CODE
big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info)
{
	static const uint32_t smallprimes[] = {
	    3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
	    53, 59, 61, 67, 71, 73, 79, 83, 89, 97 };
	BIG_ERR_CODE err;
	int sieve[SIEVESIZE];
	int i;
	uint32_t off, p;

	if ((err = big_copy(result, n)) != BIG_OK) {
		return (err);
	}
	result->value[0] |= 1;
	/* CONSTCOND */
	while (1) {
		for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0;
		for (i = 0;
		    i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) {
			p = smallprimes[i];
			off = big_modhalf_pos(result, p);
			off = p - off;
			if ((off % 2) == 1) {
				off = off + p;
			}
			off = off / 2;
			while (off < SIEVESIZE) {
				sieve[off] = 1;
				off = off + p;
			}
		}

		for (i = 0; i < SIEVESIZE; i++) {
			if (sieve[i] == 0) {
				err = big_isprime_pos_ext(result, info);
				if (err != BIG_FALSE) {
					if (err != BIG_TRUE) {
						return (err);
					} else {
						goto out;
					}
				}
			}
			if ((err = big_add_abs(result, result, &big_Two)) !=
			    BIG_OK) {
				return (err);
			}
		}
	}

out:
	return (BIG_OK);
}


BIG_ERR_CODE
big_nextprime_pos(BIGNUM *result, BIGNUM *n)
{
	return (big_nextprime_pos_ext(result, n, NULL));
}


/*
 * big_nextprime_pos_slow()
 * Same as big_nextprime_pos(), but tests every odd candidate directly,
 * without the sieve pre-screening.
 */
BIG_ERR_CODE
big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n)
{
	BIG_ERR_CODE err;

	if ((err = big_copy(result, n)) != BIG_OK)
		return (err);
	result->value[0] |= 1;
	while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) {
		if (err != BIG_FALSE)
			return (err);
		if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK)
			return (err);
	}
	return (BIG_OK);
}


/*
 * big_ext_gcd_pos()
 * Extended Euclidean algorithm: given m and e, compute gcd(m, e) and the
 * coefficients cm and ce in the equation
 *	gcd(m, e) = cm * m + ce * e
 * Any of gcd, cm, and ce may be NULL if that output is not needed.
 * A usage sketch appears at the end of this file.
 */
BIG_ERR_CODE
big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e)
{
	BIGNUM *xi, *ri, *riminus1, *riminus2, *t;
	BIGNUM *vmi, *vei, *vmiminus1, *veiminus1;
	BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp;
	BIG_CHUNK_TYPE t1value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t2value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t3value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t4value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t5value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t6value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t7value[BIGTMPSIZE];
	BIG_CHUNK_TYPE t8value[BIGTMPSIZE];
	BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE];
	BIG_ERR_CODE err;
	int len;

	if (big_cmp_abs(m, e) >= 0) {
		len = m->len;
	} else {
		len = e->len;
	}

	if ((err = big_init1(&t1, len,
	    t1value, arraysize(t1value))) != BIG_OK) {
		return (err);
	}
	if ((err = big_init1(&t2, len,
	    t2value, arraysize(t2value))) != BIG_OK) {
		goto ret1;
	}
	if ((err = big_init1(&t3, len,
	    t3value, arraysize(t3value))) != BIG_OK) {
		goto ret2;
	}
	if ((err = big_init1(&t4, len,
	    t4value, arraysize(t4value))) != BIG_OK) {
		goto ret3;
	}
	if ((err = big_init1(&t5, len,
	    t5value, arraysize(t5value))) != BIG_OK) {
		goto ret4;
	}
	if ((err = big_init1(&t6, len,
	    t6value, arraysize(t6value))) != BIG_OK) {
		goto ret5;
	}
	if ((err = big_init1(&t7, len,
	    t7value, arraysize(t7value))) != BIG_OK) {
		goto ret6;
	}
	if ((err = big_init1(&t8, len,
	    t8value, arraysize(t8value))) != BIG_OK) {
		goto ret7;
	}

	if ((err = big_init1(&tmp, 2 * len,
	    tmpvalue, arraysize(tmpvalue))) != BIG_OK) {
		goto ret8;
	}

	ri = &t1;
	ri->value[0] = 1;
	ri->len = 1;
	xi = &t2;
	riminus1 = &t3;
	riminus2 = &t4;
	vmi = &t5;
	vei = &t6;
	vmiminus1 = &t7;
	veiminus1 = &t8;

	(void) big_copy(vmiminus1, &big_One);
	(void) big_copy(vmi, &big_One);
	(void) big_copy(veiminus1, &big_One);
	(void) big_copy(xi, &big_One);
	vei->len = 1;
	vei->value[0] = 0;

	(void) big_copy(riminus1, m);
	(void) big_copy(ri, e);

	while (!big_is_zero(ri)) {
		t = riminus2;
		riminus2 = riminus1;
		riminus1 = ri;
		ri = t;
		if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) {
			goto ret;
		}
		t = vmiminus1;
		vmiminus1 = vmi;
		vmi = t;
		if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) {
			goto ret;
		}
		if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) {
			goto ret;
		}
		t = veiminus1;
		veiminus1 = vei;
		vei = t;
		if ((err = big_div_pos(xi, ri, riminus2, riminus1)) !=
		    BIG_OK) {
			goto ret;
		}
	}
	if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) {
		goto ret;
	}
	if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) {
		goto ret;
	}
	if (ce != NULL) {
		err = big_copy(ce, vei);
	}
ret:
	if (tmp.malloced) big_finish(&tmp);
ret8:
	if (t8.malloced) big_finish(&t8);
ret7:
	if (t7.malloced) big_finish(&t7);
ret6:
	if (t6.malloced) big_finish(&t6);
ret5:
	if (t5.malloced) big_finish(&t5);
ret4:
	if (t4.malloced) big_finish(&t4);
ret3:
	if (t3.malloced) big_finish(&t3);
ret2:
	if (t2.malloced) big_finish(&t2);
ret1:
	if (t1.malloced) big_finish(&t1);

	return (err);
}
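

/*
 * Usage sketch (illustrative only, not part of the original library):
 * computes inv = e^(-1) mod m with big_ext_gcd_pos(), using the identity
 * gcd(m, e) = cm*m + ce*e.  When gcd(m, e) == 1, ce*e == 1 (mod m), so the
 * inverse is ce reduced into [0, m).  The BIGNUM_EXAMPLES guard and the
 * example_mod_inverse() name are hypothetical, the buffer sizes are rough
 * illustrative bounds, and the error code used for the "no inverse" case
 * is an assumed choice.
 */
#ifdef BIGNUM_EXAMPLES
static BIG_ERR_CODE
example_mod_inverse(BIGNUM *inv, BIGNUM *e, BIGNUM *m)
{
	BIGNUM gcd, ce, tmp;
	BIG_ERR_CODE err;
	int sign;

	if ((err = big_init(&gcd, m->len + 1)) != BIG_OK)
		return (err);
	if ((err = big_init(&ce, m->len + 1)) != BIG_OK)
		goto ret1;
	if ((err = big_init(&tmp, m->len + 1)) != BIG_OK)
		goto ret2;

	/* cm is not needed, so pass NULL for it */
	if ((err = big_ext_gcd_pos(&gcd, NULL, &ce, m, e)) != BIG_OK)
		goto ret;
	if (big_cmp_abs(&gcd, &big_One) != 0) {
		/* no inverse exists; BIG_INVALID_ARGS assumed suitable */
		err = BIG_INVALID_ARGS;
		goto ret;
	}

	/* reduce ce into [0, m): take |ce| mod m, then adjust for sign */
	sign = ce.sign;
	ce.sign = 1;
	if ((err = big_div_pos(NULL, &tmp, &ce, m)) != BIG_OK)
		goto ret;
	if ((sign == -1) && (!big_is_zero(&tmp))) {
		(void) big_sub_pos(&tmp, m, &tmp);
	}
	err = big_copy(inv, &tmp);

ret:
	big_finish(&tmp);
ret2:
	big_finish(&ce);
ret1:
	big_finish(&gcd);
	return (err);
}
#endif	/* BIGNUM_EXAMPLES */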