1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #define big_div_pos_fast big_div_pos 29 30 #include "bignum.h" 31 32 /* 33 * Configuration guide 34 * ------------------- 35 * 36 * There are 4 preprocessor symbols used to configure the bignum 37 * implementation. This file contains no logic to configure based on 38 * processor; we leave that to the Makefiles to specify. 39 * 40 * USE_FLOATING_POINT 41 * Meaning: There is support for a fast floating-point implementation of 42 * Montgomery multiply. 43 * 44 * PSR_MUL 45 * Meaning: There are processor-specific versions of the low level 46 * functions to implement big_mul. Those functions are: big_mul_set_vec, 47 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support 48 * for all 4 functions. You cannot pick and choose which subset of these 49 * functions to support; that would lead to a rat's nest of #ifdefs. 50 * 51 * HWCAP 52 * Meaning: Call multiply support functions through a function pointer. 
53 * On x86, there are multiple implementations for differnt hardware 54 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when 55 * a function is first used. So, the support functions are called through 56 * a function pointer. There is no need for that on Sparc, because there 57 * is only one implementation; support functions are called directly. 58 * Later, if there were some new VIS instruction, or something, and a 59 * run-time test were needed, rather than variant kernel modules and 60 * libraries, then HWCAP would be defined for Sparc, as well. 61 * 62 * UMUL64 63 * Meaning: It is safe to use generic C code that assumes the existence 64 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined, 65 * then the generic code for big_mul_add_vec() must necessarily be very slow, 66 * because it must fall back to using 16 x 16 --> 32 bit multiplication. 67 * 68 */ 69 70 71 #ifdef _KERNEL 72 #include <sys/ddi.h> 73 #include <sys/mdesc.h> 74 #include <sys/crypto/common.h> 75 76 #include <sys/types.h> 77 #include <sys/kmem.h> 78 #include <sys/param.h> 79 #include <sys/sunddi.h> 80 81 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP) 82 #define big_free(ptr, size) kmem_free(ptr, size) 83 84 void * 85 big_realloc(void *from, size_t oldsize, size_t newsize) 86 { 87 void *rv; 88 89 rv = kmem_alloc(newsize, KM_NOSLEEP); 90 if (rv != NULL) 91 bcopy(from, rv, oldsize); 92 kmem_free(from, oldsize); 93 return (rv); 94 } 95 96 #else /* _KERNEL */ 97 98 #include <stdlib.h> 99 #include <stdio.h> 100 #include <assert.h> 101 #define ASSERT assert 102 103 #ifndef MALLOC_DEBUG 104 105 #define big_malloc(size) malloc(size) 106 #define big_free(ptr, size) free(ptr) 107 108 #else 109 110 void 111 big_free(void *ptr, size_t size) 112 { 113 printf("freed %d bytes at %p\n", size, ptr); 114 free(ptr); 115 } 116 117 void * 118 big_malloc(size_t size) 119 { 120 void *rv; 121 rv = malloc(size); 122 printf("malloced %d bytes, addr:%p\n", size, rv); 123 return 
(rv);
}
#endif /* MALLOC_DEBUG */

/* Userland realloc ignores the old size; libc realloc copies as needed. */
#define	big_realloc(x, y, z) realloc((x), (z))

/*
 * Debug helper: prints a bignum as sign*len followed by its chunks in
 * hex, most significant chunk first, 8 (32-bit) or 4 (64-bit) per row.
 */
void
printbignum(char *aname, BIGNUM *a)
{
    int i;

    (void) printf("\n%s\n%d\n", aname, a->sign*a->len);
    for (i = a->len - 1; i >= 0; i--) {
#ifdef BIGNUM_CHUNK_32
        (void) printf("%08x ", a->value[i]);
        if ((i % 8 == 0) && (i != 0)) {
            (void) printf("\n");
        }
#else
        /* a 64-bit chunk is printed as two 32-bit halves, high first */
        (void) printf("%08x %08x ", (uint32_t)((a->value[i]) >> 32),
            (uint32_t)((a->value[i]) & 0xffffffff));
        if ((i % 4 == 0) && (i != 0)) {
            (void) printf("\n");
        }
#endif
    }
    (void) printf("\n");
}

#endif /* _KERNEL */


/*
 * Allocates a value array of `size` BIG_CHUNK_SIZE-bit words and
 * initializes the bignum to +0 (len 0, sign 1).
 */
BIG_ERR_CODE
big_init(BIGNUM *number, int size)
{
    number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
    if (number->value == NULL) {
        return (BIG_NO_MEM);
    }
    number->size = size;
    number->len = 0;
    number->sign = 1;
    number->malloced = 1;
    return (BIG_OK);
}

/*
 * Like big_init(), but uses the caller-supplied buffer buf (bufsize
 * words) when it is big enough, avoiding an allocation.  `malloced`
 * records which case applies, so big_finish() knows whether to free.
 * size and bufsize are in BIG_CHUNK_SIZE-bit words.
 */
BIG_ERR_CODE
big_init1(BIGNUM *number, int size, BIG_CHUNK_TYPE *buf, int bufsize)
{
    if ((buf == NULL) || (size > bufsize)) {
        number->value = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
        if (number->value == NULL) {
            return (BIG_NO_MEM);
        }
        number->size = size;
        number->malloced = 1;
    } else {
        number->value = buf;
        number->size = bufsize;
        number->malloced = 0;
    }
    number->len = 0;
    number->sign = 1;

    return (BIG_OK);
}

/* Releases the value array iff big_init()/big_init1() allocated it. */
void
big_finish(BIGNUM *number)
{
    if (number->malloced == 1) {
        big_free(number->value,
            sizeof (BIG_CHUNK_TYPE) * number->size);
        number->malloced = 0;
    }
}


/*
 * bn->size should be at least
 * (len + sizeof (BIG_CHUNK_TYPE) - 1) / sizeof (BIG_CHUNK_TYPE) words
 * converts from byte-big-endian format to bignum format (words in
 * little endian order, but bytes within the words big endian)
 */
void
bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len)
{
    int i, j, offs;
    BIG_CHUNK_TYPE word;
    uchar_t *knwordp;

#ifdef _LP64
    /* offs = number of leftover bytes that don't fill a whole word */
    offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
    bn->len = (uint32_t)len / sizeof (BIG_CHUNK_TYPE);

    for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
#else /* !_LP64 */
    offs = len % sizeof (BIG_CHUNK_TYPE);
    bn->len = len / sizeof (BIG_CHUNK_TYPE);
    for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
#endif /* _LP64 */
        /* word i of the bignum comes from the i-th group from the end */
        knwordp = &(kn[len - sizeof (BIG_CHUNK_TYPE) * (i + 1)]);
        word = knwordp[0];
        for (j = 1; j < sizeof (BIG_CHUNK_TYPE); j++) {
            word = (word << 8)+ knwordp[j];
        }
        bn->value[i] = word;
    }
    if (offs > 0) {
        /* assemble the partial most-significant word from kn[0..offs-1] */
        word = kn[0];
        for (i = 1; i < offs; i++) word = (word << 8) + kn[i];
        bn->value[bn->len++] = word;
    }
    /* strip high-order zero words, but keep at least one word */
    while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) {
        bn->len --;
    }
}

/*
 * copies the least significant len bytes if
 * len < bn->len * sizeof (BIG_CHUNK_TYPE)
 * converts from bignum format to byte-big-endian format.
 * bignum format is words of type BIG_CHUNK_TYPE in little endian order.
 */
void
bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len)
{
    int i, j, offs;
    BIG_CHUNK_TYPE word;

    if (len < sizeof (BIG_CHUNK_TYPE) * bn->len) {
        /* output buffer shorter than the bignum: truncate to len bytes */
#ifdef _LP64
        for (i = 0; i < (uint32_t)len / sizeof (BIG_CHUNK_TYPE); i++) {
#else /* !_LP64 */
        for (i = 0; i < len / sizeof (BIG_CHUNK_TYPE); i++) {
#endif /* _LP64 */
            word = bn->value[i];
            for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
                kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
                    word & 0xff;
                word = word >> 8;
            }
        }
#ifdef _LP64
        offs = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
#else /* !_LP64 */
        offs = len % sizeof (BIG_CHUNK_TYPE);
#endif /* _LP64 */
        if (offs > 0) {
            /* emit the low offs bytes of the next (partial) word */
            word = bn->value[len / sizeof (BIG_CHUNK_TYPE)];
#ifdef _LP64
            for (i = (uint32_t)len % sizeof (BIG_CHUNK_TYPE);
                i > 0; i --) {
#else /* !_LP64 */
            for (i = len % sizeof (BIG_CHUNK_TYPE);
                i > 0; i --) {
#endif /* _LP64 */
                kn[i - 1] = word & 0xff;
                word = word >> 8;
            }
        }
    } else {
        /* buffer is large enough: write all words, zero-pad the front */
        for (i = 0; i < bn->len; i++) {
            word = bn->value[i];
            for (j = 0; j < sizeof (BIG_CHUNK_TYPE); j++) {
                kn[len - sizeof (BIG_CHUNK_TYPE) * i - j - 1] =
                    word & 0xff;
                word = word >> 8;
            }
        }
#ifdef _LP64
        for (i = 0;
            i < (uint32_t)len - sizeof (BIG_CHUNK_TYPE) * bn->len;
            i++) {
#else /* !_LP64 */
        for (i = 0; i < len - sizeof (BIG_CHUNK_TYPE) * bn->len; i++) {
#endif /* _LP64 */
            kn[i] = 0;
        }
    }
}


/*
 * Returns the number of significant bits in |a|: index of the highest
 * nonzero word times the word size, plus the bit position (1-based) of
 * that word's highest set bit.  Returns at least 1 (a value of 0 still
 * reports 1 bit, since b never drops below 1).
 */
int
big_bitlength(BIGNUM *a)
{
    int l = 0, b = 0;
    BIG_CHUNK_TYPE c;

    l = a->len - 1;
    while ((l > 0) && (a->value[l] == 0)) {
        l--;
    }
    b = sizeof (BIG_CHUNK_TYPE) * BITSINBYTE;
    c = a->value[l];
    while ((b > 1) && ((c & BIG_CHUNK_HIGHBIT) == 0)) {
        c = c << 1;
        b--;
    }

    return (l * sizeof (BIG_CHUNK_TYPE) * BITSINBYTE + b);
}


/*
 * dest = src.  Normalizes src->len (strips high zero words) first and
 * grows dest's value array if it is too small.
 */
BIG_ERR_CODE
big_copy(BIGNUM *dest, BIGNUM *src)
{
    BIG_CHUNK_TYPE *newptr;
    int i, len;

    len = src->len;
    while ((len > 1) && (src->value[len - 1] == 0)) {
        len--;
    }
    src->len = len;
    if (dest->size < len) {
        if (dest->malloced == 1) {
            /*
             * NOTE(review): in the kernel, big_realloc() frees the
             * old buffer even on failure, so dest->value would
             * dangle on BIG_NO_MEM — confirm callers never reuse
             * dest after a failed big_copy().
             */
            newptr = (BIG_CHUNK_TYPE *)big_realloc(dest->value,
                sizeof (BIG_CHUNK_TYPE) * dest->size,
                sizeof (BIG_CHUNK_TYPE) * len);
        } else {
            newptr = (BIG_CHUNK_TYPE *)
                big_malloc(sizeof (BIG_CHUNK_TYPE) * len);
            if (newptr != NULL) {
                dest->malloced = 1;
            }
        }
        if (newptr == NULL) {
            return (BIG_NO_MEM);
        }
        dest->value = newptr;
        dest->size = len;
    }
    dest->len = len;
    dest->sign = src->sign;
    for (i = 0; i < len; i++) {
        dest->value[i] = src->value[i];
    }

    return (BIG_OK);
}


/*
 * Grows number's value array to at least `size` words, preserving the
 * existing contents.  No-op if it is already big enough.  A non-malloced
 * (caller-buffer) bignum is migrated to a malloced buffer.
 */
BIG_ERR_CODE
big_extend(BIGNUM *number, int size)
{
    BIG_CHUNK_TYPE *newptr;
    int i;

    if (number->size >= size)
        return (BIG_OK);
    if (number->malloced) {
        number->value = big_realloc(number->value,
            sizeof (BIG_CHUNK_TYPE) * number->size,
            sizeof (BIG_CHUNK_TYPE) * size);
    } else {
        newptr = big_malloc(sizeof (BIG_CHUNK_TYPE) * size);
        if (newptr != NULL) {
            for (i = 0; i < number->size; i++) {
                newptr[i] = number->value[i];
            }
        }
        number->value = newptr;
    }

    if (number->value == NULL) {
        return (BIG_NO_MEM);
    }

    number->size = size;
    number->malloced = 1;
    return (BIG_OK);
}


/* returns 1 if n == 0 */
int
big_is_zero(BIGNUM *n)
{
    int i, result;

    result = 1;
    for (i = 0; i < n->len; i++) {
        if (n->value[i] != 0) {
            result = 0;
        }
    }
    return (result);
}


/*
 * result = |aa| + |bb|.  result->sign is set to 1.  The result array is
 * extended as needed; aa or bb may alias result.
 */
BIG_ERR_CODE
big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    int i, shorter, longer;
    BIG_CHUNK_TYPE cy, ai;
    BIG_CHUNK_TYPE *r, *a, *b, *c;
    BIG_ERR_CODE err;
    BIGNUM *longerarg;

    if (aa->len > bb->len) {
        shorter = bb->len;
        longer = aa->len;
        longerarg = aa;
    } else {
        shorter = aa->len;
        longer = bb->len;
        longerarg = bb;
    }
    if (result->size < longer + 1) {
        err = big_extend(result, longer + 1);
        if (err != BIG_OK) {
            return (err);
        }
    }

    r = result->value;
    a = aa->value;
    b = bb->value;
    c = longerarg->value;
    cy = 0;
    /* add the overlapping words, tracking carry via wraparound checks */
    for (i = 0; i < shorter; i++) {
        ai = a[i];
        r[i] = ai + b[i] + cy;
        if (r[i] > ai) {
            cy = 0;
        } else if (r[i] < ai) {
            cy = 1;
        }
        /* r[i] == ai: sum wrapped exactly, carry state is unchanged */
    }
    /* propagate the carry through the longer operand's remaining words */
    for (; i < longer; i++) {
        ai = c[i];
        r[i] = ai + cy;
        if (r[i] >= ai) {
            cy = 0;
        }
    }
    if (cy == 1) {
        r[i] = cy;
        result->len = longer + 1;
    } else {
        result->len = longer;
    }
    result->sign = 1;
    return (BIG_OK);
}


/*
 * r = a - b, all vectors of length len; caller must make sure that
 * result has at least len words allocated.  Subtraction is done as
 * a + ~b + 1 (two's complement), with cy acting as "no borrow".
 */
void
big_sub_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, BIG_CHUNK_TYPE *b, int len)
{
    int i;
    BIG_CHUNK_TYPE cy, ai;

    cy = 1;
    for (i = 0; i < len; i++) {
        ai = a[i];
        r[i] = ai + (~b[i]) + cy;
        if (r[i] > ai) {
            cy = 0;
        } else if (r[i] < ai) {
            cy = 1;
        }
    }
}


/*
 * result = aa - bb  it is assumed that aa >= bb.
 * Returns BIG_INVALID_ARGS if a borrow is left over (i.e. aa < bb).
 */
BIG_ERR_CODE
big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    int i, shorter;
    BIG_CHUNK_TYPE cy = 1, ai;
    BIG_CHUNK_TYPE *r, *a, *b;
    BIG_ERR_CODE err = BIG_OK;

    if (aa->len > bb->len) {
        shorter = bb->len;
    } else {
        shorter = aa->len;
    }
    if (result->size < aa->len) {
        err = big_extend(result, aa->len);
        if (err != BIG_OK) {
            return (err);
        }
    }

    r = result->value;
    a = aa->value;
    b = bb->value;
    result->len = aa->len;
    cy = 1;			/* two's-complement "+1"; 1 == no borrow */
    for (i = 0; i < shorter; i++) {
        ai = a[i];
        r[i] = ai + (~b[i]) + cy;
        if (r[i] > ai) {
            cy = 0;
        } else if (r[i] < ai) {
            cy = 1;
        }
    }
    /* above shorter, bb is treated as zero words (~0 == all ones) */
    for (; i < aa->len; i++) {
        ai = a[i];
        r[i] = ai + (~0) + cy;
        if (r[i] < ai) {
            cy = 1;
        }
    }
    result->sign = 1;

    if (cy == 0) {
        return (BIG_INVALID_ARGS);
    } else {
        return (BIG_OK);
    }
}


/* returns -1 if |aa| < |bb|, 0 if |aa| == |bb|, 1 if |aa| > |bb| */
int
big_cmp_abs(BIGNUM *aa, BIGNUM *bb)
{
    int i;

    if (aa->len > bb->len) {
        /* any nonzero word of aa above bb's length decides it */
        for (i = aa->len - 1; i > bb->len - 1; i--) {
            if (aa->value[i] > 0) {
                return (1);
            }
        }
    } else if (aa->len < bb->len) {
        for (i = bb->len - 1; i > aa->len - 1; i--) {
            if (bb->value[i] > 0) {
                return (-1);
            }
        }
    } else {
        i = aa->len-1;
    }
    /* compare the overlapping words, most significant first */
    for (; i >= 0; i--) {
        if (aa->value[i] > bb->value[i]) {
            return (1);
        } else if (aa->value[i] < bb->value[i]) {
            return (-1);
        }
    }

    return (0);
}


/*
 * result = aa - bb, signed.  Dispatches on the operand signs to
 * big_add_abs()/big_sub_pos() on the magnitudes.
 */
BIG_ERR_CODE
big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    BIG_ERR_CODE err;

    if ((bb->sign == -1) && (aa->sign == 1)) {
        /* aa - (-bb) = |aa| + |bb| */
        if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
            return (err);
        }
        result->sign = 1;
    } else if ((aa->sign == -1) && (bb->sign == 1)) {
        /* (-aa) - bb = -(|aa| + |bb|) */
        if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
            return (err);
        }
        result->sign = -1;
    } else if ((aa->sign == 1) && (bb->sign == 1)) {
        /* same sign: subtract the smaller magnitude from the larger */
        if (big_cmp_abs(aa, bb) >= 0) {
            if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
                return (err);
            }
            result->sign = 1;
        } else {
            if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
                return (err);
            }
            result->sign = -1;
        }
    } else {
        /* both negative: (-aa) - (-bb) = |bb| - |aa| */
        if (big_cmp_abs(aa, bb) >= 0) {
            if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
                return (err);
            }
            result->sign = -1;
        } else {
            if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
                return (err);
            }
            result->sign = 1;
        }
    }
    return (BIG_OK);
}


/*
 * result = aa + bb, signed.  Mirrors big_sub(): equal signs add the
 * magnitudes; opposite signs subtract the smaller magnitude.
 */
BIG_ERR_CODE
big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    BIG_ERR_CODE err;

    if ((bb->sign == -1) && (aa->sign == -1)) {
        if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
            return (err);
        }
        result->sign = -1;
    } else if ((aa->sign == 1) && (bb->sign == 1)) {
        if ((err = big_add_abs(result, aa, bb)) != BIG_OK) {
            return (err);
        }
        result->sign = 1;
    } else if ((aa->sign == 1) && (bb->sign == -1)) {
        if (big_cmp_abs(aa, bb) >= 0) {
            if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
                return (err);
            }
            result->sign = 1;
        } else {
            if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
                return (err);
            }
            result->sign = -1;
        }
    } else {
        if (big_cmp_abs(aa, bb) >= 0) {
            if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) {
                return (err);
            }
            result->sign = -1;
        } else {
            if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) {
                return (err);
            }
            result->sign = 1;
        }
    }
    return (BIG_OK);
}


/* result = aa/2 (logical shift right by one bit across all words) */
BIG_ERR_CODE
big_half_pos(BIGNUM *result, BIGNUM *aa)
{
    BIG_ERR_CODE err;
    int i;
    BIG_CHUNK_TYPE cy, cy1;
    BIG_CHUNK_TYPE *a, *r;

    if (result->size < aa->len) {
        err = big_extend(result, aa->len);
        if (err != BIG_OK) {
            return (err);
        }
    }

    result->len = aa->len;
    a = aa->value;
    r = result->value;
    cy = 0;
    /* walk from the top so each word's low bit carries into the next */
    for (i = aa->len - 1; i >= 0; i--) {
        cy1 = a[i] << (BIG_CHUNK_SIZE - 1);
        r[i] = (cy | (a[i] >> 1));
        cy = cy1;
    }
    if (r[result->len - 1] == 0) {
        result->len--;
    }

    return (BIG_OK);
}

/* result = aa*2 (shift left by one bit, growing by a word on overflow) */
BIG_ERR_CODE
big_double(BIGNUM *result, BIGNUM *aa)
{
    BIG_ERR_CODE err;
    int i, rsize;
    BIG_CHUNK_TYPE cy, cy1;
    BIG_CHUNK_TYPE *a, *r;

    /* need one extra word iff the top bit is set */
    if ((aa->len > 0) &&
        ((aa->value[aa->len - 1] & BIG_CHUNK_HIGHBIT) != 0)) {
        rsize = aa->len + 1;
    } else {
        rsize = aa->len;
    }

    if (result->size < rsize) {
        err = big_extend(result, rsize);
        if (err != BIG_OK)
            return (err);
    }

    a = aa->value;
    r = result->value;
    if (rsize == aa->len + 1) {
        /* the bit shifted out of the top word lands here */
        r[rsize - 1] = 1;
    }
    cy = 0;
    for (i = 0; i < aa->len; i++) {
        cy1 = a[i] >> (BIG_CHUNK_SIZE - 1);
        r[i] = (cy | (a[i] << 1));
        cy = cy1;
    }
    result->len = rsize;
    return (BIG_OK);
}


/*
 * returns aa mod b, aa must be nonneg, b must be a max
 * (BIG_CHUNK_SIZE / 2)-bit integer
 */
static uint32_t
big_modhalf_pos(BIGNUM *aa, uint32_t b)
{
    int i;
    BIG_CHUNK_TYPE rem;

    if (aa->len == 0) {
        return (0);
    }
    rem = aa->value[aa->len - 1] % b;
    /*
     * Fold in each lower word a half-word at a time; keeping rem and
     * the shifted-in half within one chunk avoids overflow since b
     * fits in a half word.
     */
    for (i = aa->len - 2; i >= 0; i--) {
        rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
            (aa->value[i] >> (BIG_CHUNK_SIZE / 2))) % b;
        rem = ((rem << (BIG_CHUNK_SIZE / 2)) |
            (aa->value[i] & BIG_CHUNK_LOWHALFBITS)) % b;
    }

    return ((uint32_t)rem);
}


/*
 * result = aa - (2^BIG_CHUNK_SIZE)^lendiff * bb
 * result->size should be at least aa->len at entry
 * aa, bb, and result should be positive
 *
 * Works by aliasing temporary BIGNUMs onto the top bb->len words of
 * aa and result, then subtracting in place there.
 */
void
big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb)
{
    int i, lendiff;
    BIGNUM res1, aa1;

    lendiff = aa->len - bb->len;
    res1.size = result->size - lendiff;
    res1.malloced = 0;
    res1.value = result->value + lendiff;
    aa1.size = aa->size - lendiff;
    aa1.value = aa->value + lendiff;
    aa1.len = bb->len;
    aa1.sign = 1;
    (void) big_sub_pos(&res1, &aa1, bb);
    if (result->value != aa->value) {
        /* low words are unaffected by the high subtraction */
        for (i = 0; i < lendiff; i++) {
            result->value[i] = aa->value[i];
        }
    }
    result->len = aa->len;
}


/*
 * returns 1, 0, or -1 depending on whether |aa| > , ==, or <
 * (2^BIG_CHUNK_SIZE)^lendiff * |bb|
 * aa->len should be >= bb->len
 *
 * NOTE(review): only the top bb->len words of aa are compared; low
 * words of aa are ignored by this comparison.
 */
int
big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb)
{
    int lendiff;
    BIGNUM aa1;

    lendiff = aa->len - bb->len;
    aa1.len = bb->len;
    aa1.size = aa->size - lendiff;
    aa1.malloced = 0;
    aa1.value = aa->value + lendiff;
    return (big_cmp_abs(&aa1, bb));
}


/*
 * result = aa * b where b is a max. (BIG_CHUNK_SIZE / 2)-bit positive integer.
 * result should have enough space allocated.
 *
 * Half-word schoolbook multiply: t1/t2 hold the low/high partial
 * products, never overflowing a chunk because b fits in a half word.
 */
static void
big_mulhalf_low(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
    int i;
    BIG_CHUNK_TYPE t1, t2, ai, cy;
    BIG_CHUNK_TYPE *a, *r;

    a = aa->value;
    r = result->value;
    cy = 0;
    for (i = 0; i < aa->len; i++) {
        ai = a[i];
        t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
        t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
            (t1 >> (BIG_CHUNK_SIZE / 2));
        r[i] = (t1 & BIG_CHUNK_LOWHALFBITS) |
            (t2 << (BIG_CHUNK_SIZE / 2));
        cy = t2 >> (BIG_CHUNK_SIZE / 2);
    }
    r[i] = cy;
    result->len = aa->len + 1;
    result->sign = aa->sign;
}


/*
 * result = aa * b * 2^(BIG_CHUNK_SIZE / 2) where b is a max.
 * (BIG_CHUNK_SIZE / 2)-bit positive integer.
 * result should have enough space allocated.
 *
 * Same as big_mulhalf_low() but with the product pre-shifted left by
 * half a word; ri carries the half-word that straddles two result words.
 */
static void
big_mulhalf_high(BIGNUM *result, BIGNUM *aa, BIG_CHUNK_TYPE b)
{
    int i;
    BIG_CHUNK_TYPE t1, t2, ai, cy, ri;
    BIG_CHUNK_TYPE *a, *r;

    a = aa->value;
    r = result->value;
    cy = 0;
    ri = 0;
    for (i = 0; i < aa->len; i++) {
        ai = a[i];
        t1 = (ai & BIG_CHUNK_LOWHALFBITS) * b + cy;
        t2 = (ai >> (BIG_CHUNK_SIZE / 2)) * b +
            (t1 >> (BIG_CHUNK_SIZE / 2));
        r[i] = (t1 << (BIG_CHUNK_SIZE / 2)) + ri;
        ri = t2 & BIG_CHUNK_LOWHALFBITS;
        cy = t2 >> (BIG_CHUNK_SIZE / 2);
    }
    r[i] = (cy << (BIG_CHUNK_SIZE / 2)) + ri;
    result->len = aa->len + 1;
    result->sign = aa->sign;
}


/*
 * result = aa << offs (bits).  offs must be < BIG_CHUNK_SIZE.
 * it is assumed that result->size is big enough
 */
void
big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
{
    int i;
    BIG_CHUNK_TYPE cy, ai;

    if (offs == 0) {
        if (result != aa) {
            (void) big_copy(result, aa);
        }
        return;
    }
    cy = 0;
    for (i = 0; i < aa->len; i++) {
        ai = aa->value[i];
        result->value[i] = (ai << offs) | cy;
        cy = ai >> (BIG_CHUNK_SIZE - offs);
    }
    if (cy != 0) {
        /* bits shifted out of the top word need an extra word */
        result->len = aa->len + 1;
        result->value[result->len - 1] = cy;
    } else {
        result->len = aa->len;
    }
    result->sign = aa->sign;
}


/*
 * result = aa >> offs (bits).  offs must be < BIG_CHUNK_SIZE.
 * it is assumed that result->size is big enough
 */
void
big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
{
    int i;
    BIG_CHUNK_TYPE cy, ai;

    if (offs == 0) {
        if (result != aa) {
            (void) big_copy(result, aa);
        }
        return;
    }
    cy = aa->value[0] >> offs;
    for (i = 1; i < aa->len; i++) {
        ai = aa->value[i];
        result->value[i-1] = (ai << (BIG_CHUNK_SIZE - offs)) | cy;
        cy = ai >> offs;
    }
    result->len = aa->len;
    result->value[result->len - 1] = cy;
    result->sign = aa->sign;
}


/*
 * result = aa/bb   remainder = aa mod bb
 * it is assumed that aa and bb are positive
 *
 * Schoolbook division with half-word quotient digits: bb is first
 * normalized (shifted left by offs) so its top half-word's high bit is
 * set, then each quotient digit is estimated as higha / (highb + 1)
 * and corrected upward by repeated subtraction (at most a few steps,
 * because the estimate can only be low).
 */
BIG_ERR_CODE
big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
{
    BIG_ERR_CODE err = BIG_OK;
    int i, alen, blen, tlen, rlen, offs;
    BIG_CHUNK_TYPE higha, highb, coeff;
    BIG_CHUNK_TYPE *a, *b;
    BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
    BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
    BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE];
    BIG_CHUNK_TYPE tresultvalue[BIGTMPSIZE];
    BIG_CHUNK_TYPE bblowvalue[BIGTMPSIZE];
    BIG_CHUNK_TYPE bbhighvalue[BIGTMPSIZE];

    a = aa->value;
    b = bb->value;
    alen = aa->len;
    blen = bb->len;
    /* normalize operand lengths (strip high zero words) */
    while ((alen > 1) && (a[alen - 1] == 0)) {
        alen = alen - 1;
    }
    aa->len = alen;
    while ((blen > 1) && (b[blen - 1] == 0)) {
        blen = blen - 1;
    }
    bb->len = blen;
    if ((blen == 1) && (b[0] == 0)) {
        return (BIG_DIV_BY_0);
    }

    /* trivial case: aa < bb => quotient 0, remainder aa */
    if (big_cmp_abs(aa, bb) < 0) {
        if ((remainder != NULL) &&
            ((err = big_copy(remainder, aa)) != BIG_OK)) {
            return (err);
        }
        if (result != NULL) {
            result->len = 1;
            result->sign = 1;
            result->value[0] = 0;
        }
        return (BIG_OK);
    }

    /* temporaries use on-stack buffers unless they are too small */
    if ((err = big_init1(&bblow, blen + 1,
        bblowvalue, arraysize(bblowvalue))) != BIG_OK)
        return (err);

    if ((err = big_init1(&bbhigh, blen + 1,
        bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
        goto ret1;

    if ((err = big_init1(&tmp1, alen + 2,
        tmp1value, arraysize(tmp1value))) != BIG_OK)
        goto ret2;

    if ((err = big_init1(&tmp2, blen + 2,
        tmp2value, arraysize(tmp2value))) != BIG_OK)
        goto ret3;

    if ((err = big_init1(&tresult, alen - blen + 2,
        tresultvalue, arraysize(tresultvalue))) != BIG_OK)
        goto ret4;

    /*
     * Compute offs, the left shift that brings the top half-word of
     * bb to have its high bit set (normalization).
     */
    offs = 0;
    highb = b[blen - 1];
    if (highb >= (BIG_CHUNK_HALF_HIGHBIT << 1)) {
        highb = highb >> (BIG_CHUNK_SIZE / 2);
        offs = (BIG_CHUNK_SIZE / 2);
    }
    while ((highb & BIG_CHUNK_HALF_HIGHBIT) == 0) {
        highb = highb << 1;
        offs++;
    }

    /* bblow = normalized divisor; bbhigh = bblow << half a word */
    big_shiftleft(&bblow, bb, offs);

    if (offs <= (BIG_CHUNK_SIZE / 2 - 1)) {
        big_shiftleft(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
    } else {
        big_shiftright(&bbhigh, &bblow, BIG_CHUNK_SIZE / 2);
    }
    if (bbhigh.value[bbhigh.len - 1] == 0) {
        bbhigh.len--;
    } else {
        bbhigh.value[bbhigh.len] = 0;
    }

    highb = bblow.value[bblow.len - 1];

    /* tmp1 = normalized dividend, with one extra zero guard word */
    big_shiftleft(&tmp1, aa, offs);
    rlen = tmp1.len - bblow.len + 1;
    tresult.len = rlen;

    tmp1.len++;
    tlen = tmp1.len;
    tmp1.value[tmp1.len - 1] = 0;
    for (i = 0; i < rlen; i++) {
        /* estimate the high half-word digit of this quotient word */
        higha = (tmp1.value[tlen - 1] << (BIG_CHUNK_SIZE / 2)) +
            (tmp1.value[tlen - 2] >> (BIG_CHUNK_SIZE / 2));
        coeff = higha / (highb + 1);
        big_mulhalf_high(&tmp2, &bblow, coeff);
        big_sub_pos_high(&tmp1, &tmp1, &tmp2);
        bbhigh.len++;
        /* correct the (possibly low) estimate by subtraction */
        while (tmp1.value[tlen - 1] > 0) {
            big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
            coeff++;
        }
        bbhigh.len--;
        tlen--;
        tmp1.len--;
        while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
            big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
            coeff++;
        }
        tresult.value[rlen - i - 1] = coeff << (BIG_CHUNK_SIZE / 2);
        /* now the low half-word digit, same estimate-and-correct */
        higha = tmp1.value[tlen - 1];
        coeff = higha / (highb + 1);
        big_mulhalf_low(&tmp2, &bblow, coeff);
        tmp2.len--;
        big_sub_pos_high(&tmp1, &tmp1, &tmp2);
        while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
            big_sub_pos_high(&tmp1, &tmp1, &bblow);
            coeff++;
        }
        tresult.value[rlen - i - 1] =
            tresult.value[rlen - i - 1] + coeff;
    }

    /* denormalize the remainder */
    big_shiftright(&tmp1, &tmp1, offs);

    err = BIG_OK;

    if ((remainder != NULL) &&
        ((err = big_copy(remainder, &tmp1)) != BIG_OK))
        goto ret;

    if (result != NULL)
        err = big_copy(result, &tresult);

ret:
    big_finish(&tresult);
ret4:
    big_finish(&tmp1);
ret3:
    big_finish(&tmp2);
ret2:
    big_finish(&bbhigh);
ret1:
    big_finish(&bblow);
    return (err);
}

/*
 * If there is no processor-specific integer implementation of
 * the lower level multiply functions, then this code is provided
 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
 * big_sqr_vec().
 *
 * There are two generic implementations.  One that assumes that
 * there is hardware and C compiler support for a 32 x 32 --> 64
 * bit unsigned multiply, but otherwise is not specific to any
 * processor, platform, or ISA.
 *
 * The other makes very few assumptions about hardware capabilities.
 * It does not even assume that there is any implementation of a
 * 32 x 32 --> 64 bit multiply that is accessible to C code and
 * appropriate to use.  It falls back to constructing 32 x 32 --> 64
 * bit multiplies from 16 x 16 --> 32 bit multiplies.
 *
 */

#if !defined(PSR_MUL)

#ifdef UMUL64

#if (BIG_CHUNK_SIZE == 32)

#define	UNROLL8

/* one 32x32->64 multiply-accumulate step, prefetching the next a[] word */
#define	MUL_SET_VEC_ROUND_PREFETCH(R) \
	p = pf * d; \
	pf = (uint64_t)a[R+1]; \
	t = p + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

/* same step without the prefetch; used for the final word */
#define	MUL_SET_VEC_ROUND_NOPREFETCH(R) \
	p = pf * d; \
	t = p + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

/* multiply-accumulate into an existing r[] word, with prefetch */
#define	MUL_ADD_VEC_ROUND_PREFETCH(R) \
	t = (uint64_t)r[R]; \
	p = pf * d; \
	pf = (uint64_t)a[R+1]; \
	t = p + t + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#define	MUL_ADD_VEC_ROUND_NOPREFETCH(R) \
	t = (uint64_t)r[R]; \
	p = pf * d; \
	t = p + t + cy; \
	r[R] = (uint32_t)t; \
	cy = t >> 32

#ifdef UNROLL8

#define	UNROLL 8

/*
 * r = a * b
 * where r and a are vectors; b is a single 32-bit digit.
 * Returns the carry-out (high word of the last product).
 * Unrolled 8x; the NOPREFETCH round avoids reading a[len].
 */

uint32_t
big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
    uint64_t d, pf, p, t, cy;

    if (len == 0)
        return (0);
    cy = 0;
    d = (uint64_t)b;
    pf = (uint64_t)a[0];
    while (len > UNROLL) {
        MUL_SET_VEC_ROUND_PREFETCH(0);
        MUL_SET_VEC_ROUND_PREFETCH(1);
        MUL_SET_VEC_ROUND_PREFETCH(2);
        MUL_SET_VEC_ROUND_PREFETCH(3);
        MUL_SET_VEC_ROUND_PREFETCH(4);
        MUL_SET_VEC_ROUND_PREFETCH(5);
        MUL_SET_VEC_ROUND_PREFETCH(6);
        MUL_SET_VEC_ROUND_PREFETCH(7);
        r += UNROLL;
        a += UNROLL;
        len -= UNROLL;
    }
    if (len == UNROLL) {
        MUL_SET_VEC_ROUND_PREFETCH(0);
        MUL_SET_VEC_ROUND_PREFETCH(1);
        MUL_SET_VEC_ROUND_PREFETCH(2);
        MUL_SET_VEC_ROUND_PREFETCH(3);
        MUL_SET_VEC_ROUND_PREFETCH(4);
        MUL_SET_VEC_ROUND_PREFETCH(5);
        MUL_SET_VEC_ROUND_PREFETCH(6);
        MUL_SET_VEC_ROUND_NOPREFETCH(7);
        return ((uint32_t)cy);
    }
    while (len > 1) {
        MUL_SET_VEC_ROUND_PREFETCH(0);
        ++r;
        ++a;
        --len;
    }
    if (len > 0) {
        MUL_SET_VEC_ROUND_NOPREFETCH(0);
    }
    return ((uint32_t)cy);
}

/*
 * r += a * b
 * where r and a are vectors; b is a single 32-bit digit.
 * Returns the carry-out.  Same unrolling scheme as big_mul_set_vec().
 */

uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b)
{
    uint64_t d, pf, p, t, cy;

    if (len == 0)
        return (0);
    cy = 0;
    d = (uint64_t)b;
    pf = (uint64_t)a[0];
    while (len > 8) {
        MUL_ADD_VEC_ROUND_PREFETCH(0);
        MUL_ADD_VEC_ROUND_PREFETCH(1);
        MUL_ADD_VEC_ROUND_PREFETCH(2);
        MUL_ADD_VEC_ROUND_PREFETCH(3);
        MUL_ADD_VEC_ROUND_PREFETCH(4);
        MUL_ADD_VEC_ROUND_PREFETCH(5);
        MUL_ADD_VEC_ROUND_PREFETCH(6);
        MUL_ADD_VEC_ROUND_PREFETCH(7);
        r += 8;
        a += 8;
        len -= 8;
    }
    if (len == 8) {
        MUL_ADD_VEC_ROUND_PREFETCH(0);
        MUL_ADD_VEC_ROUND_PREFETCH(1);
        MUL_ADD_VEC_ROUND_PREFETCH(2);
        MUL_ADD_VEC_ROUND_PREFETCH(3);
        MUL_ADD_VEC_ROUND_PREFETCH(4);
        MUL_ADD_VEC_ROUND_PREFETCH(5);
        MUL_ADD_VEC_ROUND_PREFETCH(6);
        MUL_ADD_VEC_ROUND_NOPREFETCH(7);
        return ((uint32_t)cy);
    }
    while (len > 1) {
        MUL_ADD_VEC_ROUND_PREFETCH(0);
        ++r;
        ++a;
        --len;
    }
    if (len > 0) {
        MUL_ADD_VEC_ROUND_NOPREFETCH(0);
    }
    return ((uint32_t)cy);
}
#endif /* UNROLL8 */

/*
 * r = a * a, where a has len words and r has 2*len words.
 * Computes the off-diagonal products once, then doubles them while
 * adding in the diagonal squares a[i]^2.
 */
void
big_sqr_vec(uint32_t *r, uint32_t *a, int len)
{
    uint32_t *tr, *ta;
    int tlen, row, col;
    uint64_t p, s, t, t2, cy;
    uint32_t d;

    /* accumulate the cross products a[i]*a[j], i < j, into r[1..] */
    tr = r + 1;
    ta = a;
    tlen = len - 1;
    tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]);
    while (--tlen > 0) {
        tr += 2;
        ++ta;
        tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]);
    }
    /* double the cross products and add the squares a[i]^2 */
    s = (uint64_t)a[0];
    s = s * s;
    r[0] = (uint32_t)s;
    cy = s >> 32;
    p = ((uint64_t)r[1] << 1) + cy;
    r[1] = (uint32_t)p;
    cy = p >> 32;
    row = 1;
    col = 2;
    while (row < len) {
        s =
            (uint64_t)a[row];
        s = s * s;
        p = (uint64_t)r[col] << 1;
        t = p + s;
        d = (uint32_t)t;
        t2 = (uint64_t)d + cy;
        r[col] = (uint32_t)t2;
        /* carry collects both the doubling and the addition overflow */
        cy = (t >> 32) + (t2 >> 32);
        if (row == len - 1)
            break;
        p = ((uint64_t)r[col+1] << 1) + cy;
        r[col+1] = (uint32_t)p;
        cy = p >> 32;
        ++row;
        col += 2;
    }
    r[col+1] = (uint32_t)cy;
}

#else /* BIG_CHUNK_SIZE == 64 */

/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * 64-bit chunks with no 64x64->128 multiply available: the digit is
 * split into half-words (dlow/dhigh) and two half-word passes are made,
 * the dhigh pass offset by half a word.
 */
BIG_CHUNK_TYPE
big_mul_add_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
    BIG_CHUNK_TYPE digit)
{
    BIG_CHUNK_TYPE cy, cy1, retcy, dlow, dhigh;
    int i;

    cy1 = 0;
    dlow = digit & BIG_CHUNK_LOWHALFBITS;
    dhigh = digit >> (BIG_CHUNK_SIZE / 2);
    /* pass 1: r += a * dlow */
    for (i = 0; i < len; i++) {
        cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
            dlow * (a[i] & BIG_CHUNK_LOWHALFBITS) +
            (r[i] & BIG_CHUNK_LOWHALFBITS);
        cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
            dlow * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
            (r[i] >> (BIG_CHUNK_SIZE / 2));
        r[i] = (cy & BIG_CHUNK_LOWHALFBITS) |
            (cy1 << (BIG_CHUNK_SIZE / 2));
    }
    retcy = cy1 >> (BIG_CHUNK_SIZE / 2);

    /* pass 2: r += (a * dhigh) << (BIG_CHUNK_SIZE / 2) */
    cy1 = r[0] & BIG_CHUNK_LOWHALFBITS;
    for (i = 0; i < len - 1; i++) {
        cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
            dhigh * (a[i] & BIG_CHUNK_LOWHALFBITS) +
            (r[i] >> (BIG_CHUNK_SIZE / 2));
        r[i] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
            (cy << (BIG_CHUNK_SIZE / 2));
        cy1 = (cy >> (BIG_CHUNK_SIZE / 2)) +
            dhigh * (a[i] >> (BIG_CHUNK_SIZE / 2)) +
            (r[i + 1] & BIG_CHUNK_LOWHALFBITS);
    }
    cy = (cy1 >> (BIG_CHUNK_SIZE / 2)) +
        dhigh * (a[len - 1] & BIG_CHUNK_LOWHALFBITS) +
        (r[len - 1] >> (BIG_CHUNK_SIZE / 2));
    r[len - 1] = (cy1 & BIG_CHUNK_LOWHALFBITS) |
        (cy << (BIG_CHUNK_SIZE / 2));
    retcy = (cy >> (BIG_CHUNK_SIZE / 2)) +
        dhigh * (a[len - 1] >> (BIG_CHUNK_SIZE / 2)) + retcy;

    return (retcy);
}


/*
 * r = a * digit, r and a are vectors of length len
 * returns the carry digit
 * (implemented as a zero-fill followed by big_mul_add_vec())
 */
BIG_CHUNK_TYPE
big_mul_set_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len,
    BIG_CHUNK_TYPE digit)
{
    int i;

    ASSERT(r != a);
    for (i = 0; i < len; i++) {
        r[i] = 0;
    }
    return (big_mul_add_vec(r, a, len, digit));
}

/* r = a * a; simple row-by-row multiply (no cross-product shortcut) */
void
big_sqr_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int len)
{
    int i;

    ASSERT(r != a);
    r[len] = big_mul_set_vec(r, a, len, a[0]);
    for (i = 1; i < len; ++i)
        r[len + i] = big_mul_add_vec(r+i, a, len, a[i]);
}

#endif /* BIG_CHUNK_SIZE == 32/64 */

#else /* ! UMUL64 */

#if (BIG_CHUNK_SIZE != 32)
#error Don't use 64-bit chunks without defining UMUL64
#endif


/*
 * r = r + a * digit, r and a are vectors of length len
 * returns the carry digit
 *
 * No 32x32->64 multiply assumed: the digit is split into 16-bit
 * halves and two 16x16->32 passes are made (see the 64-bit-chunk
 * variant above for the same structure).
 */
uint32_t
big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit)
{
    uint32_t cy, cy1, retcy, dlow, dhigh;
    int i;

    cy1 = 0;
    dlow = digit & 0xffff;
    dhigh = digit >> 16;
    for (i = 0; i < len; i++) {
        cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff);
        cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16);
        r[i] = (cy & 0xffff) | (cy1 << 16);
    }
    retcy = cy1 >> 16;

    cy1 = r[0] & 0xffff;
    for (i = 0; i < len - 1; i++) {
        cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16);
        r[i] = (cy1 & 0xffff) | (cy << 16);
        cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff);
    }
    cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16);
    r[len - 1] = (cy1 & 0xffff) | (cy << 16);
    retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy;

    return (retcy);
}


/*
 * r = a * digit, r and a are vectors of length len
 * returns the carry
digit 1413 */ 1414 uint32_t 1415 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit) 1416 { 1417 int i; 1418 1419 ASSERT(r != a); 1420 for (i = 0; i < len; i++) { 1421 r[i] = 0; 1422 } 1423 1424 return (big_mul_add_vec(r, a, len, digit)); 1425 } 1426 1427 void 1428 big_sqr_vec(uint32_t *r, uint32_t *a, int len) 1429 { 1430 int i; 1431 1432 ASSERT(r != a); 1433 r[len] = big_mul_set_vec(r, a, len, a[0]); 1434 for (i = 1; i < len; ++i) 1435 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]); 1436 } 1437 1438 #endif /* UMUL64 */ 1439 1440 void 1441 big_mul_vec(BIG_CHUNK_TYPE *r, BIG_CHUNK_TYPE *a, int alen, 1442 BIG_CHUNK_TYPE *b, int blen) 1443 { 1444 int i; 1445 1446 r[alen] = big_mul_set_vec(r, a, alen, b[0]); 1447 for (i = 1; i < blen; ++i) 1448 r[alen + i] = big_mul_add_vec(r+i, a, alen, b[i]); 1449 } 1450 1451 1452 #endif /* ! PSR_MUL */ 1453 1454 1455 /* 1456 * result = aa * bb result->value should be big enough to hold the result 1457 * 1458 * Implementation: Standard grammar school algorithm 1459 * 1460 */ 1461 BIG_ERR_CODE 1462 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 1463 { 1464 BIGNUM tmp1; 1465 BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE]; 1466 BIG_CHUNK_TYPE *r, *t, *a, *b; 1467 BIG_ERR_CODE err; 1468 int i, alen, blen, rsize, sign, diff; 1469 1470 if (aa == bb) { 1471 diff = 0; 1472 } else { 1473 diff = big_cmp_abs(aa, bb); 1474 if (diff < 0) { 1475 BIGNUM *tt; 1476 tt = aa; 1477 aa = bb; 1478 bb = tt; 1479 } 1480 } 1481 a = aa->value; 1482 b = bb->value; 1483 alen = aa->len; 1484 blen = bb->len; 1485 while ((alen > 1) && (a[alen - 1] == 0)) { 1486 alen--; 1487 } 1488 aa->len = alen; 1489 while ((blen > 1) && (b[blen - 1] == 0)) { 1490 blen--; 1491 } 1492 bb->len = blen; 1493 1494 rsize = alen + blen; 1495 if (result->size < rsize) { 1496 err = big_extend(result, rsize); 1497 if (err != BIG_OK) { 1498 return (err); 1499 } 1500 /* aa or bb might be an alias to result */ 1501 a = aa->value; 1502 b = bb->value; 1503 } 1504 r = result->value; 1505 
1506 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) { 1507 result->len = 1; 1508 result->sign = 1; 1509 r[0] = 0; 1510 return (BIG_OK); 1511 } 1512 sign = aa->sign * bb->sign; 1513 if ((alen == 1) && (a[0] == 1)) { 1514 for (i = 0; i < blen; i++) { 1515 r[i] = b[i]; 1516 } 1517 result->len = blen; 1518 result->sign = sign; 1519 return (BIG_OK); 1520 } 1521 if ((blen == 1) && (b[0] == 1)) { 1522 for (i = 0; i < alen; i++) { 1523 r[i] = a[i]; 1524 } 1525 result->len = alen; 1526 result->sign = sign; 1527 return (BIG_OK); 1528 } 1529 1530 if ((err = big_init1(&tmp1, rsize, 1531 tmp1value, arraysize(tmp1value))) != BIG_OK) { 1532 return (err); 1533 } 1534 (void) big_copy(&tmp1, aa); 1535 t = tmp1.value; 1536 1537 for (i = 0; i < rsize; i++) { 1538 t[i] = 0; 1539 } 1540 1541 if (diff == 0 && alen > 2) { 1542 BIG_SQR_VEC(t, a, alen); 1543 } else if (blen > 0) { 1544 BIG_MUL_VEC(t, a, alen, b, blen); 1545 } 1546 1547 if (t[rsize - 1] == 0) { 1548 tmp1.len = rsize - 1; 1549 } else { 1550 tmp1.len = rsize; 1551 } 1552 if ((err = big_copy(result, &tmp1)) != BIG_OK) { 1553 return (err); 1554 } 1555 result->sign = sign; 1556 1557 big_finish(&tmp1); 1558 1559 return (BIG_OK); 1560 } 1561 1562 1563 /* 1564 * caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1 1565 * and that ret is not n 1566 */ 1567 BIG_ERR_CODE 1568 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, BIG_CHUNK_TYPE n0) 1569 { 1570 int i, j, nlen, needsubtract; 1571 BIG_CHUNK_TYPE *nn, *rr; 1572 BIG_CHUNK_TYPE digit, c; 1573 BIG_ERR_CODE err; 1574 1575 nlen = n->len; 1576 nn = n->value; 1577 1578 rr = ret->value; 1579 1580 if ((err = big_mul(ret, a, b)) != BIG_OK) { 1581 return (err); 1582 } 1583 1584 rr = ret->value; 1585 for (i = ret->len; i < 2 * nlen + 1; i++) { 1586 rr[i] = 0; 1587 } 1588 for (i = 0; i < nlen; i++) { 1589 digit = rr[i]; 1590 digit = digit * n0; 1591 1592 c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit); 1593 j = i + nlen; 1594 rr[j] += c; 1595 while 
(rr[j] < c) {
			/* chunk overflowed: ripple the carry upward */
			rr[j + 1] += 1;
			j++;
			c = 1;
		}
	}

	/*
	 * The reduced value is in rr[nlen .. 2*nlen]; subtract n once
	 * if it is still >= n, otherwise just shift it down.
	 */
	needsubtract = 0;
	if ((rr[2 * nlen] != 0))
		needsubtract = 1;
	else {
		for (i = 2 * nlen - 1; i >= nlen; i--) {
			if (rr[i] > nn[i - nlen]) {
				needsubtract = 1;
				break;
			} else if (rr[i] < nn[i - nlen]) {
				break;
			}
		}
	}
	if (needsubtract)
		big_sub_vec(rr, rr + nlen, nn, nlen);
	else {
		for (i = 0; i < nlen; i++) {
			rr[i] = rr[i + nlen];
		}
	}
	/* normalize the length (strip leading zero chunks) */
	for (i = nlen - 1; (i >= 0) && (rr[i] == 0); i--)
		;
	ret->len = i+1;

	return (BIG_OK);
}


/*
 * Compute n0 = -n^(-1) mod 2^BIG_CHUNK_SIZE, the Montgomery constant,
 * one bit per iteration (n must be odd).
 */
BIG_CHUNK_TYPE
big_n0(BIG_CHUNK_TYPE n)
{
	int i;
	BIG_CHUNK_TYPE result, tmp;

	result = 0;
	tmp = BIG_CHUNK_ALLBITS;
	for (i = 0; i < BIG_CHUNK_SIZE; i++) {
		if ((tmp & 1) == 1) {
			result = (result >> 1) | BIG_CHUNK_HIGHBIT;
			tmp = tmp - n;
		} else {
			result = (result >> 1);
		}
		tmp = tmp >> 1;
	}

	return (result);
}


/*
 * Number of significant bits in n (position of the highest set bit in
 * the highest nonzero chunk); returns 0 when n is zero.
 */
int
big_numbits(BIGNUM *n)
{
	int i, j;
	BIG_CHUNK_TYPE t;

	for (i = n->len - 1; i > 0; i--) {
		if (n->value[i] != 0) {
			break;
		}
	}
	t = n->value[i];
	for (j = BIG_CHUNK_SIZE; j > 0; j--) {
		if ((t & BIG_CHUNK_HIGHBIT) == 0) {
			t = t << 1;
		} else {
			return (BIG_CHUNK_SIZE * i + j);
		}
	}
	return (0);
}


/* caller must make sure that a < n */
/* result = R^2 mod n, where R = 2^(BIG_CHUNK_SIZE * n->len) */
BIG_ERR_CODE
big_mont_rr(BIGNUM *result, BIGNUM *n)
{
	BIGNUM rr;
	BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
	int len, i;
	BIG_ERR_CODE err;

	rr.malloced = 0;
	len = n->len;

	if ((err = big_init1(&rr, 2 * len + 1,
	    rrvalue, arraysize(rrvalue))) != BIG_OK) {
		return (err);
	}

	/* rr = 2^(2 * len * BIG_CHUNK_SIZE), then reduce mod n */
	for (i = 0; i < 2 * len; i++) {
		rr.value[i] = 0;
	}
	rr.value[2 * len] = 1;
	rr.len = 2 * len + 1;
	if ((err = big_div_pos(NULL, &rr,
&rr, n)) != BIG_OK) {
		goto ret;
	}
	err = big_copy(result, &rr);
ret:
	big_finish(&rr);
	return (err);
}


/* caller must make sure that a < n */
/*
 * result = a * R mod n (convert a into Montgomery form), where
 * R = 2^(BIG_CHUNK_SIZE * n->len); n_rr, if non-NULL, caches R^2 mod n.
 */
BIG_ERR_CODE
big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, BIG_CHUNK_TYPE n0,
    BIGNUM *n_rr)
{
	BIGNUM rr;
	BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE];
	int len, i;
	BIG_ERR_CODE err;

	rr.malloced = 0;
	len = n->len;

	if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue)))
	    != BIG_OK) {
		return (err);
	}

	/* no cached R^2 mod n: compute it here */
	if (n_rr == NULL) {
		for (i = 0; i < 2 * len; i++) {
			rr.value[i] = 0;
		}
		rr.value[2 * len] = 1;
		rr.len = 2 * len + 1;
		if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) {
			goto ret;
		}
		n_rr = &rr;
	}

	/* mont_mul(a, R^2) = a * R mod n */
	if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) {
		goto ret;
	}
	err = big_copy(result, &rr);

ret:
	big_finish(&rr);
	return (err);
}


#ifdef USE_FLOATING_POINT
#define	big_modexp_ncp_float	big_modexp_ncp_sw
#else
#define	big_modexp_ncp_int	big_modexp_ncp_sw
#endif

#define	MAX_EXP_BIT_GROUP_SIZE 6
#define	APOWERS_MAX_SIZE	(1 << (MAX_EXP_BIT_GROUP_SIZE - 1))

/*
 * Integer (non-FP) Montgomery modular exponentiation: tmp = ma^e, with
 * ma already in Montgomery form.  Uses a sliding-window method: odd
 * powers ma^1, ma^3, ..., ma^(2*apowerssize - 1) are precomputed in
 * apowers[], and the exponent is scanned MSB-to-LSB in groups of up to
 * `groupbits` bits.  The result is left in tmp; `result` is unused
 * (hence ARGSUSED).
 */
/* ARGSUSED */
static BIG_ERR_CODE
big_modexp_ncp_int(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n,
    BIGNUM *tmp, BIG_CHUNK_TYPE n0)

{
	BIGNUM apowers[APOWERS_MAX_SIZE];
	BIGNUM tmp1;
	BIG_CHUNK_TYPE tmp1value[BIGTMPSIZE];
	int i, j, k, l, m, p;
	int bit, bitind, bitcount, groupbits, apowerssize;
	int nbits;
	BIG_ERR_CODE err;

	/* small exponents don't amortize the window precomputation */
	nbits = big_numbits(e);
	if (nbits < 50) {
		groupbits = 1;
		apowerssize = 1;
	} else {
		groupbits = MAX_EXP_BIT_GROUP_SIZE;
		apowerssize = 1 << (groupbits - 1);
	}


	if ((err = big_init1(&tmp1, 2 * n->len + 1,
	    tmp1value, arraysize(tmp1value))) !=
	    BIG_OK) {
		return (err);
	}

	/* set the malloced bit to help cleanup */
	for (i = 0; i < apowerssize; i++) {
		apowers[i].malloced = 0;
	}
	for (i = 0; i < apowerssize; i++) {
		if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) !=
		    BIG_OK) {
			goto ret;
		}
	}

	/* apowers[0] = ma^1 */
	(void) big_copy(&(apowers[0]), ma);

	/* ma = ma^2; apowers[i] = apowers[i-1] * ma = ma^(2i+1) */
	if ((err = big_mont_mul(&tmp1, ma, ma, n, n0)) != BIG_OK) {
		goto ret;
	}
	(void) big_copy(ma, &tmp1);

	for (i = 1; i < apowerssize; i++) {
		if ((err = big_mont_mul(&tmp1, ma,
		    &(apowers[i-1]), n, n0)) != BIG_OK) {
			goto ret;
		}
		(void) big_copy(&apowers[i], &tmp1);
	}

	/*
	 * Scan e MSB-to-LSB.  A window is an odd bit pattern p of at
	 * most groupbits bits; k counts squarings owed before the
	 * window multiply, l counts trailing zero bits (squarings owed
	 * after it).
	 */
	bitind = nbits % BIG_CHUNK_SIZE;
	k = 0;
	l = 0;
	p = 0;
	bitcount = 0;
	for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) {
		for (j = bitind - 1; j >= 0; j--) {
			bit = (e->value[i] >> j) & 1;
			if ((bitcount == 0) && (bit == 0)) {
				/* leading zero between windows: square */
				if ((err = big_mont_mul(tmp,
				    tmp, tmp, n, n0)) != BIG_OK) {
					goto ret;
				}
			} else {
				bitcount++;
				p = p * 2 + bit;
				if (bit == 1) {
					k = k + l + 1;
					l = 0;
				} else {
					l++;
				}
				if (bitcount == groupbits) {
					/* flush the window */
					for (m = 0; m < k; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					/* p >> (l+1) indexes the odd power */
					if ((err = big_mont_mul(tmp, tmp,
					    &(apowers[p >> (l + 1)]),
					    n, n0)) != BIG_OK) {
						goto ret;
					}
					for (m = 0; m < l; m++) {
						if ((err = big_mont_mul(tmp,
						    tmp, tmp, n, n0)) !=
						    BIG_OK) {
							goto ret;
						}
					}
					k = 0;
					l = 0;
					p = 0;
					bitcount = 0;
				}
			}
		}
		bitind = BIG_CHUNK_SIZE;
	}

	/* flush the final (possibly partial) window */
	for (m = 0; m < k; m++) {
		if ((err = big_mont_mul(tmp, tmp, tmp, n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	if (p != 0) {
		if ((err = big_mont_mul(tmp, tmp,
		    &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) {
			goto ret;
		}
	}
	for (m
= 0; m < l; m++) { 1876 if ((err = big_mont_mul(result, tmp, tmp, n, n0)) != BIG_OK) { 1877 goto ret; 1878 } 1879 } 1880 1881 ret: 1882 for (i = apowerssize - 1; i >= 0; i--) { 1883 big_finish(&(apowers[i])); 1884 } 1885 big_finish(&tmp1); 1886 1887 return (err); 1888 } 1889 1890 1891 #ifdef USE_FLOATING_POINT 1892 1893 #ifdef _KERNEL 1894 1895 #include <sys/sysmacros.h> 1896 #include <sys/regset.h> 1897 #include <sys/fpu/fpusystm.h> 1898 1899 /* the alignment for block stores to save fp registers */ 1900 #define FPR_ALIGN (64) 1901 1902 extern void big_savefp(kfpu_t *); 1903 extern void big_restorefp(kfpu_t *); 1904 1905 #endif /* _KERNEL */ 1906 1907 /* 1908 * This version makes use of floating point for performance 1909 */ 1910 static BIG_ERR_CODE 1911 big_modexp_ncp_float(BIGNUM *result, BIGNUM *ma, BIGNUM *e, BIGNUM *n, 1912 BIGNUM *tmp, BIG_CHUNK_TYPE n0) 1913 { 1914 1915 int i, j, k, l, m, p, bit, bitind, bitcount, nlen; 1916 double dn0; 1917 double *dn, *dt, *d16r, *d32r; 1918 uint32_t *nint, *prod; 1919 double *apowers[APOWERS_MAX_SIZE]; 1920 int nbits, groupbits, apowerssize; 1921 BIG_ERR_CODE err = BIG_OK; 1922 1923 #ifdef _KERNEL 1924 uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN]; 1925 kfpu_t *fpu; 1926 1927 #ifdef DEBUG 1928 if (!fpu_exists) 1929 return (BIG_GENERAL_ERR); 1930 #endif 1931 1932 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN); 1933 big_savefp(fpu); 1934 1935 #endif /* _KERNEL */ 1936 1937 nbits = big_numbits(e); 1938 if (nbits < 50) { 1939 groupbits = 1; 1940 apowerssize = 1; 1941 } else { 1942 groupbits = MAX_EXP_BIT_GROUP_SIZE; 1943 apowerssize = 1 << (groupbits - 1); 1944 } 1945 1946 nlen = (BIG_CHUNK_SIZE / 32) * n->len; 1947 dn0 = (double)(n0 & 0xffff); 1948 1949 dn = dt = d16r = d32r = NULL; 1950 nint = prod = NULL; 1951 for (i = 0; i < apowerssize; i++) { 1952 apowers[i] = NULL; 1953 } 1954 1955 if ((dn = big_malloc(nlen * sizeof (double))) == NULL) { 1956 err = BIG_NO_MEM; 1957 goto ret; 1958 } 1959 if ((dt = big_malloc((4 * 
nlen + 2) * sizeof (double))) == NULL) { 1960 err = BIG_NO_MEM; 1961 goto ret; 1962 } 1963 if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) { 1964 err = BIG_NO_MEM; 1965 goto ret; 1966 } 1967 if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) { 1968 err = BIG_NO_MEM; 1969 goto ret; 1970 } 1971 if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) { 1972 err = BIG_NO_MEM; 1973 goto ret; 1974 } 1975 if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) { 1976 err = BIG_NO_MEM; 1977 goto ret; 1978 } 1979 for (i = 0; i < apowerssize; i++) { 1980 if ((apowers[i] = big_malloc((2 * nlen + 1) * 1981 sizeof (double))) == NULL) { 1982 err = BIG_NO_MEM; 1983 goto ret; 1984 } 1985 } 1986 1987 #if (BIG_CHUNK_SIZE == 32) 1988 for (i = 0; i < ma->len; i++) { 1989 nint[i] = ma->value[i]; 1990 } 1991 for (; i < nlen; i++) { 1992 nint[i] = 0; 1993 } 1994 #else 1995 for (i = 0; i < ma->len; i++) { 1996 nint[2 * i] = (uint32_t)(ma->value[i] & 0xffffffffULL); 1997 nint[2 * i + 1] = (uint32_t)(ma->value[i] >> 32); 1998 } 1999 for (i = ma->len * 2; i < nlen; i++) { 2000 nint[i] = 0; 2001 } 2002 #endif 2003 conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen); 2004 2005 #if (BIG_CHUNK_SIZE == 32) 2006 for (i = 0; i < n->len; i++) { 2007 nint[i] = n->value[i]; 2008 } 2009 for (; i < nlen; i++) { 2010 nint[i] = 0; 2011 } 2012 #else 2013 for (i = 0; i < n->len; i++) { 2014 nint[2 * i] = (uint32_t)(n->value[i] & 0xffffffffULL); 2015 nint[2 * i + 1] = (uint32_t)(n->value[i] >> 32); 2016 } 2017 for (i = n->len * 2; i < nlen; i++) { 2018 nint[i] = 0; 2019 } 2020 #endif 2021 conv_i32_to_d32(dn, nint, nlen); 2022 2023 mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0); 2024 conv_i32_to_d32(d32r, prod, nlen); 2025 for (i = 1; i < apowerssize; i++) { 2026 mont_mulf_noconv(prod, d32r, apowers[i - 1], 2027 dt, dn, nint, nlen, dn0); 2028 conv_i32_to_d16(apowers[i], prod, nlen); 2029 } 2030 2031 #if (BIG_CHUNK_SIZE == 32) 2032 for (i = 0; i < 
tmp->len; i++) { 2033 prod[i] = tmp->value[i]; 2034 } 2035 for (; i < nlen + 1; i++) { 2036 prod[i] = 0; 2037 } 2038 #else 2039 for (i = 0; i < tmp->len; i++) { 2040 prod[2 * i] = (uint32_t)(tmp->value[i] & 0xffffffffULL); 2041 prod[2 * i + 1] = (uint32_t)(tmp->value[i] >> 32); 2042 } 2043 for (i = tmp->len * 2; i < nlen + 1; i++) { 2044 prod[i] = 0; 2045 } 2046 #endif 2047 2048 bitind = nbits % BIG_CHUNK_SIZE; 2049 k = 0; 2050 l = 0; 2051 p = 0; 2052 bitcount = 0; 2053 for (i = nbits / BIG_CHUNK_SIZE; i >= 0; i--) { 2054 for (j = bitind - 1; j >= 0; j--) { 2055 bit = (e->value[i] >> j) & 1; 2056 if ((bitcount == 0) && (bit == 0)) { 2057 conv_i32_to_d32_and_d16(d32r, d16r, 2058 prod, nlen); 2059 mont_mulf_noconv(prod, d32r, d16r, 2060 dt, dn, nint, nlen, dn0); 2061 } else { 2062 bitcount++; 2063 p = p * 2 + bit; 2064 if (bit == 1) { 2065 k = k + l + 1; 2066 l = 0; 2067 } else { 2068 l++; 2069 } 2070 if (bitcount == groupbits) { 2071 for (m = 0; m < k; m++) { 2072 conv_i32_to_d32_and_d16(d32r, 2073 d16r, prod, nlen); 2074 mont_mulf_noconv(prod, d32r, 2075 d16r, dt, dn, nint, 2076 nlen, dn0); 2077 } 2078 conv_i32_to_d32(d32r, prod, nlen); 2079 mont_mulf_noconv(prod, d32r, 2080 apowers[p >> (l+1)], 2081 dt, dn, nint, nlen, dn0); 2082 for (m = 0; m < l; m++) { 2083 conv_i32_to_d32_and_d16(d32r, 2084 d16r, prod, nlen); 2085 mont_mulf_noconv(prod, d32r, 2086 d16r, dt, dn, nint, 2087 nlen, dn0); 2088 } 2089 k = 0; 2090 l = 0; 2091 p = 0; 2092 bitcount = 0; 2093 } 2094 } 2095 } 2096 bitind = BIG_CHUNK_SIZE; 2097 } 2098 2099 for (m = 0; m < k; m++) { 2100 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen); 2101 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0); 2102 } 2103 if (p != 0) { 2104 conv_i32_to_d32(d32r, prod, nlen); 2105 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)], 2106 dt, dn, nint, nlen, dn0); 2107 } 2108 for (m = 0; m < l; m++) { 2109 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen); 2110 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0); 
2111 } 2112 2113 #if (BIG_CHUNK_SIZE == 32) 2114 for (i = 0; i < nlen; i++) { 2115 result->value[i] = prod[i]; 2116 } 2117 for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--) 2118 ; 2119 #else 2120 for (i = 0; i < nlen / 2; i++) { 2121 result->value[i] = (uint64_t)(prod[2 * i]) + 2122 (((uint64_t)(prod[2 * i + 1])) << 32); 2123 } 2124 for (i = nlen / 2 - 1; (i > 0) && (result->value[i] == 0); i--) 2125 ; 2126 #endif 2127 result->len = i + 1; 2128 2129 ret: 2130 for (i = apowerssize - 1; i >= 0; i--) { 2131 if (apowers[i] != NULL) 2132 big_free(apowers[i], (2 * nlen + 1) * sizeof (double)); 2133 } 2134 if (d32r != NULL) { 2135 big_free(d32r, nlen * sizeof (double)); 2136 } 2137 if (d16r != NULL) { 2138 big_free(d16r, (2 * nlen + 1) * sizeof (double)); 2139 } 2140 if (prod != NULL) { 2141 big_free(prod, (nlen + 1) * sizeof (uint32_t)); 2142 } 2143 if (nint != NULL) { 2144 big_free(nint, nlen * sizeof (uint32_t)); 2145 } 2146 if (dt != NULL) { 2147 big_free(dt, (4 * nlen + 2) * sizeof (double)); 2148 } 2149 if (dn != NULL) { 2150 big_free(dn, nlen * sizeof (double)); 2151 } 2152 2153 #ifdef _KERNEL 2154 big_restorefp(fpu); 2155 #endif 2156 2157 return (err); 2158 } 2159 2160 #endif /* USE_FLOATING_POINT */ 2161 2162 2163 BIG_ERR_CODE 2164 big_modexp_ext(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr, 2165 big_modexp_ncp_info_t *info) 2166 { 2167 BIGNUM ma, tmp, rr; 2168 BIG_CHUNK_TYPE mavalue[BIGTMPSIZE]; 2169 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2170 BIG_CHUNK_TYPE rrvalue[BIGTMPSIZE]; 2171 BIG_ERR_CODE err; 2172 BIG_CHUNK_TYPE n0; 2173 2174 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) != 2175 BIG_OK) { 2176 return (err); 2177 } 2178 ma.len = 1; 2179 ma.value[0] = 0; 2180 2181 if ((err = big_init1(&tmp, 2 * n->len + 1, 2182 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 2183 goto ret1; 2184 } 2185 2186 /* set the malloced bit to help cleanup */ 2187 rr.malloced = 0; 2188 if (n_rr == NULL) { 2189 if ((err = big_init1(&rr, 2 * n->len + 1, 
2190 rrvalue, arraysize(rrvalue))) != BIG_OK) { 2191 goto ret2; 2192 } 2193 if (big_mont_rr(&rr, n) != BIG_OK) { 2194 goto ret; 2195 } 2196 n_rr = &rr; 2197 } 2198 2199 n0 = big_n0(n->value[0]); 2200 2201 if (big_cmp_abs(a, n) > 0) { 2202 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) { 2203 goto ret; 2204 } 2205 err = big_mont_conv(&ma, &ma, n, n0, n_rr); 2206 } else { 2207 err = big_mont_conv(&ma, a, n, n0, n_rr); 2208 } 2209 if (err != BIG_OK) { 2210 goto ret; 2211 } 2212 2213 tmp.len = 1; 2214 tmp.value[0] = 1; 2215 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) { 2216 goto ret; 2217 } 2218 2219 if ((info != NULL) && (info->func != NULL)) { 2220 err = (*(info->func))(&tmp, &ma, e, n, &tmp, n0, 2221 info->ncp, info->reqp); 2222 } else { 2223 err = big_modexp_ncp_sw(&tmp, &ma, e, n, &tmp, n0); 2224 } 2225 if (err != BIG_OK) { 2226 goto ret; 2227 } 2228 2229 ma.value[0] = 1; 2230 ma.len = 1; 2231 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) { 2232 goto ret; 2233 } 2234 err = big_copy(result, &tmp); 2235 2236 ret: 2237 if (rr.malloced) { 2238 big_finish(&rr); 2239 } 2240 ret2: 2241 big_finish(&tmp); 2242 ret1: 2243 big_finish(&ma); 2244 2245 return (err); 2246 } 2247 2248 BIG_ERR_CODE 2249 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr) 2250 { 2251 return (big_modexp_ext(result, a, e, n, n_rr, NULL)); 2252 } 2253 2254 2255 BIG_ERR_CODE 2256 big_modexp_crt_ext(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1, 2257 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq, 2258 BIGNUM *p_rr, BIGNUM *q_rr, big_modexp_ncp_info_t *info) 2259 { 2260 BIGNUM ap, aq, tmp; 2261 int alen, biglen, sign; 2262 BIG_ERR_CODE err; 2263 2264 if (p->len > q->len) { 2265 biglen = p->len; 2266 } else { 2267 biglen = q->len; 2268 } 2269 2270 if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) { 2271 return (err); 2272 } 2273 if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) { 2274 goto ret1; 2275 } 2276 if ((err = 
big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) { 2277 goto ret2; 2278 } 2279 2280 /* 2281 * check whether a is too short - to avoid timing attacks 2282 */ 2283 alen = a->len; 2284 while ((alen > p->len) && (a->value[alen - 1] == 0)) { 2285 alen--; 2286 } 2287 if (alen < p->len + q->len) { 2288 /* 2289 * a is too short, add p*q to it before 2290 * taking it modulo p and q 2291 * this will also affect timing, but this difference 2292 * does not depend on p or q, only on a 2293 * (in "normal" operation, this path will never be 2294 * taken, so it is not a performance penalty 2295 */ 2296 if ((err = big_mul(&tmp, p, q)) != BIG_OK) { 2297 goto ret; 2298 } 2299 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) { 2300 goto ret; 2301 } 2302 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) { 2303 goto ret; 2304 } 2305 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) { 2306 goto ret; 2307 } 2308 } else { 2309 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) { 2310 goto ret; 2311 } 2312 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) { 2313 goto ret; 2314 } 2315 } 2316 2317 if ((err = big_modexp_ext(&ap, &ap, dmodpminus1, p, p_rr, info)) != 2318 BIG_OK) { 2319 goto ret; 2320 } 2321 if ((err = big_modexp_ext(&aq, &aq, dmodqminus1, q, q_rr, info)) != 2322 BIG_OK) { 2323 goto ret; 2324 } 2325 if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) { 2326 goto ret; 2327 } 2328 if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) { 2329 goto ret; 2330 } 2331 sign = tmp.sign; 2332 tmp.sign = 1; 2333 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) { 2334 goto ret; 2335 } 2336 if ((sign == -1) && (!big_is_zero(&aq))) { 2337 (void) big_sub_pos(&aq, q, &aq); 2338 } 2339 if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) { 2340 goto ret; 2341 } 2342 err = big_add_abs(result, &ap, &tmp); 2343 2344 ret: 2345 big_finish(&tmp); 2346 ret2: 2347 big_finish(&aq); 2348 ret1: 2349 big_finish(&ap); 2350 2351 return (err); 2352 } 2353 2354 2355 BIG_ERR_CODE 2356 
big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1, 2357 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq, 2358 BIGNUM *p_rr, BIGNUM *q_rr) 2359 { 2360 return (big_modexp_crt_ext(result, a, dmodpminus1, dmodqminus1, 2361 p, q, pinvmodq, p_rr, q_rr, NULL)); 2362 } 2363 2364 2365 static BIG_CHUNK_TYPE onearr[1] = {(BIG_CHUNK_TYPE)1}; 2366 BIGNUM big_One = {1, 1, 1, 0, onearr}; 2367 2368 static BIG_CHUNK_TYPE twoarr[1] = {(BIG_CHUNK_TYPE)2}; 2369 BIGNUM big_Two = {1, 1, 1, 0, twoarr}; 2370 2371 static BIG_CHUNK_TYPE fourarr[1] = {(BIG_CHUNK_TYPE)4}; 2372 static BIGNUM big_Four = {1, 1, 1, 0, fourarr}; 2373 2374 2375 BIG_ERR_CODE 2376 big_sqrt_pos(BIGNUM *result, BIGNUM *n) 2377 { 2378 BIGNUM *high, *low, *mid, *t; 2379 BIGNUM t1, t2, t3, prod; 2380 BIG_CHUNK_TYPE t1value[BIGTMPSIZE]; 2381 BIG_CHUNK_TYPE t2value[BIGTMPSIZE]; 2382 BIG_CHUNK_TYPE t3value[BIGTMPSIZE]; 2383 BIG_CHUNK_TYPE prodvalue[BIGTMPSIZE]; 2384 int i, nbits, diff, nrootbits, highbits; 2385 BIG_ERR_CODE err; 2386 2387 nbits = big_numbits(n); 2388 2389 if ((err = big_init1(&t1, n->len + 1, 2390 t1value, arraysize(t1value))) != BIG_OK) 2391 return (err); 2392 if ((err = big_init1(&t2, n->len + 1, 2393 t2value, arraysize(t2value))) != BIG_OK) 2394 goto ret1; 2395 if ((err = big_init1(&t3, n->len + 1, 2396 t3value, arraysize(t3value))) != BIG_OK) 2397 goto ret2; 2398 if ((err = big_init1(&prod, n->len + 1, 2399 prodvalue, arraysize(prodvalue))) != BIG_OK) 2400 goto ret3; 2401 2402 nrootbits = (nbits + 1) / 2; 2403 t1.len = t2.len = t3.len = (nrootbits - 1) / BIG_CHUNK_SIZE + 1; 2404 for (i = 0; i < t1.len; i++) { 2405 t1.value[i] = 0; 2406 t2.value[i] = BIG_CHUNK_ALLBITS; 2407 } 2408 highbits = nrootbits - BIG_CHUNK_SIZE * (t1.len - 1); 2409 if (highbits == BIG_CHUNK_SIZE) { 2410 t1.value[t1.len - 1] = BIG_CHUNK_HIGHBIT; 2411 t2.value[t2.len - 1] = BIG_CHUNK_ALLBITS; 2412 } else { 2413 t1.value[t1.len - 1] = (BIG_CHUNK_TYPE)1 << (highbits - 1); 2414 t2.value[t2.len - 1] = 2 * 
t1.value[t1.len - 1] - 1; 2415 } 2416 2417 high = &t2; 2418 low = &t1; 2419 mid = &t3; 2420 2421 if ((err = big_mul(&prod, high, high)) != BIG_OK) { 2422 goto ret; 2423 } 2424 diff = big_cmp_abs(&prod, n); 2425 if (diff <= 0) { 2426 err = big_copy(result, high); 2427 goto ret; 2428 } 2429 2430 (void) big_sub_pos(mid, high, low); 2431 while (big_cmp_abs(&big_One, mid) != 0) { 2432 (void) big_add_abs(mid, high, low); 2433 (void) big_half_pos(mid, mid); 2434 if ((err = big_mul(&prod, mid, mid)) != BIG_OK) 2435 goto ret; 2436 diff = big_cmp_abs(&prod, n); 2437 if (diff > 0) { 2438 t = high; 2439 high = mid; 2440 mid = t; 2441 } else if (diff < 0) { 2442 t = low; 2443 low = mid; 2444 mid = t; 2445 } else { 2446 err = big_copy(result, low); 2447 goto ret; 2448 } 2449 (void) big_sub_pos(mid, high, low); 2450 } 2451 2452 err = big_copy(result, low); 2453 ret: 2454 if (prod.malloced) big_finish(&prod); 2455 ret3: 2456 if (t3.malloced) big_finish(&t3); 2457 ret2: 2458 if (t2.malloced) big_finish(&t2); 2459 ret1: 2460 if (t1.malloced) big_finish(&t1); 2461 2462 return (err); 2463 } 2464 2465 2466 BIG_ERR_CODE 2467 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm) 2468 { 2469 BIGNUM *t, *tmp2, *m, *n; 2470 BIGNUM t1, t2, t3; 2471 BIG_CHUNK_TYPE t1value[BIGTMPSIZE]; 2472 BIG_CHUNK_TYPE t2value[BIGTMPSIZE]; 2473 BIG_CHUNK_TYPE t3value[BIGTMPSIZE]; 2474 int len, err; 2475 2476 if (big_is_zero(nn) || 2477 (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) { 2478 *jac = 0; 2479 return (BIG_OK); 2480 } 2481 2482 if (nn->len > mm->len) { 2483 len = nn->len; 2484 } else { 2485 len = mm->len; 2486 } 2487 2488 if ((err = big_init1(&t1, len, 2489 t1value, arraysize(t1value))) != BIG_OK) { 2490 return (err); 2491 } 2492 if ((err = big_init1(&t2, len, 2493 t2value, arraysize(t2value))) != BIG_OK) { 2494 goto ret1; 2495 } 2496 if ((err = big_init1(&t3, len, 2497 t3value, arraysize(t3value))) != BIG_OK) { 2498 goto ret2; 2499 } 2500 2501 n = &t1; 2502 m = &t2; 2503 tmp2 = &t3; 2504 2505 (void) 
big_copy(n, nn); 2506 (void) big_copy(m, mm); 2507 2508 *jac = 1; 2509 while (big_cmp_abs(&big_One, m) != 0) { 2510 if (big_is_zero(n)) { 2511 *jac = 0; 2512 goto ret; 2513 } 2514 if ((m->value[0] & 1) == 0) { 2515 if (((n->value[0] & 7) == 3) || 2516 ((n->value[0] & 7) == 5)) 2517 *jac = -*jac; 2518 (void) big_half_pos(m, m); 2519 } else if ((n->value[0] & 1) == 0) { 2520 if (((m->value[0] & 7) == 3) || 2521 ((m->value[0] & 7) == 5)) 2522 *jac = -*jac; 2523 (void) big_half_pos(n, n); 2524 } else { 2525 if (((m->value[0] & 3) == 3) && 2526 ((n->value[0] & 3) == 3)) { 2527 *jac = -*jac; 2528 } 2529 if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) { 2530 goto ret; 2531 } 2532 t = tmp2; 2533 tmp2 = m; 2534 m = n; 2535 n = t; 2536 } 2537 } 2538 err = BIG_OK; 2539 2540 ret: 2541 if (t3.malloced) big_finish(&t3); 2542 ret2: 2543 if (t2.malloced) big_finish(&t2); 2544 ret1: 2545 if (t1.malloced) big_finish(&t1); 2546 2547 return (err); 2548 } 2549 2550 2551 BIG_ERR_CODE 2552 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n) 2553 { 2554 int m, w, i; 2555 BIG_CHUNK_TYPE bit; 2556 BIGNUM ki, tmp, tmp2; 2557 BIG_CHUNK_TYPE kivalue[BIGTMPSIZE]; 2558 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2559 BIG_CHUNK_TYPE tmp2value[BIGTMPSIZE]; 2560 BIG_ERR_CODE err; 2561 2562 if (big_cmp_abs(k, &big_One) == 0) { 2563 (void) big_copy(Lk, p); 2564 (void) big_copy(Lkminus1, &big_Two); 2565 return (BIG_OK); 2566 } 2567 2568 if ((err = big_init1(&ki, k->len + 1, 2569 kivalue, arraysize(kivalue))) != BIG_OK) 2570 return (err); 2571 2572 if ((err = big_init1(&tmp, 2 * n->len +1, 2573 tmpvalue, arraysize(tmpvalue))) != BIG_OK) 2574 goto ret1; 2575 2576 if ((err = big_init1(&tmp2, n->len, 2577 tmp2value, arraysize(tmp2value))) != BIG_OK) 2578 goto ret2; 2579 2580 m = big_numbits(k); 2581 ki.len = (m - 1) / BIG_CHUNK_SIZE + 1; 2582 w = (m - 1) / BIG_CHUNK_SIZE; 2583 bit = (BIG_CHUNK_TYPE)1 << ((m - 1) % BIG_CHUNK_SIZE); 2584 for (i = 0; i < ki.len; i++) { 2585 ki.value[i] = 
0; 2586 } 2587 ki.value[ki.len - 1] = bit; 2588 if (big_cmp_abs(k, &ki) != 0) { 2589 (void) big_double(&ki, &ki); 2590 } 2591 (void) big_sub_pos(&ki, &ki, k); 2592 2593 (void) big_copy(Lk, p); 2594 (void) big_copy(Lkminus1, &big_Two); 2595 2596 for (i = 0; i < m; i++) { 2597 if ((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) { 2598 goto ret; 2599 } 2600 (void) big_add_abs(&tmp, &tmp, n); 2601 (void) big_sub_pos(&tmp, &tmp, p); 2602 if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) { 2603 goto ret; 2604 } 2605 if ((ki.value[w] & bit) != 0) { 2606 if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) != 2607 BIG_OK) { 2608 goto ret; 2609 } 2610 (void) big_add_abs(&tmp, &tmp, n); 2611 (void) big_sub_pos(&tmp, &tmp, &big_Two); 2612 if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) != 2613 BIG_OK) { 2614 goto ret; 2615 } 2616 (void) big_copy(Lk, &tmp2); 2617 } else { 2618 if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) { 2619 goto ret; 2620 } 2621 (void) big_add_abs(&tmp, &tmp, n); 2622 (void) big_sub_pos(&tmp, &tmp, &big_Two); 2623 if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) { 2624 goto ret; 2625 } 2626 (void) big_copy(Lkminus1, &tmp2); 2627 } 2628 bit = bit >> 1; 2629 if (bit == 0) { 2630 bit = BIG_CHUNK_HIGHBIT; 2631 w--; 2632 } 2633 } 2634 2635 err = BIG_OK; 2636 2637 ret: 2638 if (tmp2.malloced) big_finish(&tmp2); 2639 ret2: 2640 if (tmp.malloced) big_finish(&tmp); 2641 ret1: 2642 if (ki.malloced) big_finish(&ki); 2643 2644 return (err); 2645 } 2646 2647 2648 BIG_ERR_CODE 2649 big_isprime_pos_ext(BIGNUM *n, big_modexp_ncp_info_t *info) 2650 { 2651 BIGNUM o, nminus1, tmp, Lkminus1, Lk; 2652 BIG_CHUNK_TYPE ovalue[BIGTMPSIZE]; 2653 BIG_CHUNK_TYPE nminus1value[BIGTMPSIZE]; 2654 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2655 BIG_CHUNK_TYPE Lkminus1value[BIGTMPSIZE]; 2656 BIG_CHUNK_TYPE Lkvalue[BIGTMPSIZE]; 2657 BIG_ERR_CODE err; 2658 int e, i, jac; 2659 2660 if (big_cmp_abs(n, &big_One) == 0) { 2661 return (BIG_FALSE); 2662 } 2663 if (big_cmp_abs(n, &big_Two) == 0) { 
2664 return (BIG_TRUE); 2665 } 2666 if ((n->value[0] & 1) == 0) { 2667 return (BIG_FALSE); 2668 } 2669 2670 if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) != 2671 BIG_OK) { 2672 return (err); 2673 } 2674 2675 if ((err = big_init1(&nminus1, n->len, 2676 nminus1value, arraysize(nminus1value))) != BIG_OK) { 2677 goto ret1; 2678 } 2679 2680 if ((err = big_init1(&tmp, 2 * n->len, 2681 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 2682 goto ret2; 2683 } 2684 2685 if ((err = big_init1(&Lkminus1, n->len, 2686 Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) { 2687 goto ret3; 2688 } 2689 2690 if ((err = big_init1(&Lk, n->len, 2691 Lkvalue, arraysize(Lkvalue))) != BIG_OK) { 2692 goto ret4; 2693 } 2694 2695 (void) big_sub_pos(&o, n, &big_One); /* cannot fail */ 2696 (void) big_copy(&nminus1, &o); /* cannot fail */ 2697 e = 0; 2698 while ((o.value[0] & 1) == 0) { 2699 e++; 2700 (void) big_half_pos(&o, &o); /* cannot fail */ 2701 } 2702 if ((err = big_modexp_ext(&tmp, &big_Two, &o, n, NULL, info)) != 2703 BIG_OK) { 2704 goto ret; 2705 } 2706 2707 i = 0; 2708 while ((i < e) && 2709 (big_cmp_abs(&tmp, &big_One) != 0) && 2710 (big_cmp_abs(&tmp, &nminus1) != 0)) { 2711 if ((err = 2712 big_modexp_ext(&tmp, &tmp, &big_Two, n, NULL, info)) != 2713 BIG_OK) 2714 goto ret; 2715 i++; 2716 } 2717 2718 if (!((big_cmp_abs(&tmp, &nminus1) == 0) || 2719 ((i == 0) && (big_cmp_abs(&tmp, &big_One) == 0)))) { 2720 err = BIG_FALSE; 2721 goto ret; 2722 } 2723 2724 if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) { 2725 goto ret; 2726 } 2727 2728 if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) { 2729 goto ret; 2730 } 2731 if (big_cmp_abs(&tmp, n) == 0) { 2732 err = BIG_FALSE; 2733 goto ret; 2734 } 2735 2736 (void) big_copy(&o, &big_Two); 2737 do { 2738 (void) big_add_abs(&o, &o, &big_One); 2739 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) { 2740 goto ret; 2741 } 2742 (void) big_sub_pos(&tmp, &tmp, &big_Four); 2743 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) { 2744 goto ret; 
2745 } 2746 } while (jac != -1); 2747 2748 (void) big_add_abs(&tmp, n, &big_One); 2749 if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) { 2750 goto ret; 2751 } 2752 2753 if ((big_cmp_abs(&Lkminus1, &o) == 0) && 2754 (big_cmp_abs(&Lk, &big_Two) == 0)) { 2755 err = BIG_TRUE; 2756 } else { 2757 err = BIG_FALSE; 2758 } 2759 2760 ret: 2761 if (Lk.malloced) big_finish(&Lk); 2762 ret4: 2763 if (Lkminus1.malloced) big_finish(&Lkminus1); 2764 ret3: 2765 if (tmp.malloced) big_finish(&tmp); 2766 ret2: 2767 if (nminus1.malloced) big_finish(&nminus1); 2768 ret1: 2769 if (o.malloced) big_finish(&o); 2770 2771 return (err); 2772 } 2773 2774 2775 BIG_ERR_CODE 2776 big_isprime_pos(BIGNUM *n) 2777 { 2778 return (big_isprime_pos_ext(n, NULL)); 2779 } 2780 2781 2782 #define SIEVESIZE 1000 2783 2784 uint32_t smallprimes[] = 2785 { 2786 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 2787 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 2788 }; 2789 2790 2791 BIG_ERR_CODE 2792 big_nextprime_pos_ext(BIGNUM *result, BIGNUM *n, big_modexp_ncp_info_t *info) 2793 { 2794 BIG_ERR_CODE err; 2795 int sieve[SIEVESIZE]; 2796 int i; 2797 uint32_t off, p; 2798 2799 if ((err = big_copy(result, n)) != BIG_OK) { 2800 return (err); 2801 } 2802 result->value[0] |= 1; 2803 /* CONSTCOND */ 2804 while (1) { 2805 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0; 2806 for (i = 0; 2807 i < sizeof (smallprimes) / sizeof (smallprimes[0]); i++) { 2808 p = smallprimes[i]; 2809 off = big_modhalf_pos(result, p); 2810 off = p - off; 2811 if ((off % 2) == 1) { 2812 off = off + p; 2813 } 2814 off = off / 2; 2815 while (off < SIEVESIZE) { 2816 sieve[off] = 1; 2817 off = off + p; 2818 } 2819 } 2820 2821 for (i = 0; i < SIEVESIZE; i++) { 2822 if (sieve[i] == 0) { 2823 err = big_isprime_pos_ext(result, info); 2824 if (err != BIG_FALSE) { 2825 if (err != BIG_TRUE) { 2826 return (err); 2827 } else { 2828 goto out; 2829 } 2830 } 2831 2832 } 2833 if ((err = big_add_abs(result, result, &big_Two)) != 2834 BIG_OK) { 
2835 return (err); 2836 } 2837 } 2838 } 2839 2840 out: 2841 return (BIG_OK); 2842 } 2843 2844 2845 BIG_ERR_CODE 2846 big_nextprime_pos(BIGNUM *result, BIGNUM *n) 2847 { 2848 return (big_nextprime_pos_ext(result, n, NULL)); 2849 } 2850 2851 2852 BIG_ERR_CODE 2853 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n) 2854 { 2855 BIG_ERR_CODE err; 2856 2857 2858 if ((err = big_copy(result, n)) != BIG_OK) 2859 return (err); 2860 result->value[0] |= 1; 2861 while ((err = big_isprime_pos_ext(result, NULL)) != BIG_TRUE) { 2862 if (err != BIG_FALSE) 2863 return (err); 2864 if ((err = big_add_abs(result, result, &big_Two)) != BIG_OK) 2865 return (err); 2866 } 2867 return (BIG_OK); 2868 } 2869 2870 2871 /* 2872 * given m and e, computes the rest in the equation 2873 * gcd(m, e) = cm * m + ce * e 2874 */ 2875 BIG_ERR_CODE 2876 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e) 2877 { 2878 BIGNUM *xi, *ri, *riminus1, *riminus2, *t; 2879 BIGNUM *vmi, *vei, *vmiminus1, *veiminus1; 2880 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp; 2881 BIG_CHUNK_TYPE t1value[BIGTMPSIZE]; 2882 BIG_CHUNK_TYPE t2value[BIGTMPSIZE]; 2883 BIG_CHUNK_TYPE t3value[BIGTMPSIZE]; 2884 BIG_CHUNK_TYPE t4value[BIGTMPSIZE]; 2885 BIG_CHUNK_TYPE t5value[BIGTMPSIZE]; 2886 BIG_CHUNK_TYPE t6value[BIGTMPSIZE]; 2887 BIG_CHUNK_TYPE t7value[BIGTMPSIZE]; 2888 BIG_CHUNK_TYPE t8value[BIGTMPSIZE]; 2889 BIG_CHUNK_TYPE tmpvalue[BIGTMPSIZE]; 2890 BIG_ERR_CODE err; 2891 int len; 2892 2893 if (big_cmp_abs(m, e) >= 0) { 2894 len = m->len; 2895 } else { 2896 len = e->len; 2897 } 2898 2899 if ((err = big_init1(&t1, len, 2900 t1value, arraysize(t1value))) != BIG_OK) { 2901 return (err); 2902 } 2903 if ((err = big_init1(&t2, len, 2904 t2value, arraysize(t2value))) != BIG_OK) { 2905 goto ret1; 2906 } 2907 if ((err = big_init1(&t3, len, 2908 t3value, arraysize(t3value))) != BIG_OK) { 2909 goto ret2; 2910 } 2911 if ((err = big_init1(&t4, len, 2912 t4value, arraysize(t3value))) != BIG_OK) { 2913 goto ret3; 2914 } 2915 
if ((err = big_init1(&t5, len, 2916 t5value, arraysize(t5value))) != BIG_OK) { 2917 goto ret4; 2918 } 2919 if ((err = big_init1(&t6, len, 2920 t6value, arraysize(t6value))) != BIG_OK) { 2921 goto ret5; 2922 } 2923 if ((err = big_init1(&t7, len, 2924 t7value, arraysize(t7value))) != BIG_OK) { 2925 goto ret6; 2926 } 2927 if ((err = big_init1(&t8, len, 2928 t8value, arraysize(t8value))) != BIG_OK) { 2929 goto ret7; 2930 } 2931 2932 if ((err = big_init1(&tmp, 2 * len, 2933 tmpvalue, arraysize(tmpvalue))) != BIG_OK) { 2934 goto ret8; 2935 } 2936 2937 ri = &t1; 2938 ri->value[0] = 1; 2939 ri->len = 1; 2940 xi = &t2; 2941 riminus1 = &t3; 2942 riminus2 = &t4; 2943 vmi = &t5; 2944 vei = &t6; 2945 vmiminus1 = &t7; 2946 veiminus1 = &t8; 2947 2948 (void) big_copy(vmiminus1, &big_One); 2949 (void) big_copy(vmi, &big_One); 2950 (void) big_copy(veiminus1, &big_One); 2951 (void) big_copy(xi, &big_One); 2952 vei->len = 1; 2953 vei->value[0] = 0; 2954 2955 (void) big_copy(riminus1, m); 2956 (void) big_copy(ri, e); 2957 2958 while (!big_is_zero(ri)) { 2959 t = riminus2; 2960 riminus2 = riminus1; 2961 riminus1 = ri; 2962 ri = t; 2963 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) { 2964 goto ret; 2965 } 2966 if ((err = big_sub(vmiminus1, vmiminus1, &tmp)) != BIG_OK) { 2967 goto ret; 2968 } 2969 t = vmiminus1; 2970 vmiminus1 = vmi; 2971 vmi = t; 2972 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) { 2973 goto ret; 2974 } 2975 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) { 2976 goto ret; 2977 } 2978 t = veiminus1; 2979 veiminus1 = vei; 2980 vei = t; 2981 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) != 2982 BIG_OK) { 2983 goto ret; 2984 } 2985 } 2986 if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) { 2987 goto ret; 2988 } 2989 if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) { 2990 goto ret; 2991 } 2992 if (ce != NULL) { 2993 err = big_copy(ce, vei); 2994 } 2995 ret: 2996 if (tmp.malloced) big_finish(&tmp); 2997 ret8: 2998 if (t8.malloced) 
big_finish(&t8); 2999 ret7: 3000 if (t7.malloced) big_finish(&t7); 3001 ret6: 3002 if (t6.malloced) big_finish(&t6); 3003 ret5: 3004 if (t5.malloced) big_finish(&t5); 3005 ret4: 3006 if (t4.malloced) big_finish(&t4); 3007 ret3: 3008 if (t3.malloced) big_finish(&t3); 3009 ret2: 3010 if (t2.malloced) big_finish(&t2); 3011 ret1: 3012 if (t1.malloced) big_finish(&t1); 3013 3014 return (err); 3015 } 3016