1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #define big_div_pos_fast big_div_pos 30 31 #include "bignum.h" 32 33 /* 34 * Configuration guide 35 * ------------------- 36 * 37 * There are 4 preprocessor symbols used to configure the bignum 38 * implementation. This file contains no logic to configure based on 39 * processor; we leave that to the Makefiles to specify. 40 * 41 * USE_FLOATING_POINT 42 * Meaning: There is support for a fast floating-point implementation of 43 * Montgomery multiply. 44 * 45 * PSR_MUL 46 * Meaning: There are processor-specific versions of the low level 47 * functions to implement big_mul. Those functions are: big_mul_set_vec, 48 * big_mul_add_vec, big_mul_vec, and big_sqr_vec. PSR_MUL implies support 49 * for all 4 functions. You cannot pick and choose which subset of these 50 * functions to support; that would lead to a rat's nest of #ifdefs. 51 * 52 * HWCAP 53 * Meaning: Call multiply support functions through a function pointer. 
54 * On x86, there are multiple implementations for differnt hardware 55 * capabilities, such as MMX, SSE2, etc. Tests are made at run-time, when 56 * a function is first used. So, the support functions are called through 57 * a function pointer. There is no need for that on Sparc, because there 58 * is only one implementation; support functions are called directly. 59 * Later, if there were some new VIS instruction, or something, and a 60 * run-time test were needed, rather than variant kernel modules and 61 * libraries, then HWCAP would be defined for Sparc, as well. 62 * 63 * UMUL64 64 * Meaning: It is safe to use generic C code that assumes the existence 65 * of a 32 x 32 --> 64 bit unsigned multiply. If this is not defined, 66 * then the generic code for big_mul_add_vec() must necessarily be very slow, 67 * because it must fall back to using 16 x 16 --> 32 bit multiplication. 68 * 69 */ 70 71 72 #ifdef _KERNEL 73 74 #include <sys/types.h> 75 #include <sys/kmem.h> 76 #include <sys/param.h> 77 #include <sys/sunddi.h> 78 79 #define big_malloc(size) kmem_alloc(size, KM_NOSLEEP) 80 #define big_free(ptr, size) kmem_free(ptr, size) 81 82 void * 83 big_realloc(void *from, size_t oldsize, size_t newsize) 84 { 85 void *rv; 86 87 rv = kmem_alloc(newsize, KM_NOSLEEP); 88 if (rv != NULL) 89 bcopy(from, rv, oldsize); 90 kmem_free(from, oldsize); 91 return (rv); 92 } 93 94 #else /* _KERNEL */ 95 96 #include <stdlib.h> 97 #include <stdio.h> 98 99 #ifndef MALLOC_DEBUG 100 101 #define big_malloc(size) malloc(size) 102 #define big_free(ptr, size) free(ptr) 103 104 #else 105 106 void 107 big_free(void *ptr, size_t size) 108 { 109 printf("freed %d bytes at %p\n", size, ptr); 110 free(ptr); 111 } 112 113 void * 114 big_malloc(size_t size) 115 { 116 void *rv; 117 rv = malloc(size); 118 printf("malloced %d bytes, addr:%p\n", size, rv); 119 return (rv); 120 } 121 #endif /* MALLOC_DEBUG */ 122 123 #define big_realloc(x, y, z) realloc((x), (z)) 124 125 void 126 printbignum(char *aname, 
BIGNUM *a) 127 { 128 int i; 129 130 (void) printf("\n%s\n%d\n", aname, a->sign*a->len); 131 for (i = a->len - 1; i >= 0; i--) { 132 (void) printf("%08x ", a->value[i]); 133 if ((i % 8 == 0) && (i != 0)) 134 (void) printf("\n"); 135 } 136 (void) printf("\n"); 137 } 138 139 #endif /* _KERNEL */ 140 141 142 /* size in 32-bit words */ 143 BIG_ERR_CODE 144 big_init(BIGNUM *number, int size) 145 { 146 number->value = big_malloc(sizeof (uint32_t) * size); 147 if (number->value == NULL) { 148 return (BIG_NO_MEM); 149 } 150 number->size = size; 151 number->len = 0; 152 number->sign = 1; 153 number->malloced = 1; 154 return (BIG_OK); 155 } 156 157 /* size in 32-bit words */ 158 BIG_ERR_CODE 159 big_init1(BIGNUM *number, int size, uint32_t *buf, int bufsize) 160 { 161 if ((buf == NULL) || (size > bufsize)) { 162 number->value = big_malloc(sizeof (uint32_t) * size); 163 if (number->value == NULL) { 164 return (BIG_NO_MEM); 165 } 166 number->size = size; 167 number->malloced = 1; 168 } else { 169 number->value = buf; 170 number->size = bufsize; 171 number->malloced = 0; 172 } 173 number->len = 0; 174 number->sign = 1; 175 176 return (BIG_OK); 177 } 178 179 void 180 big_finish(BIGNUM *number) 181 { 182 if (number->malloced == 1) { 183 big_free(number->value, sizeof (uint32_t) * number->size); 184 number->malloced = 0; 185 } 186 } 187 188 /* 189 * bn->size should be at least (len + 3) / 4 190 * converts from byte-big-endian format to bignum format (words in 191 * little endian order, but bytes within the words big endian) 192 */ 193 void 194 bytestring2bignum(BIGNUM *bn, uchar_t *kn, size_t len) 195 { 196 int i, j, offs; 197 uint32_t word; 198 uchar_t *knwordp; 199 200 #ifdef _LP64 201 /* LINTED */ 202 offs = (uint32_t)len % sizeof (uint32_t); 203 /* LINTED */ 204 bn->len = (uint32_t)len / sizeof (uint32_t); 205 /* LINTED */ 206 for (i = 0; i < (uint32_t)len / sizeof (uint32_t); i++) { 207 #else /* !_LP64 */ 208 offs = len % sizeof (uint32_t); 209 bn->len = len / sizeof 
(uint32_t); 210 for (i = 0; i < len / sizeof (uint32_t); i++) { 211 #endif /* _LP64 */ 212 knwordp = &(kn[len - sizeof (uint32_t) * (i + 1)]); 213 word = knwordp[0]; 214 for (j = 1; j < sizeof (uint32_t); j++) { 215 word = (word << 8)+ knwordp[j]; 216 } 217 bn->value[i] = word; 218 } 219 if (offs > 0) { 220 word = kn[0]; 221 for (i = 1; i < offs; i++) word = (word << 8) + kn[i]; 222 bn->value[bn->len++] = word; 223 } 224 while ((bn->len > 1) && (bn->value[bn->len-1] == 0)) { 225 bn->len --; 226 } 227 } 228 229 /* 230 * copies the least significant len bytes if 231 * len < bn->len * sizeof (uint32_t) 232 * converts from bignum format to byte-big-endian format. 233 * bignum format is words in little endian order, 234 * but bytes within words in native byte order. 235 */ 236 void 237 bignum2bytestring(uchar_t *kn, BIGNUM *bn, size_t len) 238 { 239 int i, j, offs; 240 uint32_t word; 241 242 if (len < sizeof (uint32_t) * bn->len) { 243 #ifdef _LP64 244 /* LINTED */ 245 for (i = 0; i < (uint32_t)len / sizeof (uint32_t); i++) { 246 #else /* !_LP64 */ 247 for (i = 0; i < len / sizeof (uint32_t); i++) { 248 #endif /* _LP64 */ 249 word = bn->value[i]; 250 for (j = 0; j < sizeof (uint32_t); j++) { 251 kn[len - sizeof (uint32_t) * i - j - 1] = 252 word & 0xff; 253 word = word >> 8; 254 } 255 } 256 #ifdef _LP64 257 /* LINTED */ 258 offs = (uint32_t)len % sizeof (uint32_t); 259 #else /* !_LP64 */ 260 offs = len % sizeof (uint32_t); 261 #endif /* _LP64 */ 262 if (offs > 0) { 263 word = bn->value[len / sizeof (uint32_t)]; 264 #ifdef _LP64 265 /* LINTED */ 266 for (i = (uint32_t)len % sizeof (uint32_t); 267 i > 0; i --) { 268 #else /* !_LP64 */ 269 for (i = len % sizeof (uint32_t); i > 0; i --) { 270 #endif /* _LP64 */ 271 kn[i - 1] = word & 0xff; 272 word = word >> 8; 273 } 274 } 275 } else { 276 for (i = 0; i < bn->len; i++) { 277 word = bn->value[i]; 278 for (j = 0; j < sizeof (uint32_t); j++) { 279 kn[len - sizeof (uint32_t) * i - j - 1] = 280 word & 0xff; 281 word = word >> 8; 
282 } 283 } 284 #ifdef _LP64 285 /* LINTED */ 286 for (i = 0; i < (uint32_t)len - sizeof (uint32_t) * bn->len; 287 i++) { 288 #else /* !_LP64 */ 289 for (i = 0; i < len - sizeof (uint32_t) * bn->len; i++) { 290 #endif /* _LP64 */ 291 kn[i] = 0; 292 } 293 } 294 } 295 296 297 int 298 big_bitlength(BIGNUM *a) 299 { 300 int l, b; 301 uint32_t c; 302 303 l = a->len - 1; 304 while ((l > 0) && (a->value[l] == 0)) { 305 l--; 306 } 307 b = sizeof (uint32_t) * 8; 308 c = a->value[l]; 309 while ((b > 1) && ((c & 0x80000000) == 0)) { 310 c = c << 1; 311 b--; 312 } 313 return (l * (int)sizeof (uint32_t) * 8 + b); 314 } 315 316 317 BIG_ERR_CODE 318 big_copy(BIGNUM *dest, BIGNUM *src) 319 { 320 uint32_t *newptr; 321 int i, len; 322 323 len = src->len; 324 while ((len > 1) && (src->value[len - 1] == 0)) 325 len--; 326 src->len = len; 327 if (dest->size < len) { 328 if (dest->malloced == 1) { 329 newptr = (uint32_t *)big_realloc(dest->value, 330 sizeof (uint32_t) * dest->size, 331 sizeof (uint32_t) * len); 332 } else { 333 newptr = (uint32_t *) 334 big_malloc(sizeof (uint32_t) * len); 335 if (newptr != NULL) dest->malloced = 1; 336 } 337 if (newptr == NULL) 338 return (BIG_NO_MEM); 339 dest->value = newptr; 340 dest->size = len; 341 } 342 dest->len = len; 343 dest->sign = src->sign; 344 for (i = 0; i < len; i++) dest->value[i] = src->value[i]; 345 346 return (BIG_OK); 347 } 348 349 350 BIG_ERR_CODE 351 big_extend(BIGNUM *number, int size) 352 { 353 uint32_t *newptr; 354 int i; 355 356 if (number->size >= size) 357 return (BIG_OK); 358 if (number->malloced) { 359 number->value = 360 big_realloc(number->value, 361 sizeof (uint32_t) * number->size, 362 sizeof (uint32_t) * size); 363 } else { 364 newptr = big_malloc(sizeof (uint32_t) * size); 365 if (newptr != NULL) { 366 for (i = 0; i < number->size; i++) { 367 newptr[i] = number->value[i]; 368 } 369 } 370 number->value = newptr; 371 } 372 373 if (number->value == NULL) 374 return (BIG_NO_MEM); 375 376 number->size = size; 377 
number->malloced = 1; 378 return (BIG_OK); 379 } 380 381 382 int 383 big_is_zero(BIGNUM *n) 384 { 385 int i, result; 386 387 result = 1; 388 for (i = 0; i < n->len; i++) 389 if (n->value[i] != 0) result = 0; 390 return (result); 391 } 392 393 394 BIG_ERR_CODE 395 big_add_abs(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 396 { 397 int i, shorter, longer; 398 uint32_t cy, ai; 399 uint32_t *r, *a, *b, *c; 400 BIG_ERR_CODE err; 401 402 if (aa->len > bb->len) { 403 shorter = bb->len; 404 longer = aa->len; 405 c = aa->value; 406 } else { 407 shorter = aa->len; 408 longer = bb->len; 409 c = bb->value; 410 } 411 if (result->size < longer + 1) { 412 err = big_extend(result, longer + 1); 413 if (err != BIG_OK) 414 return (err); 415 } 416 417 r = result->value; 418 a = aa->value; 419 b = bb->value; 420 cy = 0; 421 for (i = 0; i < shorter; i++) { 422 ai = a[i]; 423 r[i] = ai + b[i] + cy; 424 if (r[i] > ai) cy = 0; 425 else if (r[i] < ai) cy = 1; 426 } 427 for (; i < longer; i++) { 428 ai = c[i]; 429 r[i] = ai + cy; 430 if (r[i] >= ai) cy = 0; 431 } 432 if (cy == 1) { 433 r[i] = cy; 434 result->len = longer + 1; 435 } else { 436 result->len = longer; 437 } 438 result->sign = 1; 439 return (BIG_OK); 440 } 441 442 443 /* caller must make sure that result has at least len words allocated */ 444 void 445 big_sub_vec(uint32_t *r, uint32_t *a, uint32_t *b, int len) 446 { 447 int i; 448 uint32_t cy, ai; 449 450 cy = 1; 451 for (i = 0; i < len; i++) { 452 ai = a[i]; 453 r[i] = ai + (~b[i]) + cy; 454 if (r[i] > ai) cy = 0; 455 else if (r[i] < ai) cy = 1; 456 } 457 } 458 459 460 /* result=aa-bb it is assumed that aa>=bb */ 461 BIG_ERR_CODE 462 big_sub_pos(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 463 { 464 int i, shorter; 465 uint32_t cy, ai; 466 uint32_t *r, *a, *b; 467 BIG_ERR_CODE err; 468 469 if (aa->len > bb->len) shorter = bb->len; 470 else shorter = aa->len; 471 if (result->size < aa->len) { 472 err = big_extend(result, aa->len); 473 if (err != BIG_OK) 474 return (err); 475 } 476 477 r = 
result->value; 478 a = aa->value; 479 b = bb->value; 480 result->len = aa->len; 481 cy = 1; 482 for (i = 0; i < shorter; i++) { 483 ai = a[i]; 484 r[i] = ai + (~b[i]) + cy; 485 if (r[i] > ai) cy = 0; 486 else if (r[i] < ai) cy = 1; 487 } 488 for (; i < aa->len; i++) { 489 ai = a[i]; 490 r[i] = ai + (~0) + cy; 491 if (r[i] < ai) cy = 1; 492 } 493 result->sign = 1; 494 if (cy == 0) 495 return (BIG_INVALID_ARGS); 496 else 497 return (BIG_OK); 498 } 499 500 501 /* returns -1 if |aa|<|bb|, 0 if |aa|==|bb| 1 if |aa|>|bb| */ 502 int 503 big_cmp_abs(BIGNUM *aa, BIGNUM *bb) 504 { 505 int i; 506 507 if (aa->len > bb->len) { 508 for (i = aa->len - 1; i > bb->len - 1; i--) { 509 if (aa->value[i] > 0) 510 return (1); 511 } 512 } else if (aa->len < bb->len) { 513 for (i = bb->len - 1; i > aa->len - 1; i--) { 514 if (bb->value[i] > 0) 515 return (-1); 516 } 517 } else i = aa->len-1; 518 for (; i >= 0; i--) { 519 if (aa->value[i] > bb->value[i]) 520 return (1); 521 else if (aa->value[i] < bb->value[i]) 522 return (-1); 523 } 524 525 return (0); 526 } 527 528 529 BIG_ERR_CODE 530 big_sub(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 531 { 532 BIG_ERR_CODE err; 533 534 if ((bb->sign == -1) && (aa->sign == 1)) { 535 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) 536 return (err); 537 result->sign = 1; 538 } else if ((aa->sign == -1) && (bb->sign == 1)) { 539 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) 540 return (err); 541 result->sign = -1; 542 } else if ((aa->sign == 1) && (bb->sign == 1)) { 543 if (big_cmp_abs(aa, bb) >= 0) { 544 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) 545 return (err); 546 result->sign = 1; 547 } else { 548 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) 549 return (err); 550 result->sign = -1; 551 } 552 } else { 553 if (big_cmp_abs(aa, bb) >= 0) { 554 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) 555 return (err); 556 result->sign = -1; 557 } else { 558 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) 559 return (err); 560 result->sign 
= 1; 561 } 562 } 563 return (BIG_OK); 564 } 565 566 567 568 BIG_ERR_CODE 569 big_add(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 570 { 571 BIG_ERR_CODE err; 572 573 if ((bb->sign == -1) && (aa->sign == -1)) { 574 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) 575 return (err); 576 result->sign = -1; 577 } else if ((aa->sign == 1) && (bb->sign == 1)) { 578 if ((err = big_add_abs(result, aa, bb)) != BIG_OK) 579 return (err); 580 result->sign = 1; 581 } else if ((aa->sign == 1) && (bb->sign == -1)) { 582 if (big_cmp_abs(aa, bb) >= 0) { 583 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) 584 return (err); 585 result->sign = 1; 586 } else { 587 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) 588 return (err); 589 result->sign = -1; 590 } 591 } else { 592 if (big_cmp_abs(aa, bb) >= 0) { 593 if ((err = big_sub_pos(result, aa, bb)) != BIG_OK) 594 return (err); 595 result->sign = -1; 596 } else { 597 if ((err = big_sub_pos(result, bb, aa)) != BIG_OK) 598 return (err); 599 result->sign = 1; 600 } 601 } 602 return (BIG_OK); 603 } 604 605 606 /* result = aa/2 aa must be positive */ 607 BIG_ERR_CODE 608 big_half_pos(BIGNUM *result, BIGNUM *aa) 609 { 610 BIG_ERR_CODE err; 611 int i; 612 uint32_t cy, cy1; 613 uint32_t *a, *r; 614 615 if (result->size < aa->len) { 616 err = big_extend(result, aa->len); 617 if (err != BIG_OK) 618 return (err); 619 } 620 621 result->len = aa->len; 622 a = aa->value; 623 r = result->value; 624 cy = 0; 625 for (i = aa->len-1; i >= 0; i--) { 626 cy1 = a[i] << 31; 627 r[i] = (cy|(a[i] >> 1)); 628 cy = cy1; 629 } 630 if (r[result->len-1] == 0) result->len--; 631 return (BIG_OK); 632 } 633 634 /* result = aa*2 aa must be positive */ 635 BIG_ERR_CODE 636 big_double(BIGNUM *result, BIGNUM *aa) 637 { 638 BIG_ERR_CODE err; 639 int i, rsize; 640 uint32_t cy, cy1; 641 uint32_t *a, *r; 642 643 if ((aa->len > 0) && ((aa->value[aa->len - 1] & 0x80000000) != 0)) 644 rsize = aa->len + 1; 645 else rsize = aa->len; 646 647 if (result->size < rsize) { 648 err = 
big_extend(result, rsize); 649 if (err != BIG_OK) 650 return (err); 651 } 652 653 a = aa->value; 654 r = result->value; 655 if (rsize == aa->len + 1) r[rsize - 1] = 1; 656 cy = 0; 657 for (i = 0; i < aa->len; i++) { 658 cy1 = a[i] >> 31; 659 r[i] = (cy | (a[i] << 1)); 660 cy = cy1; 661 } 662 result->len = rsize; 663 return (BIG_OK); 664 } 665 666 /* returns aa mod b, aa must be nonneg, b must be a max 16-bit integer */ 667 uint32_t 668 big_mod16_pos(BIGNUM *aa, uint32_t b) 669 { 670 int i; 671 uint32_t rem; 672 673 if (aa->len == 0) 674 return (0); 675 rem = aa->value[aa->len - 1] % b; 676 for (i = aa->len - 2; i >= 0; i--) { 677 rem = ((rem << 16) | (aa->value[i] >> 16)) % b; 678 rem = ((rem << 16) | (aa->value[i] & 0xffff)) % b; 679 } 680 return (rem); 681 } 682 683 684 /* 685 * result = aa - (2^32)^lendiff * bb 686 * result->size should be at least aa->len at entry 687 * aa, bb, and result should be positive 688 */ 689 void 690 big_sub_pos_high(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 691 { 692 int i, lendiff; 693 BIGNUM res1, aa1; 694 695 lendiff = aa->len - bb->len; 696 res1.size = result->size - lendiff; 697 res1.malloced = 0; 698 res1.value = result->value + lendiff; 699 aa1.size = aa->size - lendiff; 700 aa1.value = aa->value + lendiff; 701 aa1.len = bb->len; 702 aa1.sign = 1; 703 (void) big_sub_pos(&res1, &aa1, bb); 704 if (result->value != aa->value) { 705 for (i = 0; i < lendiff; i++) { 706 result->value[i] = aa->value[i]; 707 } 708 } 709 result->len = aa->len; 710 } 711 712 713 /* 714 * returns 1, 0, or -1 depending on whether |aa| > , ==, or < 715 * (2^32)^lendiff * |bb| 716 * aa->len should be >= bb->len 717 */ 718 int 719 big_cmp_abs_high(BIGNUM *aa, BIGNUM *bb) 720 { 721 int lendiff; 722 BIGNUM aa1; 723 724 lendiff = aa->len - bb->len; 725 aa1.len = bb->len; 726 aa1.size = aa->size - lendiff; 727 aa1.malloced = 0; 728 aa1.value = aa->value + lendiff; 729 return (big_cmp_abs(&aa1, bb)); 730 } 731 732 733 /* 734 * result = aa * b where b is a max. 
 * 16-bit positive integer.
 * result should have enough space allocated.
 */
void
big_mul16_low(BIGNUM *result, BIGNUM *aa, uint32_t b)
{
	int i;
	uint32_t t1, t2, ai, cy;
	uint32_t *a, *r;

	a = aa->value;
	r = result->value;
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		/* 32x16 multiply done as two 16x16 partial products */
		ai = a[i];
		t1 = (ai & 0xffff) * b + cy;
		t2 = (ai >> 16) * b + (t1 >> 16);
		r[i] = (t1 & 0xffff) | (t2 << 16);
		cy = t2 >> 16;
	}
	r[i] = cy;
	result->len = aa->len + 1;
	result->sign = aa->sign;
}


/*
 * result = aa * b * 2^16 where b is a max. 16-bit positive integer.
 * result should have enough space allocated.
 */
void
big_mul16_high(BIGNUM *result, BIGNUM *aa, uint32_t b)
{
	int i;
	uint32_t t1, t2, ai, cy, ri;
	uint32_t *a, *r;

	a = aa->value;
	r = result->value;
	cy = 0;
	ri = 0;
	for (i = 0; i < aa->len; i++) {
		/* as big_mul16_low, but the product is shifted up 16 bits */
		ai = a[i];
		t1 = (ai & 0xffff) * b + cy;
		t2 = (ai >> 16) * b + (t1 >> 16);
		r[i] = (t1 << 16) + ri;
		ri = t2 & 0xffff;
		cy = t2 >> 16;
	}
	r[i] = (cy << 16) + ri;
	result->len = aa->len + 1;
	result->sign = aa->sign;
}

/* it is assumed that result->size is big enough */
/* left shift by offs bits; offs must be in [0, 31] (offs==0 short-cuts) */
void
big_shiftleft(BIGNUM *result, BIGNUM *aa, int offs)
{
	int i;
	uint32_t cy, ai;

	if (offs == 0) {
		if (result != aa) {
			(void) big_copy(result, aa);
		}
		return;
	}
	cy = 0;
	for (i = 0; i < aa->len; i++) {
		ai = aa->value[i];
		result->value[i] = (ai << offs) | cy;
		cy = ai >> (32 - offs);
	}
	if (cy != 0) {
		result->len = aa->len + 1;
		result->value[result->len - 1] = cy;
	} else {
		result->len = aa->len;
	}
	result->sign = aa->sign;
}

/* it is assumed that result->size is big enough */
/* right shift by offs bits; offs must be in [0, 31] (offs==0 short-cuts) */
void
big_shiftright(BIGNUM *result, BIGNUM *aa, int offs)
{
	int i;
	uint32_t cy, ai;

	if (offs == 0) {
		if (result != aa) {
			(void) big_copy(result, aa);
		}
		return;
	}
	cy = aa->value[0] >> offs;
	for (i = 1; i < aa->len; i++) {
		ai = aa->value[i];
		result->value[i-1] = (ai << (32 - offs)) | cy;
		cy = ai >> offs;
	}
	result->len = aa->len;
	result->value[result->len - 1] = cy;
	result->sign = aa->sign;
}


/*
 * result = aa/bb   remainder = aa mod bb
 * it is assumed that aa and bb are positive
 *
 * Schoolbook division, 16 quotient bits at a time.  The divisor is
 * first normalized (shifted left by `offs') so its two top 16-bit
 * digits form a value with the high bit set; each round then estimates
 * a quotient digit from the top words and corrects it by repeated
 * subtraction (at most a few iterations).
 * Note: normalizes (strips high zero words of) aa and bb in place.
 * Either result or remainder (or both) may be NULL.
 */
BIG_ERR_CODE
big_div_pos_fast(BIGNUM *result, BIGNUM *remainder, BIGNUM *aa, BIGNUM *bb)
{
	BIG_ERR_CODE err;
	int i, alen, blen, tlen, rlen, offs;
	uint32_t higha, highb, coeff;
	uint64_t highb64;
	uint32_t *a, *b;
	BIGNUM bbhigh, bblow, tresult, tmp1, tmp2;
	uint32_t tmp1value[BIGTMPSIZE];
	uint32_t tmp2value[BIGTMPSIZE];
	uint32_t tresultvalue[BIGTMPSIZE];
	uint32_t bblowvalue[BIGTMPSIZE];
	uint32_t bbhighvalue[BIGTMPSIZE];

	a = aa->value;
	b = bb->value;
	alen = aa->len;
	blen = bb->len;
	while ((alen > 1) && (a[alen - 1] == 0)) alen = alen - 1;
	aa->len = alen;
	while ((blen > 1) && (b[blen - 1] == 0)) blen = blen - 1;
	bb->len = blen;
	if ((blen == 1) && (b[0] == 0))
		return (BIG_DIV_BY_0);

	/* trivial case: aa < bb -> quotient 0, remainder aa */
	if (big_cmp_abs(aa, bb) < 0) {
		if ((remainder != NULL) &&
		    ((err = big_copy(remainder, aa)) != BIG_OK))
			return (err);
		if (result != NULL) {
			result->len = 1;
			result->sign = 1;
			result->value[0] = 0;
		}
		return (BIG_OK);
	}

	if ((err = big_init1(&bblow, blen + 1,
	    bblowvalue, arraysize(bblowvalue))) != BIG_OK)
		return (err);

	if ((err = big_init1(&bbhigh, blen + 1,
	    bbhighvalue, arraysize(bbhighvalue))) != BIG_OK)
		goto ret1;

	if ((err = big_init1(&tmp1, alen + 2,
	    tmp1value, arraysize(tmp1value))) != BIG_OK)
		goto ret2;

	if ((err = big_init1(&tmp2, blen + 2,
	    tmp2value, arraysize(tmp2value))) != BIG_OK)
		goto ret3;

	if ((err = big_init1(&tresult, alen - blen + 2,
	    tresultvalue, arraysize(tresultvalue))) != BIG_OK)
		goto ret4;

	/*
	 * Compute the normalization shift from the top 48+ bits of bb,
	 * leaving highb = the top 32 bits of the normalized divisor.
	 */
	offs = 0;
	if (blen > 1) {
		highb64 = (((uint64_t)(b[blen - 1])) << 32) |
		    ((uint64_t)(b[blen - 2]));
	} else {
		highb64 = (((uint64_t)(b[blen - 1])) << 32);
	}
	if (highb64 >= 0x1000000000000ull) {
		highb64 = highb64 >> 16;
		offs = 16;
	}
	while ((highb64 & 0x800000000000ull) == 0) {
		highb64 = highb64 << 1;
		offs++;
	}
#ifdef _LP64
	/* LINTED */
	highb = (highb64 >> 32) & 0xffffffff;
#else	/* !_LP64 */
	highb = highb64 >> 32;
#endif	/* _LP64 */

	/* bblow = bb << offs;  bbhigh = bblow << 16 (divisor for the */
	/* high quotient digit of each round) */
	big_shiftleft(&bblow, bb, offs);
	if (offs <= 15) {
		big_shiftleft(&bbhigh, &bblow, 16);
	} else {
		big_shiftright(&bbhigh, &bblow, 16);
	}
	if (bbhigh.value[bbhigh.len - 1] == 0) {
		bbhigh.len--;
	} else {
		bbhigh.value[bbhigh.len] = 0;
	}

	big_shiftleft(&tmp1, aa, offs);
	rlen = tmp1.len - bblow.len + 1;
	tresult.len = rlen;

	tmp1.len++;
	tlen = tmp1.len;
	tmp1.value[tmp1.len - 1] = 0;
	for (i = 0; i < rlen; i++) {
		/* estimate the high 16 quotient bits of this round */
		higha = (tmp1.value[tlen - 1] << 16) +
		    (tmp1.value[tlen - 2] >> 16);
		coeff = higha / (highb + 1);
		big_mul16_high(&tmp2, &bblow, coeff);
		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
		bbhigh.len++;
		while (tmp1.value[tlen - 1] > 0) {
			/* correct an underestimated quotient digit */
			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
			coeff++;
		}
		bbhigh.len--;
		tlen--;
		tmp1.len--;
		while (big_cmp_abs_high(&tmp1, &bbhigh) >= 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bbhigh);
			coeff++;
		}
		tresult.value[rlen - i - 1] = coeff << 16;
		/* now the low 16 quotient bits of this round */
		higha = tmp1.value[tlen - 1];
		coeff = higha / (highb + 1);
		big_mul16_low(&tmp2, &bblow, coeff);
		tmp2.len--;
		big_sub_pos_high(&tmp1, &tmp1, &tmp2);
		while (big_cmp_abs_high(&tmp1, &bblow) >= 0) {
			big_sub_pos_high(&tmp1, &tmp1, &bblow);
			coeff++;
		}
		tresult.value[rlen - i - 1] =
		    tresult.value[rlen - i - 1] + coeff;
	}

	/* undo the normalization shift to recover the true remainder */
	big_shiftright(&tmp1, &tmp1, offs);

	err = BIG_OK;

	if ((remainder != NULL) &&
	    ((err = big_copy(remainder, &tmp1)) != BIG_OK))
		goto ret;

	if (result != NULL)
		err = big_copy(result, &tresult);

ret:
	big_finish(&tresult);
ret4:
	big_finish(&tmp1);
ret3:
	big_finish(&tmp2);
ret2:
	big_finish(&bbhigh);
ret1:
	big_finish(&bblow);
	return (err);
}

/*
 * If there is no processor-specific integer implementation of
 * the lower level multiply functions, then this code is provided
 * for big_mul_set_vec(), big_mul_add_vec(), big_mul_vec() and
 * big_sqr_vec().
 *
 * There are two generic implementations.  One that assumes that
 * there is hardware and C compiler support for a 32 x 32 --> 64
 * bit unsigned multiply, but otherwise is not specific to any
 * processor, platform, or ISA.
 *
 * The other makes very few assumptions about hardware capabilities.
 * It does not even assume that there is any implementation of a
 * 32 x 32 --> 64 bit multiply that is accessible to C code and
 * appropriate to use.  It instead constructs 32 x 32 --> 64 bit
 * multiplies from 16 x 16 --> 32 bit multiplies.
1016 * 1017 */ 1018 1019 #if !defined(PSR_MUL) 1020 1021 #ifdef UMUL64 1022 1023 #define UNROLL8 1024 1025 #define MUL_SET_VEC_ROUND_PREFETCH(R) \ 1026 p = pf * d; \ 1027 pf = (uint64_t)a[R+1]; \ 1028 t = p + cy; \ 1029 r[R] = (uint32_t)t; \ 1030 cy = t >> 32 1031 1032 #define MUL_SET_VEC_ROUND_NOPREFETCH(R) \ 1033 p = pf * d; \ 1034 t = p + cy; \ 1035 r[R] = (uint32_t)t; \ 1036 cy = t >> 32 1037 1038 #define MUL_ADD_VEC_ROUND_PREFETCH(R) \ 1039 t = (uint64_t)r[R]; \ 1040 p = pf * d; \ 1041 pf = (uint64_t)a[R+1]; \ 1042 t = p + t + cy; \ 1043 r[R] = (uint32_t)t; \ 1044 cy = t >> 32 1045 1046 #define MUL_ADD_VEC_ROUND_NOPREFETCH(R) \ 1047 t = (uint64_t)r[R]; \ 1048 p = pf * d; \ 1049 t = p + t + cy; \ 1050 r[R] = (uint32_t)t; \ 1051 cy = t >> 32 1052 1053 #ifdef UNROLL8 1054 1055 #define UNROLL 8 1056 1057 /* 1058 * r = a * b 1059 * where r and a are vectors; b is a single 32-bit digit 1060 */ 1061 1062 uint32_t 1063 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t b) 1064 { 1065 uint64_t d, pf, p, t, cy; 1066 1067 if (len == 0) 1068 return (0); 1069 cy = 0; 1070 d = (uint64_t)b; 1071 pf = (uint64_t)a[0]; 1072 while (len > UNROLL) { 1073 MUL_SET_VEC_ROUND_PREFETCH(0); 1074 MUL_SET_VEC_ROUND_PREFETCH(1); 1075 MUL_SET_VEC_ROUND_PREFETCH(2); 1076 MUL_SET_VEC_ROUND_PREFETCH(3); 1077 MUL_SET_VEC_ROUND_PREFETCH(4); 1078 MUL_SET_VEC_ROUND_PREFETCH(5); 1079 MUL_SET_VEC_ROUND_PREFETCH(6); 1080 MUL_SET_VEC_ROUND_PREFETCH(7); 1081 r += UNROLL; 1082 a += UNROLL; 1083 len -= UNROLL; 1084 } 1085 if (len == UNROLL) { 1086 MUL_SET_VEC_ROUND_PREFETCH(0); 1087 MUL_SET_VEC_ROUND_PREFETCH(1); 1088 MUL_SET_VEC_ROUND_PREFETCH(2); 1089 MUL_SET_VEC_ROUND_PREFETCH(3); 1090 MUL_SET_VEC_ROUND_PREFETCH(4); 1091 MUL_SET_VEC_ROUND_PREFETCH(5); 1092 MUL_SET_VEC_ROUND_PREFETCH(6); 1093 MUL_SET_VEC_ROUND_NOPREFETCH(7); 1094 return ((uint32_t)cy); 1095 } 1096 while (len > 1) { 1097 MUL_SET_VEC_ROUND_PREFETCH(0); 1098 ++r; 1099 ++a; 1100 --len; 1101 } 1102 if (len > 0) { 1103 
MUL_SET_VEC_ROUND_NOPREFETCH(0); 1104 } 1105 return ((uint32_t)cy); 1106 } 1107 1108 /* 1109 * r += a * b 1110 * where r and a are vectors; b is a single 32-bit digit 1111 */ 1112 1113 uint32_t 1114 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t b) 1115 { 1116 uint64_t d, pf, p, t, cy; 1117 1118 if (len == 0) 1119 return (0); 1120 cy = 0; 1121 d = (uint64_t)b; 1122 pf = (uint64_t)a[0]; 1123 while (len > 8) { 1124 MUL_ADD_VEC_ROUND_PREFETCH(0); 1125 MUL_ADD_VEC_ROUND_PREFETCH(1); 1126 MUL_ADD_VEC_ROUND_PREFETCH(2); 1127 MUL_ADD_VEC_ROUND_PREFETCH(3); 1128 MUL_ADD_VEC_ROUND_PREFETCH(4); 1129 MUL_ADD_VEC_ROUND_PREFETCH(5); 1130 MUL_ADD_VEC_ROUND_PREFETCH(6); 1131 MUL_ADD_VEC_ROUND_PREFETCH(7); 1132 r += 8; 1133 a += 8; 1134 len -= 8; 1135 } 1136 if (len == 8) { 1137 MUL_ADD_VEC_ROUND_PREFETCH(0); 1138 MUL_ADD_VEC_ROUND_PREFETCH(1); 1139 MUL_ADD_VEC_ROUND_PREFETCH(2); 1140 MUL_ADD_VEC_ROUND_PREFETCH(3); 1141 MUL_ADD_VEC_ROUND_PREFETCH(4); 1142 MUL_ADD_VEC_ROUND_PREFETCH(5); 1143 MUL_ADD_VEC_ROUND_PREFETCH(6); 1144 MUL_ADD_VEC_ROUND_NOPREFETCH(7); 1145 return ((uint32_t)cy); 1146 } 1147 while (len > 1) { 1148 MUL_ADD_VEC_ROUND_PREFETCH(0); 1149 ++r; 1150 ++a; 1151 --len; 1152 } 1153 if (len > 0) { 1154 MUL_ADD_VEC_ROUND_NOPREFETCH(0); 1155 } 1156 return ((uint32_t)cy); 1157 } 1158 #endif /* UNROLL8 */ 1159 1160 void 1161 big_sqr_vec(uint32_t *r, uint32_t *a, int len) 1162 { 1163 uint32_t *tr, *ta; 1164 int tlen, row, col; 1165 uint64_t p, s, t, t2, cy; 1166 uint32_t d; 1167 1168 tr = r + 1; 1169 ta = a; 1170 tlen = len - 1; 1171 tr[tlen] = big_mul_set_vec(tr, ta + 1, tlen, ta[0]); 1172 while (--tlen > 0) { 1173 tr += 2; 1174 ++ta; 1175 tr[tlen] = big_mul_add_vec(tr, ta + 1, tlen, ta[0]); 1176 } 1177 s = (uint64_t)a[0]; 1178 s = s * s; 1179 r[0] = (uint32_t)s; 1180 cy = s >> 32; 1181 p = ((uint64_t)r[1] << 1) + cy; 1182 r[1] = (uint32_t)p; 1183 cy = p >> 32; 1184 row = 1; 1185 col = 2; 1186 while (row < len) { 1187 s = (uint64_t)a[row]; 1188 s = s * s; 1189 
p = (uint64_t)r[col] << 1; 1190 t = p + s; 1191 d = (uint32_t)t; 1192 t2 = (uint64_t)d + cy; 1193 r[col] = (uint32_t)t2; 1194 cy = (t >> 32) + (t2 >> 32); 1195 if (row == len - 1) 1196 break; 1197 p = ((uint64_t)r[col+1] << 1) + cy; 1198 r[col+1] = (uint32_t)p; 1199 cy = p >> 32; 1200 ++row; 1201 col += 2; 1202 } 1203 r[col+1] = (uint32_t)cy; 1204 } 1205 1206 #else /* ! UMUL64 */ 1207 1208 /* 1209 * r = r + a * digit, r and a are vectors of length len 1210 * returns the carry digit 1211 */ 1212 uint32_t 1213 big_mul_add_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit) 1214 { 1215 uint32_t cy, cy1, retcy, dlow, dhigh; 1216 int i; 1217 1218 cy1 = 0; 1219 dlow = digit & 0xffff; 1220 dhigh = digit >> 16; 1221 for (i = 0; i < len; i++) { 1222 cy = (cy1 >> 16) + dlow * (a[i] & 0xffff) + (r[i] & 0xffff); 1223 cy1 = (cy >> 16) + dlow * (a[i]>>16) + (r[i] >> 16); 1224 r[i] = (cy & 0xffff) | (cy1 << 16); 1225 } 1226 retcy = cy1 >> 16; 1227 1228 cy1 = r[0] & 0xffff; 1229 for (i = 0; i < len - 1; i++) { 1230 cy = (cy1 >> 16) + dhigh * (a[i] & 0xffff) + (r[i] >> 16); 1231 r[i] = (cy1 & 0xffff) | (cy << 16); 1232 cy1 = (cy >> 16) + dhigh * (a[i] >> 16) + (r[i + 1] & 0xffff); 1233 } 1234 cy = (cy1 >> 16) + dhigh * (a[len - 1] & 0xffff) + (r[len - 1] >> 16); 1235 r[len - 1] = (cy1 & 0xffff) | (cy << 16); 1236 retcy = (cy >> 16) + dhigh * (a[len - 1] >> 16) + retcy; 1237 1238 return (retcy); 1239 } 1240 1241 /* 1242 * r = a * digit, r and a are vectors of length len 1243 * returns the carry digit 1244 */ 1245 uint32_t 1246 big_mul_set_vec(uint32_t *r, uint32_t *a, int len, uint32_t digit) 1247 { 1248 return (big_mul_add_vec(r, a, len, digit)); 1249 } 1250 1251 void 1252 big_sqr_vec(uint32_t *r, uint32_t *a, int len) 1253 { 1254 int i; 1255 1256 r[len] = big_mul_set_vec(r, a, len, a[0]); 1257 for (i = 1; i < len; ++i) 1258 r[len + i] = big_mul_add_vec(r+i, a, len, a[i]); 1259 } 1260 1261 #endif /* UMUL64 */ 1262 1263 void 1264 big_mul_vec(uint32_t *r, uint32_t *a, int alen, 
uint32_t *b, int blen) 1265 { 1266 int i; 1267 1268 r[alen] = big_mul_set_vec(r, a, alen, b[0]); 1269 for (i = 1; i < blen; ++i) 1270 r[alen + i] = big_mul_add_vec(r+i, a, alen, b[i]); 1271 } 1272 1273 1274 #endif /* ! PSR_MUL */ 1275 1276 1277 /* 1278 * result = aa * bb result->value should be big enough to hold the result 1279 * 1280 * Implementation: Standard grammar school algorithm 1281 * 1282 */ 1283 1284 BIG_ERR_CODE 1285 big_mul(BIGNUM *result, BIGNUM *aa, BIGNUM *bb) 1286 { 1287 BIGNUM tmp1; 1288 uint32_t tmp1value[BIGTMPSIZE]; 1289 uint32_t *r, *t, *a, *b; 1290 BIG_ERR_CODE err; 1291 int i, alen, blen, rsize, sign, diff; 1292 1293 if (aa == bb) { 1294 diff = 0; 1295 } else { 1296 diff = big_cmp_abs(aa, bb); 1297 if (diff < 0) { 1298 BIGNUM *tt; 1299 tt = aa; 1300 aa = bb; 1301 bb = tt; 1302 } 1303 } 1304 a = aa->value; 1305 b = bb->value; 1306 alen = aa->len; 1307 blen = bb->len; 1308 while ((alen > 1) && (a[alen - 1] == 0)) alen--; 1309 aa->len = alen; 1310 while ((blen > 1) && (b[blen - 1] == 0)) blen--; 1311 bb->len = blen; 1312 1313 rsize = alen + blen; 1314 if (result->size < rsize) { 1315 err = big_extend(result, rsize); 1316 if (err != BIG_OK) 1317 return (err); 1318 /* aa or bb might be an alias to result */ 1319 a = aa->value; 1320 b = bb->value; 1321 } 1322 r = result->value; 1323 1324 if (((alen == 1) && (a[0] == 0)) || ((blen == 1) && (b[0] == 0))) { 1325 result->len = 1; 1326 result->sign = 1; 1327 r[0] = 0; 1328 return (BIG_OK); 1329 } 1330 sign = aa->sign * bb->sign; 1331 if ((alen == 1) && (a[0] == 1)) { 1332 for (i = 0; i < blen; i++) r[i] = b[i]; 1333 result->len = blen; 1334 result->sign = sign; 1335 return (BIG_OK); 1336 } 1337 if ((blen == 1) && (b[0] == 1)) { 1338 for (i = 0; i < alen; i++) r[i] = a[i]; 1339 result->len = alen; 1340 result->sign = sign; 1341 return (BIG_OK); 1342 } 1343 1344 err = big_init1(&tmp1, rsize, tmp1value, arraysize(tmp1value)); 1345 if (err != BIG_OK) 1346 return (err); 1347 t = tmp1.value; 1348 for (i = 0; 
i < rsize; i++) t[i] = 0; 1349 1350 if (diff == 0 && alen > 2) 1351 BIG_SQR_VEC(t, a, alen); 1352 else if (blen > 0) 1353 BIG_MUL_VEC(t, a, alen, b, blen); 1354 if (t[rsize - 1] == 0) 1355 --rsize; 1356 tmp1.len = rsize; 1357 if ((err = big_copy(result, &tmp1)) != BIG_OK) 1358 return (err); 1359 1360 result->sign = sign; 1361 1362 if (tmp1.malloced) big_finish(&tmp1); 1363 1364 return (BIG_OK); 1365 } 1366 1367 1368 /* 1369 * caller must ensure that a < n, b < n and ret->size >= 2 * n->len + 1 1370 * and that ret is not n 1371 */ 1372 BIG_ERR_CODE 1373 big_mont_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *n, uint32_t n0) 1374 { 1375 int i, j, nlen, needsubtract; 1376 uint32_t *nn, *rr; 1377 uint32_t digit, c; 1378 BIG_ERR_CODE err; 1379 1380 nlen = n->len; 1381 nn = n->value; 1382 1383 rr = ret->value; 1384 1385 if ((err = big_mul(ret, a, b)) != BIG_OK) 1386 return (err); 1387 1388 rr = ret->value; 1389 for (i = ret->len; i < 2 * nlen + 1; i++) rr[i] = 0; 1390 for (i = 0; i < nlen; i++) { 1391 digit = rr[i]; 1392 digit = digit * n0; 1393 1394 c = BIG_MUL_ADD_VEC(rr + i, nn, nlen, digit); 1395 j = i + nlen; 1396 rr[j] += c; 1397 while (rr[j] < c) { 1398 rr[j + 1] += 1; 1399 j++; 1400 c = 1; 1401 } 1402 } 1403 1404 needsubtract = 0; 1405 if ((rr[2 * nlen] != 0)) 1406 needsubtract = 1; 1407 else { 1408 for (i = 2 * nlen - 1; i >= nlen; i--) { 1409 if (rr[i] > nn[i - nlen]) { 1410 needsubtract = 1; 1411 break; 1412 } else if (rr[i] < nn[i - nlen]) break; 1413 } 1414 } 1415 if (needsubtract) 1416 big_sub_vec(rr, rr + nlen, nn, nlen); 1417 else { 1418 for (i = 0; i < nlen; i++) 1419 rr[i] = rr[i + nlen]; 1420 } 1421 for (i = nlen - 1; (i >= 0) && (rr[i] == 0); i--); 1422 ret->len = i+1; 1423 1424 return (BIG_OK); 1425 } 1426 1427 uint32_t 1428 big_n0(uint32_t n) 1429 { 1430 int i; 1431 uint32_t result, tmp; 1432 1433 result = 0; 1434 tmp = 0xffffffff; 1435 for (i = 0; i < 32; i++) { 1436 if ((tmp & 1) == 1) { 1437 result = (result >> 1) | 0x80000000; 1438 tmp = tmp - 
n; 1439 } else result = (result>>1); 1440 tmp = tmp >> 1; 1441 } 1442 1443 return (result); 1444 } 1445 1446 1447 int 1448 big_numbits(BIGNUM *n) 1449 { 1450 int i, j; 1451 uint32_t t; 1452 1453 for (i = n->len - 1; i > 0; i--) 1454 if (n->value[i] != 0) break; 1455 t = n->value[i]; 1456 for (j = 32; j > 0; j--) { 1457 if ((t & 0x80000000) == 0) 1458 t = t << 1; 1459 else 1460 return (32 * i + j); 1461 } 1462 return (0); 1463 } 1464 1465 /* caller must make sure that a < n */ 1466 BIG_ERR_CODE 1467 big_mont_rr(BIGNUM *result, BIGNUM *n) 1468 { 1469 BIGNUM rr; 1470 uint32_t rrvalue[BIGTMPSIZE]; 1471 int len, i; 1472 BIG_ERR_CODE err; 1473 1474 rr.malloced = 0; 1475 len = n->len; 1476 1477 if ((err = big_init1(&rr, 2 * len + 1, 1478 rrvalue, arraysize(rrvalue))) != BIG_OK) 1479 return (err); 1480 1481 for (i = 0; i < 2 * len; i++) rr.value[i] = 0; 1482 rr.value[2 * len] = 1; 1483 rr.len = 2 * len + 1; 1484 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) 1485 goto ret; 1486 err = big_copy(result, &rr); 1487 ret: 1488 if (rr.malloced) big_finish(&rr); 1489 return (err); 1490 } 1491 1492 /* caller must make sure that a < n */ 1493 BIG_ERR_CODE 1494 big_mont_conv(BIGNUM *result, BIGNUM *a, BIGNUM *n, uint32_t n0, BIGNUM *n_rr) 1495 { 1496 BIGNUM rr; 1497 uint32_t rrvalue[BIGTMPSIZE]; 1498 int len, i; 1499 BIG_ERR_CODE err; 1500 1501 rr.malloced = 0; 1502 len = n->len; 1503 1504 if ((err = big_init1(&rr, 2 * len + 1, rrvalue, arraysize(rrvalue))) 1505 != BIG_OK) 1506 return (err); 1507 1508 if (n_rr == NULL) { 1509 for (i = 0; i < 2 * len; i++) rr.value[i] = 0; 1510 rr.value[2 * len] = 1; 1511 rr.len = 2 * len + 1; 1512 if ((err = big_div_pos(NULL, &rr, &rr, n)) != BIG_OK) 1513 goto ret; 1514 n_rr = &rr; 1515 } 1516 1517 if ((err = big_mont_mul(&rr, n_rr, a, n, n0)) != BIG_OK) 1518 goto ret; 1519 err = big_copy(result, &rr); 1520 ret: 1521 if (rr.malloced) big_finish(&rr); 1522 return (err); 1523 } 1524 1525 1526 #define MAX_EXP_BIT_GROUP_SIZE 6 1527 #define 
APOWERS_MAX_SIZE (1 << (MAX_EXP_BIT_GROUP_SIZE - 1)) 1528 1529 #ifdef USE_FLOATING_POINT 1530 1531 /* 1532 * This version makes use of floating point for performance. 1533 */ 1534 static BIG_ERR_CODE 1535 _big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr) 1536 { 1537 BIGNUM ma, tmp, rr; 1538 uint32_t mavalue[BIGTMPSIZE]; 1539 uint32_t tmpvalue[BIGTMPSIZE]; 1540 uint32_t rrvalue[BIGTMPSIZE]; 1541 int i, j, k, l, m, p, bit, bitind, bitcount, nlen; 1542 BIG_ERR_CODE err; 1543 uint32_t n0; 1544 double dn0; 1545 double *dn, *dt, *d16r, *d32r; 1546 uint32_t *nint, *prod; 1547 double *apowers[APOWERS_MAX_SIZE]; 1548 int nbits, groupbits, apowerssize; 1549 1550 nbits = big_numbits(e); 1551 if (nbits < 50) { 1552 groupbits = 1; 1553 apowerssize = 1; 1554 } else { 1555 groupbits = MAX_EXP_BIT_GROUP_SIZE; 1556 apowerssize = 1 << (groupbits - 1); 1557 } 1558 1559 if ((err = big_init1(&ma, n->len, mavalue, arraysize(mavalue))) != 1560 BIG_OK) 1561 return (err); 1562 ma.len = 1; 1563 ma.value[0] = 0; 1564 1565 if ((err = big_init1(&tmp, 2 * n->len + 1, 1566 tmpvalue, arraysize(tmpvalue))) != BIG_OK) 1567 goto ret1; 1568 tmp.len = 1; 1569 tmp.value[0] = 0; 1570 1571 rr.malloced = 0; 1572 if (n_rr == NULL) { 1573 if ((err = big_init1(&rr, 2 * n->len + 1, 1574 rrvalue, arraysize(rrvalue))) != BIG_OK) 1575 goto ret2; 1576 if (big_mont_rr(&rr, n) != BIG_OK) 1577 goto ret2; 1578 n_rr = &rr; 1579 } 1580 1581 n0 = big_n0(n->value[0]); 1582 1583 if (big_cmp_abs(a, n) > 0) { 1584 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) 1585 goto ret2; 1586 err = big_mont_conv(&ma, &ma, n, n0, n_rr); 1587 } else { 1588 err = big_mont_conv(&ma, a, n, n0, n_rr); 1589 } 1590 if (err != BIG_OK) 1591 goto ret3; 1592 1593 tmp.len = 1; 1594 tmp.value[0] = 1; 1595 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) 1596 goto ret3; 1597 1598 nlen = n->len; 1599 dn0 = (double)(n0 & 0xffff); 1600 1601 dn = dt = d16r = d32r = NULL; 1602 nint = prod = NULL; 1603 for (i = 0; 
i < apowerssize; i++) { 1604 apowers[i] = NULL; 1605 } 1606 1607 if ((dn = big_malloc(nlen * sizeof (double))) == NULL) { 1608 err = BIG_NO_MEM; 1609 goto ret; 1610 } 1611 if ((dt = big_malloc((4 * nlen + 2) * sizeof (double))) == NULL) { 1612 err = BIG_NO_MEM; 1613 goto ret; 1614 } 1615 if ((nint = big_malloc(nlen * sizeof (uint32_t))) == NULL) { 1616 err = BIG_NO_MEM; 1617 goto ret; 1618 } 1619 if ((prod = big_malloc((nlen + 1) * sizeof (uint32_t))) == NULL) { 1620 err = BIG_NO_MEM; 1621 goto ret; 1622 } 1623 if ((d16r = big_malloc((2 * nlen + 1) * sizeof (double))) == NULL) { 1624 err = BIG_NO_MEM; 1625 goto ret; 1626 } 1627 if ((d32r = big_malloc(nlen * sizeof (double))) == NULL) { 1628 err = BIG_NO_MEM; 1629 goto ret; 1630 } 1631 for (i = 0; i < apowerssize; i++) { 1632 if ((apowers[i] = big_malloc((2 * nlen + 1) * 1633 sizeof (double))) == NULL) { 1634 err = BIG_NO_MEM; 1635 goto ret; 1636 } 1637 } 1638 1639 for (i = 0; i < ma.len; i++) nint[i] = ma.value[i]; 1640 for (; i < nlen; i++) nint[i] = 0; 1641 conv_i32_to_d32_and_d16(d32r, apowers[0], nint, nlen); 1642 1643 for (i = 0; i < n->len; i++) nint[i] = n->value[i]; 1644 for (; i < nlen; i++) nint[i] = 0; 1645 conv_i32_to_d32(dn, nint, nlen); 1646 1647 mont_mulf_noconv(prod, d32r, apowers[0], dt, dn, nint, nlen, dn0); 1648 conv_i32_to_d32(d32r, prod, nlen); 1649 for (i = 1; i < apowerssize; i++) { 1650 mont_mulf_noconv(prod, d32r, apowers[i - 1], 1651 dt, dn, nint, nlen, dn0); 1652 conv_i32_to_d16(apowers[i], prod, nlen); 1653 } 1654 1655 for (i = 0; i < tmp.len; i++) prod[i] = tmp.value[i]; 1656 for (; i < nlen + 1; i++) prod[i] = 0; 1657 1658 bitind = nbits % 32; 1659 k = 0; 1660 l = 0; 1661 p = 0; 1662 bitcount = 0; 1663 for (i = nbits / 32; i >= 0; i--) { 1664 for (j = bitind - 1; j >= 0; j--) { 1665 bit = (e->value[i] >> j) & 1; 1666 if ((bitcount == 0) && (bit == 0)) { 1667 conv_i32_to_d32_and_d16(d32r, d16r, 1668 prod, nlen); 1669 mont_mulf_noconv(prod, d32r, d16r, 1670 dt, dn, nint, nlen, dn0); 1671 
} else { 1672 bitcount++; 1673 p = p * 2 + bit; 1674 if (bit == 1) { 1675 k = k + l + 1; 1676 l = 0; 1677 } else { 1678 l++; 1679 } 1680 if (bitcount == groupbits) { 1681 for (m = 0; m < k; m++) { 1682 conv_i32_to_d32_and_d16( 1683 d32r, d16r, 1684 prod, nlen); 1685 mont_mulf_noconv(prod, d32r, 1686 d16r, dt, dn, nint, 1687 nlen, dn0); 1688 } 1689 conv_i32_to_d32(d32r, prod, nlen); 1690 mont_mulf_noconv(prod, d32r, 1691 apowers[p >> (l+1)], 1692 dt, dn, nint, nlen, dn0); 1693 for (m = 0; m < l; m++) { 1694 conv_i32_to_d32_and_d16( 1695 d32r, d16r, 1696 prod, nlen); 1697 mont_mulf_noconv(prod, d32r, 1698 d16r, dt, dn, nint, 1699 nlen, dn0); 1700 } 1701 k = 0; 1702 l = 0; 1703 p = 0; 1704 bitcount = 0; 1705 } 1706 } 1707 } 1708 bitind = 32; 1709 } 1710 1711 for (m = 0; m < k; m++) { 1712 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen); 1713 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0); 1714 } 1715 if (p != 0) { 1716 conv_i32_to_d32(d32r, prod, nlen); 1717 mont_mulf_noconv(prod, d32r, apowers[p >> (l + 1)], 1718 dt, dn, nint, nlen, dn0); 1719 } 1720 for (m = 0; m < l; m++) { 1721 conv_i32_to_d32_and_d16(d32r, d16r, prod, nlen); 1722 mont_mulf_noconv(prod, d32r, d16r, dt, dn, nint, nlen, dn0); 1723 } 1724 1725 ma.value[0] = 1; 1726 ma.len = 1; 1727 for (i = 0; i < nlen; i++) tmp.value[i] = prod[i]; 1728 for (i = nlen - 1; (i > 0) && (prod[i] == 0); i--); 1729 tmp.len = i + 1; 1730 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) 1731 goto ret; 1732 err = big_copy(result, &tmp); 1733 ret: 1734 for (i = apowerssize - 1; i >= 0; i--) { 1735 if (apowers[i] != NULL) 1736 big_free(apowers[i], (2 * nlen + 1) * sizeof (double)); 1737 } 1738 if (d32r != NULL) 1739 big_free(d32r, nlen * sizeof (double)); 1740 if (d16r != NULL) 1741 big_free(d16r, (2 * nlen + 1) * sizeof (double)); 1742 if (prod != NULL) 1743 big_free(prod, (nlen + 1) * sizeof (uint32_t)); 1744 if (nint != NULL) 1745 big_free(nint, nlen * sizeof (uint32_t)); 1746 if (dt != NULL) 1747 
big_free(dt, (4 * nlen + 2) * sizeof (double)); 1748 if (dn != NULL) 1749 big_free(dn, nlen * sizeof (double)); 1750 1751 ret3: 1752 big_finish(&rr); 1753 ret2: 1754 big_finish(&tmp); 1755 ret1: 1756 big_finish(&ma); 1757 return (err); 1758 1759 } 1760 1761 #ifdef _KERNEL 1762 1763 #include <sys/sysmacros.h> 1764 #include <sys/regset.h> 1765 #include <sys/fpu/fpusystm.h> 1766 1767 /* the alignment for block stores to save fp registers */ 1768 #define FPR_ALIGN (64) 1769 1770 extern void big_savefp(kfpu_t *); 1771 extern void big_restorefp(kfpu_t *); 1772 1773 #endif /* _KERNEL */ 1774 1775 BIG_ERR_CODE 1776 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr) 1777 { 1778 #ifdef _KERNEL 1779 BIG_ERR_CODE rv; 1780 uint8_t fpua[sizeof (kfpu_t) + FPR_ALIGN]; 1781 kfpu_t *fpu; 1782 1783 #ifdef DEBUG 1784 if (!fpu_exists) 1785 return (BIG_GENERAL_ERR); 1786 #endif 1787 1788 fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, FPR_ALIGN); 1789 big_savefp(fpu); 1790 1791 rv = _big_modexp(result, a, e, n, n_rr); 1792 1793 big_restorefp(fpu); 1794 1795 return (rv); 1796 #else 1797 return (_big_modexp(result, a, e, n, n_rr)); 1798 #endif /* _KERNEL */ 1799 } 1800 1801 #else /* ! USE_FLOATING_POINT */ 1802 1803 /* 1804 * This version uses strictly integer math and is safe in the kernel 1805 * for all platforms. 
1806 */ 1807 1808 /* 1809 * computes a^e mod n 1810 * assumes a < n, n odd, result->value at least as long as n->value 1811 */ 1812 BIG_ERR_CODE 1813 big_modexp(BIGNUM *result, BIGNUM *a, BIGNUM *e, BIGNUM *n, BIGNUM *n_rr) 1814 { 1815 BIGNUM ma, tmp, rr; 1816 uint32_t mavalue[BIGTMPSIZE]; 1817 uint32_t tmpvalue[BIGTMPSIZE]; 1818 uint32_t rrvalue[BIGTMPSIZE]; 1819 BIGNUM apowers[APOWERS_MAX_SIZE]; 1820 int i, j, k, l, m, p, 1821 bit, bitind, bitcount, groupbits, apowerssize; 1822 BIG_ERR_CODE err; 1823 uint32_t n0; 1824 1825 int nbits; 1826 1827 nbits = big_numbits(e); 1828 if (nbits < 50) { 1829 groupbits = 1; 1830 apowerssize = 1; 1831 } else { 1832 groupbits = MAX_EXP_BIT_GROUP_SIZE; 1833 apowerssize = 1 << (groupbits - 1); 1834 } 1835 1836 if ((err = big_init1(&ma, n->len, 1837 mavalue, arraysize(mavalue))) != BIG_OK) 1838 return (err); 1839 ma.len = 1; 1840 ma.value[0] = 0; 1841 1842 if ((err = big_init1(&tmp, 2 * n->len + 1, 1843 tmpvalue, arraysize(tmpvalue))) != BIG_OK) 1844 goto ret1; 1845 tmp.len = 1; 1846 tmp.value[0] = 1; 1847 1848 n0 = big_n0(n->value[0]); 1849 1850 rr.malloced = 0; 1851 if (n_rr == NULL) { 1852 if ((err = big_init1(&rr, 2 * n->len + 1, 1853 rrvalue, arraysize(rrvalue))) != BIG_OK) 1854 goto ret2; 1855 1856 if (big_mont_rr(&rr, n) != BIG_OK) 1857 goto ret3; 1858 n_rr = &rr; 1859 } 1860 1861 for (i = 0; i < apowerssize; i++) apowers[i].malloced = 0; 1862 for (i = 0; i < apowerssize; i++) { 1863 if ((err = big_init1(&(apowers[i]), n->len, NULL, 0)) != 1864 BIG_OK) 1865 goto ret; 1866 } 1867 1868 if (big_cmp_abs(a, n) > 0) { 1869 if ((err = big_div_pos(NULL, &ma, a, n)) != BIG_OK) 1870 goto ret; 1871 err = big_mont_conv(&ma, &ma, n, n0, n_rr); 1872 } else { 1873 err = big_mont_conv(&ma, a, n, n0, n_rr); 1874 } 1875 if (err != BIG_OK) goto ret; 1876 1877 (void) big_copy(&(apowers[0]), &ma); 1878 if ((err = big_mont_mul(&tmp, &ma, &ma, n, n0)) != BIG_OK) 1879 goto ret; 1880 (void) big_copy(&ma, &tmp); 1881 1882 for (i = 1; i < apowerssize; 
i++) { 1883 if ((err = big_mont_mul(&tmp, &ma, 1884 &(apowers[i-1]), n, n0)) != BIG_OK) 1885 goto ret; 1886 (void) big_copy(&apowers[i], &tmp); 1887 } 1888 1889 tmp.len = 1; 1890 tmp.value[0] = 1; 1891 if ((err = big_mont_conv(&tmp, &tmp, n, n0, n_rr)) != BIG_OK) 1892 goto ret; 1893 1894 bitind = nbits % 32; 1895 k = 0; 1896 l = 0; 1897 p = 0; 1898 bitcount = 0; 1899 for (i = nbits / 32; i >= 0; i--) { 1900 for (j = bitind - 1; j >= 0; j--) { 1901 bit = (e->value[i] >> j) & 1; 1902 if ((bitcount == 0) && (bit == 0)) { 1903 if ((err = big_mont_mul(&tmp, 1904 &tmp, &tmp, n, n0)) != BIG_OK) 1905 goto ret; 1906 } else { 1907 bitcount++; 1908 p = p * 2 + bit; 1909 if (bit == 1) { 1910 k = k + l + 1; 1911 l = 0; 1912 } else { 1913 l++; 1914 } 1915 if (bitcount == groupbits) { 1916 for (m = 0; m < k; m++) { 1917 if ((err = big_mont_mul(&tmp, 1918 &tmp, &tmp, n, n0)) != 1919 BIG_OK) 1920 goto ret; 1921 } 1922 if ((err = big_mont_mul(&tmp, &tmp, 1923 &(apowers[p >> (l + 1)]), 1924 n, n0)) != BIG_OK) 1925 goto ret; 1926 for (m = 0; m < l; m++) { 1927 if ((err = big_mont_mul(&tmp, 1928 &tmp, &tmp, n, n0)) != 1929 BIG_OK) 1930 goto ret; 1931 } 1932 k = 0; 1933 l = 0; 1934 p = 0; 1935 bitcount = 0; 1936 } 1937 } 1938 } 1939 bitind = 32; 1940 } 1941 1942 for (m = 0; m < k; m++) { 1943 if ((err = big_mont_mul(&tmp, &tmp, &tmp, n, n0)) != BIG_OK) 1944 goto ret; 1945 } 1946 if (p != 0) { 1947 if ((err = big_mont_mul(&tmp, &tmp, 1948 &(apowers[p >> (l + 1)]), n, n0)) != BIG_OK) 1949 goto ret; 1950 } 1951 for (m = 0; m < l; m++) { 1952 if ((err = big_mont_mul(&tmp, &tmp, &tmp, n, n0)) != BIG_OK) 1953 goto ret; 1954 } 1955 1956 ma.value[0] = 1; 1957 ma.len = 1; 1958 if ((err = big_mont_mul(&tmp, &tmp, &ma, n, n0)) != BIG_OK) 1959 goto ret; 1960 err = big_copy(result, &tmp); 1961 ret: 1962 for (i = apowerssize - 1; i >= 0; i--) { 1963 big_finish(&(apowers[i])); 1964 } 1965 ret3: 1966 if (rr.malloced) big_finish(&rr); 1967 ret2: 1968 if (tmp.malloced) big_finish(&tmp); 1969 ret1: 1970 
if (ma.malloced) big_finish(&ma); 1971 return (err); 1972 } 1973 1974 #endif /* USE_FLOATING_POINT */ 1975 1976 1977 BIG_ERR_CODE 1978 big_modexp_crt(BIGNUM *result, BIGNUM *a, BIGNUM *dmodpminus1, 1979 BIGNUM *dmodqminus1, BIGNUM *p, BIGNUM *q, BIGNUM *pinvmodq, 1980 BIGNUM *p_rr, BIGNUM *q_rr) 1981 { 1982 BIGNUM ap, aq, tmp; 1983 int alen, biglen, sign; 1984 BIG_ERR_CODE err; 1985 1986 if (p->len > q->len) biglen = p->len; 1987 else biglen = q->len; 1988 1989 if ((err = big_init1(&ap, p->len, NULL, 0)) != BIG_OK) 1990 return (err); 1991 if ((err = big_init1(&aq, q->len, NULL, 0)) != BIG_OK) 1992 goto ret1; 1993 if ((err = big_init1(&tmp, biglen + q->len + 1, NULL, 0)) != BIG_OK) 1994 goto ret2; 1995 1996 /* 1997 * check whether a is too short - to avoid timing attacks 1998 */ 1999 alen = a->len; 2000 while ((alen > p->len) && (a->value[alen - 1] == 0)) { 2001 alen--; 2002 } 2003 if (alen < p->len + q->len) { 2004 /* 2005 * a is too short, add p*q to it before 2006 * taking it modulo p and q 2007 * this will also affect timing, but this difference 2008 * does not depend on p or q, only on a 2009 * (in "normal" operation, this path will never be 2010 * taken, so it is not a performance penalty 2011 */ 2012 if ((err = big_mul(&tmp, p, q)) != BIG_OK) 2013 goto ret; 2014 if ((err = big_add(&tmp, &tmp, a)) != BIG_OK) 2015 goto ret; 2016 if ((err = big_div_pos(NULL, &ap, &tmp, p)) != BIG_OK) 2017 goto ret; 2018 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) 2019 goto ret; 2020 } else { 2021 if ((err = big_div_pos(NULL, &ap, a, p)) != BIG_OK) 2022 goto ret; 2023 if ((err = big_div_pos(NULL, &aq, a, q)) != BIG_OK) 2024 goto ret; 2025 } 2026 2027 if ((err = big_modexp(&ap, &ap, dmodpminus1, p, p_rr)) != BIG_OK) 2028 goto ret; 2029 if ((err = big_modexp(&aq, &aq, dmodqminus1, q, q_rr)) != BIG_OK) 2030 goto ret; 2031 if ((err = big_sub(&tmp, &aq, &ap)) != BIG_OK) 2032 goto ret; 2033 if ((err = big_mul(&tmp, &tmp, pinvmodq)) != BIG_OK) 2034 goto ret; 2035 sign = 
tmp.sign; 2036 tmp.sign = 1; 2037 if ((err = big_div_pos(NULL, &aq, &tmp, q)) != BIG_OK) 2038 goto ret; 2039 if ((sign == -1) && (!big_is_zero(&aq))) { 2040 (void) big_sub_pos(&aq, q, &aq); 2041 } 2042 if ((err = big_mul(&tmp, &aq, p)) != BIG_OK) 2043 goto ret; 2044 err = big_add_abs(result, &ap, &tmp); 2045 2046 ret: 2047 big_finish(&tmp); 2048 ret2: 2049 big_finish(&aq); 2050 ret1: 2051 big_finish(&ap); 2052 2053 return (err); 2054 } 2055 2056 2057 uint32_t onearr[1] = {1}; 2058 BIGNUM One = {1, 1, 1, 0, onearr}; 2059 2060 uint32_t twoarr[1] = {2}; 2061 BIGNUM Two = {1, 1, 1, 0, twoarr}; 2062 2063 uint32_t fourarr[1] = {4}; 2064 BIGNUM Four = {1, 1, 1, 0, fourarr}; 2065 2066 BIG_ERR_CODE 2067 big_sqrt_pos(BIGNUM *result, BIGNUM *n) 2068 { 2069 BIGNUM *high, *low, *mid, *t; 2070 BIGNUM t1, t2, t3, prod; 2071 uint32_t t1value[BIGTMPSIZE]; 2072 uint32_t t2value[BIGTMPSIZE]; 2073 uint32_t t3value[BIGTMPSIZE]; 2074 uint32_t prodvalue[BIGTMPSIZE]; 2075 int i, nbits, diff, nrootbits, highbits; 2076 BIG_ERR_CODE err; 2077 2078 nbits = big_numbits(n); 2079 2080 if ((err = big_init1(&t1, n->len + 1, 2081 t1value, arraysize(t1value))) != BIG_OK) 2082 return (err); 2083 if ((err = big_init1(&t2, n->len + 1, 2084 t2value, arraysize(t2value))) != BIG_OK) 2085 goto ret1; 2086 if ((err = big_init1(&t3, n->len + 1, 2087 t3value, arraysize(t3value))) != BIG_OK) 2088 goto ret2; 2089 if ((err = big_init1(&prod, n->len + 1, 2090 prodvalue, arraysize(prodvalue))) != BIG_OK) 2091 goto ret3; 2092 2093 nrootbits = (nbits + 1) / 2; 2094 t1.len = t2.len = t3.len = (nrootbits - 1) / 32 + 1; 2095 for (i = 0; i < t1.len; i++) { 2096 t1.value[i] = 0; 2097 t2.value[i] = 0xffffffff; 2098 } 2099 highbits = nrootbits - 32 * (t1.len - 1); 2100 if (highbits == 32) { 2101 t1.value[t1.len - 1] = 0x80000000; 2102 t2.value[t2.len - 1] = 0xffffffff; 2103 } else { 2104 t1.value[t1.len - 1] = 1 << (highbits - 1); 2105 t2.value[t2.len - 1] = 2 * t1.value[t1.len - 1] - 1; 2106 } 2107 high = &t2; 2108 low = 
&t1; 2109 mid = &t3; 2110 2111 if ((err = big_mul(&prod, high, high)) != BIG_OK) 2112 goto ret; 2113 diff = big_cmp_abs(&prod, n); 2114 if (diff <= 0) { 2115 err = big_copy(result, high); 2116 goto ret; 2117 } 2118 2119 (void) big_sub_pos(mid, high, low); 2120 while (big_cmp_abs(&One, mid) != 0) { 2121 (void) big_add_abs(mid, high, low); 2122 (void) big_half_pos(mid, mid); 2123 if ((err = big_mul(&prod, mid, mid)) != BIG_OK) 2124 goto ret; 2125 diff = big_cmp_abs(&prod, n); 2126 if (diff > 0) { 2127 t = high; 2128 high = mid; 2129 mid = t; 2130 } else if (diff < 0) { 2131 t = low; 2132 low = mid; 2133 mid = t; 2134 } else { 2135 err = big_copy(result, low); 2136 goto ret; 2137 } 2138 (void) big_sub_pos(mid, high, low); 2139 } 2140 2141 err = big_copy(result, low); 2142 ret: 2143 if (prod.malloced) big_finish(&prod); 2144 ret3: 2145 if (t3.malloced) big_finish(&t3); 2146 ret2: 2147 if (t2.malloced) big_finish(&t2); 2148 ret1: 2149 if (t1.malloced) big_finish(&t1); 2150 2151 return (err); 2152 } 2153 2154 2155 BIG_ERR_CODE 2156 big_Jacobi_pos(int *jac, BIGNUM *nn, BIGNUM *mm) 2157 { 2158 BIGNUM *t, *tmp2, *m, *n; 2159 BIGNUM t1, t2, t3; 2160 uint32_t t1value[BIGTMPSIZE]; 2161 uint32_t t2value[BIGTMPSIZE]; 2162 uint32_t t3value[BIGTMPSIZE]; 2163 int len, err; 2164 2165 if (big_is_zero(nn) || 2166 (((nn->value[0] & 1) | (mm->value[0] & 1)) == 0)) { 2167 *jac = 0; 2168 return (BIG_OK); 2169 } 2170 2171 if (nn->len > mm->len) len = nn->len; 2172 else len = mm->len; 2173 2174 if ((err = big_init1(&t1, len, 2175 t1value, arraysize(t1value))) != BIG_OK) 2176 return (err); 2177 if ((err = big_init1(&t2, len, 2178 t2value, arraysize(t2value))) != BIG_OK) 2179 goto ret1; 2180 if ((err = big_init1(&t3, len, 2181 t3value, arraysize(t3value))) != BIG_OK) 2182 goto ret2; 2183 2184 n = &t1; 2185 m = &t2; 2186 tmp2 = &t3; 2187 2188 (void) big_copy(n, nn); 2189 (void) big_copy(m, mm); 2190 2191 *jac = 1; 2192 while (big_cmp_abs(&One, m) != 0) { 2193 if (big_is_zero(n)) { 2194 *jac = 
0; 2195 goto ret; 2196 } 2197 if ((m->value[0] & 1) == 0) { 2198 if (((n->value[0] & 7) == 3) || 2199 ((n->value[0] & 7) == 5)) *jac = -*jac; 2200 (void) big_half_pos(m, m); 2201 } else if ((n->value[0] & 1) == 0) { 2202 if (((m->value[0] & 7) == 3) || 2203 ((m->value[0] & 7) == 5)) *jac = -*jac; 2204 (void) big_half_pos(n, n); 2205 } else { 2206 if (((m->value[0] & 3) == 3) && 2207 ((n->value[0] & 3) == 3)) { 2208 *jac = -*jac; 2209 } 2210 if ((err = big_div_pos(NULL, tmp2, m, n)) != BIG_OK) 2211 goto ret; 2212 t = tmp2; 2213 tmp2 = m; 2214 m = n; 2215 n = t; 2216 } 2217 } 2218 err = BIG_OK; 2219 2220 ret: 2221 if (t3.malloced) big_finish(&t3); 2222 ret2: 2223 if (t2.malloced) big_finish(&t2); 2224 ret1: 2225 if (t1.malloced) big_finish(&t1); 2226 2227 return (err); 2228 } 2229 2230 2231 BIG_ERR_CODE 2232 big_Lucas(BIGNUM *Lkminus1, BIGNUM *Lk, BIGNUM *p, BIGNUM *k, BIGNUM *n) 2233 { 2234 int m, w, i; 2235 uint32_t bit; 2236 BIGNUM ki, tmp, tmp2; 2237 uint32_t kivalue[BIGTMPSIZE]; 2238 uint32_t tmpvalue[BIGTMPSIZE]; 2239 uint32_t tmp2value[BIGTMPSIZE]; 2240 BIG_ERR_CODE err; 2241 2242 if (big_cmp_abs(k, &One) == 0) { 2243 (void) big_copy(Lk, p); 2244 (void) big_copy(Lkminus1, &Two); 2245 return (BIG_OK); 2246 } 2247 2248 if ((err = big_init1(&ki, k->len + 1, 2249 kivalue, arraysize(kivalue))) != BIG_OK) 2250 return (err); 2251 2252 if ((err = big_init1(&tmp, 2 * n->len +1, 2253 tmpvalue, arraysize(tmpvalue))) != BIG_OK) 2254 goto ret1; 2255 2256 if ((err = big_init1(&tmp2, n->len, 2257 tmp2value, arraysize(tmp2value))) != BIG_OK) 2258 goto ret2; 2259 2260 m = big_numbits(k); 2261 ki.len = (m - 1) / 32 + 1; 2262 w = (m - 1) / 32; 2263 bit = 1 << ((m - 1) % 32); 2264 for (i = 0; i < ki.len; i++) ki.value[i] = 0; 2265 ki.value[ki.len - 1] = bit; 2266 if (big_cmp_abs(k, &ki) != 0) 2267 (void) big_double(&ki, &ki); 2268 (void) big_sub_pos(&ki, &ki, k); 2269 2270 (void) big_copy(Lk, p); 2271 (void) big_copy(Lkminus1, &Two); 2272 2273 for (i = 0; i < m; i++) { 2274 if 
((err = big_mul(&tmp, Lk, Lkminus1)) != BIG_OK) 2275 goto ret; 2276 (void) big_add_abs(&tmp, &tmp, n); 2277 (void) big_sub_pos(&tmp, &tmp, p); 2278 if ((err = big_div_pos(NULL, &tmp2, &tmp, n)) != BIG_OK) 2279 goto ret; 2280 2281 if ((ki.value[w] & bit) != 0) { 2282 if ((err = big_mul(&tmp, Lkminus1, Lkminus1)) != 2283 BIG_OK) 2284 goto ret; 2285 (void) big_add_abs(&tmp, &tmp, n); 2286 (void) big_sub_pos(&tmp, &tmp, &Two); 2287 if ((err = big_div_pos(NULL, Lkminus1, &tmp, n)) != 2288 BIG_OK) 2289 goto ret; 2290 (void) big_copy(Lk, &tmp2); 2291 } else { 2292 if ((err = big_mul(&tmp, Lk, Lk)) != BIG_OK) 2293 goto ret; 2294 (void) big_add_abs(&tmp, &tmp, n); 2295 (void) big_sub_pos(&tmp, &tmp, &Two); 2296 if ((err = big_div_pos(NULL, Lk, &tmp, n)) != BIG_OK) 2297 goto ret; 2298 (void) big_copy(Lkminus1, &tmp2); 2299 } 2300 bit = bit >> 1; 2301 if (bit == 0) { 2302 bit = 0x80000000; 2303 w--; 2304 } 2305 } 2306 2307 err = BIG_OK; 2308 2309 ret: 2310 if (tmp2.malloced) big_finish(&tmp2); 2311 ret2: 2312 if (tmp.malloced) big_finish(&tmp); 2313 ret1: 2314 if (ki.malloced) big_finish(&ki); 2315 2316 return (err); 2317 } 2318 2319 2320 BIG_ERR_CODE 2321 big_isprime_pos(BIGNUM *n) 2322 { 2323 BIGNUM o, nminus1, tmp, Lkminus1, Lk; 2324 uint32_t ovalue[BIGTMPSIZE]; 2325 uint32_t nminus1value[BIGTMPSIZE]; 2326 uint32_t tmpvalue[BIGTMPSIZE]; 2327 uint32_t Lkminus1value[BIGTMPSIZE]; 2328 uint32_t Lkvalue[BIGTMPSIZE]; 2329 BIG_ERR_CODE err; 2330 int e, i, jac; 2331 2332 if (big_cmp_abs(n, &One) == 0) 2333 return (BIG_FALSE); 2334 if (big_cmp_abs(n, &Two) == 0) 2335 return (BIG_TRUE); 2336 if ((n->value[0] & 1) == 0) 2337 return (BIG_FALSE); 2338 2339 if ((err = big_init1(&o, n->len, ovalue, arraysize(ovalue))) != BIG_OK) 2340 return (err); 2341 2342 if ((err = big_init1(&nminus1, n->len, 2343 nminus1value, arraysize(nminus1value))) != BIG_OK) 2344 goto ret1; 2345 2346 if ((err = big_init1(&tmp, 2 * n->len, 2347 tmpvalue, arraysize(tmpvalue))) != BIG_OK) 2348 goto ret2; 2349 2350 
if ((err = big_init1(&Lkminus1, n->len, 2351 Lkminus1value, arraysize(Lkminus1value))) != BIG_OK) 2352 goto ret3; 2353 2354 if ((err = big_init1(&Lk, n->len, 2355 Lkvalue, arraysize(Lkvalue))) != BIG_OK) 2356 goto ret4; 2357 2358 (void) big_sub_pos(&o, n, &One); /* cannot fail */ 2359 (void) big_copy(&nminus1, &o); /* cannot fail */ 2360 e = 0; 2361 while ((o.value[0] & 1) == 0) { 2362 e++; 2363 (void) big_half_pos(&o, &o); /* cannot fail */ 2364 } 2365 if ((err = big_modexp(&tmp, &Two, &o, n, NULL)) != BIG_OK) 2366 goto ret; 2367 i = 0; 2368 while ((i < e) && 2369 (big_cmp_abs(&tmp, &One) != 0) && 2370 (big_cmp_abs(&tmp, &nminus1) != 0)) { 2371 if ((err = big_modexp(&tmp, &tmp, &Two, n, NULL)) != BIG_OK) 2372 goto ret; 2373 i++; 2374 } 2375 if (!((big_cmp_abs(&tmp, &nminus1) == 0) || 2376 ((i == 0) && (big_cmp_abs(&tmp, &One) == 0)))) { 2377 err = BIG_FALSE; 2378 goto ret; 2379 } 2380 2381 if ((err = big_sqrt_pos(&tmp, n)) != BIG_OK) 2382 goto ret; 2383 if ((err = big_mul(&tmp, &tmp, &tmp)) != BIG_OK) 2384 goto ret; 2385 if (big_cmp_abs(&tmp, n) == 0) { 2386 err = BIG_FALSE; 2387 goto ret; 2388 } 2389 2390 (void) big_copy(&o, &Two); 2391 do { 2392 (void) big_add_abs(&o, &o, &One); 2393 if ((err = big_mul(&tmp, &o, &o)) != BIG_OK) 2394 goto ret; 2395 (void) big_sub_pos(&tmp, &tmp, &Four); 2396 if ((err = big_Jacobi_pos(&jac, &tmp, n)) != BIG_OK) 2397 goto ret; 2398 } while (jac != -1); 2399 2400 (void) big_add_abs(&tmp, n, &One); 2401 if ((err = big_Lucas(&Lkminus1, &Lk, &o, &tmp, n)) != BIG_OK) 2402 goto ret; 2403 if ((big_cmp_abs(&Lkminus1, &o) == 0) && (big_cmp_abs(&Lk, &Two) == 0)) 2404 err = BIG_TRUE; 2405 else err = BIG_FALSE; 2406 2407 ret: 2408 if (Lk.malloced) big_finish(&Lk); 2409 ret4: 2410 if (Lkminus1.malloced) big_finish(&Lkminus1); 2411 ret3: 2412 if (tmp.malloced) big_finish(&tmp); 2413 ret2: 2414 if (nminus1.malloced) big_finish(&nminus1); 2415 ret1: 2416 if (o.malloced) big_finish(&o); 2417 2418 return (err); 2419 } 2420 2421 2422 #define 
SIEVESIZE 1000 2423 2424 uint32_t smallprimes[] = 2425 { 2426 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 2427 51, 53, 59, 61, 67, 71, 73, 79, 83, 89, 91, 97 2428 }; 2429 2430 2431 BIG_ERR_CODE 2432 big_nextprime_pos(BIGNUM *result, BIGNUM *n) 2433 { 2434 BIG_ERR_CODE err; 2435 int sieve[SIEVESIZE]; 2436 int i; 2437 uint32_t off, p; 2438 2439 if ((err = big_copy(result, n)) != BIG_OK) 2440 return (err); 2441 result->value[0] |= 1; 2442 /* CONSTCOND */ 2443 while (1) { 2444 for (i = 0; i < SIEVESIZE; i++) sieve[i] = 0; 2445 for (i = 0; 2446 i < sizeof (smallprimes) / sizeof (uint32_t); i++) { 2447 p = smallprimes[i]; 2448 off = big_mod16_pos(result, p); 2449 off = p - off; 2450 if ((off % 2) == 1) off = off + p; 2451 off = off/2; 2452 while (off < SIEVESIZE) { 2453 sieve[off] = 1; 2454 off = off + p; 2455 } 2456 } 2457 2458 for (i = 0; i < SIEVESIZE; i++) { 2459 if (sieve[i] == 0) { 2460 err = big_isprime_pos(result); 2461 if (err != BIG_FALSE) { 2462 if (err != BIG_TRUE) 2463 return (err); 2464 else 2465 return (BIG_OK); 2466 } 2467 2468 } 2469 if ((err = big_add_abs(result, result, &Two)) != 2470 BIG_OK) 2471 return (err); 2472 } 2473 } 2474 /* NOTREACHED */ 2475 } 2476 2477 2478 BIG_ERR_CODE 2479 big_nextprime_pos_slow(BIGNUM *result, BIGNUM *n) 2480 { 2481 BIG_ERR_CODE err; 2482 2483 2484 if ((err = big_copy(result, n)) != BIG_OK) 2485 return (err); 2486 result->value[0] |= 1; 2487 while ((err = big_isprime_pos(result)) != BIG_TRUE) { 2488 if (err != BIG_FALSE) 2489 return (err); 2490 if ((err = big_add_abs(result, result, &Two)) != BIG_OK) 2491 return (err); 2492 } 2493 return (BIG_OK); 2494 } 2495 2496 2497 /* 2498 * given m and e, computes the rest in the equation 2499 * gcd(m, e) = cm * m + ce * e 2500 */ 2501 BIG_ERR_CODE 2502 big_ext_gcd_pos(BIGNUM *gcd, BIGNUM *cm, BIGNUM *ce, BIGNUM *m, BIGNUM *e) 2503 { 2504 BIGNUM *xi, *ri, *riminus1, *riminus2, *t, 2505 *vmi, *vei, *vmiminus1, *veiminus1; 2506 BIGNUM t1, t2, t3, t4, t5, t6, t7, t8, tmp; 2507 
uint32_t t1value[BIGTMPSIZE]; 2508 uint32_t t2value[BIGTMPSIZE]; 2509 uint32_t t3value[BIGTMPSIZE]; 2510 uint32_t t4value[BIGTMPSIZE]; 2511 uint32_t t5value[BIGTMPSIZE]; 2512 uint32_t t6value[BIGTMPSIZE]; 2513 uint32_t t7value[BIGTMPSIZE]; 2514 uint32_t t8value[BIGTMPSIZE]; 2515 uint32_t tmpvalue[BIGTMPSIZE]; 2516 BIG_ERR_CODE err; 2517 int len; 2518 2519 if (big_cmp_abs(m, e) >= 0) len = m->len; 2520 else len = e->len; 2521 2522 if ((err = big_init1(&t1, len, 2523 t1value, arraysize(t1value))) != BIG_OK) 2524 return (err); 2525 if ((err = big_init1(&t2, len, 2526 t2value, arraysize(t2value))) != BIG_OK) 2527 goto ret1; 2528 if ((err = big_init1(&t3, len, 2529 t3value, arraysize(t3value))) != BIG_OK) 2530 goto ret2; 2531 if ((err = big_init1(&t4, len, 2532 t4value, arraysize(t3value))) != BIG_OK) 2533 goto ret3; 2534 if ((err = big_init1(&t5, len, 2535 t5value, arraysize(t5value))) != BIG_OK) 2536 goto ret4; 2537 if ((err = big_init1(&t6, len, 2538 t6value, arraysize(t6value))) != BIG_OK) 2539 goto ret5; 2540 if ((err = big_init1(&t7, len, 2541 t7value, arraysize(t7value))) != BIG_OK) 2542 goto ret6; 2543 if ((err = big_init1(&t8, len, 2544 t8value, arraysize(t8value))) != BIG_OK) 2545 goto ret7; 2546 2547 if ((err = big_init1(&tmp, 2 * len, 2548 tmpvalue, arraysize(tmpvalue))) != BIG_OK) 2549 goto ret8; 2550 2551 ri = &t1; 2552 ri->value[0] = 1; 2553 ri->len = 1; 2554 xi = &t2; 2555 riminus1 = &t3; 2556 riminus2 = &t4; 2557 vmi = &t5; 2558 vei = &t6; 2559 vmiminus1 = &t7; 2560 veiminus1 = &t8; 2561 2562 (void) big_copy(vmiminus1, &One); 2563 (void) big_copy(vmi, &One); 2564 (void) big_copy(veiminus1, &One); 2565 (void) big_copy(xi, &One); 2566 vei->len = 1; 2567 vei->value[0] = 0; 2568 2569 (void) big_copy(riminus1, m); 2570 (void) big_copy(ri, e); 2571 2572 while (!big_is_zero(ri)) { 2573 t = riminus2; 2574 riminus2 = riminus1; 2575 riminus1 = ri; 2576 ri = t; 2577 if ((err = big_mul(&tmp, vmi, xi)) != BIG_OK) 2578 goto ret; 2579 if ((err = big_sub(vmiminus1, 
vmiminus1, &tmp)) != BIG_OK) 2580 goto ret; 2581 t = vmiminus1; 2582 vmiminus1 = vmi; 2583 vmi = t; 2584 if ((err = big_mul(&tmp, vei, xi)) != BIG_OK) 2585 goto ret; 2586 if ((err = big_sub(veiminus1, veiminus1, &tmp)) != BIG_OK) 2587 goto ret; 2588 t = veiminus1; 2589 veiminus1 = vei; 2590 vei = t; 2591 if ((err = big_div_pos(xi, ri, riminus2, riminus1)) != BIG_OK) 2592 goto ret; 2593 } 2594 if ((gcd != NULL) && ((err = big_copy(gcd, riminus1)) != BIG_OK)) 2595 goto ret; 2596 if ((cm != NULL) && ((err = big_copy(cm, vmi)) != BIG_OK)) 2597 goto ret; 2598 if (ce != NULL) 2599 err = big_copy(ce, vei); 2600 ret: 2601 if (tmp.malloced) big_finish(&tmp); 2602 ret8: 2603 if (t8.malloced) big_finish(&t8); 2604 ret7: 2605 if (t7.malloced) big_finish(&t7); 2606 ret6: 2607 if (t6.malloced) big_finish(&t6); 2608 ret5: 2609 if (t5.malloced) big_finish(&t5); 2610 ret4: 2611 if (t4.malloced) big_finish(&t4); 2612 ret3: 2613 if (t3.malloced) big_finish(&t3); 2614 ret2: 2615 if (t2.malloced) big_finish(&t2); 2616 ret1: 2617 if (t1.malloced) big_finish(&t1); 2618 2619 return (err); 2620 } 2621