1 /* 2 * Minimal code for RSA support from LibTomMath 0.41 3 * http://libtom.org/ 4 * http://libtom.org/files/ltm-0.41.tar.bz2 5 * This library was released in public domain by Tom St Denis. 6 * 7 * The combination in this file may not use all of the optimized algorithms 8 * from LibTomMath and may be considerable slower than the LibTomMath with its 9 * default settings. The main purpose of having this version here is to make it 10 * easier to build bignum.c wrapper without having to install and build an 11 * external library. 12 * 13 * If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this 14 * libtommath.c file instead of using the external LibTomMath library. 15 */ 16 17 #ifndef CHAR_BIT 18 #define CHAR_BIT 8 19 #endif 20 21 #define BN_MP_INVMOD_C 22 #define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would 23 * require BN_MP_EXPTMOD_FAST_C instead */ 24 #define BN_S_MP_MUL_DIGS_C 25 #define BN_MP_INVMOD_SLOW_C 26 #define BN_S_MP_SQR_C 27 #define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this 28 * would require other than mp_reduce */ 29 30 #ifdef LTM_FAST 31 32 /* Use faster div at the cost of about 1 kB */ 33 #define BN_MP_MUL_D_C 34 35 /* Include faster exptmod (Montgomery) at the cost of about 2.5 kB in code */ 36 #define BN_MP_EXPTMOD_FAST_C 37 #define BN_MP_MONTGOMERY_SETUP_C 38 #define BN_FAST_MP_MONTGOMERY_REDUCE_C 39 #define BN_MP_MONTGOMERY_CALC_NORMALIZATION_C 40 #define BN_MP_MUL_2_C 41 42 /* Include faster sqr at the cost of about 0.5 kB in code */ 43 #define BN_FAST_S_MP_SQR_C 44 45 /* About 0.25 kB of code, but ~1.7kB of stack space! */ 46 #define BN_FAST_S_MP_MUL_DIGS_C 47 48 #else /* LTM_FAST */ 49 50 #define BN_MP_DIV_SMALL 51 #define BN_MP_INIT_MULTI_C 52 #define BN_MP_CLEAR_MULTI_C 53 #define BN_MP_ABS_C 54 #endif /* LTM_FAST */ 55 56 /* Current uses do not require support for negative exponent in exptmod, so we 57 * can save about 1.5 kB in leaving out invmod. */ 58 #define LTM_NO_NEG_EXP 59 60 /* from tommath.h */ 61 62 #define OPT_CAST(x) 63 64 #ifdef __x86_64__ 65 typedef unsigned long mp_digit; 66 typedef unsigned long mp_word __attribute__((mode(TI))); 67 68 #define DIGIT_BIT 60 69 #define MP_64BIT 70 #else 71 typedef unsigned long mp_digit; 72 typedef u64 mp_word; 73 74 #define DIGIT_BIT 28 75 #define MP_28BIT 76 #endif 77 78 79 #define XMALLOC os_malloc 80 #define XFREE os_free 81 #define XREALLOC os_realloc 82 83 84 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 85 86 #define MP_LT -1 /* less than */ 87 #define MP_EQ 0 /* equal to */ 88 #define MP_GT 1 /* greater than */ 89 90 #define MP_ZPOS 0 /* positive integer */ 91 #define MP_NEG 1 /* negative */ 92 93 #define MP_OKAY 0 /* ok result */ 94 #define MP_MEM -2 /* out of mem */ 95 #define MP_VAL -3 /* invalid input */ 96 97 #define MP_YES 1 /* yes response */ 98 #define MP_NO 0 /* no response */ 99 100 typedef int mp_err; 101 102 /* define this to use lower memory usage routines (exptmods mostly) */ 103 #define MP_LOW_MEM 104 105 /* default precision */ 106 #ifndef MP_PREC 107 #ifndef MP_LOW_MEM 108 #define MP_PREC 32 /* default digits of precision */ 109 #else 110 #define MP_PREC 8 /* default digits of precision */ 111 #endif 112 #endif 113 114 /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */ 115 #define MP_WARRAY (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1)) 116 117 /* the infamous mp_int structure */ 118 typedef struct { 119 int used, alloc, sign; 120 mp_digit *dp; 121 } mp_int; 122 123 124 /* ---> Basic Manipulations <--- */ 125 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO) 126 #define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO) 127 #define mp_isodd(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO) 128 129 130 /* prototypes for copied functions */ 131 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) 132 static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode); 133 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs); 134 static int s_mp_sqr(mp_int * a, mp_int * b); 135 static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs); 136 137 #ifdef BN_FAST_S_MP_MUL_DIGS_C 138 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs); 139 #endif 140 141 #ifdef BN_MP_INIT_MULTI_C 142 static int mp_init_multi(mp_int *mp, ...); 143 #endif 144 #ifdef BN_MP_CLEAR_MULTI_C 145 static void mp_clear_multi(mp_int *mp, ...); 146 #endif 147 static int mp_lshd(mp_int * a, int b); 148 static void mp_set(mp_int * a, mp_digit b); 149 static void mp_clamp(mp_int * a); 150 static void mp_exch(mp_int * a, mp_int * b); 151 static void mp_rshd(mp_int * a, int b); 152 static void mp_zero(mp_int * a); 153 static int mp_mod_2d(mp_int * a, int b, mp_int * c); 154 static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d); 155 static int mp_init_copy(mp_int * a, mp_int * b); 156 static int mp_mul_2d(mp_int * a, int b, mp_int * c); 157 #ifndef LTM_NO_NEG_EXP 158 static int mp_div_2(mp_int * a, mp_int * b); 159 static int mp_invmod(mp_int * a, mp_int * b, mp_int * c); 160 static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c); 161 #endif /* LTM_NO_NEG_EXP */ 162 static int mp_copy(mp_int * a, mp_int * b); 163 static int mp_count_bits(mp_int * a); 164 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d); 165 static int mp_mod(mp_int * a, mp_int * b, mp_int * c); 166 static int mp_grow(mp_int * a, int size); 167 static int mp_cmp_mag(mp_int * a, mp_int * b); 168 #ifdef BN_MP_ABS_C 169 static int mp_abs(mp_int * a, mp_int * b); 170 #endif 171 static int mp_sqr(mp_int * a, mp_int * b); 172 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d); 173 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d); 174 static int mp_2expt(mp_int * a, int b); 175 static int mp_reduce_setup(mp_int * a, mp_int * b); 176 static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu); 177 static int mp_init_size(mp_int * a, int size); 178 #ifdef BN_MP_EXPTMOD_FAST_C 179 static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode); 180 #endif /* BN_MP_EXPTMOD_FAST_C */ 181 #ifdef BN_FAST_S_MP_SQR_C 182 static int fast_s_mp_sqr (mp_int * a, mp_int * b); 183 #endif /* BN_FAST_S_MP_SQR_C */ 184 #ifdef BN_MP_MUL_D_C 185 static int mp_mul_d (mp_int * a, mp_digit b, mp_int * c); 186 #endif /* BN_MP_MUL_D_C */ 187 188 189 190 /* functions from bn_<func name>.c */ 191 192 193 /* reverse an array, used for radix code */ 194 static void bn_reverse (unsigned char *s, int len) 195 { 196 int ix, iy; 197 unsigned char t; 198 199 ix = 0; 200 iy = len - 1; 201 while (ix < iy) { 202 t = s[ix]; 203 s[ix] = s[iy]; 204 s[iy] = t; 205 ++ix; 206 --iy; 207 } 208 } 209 210 211 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 212 static int s_mp_add (mp_int * a, mp_int * b, mp_int * c) 213 { 214 mp_int *x; 215 int olduse, res, min, max; 216 217 /* find sizes, we let |a| <= |b| which means we have to sort 218 * them. "x" will point to the input with the most digits 219 */ 220 if (a->used > b->used) { 221 min = b->used; 222 max = a->used; 223 x = a; 224 } else { 225 min = a->used; 226 max = b->used; 227 x = b; 228 } 229 230 /* init result */ 231 if (c->alloc < max + 1) { 232 if ((res = mp_grow (c, max + 1)) != MP_OKAY) { 233 return res; 234 } 235 } 236 237 /* get old used digit count and set new one */ 238 olduse = c->used; 239 c->used = max + 1; 240 241 { 242 register mp_digit u, *tmpa, *tmpb, *tmpc; 243 register int i; 244 245 /* alias for digit pointers */ 246 247 /* first input */ 248 tmpa = a->dp; 249 250 /* second input */ 251 tmpb = b->dp; 252 253 /* destination */ 254 tmpc = c->dp; 255 256 /* zero the carry */ 257 u = 0; 258 for (i = 0; i < min; i++) { 259 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 260 *tmpc = *tmpa++ + *tmpb++ + u; 261 262 /* U = carry bit of T[i] */ 263 u = *tmpc >> ((mp_digit)DIGIT_BIT); 264 265 /* take away carry bit from T[i] */ 266 *tmpc++ &= MP_MASK; 267 } 268 269 /* now copy higher words if any, that is in A+B 270 * if A or B has more digits add those in 271 */ 272 if (min != max) { 273 for (; i < max; i++) { 274 /* T[i] = X[i] + U */ 275 *tmpc = x->dp[i] + u; 276 277 /* U = carry bit of T[i] */ 278 u = *tmpc >> ((mp_digit)DIGIT_BIT); 279 280 /* take away carry bit from T[i] */ 281 *tmpc++ &= MP_MASK; 282 } 283 } 284 285 /* add carry */ 286 *tmpc++ = u; 287 288 /* clear digits above oldused */ 289 for (i = c->used; i < olduse; i++) { 290 *tmpc++ = 0; 291 } 292 } 293 294 mp_clamp (c); 295 return MP_OKAY; 296 } 297 298 299 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 300 static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c) 301 { 302 int olduse, res, min, max; 303 304 /* find sizes */ 305 min = b->used; 306 max = a->used; 307 308 /* init result */ 309 if (c->alloc < max) { 310 if ((res = mp_grow (c, max)) != MP_OKAY) { 311 return res; 312 } 313 } 314 olduse = c->used; 315 c->used = max; 316 317 { 318 register mp_digit u, *tmpa, *tmpb, *tmpc; 319 register int i; 320 321 /* alias for digit pointers */ 322 tmpa = a->dp; 323 tmpb = b->dp; 324 tmpc = c->dp; 325 326 /* set carry to zero */ 327 u = 0; 328 for (i = 0; i < min; i++) { 329 /* T[i] = A[i] - B[i] - U */ 330 *tmpc = *tmpa++ - *tmpb++ - u; 331 332 /* U = carry bit of T[i] 333 * Note this saves performing an AND operation since 334 * if a carry does occur it will propagate all the way to the 335 * MSB. As a result a single shift is enough to get the carry 336 */ 337 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 338 339 /* Clear carry from T[i] */ 340 *tmpc++ &= MP_MASK; 341 } 342 343 /* now copy higher words if any, e.g. if A has more digits than B */ 344 for (; i < max; i++) { 345 /* T[i] = A[i] - U */ 346 *tmpc = *tmpa++ - u; 347 348 /* U = carry bit of T[i] */ 349 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 350 351 /* Clear carry from T[i] */ 352 *tmpc++ &= MP_MASK; 353 } 354 355 /* clear digits above used (since we may not have grown result above) */ 356 for (i = c->used; i < olduse; i++) { 357 *tmpc++ = 0; 358 } 359 } 360 361 mp_clamp (c); 362 return MP_OKAY; 363 } 364 365 366 /* init a new mp_int */ 367 static int mp_init (mp_int * a) 368 { 369 int i; 370 371 /* allocate memory required and clear it */ 372 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC); 373 if (a->dp == NULL) { 374 return MP_MEM; 375 } 376 377 /* set the digits to zero */ 378 for (i = 0; i < MP_PREC; i++) { 379 a->dp[i] = 0; 380 } 381 382 /* set the used to zero, allocated digits to the default precision 383 * and sign to positive */ 384 a->used = 0; 385 a->alloc = MP_PREC; 386 a->sign = MP_ZPOS; 387 388 return MP_OKAY; 389 } 390 391 392 /* clear one (frees) */ 393 static void mp_clear (mp_int * a) 394 { 395 int i; 396 397 /* only do anything if a hasn't been freed previously */ 398 if (a->dp != NULL) { 399 /* first zero the digits */ 400 for (i = 0; i < a->used; i++) { 401 a->dp[i] = 0; 402 } 403 404 /* free ram */ 405 XFREE(a->dp); 406 407 /* reset members to make debugging easier */ 408 a->dp = NULL; 409 a->alloc = a->used = 0; 410 a->sign = MP_ZPOS; 411 } 412 } 413 414 415 /* high level addition (handles signs) */ 416 static int mp_add (mp_int * a, mp_int * b, mp_int * c) 417 { 418 int sa, sb, res; 419 420 /* get sign of both inputs */ 421 sa = a->sign; 422 sb = b->sign; 423 424 /* handle two cases, not four */ 425 if (sa == sb) { 426 /* both positive or both negative */ 427 /* add their magnitudes, copy the sign */ 428 c->sign = sa; 429 res = s_mp_add (a, b, c); 430 } else { 431 /* one positive, the other negative */ 432 /* subtract the one with the greater magnitude from */ 433 /* the one of the lesser magnitude. The result gets */ 434 /* the sign of the one with the greater magnitude. */ 435 if (mp_cmp_mag (a, b) == MP_LT) { 436 c->sign = sb; 437 res = s_mp_sub (b, a, c); 438 } else { 439 c->sign = sa; 440 res = s_mp_sub (a, b, c); 441 } 442 } 443 return res; 444 } 445 446 447 /* high level subtraction (handles signs) */ 448 static int mp_sub (mp_int * a, mp_int * b, mp_int * c) 449 { 450 int sa, sb, res; 451 452 sa = a->sign; 453 sb = b->sign; 454 455 if (sa != sb) { 456 /* subtract a negative from a positive, OR */ 457 /* subtract a positive from a negative. */ 458 /* In either case, ADD their magnitudes, */ 459 /* and use the sign of the first number. */ 460 c->sign = sa; 461 res = s_mp_add (a, b, c); 462 } else { 463 /* subtract a positive from a positive, OR */ 464 /* subtract a negative from a negative. */ 465 /* First, take the difference between their */ 466 /* magnitudes, then... */ 467 if (mp_cmp_mag (a, b) != MP_LT) { 468 /* Copy the sign from the first */ 469 c->sign = sa; 470 /* The first has a larger or equal magnitude */ 471 res = s_mp_sub (a, b, c); 472 } else { 473 /* The result has the *opposite* sign from */ 474 /* the first number. */ 475 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 476 /* The second has a larger magnitude */ 477 res = s_mp_sub (b, a, c); 478 } 479 } 480 return res; 481 } 482 483 484 /* high level multiplication (handles sign) */ 485 static int mp_mul (mp_int * a, mp_int * b, mp_int * c) 486 { 487 int res, neg; 488 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 489 490 /* use Toom-Cook? */ 491 #ifdef BN_MP_TOOM_MUL_C 492 if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) { 493 res = mp_toom_mul(a, b, c); 494 } else 495 #endif 496 #ifdef BN_MP_KARATSUBA_MUL_C 497 /* use Karatsuba? */ 498 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 499 res = mp_karatsuba_mul (a, b, c); 500 } else 501 #endif 502 { 503 /* can we use the fast multiplier? 504 * 505 * The fast multiplier can be used if the output will 506 * have less than MP_WARRAY digits and the number of 507 * digits won't affect carry propagation 508 */ 509 #ifdef BN_FAST_S_MP_MUL_DIGS_C 510 int digs = a->used + b->used + 1; 511 512 if ((digs < MP_WARRAY) && 513 MIN(a->used, b->used) <= 514 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 515 res = fast_s_mp_mul_digs (a, b, c, digs); 516 } else 517 #endif 518 #ifdef BN_S_MP_MUL_DIGS_C 519 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */ 520 #else 521 #error mp_mul could fail 522 res = MP_VAL; 523 #endif 524 525 } 526 c->sign = (c->used > 0) ? neg : MP_ZPOS; 527 return res; 528 } 529 530 531 /* d = a * b (mod c) */ 532 static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) 533 { 534 int res; 535 mp_int t; 536 537 if ((res = mp_init (&t)) != MP_OKAY) { 538 return res; 539 } 540 541 if ((res = mp_mul (a, b, &t)) != MP_OKAY) { 542 mp_clear (&t); 543 return res; 544 } 545 res = mp_mod (&t, c, d); 546 mp_clear (&t); 547 return res; 548 } 549 550 551 /* c = a mod b, 0 <= c < b */ 552 static int mp_mod (mp_int * a, mp_int * b, mp_int * c) 553 { 554 mp_int t; 555 int res; 556 557 if ((res = mp_init (&t)) != MP_OKAY) { 558 return res; 559 } 560 561 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) { 562 mp_clear (&t); 563 return res; 564 } 565 566 if (t.sign != b->sign) { 567 res = mp_add (b, &t, c); 568 } else { 569 res = MP_OKAY; 570 mp_exch (&t, c); 571 } 572 573 mp_clear (&t); 574 return res; 575 } 576 577 578 /* this is a shell function that calls either the normal or Montgomery 579 * exptmod functions. Originally the call to the montgomery code was 580 * embedded in the normal function but that wasted a lot of stack space 581 * for nothing (since 99% of the time the Montgomery code would be called) 582 */ 583 static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y) 584 { 585 int dr; 586 587 /* modulus P must be positive */ 588 if (P->sign == MP_NEG) { 589 return MP_VAL; 590 } 591 592 /* if exponent X is negative we have to recurse */ 593 if (X->sign == MP_NEG) { 594 #ifdef LTM_NO_NEG_EXP 595 return MP_VAL; 596 #else /* LTM_NO_NEG_EXP */ 597 #ifdef BN_MP_INVMOD_C 598 mp_int tmpG, tmpX; 599 int err; 600 601 /* first compute 1/G mod P */ 602 if ((err = mp_init(&tmpG)) != MP_OKAY) { 603 return err; 604 } 605 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) { 606 mp_clear(&tmpG); 607 return err; 608 } 609 610 /* now get |X| */ 611 if ((err = mp_init(&tmpX)) != MP_OKAY) { 612 mp_clear(&tmpG); 613 return err; 614 } 615 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) { 616 mp_clear_multi(&tmpG, &tmpX, NULL); 617 return err; 618 } 619 620 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 621 err = mp_exptmod(&tmpG, &tmpX, P, Y); 622 mp_clear_multi(&tmpG, &tmpX, NULL); 623 return err; 624 #else 625 #error mp_exptmod would always fail 626 /* no invmod */ 627 return MP_VAL; 628 #endif 629 #endif /* LTM_NO_NEG_EXP */ 630 } 631 632 /* modified diminished radix reduction */ 633 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C) 634 if (mp_reduce_is_2k_l(P) == MP_YES) { 635 return s_mp_exptmod(G, X, P, Y, 1); 636 } 637 #endif 638 639 #ifdef BN_MP_DR_IS_MODULUS_C 640 /* is it a DR modulus? */ 641 dr = mp_dr_is_modulus(P); 642 #else 643 /* default to no */ 644 dr = 0; 645 #endif 646 647 #ifdef BN_MP_REDUCE_IS_2K_C 648 /* if not, is it a unrestricted DR modulus? */ 649 if (dr == 0) { 650 dr = mp_reduce_is_2k(P) << 1; 651 } 652 #endif 653 654 /* if the modulus is odd or dr != 0 use the montgomery method */ 655 #ifdef BN_MP_EXPTMOD_FAST_C 656 if (mp_isodd (P) == 1 || dr != 0) { 657 return mp_exptmod_fast (G, X, P, Y, dr); 658 } else { 659 #endif 660 #ifdef BN_S_MP_EXPTMOD_C 661 /* otherwise use the generic Barrett reduction technique */ 662 return s_mp_exptmod (G, X, P, Y, 0); 663 #else 664 #error mp_exptmod could fail 665 /* no exptmod for evens */ 666 return MP_VAL; 667 #endif 668 #ifdef BN_MP_EXPTMOD_FAST_C 669 } 670 #endif 671 if (dr == 0) { 672 /* avoid compiler warnings about possibly unused variable */ 673 } 674 } 675 676 677 /* compare two ints (signed)*/ 678 static int mp_cmp (mp_int * a, mp_int * b) 679 { 680 /* compare based on sign */ 681 if (a->sign != b->sign) { 682 if (a->sign == MP_NEG) { 683 return MP_LT; 684 } else { 685 return MP_GT; 686 } 687 } 688 689 /* compare digits */ 690 if (a->sign == MP_NEG) { 691 /* if negative compare opposite direction */ 692 return mp_cmp_mag(b, a); 693 } else { 694 return mp_cmp_mag(a, b); 695 } 696 } 697 698 699 /* compare a digit */ 700 static int mp_cmp_d(mp_int * a, mp_digit b) 701 { 702 /* compare based on sign */ 703 if (a->sign == MP_NEG) { 704 return MP_LT; 705 } 706 707 /* compare based on magnitude */ 708 if (a->used > 1) { 709 return MP_GT; 710 } 711 712 /* compare the only digit of a to b */ 713 if (a->dp[0] > b) { 714 return MP_GT; 715 } else if (a->dp[0] < b) { 716 return MP_LT; 717 } else { 718 return MP_EQ; 719 } 720 } 721 722 723 #ifndef LTM_NO_NEG_EXP 724 /* hac 14.61, pp608 */ 725 static int mp_invmod (mp_int * a, mp_int * b, mp_int * c) 726 { 727 /* b cannot be negative */ 728 if (b->sign == MP_NEG || mp_iszero(b) == 1) { 729 return MP_VAL; 730 } 731 732 #ifdef BN_FAST_MP_INVMOD_C 733 /* if the modulus is odd we can use a faster routine instead */ 734 if (mp_isodd (b) == 1) { 735 return fast_mp_invmod (a, b, c); 736 } 737 #endif 738 739 #ifdef BN_MP_INVMOD_SLOW_C 740 return mp_invmod_slow(a, b, c); 741 #endif 742 743 #ifndef BN_FAST_MP_INVMOD_C 744 #ifndef BN_MP_INVMOD_SLOW_C 745 #error mp_invmod would always fail 746 #endif 747 #endif 748 return MP_VAL; 749 } 750 #endif /* LTM_NO_NEG_EXP */ 751 752 753 /* get the size for an unsigned equivalent */ 754 static int mp_unsigned_bin_size (mp_int * a) 755 { 756 int size = mp_count_bits (a); 757 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 758 } 759 760 761 #ifndef LTM_NO_NEG_EXP 762 /* hac 14.61, pp608 */ 763 static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c) 764 { 765 mp_int x, y, u, v, A, B, C, D; 766 int res; 767 768 /* b cannot be negative */ 769 if (b->sign == MP_NEG || mp_iszero(b) == 1) { 770 return MP_VAL; 771 } 772 773 /* init temps */ 774 if ((res = mp_init_multi(&x, &y, &u, &v, 775 &A, &B, &C, &D, NULL)) != MP_OKAY) { 776 return res; 777 } 778 779 /* x = a, y = b */ 780 if ((res = mp_mod(a, b, &x)) != MP_OKAY) { 781 goto LBL_ERR; 782 } 783 if ((res = mp_copy (b, &y)) != MP_OKAY) { 784 goto LBL_ERR; 785 } 786 787 /* 2. [modified] if x,y are both even then return an error! */ 788 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) { 789 res = MP_VAL; 790 goto LBL_ERR; 791 } 792 793 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 794 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 795 goto LBL_ERR; 796 } 797 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 798 goto LBL_ERR; 799 } 800 mp_set (&A, 1); 801 mp_set (&D, 1); 802 803 top: 804 /* 4. while u is even do */ 805 while (mp_iseven (&u) == 1) { 806 /* 4.1 u = u/2 */ 807 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 808 goto LBL_ERR; 809 } 810 /* 4.2 if A or B is odd then */ 811 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) { 812 /* A = (A+y)/2, B = (B-x)/2 */ 813 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) { 814 goto LBL_ERR; 815 } 816 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 817 goto LBL_ERR; 818 } 819 } 820 /* A = A/2, B = B/2 */ 821 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) { 822 goto LBL_ERR; 823 } 824 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 825 goto LBL_ERR; 826 } 827 } 828 829 /* 5. while v is even do */ 830 while (mp_iseven (&v) == 1) { 831 /* 5.1 v = v/2 */ 832 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 833 goto LBL_ERR; 834 } 835 /* 5.2 if C or D is odd then */ 836 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) { 837 /* C = (C+y)/2, D = (D-x)/2 */ 838 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) { 839 goto LBL_ERR; 840 } 841 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 842 goto LBL_ERR; 843 } 844 } 845 /* C = C/2, D = D/2 */ 846 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) { 847 goto LBL_ERR; 848 } 849 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 850 goto LBL_ERR; 851 } 852 } 853 854 /* 6. if u >= v then */ 855 if (mp_cmp (&u, &v) != MP_LT) { 856 /* u = u - v, A = A - C, B = B - D */ 857 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 858 goto LBL_ERR; 859 } 860 861 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) { 862 goto LBL_ERR; 863 } 864 865 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 866 goto LBL_ERR; 867 } 868 } else { 869 /* v - v - u, C = C - A, D = D - B */ 870 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 871 goto LBL_ERR; 872 } 873 874 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) { 875 goto LBL_ERR; 876 } 877 878 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 879 goto LBL_ERR; 880 } 881 } 882 883 /* if not zero goto step 4 */ 884 if (mp_iszero (&u) == 0) 885 goto top; 886 887 /* now a = C, b = D, gcd == g*v */ 888 889 /* if v != 1 then there is no inverse */ 890 if (mp_cmp_d (&v, 1) != MP_EQ) { 891 res = MP_VAL; 892 goto LBL_ERR; 893 } 894 895 /* if its too low */ 896 while (mp_cmp_d(&C, 0) == MP_LT) { 897 if ((res = mp_add(&C, b, &C)) != MP_OKAY) { 898 goto LBL_ERR; 899 } 900 } 901 902 /* too big */ 903 while (mp_cmp_mag(&C, b) != MP_LT) { 904 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) { 905 goto LBL_ERR; 906 } 907 } 908 909 /* C is now the inverse */ 910 mp_exch (&C, c); 911 res = MP_OKAY; 912 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); 913 return res; 914 } 915 #endif /* LTM_NO_NEG_EXP */ 916 917 918 /* compare maginitude of two ints (unsigned) */ 919 static int mp_cmp_mag (mp_int * a, mp_int * b) 920 { 921 int n; 922 mp_digit *tmpa, *tmpb; 923 924 /* compare based on # of non-zero digits */ 925 if (a->used > b->used) { 926 return MP_GT; 927 } 928 929 if (a->used < b->used) { 930 return MP_LT; 931 } 932 933 /* alias for a */ 934 tmpa = a->dp + (a->used - 1); 935 936 /* alias for b */ 937 tmpb = b->dp + (a->used - 1); 938 939 /* compare based on digits */ 940 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 941 if (*tmpa > *tmpb) { 942 return MP_GT; 943 } 944 945 if (*tmpa < *tmpb) { 946 return MP_LT; 947 } 948 } 949 return MP_EQ; 950 } 951 952 953 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 954 static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c) 955 { 956 int res; 957 958 /* make sure there are at least two digits */ 959 if (a->alloc < 2) { 960 if ((res = mp_grow(a, 2)) != MP_OKAY) { 961 return res; 962 } 963 } 964 965 /* zero the int */ 966 mp_zero (a); 967 968 /* read the bytes in */ 969 while (c-- > 0) { 970 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { 971 return res; 972 } 973 974 #ifndef MP_8BIT 975 a->dp[0] |= *b++; 976 a->used += 1; 977 #else 978 a->dp[0] = (*b & MP_MASK); 979 a->dp[1] |= ((*b++ >> 7U) & 1); 980 a->used += 2; 981 #endif 982 } 983 mp_clamp (a); 984 return MP_OKAY; 985 } 986 987 988 /* store in unsigned [big endian] format */ 989 static int mp_to_unsigned_bin (mp_int * a, unsigned char *b) 990 { 991 int x, res; 992 mp_int t; 993 994 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 995 return res; 996 } 997 998 x = 0; 999 while (mp_iszero (&t) == 0) { 1000 #ifndef MP_8BIT 1001 b[x++] = (unsigned char) (t.dp[0] & 255); 1002 #else 1003 b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7)); 1004 #endif 1005 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) { 1006 mp_clear (&t); 1007 return res; 1008 } 1009 } 1010 bn_reverse (b, x); 1011 mp_clear (&t); 1012 return MP_OKAY; 1013 } 1014 1015 1016 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 1017 static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d) 1018 { 1019 mp_digit D, r, rr; 1020 int x, res; 1021 mp_int t; 1022 1023 1024 /* if the shift count is <= 0 then we do no work */ 1025 if (b <= 0) { 1026 res = mp_copy (a, c); 1027 if (d != NULL) { 1028 mp_zero (d); 1029 } 1030 return res; 1031 } 1032 1033 if ((res = mp_init (&t)) != MP_OKAY) { 1034 return res; 1035 } 1036 1037 /* get the remainder */ 1038 if (d != NULL) { 1039 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) { 1040 mp_clear (&t); 1041 return res; 1042 } 1043 } 1044 1045 /* copy */ 1046 if ((res = mp_copy (a, c)) != MP_OKAY) { 1047 mp_clear (&t); 1048 return res; 1049 } 1050 1051 /* shift by as many digits in the bit count */ 1052 if (b >= (int)DIGIT_BIT) { 1053 mp_rshd (c, b / DIGIT_BIT); 1054 } 1055 1056 /* shift any bit count < DIGIT_BIT */ 1057 D = (mp_digit) (b % DIGIT_BIT); 1058 if (D != 0) { 1059 register mp_digit *tmpc, mask, shift; 1060 1061 /* mask */ 1062 mask = (((mp_digit)1) << D) - 1; 1063 1064 /* shift for lsb */ 1065 shift = DIGIT_BIT - D; 1066 1067 /* alias */ 1068 tmpc = c->dp + (c->used - 1); 1069 1070 /* carry */ 1071 r = 0; 1072 for (x = c->used - 1; x >= 0; x--) { 1073 /* get the lower bits of this word in a temp */ 1074 rr = *tmpc & mask; 1075 1076 /* shift the current word and mix in the carry bits from the previous word */ 1077 *tmpc = (*tmpc >> D) | (r << shift); 1078 --tmpc; 1079 1080 /* set the carry to the carry bits of the current word found above */ 1081 r = rr; 1082 } 1083 } 1084 mp_clamp (c); 1085 if (d != NULL) { 1086 mp_exch (&t, d); 1087 } 1088 mp_clear (&t); 1089 return MP_OKAY; 1090 } 1091 1092 1093 static int mp_init_copy (mp_int * a, mp_int * b) 1094 { 1095 int res; 1096 1097 if ((res = mp_init (a)) != MP_OKAY) { 1098 return res; 1099 } 1100 return mp_copy (b, a); 1101 } 1102 1103 1104 /* set to zero */ 1105 static void mp_zero (mp_int * a) 1106 { 1107 int n; 1108 mp_digit *tmp; 1109 1110 a->sign = MP_ZPOS; 1111 a->used = 0; 1112 1113 tmp = a->dp; 1114 for (n = 0; n < a->alloc; n++) { 1115 *tmp++ = 0; 1116 } 1117 } 1118 1119 1120 /* copy, b = a */ 1121 static int mp_copy (mp_int * a, mp_int * b) 1122 { 1123 int res, n; 1124 1125 /* if dst == src do nothing */ 1126 if (a == b) { 1127 return MP_OKAY; 1128 } 1129 1130 /* grow dest */ 1131 if (b->alloc < a->used) { 1132 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 1133 return res; 1134 } 1135 } 1136 1137 /* zero b and copy the parameters over */ 1138 { 1139 register mp_digit *tmpa, *tmpb; 1140 1141 /* pointer aliases */ 1142 1143 /* source */ 1144 tmpa = a->dp; 1145 1146 /* destination */ 1147 tmpb = b->dp; 1148 1149 /* copy all the digits */ 1150 for (n = 0; n < a->used; n++) { 1151 *tmpb++ = *tmpa++; 1152 } 1153 1154 /* clear high digits */ 1155 for (; n < b->used; n++) { 1156 *tmpb++ = 0; 1157 } 1158 } 1159 1160 /* copy used count and sign */ 1161 b->used = a->used; 1162 b->sign = a->sign; 1163 return MP_OKAY; 1164 } 1165 1166 1167 /* shift right a certain amount of digits */ 1168 static void mp_rshd (mp_int * a, int b) 1169 { 1170 int x; 1171 1172 /* if b <= 0 then ignore it */ 1173 if (b <= 0) { 1174 return; 1175 } 1176 1177 /* if b > used then simply zero it and return */ 1178 if (a->used <= b) { 1179 mp_zero (a); 1180 return; 1181 } 1182 1183 { 1184 register mp_digit *bottom, *top; 1185 1186 /* shift the digits down */ 1187 1188 /* bottom */ 1189 bottom = a->dp; 1190 1191 /* top [offset into digits] */ 1192 top = a->dp + b; 1193 1194 /* this is implemented as a sliding window where 1195 * the window is b-digits long and digits from 1196 * the top of the window are copied to the bottom 1197 * 1198 * e.g. 1199 1200 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 1201 /\ | ----> 1202 \-------------------/ ----> 1203 */ 1204 for (x = 0; x < (a->used - b); x++) { 1205 *bottom++ = *top++; 1206 } 1207 1208 /* zero the top digits */ 1209 for (; x < a->used; x++) { 1210 *bottom++ = 0; 1211 } 1212 } 1213 1214 /* remove excess digits */ 1215 a->used -= b; 1216 } 1217 1218 1219 /* swap the elements of two integers, for cases where you can't simply swap the 1220 * mp_int pointers around 1221 */ 1222 static void mp_exch (mp_int * a, mp_int * b) 1223 { 1224 mp_int t; 1225 1226 t = *a; 1227 *a = *b; 1228 *b = t; 1229 } 1230 1231 1232 /* trim unused digits 1233 * 1234 * This is used to ensure that leading zero digits are 1235 * trimed and the leading "used" digit will be non-zero 1236 * Typically very fast. Also fixes the sign if there 1237 * are no more leading digits 1238 */ 1239 static void mp_clamp (mp_int * a) 1240 { 1241 /* decrease used while the most significant digit is 1242 * zero. 1243 */ 1244 while (a->used > 0 && a->dp[a->used - 1] == 0) { 1245 --(a->used); 1246 } 1247 1248 /* reset the sign flag if used == 0 */ 1249 if (a->used == 0) { 1250 a->sign = MP_ZPOS; 1251 } 1252 } 1253 1254 1255 /* grow as required */ 1256 static int mp_grow (mp_int * a, int size) 1257 { 1258 int i; 1259 mp_digit *tmp; 1260 1261 /* if the alloc size is smaller alloc more ram */ 1262 if (a->alloc < size) { 1263 /* ensure there are always at least MP_PREC digits extra on top */ 1264 size += (MP_PREC * 2) - (size % MP_PREC); 1265 1266 /* reallocate the array a->dp 1267 * 1268 * We store the return in a temporary variable 1269 * in case the operation failed we don't want 1270 * to overwrite the dp member of a. 1271 */ 1272 tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size); 1273 if (tmp == NULL) { 1274 /* reallocation failed but "a" is still valid [can be freed] */ 1275 return MP_MEM; 1276 } 1277 1278 /* reallocation succeeded so set a->dp */ 1279 a->dp = tmp; 1280 1281 /* zero excess digits */ 1282 i = a->alloc; 1283 a->alloc = size; 1284 for (; i < a->alloc; i++) { 1285 a->dp[i] = 0; 1286 } 1287 } 1288 return MP_OKAY; 1289 } 1290 1291 1292 #ifdef BN_MP_ABS_C 1293 /* b = |a| 1294 * 1295 * Simple function copies the input and fixes the sign to positive 1296 */ 1297 static int mp_abs (mp_int * a, mp_int * b) 1298 { 1299 int res; 1300 1301 /* copy a to b */ 1302 if (a != b) { 1303 if ((res = mp_copy (a, b)) != MP_OKAY) { 1304 return res; 1305 } 1306 } 1307 1308 /* force the sign of b to positive */ 1309 b->sign = MP_ZPOS; 1310 1311 return MP_OKAY; 1312 } 1313 #endif 1314 1315 1316 /* set to a digit */ 1317 static void mp_set (mp_int * a, mp_digit b) 1318 { 1319 mp_zero (a); 1320 a->dp[0] = b & MP_MASK; 1321 a->used = (a->dp[0] != 0) ? 1 : 0; 1322 } 1323 1324 1325 #ifndef LTM_NO_NEG_EXP 1326 /* b = a/2 */ 1327 static int mp_div_2(mp_int * a, mp_int * b) 1328 { 1329 int x, res, oldused; 1330 1331 /* copy */ 1332 if (b->alloc < a->used) { 1333 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 1334 return res; 1335 } 1336 } 1337 1338 oldused = b->used; 1339 b->used = a->used; 1340 { 1341 register mp_digit r, rr, *tmpa, *tmpb; 1342 1343 /* source alias */ 1344 tmpa = a->dp + b->used - 1; 1345 1346 /* dest alias */ 1347 tmpb = b->dp + b->used - 1; 1348 1349 /* carry */ 1350 r = 0; 1351 for (x = b->used - 1; x >= 0; x--) { 1352 /* get the carry for the next iteration */ 1353 rr = *tmpa & 1; 1354 1355 /* shift the current digit, add in carry and store */ 1356 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 1357 1358 /* forward carry to next iteration */ 1359 r = rr; 1360 } 1361 1362 /* zero excess digits */ 1363 tmpb = b->dp + b->used; 1364 for (x = b->used; x < oldused; x++) { 1365 *tmpb++ = 0; 1366 } 1367 } 1368 b->sign = a->sign; 1369 mp_clamp (b); 1370 return MP_OKAY; 1371 } 1372 #endif /* LTM_NO_NEG_EXP */ 1373 1374 1375 /* shift left by a certain bit count */ 1376 static int mp_mul_2d (mp_int * a, int b, mp_int * c) 1377 { 1378 mp_digit d; 1379 int res; 1380 1381 /* copy */ 1382 if (a != c) { 1383 if ((res = mp_copy (a, c)) != MP_OKAY) { 1384 return res; 1385 } 1386 } 1387 1388 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 1389 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 1390 return res; 1391 } 1392 } 1393 1394 /* shift by as many digits in the bit count */ 1395 if (b >= (int)DIGIT_BIT) { 1396 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) { 1397 return res; 1398 } 1399 } 1400 1401 /* shift any bit count < DIGIT_BIT */ 1402 d = (mp_digit) (b % DIGIT_BIT); 1403 if (d != 0) { 1404 register mp_digit *tmpc, shift, mask, r, rr; 1405 register int x; 1406 1407 /* bitmask for carries */ 1408 mask = (((mp_digit)1) << d) - 1; 1409 1410 /* shift for msbs */ 1411 shift = DIGIT_BIT - d; 1412 1413 /* alias */ 1414 tmpc = c->dp; 1415 1416 /* carry */ 1417 r = 0; 1418 for (x = 0; x < c->used; x++) { 1419 /* get the higher bits of the current word */ 1420 rr = (*tmpc >> shift) & mask; 1421 1422 /* shift the current word and OR in the carry */ 1423 *tmpc = ((*tmpc << d) | r) & MP_MASK; 1424 ++tmpc; 1425 1426 /* set the carry to the carry bits of the current word */ 1427 r = rr; 1428 } 1429 1430 /* set final carry */ 1431 if (r != 0) { 1432 c->dp[(c->used)++] = r; 1433 } 1434 } 1435 mp_clamp (c); 1436 return MP_OKAY; 1437 } 1438 1439 1440 #ifdef BN_MP_INIT_MULTI_C 1441 static int mp_init_multi(mp_int *mp, ...) 1442 { 1443 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 1444 int n = 0; /* Number of ok inits */ 1445 mp_int* cur_arg = mp; 1446 va_list args; 1447 1448 va_start(args, mp); /* init args to next argument from caller */ 1449 while (cur_arg != NULL) { 1450 if (mp_init(cur_arg) != MP_OKAY) { 1451 /* Oops - error! Back-track and mp_clear what we already 1452 succeeded in init-ing, then return error. 1453 */ 1454 va_list clean_args; 1455 1456 /* end the current list */ 1457 va_end(args); 1458 1459 /* now start cleaning up */ 1460 cur_arg = mp; 1461 va_start(clean_args, mp); 1462 while (n--) { 1463 mp_clear(cur_arg); 1464 cur_arg = va_arg(clean_args, mp_int*); 1465 } 1466 va_end(clean_args); 1467 return MP_MEM; 1468 } 1469 n++; 1470 cur_arg = va_arg(args, mp_int*); 1471 } 1472 va_end(args); 1473 return res; /* Assumed ok, if error flagged above. */ 1474 } 1475 #endif 1476 1477 1478 #ifdef BN_MP_CLEAR_MULTI_C 1479 static void mp_clear_multi(mp_int *mp, ...) 1480 { 1481 mp_int* next_mp = mp; 1482 va_list args; 1483 va_start(args, mp); 1484 while (next_mp != NULL) { 1485 mp_clear(next_mp); 1486 next_mp = va_arg(args, mp_int*); 1487 } 1488 va_end(args); 1489 } 1490 #endif 1491 1492 1493 /* shift left a certain amount of digits */ 1494 static int mp_lshd (mp_int * a, int b) 1495 { 1496 int x, res; 1497 1498 /* if its less than zero return */ 1499 if (b <= 0) { 1500 return MP_OKAY; 1501 } 1502 1503 /* grow to fit the new digits */ 1504 if (a->alloc < a->used + b) { 1505 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) { 1506 return res; 1507 } 1508 } 1509 1510 { 1511 register mp_digit *top, *bottom; 1512 1513 /* increment the used by the shift amount then copy upwards */ 1514 a->used += b; 1515 1516 /* top */ 1517 top = a->dp + a->used - 1; 1518 1519 /* base */ 1520 bottom = a->dp + a->used - 1 - b; 1521 1522 /* much like mp_rshd this is implemented using a sliding window 1523 * except the window goes the otherway around. Copying from 1524 * the bottom to the top. see bn_mp_rshd.c for more info. 1525 */ 1526 for (x = a->used - 1; x >= b; x--) { 1527 *top-- = *bottom--; 1528 } 1529 1530 /* zero the lower digits */ 1531 top = a->dp; 1532 for (x = 0; x < b; x++) { 1533 *top++ = 0; 1534 } 1535 } 1536 return MP_OKAY; 1537 } 1538 1539 1540 /* returns the number of bits in an int */ 1541 static int mp_count_bits (mp_int * a) 1542 { 1543 int r; 1544 mp_digit q; 1545 1546 /* shortcut */ 1547 if (a->used == 0) { 1548 return 0; 1549 } 1550 1551 /* get number of digits and add that */ 1552 r = (a->used - 1) * DIGIT_BIT; 1553 1554 /* take the last digit and count the bits in it */ 1555 q = a->dp[a->used - 1]; 1556 while (q > ((mp_digit) 0)) { 1557 ++r; 1558 q >>= ((mp_digit) 1); 1559 } 1560 return r; 1561 } 1562 1563 1564 /* calc a value mod 2**b */ 1565 static int mp_mod_2d (mp_int * a, int b, mp_int * c) 1566 { 1567 int x, res; 1568 1569 /* if b is <= 0 then zero the int */ 1570 if (b <= 0) { 1571 mp_zero (c); 1572 return MP_OKAY; 1573 } 1574 1575 /* if the modulus is larger than the value than return */ 1576 if (b >= (int) (a->used * DIGIT_BIT)) { 1577 res = mp_copy (a, c); 1578 return res; 1579 } 1580 1581 /* copy */ 1582 if ((res = mp_copy (a, c)) != MP_OKAY) { 1583 return res; 1584 } 1585 1586 /* zero digits above the last digit of the modulus */ 1587 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 1588 c->dp[x] = 0; 1589 } 1590 /* clear the digit that is not completely outside/inside the modulus */ 1591 c->dp[b / DIGIT_BIT] &= 1592 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 1593 mp_clamp (c); 1594 return MP_OKAY; 1595 } 1596 1597 1598 #ifdef BN_MP_DIV_SMALL 1599 1600 /* slower bit-bang division... also smaller */ 1601 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d) 1602 { 1603 mp_int ta, tb, tq, q; 1604 int res, n, n2; 1605 1606 /* is divisor zero ? */ 1607 if (mp_iszero (b) == 1) { 1608 return MP_VAL; 1609 } 1610 1611 /* if a < b then q=0, r = a */ 1612 if (mp_cmp_mag (a, b) == MP_LT) { 1613 if (d != NULL) { 1614 res = mp_copy (a, d); 1615 } else { 1616 res = MP_OKAY; 1617 } 1618 if (c != NULL) { 1619 mp_zero (c); 1620 } 1621 return res; 1622 } 1623 1624 /* init our temps */ 1625 if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL)) != MP_OKAY) { 1626 return res; 1627 } 1628 1629 1630 mp_set(&tq, 1); 1631 n = mp_count_bits(a) - mp_count_bits(b); 1632 if (((res = mp_abs(a, &ta)) != MP_OKAY) || 1633 ((res = mp_abs(b, &tb)) != MP_OKAY) || 1634 ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) || 1635 ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) { 1636 goto LBL_ERR; 1637 } 1638 1639 while (n-- >= 0) { 1640 if (mp_cmp(&tb, &ta) != MP_GT) { 1641 if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) || 1642 ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) { 1643 goto LBL_ERR; 1644 } 1645 } 1646 if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) || 1647 ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) { 1648 goto LBL_ERR; 1649 } 1650 } 1651 1652 /* now q == quotient and ta == remainder */ 1653 n = a->sign; 1654 n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG); 1655 if (c != NULL) { 1656 mp_exch(c, &q); 1657 c->sign = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2; 1658 } 1659 if (d != NULL) { 1660 mp_exch(d, &ta); 1661 d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n; 1662 } 1663 LBL_ERR: 1664 mp_clear_multi(&ta, &tb, &tq, &q, NULL); 1665 return res; 1666 } 1667 1668 #else 1669 1670 /* integer signed division. 1671 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 1672 * HAC pp.598 Algorithm 14.20 1673 * 1674 * Note that the description in HAC is horribly 1675 * incomplete. For example, it doesn't consider 1676 * the case where digits are removed from 'x' in 1677 * the inner loop. It also doesn't consider the 1678 * case that y has fewer than three digits, etc.. 1679 * 1680 * The overall algorithm is as described as 1681 * 14.20 from HAC but fixed to treat these cases. 1682 */ 1683 static int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d) 1684 { 1685 mp_int q, x, y, t1, t2; 1686 int res, n, t, i, norm, neg; 1687 1688 /* is divisor zero ? */ 1689 if (mp_iszero (b) == 1) { 1690 return MP_VAL; 1691 } 1692 1693 /* if a < b then q=0, r = a */ 1694 if (mp_cmp_mag (a, b) == MP_LT) { 1695 if (d != NULL) { 1696 res = mp_copy (a, d); 1697 } else { 1698 res = MP_OKAY; 1699 } 1700 if (c != NULL) { 1701 mp_zero (c); 1702 } 1703 return res; 1704 } 1705 1706 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) { 1707 return res; 1708 } 1709 q.used = a->used + 2; 1710 1711 if ((res = mp_init (&t1)) != MP_OKAY) { 1712 goto LBL_Q; 1713 } 1714 1715 if ((res = mp_init (&t2)) != MP_OKAY) { 1716 goto LBL_T1; 1717 } 1718 1719 if ((res = mp_init_copy (&x, a)) != MP_OKAY) { 1720 goto LBL_T2; 1721 } 1722 1723 if ((res = mp_init_copy (&y, b)) != MP_OKAY) { 1724 goto LBL_X; 1725 } 1726 1727 /* fix the sign */ 1728 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 1729 x.sign = y.sign = MP_ZPOS; 1730 1731 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 1732 norm = mp_count_bits(&y) % DIGIT_BIT; 1733 if (norm < (int)(DIGIT_BIT-1)) { 1734 norm = (DIGIT_BIT-1) - norm; 1735 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { 1736 goto LBL_Y; 1737 } 1738 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { 1739 goto LBL_Y; 1740 } 1741 } else { 1742 norm = 0; 1743 } 1744 1745 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 1746 n = x.used - 1; 1747 t = y.used - 1; 1748 1749 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 1750 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 1751 goto LBL_Y; 1752 } 1753 1754 while (mp_cmp (&x, &y) != MP_LT) { 1755 ++(q.dp[n - t]); 1756 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) { 1757 goto LBL_Y; 1758 } 1759 } 1760 1761 /* reset y by shifting it back down */ 1762 mp_rshd (&y, n - t); 1763 1764 /* step 3. for i from n down to (t + 1) */ 1765 for (i = n; i >= (t + 1); i--) { 1766 if (i > x.used) { 1767 continue; 1768 } 1769 1770 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 1771 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 1772 if (x.dp[i] == y.dp[t]) { 1773 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 1774 } else { 1775 mp_word tmp; 1776 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 1777 tmp |= ((mp_word) x.dp[i - 1]); 1778 tmp /= ((mp_word) y.dp[t]); 1779 if (tmp > (mp_word) MP_MASK) 1780 tmp = MP_MASK; 1781 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 1782 } 1783 1784 /* while (q{i-t-1} * (yt * b + y{t-1})) > 1785 xi * b**2 + xi-1 * b + xi-2 1786 1787 do q{i-t-1} -= 1; 1788 */ 1789 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 1790 do { 1791 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 1792 1793 /* find left hand */ 1794 mp_zero (&t1); 1795 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 1796 t1.dp[1] = y.dp[t]; 1797 t1.used = 2; 1798 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1799 goto LBL_Y; 1800 } 1801 1802 /* find right hand */ 1803 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 1804 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 1805 t2.dp[2] = x.dp[i]; 1806 t2.used = 3; 1807 } while (mp_cmp_mag(&t1, &t2) == MP_GT); 1808 1809 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 1810 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1811 goto LBL_Y; 1812 } 1813 1814 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 1815 goto LBL_Y; 1816 } 1817 1818 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) { 1819 goto LBL_Y; 1820 } 1821 1822 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 1823 if (x.sign == MP_NEG) { 1824 if ((res = mp_copy (&y, &t1)) != MP_OKAY) { 1825 goto LBL_Y; 1826 } 1827 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 1828 goto LBL_Y; 1829 } 1830 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) { 1831 goto LBL_Y; 1832 } 1833 1834 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 1835 } 1836 } 1837 1838 /* now q is the quotient and x is the remainder 1839 * [which we have to normalize] 1840 */ 1841 1842 /* get sign before writing to c */ 1843 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 1844 1845 if (c != NULL) { 1846 mp_clamp (&q); 1847 mp_exch (&q, c); 1848 c->sign = neg; 1849 } 1850 1851 if (d != NULL) { 1852 mp_div_2d (&x, norm, &x, NULL); 1853 mp_exch (&x, d); 1854 } 1855 1856 res = MP_OKAY; 1857 1858 LBL_Y:mp_clear (&y); 1859 LBL_X:mp_clear (&x); 1860 LBL_T2:mp_clear (&t2); 1861 LBL_T1:mp_clear (&t1); 1862 LBL_Q:mp_clear (&q); 1863 return res; 1864 } 1865 1866 #endif 1867 1868 1869 #ifdef MP_LOW_MEM 1870 #define TAB_SIZE 32 1871 #else 1872 #define TAB_SIZE 256 1873 #endif 1874 1875 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 1876 { 1877 mp_int M[TAB_SIZE], res, mu; 1878 mp_digit buf; 1879 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 1880 int (*redux)(mp_int*,mp_int*,mp_int*); 1881 1882 /* find window size */ 1883 x = mp_count_bits (X); 1884 if (x <= 7) { 1885 winsize = 2; 1886 } else if (x <= 36) { 1887 winsize = 3; 1888 } else if (x <= 140) { 1889 winsize = 4; 1890 } else if (x <= 450) { 1891 winsize = 5; 1892 } else if (x <= 1303) { 1893 winsize = 6; 1894 } else if (x <= 3529) { 1895 winsize = 7; 1896 } else { 1897 winsize = 8; 1898 } 1899 1900 #ifdef MP_LOW_MEM 1901 if (winsize > 5) { 1902 winsize = 5; 1903 } 1904 #endif 1905 1906 /* init M array */ 1907 /* init first cell */ 1908 if ((err = mp_init(&M[1])) != MP_OKAY) { 1909 return err; 1910 } 1911 1912 /* now init the second half of the array */ 1913 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 1914 if ((err = mp_init(&M[x])) != MP_OKAY) { 1915 for (y = 1<<(winsize-1); y < x; y++) { 1916 mp_clear (&M[y]); 1917 } 1918 mp_clear(&M[1]); 1919 return err; 1920 } 1921 } 1922 1923 /* create mu, used for Barrett reduction */ 1924 if ((err = mp_init (&mu)) != MP_OKAY) { 1925 goto LBL_M; 1926 } 1927 1928 if (redmode == 0) { 1929 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { 1930 goto LBL_MU; 1931 } 1932 redux = mp_reduce; 1933 } else { 1934 if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) { 1935 goto LBL_MU; 1936 } 1937 redux = mp_reduce_2k_l; 1938 } 1939 1940 /* create M table 1941 * 1942 * The M table contains powers of the base, 1943 * e.g. M[x] = G**x mod P 1944 * 1945 * The first half of the table is not 1946 * computed though accept for M[0] and M[1] 1947 */ 1948 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) { 1949 goto LBL_MU; 1950 } 1951 1952 /* compute the value at M[1<<(winsize-1)] by squaring 1953 * M[1] (winsize-1) times 1954 */ 1955 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 1956 goto LBL_MU; 1957 } 1958 1959 for (x = 0; x < (winsize - 1); x++) { 1960 /* square it */ 1961 if ((err = mp_sqr (&M[1 << (winsize - 1)], 1962 &M[1 << (winsize - 1)])) != MP_OKAY) { 1963 goto LBL_MU; 1964 } 1965 1966 /* reduce modulo P */ 1967 if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 1968 goto LBL_MU; 1969 } 1970 } 1971 1972 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 1973 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 1974 */ 1975 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 1976 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 1977 goto LBL_MU; 1978 } 1979 if ((err = redux (&M[x], P, &mu)) != MP_OKAY) { 1980 goto LBL_MU; 1981 } 1982 } 1983 1984 /* setup result */ 1985 if ((err = mp_init (&res)) != MP_OKAY) { 1986 goto LBL_MU; 1987 } 1988 mp_set (&res, 1); 1989 1990 /* set initial mode and bit cnt */ 1991 mode = 0; 1992 bitcnt = 1; 1993 buf = 0; 1994 digidx = X->used - 1; 1995 bitcpy = 0; 1996 bitbuf = 0; 1997 1998 for (;;) { 1999 /* grab next digit as required */ 2000 if (--bitcnt == 0) { 2001 /* if digidx == -1 we are out of digits */ 2002 if (digidx == -1) { 2003 break; 2004 } 2005 /* read next digit and reset the bitcnt */ 2006 buf = X->dp[digidx--]; 2007 bitcnt = (int) DIGIT_BIT; 2008 } 2009 2010 /* grab the next msb from the exponent */ 2011 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 2012 buf <<= (mp_digit)1; 2013 2014 /* if the bit is zero and mode == 0 then we ignore it 2015 * These represent the leading zero bits before the first 1 bit 2016 * in the exponent. Technically this opt is not required but it 2017 * does lower the # of trivial squaring/reductions used 2018 */ 2019 if (mode == 0 && y == 0) { 2020 continue; 2021 } 2022 2023 /* if the bit is zero and mode == 1 then we square */ 2024 if (mode == 1 && y == 0) { 2025 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 2026 goto LBL_RES; 2027 } 2028 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 2029 goto LBL_RES; 2030 } 2031 continue; 2032 } 2033 2034 /* else we add it to the window */ 2035 bitbuf |= (y << (winsize - ++bitcpy)); 2036 mode = 2; 2037 2038 if (bitcpy == winsize) { 2039 /* ok window is filled so square as required and multiply */ 2040 /* square first */ 2041 for (x = 0; x < winsize; x++) { 2042 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 2043 goto LBL_RES; 2044 } 2045 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 2046 goto LBL_RES; 2047 } 2048 } 2049 2050 /* then multiply */ 2051 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 2052 goto LBL_RES; 2053 } 2054 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 2055 goto LBL_RES; 2056 } 2057 2058 /* empty window and reset */ 2059 bitcpy = 0; 2060 bitbuf = 0; 2061 mode = 1; 2062 } 2063 } 2064 2065 /* if bits remain then square/multiply */ 2066 if (mode == 2 && bitcpy > 0) { 2067 /* square then multiply if the bit is set */ 2068 for (x = 0; x < bitcpy; x++) { 2069 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 2070 goto LBL_RES; 2071 } 2072 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 2073 goto LBL_RES; 2074 } 2075 2076 bitbuf <<= 1; 2077 if ((bitbuf & (1 << winsize)) != 0) { 2078 /* then multiply */ 2079 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 2080 goto LBL_RES; 2081 } 2082 if ((err = redux (&res, P, &mu)) != MP_OKAY) { 2083 goto LBL_RES; 2084 } 2085 } 2086 } 2087 } 2088 2089 mp_exch (&res, Y); 2090 err = MP_OKAY; 2091 LBL_RES:mp_clear (&res); 2092 LBL_MU:mp_clear (&mu); 2093 LBL_M: 2094 mp_clear(&M[1]); 2095 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 2096 mp_clear (&M[x]); 2097 } 2098 return err; 2099 } 2100 2101 2102 /* computes b = a*a */ 2103 static int mp_sqr (mp_int * a, mp_int * b) 2104 { 2105 int res; 2106 2107 #ifdef BN_MP_TOOM_SQR_C 2108 /* use Toom-Cook? */ 2109 if (a->used >= TOOM_SQR_CUTOFF) { 2110 res = mp_toom_sqr(a, b); 2111 /* Karatsuba? */ 2112 } else 2113 #endif 2114 #ifdef BN_MP_KARATSUBA_SQR_C 2115 if (a->used >= KARATSUBA_SQR_CUTOFF) { 2116 res = mp_karatsuba_sqr (a, b); 2117 } else 2118 #endif 2119 { 2120 #ifdef BN_FAST_S_MP_SQR_C 2121 /* can we use the fast comba multiplier? */ 2122 if ((a->used * 2 + 1) < MP_WARRAY && 2123 a->used < 2124 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { 2125 res = fast_s_mp_sqr (a, b); 2126 } else 2127 #endif 2128 #ifdef BN_S_MP_SQR_C 2129 res = s_mp_sqr (a, b); 2130 #else 2131 #error mp_sqr could fail 2132 res = MP_VAL; 2133 #endif 2134 } 2135 b->sign = MP_ZPOS; 2136 return res; 2137 } 2138 2139 2140 /* reduces a modulo n where n is of the form 2**p - d 2141 This differs from reduce_2k since "d" can be larger 2142 than a single digit. 2143 */ 2144 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 2145 { 2146 mp_int q; 2147 int p, res; 2148 2149 if ((res = mp_init(&q)) != MP_OKAY) { 2150 return res; 2151 } 2152 2153 p = mp_count_bits(n); 2154 top: 2155 /* q = a/2**p, a = a mod 2**p */ 2156 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 2157 goto ERR; 2158 } 2159 2160 /* q = q * d */ 2161 if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 2162 goto ERR; 2163 } 2164 2165 /* a = a + q */ 2166 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 2167 goto ERR; 2168 } 2169 2170 if (mp_cmp_mag(a, n) != MP_LT) { 2171 s_mp_sub(a, n, a); 2172 goto top; 2173 } 2174 2175 ERR: 2176 mp_clear(&q); 2177 return res; 2178 } 2179 2180 2181 /* determines the setup value */ 2182 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 2183 { 2184 int res; 2185 mp_int tmp; 2186 2187 if ((res = mp_init(&tmp)) != MP_OKAY) { 2188 return res; 2189 } 2190 2191 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 2192 goto ERR; 2193 } 2194 2195 if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) { 2196 goto ERR; 2197 } 2198 2199 ERR: 2200 mp_clear(&tmp); 2201 return res; 2202 } 2203 2204 2205 /* computes a = 2**b 2206 * 2207 * Simple algorithm which zeroes the int, grows it then just sets one bit 2208 * as required. 2209 */ 2210 static int mp_2expt (mp_int * a, int b) 2211 { 2212 int res; 2213 2214 /* zero a as per default */ 2215 mp_zero (a); 2216 2217 /* grow a to accommodate the single bit */ 2218 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) { 2219 return res; 2220 } 2221 2222 /* set the used count of where the bit will go */ 2223 a->used = b / DIGIT_BIT + 1; 2224 2225 /* put the single bit in its place */ 2226 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 2227 2228 return MP_OKAY; 2229 } 2230 2231 2232 /* pre-calculate the value required for Barrett reduction 2233 * For a given modulus "b" it calulates the value required in "a" 2234 */ 2235 static int mp_reduce_setup (mp_int * a, mp_int * b) 2236 { 2237 int res; 2238 2239 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 2240 return res; 2241 } 2242 return mp_div (a, b, a, NULL); 2243 } 2244 2245 2246 /* reduces x mod m, assumes 0 < x < m**2, mu is 2247 * precomputed via mp_reduce_setup. 2248 * From HAC pp.604 Algorithm 14.42 2249 */ 2250 static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu) 2251 { 2252 mp_int q; 2253 int res, um = m->used; 2254 2255 /* q = x */ 2256 if ((res = mp_init_copy (&q, x)) != MP_OKAY) { 2257 return res; 2258 } 2259 2260 /* q1 = x / b**(k-1) */ 2261 mp_rshd (&q, um - 1); 2262 2263 /* according to HAC this optimization is ok */ 2264 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 2265 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { 2266 goto CLEANUP; 2267 } 2268 } else { 2269 #ifdef BN_S_MP_MUL_HIGH_DIGS_C 2270 if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { 2271 goto CLEANUP; 2272 } 2273 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C) 2274 if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) { 2275 goto CLEANUP; 2276 } 2277 #else 2278 { 2279 #error mp_reduce would always fail 2280 res = MP_VAL; 2281 goto CLEANUP; 2282 } 2283 #endif 2284 } 2285 2286 /* q3 = q2 / b**(k+1) */ 2287 mp_rshd (&q, um + 1); 2288 2289 /* x = x mod b**(k+1), quick (no division) */ 2290 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 2291 goto CLEANUP; 2292 } 2293 2294 /* q = q * m mod b**(k+1), quick (no division) */ 2295 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { 2296 goto CLEANUP; 2297 } 2298 2299 /* x = x - q */ 2300 if ((res = mp_sub (x, &q, x)) != MP_OKAY) { 2301 goto CLEANUP; 2302 } 2303 2304 /* If x < 0, add b**(k+1) to it */ 2305 if (mp_cmp_d (x, 0) == MP_LT) { 2306 mp_set (&q, 1); 2307 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) { 2308 goto CLEANUP; 2309 } 2310 if ((res = mp_add (x, &q, x)) != MP_OKAY) { 2311 goto CLEANUP; 2312 } 2313 } 2314 2315 /* Back off if it's too big */ 2316 while (mp_cmp (x, m) != MP_LT) { 2317 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { 2318 goto CLEANUP; 2319 } 2320 } 2321 2322 CLEANUP: 2323 mp_clear (&q); 2324 2325 return res; 2326 } 2327 2328 2329 /* multiplies |a| * |b| and only computes up to digs digits of result 2330 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 2331 * many digits of output are created. 2332 */ 2333 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2334 { 2335 mp_int t; 2336 int res, pa, pb, ix, iy; 2337 mp_digit u; 2338 mp_word r; 2339 mp_digit tmpx, *tmpt, *tmpy; 2340 2341 #ifdef BN_FAST_S_MP_MUL_DIGS_C 2342 /* can we use the fast multiplier? */ 2343 if (((digs) < MP_WARRAY) && 2344 MIN (a->used, b->used) < 2345 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2346 return fast_s_mp_mul_digs (a, b, c, digs); 2347 } 2348 #endif 2349 2350 if ((res = mp_init_size (&t, digs)) != MP_OKAY) { 2351 return res; 2352 } 2353 t.used = digs; 2354 2355 /* compute the digits of the product directly */ 2356 pa = a->used; 2357 for (ix = 0; ix < pa; ix++) { 2358 /* set the carry to zero */ 2359 u = 0; 2360 2361 /* limit ourselves to making digs digits of output */ 2362 pb = MIN (b->used, digs - ix); 2363 2364 /* setup some aliases */ 2365 /* copy of the digit from a used within the nested loop */ 2366 tmpx = a->dp[ix]; 2367 2368 /* an alias for the destination shifted ix places */ 2369 tmpt = t.dp + ix; 2370 2371 /* an alias for the digits of b */ 2372 tmpy = b->dp; 2373 2374 /* compute the columns of the output and propagate the carry */ 2375 for (iy = 0; iy < pb; iy++) { 2376 /* compute the column as a mp_word */ 2377 r = ((mp_word)*tmpt) + 2378 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2379 ((mp_word) u); 2380 2381 /* the new column is the lower part of the result */ 2382 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2383 2384 /* get the carry word from the result */ 2385 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2386 } 2387 /* set carry if it is placed below digs */ 2388 if (ix + iy < digs) { 2389 *tmpt = u; 2390 } 2391 } 2392 2393 mp_clamp (&t); 2394 mp_exch (&t, c); 2395 2396 mp_clear (&t); 2397 return MP_OKAY; 2398 } 2399 2400 2401 #ifdef BN_FAST_S_MP_MUL_DIGS_C 2402 /* Fast (comba) multiplier 2403 * 2404 * This is the fast column-array [comba] multiplier. It is 2405 * designed to compute the columns of the product first 2406 * then handle the carries afterwards. This has the effect 2407 * of making the nested loops that compute the columns very 2408 * simple and schedulable on super-scalar processors. 2409 * 2410 * This has been modified to produce a variable number of 2411 * digits of output so if say only a half-product is required 2412 * you don't have to compute the upper half (a feature 2413 * required for fast Barrett reduction). 2414 * 2415 * Based on Algorithm 14.12 on pp.595 of HAC. 2416 * 2417 */ 2418 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2419 { 2420 int olduse, res, pa, ix, iz; 2421 mp_digit W[MP_WARRAY]; 2422 register mp_word _W; 2423 2424 /* grow the destination as required */ 2425 if (c->alloc < digs) { 2426 if ((res = mp_grow (c, digs)) != MP_OKAY) { 2427 return res; 2428 } 2429 } 2430 2431 /* number of output digits to produce */ 2432 pa = MIN(digs, a->used + b->used); 2433 2434 /* clear the carry */ 2435 _W = 0; 2436 os_memset(W, 0, sizeof(W)); 2437 for (ix = 0; ix < pa; ix++) { 2438 int tx, ty; 2439 int iy; 2440 mp_digit *tmpx, *tmpy; 2441 2442 /* get offsets into the two bignums */ 2443 ty = MIN(b->used-1, ix); 2444 tx = ix - ty; 2445 2446 /* setup temp aliases */ 2447 tmpx = a->dp + tx; 2448 tmpy = b->dp + ty; 2449 2450 /* this is the number of times the loop will iterrate, essentially 2451 while (tx++ < a->used && ty-- >= 0) { ... } 2452 */ 2453 iy = MIN(a->used-tx, ty+1); 2454 2455 /* execute loop */ 2456 for (iz = 0; iz < iy; ++iz) { 2457 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2458 2459 } 2460 2461 /* store term */ 2462 W[ix] = ((mp_digit)_W) & MP_MASK; 2463 2464 /* make next carry */ 2465 _W = _W >> ((mp_word)DIGIT_BIT); 2466 } 2467 2468 /* setup dest */ 2469 olduse = c->used; 2470 c->used = pa; 2471 2472 { 2473 register mp_digit *tmpc; 2474 tmpc = c->dp; 2475 for (ix = 0; ix < pa+1; ix++) { 2476 /* now extract the previous digit [below the carry] */ 2477 *tmpc++ = W[ix]; 2478 } 2479 2480 /* clear unused digits [that existed in the old copy of c] */ 2481 for (; ix < olduse; ix++) { 2482 *tmpc++ = 0; 2483 } 2484 } 2485 mp_clamp (c); 2486 return MP_OKAY; 2487 } 2488 #endif /* BN_FAST_S_MP_MUL_DIGS_C */ 2489 2490 2491 /* init an mp_init for a given size */ 2492 static int mp_init_size (mp_int * a, int size) 2493 { 2494 int x; 2495 2496 /* pad size so there are always extra digits */ 2497 size += (MP_PREC * 2) - (size % MP_PREC); 2498 2499 /* alloc mem */ 2500 a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size); 2501 if (a->dp == NULL) { 2502 return MP_MEM; 2503 } 2504 2505 /* set the members */ 2506 a->used = 0; 2507 a->alloc = size; 2508 a->sign = MP_ZPOS; 2509 2510 /* zero the digits */ 2511 for (x = 0; x < size; x++) { 2512 a->dp[x] = 0; 2513 } 2514 2515 return MP_OKAY; 2516 } 2517 2518 2519 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 2520 static int s_mp_sqr (mp_int * a, mp_int * b) 2521 { 2522 mp_int t; 2523 int res, ix, iy, pa; 2524 mp_word r; 2525 mp_digit u, tmpx, *tmpt; 2526 2527 pa = a->used; 2528 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { 2529 return res; 2530 } 2531 2532 /* default used is maximum possible size */ 2533 t.used = 2*pa + 1; 2534 2535 for (ix = 0; ix < pa; ix++) { 2536 /* first calculate the digit at 2*ix */ 2537 /* calculate double precision result */ 2538 r = ((mp_word) t.dp[2*ix]) + 2539 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 2540 2541 /* store lower part in result */ 2542 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 2543 2544 /* get the carry */ 2545 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 2546 2547 /* left hand side of A[ix] * A[iy] */ 2548 tmpx = a->dp[ix]; 2549 2550 /* alias for where to store the results */ 2551 tmpt = t.dp + (2*ix + 1); 2552 2553 for (iy = ix + 1; iy < pa; iy++) { 2554 /* first calculate the product */ 2555 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 2556 2557 /* now calculate the double precision result, note we use 2558 * addition instead of *2 since it's easier to optimize 2559 */ 2560 r = ((mp_word) *tmpt) + r + r + ((mp_word) u); 2561 2562 /* store lower part */ 2563 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2564 2565 /* get carry */ 2566 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 2567 } 2568 /* propagate upwards */ 2569 while (u != ((mp_digit) 0)) { 2570 r = ((mp_word) *tmpt) + ((mp_word) u); 2571 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2572 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 2573 } 2574 } 2575 2576 mp_clamp (&t); 2577 mp_exch (&t, b); 2578 mp_clear (&t); 2579 return MP_OKAY; 2580 } 2581 2582 2583 /* multiplies |a| * |b| and does not compute the lower digs digits 2584 * [meant to get the higher part of the product] 2585 */ 2586 static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs) 2587 { 2588 mp_int t; 2589 int res, pa, pb, ix, iy; 2590 mp_digit u; 2591 mp_word r; 2592 mp_digit tmpx, *tmpt, *tmpy; 2593 2594 /* can we use the fast multiplier? */ 2595 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C 2596 if (((a->used + b->used + 1) < MP_WARRAY) 2597 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 2598 return fast_s_mp_mul_high_digs (a, b, c, digs); 2599 } 2600 #endif 2601 2602 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { 2603 return res; 2604 } 2605 t.used = a->used + b->used + 1; 2606 2607 pa = a->used; 2608 pb = b->used; 2609 for (ix = 0; ix < pa; ix++) { 2610 /* clear the carry */ 2611 u = 0; 2612 2613 /* left hand side of A[ix] * B[iy] */ 2614 tmpx = a->dp[ix]; 2615 2616 /* alias to the address of where the digits will be stored */ 2617 tmpt = &(t.dp[digs]); 2618 2619 /* alias for where to read the right hand side from */ 2620 tmpy = b->dp + (digs - ix); 2621 2622 for (iy = digs - ix; iy < pb; iy++) { 2623 /* calculate the double precision result */ 2624 r = ((mp_word)*tmpt) + 2625 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2626 ((mp_word) u); 2627 2628 /* get the lower part */ 2629 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2630 2631 /* carry the carry */ 2632 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2633 } 2634 *tmpt = u; 2635 } 2636 mp_clamp (&t); 2637 mp_exch (&t, c); 2638 mp_clear (&t); 2639 return MP_OKAY; 2640 } 2641 2642 2643 #ifdef BN_MP_MONTGOMERY_SETUP_C 2644 /* setups the montgomery reduction stuff */ 2645 static int 2646 mp_montgomery_setup (mp_int * n, mp_digit * rho) 2647 { 2648 mp_digit x, b; 2649 2650 /* fast inversion mod 2**k 2651 * 2652 * Based on the fact that 2653 * 2654 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 2655 * => 2*X*A - X*X*A*A = 1 2656 * => 2*(1) - (1) = 1 2657 */ 2658 b = n->dp[0]; 2659 2660 if ((b & 1) == 0) { 2661 return MP_VAL; 2662 } 2663 2664 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 2665 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 2666 #if !defined(MP_8BIT) 2667 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 2668 #endif 2669 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT)) 2670 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 2671 #endif 2672 #ifdef MP_64BIT 2673 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 2674 #endif 2675 2676 /* rho = -1/m mod b */ 2677 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 2678 2679 return MP_OKAY; 2680 } 2681 #endif 2682 2683 2684 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C 2685 /* computes xR**-1 == x (mod N) via Montgomery Reduction 2686 * 2687 * This is an optimized implementation of montgomery_reduce 2688 * which uses the comba method to quickly calculate the columns of the 2689 * reduction. 2690 * 2691 * Based on Algorithm 14.32 on pp.601 of HAC. 2692 */ 2693 static int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho) 2694 { 2695 int ix, res, olduse; 2696 mp_word W[MP_WARRAY]; 2697 2698 /* get old used count */ 2699 olduse = x->used; 2700 2701 /* grow a as required */ 2702 if (x->alloc < n->used + 1) { 2703 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { 2704 return res; 2705 } 2706 } 2707 2708 /* first we have to get the digits of the input into 2709 * an array of double precision words W[...] 2710 */ 2711 { 2712 register mp_word *_W; 2713 register mp_digit *tmpx; 2714 2715 /* alias for the W[] array */ 2716 _W = W; 2717 2718 /* alias for the digits of x*/ 2719 tmpx = x->dp; 2720 2721 /* copy the digits of a into W[0..a->used-1] */ 2722 for (ix = 0; ix < x->used; ix++) { 2723 *_W++ = *tmpx++; 2724 } 2725 2726 /* zero the high words of W[a->used..m->used*2] */ 2727 for (; ix < n->used * 2 + 1; ix++) { 2728 *_W++ = 0; 2729 } 2730 } 2731 2732 /* now we proceed to zero successive digits 2733 * from the least significant upwards 2734 */ 2735 for (ix = 0; ix < n->used; ix++) { 2736 /* mu = ai * m' mod b 2737 * 2738 * We avoid a double precision multiplication (which isn't required) 2739 * by casting the value down to a mp_digit. Note this requires 2740 * that W[ix-1] have the carry cleared (see after the inner loop) 2741 */ 2742 register mp_digit mu; 2743 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 2744 2745 /* a = a + mu * m * b**i 2746 * 2747 * This is computed in place and on the fly. The multiplication 2748 * by b**i is handled by offseting which columns the results 2749 * are added to. 2750 * 2751 * Note the comba method normally doesn't handle carries in the 2752 * inner loop In this case we fix the carry from the previous 2753 * column since the Montgomery reduction requires digits of the 2754 * result (so far) [see above] to work. This is 2755 * handled by fixing up one carry after the inner loop. The 2756 * carry fixups are done in order so after these loops the 2757 * first m->used words of W[] have the carries fixed 2758 */ 2759 { 2760 register int iy; 2761 register mp_digit *tmpn; 2762 register mp_word *_W; 2763 2764 /* alias for the digits of the modulus */ 2765 tmpn = n->dp; 2766 2767 /* Alias for the columns set by an offset of ix */ 2768 _W = W + ix; 2769 2770 /* inner loop */ 2771 for (iy = 0; iy < n->used; iy++) { 2772 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 2773 } 2774 } 2775 2776 /* now fix carry for next digit, W[ix+1] */ 2777 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 2778 } 2779 2780 /* now we have to propagate the carries and 2781 * shift the words downward [all those least 2782 * significant digits we zeroed]. 2783 */ 2784 { 2785 register mp_digit *tmpx; 2786 register mp_word *_W, *_W1; 2787 2788 /* nox fix rest of carries */ 2789 2790 /* alias for current word */ 2791 _W1 = W + ix; 2792 2793 /* alias for next word, where the carry goes */ 2794 _W = W + ++ix; 2795 2796 for (; ix <= n->used * 2 + 1; ix++) { 2797 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 2798 } 2799 2800 /* copy out, A = A/b**n 2801 * 2802 * The result is A/b**n but instead of converting from an 2803 * array of mp_word to mp_digit than calling mp_rshd 2804 * we just copy them in the right order 2805 */ 2806 2807 /* alias for destination word */ 2808 tmpx = x->dp; 2809 2810 /* alias for shifted double precision result */ 2811 _W = W + n->used; 2812 2813 for (ix = 0; ix < n->used + 1; ix++) { 2814 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 2815 } 2816 2817 /* zero oldused digits, if the input a was larger than 2818 * m->used+1 we'll have to clear the digits 2819 */ 2820 for (; ix < olduse; ix++) { 2821 *tmpx++ = 0; 2822 } 2823 } 2824 2825 /* set the max used and clamp */ 2826 x->used = n->used + 1; 2827 mp_clamp (x); 2828 2829 /* if A >= m then A = A - m */ 2830 if (mp_cmp_mag (x, n) != MP_LT) { 2831 return s_mp_sub (x, n, x); 2832 } 2833 return MP_OKAY; 2834 } 2835 #endif 2836 2837 2838 #ifdef BN_MP_MUL_2_C 2839 /* b = a*2 */ 2840 static int mp_mul_2(mp_int * a, mp_int * b) 2841 { 2842 int x, res, oldused; 2843 2844 /* grow to accommodate result */ 2845 if (b->alloc < a->used + 1) { 2846 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) { 2847 return res; 2848 } 2849 } 2850 2851 oldused = b->used; 2852 b->used = a->used; 2853 2854 { 2855 register mp_digit r, rr, *tmpa, *tmpb; 2856 2857 /* alias for source */ 2858 tmpa = a->dp; 2859 2860 /* alias for dest */ 2861 tmpb = b->dp; 2862 2863 /* carry */ 2864 r = 0; 2865 for (x = 0; x < a->used; x++) { 2866 2867 /* get what will be the *next* carry bit from the 2868 * MSB of the current digit 2869 */ 2870 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 2871 2872 /* now shift up this digit, add in the carry [from the previous] */ 2873 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 2874 2875 /* copy the carry that would be from the source 2876 * digit into the next iteration 2877 */ 2878 r = rr; 2879 } 2880 2881 /* new leading digit? */ 2882 if (r != 0) { 2883 /* add a MSB which is always 1 at this point */ 2884 *tmpb = 1; 2885 ++(b->used); 2886 } 2887 2888 /* now zero any excess digits on the destination 2889 * that we didn't write to 2890 */ 2891 tmpb = b->dp + b->used; 2892 for (x = b->used; x < oldused; x++) { 2893 *tmpb++ = 0; 2894 } 2895 } 2896 b->sign = a->sign; 2897 return MP_OKAY; 2898 } 2899 #endif 2900 2901 2902 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C 2903 /* 2904 * shifts with subtractions when the result is greater than b. 2905 * 2906 * The method is slightly modified to shift B unconditionally up to just under 2907 * the leading bit of b. This saves a lot of multiple precision shifting. 2908 */ 2909 static int mp_montgomery_calc_normalization (mp_int * a, mp_int * b) 2910 { 2911 int x, bits, res; 2912 2913 /* how many bits of last digit does b use */ 2914 bits = mp_count_bits (b) % DIGIT_BIT; 2915 2916 if (b->used > 1) { 2917 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 2918 return res; 2919 } 2920 } else { 2921 mp_set(a, 1); 2922 bits = 1; 2923 } 2924 2925 2926 /* now compute C = A * B mod b */ 2927 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 2928 if ((res = mp_mul_2 (a, a)) != MP_OKAY) { 2929 return res; 2930 } 2931 if (mp_cmp_mag (a, b) != MP_LT) { 2932 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { 2933 return res; 2934 } 2935 } 2936 } 2937 2938 return MP_OKAY; 2939 } 2940 #endif 2941 2942 2943 #ifdef BN_MP_EXPTMOD_FAST_C 2944 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 2945 * 2946 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 2947 * The value of k changes based on the size of the exponent. 2948 * 2949 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 2950 */ 2951 2952 static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 2953 { 2954 mp_int M[TAB_SIZE], res; 2955 mp_digit buf, mp; 2956 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 2957 2958 /* use a pointer to the reduction algorithm. This allows us to use 2959 * one of many reduction algorithms without modding the guts of 2960 * the code with if statements everywhere. 2961 */ 2962 int (*redux)(mp_int*,mp_int*,mp_digit); 2963 2964 /* find window size */ 2965 x = mp_count_bits (X); 2966 if (x <= 7) { 2967 winsize = 2; 2968 } else if (x <= 36) { 2969 winsize = 3; 2970 } else if (x <= 140) { 2971 winsize = 4; 2972 } else if (x <= 450) { 2973 winsize = 5; 2974 } else if (x <= 1303) { 2975 winsize = 6; 2976 } else if (x <= 3529) { 2977 winsize = 7; 2978 } else { 2979 winsize = 8; 2980 } 2981 2982 #ifdef MP_LOW_MEM 2983 if (winsize > 5) { 2984 winsize = 5; 2985 } 2986 #endif 2987 2988 /* init M array */ 2989 /* init first cell */ 2990 if ((err = mp_init(&M[1])) != MP_OKAY) { 2991 return err; 2992 } 2993 2994 /* now init the second half of the array */ 2995 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 2996 if ((err = mp_init(&M[x])) != MP_OKAY) { 2997 for (y = 1<<(winsize-1); y < x; y++) { 2998 mp_clear (&M[y]); 2999 } 3000 mp_clear(&M[1]); 3001 return err; 3002 } 3003 } 3004 3005 /* determine and setup reduction code */ 3006 if (redmode == 0) { 3007 #ifdef BN_MP_MONTGOMERY_SETUP_C 3008 /* now setup montgomery */ 3009 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { 3010 goto LBL_M; 3011 } 3012 #else 3013 err = MP_VAL; 3014 goto LBL_M; 3015 #endif 3016 3017 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 3018 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C 3019 if (((P->used * 2 + 1) < MP_WARRAY) && 3020 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 3021 redux = fast_mp_montgomery_reduce; 3022 } else 3023 #endif 3024 { 3025 #ifdef BN_MP_MONTGOMERY_REDUCE_C 3026 /* use slower baseline Montgomery method */ 3027 redux = mp_montgomery_reduce; 3028 #else 3029 err = MP_VAL; 3030 goto LBL_M; 3031 #endif 3032 } 3033 } else if (redmode == 1) { 3034 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C) 3035 /* setup DR reduction for moduli of the form B**k - b */ 3036 mp_dr_setup(P, &mp); 3037 redux = mp_dr_reduce; 3038 #else 3039 err = MP_VAL; 3040 goto LBL_M; 3041 #endif 3042 } else { 3043 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C) 3044 /* setup DR reduction for moduli of the form 2**k - b */ 3045 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 3046 goto LBL_M; 3047 } 3048 redux = mp_reduce_2k; 3049 #else 3050 err = MP_VAL; 3051 goto LBL_M; 3052 #endif 3053 } 3054 3055 /* setup result */ 3056 if ((err = mp_init (&res)) != MP_OKAY) { 3057 goto LBL_M; 3058 } 3059 3060 /* create M table 3061 * 3062 3063 * 3064 * The first half of the table is not computed though accept for M[0] and M[1] 3065 */ 3066 3067 if (redmode == 0) { 3068 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C 3069 /* now we need R mod m */ 3070 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { 3071 goto LBL_RES; 3072 } 3073 #else 3074 err = MP_VAL; 3075 goto LBL_RES; 3076 #endif 3077 3078 /* now set M[1] to G * R mod m */ 3079 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) { 3080 goto LBL_RES; 3081 } 3082 } else { 3083 mp_set(&res, 1); 3084 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { 3085 goto LBL_RES; 3086 } 3087 } 3088 3089 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 3090 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 3091 goto LBL_RES; 3092 } 3093 3094 for (x = 0; x < (winsize - 1); x++) { 3095 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 3096 goto LBL_RES; 3097 } 3098 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 3099 goto LBL_RES; 3100 } 3101 } 3102 3103 /* create upper table */ 3104 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 3105 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 3106 goto LBL_RES; 3107 } 3108 if ((err = redux (&M[x], P, mp)) != MP_OKAY) { 3109 goto LBL_RES; 3110 } 3111 } 3112 3113 /* set initial mode and bit cnt */ 3114 mode = 0; 3115 bitcnt = 1; 3116 buf = 0; 3117 digidx = X->used - 1; 3118 bitcpy = 0; 3119 bitbuf = 0; 3120 3121 for (;;) { 3122 /* grab next digit as required */ 3123 if (--bitcnt == 0) { 3124 /* if digidx == -1 we are out of digits so break */ 3125 if (digidx == -1) { 3126 break; 3127 } 3128 /* read next digit and reset bitcnt */ 3129 buf = X->dp[digidx--]; 3130 bitcnt = (int)DIGIT_BIT; 3131 } 3132 3133 /* grab the next msb from the exponent */ 3134 y = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1; 3135 buf <<= (mp_digit)1; 3136 3137 /* if the bit is zero and mode == 0 then we ignore it 3138 * These represent the leading zero bits before the first 1 bit 3139 * in the exponent. Technically this opt is not required but it 3140 * does lower the # of trivial squaring/reductions used 3141 */ 3142 if (mode == 0 && y == 0) { 3143 continue; 3144 } 3145 3146 /* if the bit is zero and mode == 1 then we square */ 3147 if (mode == 1 && y == 0) { 3148 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3149 goto LBL_RES; 3150 } 3151 if ((err = redux (&res, P, mp)) != MP_OKAY) { 3152 goto LBL_RES; 3153 } 3154 continue; 3155 } 3156 3157 /* else we add it to the window */ 3158 bitbuf |= (y << (winsize - ++bitcpy)); 3159 mode = 2; 3160 3161 if (bitcpy == winsize) { 3162 /* ok window is filled so square as required and multiply */ 3163 /* square first */ 3164 for (x = 0; x < winsize; x++) { 3165 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3166 goto LBL_RES; 3167 } 3168 if ((err = redux (&res, P, mp)) != MP_OKAY) { 3169 goto LBL_RES; 3170 } 3171 } 3172 3173 /* then multiply */ 3174 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 3175 goto LBL_RES; 3176 } 3177 if ((err = redux (&res, P, mp)) != MP_OKAY) { 3178 goto LBL_RES; 3179 } 3180 3181 /* empty window and reset */ 3182 bitcpy = 0; 3183 bitbuf = 0; 3184 mode = 1; 3185 } 3186 } 3187 3188 /* if bits remain then square/multiply */ 3189 if (mode == 2 && bitcpy > 0) { 3190 /* square then multiply if the bit is set */ 3191 for (x = 0; x < bitcpy; x++) { 3192 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 3193 goto LBL_RES; 3194 } 3195 if ((err = redux (&res, P, mp)) != MP_OKAY) { 3196 goto LBL_RES; 3197 } 3198 3199 /* get next bit of the window */ 3200 bitbuf <<= 1; 3201 if ((bitbuf & (1 << winsize)) != 0) { 3202 /* then multiply */ 3203 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 3204 goto LBL_RES; 3205 } 3206 if ((err = redux (&res, P, mp)) != MP_OKAY) { 3207 goto LBL_RES; 3208 } 3209 } 3210 } 3211 } 3212 3213 if (redmode == 0) { 3214 /* fixup result if Montgomery reduction is used 3215 * recall that any value in a Montgomery system is 3216 * actually multiplied by R mod n. So we have 3217 * to reduce one more time to cancel out the factor 3218 * of R. 3219 */ 3220 if ((err = redux(&res, P, mp)) != MP_OKAY) { 3221 goto LBL_RES; 3222 } 3223 } 3224 3225 /* swap res with Y */ 3226 mp_exch (&res, Y); 3227 err = MP_OKAY; 3228 LBL_RES:mp_clear (&res); 3229 LBL_M: 3230 mp_clear(&M[1]); 3231 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3232 mp_clear (&M[x]); 3233 } 3234 return err; 3235 } 3236 #endif 3237 3238 3239 #ifdef BN_FAST_S_MP_SQR_C 3240 /* the jist of squaring... 3241 * you do like mult except the offset of the tmpx [one that 3242 * starts closer to zero] can't equal the offset of tmpy. 3243 * So basically you set up iy like before then you min it with 3244 * (ty-tx) so that it never happens. You double all those 3245 * you add in the inner loop 3246 3247 After that loop you do the squares and add them in. 3248 */ 3249 3250 static int fast_s_mp_sqr (mp_int * a, mp_int * b) 3251 { 3252 int olduse, res, pa, ix, iz; 3253 mp_digit W[MP_WARRAY], *tmpx; 3254 mp_word W1; 3255 3256 /* grow the destination as required */ 3257 pa = a->used + a->used; 3258 if (b->alloc < pa) { 3259 if ((res = mp_grow (b, pa)) != MP_OKAY) { 3260 return res; 3261 } 3262 } 3263 3264 /* number of output digits to produce */ 3265 W1 = 0; 3266 for (ix = 0; ix < pa; ix++) { 3267 int tx, ty, iy; 3268 mp_word _W; 3269 mp_digit *tmpy; 3270 3271 /* clear counter */ 3272 _W = 0; 3273 3274 /* get offsets into the two bignums */ 3275 ty = MIN(a->used-1, ix); 3276 tx = ix - ty; 3277 3278 /* setup temp aliases */ 3279 tmpx = a->dp + tx; 3280 tmpy = a->dp + ty; 3281 3282 /* this is the number of times the loop will iterrate, essentially 3283 while (tx++ < a->used && ty-- >= 0) { ... } 3284 */ 3285 iy = MIN(a->used-tx, ty+1); 3286 3287 /* now for squaring tx can never equal ty 3288 * we halve the distance since they approach at a rate of 2x 3289 * and we have to round because odd cases need to be executed 3290 */ 3291 iy = MIN(iy, (ty-tx+1)>>1); 3292 3293 /* execute loop */ 3294 for (iz = 0; iz < iy; iz++) { 3295 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 3296 } 3297 3298 /* double the inner product and add carry */ 3299 _W = _W + _W + W1; 3300 3301 /* even columns have the square term in them */ 3302 if ((ix&1) == 0) { 3303 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); 3304 } 3305 3306 /* store it */ 3307 W[ix] = (mp_digit)(_W & MP_MASK); 3308 3309 /* make next carry */ 3310 W1 = _W >> ((mp_word)DIGIT_BIT); 3311 } 3312 3313 /* setup dest */ 3314 olduse = b->used; 3315 b->used = a->used+a->used; 3316 3317 { 3318 mp_digit *tmpb; 3319 tmpb = b->dp; 3320 for (ix = 0; ix < pa; ix++) { 3321 *tmpb++ = W[ix] & MP_MASK; 3322 } 3323 3324 /* clear unused digits [that existed in the old copy of c] */ 3325 for (; ix < olduse; ix++) { 3326 *tmpb++ = 0; 3327 } 3328 } 3329 mp_clamp (b); 3330 return MP_OKAY; 3331 } 3332 #endif 3333 3334 3335 #ifdef BN_MP_MUL_D_C 3336 /* multiply by a digit */ 3337 static int 3338 mp_mul_d (mp_int * a, mp_digit b, mp_int * c) 3339 { 3340 mp_digit u, *tmpa, *tmpc; 3341 mp_word r; 3342 int ix, res, olduse; 3343 3344 /* make sure c is big enough to hold a*b */ 3345 if (c->alloc < a->used + 1) { 3346 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { 3347 return res; 3348 } 3349 } 3350 3351 /* get the original destinations used count */ 3352 olduse = c->used; 3353 3354 /* set the sign */ 3355 c->sign = a->sign; 3356 3357 /* alias for a->dp [source] */ 3358 tmpa = a->dp; 3359 3360 /* alias for c->dp [dest] */ 3361 tmpc = c->dp; 3362 3363 /* zero carry */ 3364 u = 0; 3365 3366 /* compute columns */ 3367 for (ix = 0; ix < a->used; ix++) { 3368 /* compute product and carry sum for this term */ 3369 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); 3370 3371 /* mask off higher bits to get a single digit */ 3372 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3373 3374 /* send carry into next iteration */ 3375 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 3376 } 3377 3378 /* store final carry [if any] and increment ix offset */ 3379 *tmpc++ = u; 3380 ++ix; 3381 3382 /* now zero digits above the top */ 3383 while (ix++ < olduse) { 3384 *tmpc++ = 0; 3385 } 3386 3387 /* set used count */ 3388 c->used = a->used + 1; 3389 mp_clamp(c); 3390 3391 return MP_OKAY; 3392 } 3393 #endif 3394