Lines Matching +full:p +full:- +full:256
29 * that right-shifting a signed negative integer copies the sign bit
30 * (arithmetic right-shift). This is "implementation-defined behaviour",
39 | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
41 | ((-((uint64_t)(x) >> 63)) << (64 - (n))))
48 * Convert an integer from unsigned big-endian encoding to a sequence of
49 * 30-bit words in little-endian order. The final "partial" word is
60 while (len -- > 0) { in be8_to_le30()
69 acc = b >> (30 - acc_len); in be8_to_le30()
70 acc_len -= 22; in be8_to_le30()
77 * Convert an integer (30-bit words, little-endian) to unsigned
78 * big-endian encoding. The total encoding length is provided; all
89 while (len -- > 0) { in le30_to_be8()
95 acc = w >> (8 - acc_len); in le30_to_be8()
100 acc_len -= 8; in le30_to_be8()
107 * nine 30-bit words, for values up to 2^270-1. Result is encoded over
117 * 10376293531797946367 = 9 * (2^30-1)^2 + 9663676406 in mul9()
120 * Thus, adding together 9 products of 30-bit integers, with in mul9()
225 * Square a 270-bit integer, represented as an array of nine 30-bit words.
296 * Base field modulus for P-256.
305 * The 'b' curve equation coefficient for P-256.
331 d[3] -= w << 6; in add_f256()
332 d[6] -= w << 12; in add_f256()
353 * We really compute a - b + 2*p to make sure that the result is in sub_f256()
356 w = a[0] - b[0] - 0x00002; in sub_f256()
358 w = a[1] - b[1] + ARSH(w, 30); in sub_f256()
360 w = a[2] - b[2] + ARSH(w, 30); in sub_f256()
362 w = a[3] - b[3] + ARSH(w, 30) + 0x00080; in sub_f256()
364 w = a[4] - b[4] + ARSH(w, 30); in sub_f256()
366 w = a[5] - b[5] + ARSH(w, 30); in sub_f256()
368 w = a[6] - b[6] + ARSH(w, 30) + 0x02000; in sub_f256()
370 w = a[7] - b[7] + ARSH(w, 30) - 0x08000; in sub_f256()
372 w = a[8] - b[8] + ARSH(w, 30) + 0x20000; in sub_f256()
376 d[3] -= w << 6; in sub_f256()
377 d[6] -= w << 12; in sub_f256()
407 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 in mul_f256()
409 * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p in mul_f256()
411 * For a word x at bit offset n (n >= 256), we have: in mul_f256()
412 * x*2^n = x*2^(n-32) - x*2^(n-64) in mul_f256()
413 * - x*2^(n - 160) + x*2^(n-256) mod p in mul_f256()
418 * We use 64-bit intermediate words to allow for carries to in mul_f256()
425 for (i = 17; i >= 9; i --) { in mul_f256()
429 s[i - 1] += ARSHW(y, 2); in mul_f256()
430 s[i - 2] += (y << 28) & 0x3FFFFFFF; in mul_f256()
431 s[i - 2] -= ARSHW(y, 4); in mul_f256()
432 s[i - 3] -= (y << 26) & 0x3FFFFFFF; in mul_f256()
433 s[i - 5] -= ARSHW(y, 10); in mul_f256()
434 s[i - 6] -= (y << 20) & 0x3FFFFFFF; in mul_f256()
435 s[i - 8] += ARSHW(y, 16); in mul_f256()
436 s[i - 9] += (y << 14) & 0x3FFFFFFF; in mul_f256()
460 * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The in mul_f256()
462 * able to switch to 32-bit operations. in mul_f256()
468 * One extra round of reduction, for cc*2^256, which means in mul_f256()
469 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) in mul_f256()
479 d[3] -= z << 6; in mul_f256()
480 d[6] -= (z << 12) & 0x3FFFFFFF; in mul_f256()
481 d[7] -= ARSH(z, 18); in mul_f256()
485 d[0] -= c; in mul_f256()
488 d[7] -= c << 14; in mul_f256()
519 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 in square_f256()
521 * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p in square_f256()
523 * For a word x at bit offset n (n >= 256), we have: in square_f256()
524 * x*2^n = x*2^(n-32) - x*2^(n-64) in square_f256()
525 * - x*2^(n - 160) + x*2^(n-256) mod p in square_f256()
530 * We use 64-bit intermediate words to allow for carries to in square_f256()
537 for (i = 17; i >= 9; i --) { in square_f256()
541 s[i - 1] += ARSHW(y, 2); in square_f256()
542 s[i - 2] += (y << 28) & 0x3FFFFFFF; in square_f256()
543 s[i - 2] -= ARSHW(y, 4); in square_f256()
544 s[i - 3] -= (y << 26) & 0x3FFFFFFF; in square_f256()
545 s[i - 5] -= ARSHW(y, 10); in square_f256()
546 s[i - 6] -= (y << 20) & 0x3FFFFFFF; in square_f256()
547 s[i - 8] += ARSHW(y, 16); in square_f256()
548 s[i - 9] += (y << 14) & 0x3FFFFFFF; in square_f256()
572 * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The in square_f256()
574 * able to switch to 32-bit operations. in square_f256()
580 * One extra round of reduction, for cc*2^256, which means in square_f256()
581 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative) in square_f256()
591 d[3] -= z << 6; in square_f256()
592 d[6] -= (z << 12) & 0x3FFFFFFF; in square_f256()
593 d[7] -= ARSH(z, 18); in square_f256()
597 d[0] -= c; in square_f256()
600 d[7] -= c << 14; in square_f256()
612 * Perform a "final reduction" in field F256 (field for curve P-256).
629 w = d[i] - F256[i] - cc; in reduce_final_f256()
639 * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
646 * Coordinates are represented in arrays of 32-bit integers, each holding
658 * - If the point is the point at infinity, then all three coordinates
660 * - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
665 p256_to_affine(p256_jacobian *P) in p256_to_affine() argument
672 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is in p256_to_affine()
673 * p-2. Exponent bit pattern (from high to low) is: in p256_to_affine()
674 * - 32 bits of value 1 in p256_to_affine()
675 * - 31 bits of value 0 in p256_to_affine()
676 * - 1 bit of value 1 in p256_to_affine()
677 * - 96 bits of value 0 in p256_to_affine()
678 * - 94 bits of value 1 in p256_to_affine()
679 * - 1 bit of value 0 in p256_to_affine()
680 * - 1 bit of value 1 in p256_to_affine()
681 * Thus, we precompute z^(2^31-1) to speed things up. in p256_to_affine()
689 * A simple square-and-multiply for z^(2^31-1). We could save about in p256_to_affine()
693 memcpy(t1, P->z, sizeof P->z); in p256_to_affine()
696 mul_f256(t1, t1, P->z); in p256_to_affine()
700 * Square-and-multiply. Apart from the squarings, we have a few in p256_to_affine()
704 memcpy(t2, P->z, sizeof P->z); in p256_to_affine()
705 for (i = 1; i < 256; i ++) { in p256_to_affine()
717 mul_f256(t2, t2, P->z); in p256_to_affine()
726 mul_f256(P->x, t1, P->x); in p256_to_affine()
728 mul_f256(P->y, t1, P->y); in p256_to_affine()
729 reduce_final_f256(P->x); in p256_to_affine()
730 reduce_final_f256(P->y); in p256_to_affine()
736 mul_f256(P->z, P->z, t2); in p256_to_affine()
737 reduce_final_f256(P->z); in p256_to_affine()
741 * Double a point in P-256. This function works for all valid points,
751 * m = 3*(x + z^2)*(x - z^2) in p256_double()
752 * x' = m^2 - 2*s in p256_double()
753 * y' = m*(s - x') - 8*y^4 in p256_double()
758 * - If y = 0 then z' = 0. But there is no such point in P-256 in p256_double()
760 * - If z = 0 then z' = 0. in p256_double()
767 square_f256(t1, Q->z); in p256_double()
770 * Compute x+z^2 in t2 and x-z^2 in t1. in p256_double()
772 add_f256(t2, Q->x, t1); in p256_double()
773 sub_f256(t1, Q->x, t1); in p256_double()
776 * Compute 3*(x+z^2)*(x-z^2) in t1. in p256_double()
785 square_f256(t3, Q->y); in p256_double()
787 mul_f256(t2, Q->x, t3); in p256_double()
791 * Compute x' = m^2 - 2*s. in p256_double()
793 square_f256(Q->x, t1); in p256_double()
794 sub_f256(Q->x, Q->x, t2); in p256_double()
795 sub_f256(Q->x, Q->x, t2); in p256_double()
800 mul_f256(t4, Q->y, Q->z); in p256_double()
801 add_f256(Q->z, t4, t4); in p256_double()
804 * Compute y' = m*(s - x') - 8*y^4. Note that we already have in p256_double()
807 sub_f256(t2, t2, Q->x); in p256_double()
808 mul_f256(Q->y, t1, t2); in p256_double()
811 sub_f256(Q->y, Q->y, t4); in p256_double()
819 * - If P1 == 0 but P2 != 0
820 * - If P1 != 0 but P2 == 0
821 * - If P1 == P2
827 * - P1 and P2 have the same Y coordinate
828 * - P1 == 0 and P2 == 0
829 * - The Y coordinate of one of the points is 0 and the other point is
834 * curve P-256.
839 * - If the result is not the point at infinity, then it is correct.
840 * - Otherwise, if the returned value is 1, then this is a case of
842 * - Otherwise, P1 == P2, so a "double" operation should have been
855 * h = u2 - u1 in p256_add()
856 * r = s2 - s1 in p256_add()
857 * x3 = r^2 - h^3 - 2 * u1 * h^2 in p256_add()
858 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 in p256_add()
868 square_f256(t3, P2->z); in p256_add()
869 mul_f256(t1, P1->x, t3); in p256_add()
870 mul_f256(t4, P2->z, t3); in p256_add()
871 mul_f256(t3, P1->y, t4); in p256_add()
876 square_f256(t4, P1->z); in p256_add()
877 mul_f256(t2, P2->x, t4); in p256_add()
878 mul_f256(t5, P1->z, t4); in p256_add()
879 mul_f256(t4, P2->y, t5); in p256_add()
882 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). in p256_add()
893 ret = (ret | -ret) >> 31; in p256_add()
903 * Compute x3 = r^2 - h^3 - 2*u1*h^2. in p256_add()
905 square_f256(P1->x, t4); in p256_add()
906 sub_f256(P1->x, P1->x, t5); in p256_add()
907 sub_f256(P1->x, P1->x, t6); in p256_add()
908 sub_f256(P1->x, P1->x, t6); in p256_add()
911 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. in p256_add()
913 sub_f256(t6, t6, P1->x); in p256_add()
914 mul_f256(P1->y, t4, t6); in p256_add()
916 sub_f256(P1->y, P1->y, t1); in p256_add()
921 mul_f256(t1, P1->z, P2->z); in p256_add()
922 mul_f256(P1->z, t1, t2); in p256_add()
929 * case when P2 is a non-zero point in affine coordinate.
933 * - If P1 == 0
934 * - If P1 == P2
940 * - P1 and P2 have the same Y coordinate
941 * - The Y coordinate of P2 is 0 and P1 is the point at infinity.
945 * curve P-256.
950 * - If the result is not the point at infinity, then it is correct.
951 * - Otherwise, if the returned value is 1, then this is a case of
953 * - Otherwise, P1 == P2, so a "double" operation should have been
966 * h = u2 - u1 in p256_add_mixed()
967 * r = s2 - s1 in p256_add_mixed()
968 * x3 = r^2 - h^3 - 2 * u1 * h^2 in p256_add_mixed()
969 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 in p256_add_mixed()
979 memcpy(t1, P1->x, sizeof t1); in p256_add_mixed()
980 memcpy(t3, P1->y, sizeof t3); in p256_add_mixed()
985 square_f256(t4, P1->z); in p256_add_mixed()
986 mul_f256(t2, P2->x, t4); in p256_add_mixed()
987 mul_f256(t5, P1->z, t4); in p256_add_mixed()
988 mul_f256(t4, P2->y, t5); in p256_add_mixed()
991 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4). in p256_add_mixed()
1002 ret = (ret | -ret) >> 31; in p256_add_mixed()
1012 * Compute x3 = r^2 - h^3 - 2*u1*h^2. in p256_add_mixed()
1014 square_f256(P1->x, t4); in p256_add_mixed()
1015 sub_f256(P1->x, P1->x, t5); in p256_add_mixed()
1016 sub_f256(P1->x, P1->x, t6); in p256_add_mixed()
1017 sub_f256(P1->x, P1->x, t6); in p256_add_mixed()
1020 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. in p256_add_mixed()
1022 sub_f256(t6, t6, P1->x); in p256_add_mixed()
1023 mul_f256(P1->y, t4, t6); in p256_add_mixed()
1025 sub_f256(P1->y, P1->y, t1); in p256_add_mixed()
1030 mul_f256(P1->z, P1->z, t2); in p256_add_mixed()
1036 * Decode a P-256 point. This function does not support the point at
1040 p256_decode(p256_jacobian *P, const void *src, size_t len) in p256_decode() argument
1088 memcpy(P->x, tx, sizeof tx); in p256_decode()
1089 memcpy(P->y, ty, sizeof ty); in p256_decode()
1090 memset(P->z, 0, sizeof P->z); in p256_decode()
1091 P->z[0] = 1; in p256_decode()
1100 p256_encode(void *dst, const p256_jacobian *P) in p256_encode() argument
1106 le30_to_be8(buf + 1, 32, P->x); in p256_encode()
1107 le30_to_be8(buf + 33, 32, P->y); in p256_encode()
1116 p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen) in p256_mul() argument
1122 * We use a 2-bit window to handle multiplier bits by pairs. in p256_mul()
1131 P2 = *P; in p256_mul()
1133 P3 = *P; in p256_mul()
1141 while (xlen -- > 0) { in p256_mul()
1144 for (k = 6; k >= 0; k -= 2) { in p256_mul()
1150 T = *P; in p256_mul()
1163 *P = Q; in p256_mul()
1169 * the point are encoded as 9 words of 30 bits each (little-endian
1266 * Lookup one of the Gwin[] values, by index. This is constant-time.
1279 m = -EQ(idx, k + 1); in lookup_Gwin()
1284 memcpy(T->x, &xy[0], sizeof T->x); in lookup_Gwin()
1285 memcpy(T->y, &xy[9], sizeof T->y); in lookup_Gwin()
1286 memset(T->z, 0, sizeof T->z); in lookup_Gwin()
1287 T->z[0] = 1; in lookup_Gwin()
1291 * Multiply the generator by an integer. The integer is assumed non-zero
1295 p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen) in p256_mulgen() argument
1301 * We use a 4-bit window to handle multiplier bits by groups in p256_mulgen()
1303 * points in affine coordinates; we use a constant-time lookup. in p256_mulgen()
1310 while (xlen -- > 0) { in p256_mulgen()
1335 *P = Q; in p256_mulgen()
1384 p256_jacobian P; in api_mul() local
1390 r = p256_decode(&P, G, Glen); in api_mul()
1391 p256_mul(&P, x, xlen); in api_mul()
1392 p256_to_affine(&P); in api_mul()
1393 p256_encode(G, &P); in api_mul()
1401 p256_jacobian P; in api_mulgen() local
1404 p256_mulgen(&P, x, xlen); in api_mulgen()
1405 p256_to_affine(&P); in api_mulgen()
1406 p256_encode(R, &P); in api_mulgen()
1415 p256_jacobian P, Q; in api_muladd() local
1423 r = p256_decode(&P, A, len); in api_muladd()
1424 p256_mul(&P, x, xlen); in api_muladd()
1435 t = p256_add(&P, &Q); in api_muladd()
1436 reduce_final_f256(P.z); in api_muladd()
1439 z |= P.z[i]; in api_muladd()
1445 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we in api_muladd()
1448 * z = 0, t = 0 return P (normal addition) in api_muladd()
1449 * z = 0, t = 1 return P (normal addition) in api_muladd()
1451 * z = 1, t = 1 report an error (P+Q = 0) in api_muladd()
1453 CCOPY(z & ~t, &P, &Q, sizeof Q); in api_muladd()
1454 p256_to_affine(&P); in api_muladd()
1455 p256_encode(A, &P); in api_muladd()