Lines Matching +full:- +full:p1
75 * A field element is encoded as five 64-bit integers, in basis 2^52.
79 * - top limb is less than 2^48 + 2^30
80 * - the other limbs fit on 53 bits each
85 #define MASK48 (BIT(48) - BIT(0))
86 #define MASK52 (BIT(52) - BIT(0))
94 /* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p
145 w = a[1] - (s << 44); in f256_partial_reduce()
147 cc = -(w >> 52) & 0xFFF; /* cc < 16 */ in f256_partial_reduce()
148 w = a[2] - cc; in f256_partial_reduce()
151 w = a[3] - cc - (s << 36); in f256_partial_reduce()
155 a[4] = w + (s << 16) - cc; /* a[4] < 2^48 + 2^30 */ in f256_partial_reduce()
170 * We compute d = 2^13*p + a - b; this ensures a positive in f256_sub()
179 * of exact-width types, but this requires the compiler to optimize in f256_sub()
180 * away the writes and reads from RAM), and right-shifting a in f256_sub()
181 * signed negative value is implementation-defined. Therefore, in f256_sub()
185 w = a[0] - b[0] - BIT(13); in f256_sub()
188 cc |= -(cc & BIT(11)); in f256_sub()
189 w = a[1] - b[1] + cc; in f256_sub()
192 cc |= -(cc & BIT(11)); in f256_sub()
193 w = a[2] - b[2] + cc; in f256_sub()
196 cc |= -(cc & BIT(11)); in f256_sub()
197 w = a[3] - b[3] + cc; in f256_sub()
200 cc |= -(cc & BIT(11)); in f256_sub()
201 t[4] = (BIT(61) - BIT(29)) + a[4] - b[4] + cc; in f256_sub()
205 * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p in f256_sub()
208 * 0 <= t[0] <= 2^52 - 1 in f256_sub()
209 * 0 <= t[1] <= 2^52 - 1 in f256_sub()
210 * 2^5 <= t[2] <= 2^52 + 2^5 - 1 in f256_sub()
211 * 2^49 <= t[3] <= 2^52 + 2^49 - 1 in f256_sub()
212 * 2^59 < t[4] <= 2^61 + 2^60 - 2^29 in f256_sub()
220 w = t[1] - (s << 44); in f256_sub()
221 d[1] = w & MASK52; /* d[1] <= 2^52 - 1 */ in f256_sub()
222 cc = -(w >> 52) & 0xFFF; /* cc <= 48 */ in f256_sub()
223 w = t[2] - cc; in f256_sub()
226 w = t[3] - cc - (s << 36); in f256_sub()
228 d[3] = w + (cc << 52); /* t[3] <= 2^52 + 2^49 - 1 */ in f256_sub()
229 d[4] = (t[4] & MASK48) + (s << 16) - cc; /* d[4] < 2^48 + 2^30 */ in f256_sub()
268 * t <- (t + x*b + f*p) / 2^64 in f256_montymul()
292 - ((unsigned __int128)f << 16); in f256_montymul()
303 w = t[1] - (s << 44); in f256_montymul()
305 cc = -(w >> 52) & 0xFFF; /* cc < 16 */ in f256_montymul()
306 w = t[2] - cc; in f256_montymul()
309 w = t[3] - cc - (s << 36); in f256_montymul()
313 t[4] = w + (s << 16) - cc; /* t[4] < 2^48 + 2^30 */ in f256_montymul()
349 * t <- (t + x*b + f*p) / 2^64 in f256_montymul()
405 w = t[1] - (s << 44); in f256_montymul()
407 cc = -(w >> 52) & 0xFFF; /* cc < 16 */ in f256_montymul()
408 w = t[2] - cc; in f256_montymul()
411 w = t[3] - cc - (s << 36); in f256_montymul()
415 t[4] = w + (s << 16) - cc; /* t[4] < 2^48 + 2^30 */ in f256_montymul()
485 * We compute a^(p-2) mod p. The exponent pattern (from high to in f256_invert()
487 * - 32 bits of value 1 in f256_invert()
488 * - 31 bits of value 0 in f256_invert()
489 * - 1 bit of value 1 in f256_invert()
490 * - 96 bits of value 0 in f256_invert()
491 * - 94 bits of value 1 in f256_invert()
492 * - 1 bit of value 0 in f256_invert()
493 * - 1 bit of value 1 in f256_invert()
494 * To speed up the square-and-multiply algorithm, we precompute in f256_invert()
495 * a^(2^31-1). in f256_invert()
508 for (i = 224; i >= 0; i --) { in f256_invert()
550 * We compute t = r + (2^256 - p) = r + 2^224 - 2^192 - 2^96 + 1. in f256_final_reduce()
552 * want to return r - p = t - 2^256. in f256_final_reduce()
577 w = t[1] - BIT(44); in f256_final_reduce()
580 w = t[2] - cc; in f256_final_reduce()
583 w = t[3] - BIT(36) - cc; in f256_final_reduce()
586 t[4] -= cc; in f256_final_reduce()
593 cc = -(t[4] >> 48); in f256_final_reduce()
603 * - In affine coordinates, the point-at-infinity cannot be encoded.
604 * - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3);
605 * if Z = 0 then this is the point-at-infinity.
685 * Verify y^2 = x^3 + A*x + B. In curve P-256, A = -3. in point_decode()
706 memcpy(P->x, x, sizeof x); in point_decode()
707 memcpy(P->y, y, sizeof y); in point_decode()
708 memcpy(P->z, F256_R, sizeof F256_R); in point_decode()
714 * - The point is converted back to affine coordinates.
715 * - Final reduction is performed.
716 * - The point is encoded into the provided buffer.
718 * If the point is the point-at-infinity, all operations are performed,
728 f256_invert(t2, P->z); in point_encode()
733 f256_montymul(t1, P->x, t1); in point_encode()
734 f256_montymul(t2, P->y, t2); in point_encode()
748 /* Return success if and only if P->z != 0. */ in point_encode()
749 z = P->z[0] | P->z[1] | P->z[2] | P->z[3] | P->z[4]; in point_encode()
755 * Note: if the source point is the point-at-infinity, then the result is
756 * still the point-at-infinity, which is correct. Moreover, if the three
766 * m = 3*(x + z^2)*(x - z^2) in p256_double()
767 * x' = m^2 - 2*s in p256_double()
768 * y' = m*(s - x') - 8*y^4 in p256_double()
773 * - If y = 0 then z' = 0. But there is no such point in P-256 in p256_double()
775 * - If z = 0 then z' = 0. in p256_double()
782 f256_montysquare(t1, P->z); in p256_double()
785 * Compute x-z^2 in t2 and x+z^2 in t1. in p256_double()
787 f256_add(t2, P->x, t1); in p256_double()
788 f256_sub(t1, P->x, t1); in p256_double()
791 * Compute 3*(x+z^2)*(x-z^2) in t1. in p256_double()
800 f256_montysquare(t3, P->y); in p256_double()
802 f256_montymul(t2, P->x, t3); in p256_double()
806 * Compute x' = m^2 - 2*s. in p256_double()
808 f256_montysquare(P->x, t1); in p256_double()
809 f256_sub(P->x, P->x, t2); in p256_double()
810 f256_sub(P->x, P->x, t2); in p256_double()
815 f256_montymul(t4, P->y, P->z); in p256_double()
816 f256_add(P->z, t4, t4); in p256_double()
817 f256_partial_reduce(P->z); in p256_double()
820 * Compute y' = m*(s - x') - 8*y^4. Note that we already have in p256_double()
823 f256_sub(t2, t2, P->x); in p256_double()
824 f256_montymul(P->y, t1, t2); in p256_double()
827 f256_sub(P->y, P->y, t4); in p256_double()
831 * Point addition (Jacobian coordinates): P1 is replaced with P1+P2.
834 * - If P1 == 0 but P2 != 0
835 * - If P1 != 0 but P2 == 0
836 * - If P1 == P2
838 * In all three cases, P1 is set to the point at infinity.
842 * - P1 and P2 have the same Y coordinate.
843 * - P1 == 0 and P2 == 0.
844 * - The Y coordinate of one of the points is 0 and the other point is
849 * curve P-256.
851 * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
854 * - If the result is not the point at infinity, then it is correct.
855 * - Otherwise, if the returned value is 1, then this is a case of
856 * P1+P2 == 0, so the result is indeed the point at infinity.
857 * - Otherwise, P1 == P2, so a "double" operation should have been
861 * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
864 p256_add(p256_jacobian *P1, const p256_jacobian *P2) in p256_add() argument
873 * h = u2 - u1 in p256_add()
874 * r = s2 - s1 in p256_add()
875 * x3 = r^2 - h^3 - 2 * u1 * h^2 in p256_add()
876 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 in p256_add()
885 f256_montysquare(t3, P2->z); in p256_add()
886 f256_montymul(t1, P1->x, t3); in p256_add()
887 f256_montymul(t4, P2->z, t3); in p256_add()
888 f256_montymul(t3, P1->y, t4); in p256_add()
893 f256_montysquare(t4, P1->z); in p256_add()
894 f256_montymul(t2, P2->x, t4); in p256_add()
895 f256_montymul(t5, P1->z, t4); in p256_add()
896 f256_montymul(t4, P2->y, t5); in p256_add()
899 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). in p256_add()
908 ret = (ret | -ret) >> 31; in p256_add()
918 * Compute x3 = r^2 - h^3 - 2*u1*h^2. in p256_add()
920 f256_montysquare(P1->x, t4); in p256_add()
921 f256_sub(P1->x, P1->x, t5); in p256_add()
922 f256_sub(P1->x, P1->x, t6); in p256_add()
923 f256_sub(P1->x, P1->x, t6); in p256_add()
926 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. in p256_add()
928 f256_sub(t6, t6, P1->x); in p256_add()
929 f256_montymul(P1->y, t4, t6); in p256_add()
931 f256_sub(P1->y, P1->y, t1); in p256_add()
936 f256_montymul(t1, P1->z, P2->z); in p256_add()
937 f256_montymul(P1->z, t1, t2); in p256_add()
943 * Point addition (mixed coordinates): P1 is replaced with P1+P2.
944 * This is a specialised function for the case when P2 is a non-zero point
949 * - If P1 == 0
950 * - If P1 == P2
952 * In both cases, P1 is set to the point at infinity.
956 * - P1 and P2 have the same Y (affine) coordinate.
957 * - The Y coordinate of P2 is 0 and P1 is the point at infinity.
961 * curve P-256.
963 * Therefore, assuming that P1 != 0 on input, then the caller
966 * - If the result is not the point at infinity, then it is correct.
967 * - Otherwise, if the returned value is 1, then this is a case of
968 * P1+P2 == 0, so the result is indeed the point at infinity.
969 * - Otherwise, P1 == P2, so a "double" operation should have been
976 p256_add_mixed(p256_jacobian *P1, const p256_affine *P2) in p256_add_mixed() argument
985 * h = u2 - u1 in p256_add_mixed()
986 * r = s2 - s1 in p256_add_mixed()
987 * x3 = r^2 - h^3 - 2 * u1 * h^2 in p256_add_mixed()
988 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 in p256_add_mixed()
997 memcpy(t1, P1->x, sizeof t1); in p256_add_mixed()
998 memcpy(t3, P1->y, sizeof t3); in p256_add_mixed()
1003 f256_montysquare(t4, P1->z); in p256_add_mixed()
1004 f256_montymul(t2, P2->x, t4); in p256_add_mixed()
1005 f256_montymul(t5, P1->z, t4); in p256_add_mixed()
1006 f256_montymul(t4, P2->y, t5); in p256_add_mixed()
1009 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). in p256_add_mixed()
1018 ret = (ret | -ret) >> 31; in p256_add_mixed()
1028 * Compute x3 = r^2 - h^3 - 2*u1*h^2. in p256_add_mixed()
1030 f256_montysquare(P1->x, t4); in p256_add_mixed()
1031 f256_sub(P1->x, P1->x, t5); in p256_add_mixed()
1032 f256_sub(P1->x, P1->x, t6); in p256_add_mixed()
1033 f256_sub(P1->x, P1->x, t6); in p256_add_mixed()
1036 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. in p256_add_mixed()
1038 f256_sub(t6, t6, P1->x); in p256_add_mixed()
1039 f256_montymul(P1->y, t4, t6); in p256_add_mixed()
1041 f256_sub(P1->y, P1->y, t1); in p256_add_mixed()
1046 f256_montymul(P1->z, P1->z, t2); in p256_add_mixed()
1054 * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2.
1055 * This is a specialised function for the case when P2 is a non-zero point
1061 p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2)
1070 * h = u2 - u1
1071 * r = s2 - s1
1072 * x3 = r^2 - h^3 - 2 * u1 * h^2
1073 * y3 = r * (u1 * h^2 - x3) - s1 * h^3
1078 * - If P1 is the point-at-infinity (z1 = 0), then z3 is
1081 * - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
1084 * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then
1085 * we correctly get z3 = 0 (the point-at-infinity).
1087 * To fix the case P1 = 0, we perform at the end a copy of P2
1088 * over P1, conditional to z1 = 0.
1090 * For P1 = P2: in that case, both h and r are set to 0, and
1092 * occurrence to make a mask which will be all-one if P1 = P2,
1093 * or all-zero otherwise; then we can compute the double of P2
1100 * m = 3*(x2 + 1)*(x2 - 1)
1101 * x' = m^2 - 2*s
1102 * y' = m*(s - x') - 8*y2^4
1112 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
1114 zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3] | P1->z[4];
1115 zz = ((zz | -zz) >> 63) - (uint64_t)1;
1120 memcpy(t1, P1->x, sizeof t1);
1121 memcpy(t3, P1->y, sizeof t3);
1126 f256_montysquare(t4, P1->z);
1127 f256_montymul(t2, P2->x, t4);
1128 f256_montymul(t5, P1->z, t4);
1129 f256_montymul(t4, P2->y, t5);
1132 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
1139 * If both h = 0 and r = 0, then P1 = P2, and we want to set
1140 * the mask tt to -1; otherwise, the mask will be 0.
1146 tt = ((tt | -tt) >> 63) - (uint64_t)1;
1156 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
1158 f256_montysquare(P1->x, t4);
1159 f256_sub(P1->x, P1->x, t5);
1160 f256_sub(P1->x, P1->x, t6);
1161 f256_sub(P1->x, P1->x, t6);
1164 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
1166 f256_sub(t6, t6, P1->x);
1167 f256_montymul(P1->y, t4, t6);
1169 f256_sub(P1->y, P1->y, t1);
1174 f256_montymul(P1->z, P1->z, t2);
1177 * The "double" result, in case P1 = P2.
1183 f256_add(t1, P2->y, P2->y);
1189 f256_montysquare(t2, P2->y);
1192 f256_montymul(t3, P2->x, t3);
1195 * Compute m = 3*(x2^2 - 1) (in t4).
1197 f256_montysquare(t4, P2->x);
1203 * Compute x' = m^2 - 2*s (in t5).
1210 * Compute y' = m*(s - x') - 8*y2^4 (in t6).
1223 P1->x[i] |= tt & t5[i];
1224 P1->y[i] |= tt & t6[i];
1225 P1->z[i] |= tt & t1[i];
1229 * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0,
1234 P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]);
1235 P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]);
1236 P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]);
1246 * - All provided points are valid points on the curve.
1247 * - Multiplier is non-zero, and smaller than the curve order.
1248 * - Everything is in Montgomery representation.
1259 while (klen -- > 0) { in point_mul_inner()
1284 * bits are non-zero. in point_mul_inner()
1288 m = -(uint64_t)EQ(bits, n + 1); in point_mul_inner()
1305 * If qz is still 1, then Q was all-zeros, and this in point_mul_inner()
1308 m = -(uint64_t)(bnz & qz); in point_mul_inner()
1347 * - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2 in window_to_affine()
1348 * - on input (z_1,z_2,... z_n): in window_to_affine()
1349 * recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1 in window_to_affine()
1350 * recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2 in window_to_affine()
1351 * multiply elements of r1 by m2 -> s1 in window_to_affine()
1352 * multiply elements of r2 by m1 -> s2 in window_to_affine()
1359 * - Depth 1: in window_to_affine()
1368 * - Depth 2: in window_to_affine()
1369 * z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12 in window_to_affine()
1371 * z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56 in window_to_affine()
1373 * z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A in window_to_affine()
1376 * - Depth 3: in window_to_affine()
1377 * z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678 in window_to_affine()
1378 * z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234 in window_to_affine()
1380 * z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE in window_to_affine()
1381 * zD <- zD*z9ABC, zE*z9ABC in window_to_affine()
1384 * - Depth 4: in window_to_affine()
1408 memcpy(z[num >> 1], jac[num - 1].z, sizeof zt); in window_to_affine()
1409 memcpy(jac[num - 1].z, F256_R, sizeof F256_R); in window_to_affine()
1423 n = (num + s - 1) >> k; in window_to_affine()
1448 * - Source point is a valid curve point.
1449 * - Source point is not the point-at-infinity.
1450 * - Integer is not 0, and is lower than the curve order.
1452 * (but the process is still constant-time).
1468 window.jac[i - 1] = window.jac[(i >> 1) - 1]; in p256_mul()
1470 p256_double(&window.jac[i - 1]); in p256_mul()
1472 p256_add(&window.jac[i - 1], &window.jac[i >> 1]); in p256_mul()
1590 * - Integer is not 0, and is lower than the curve order.
1592 * (but the process is still constant-time).
1602 * - klen <= 32
1603 * - k != 0
1604 * - k is lower than the curve order
1607 * Constant-time behaviour: only klen may be observable.
1626 c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]); in check_scalar()
1629 c = -1; in check_scalar()
1671 * window of u*P+v*Q points, to merge the two doubling-ladders in api_muladd()
1674 * - During the computation, we may hit the point-at-infinity. in api_muladd()
1679 * - A 4-bit window would be too large, since it would involve in api_muladd()
1680 * 16*16-1 = 255 points. For the same window size as in the in api_muladd()
1682 * to 2 bits, and thus perform twice as many non-doubling in api_muladd()
1685 * - The window may itself contain the point-at-infinity, and in api_muladd()