ec_p256_m62.c - OpenGrok cross reference for /freebsd/contrib/bearssl/src/ec/ec_p256

Lines Matching +full:- +full:p1
75  * A field element is encoded as five 64-bit integers, in basis 2^52.
79  *   - top limb is less than 2^48 + 2^30
80  *   - the other limbs fit on 53 bits each
85 #define MASK48   (BIT(48) - BIT(0))
86 #define MASK52   (BIT(52) - BIT(0))
94 /* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p
145 	w = a[1] - (s << 44);  in f256_partial_reduce()
147 	cc = -(w >> 52) & 0xFFF;    /* cc < 16 */  in f256_partial_reduce()
148 	w = a[2] - cc;  in f256_partial_reduce()
151 	w = a[3] - cc - (s << 36);  in f256_partial_reduce()
155 	a[4] = w + (s << 16) - cc;  /* a[4] < 2^48 + 2^30 */  in f256_partial_reduce()
170 	 * We compute d = 2^13*p + a - b; this ensures a positive  in f256_sub()
179 	 * of exact-width types, but this requires the compiler to optimize  in f256_sub()
180 	 * away the writes and reads from RAM), and right-shifting a  in f256_sub()
181 	 * signed negative value is implementation-defined. Therefore,  in f256_sub()
185 	w = a[0] - b[0] - BIT(13);  in f256_sub()
188 	cc |= -(cc & BIT(11));  in f256_sub()
189 	w = a[1] - b[1] + cc;  in f256_sub()
192 	cc |= -(cc & BIT(11));  in f256_sub()
193 	w = a[2] - b[2] + cc;  in f256_sub()
196 	cc |= -(cc & BIT(11));  in f256_sub()
197 	w = a[3] - b[3] + cc;  in f256_sub()
200 	cc |= -(cc & BIT(11));  in f256_sub()
201 	t[4] = (BIT(61) - BIT(29)) + a[4] - b[4] + cc;  in f256_sub()
205 	 *  2^256 = 2^224 - 2^192 - 2^96 + 1 mod p  in f256_sub()
208 	 *    0 <= t[0] <= 2^52 - 1  in f256_sub()
209 	 *    0 <= t[1] <= 2^52 - 1  in f256_sub()
210 	 *    2^5 <= t[2] <= 2^52 + 2^5 - 1  in f256_sub()
211 	 *    2^49 <= t[3] <= 2^52 + 2^49 - 1  in f256_sub()
212 	 *    2^59 < t[4] <= 2^61 + 2^60 - 2^29  in f256_sub()
220 	w = t[1] - (s << 44);  in f256_sub()
221 	d[1] = w & MASK52;           /* d[1] <= 2^52 - 1 */  in f256_sub()
222 	cc = -(w >> 52) & 0xFFF;     /* cc <= 48 */  in f256_sub()
223 	w = t[2] - cc;  in f256_sub()
226 	w = t[3] - cc - (s << 36);  in f256_sub()
228 	d[3] = w + (cc << 52);       /* t[3] <= 2^52 + 2^49 - 1 */  in f256_sub()
229 	d[4] = (t[4] & MASK48) + (s << 16) - cc;  /* d[4] < 2^48 + 2^30 */  in f256_sub()
268 		 *   t <- (t + x*b + f*p) / 2^64  in f256_montymul()
292 			- ((unsigned __int128)f << 16);  in f256_montymul()
303 		w = t[1] - (s << 44);  in f256_montymul()
305 		cc = -(w >> 52) & 0xFFF;    /* cc < 16 */  in f256_montymul()
306 		w = t[2] - cc;  in f256_montymul()
309 		w = t[3] - cc - (s << 36);  in f256_montymul()
313 		t[4] = w + (s << 16) - cc;  /* t[4] < 2^48 + 2^30 */  in f256_montymul()
349 		 *   t <- (t + x*b + f*p) / 2^64  in f256_montymul()
405 		w = t[1] - (s << 44);  in f256_montymul()
407 		cc = -(w >> 52) & 0xFFF;    /* cc < 16 */  in f256_montymul()
408 		w = t[2] - cc;  in f256_montymul()
411 		w = t[3] - cc - (s << 36);  in f256_montymul()
415 		t[4] = w + (s << 16) - cc;  /* t[4] < 2^48 + 2^30 */  in f256_montymul()
485 	 * We compute a^(p-2) mod p. The exponent pattern (from high to  in f256_invert()
487 	 *  - 32 bits of value 1  in f256_invert()
488 	 *  - 31 bits of value 0  in f256_invert()
489 	 *  - 1 bit of value 1  in f256_invert()
490 	 *  - 96 bits of value 0  in f256_invert()
491 	 *  - 94 bits of value 1  in f256_invert()
492 	 *  - 1 bit of value 0  in f256_invert()
493 	 *  - 1 bit of value 1  in f256_invert()
494 	 * To speed up the square-and-multiply algorithm, we precompute  in f256_invert()
495 	 * a^(2^31-1).  in f256_invert()
508 	for (i = 224; i >= 0; i --) {  in f256_invert()
550 	 * We compute t = r + (2^256 - p) = r + 2^224 - 2^192 - 2^96 + 1.  in f256_final_reduce()
552 	 * want to return r - p = t - 2^256.  in f256_final_reduce()
577 	w = t[1] - BIT(44);  in f256_final_reduce()
580 	w = t[2] - cc;  in f256_final_reduce()
583 	w = t[3] - BIT(36) - cc;  in f256_final_reduce()
586 	t[4] -= cc;  in f256_final_reduce()
593 	cc = -(t[4] >> 48);  in f256_final_reduce()
603  *  - In affine coordinates, the point-at-infinity cannot be encoded.
604  *  - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3);
605  *    if Z = 0 then this is the point-at-infinity.
685 	 * Verify y^2 = x^3 + A*x + B. In curve P-256, A = -3.  in point_decode()
706 	memcpy(P->x, x, sizeof x);  in point_decode()
707 	memcpy(P->y, y, sizeof y);  in point_decode()
708 	memcpy(P->z, F256_R, sizeof F256_R);  in point_decode()
714  *  - The point is converted back to affine coordinates.
715  *  - Final reduction is performed.
716  *  - The point is encoded into the provided buffer.
718  * If the point is the point-at-infinity, all operations are performed,
728 	f256_invert(t2, P->z);  in point_encode()
733 	f256_montymul(t1, P->x, t1);  in point_encode()
734 	f256_montymul(t2, P->y, t2);  in point_encode()
748 	/* Return success if and only if P->z != 0. */  in point_encode()
749 	z = P->z[0] | P->z[1] | P->z[2] | P->z[3] | P->z[4];  in point_encode()
755  * Note: if the source point is the point-at-infinity, then the result is
756  * still the point-at-infinity, which is correct. Moreover, if the three
766 	 *   m = 3*(x + z^2)*(x - z^2)  in p256_double()
767 	 *   x' = m^2 - 2*s  in p256_double()
768 	 *   y' = m*(s - x') - 8*y^4  in p256_double()
773 	 *   - If y = 0 then z' = 0. But there is no such point in P-256  in p256_double()
775 	 *   - If z = 0 then z' = 0.  in p256_double()
782 	f256_montysquare(t1, P->z);  in p256_double()
785 	 * Compute x-z^2 in t2 and x+z^2 in t1.  in p256_double()
787 	f256_add(t2, P->x, t1);  in p256_double()
788 	f256_sub(t1, P->x, t1);  in p256_double()
791 	 * Compute 3*(x+z^2)*(x-z^2) in t1.  in p256_double()
800 	f256_montysquare(t3, P->y);  in p256_double()
802 	f256_montymul(t2, P->x, t3);  in p256_double()
806 	 * Compute x' = m^2 - 2*s.  in p256_double()
808 	f256_montysquare(P->x, t1);  in p256_double()
809 	f256_sub(P->x, P->x, t2);  in p256_double()
810 	f256_sub(P->x, P->x, t2);  in p256_double()
815 	f256_montymul(t4, P->y, P->z);  in p256_double()
816 	f256_add(P->z, t4, t4);  in p256_double()
817 	f256_partial_reduce(P->z);  in p256_double()
820 	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have  in p256_double()
823 	f256_sub(t2, t2, P->x);  in p256_double()
824 	f256_montymul(P->y, t1, t2);  in p256_double()
827 	f256_sub(P->y, P->y, t4);  in p256_double()
831  * Point addition (Jacobian coordinates): P1 is replaced with P1+P2.
834  *   - If P1 == 0 but P2 != 0
835  *   - If P1 != 0 but P2 == 0
836  *   - If P1 == P2
838  * In all three cases, P1 is set to the point at infinity.
842  *   - P1 and P2 have the same Y coordinate.
843  *   - P1 == 0 and P2 == 0.
844  *   - The Y coordinate of one of the points is 0 and the other point is
849  * curve P-256.
851  * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
854  *   - If the result is not the point at infinity, then it is correct.
855  *   - Otherwise, if the returned value is 1, then this is a case of
856  *     P1+P2 == 0, so the result is indeed the point at infinity.
857  *   - Otherwise, P1 == P2, so a "double" operation should have been
861  * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
864 p256_add(p256_jacobian *P1, const p256_jacobian *P2)  in p256_add()  argument
873 	 *   h = u2 - u1  in p256_add()
874 	 *   r = s2 - s1  in p256_add()
875 	 *   x3 = r^2 - h^3 - 2 * u1 * h^2  in p256_add()
876 	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3  in p256_add()
885 	f256_montysquare(t3, P2->z);  in p256_add()
886 	f256_montymul(t1, P1->x, t3);  in p256_add()
887 	f256_montymul(t4, P2->z, t3);  in p256_add()
888 	f256_montymul(t3, P1->y, t4);  in p256_add()
893 	f256_montysquare(t4, P1->z);  in p256_add()
894 	f256_montymul(t2, P2->x, t4);  in p256_add()
895 	f256_montymul(t5, P1->z, t4);  in p256_add()
896 	f256_montymul(t4, P2->y, t5);  in p256_add()
899 	 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).  in p256_add()
908 	ret = (ret | -ret) >> 31;  in p256_add()
918 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.  in p256_add()
920 	f256_montysquare(P1->x, t4);  in p256_add()
921 	f256_sub(P1->x, P1->x, t5);  in p256_add()
922 	f256_sub(P1->x, P1->x, t6);  in p256_add()
923 	f256_sub(P1->x, P1->x, t6);  in p256_add()
926 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.  in p256_add()
928 	f256_sub(t6, t6, P1->x);  in p256_add()
929 	f256_montymul(P1->y, t4, t6);  in p256_add()
931 	f256_sub(P1->y, P1->y, t1);  in p256_add()
936 	f256_montymul(t1, P1->z, P2->z);  in p256_add()
937 	f256_montymul(P1->z, t1, t2);  in p256_add()
943  * Point addition (mixed coordinates): P1 is replaced with P1+P2.
944  * This is a specialised function for the case when P2 is a non-zero point
949  *   - If P1 == 0
950  *   - If P1 == P2
952  * In both cases, P1 is set to the point at infinity.
956  *   - P1 and P2 have the same Y (affine) coordinate.
957  *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
961  * curve P-256.
963  * Therefore, assuming that P1 != 0 on input, then the caller
966  *   - If the result is not the point at infinity, then it is correct.
967  *   - Otherwise, if the returned value is 1, then this is a case of
968  *     P1+P2 == 0, so the result is indeed the point at infinity.
969  *   - Otherwise, P1 == P2, so a "double" operation should have been
976 p256_add_mixed(p256_jacobian *P1, const p256_affine *P2)  in p256_add_mixed()  argument
985 	 *   h = u2 - u1  in p256_add_mixed()
986 	 *   r = s2 - s1  in p256_add_mixed()
987 	 *   x3 = r^2 - h^3 - 2 * u1 * h^2  in p256_add_mixed()
988 	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3  in p256_add_mixed()
997 	memcpy(t1, P1->x, sizeof t1);  in p256_add_mixed()
998 	memcpy(t3, P1->y, sizeof t3);  in p256_add_mixed()
1003 	f256_montysquare(t4, P1->z);  in p256_add_mixed()
1004 	f256_montymul(t2, P2->x, t4);  in p256_add_mixed()
1005 	f256_montymul(t5, P1->z, t4);  in p256_add_mixed()
1006 	f256_montymul(t4, P2->y, t5);  in p256_add_mixed()
1009 	 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).  in p256_add_mixed()
1018 	ret = (ret | -ret) >> 31;  in p256_add_mixed()
1028 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.  in p256_add_mixed()
1030 	f256_montysquare(P1->x, t4);  in p256_add_mixed()
1031 	f256_sub(P1->x, P1->x, t5);  in p256_add_mixed()
1032 	f256_sub(P1->x, P1->x, t6);  in p256_add_mixed()
1033 	f256_sub(P1->x, P1->x, t6);  in p256_add_mixed()
1036 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.  in p256_add_mixed()
1038 	f256_sub(t6, t6, P1->x);  in p256_add_mixed()
1039 	f256_montymul(P1->y, t4, t6);  in p256_add_mixed()
1041 	f256_sub(P1->y, P1->y, t1);  in p256_add_mixed()
1046 	f256_montymul(P1->z, P1->z, t2);  in p256_add_mixed()
1054  * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2.
1055  * This is a specialised function for the case when P2 is a non-zero point
1061 p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2)
1070 	 *   h = u2 - u1
1071 	 *   r = s2 - s1
1072 	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
1073 	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
1078 	 *  - If P1 is the point-at-infinity (z1 = 0), then z3 is
1081 	 *  - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
1084 	 * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then
1085 	 * we correctly get z3 = 0 (the point-at-infinity).
1087 	 * To fix the case P1 = 0, we perform at the end a copy of P2
1088 	 * over P1, conditional to z1 = 0.
1090 	 * For P1 = P2: in that case, both h and r are set to 0, and
1092 	 * occurrence to make a mask which will be all-one if P1 = P2,
1093 	 * or all-zero otherwise; then we can compute the double of P2
1100 	 *   m = 3*(x2 + 1)*(x2 - 1)
1101 	 *   x' = m^2 - 2*s
1102 	 *   y' = m*(s - x') - 8*y2^4
1112 	 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
1114 	zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3] | P1->z[4];
1115 	zz = ((zz | -zz) >> 63) - (uint64_t)1;
1120 	memcpy(t1, P1->x, sizeof t1);
1121 	memcpy(t3, P1->y, sizeof t3);
1126 	f256_montysquare(t4, P1->z);
1127 	f256_montymul(t2, P2->x, t4);
1128 	f256_montymul(t5, P1->z, t4);
1129 	f256_montymul(t4, P2->y, t5);
1132 	 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4).
1139 	 * If both h = 0 and r = 0, then P1 = P2, and we want to set
1140 	 * the mask tt to -1; otherwise, the mask will be 0.
1146 	tt = ((tt | -tt) >> 63) - (uint64_t)1;
1156 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
1158 	f256_montysquare(P1->x, t4);
1159 	f256_sub(P1->x, P1->x, t5);
1160 	f256_sub(P1->x, P1->x, t6);
1161 	f256_sub(P1->x, P1->x, t6);
1164 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
1166 	f256_sub(t6, t6, P1->x);
1167 	f256_montymul(P1->y, t4, t6);
1169 	f256_sub(P1->y, P1->y, t1);
1174 	f256_montymul(P1->z, P1->z, t2);
1177 	 * The "double" result, in case P1 = P2.
1183 	f256_add(t1, P2->y, P2->y);
1189 	f256_montysquare(t2, P2->y);
1192 	f256_montymul(t3, P2->x, t3);
1195 	 * Compute m = 3*(x2^2 - 1) (in t4).
1197 	f256_montysquare(t4, P2->x);
1203 	 * Compute x' = m^2 - 2*s (in t5).
1210 	 * Compute y' = m*(s - x') - 8*y2^4 (in t6).
1223 		P1->x[i] |= tt & t5[i];
1224 		P1->y[i] |= tt & t6[i];
1225 		P1->z[i] |= tt & t1[i];
1229 	 * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0,
1234 		P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]);
1235 		P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]);
1236 		P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]);
1246  *  - All provided points are valid points on the curve.
1247  *  - Multiplier is non-zero, and smaller than the curve order.
1248  *  - Everything is in Montgomery representation.
1259 	while (klen -- > 0) {  in point_mul_inner()
1284 			 * bits are non-zero.  in point_mul_inner()
1288 				m = -(uint64_t)EQ(bits, n + 1);  in point_mul_inner()
1305 			 * If qz is still 1, then Q was all-zeros, and this  in point_mul_inner()
1308 			m = -(uint64_t)(bnz & qz);  in point_mul_inner()
1347 	 *  - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2  in window_to_affine()
1348 	 *  - on input (z_1,z_2,... z_n):  in window_to_affine()
1349 	 *       recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1  in window_to_affine()
1350 	 *       recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2  in window_to_affine()
1351 	 *       multiply elements of r1 by m2 -> s1  in window_to_affine()
1352 	 *       multiply elements of r2 by m1 -> s2  in window_to_affine()
1359 	 *  - Depth 1:  in window_to_affine()
1368 	 *  - Depth 2:  in window_to_affine()
1369 	 *      z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12  in window_to_affine()
1371 	 *      z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56  in window_to_affine()
1373 	 *      z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A  in window_to_affine()
1376 	 *  - Depth 3:  in window_to_affine()
1377 	 *      z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678  in window_to_affine()
1378 	 *      z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234  in window_to_affine()
1380 	 *      z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE  in window_to_affine()
1381 	 *      zD <- zD*z9ABC, zE*z9ABC  in window_to_affine()
1384 	 *  - Depth 4:  in window_to_affine()
1408 		memcpy(z[num >> 1], jac[num - 1].z, sizeof zt);  in window_to_affine()
1409 		memcpy(jac[num - 1].z, F256_R, sizeof F256_R);  in window_to_affine()
1423 		n = (num + s - 1) >> k;  in window_to_affine()
1448  *  - Source point is a valid curve point.
1449  *  - Source point is not the point-at-infinity.
1450  *  - Integer is not 0, and is lower than the curve order.
1452  * (but the process is still constant-time).
1468 		window.jac[i - 1] = window.jac[(i >> 1) - 1];  in p256_mul()
1470 			p256_double(&window.jac[i - 1]);  in p256_mul()
1472 			p256_add(&window.jac[i - 1], &window.jac[i >> 1]);  in p256_mul()
1590  *  - Integer is not 0, and is lower than the curve order.
1592  * (but the process is still constant-time).
1602  *  - klen <= 32
1603  *  - k != 0
1604  *  - k is lower than the curve order
1607  * Constant-time behaviour: only klen may be observable.
1626 			c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]);  in check_scalar()
1629 		c = -1;  in check_scalar()
1671 	 * window of u*P+v*Q points, to merge the two doubling-ladders  in api_muladd()
1674 	 *  - During the computation, we may hit the point-at-infinity.  in api_muladd()
1679 	 *  - A 4-bit window would be too large, since it would involve  in api_muladd()
1680 	 *    16*16-1 = 255 points. For the same window size as in the  in api_muladd()
1682 	 *    to 2 bits, and thus perform twice as many non-doubling  in api_muladd()
1685 	 *  - The window may itself contain the point-at-infinity, and  in api_muladd()