ec_p256_m64.c - OpenGrok cross reference for /freebsd/contrib/bearssl/src/ec/ec_p256

Lines Matching +full:1 +full:p1
71 	return 1;  in api_xoff()
76  * Values may reach up to 2^256-1. Montgomery multiplication is used.
107 	w = (unsigned __int128)a[1] + b[1] + (w >> 64);  in f256_add()
108 	d[1] = (uint64_t)w;  in f256_add()
116 	 * Fold carry t, using: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p.  in f256_add()
120 	w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);  in f256_add()
121 	d[1] = (uint64_t)w;  in f256_add()
122 	/* Here, carry "w >> 64" can only be 0 or -1 */  in f256_add()
123 	w = (unsigned __int128)d[2] - ((w >> 64) & 1);  in f256_add()
125 	/* Again, carry is 0 or -1. But there can be carry only if t = 1,  in f256_add()
127 	w = (unsigned __int128)d[3] - ((w >> 64) & 1) + (t << 32) - t;  in f256_add()
136 	w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);  in f256_add()
137 	d[1] = (uint64_t)w;  in f256_add()
138 	w = (unsigned __int128)d[2] - ((w >> 64) & 1);  in f256_add()
140 	d[3] += (t << 32) - t - (uint64_t)((w >> 64) & 1);  in f256_add()
148 	cc = _addcarry_u64(cc, a[1], b[1], &d[1]);  in f256_add()
158 	cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);  in f256_add()
160 	cc = _addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);  in f256_add()
167 	cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);  in f256_add()
169 	(void)_addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);  in f256_add()
187 	w = (unsigned __int128)a[1] - b[1] - ((w >> 64) & 1);  in f256_sub()
188 	d[1] = (uint64_t)w;  in f256_sub()
189 	w = (unsigned __int128)a[2] - b[2] - ((w >> 64) & 1);  in f256_sub()
191 	w = (unsigned __int128)a[3] - b[3] - ((w >> 64) & 1);  in f256_sub()
193 	t = (uint64_t)(w >> 64) & 1;  in f256_sub()
196 	 * If there is a borrow (t = 1), then we must add the modulus  in f256_sub()
197 	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1.  in f256_sub()
201 	w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);  in f256_sub()
202 	d[1] = (uint64_t)w;  in f256_sub()
203 	/* Here, carry "w >> 64" can only be 0 or +1 */  in f256_sub()
206 	/* Again, carry is 0 or +1 */  in f256_sub()
209 	t = (uint64_t)(w >> 64) & 1;  in f256_sub()
217 	w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);  in f256_sub()
218 	d[1] = (uint64_t)w;  in f256_sub()
229 	cc = _subborrow_u64(cc, a[1], b[1], &d[1]);  in f256_sub()
239 	cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);  in f256_sub()
241 	cc = _subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);  in f256_sub()
248 	cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);  in f256_sub()
250 	(void)_subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);  in f256_sub()
270 	 * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64.  in f256_montymul()
274 	 * Step 1: t <- (a[0]*b + f*p) / 2^64  in f256_montymul()
275 	 * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this  in f256_montymul()
283 	z = (unsigned __int128)b[1] * x + (z >> 64) + (uint64_t)(f << 32);  in f256_montymul()
300 	for (i = 1; i < 4; i ++) {  in f256_montymul()
306 		z = (unsigned __int128)b[1] * x + t1 + (z >> 64);  in f256_montymul()
336 	 *   a <= 2^256-1  in f256_montymul()
337 	 *   b <= 2^256-1  in f256_montymul()
338 	 *   F <= 2^256-1  in f256_montymul()
340 	 *   a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1)  in f256_montymul()
341 	 *   a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p  in f256_montymul()
345 	 *   t4 can be only 0 or 1  in f256_montymul()
359 	d[1] = t1;  in f256_montymul()
373 	 * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64.  in f256_montymul()
377 	 * Step 1: t <- (a[0]*b + f*p) / 2^64  in f256_montymul()
378 	 * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this  in f256_montymul()
389 	zl = _umul128(b[1], x, &zh);  in f256_montymul()
421 	for (i = 1; i < 4; i ++) {  in f256_montymul()
430 		zl = _umul128(b[1], x, &zh);  in f256_montymul()
466 	 *   a <= 2^256-1  in f256_montymul()
467 	 *   b <= 2^256-1  in f256_montymul()
468 	 *   F <= 2^256-1  in f256_montymul()
470 	 *   a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1)  in f256_montymul()
471 	 *   a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p  in f256_montymul()
475 	 *   t4 can be only 0 or 1  in f256_montymul()
483 	(void)_addcarry_u64(k, t3, (t4 << 32) - (t4 << 1), &t3);  in f256_montymul()
486 	d[1] = t1;  in f256_montymul()
533 	 * Montgomery multiplication by 1 is division by 2^256 modulo p.  in f256_frommonty()
535 	static const uint64_t one[] = { 1, 0, 0, 0 };  in f256_frommonty()
550 	 *  - 32 bits of value 1  in f256_invert()
552 	 *  - 1 bit of value 1  in f256_invert()
554 	 *  - 94 bits of value 1  in f256_invert()
555 	 *  - 1 bit of value 0  in f256_invert()
556 	 *  - 1 bit of value 1  in f256_invert()
558 	 * a^(2^31-1).  in f256_invert()
604 	 * We add 2^224 - 2^192 - 2^96 + 1 to a. If there is no carry,  in f256_final_reduce()
608 	z = (unsigned __int128)a[0] + 1;  in f256_final_reduce()
610 	z = (unsigned __int128)a[1] + (z >> 64) - ((uint64_t)1 << 32);  in f256_final_reduce()
619 	a[1] ^= cc & (a[1] ^ t1);  in f256_final_reduce()
628 	k = _addcarry_u64(0, a[0], (uint64_t)1, &t0);  in f256_final_reduce()
629 	k = _addcarry_u64(k, a[1], -((uint64_t)1 << 32), &t1);  in f256_final_reduce()
630 	k = _addcarry_u64(k, a[2], -(uint64_t)1, &t2);  in f256_final_reduce()
631 	k = _addcarry_u64(k, a[3], ((uint64_t)1 << 32) - 2, &t3);  in f256_final_reduce()
635 	a[1] ^= m & (a[1] ^ t1);  in f256_final_reduce()
662  * with z = 1. If the encoding is invalid, or encodes a point which is
664  * returns 0. Otherwise, 1 is returned.
683 	x[3] = br_dec64be(buf +  1);  in point_decode()
685 	x[1] = br_dec64be(buf + 17);  in point_decode()
689 	y[1] = br_dec64be(buf + 49);  in point_decode()
709 	tt = t[0] | t[1] | t[2] | t[3];  in point_decode()
730  * the encoded point is written in the buffer, and 1 is returned.
737 	/* Set t1 = 1/z^2 and t2 = 1/z^3. */  in point_encode()
755 	br_enc64be(buf +  1, t1[3]);  in point_encode()
757 	br_enc64be(buf + 17, t1[1]);  in point_encode()
761 	br_enc64be(buf + 49, t2[1]);  in point_encode()
765 	z = P->z[0] | P->z[1] | P->z[2] | P->z[3];  in point_encode()
851  * Point addition (Jacobian coordinates): P1 is replaced with P1+P2.
854  *   - If P1 == 0 but P2 != 0
855  *   - If P1 != 0 but P2 == 0
856  *   - If P1 == P2
858  * In all three cases, P1 is set to the point at infinity.
862  *   - P1 and P2 have the same Y coordinate.
863  *   - P1 == 0 and P2 == 0.
871  * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
875  *   - Otherwise, if the returned value is 1, then this is a case of
876  *     P1+P2 == 0, so the result is indeed the point at infinity.
877  *   - Otherwise, P1 == P2, so a "double" operation should have been
881  * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
884 p256_add(p256_jacobian *P1, const p256_jacobian *P2)  in p256_add()  argument
906 	f256_montymul(t1, P1->x, t3);  in p256_add()
908 	f256_montymul(t3, P1->y, t4);  in p256_add()
913 	f256_montysquare(t4, P1->z);  in p256_add()
915 	f256_montymul(t5, P1->z, t4);  in p256_add()
926 	tt = t4[0] | t4[1] | t4[2] | t4[3];  in p256_add()
940 	f256_montysquare(P1->x, t4);  in p256_add()
941 	f256_sub(P1->x, P1->x, t5);  in p256_add()
942 	f256_sub(P1->x, P1->x, t6);  in p256_add()
943 	f256_sub(P1->x, P1->x, t6);  in p256_add()
948 	f256_sub(t6, t6, P1->x);  in p256_add()
949 	f256_montymul(P1->y, t4, t6);  in p256_add()
951 	f256_sub(P1->y, P1->y, t1);  in p256_add()
956 	f256_montymul(t1, P1->z, P2->z);  in p256_add()
957 	f256_montymul(P1->z, t1, t2);  in p256_add()
963  * Point addition (mixed coordinates): P1 is replaced with P1+P2.
969  *   - If P1 == 0
970  *   - If P1 == P2
972  * In both cases, P1 is set to the point at infinity.
976  *   - P1 and P2 have the same Y (affine) coordinate.
977  *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
983  * Therefore, assuming that P1 != 0 on input, then the caller
987  *   - Otherwise, if the returned value is 1, then this is a case of
988  *     P1+P2 == 0, so the result is indeed the point at infinity.
989  *   - Otherwise, P1 == P2, so a "double" operation should have been
996 p256_add_mixed(p256_jacobian *P1, const p256_affine *P2)  in p256_add_mixed()  argument
1017 	memcpy(t1, P1->x, sizeof t1);  in p256_add_mixed()
1018 	memcpy(t3, P1->y, sizeof t3);  in p256_add_mixed()
1023 	f256_montysquare(t4, P1->z);  in p256_add_mixed()
1025 	f256_montymul(t5, P1->z, t4);  in p256_add_mixed()
1036 	tt = t4[0] | t4[1] | t4[2] | t4[3];  in p256_add_mixed()
1050 	f256_montysquare(P1->x, t4);  in p256_add_mixed()
1051 	f256_sub(P1->x, P1->x, t5);  in p256_add_mixed()
1052 	f256_sub(P1->x, P1->x, t6);  in p256_add_mixed()
1053 	f256_sub(P1->x, P1->x, t6);  in p256_add_mixed()
1058 	f256_sub(t6, t6, P1->x);  in p256_add_mixed()
1059 	f256_montymul(P1->y, t4, t6);  in p256_add_mixed()
1061 	f256_sub(P1->y, P1->y, t1);  in p256_add_mixed()
1066 	f256_montymul(P1->z, P1->z, t2);  in p256_add_mixed()
1074  * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2.
1081 p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2)
1098 	 *  - If P1 is the point-at-infinity (z1 = 0), then z3 is
1101 	 *  - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
1104 	 * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then
1107 	 * To fix the case P1 = 0, we perform at the end a copy of P2
1108 	 * over P1, conditional to z1 = 0.
1110 	 * For P1 = P2: in that case, both h and r are set to 0, and
1112 	 * occurrence to make a mask which will be all-one if P1 = P2,
1117 	 * simplifying since P2 is affine (i.e. z2 = 1, implicitly),
1120 	 *   m = 3*(x2 + 1)*(x2 - 1)
1132 	 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
1134 	zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3];
1135 	zz = ((zz | -zz) >> 63) - (uint64_t)1;
1140 	memcpy(t1, P1->x, sizeof t1);
1141 	memcpy(t3, P1->y, sizeof t3);
1146 	f256_montysquare(t4, P1->z);
1148 	f256_montymul(t5, P1->z, t4);
1159 	 * If both h = 0 and r = 0, then P1 = P2, and we want to set
1160 	 * the mask tt to -1; otherwise, the mask will be 0.
1164 	tt = t2[0] | t2[1] | t2[2] | t2[3] | t4[0] | t4[1] | t4[2] | t4[3];
1165 	tt = ((tt | -tt) >> 63) - (uint64_t)1;
1177 	f256_montysquare(P1->x, t4);
1178 	f256_sub(P1->x, P1->x, t5);
1179 	f256_sub(P1->x, P1->x, t6);
1180 	f256_sub(P1->x, P1->x, t6);
1185 	f256_sub(t6, t6, P1->x);
1186 	f256_montymul(P1->y, t4, t6);
1188 	f256_sub(P1->y, P1->y, t1);
1193 	f256_montymul(P1->z, P1->z, t2);
1196 	 * The "double" result, in case P1 = P2.
1213 	 * Compute m = 3*(x2^2 - 1) (in t4).
1241 		P1->x[i] |= tt & t5[i];
1242 		P1->y[i] |= tt & t6[i];
1243 		P1->z[i] |= tt & t1[i];
1247 	 * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0,
1252 		P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]);
1253 		P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]);
1254 		P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]);
1261  * provided, with points 1*P to 15*P in affine coordinates.
1276 	qz = 1;  in point_mul_inner()
1306 				m = -(uint64_t)EQ(bits, n + 1);  in point_mul_inner()
1308 				T.x[1] |= m & W[n].x[1];  in point_mul_inner()
1312 				T.y[1] |= m & W[n].y[1];  in point_mul_inner()
1321 			 * If qz is still 1, then Q was all-zeros, and this  in point_mul_inner()
1355 	 * we compute u = 1/(z1*z2*z3*z4), and then we have:  in window_to_affine()
1356 	 *   1/z1 = u*z2*z3*z4  in window_to_affine()
1357 	 *   1/z2 = u*z1*z3*z4  in window_to_affine()
1358 	 *   1/z3 = u*z1*z2*z4  in window_to_affine()
1359 	 *   1/z4 = u*z1*z2*z3  in window_to_affine()
1366 	 *       recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2  in window_to_affine()
1373 	 * hexadecimal, starting at 1).  in window_to_affine()
1375 	 *  - Depth 1:  in window_to_affine()
1415 	 * extra one with coordinate Z = 1 (in Montgomery representation).  in window_to_affine()
1417 	for (i = 0; (i + 1) < num; i += 2) {  in window_to_affine()
1419 		memcpy(jac[i].z, jac[i + 1].z, sizeof zt);  in window_to_affine()
1420 		memcpy(jac[i + 1].z, zt, sizeof zt);  in window_to_affine()
1421 		f256_montymul(z[i >> 1], jac[i].z, jac[i + 1].z);  in window_to_affine()
1423 	if ((num & 1) != 0) {  in window_to_affine()
1424 		memcpy(z[num >> 1], jac[num - 1].z, sizeof zt);  in window_to_affine()
1425 		memcpy(jac[num - 1].z, F256_R, sizeof F256_R);  in window_to_affine()
1433 	for (k = 1, s = 2; s < num; k ++, s <<= 1) {  in window_to_affine()
1437 			f256_montymul(jac[i].z, jac[i].z, z[(i >> k) ^ 1]);  in window_to_affine()
1439 		n = (num + s - 1) >> k;  in window_to_affine()
1440 		for (i = 0; i < (n >> 1); i ++) {  in window_to_affine()
1441 			f256_montymul(z[i], z[i << 1], z[(i << 1) + 1]);  in window_to_affine()
1443 		if ((n & 1) != 0) {  in window_to_affine()
1444 			memmove(z[n >> 1], z[n], sizeof zt);  in window_to_affine()
1484 		window.jac[i - 1] = window.jac[(i >> 1) - 1];  in p256_mul()
1485 		if ((i & 1) == 0) {  in p256_mul()
1486 			p256_double(&window.jac[i - 1]);  in p256_mul()
1488 			p256_add(&window.jac[i - 1], &window.jac[i >> 1]);  in p256_mul()
1506  * contains (n+1)*G (affine coordinates, in Montgomery representation).
1617  * Return 1 if all of the following hold:
1645 		c = -1;  in check_scalar()
1696 	 *    16*16-1 = 255 points. For the same window size as in the  in api_muladd()
1735 	z = P.z[0] | P.z[1] | P.z[2] | P.z[3];  in api_muladd()
1740 	 * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we  in api_muladd()
1744 	 *   s = 0, t = 1   return P (normal addition)  in api_muladd()
1745 	 *   s = 1, t = 0   return Q (a 'double' case)  in api_muladd()
1746 	 *   s = 1, t = 1   report an error (P+Q = 0)  in api_muladd()