ec_p256_m64.c - OpenGrok cross reference for /freebsd/contrib/bearssl/src/ec/ec_p256

Lines Matching +full:p +full:- +full:256
75  * A field element is encoded as four 64-bit integers, in basis 2^64.
76  * Values may reach up to 2^256-1. Montgomery multiplication is used.
79 /* R = 2^256 mod p */
85 /* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p
116 	 * Fold carry t, using: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p.  in f256_add()
120 	w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);  in f256_add()
122 	/* Here, carry "w >> 64" can only be 0 or -1 */  in f256_add()
123 	w = (unsigned __int128)d[2] - ((w >> 64) & 1);  in f256_add()
125 	/* Again, carry is 0 or -1. But there can be carry only if t = 1,  in f256_add()
126 	   in which case the addition of (t << 32) - t is positive. */  in f256_add()
127 	w = (unsigned __int128)d[3] - ((w >> 64) & 1) + (t << 32) - t;  in f256_add()
136 	w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);  in f256_add()
138 	w = (unsigned __int128)d[2] - ((w >> 64) & 1);  in f256_add()
140 	d[3] += (t << 32) - t - (uint64_t)((w >> 64) & 1);  in f256_add()
153 	 * If there is a carry, then we want to subtract p, which we  in f256_add()
154 	 * do by adding 2^256 - p.  in f256_add()
158 	cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);  in f256_add()
159 	cc = _addcarry_u64(cc, d[2], -t, &d[2]);  in f256_add()
160 	cc = _addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);  in f256_add()
167 	cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);  in f256_add()
168 	cc = _addcarry_u64(cc, d[2], -t, &d[2]);  in f256_add()
169 	(void)_addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);  in f256_add()
185 	w = (unsigned __int128)a[0] - b[0];  in f256_sub()
187 	w = (unsigned __int128)a[1] - b[1] - ((w >> 64) & 1);  in f256_sub()
189 	w = (unsigned __int128)a[2] - b[2] - ((w >> 64) & 1);  in f256_sub()
191 	w = (unsigned __int128)a[3] - b[3] - ((w >> 64) & 1);  in f256_sub()
197 	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1.  in f256_sub()
199 	w = (unsigned __int128)d[0] - t;  in f256_sub()
201 	w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);  in f256_sub()
207 	w = (unsigned __int128)d[3] + (w >> 64) - (t << 32) + t;  in f256_sub()
215 	w = (unsigned __int128)d[0] - t;  in f256_sub()
217 	w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);  in f256_sub()
221 	d[3] += (uint64_t)(w >> 64) - (t << 32) + t;  in f256_sub()
234 	 * If there is a borrow, then we need to add p. We (virtually)  in f256_sub()
235 	 * add 2^256, then subtract 2^256 - p.  in f256_sub()
239 	cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);  in f256_sub()
240 	cc = _subborrow_u64(cc, d[2], -t, &d[2]);  in f256_sub()
241 	cc = _subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);  in f256_sub()
244 	 * If there still is a borrow, then we need to add p again.  in f256_sub()
248 	cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);  in f256_sub()
249 	cc = _subborrow_u64(cc, d[2], -t, &d[2]);  in f256_sub()
250 	(void)_subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);  in f256_sub()
268 	 * When computing d <- d + a[u]*b, we also add f*p such  in f256_montymul()
269 	 * that d + a[u]*b + f*p is a multiple of 2^64. Since  in f256_montymul()
270 	 * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64.  in f256_montymul()
274 	 * Step 1: t <- (a[0]*b + f*p) / 2^64  in f256_montymul()
275 	 * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this  in f256_montymul()
276 	 * ensures that (a[0]*b + f*p) is a multiple of 2^64.  in f256_montymul()
278 	 * We also have: f*p = f*2^256 - f*2^224 + f*2^192 + f*2^96 - f.  in f256_montymul()
290 	ff = ((unsigned __int128)f << 64) - ((unsigned __int128)f << 32);  in f256_montymul()
298 	 * Steps 2 to 4: t <- (t + a[i]*b + f*p) / 2^64  in f256_montymul()
303 		/* t <- (t + x*b - f) / 2^64 */  in f256_montymul()
316 		/* t <- t + f*2^32, carry in the upper half of z */  in f256_montymul()
322 		/* t <- t + f*2^192 - f*2^160 + f*2^128 */  in f256_montymul()
324 			- ((unsigned __int128)f << 32) + f;  in f256_montymul()
333 	 * At that point, we have computed t = (a*b + F*p) / 2^256, where  in f256_montymul()
334 	 * F is a 256-bit integer whose limbs are the "f" coefficients  in f256_montymul()
336 	 *   a <= 2^256-1  in f256_montymul()
337 	 *   b <= 2^256-1  in f256_montymul()
338 	 *   F <= 2^256-1  in f256_montymul()
340 	 *   a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1)  in f256_montymul()
341 	 *   a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p  in f256_montymul()
343 	 *   t < 2^256 + p - 2  in f256_montymul()
344 	 * Since p < 2^256, it follows that:  in f256_montymul()
346 	 *   t - p < 2^256  in f256_montymul()
347 	 * We can therefore subtract p from t, conditionally on t4, to  in f256_montymul()
348 	 * get a nonnegative result that fits on 256 bits.  in f256_montymul()
352 	z = (unsigned __int128)t1 - (t4 << 32) + (z >> 64);  in f256_montymul()
354 	z = (unsigned __int128)t2 - (z >> 127);  in f256_montymul()
356 	t3 = t3 - (uint64_t)(z >> 127) - t4 + (t4 << 32);  in f256_montymul()
371 	 * When computing d <- d + a[u]*b, we also add f*p such  in f256_montymul()
372 	 * that d + a[u]*b + f*p is a multiple of 2^64. Since  in f256_montymul()
373 	 * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64.  in f256_montymul()
377 	 * Step 1: t <- (a[0]*b + f*p) / 2^64  in f256_montymul()
378 	 * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this  in f256_montymul()
379 	 * ensures that (a[0]*b + f*p) is a multiple of 2^64.  in f256_montymul()
381 	 * We also have: f*p = f*2^256 - f*2^224 + f*2^192 + f*2^96 - f.  in f256_montymul()
419 	 * Steps 2 to 4: t <- (t + a[i]*b + f*p) / 2^64  in f256_montymul()
423 		/* f = t0 + x * b[0]; -- computed below */  in f256_montymul()
425 		/* t <- (t + x*b - f) / 2^64 */  in f256_montymul()
450 		/* t <- t + f*2^32, carry in k */  in f256_montymul()
454 		/* t <- t + f*2^192 - f*2^160 + f*2^128 */  in f256_montymul()
463 	 * At that point, we have computed t = (a*b + F*p) / 2^256, where  in f256_montymul()
464 	 * F is a 256-bit integer whose limbs are the "f" coefficients  in f256_montymul()
466 	 *   a <= 2^256-1  in f256_montymul()
467 	 *   b <= 2^256-1  in f256_montymul()
468 	 *   F <= 2^256-1  in f256_montymul()
470 	 *   a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1)  in f256_montymul()
471 	 *   a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p  in f256_montymul()
473 	 *   t < 2^256 + p - 2  in f256_montymul()
474 	 * Since p < 2^256, it follows that:  in f256_montymul()
476 	 *   t - p < 2^256  in f256_montymul()
477 	 * We can therefore subtract p from t, conditionally on t4, to  in f256_montymul()
478 	 * get a nonnegative result that fits on 256 bits.  in f256_montymul()
481 	k = _addcarry_u64(k, t1, -(t4 << 32), &t1);  in f256_montymul()
482 	k = _addcarry_u64(k, t2, -t4, &t2);  in f256_montymul()
483 	(void)_addcarry_u64(k, t3, (t4 << 32) - (t4 << 1), &t3);  in f256_montymul()
511 	 * R2 = 2^512 mod p.  in f256_tomonty()
512 	 * If R = 2^256 mod p, then R2 = R^2 mod p; and the Montgomery  in f256_tomonty()
513 	 * multiplication of a by R2 is: a*R2/R = a*R mod p, i.e. the  in f256_tomonty()
533 	 * Montgomery multiplication by 1 is division by 2^256 modulo p.  in f256_frommonty()
541  * Inversion in the field. If the source value is 0 modulo p, then this
542  * returns 0 or p. This function uses Montgomery representation.
548 	 * We compute a^(p-2) mod p. The exponent pattern (from high to  in f256_invert()
550 	 *  - 32 bits of value 1  in f256_invert()
551 	 *  - 31 bits of value 0  in f256_invert()
552 	 *  - 1 bit of value 1  in f256_invert()
553 	 *  - 96 bits of value 0  in f256_invert()
554 	 *  - 94 bits of value 1  in f256_invert()
555 	 *  - 1 bit of value 0  in f256_invert()
556 	 *  - 1 bit of value 1  in f256_invert()
557 	 * To speed up the square-and-multiply algorithm, we precompute  in f256_invert()
558 	 * a^(2^31-1).  in f256_invert()
571 	for (i = 224; i >= 0; i --) {  in f256_invert()
592  * Input value fits on 256 bits. This function subtracts p if and only
593  * if the input is greater than or equal to p.
604 	 * We add 2^224 - 2^192 - 2^96 + 1 to a. If there is no carry,  in f256_final_reduce()
605 	 * then a < p; otherwise, the addition result we computed is  in f256_final_reduce()
610 	z = (unsigned __int128)a[1] + (z >> 64) - ((uint64_t)1 << 32);  in f256_final_reduce()
612 	z = (unsigned __int128)a[2] - (z >> 127);  in f256_final_reduce()
614 	z = (unsigned __int128)a[3] - (z >> 127) + 0xFFFFFFFF;  in f256_final_reduce()
616 	cc = -(uint64_t)(z >> 64);  in f256_final_reduce()
629 	k = _addcarry_u64(k, a[1], -((uint64_t)1 << 32), &t1);  in f256_final_reduce()
630 	k = _addcarry_u64(k, a[2], -(uint64_t)1, &t2);  in f256_final_reduce()
631 	k = _addcarry_u64(k, a[3], ((uint64_t)1 << 32) - 2, &t3);  in f256_final_reduce()
632 	m = -(uint64_t)k;  in f256_final_reduce()
645  *  - In affine coordinates, the point-at-infinity cannot be encoded.
646  *  - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3);
647  *    if Z = 0 then this is the point-at-infinity.
669 point_decode(p256_jacobian *P, const unsigned char *buf)  in point_decode()  argument
695 	 * Verify y^2 = x^3 + A*x + B. In curve P-256, A = -3.  in point_decode()
698 	 * 0 and not p.  in point_decode()
716 	memcpy(P->x, x, sizeof x);  in point_decode()
717 	memcpy(P->y, y, sizeof y);  in point_decode()
718 	memcpy(P->z, F256_R, sizeof F256_R);  in point_decode()
724  *  - The point is converted back to affine coordinates.
725  *  - Final reduction is performed.
726  *  - The point is encoded into the provided buffer.
728  * If the point is the point-at-infinity, all operations are performed,
733 point_encode(unsigned char *buf, const p256_jacobian *P)  in point_encode()  argument
738 	f256_invert(t2, P->z);  in point_encode()
743 	f256_montymul(t1, P->x, t1);  in point_encode()
744 	f256_montymul(t2, P->y, t2);  in point_encode()
764 	/* Return success if and only if P->z != 0. */  in point_encode()
765 	z = P->z[0] | P->z[1] | P->z[2] | P->z[3];  in point_encode()
770  * Point doubling in Jacobian coordinates: point P is doubled.
771  * Note: if the source point is the point-at-infinity, then the result is
772  * still the point-at-infinity, which is correct. Moreover, if the three
777  * result will also have all-zero coordinate encodings, not the alternate
778  * encoding as the integer p.)
781 p256_double(p256_jacobian *P)  in p256_double()  argument
787 	 *   m = 3*(x + z^2)*(x - z^2)  in p256_double()
788 	 *   x' = m^2 - 2*s  in p256_double()
789 	 *   y' = m*(s - x') - 8*y^4  in p256_double()
794 	 *   - If y = 0 then z' = 0. But there is no such point in P-256  in p256_double()
796 	 *   - If z = 0 then z' = 0.  in p256_double()
803 	f256_montysquare(t1, P->z);  in p256_double()
806 	 * Compute x+z^2 in t2 and x-z^2 in t1.  in p256_double()
808 	f256_add(t2, P->x, t1);  in p256_double()
809 	f256_sub(t1, P->x, t1);  in p256_double()
812 	 * Compute 3*(x+z^2)*(x-z^2) in t1.  in p256_double()
821 	f256_montysquare(t3, P->y);  in p256_double()
823 	f256_montymul(t2, P->x, t3);  in p256_double()
827 	 * Compute x' = m^2 - 2*s.  in p256_double()
829 	f256_montysquare(P->x, t1);  in p256_double()
830 	f256_sub(P->x, P->x, t2);  in p256_double()
831 	f256_sub(P->x, P->x, t2);  in p256_double()
836 	f256_montymul(t4, P->y, P->z);  in p256_double()
837 	f256_add(P->z, t4, t4);  in p256_double()
840 	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have  in p256_double()
843 	f256_sub(t2, t2, P->x);  in p256_double()
844 	f256_montymul(P->y, t1, t2);  in p256_double()
847 	f256_sub(P->y, P->y, t4);  in p256_double()
854  *   - If P1 == 0 but P2 != 0
855  *   - If P1 != 0 but P2 == 0
856  *   - If P1 == P2
862  *   - P1 and P2 have the same Y coordinate.
863  *   - P1 == 0 and P2 == 0.
864  *   - The Y coordinate of one of the points is 0 and the other point is
869  * curve P-256.
874  *   - If the result is not the point at infinity, then it is correct.
875  *   - Otherwise, if the returned value is 1, then this is a case of
877  *   - Otherwise, P1 == P2, so a "double" operation should have been
893 	 *   h = u2 - u1  in p256_add()
894 	 *   r = s2 - s1  in p256_add()
895 	 *   x3 = r^2 - h^3 - 2 * u1 * h^2  in p256_add()
896 	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3  in p256_add()
905 	f256_montysquare(t3, P2->z);  in p256_add()
906 	f256_montymul(t1, P1->x, t3);  in p256_add()
907 	f256_montymul(t4, P2->z, t3);  in p256_add()
908 	f256_montymul(t3, P1->y, t4);  in p256_add()
913 	f256_montysquare(t4, P1->z);  in p256_add()
914 	f256_montymul(t2, P2->x, t4);  in p256_add()
915 	f256_montymul(t5, P1->z, t4);  in p256_add()
916 	f256_montymul(t4, P2->y, t5);  in p256_add()
919 	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).  in p256_add()
928 	ret = (ret | -ret) >> 31;  in p256_add()
938 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.  in p256_add()
940 	f256_montysquare(P1->x, t4);  in p256_add()
941 	f256_sub(P1->x, P1->x, t5);  in p256_add()
942 	f256_sub(P1->x, P1->x, t6);  in p256_add()
943 	f256_sub(P1->x, P1->x, t6);  in p256_add()
946 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.  in p256_add()
948 	f256_sub(t6, t6, P1->x);  in p256_add()
949 	f256_montymul(P1->y, t4, t6);  in p256_add()
951 	f256_sub(P1->y, P1->y, t1);  in p256_add()
956 	f256_montymul(t1, P1->z, P2->z);  in p256_add()
957 	f256_montymul(P1->z, t1, t2);  in p256_add()
964  * This is a specialised function for the case when P2 is a non-zero point
969  *   - If P1 == 0
970  *   - If P1 == P2
976  *   - P1 and P2 have the same Y (affine) coordinate.
977  *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
981  * curve P-256.
986  *   - If the result is not the point at infinity, then it is correct.
987  *   - Otherwise, if the returned value is 1, then this is a case of
989  *   - Otherwise, P1 == P2, so a "double" operation should have been
1005 	 *   h = u2 - u1  in p256_add_mixed()
1006 	 *   r = s2 - s1  in p256_add_mixed()
1007 	 *   x3 = r^2 - h^3 - 2 * u1 * h^2  in p256_add_mixed()
1008 	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3  in p256_add_mixed()
1017 	memcpy(t1, P1->x, sizeof t1);  in p256_add_mixed()
1018 	memcpy(t3, P1->y, sizeof t3);  in p256_add_mixed()
1023 	f256_montysquare(t4, P1->z);  in p256_add_mixed()
1024 	f256_montymul(t2, P2->x, t4);  in p256_add_mixed()
1025 	f256_montymul(t5, P1->z, t4);  in p256_add_mixed()
1026 	f256_montymul(t4, P2->y, t5);  in p256_add_mixed()
1029 	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).  in p256_add_mixed()
1038 	ret = (ret | -ret) >> 31;  in p256_add_mixed()
1048 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.  in p256_add_mixed()
1050 	f256_montysquare(P1->x, t4);  in p256_add_mixed()
1051 	f256_sub(P1->x, P1->x, t5);  in p256_add_mixed()
1052 	f256_sub(P1->x, P1->x, t6);  in p256_add_mixed()
1053 	f256_sub(P1->x, P1->x, t6);  in p256_add_mixed()
1056 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.  in p256_add_mixed()
1058 	f256_sub(t6, t6, P1->x);  in p256_add_mixed()
1059 	f256_montymul(P1->y, t4, t6);  in p256_add_mixed()
1061 	f256_sub(P1->y, P1->y, t1);  in p256_add_mixed()
1066 	f256_montymul(P1->z, P1->z, t2);  in p256_add_mixed()
1075  * This is a specialised function for the case when P2 is a non-zero point
1090 	 *   h = u2 - u1
1091 	 *   r = s2 - s1
1092 	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
1093 	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
1098 	 *  - If P1 is the point-at-infinity (z1 = 0), then z3 is
1101 	 *  - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
1105 	 * we correctly get z3 = 0 (the point-at-infinity).
1112 	 * occurrence to make a mask which will be all-one if P1 = P2,
1113 	 * or all-zero otherwise; then we can compute the double of P2
1120 	 *   m = 3*(x2 + 1)*(x2 - 1)
1121 	 *   x' = m^2 - 2*s
1122 	 *   y' = m*(s - x') - 8*y2^4
1132 	 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
1134 	zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3];
1135 	zz = ((zz | -zz) >> 63) - (uint64_t)1;
1140 	memcpy(t1, P1->x, sizeof t1);
1141 	memcpy(t3, P1->y, sizeof t3);
1146 	f256_montysquare(t4, P1->z);
1147 	f256_montymul(t2, P2->x, t4);
1148 	f256_montymul(t5, P1->z, t4);
1149 	f256_montymul(t4, P2->y, t5);
1152 	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
1160 	 * the mask tt to -1; otherwise, the mask will be 0.
1165 	tt = ((tt | -tt) >> 63) - (uint64_t)1;
1175 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
1177 	f256_montysquare(P1->x, t4);
1178 	f256_sub(P1->x, P1->x, t5);
1179 	f256_sub(P1->x, P1->x, t6);
1180 	f256_sub(P1->x, P1->x, t6);
1183 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
1185 	f256_sub(t6, t6, P1->x);
1186 	f256_montymul(P1->y, t4, t6);
1188 	f256_sub(P1->y, P1->y, t1);
1193 	f256_montymul(P1->z, P1->z, t2);
1202 	f256_add(t1, P2->y, P2->y);
1207 	f256_montysquare(t2, P2->y);
1210 	f256_montymul(t3, P2->x, t3);
1213 	 * Compute m = 3*(x2^2 - 1) (in t4).
1215 	f256_montysquare(t4, P2->x);
1221 	 * Compute x' = m^2 - 2*s (in t5).
1228 	 * Compute y' = m*(s - x') - 8*y2^4 (in t6).
1241 		P1->x[i] |= tt & t5[i];
1242 		P1->y[i] |= tt & t6[i];
1243 		P1->z[i] |= tt & t1[i];
1252 		P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]);
1253 		P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]);
1254 		P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]);
1261  * provided, with points 1*P to 15*P in affine coordinates.
1264  *  - All provided points are valid points on the curve.
1265  *  - Multiplier is non-zero, and smaller than the curve order.
1266  *  - Everything is in Montgomery representation.
1277 	while (klen -- > 0) {  in point_mul_inner()
1302 			 * bits are non-zero.  in point_mul_inner()
1306 				m = -(uint64_t)EQ(bits, n + 1);  in point_mul_inner()
1321 			 * If qz is still 1, then Q was all-zeros, and this  in point_mul_inner()
1324 			m = -(uint64_t)(bnz & qz);  in point_mul_inner()
1363 	 *  - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2  in window_to_affine()
1364 	 *  - on input (z_1,z_2,... z_n):  in window_to_affine()
1365 	 *       recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1  in window_to_affine()
1366 	 *       recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2  in window_to_affine()
1367 	 *       multiply elements of r1 by m2 -> s1  in window_to_affine()
1368 	 *       multiply elements of r2 by m1 -> s2  in window_to_affine()
1375 	 *  - Depth 1:  in window_to_affine()
1384 	 *  - Depth 2:  in window_to_affine()
1385 	 *      z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12  in window_to_affine()
1387 	 *      z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56  in window_to_affine()
1389 	 *      z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A  in window_to_affine()
1392 	 *  - Depth 3:  in window_to_affine()
1393 	 *      z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678  in window_to_affine()
1394 	 *      z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234  in window_to_affine()
1396 	 *      z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE  in window_to_affine()
1397 	 *      zD <- zD*z9ABC, zE <- zE*z9ABC  in window_to_affine()
1400 	 *  - Depth 4:  in window_to_affine()
1424 		memcpy(z[num >> 1], jac[num - 1].z, sizeof zt);  in window_to_affine()
1425 		memcpy(jac[num - 1].z, F256_R, sizeof F256_R);  in window_to_affine()
1439 		n = (num + s - 1) >> k;  in window_to_affine()
1464  *  - Source point is a valid curve point.
1465  *  - Source point is not the point-at-infinity.
1466  *  - Integer is not 0, and is lower than the curve order.
1468  * (but the process is still constant-time).
1471 p256_mul(p256_jacobian *P, const unsigned char *k, size_t klen)  in p256_mul()  argument
1482 	window.jac[0] = *P;  in p256_mul()
1484 		window.jac[i - 1] = window.jac[(i >> 1) - 1];  in p256_mul()
1486 			p256_double(&window.jac[i - 1]);  in p256_mul()
1488 			p256_add(&window.jac[i - 1], &window.jac[i >> 1]);  in p256_mul()
1501 	point_mul_inner(P, window.aff, k, klen);  in p256_mul()
1603  * integer. Return is written in *P.
1606  *  - Integer is not 0, and is lower than the curve order.
1608  * (but the process is still constant-time).
1611 p256_mulgen(p256_jacobian *P, const unsigned char *k, size_t klen)  in p256_mulgen()  argument
1613 	point_mul_inner(P, P256_Gwin, k, klen);  in p256_mulgen()
1618  *  - klen <= 32
1619  *  - k != 0
1620  *  - k is lower than the curve order
1623  * Constant-time behaviour: only klen may be observable.
1642 			c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]);  in check_scalar()
1645 		c = -1;  in check_scalar()
1655 	p256_jacobian P;  in api_mul()  local
1662 	r &= point_decode(&P, G);  in api_mul()
1663 	p256_mul(&P, k, klen);  in api_mul()
1664 	r &= point_encode(G, &P);  in api_mul()
1672 	p256_jacobian P;  in api_mulgen()  local
1675 	p256_mulgen(&P, k, klen);  in api_mulgen()
1676 	point_encode(R, &P);  in api_mulgen()
1687 	 * window of u*P+v*Q points, to merge the two doubling-ladders  in api_muladd()
1690 	 *  - During the computation, we may hit the point-at-infinity.  in api_muladd()
1695 	 *  - A 4-bit window would be too large, since it would involve  in api_muladd()
1696 	 *    16*16-1 = 255 points. For the same window size as in the  in api_muladd()
1698 	 *    to 2 bits, and thus perform twice as many non-doubling  in api_muladd()
1701 	 *  - The window may itself contain the point-at-infinity, and  in api_muladd()
1713 	p256_jacobian P, Q;  in api_muladd()  local
1721 	r = point_decode(&P, A);  in api_muladd()
1722 	p256_mul(&P, x, xlen);  in api_muladd()
1733 	t = p256_add(&P, &Q);  in api_muladd()
1734 	f256_final_reduce(P.z);  in api_muladd()
1735 	z = P.z[0] | P.z[1] | P.z[2] | P.z[3];  in api_muladd()
1740 	 * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we  in api_muladd()
1743 	 *   s = 0, t = 0   return P (normal addition)  in api_muladd()
1744 	 *   s = 0, t = 1   return P (normal addition)  in api_muladd()
1746 	 *   s = 1, t = 1   report an error (P+Q = 0)  in api_muladd()
1748 	CCOPY(s & ~t, &P, &Q, sizeof Q);  in api_muladd()
1749 	point_encode(A, &P);  in api_muladd()