ec_p256_m31.c - OpenGrok cross reference for /freebsd/contrib/bearssl/src/ec/ec_p256

Lines Matching +full:p +full:- +full:256
29  * that right-shifting a signed negative integer copies the sign bit
30  * (arithmetic right-shift). This is "implementation-defined behaviour",
39                       | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
41                       | ((-((uint64_t)(x) >> 63)) << (64 - (n))))
48  * Convert an integer from unsigned big-endian encoding to a sequence of
49  * 30-bit words in little-endian order. The final "partial" word is
60 	while (len -- > 0) {  in be8_to_le30()
69 			acc = b >> (30 - acc_len);  in be8_to_le30()
70 			acc_len -= 22;  in be8_to_le30()
77  * Convert an integer (30-bit words, little-endian) to unsigned
78  * big-endian encoding. The total encoding length is provided; all
89 	while (len -- > 0) {  in le30_to_be8()
95 			acc = w >> (8 - acc_len);  in le30_to_be8()
100 			acc_len -= 8;  in le30_to_be8()
107  * nine 30-bit words, for values up to 2^270-1. Result is encoded over
117 	 *   10376293531797946367 = 9 * (2^30-1)^2 + 9663676406  in mul9()
120 	 * Thus, adding together 9 products of 30-bit integers, with  in mul9()
225  * Square a 270-bit integer, represented as an array of nine 30-bit words.
296  * Base field modulus for P-256.
305  * The 'b' curve equation coefficient for P-256.
331 	d[3] -= w << 6;  in add_f256()
332 	d[6] -= w << 12;  in add_f256()
353 	 * We really compute a - b + 2*p to make sure that the result is  in sub_f256()
356 	w = a[0] - b[0] - 0x00002;  in sub_f256()
358 	w = a[1] - b[1] + ARSH(w, 30);  in sub_f256()
360 	w = a[2] - b[2] + ARSH(w, 30);  in sub_f256()
362 	w = a[3] - b[3] + ARSH(w, 30) + 0x00080;  in sub_f256()
364 	w = a[4] - b[4] + ARSH(w, 30);  in sub_f256()
366 	w = a[5] - b[5] + ARSH(w, 30);  in sub_f256()
368 	w = a[6] - b[6] + ARSH(w, 30) + 0x02000;  in sub_f256()
370 	w = a[7] - b[7] + ARSH(w, 30) - 0x08000;  in sub_f256()
372 	w = a[8] - b[8] + ARSH(w, 30) + 0x20000;  in sub_f256()
376 	d[3] -= w << 6;  in sub_f256()
377 	d[6] -= w << 12;  in sub_f256()
407 	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1  in mul_f256()
409 	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p  in mul_f256()
411 	 * For a word x at bit offset n (n >= 256), we have:  in mul_f256()
412 	 *    x*2^n = x*2^(n-32) - x*2^(n-64)  in mul_f256()
413 	 *            - x*2^(n - 160) + x*2^(n-256) mod p  in mul_f256()
418 	 * We use 64-bit intermediate words to allow for carries to  in mul_f256()
425 	for (i = 17; i >= 9; i --) {  in mul_f256()
429 		s[i - 1] += ARSHW(y, 2);  in mul_f256()
430 		s[i - 2] += (y << 28) & 0x3FFFFFFF;  in mul_f256()
431 		s[i - 2] -= ARSHW(y, 4);  in mul_f256()
432 		s[i - 3] -= (y << 26) & 0x3FFFFFFF;  in mul_f256()
433 		s[i - 5] -= ARSHW(y, 10);  in mul_f256()
434 		s[i - 6] -= (y << 20) & 0x3FFFFFFF;  in mul_f256()
435 		s[i - 8] += ARSHW(y, 16);  in mul_f256()
436 		s[i - 9] += (y << 14) & 0x3FFFFFFF;  in mul_f256()
460 	 * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The  in mul_f256()
462 	 * able to switch to 32-bit operations.  in mul_f256()
468 	 * One extra round of reduction, for cc*2^256, which means  in mul_f256()
469 	 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)  in mul_f256()
479 	d[3] -= z << 6;  in mul_f256()
480 	d[6] -= (z << 12) & 0x3FFFFFFF;  in mul_f256()
481 	d[7] -= ARSH(z, 18);  in mul_f256()
485 	d[0] -= c;  in mul_f256()
488 	d[7] -= c << 14;  in mul_f256()
519 	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1  in square_f256()
521 	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p  in square_f256()
523 	 * For a word x at bit offset n (n >= 256), we have:  in square_f256()
524 	 *    x*2^n = x*2^(n-32) - x*2^(n-64)  in square_f256()
525 	 *            - x*2^(n - 160) + x*2^(n-256) mod p  in square_f256()
530 	 * We use 64-bit intermediate words to allow for carries to  in square_f256()
537 	for (i = 17; i >= 9; i --) {  in square_f256()
541 		s[i - 1] += ARSHW(y, 2);  in square_f256()
542 		s[i - 2] += (y << 28) & 0x3FFFFFFF;  in square_f256()
543 		s[i - 2] -= ARSHW(y, 4);  in square_f256()
544 		s[i - 3] -= (y << 26) & 0x3FFFFFFF;  in square_f256()
545 		s[i - 5] -= ARSHW(y, 10);  in square_f256()
546 		s[i - 6] -= (y << 20) & 0x3FFFFFFF;  in square_f256()
547 		s[i - 8] += ARSHW(y, 16);  in square_f256()
548 		s[i - 9] += (y << 14) & 0x3FFFFFFF;  in square_f256()
572 	 * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The  in square_f256()
574 	 * able to switch to 32-bit operations.  in square_f256()
580 	 * One extra round of reduction, for cc*2^256, which means  in square_f256()
581 	 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)  in square_f256()
591 	d[3] -= z << 6;  in square_f256()
592 	d[6] -= (z << 12) & 0x3FFFFFFF;  in square_f256()
593 	d[7] -= ARSH(z, 18);  in square_f256()
597 	d[0] -= c;  in square_f256()
600 	d[7] -= c << 14;  in square_f256()
612  * Perform a "final reduction" in field F256 (field for curve P-256).
629 		w = d[i] - F256[i] - cc;  in reduce_final_f256()
639  * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
646  * Coordinates are represented in arrays of 32-bit integers, each holding
658  *  - If the point is the point at infinity, then all three coordinates
660  *  - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
665 p256_to_affine(p256_jacobian *P)  in p256_to_affine()  argument
672 	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is  in p256_to_affine()
673 	 * p-2. Exponent bit pattern (from high to low) is:  in p256_to_affine()
674 	 *  - 32 bits of value 1  in p256_to_affine()
675 	 *  - 31 bits of value 0  in p256_to_affine()
676 	 *  - 1 bit of value 1  in p256_to_affine()
677 	 *  - 96 bits of value 0  in p256_to_affine()
678 	 *  - 94 bits of value 1  in p256_to_affine()
679 	 *  - 1 bit of value 0  in p256_to_affine()
680 	 *  - 1 bit of value 1  in p256_to_affine()
681 	 * Thus, we precompute z^(2^31-1) to speed things up.  in p256_to_affine()
689 	 * A simple square-and-multiply for z^(2^31-1). We could save about  in p256_to_affine()
693 	memcpy(t1, P->z, sizeof P->z);  in p256_to_affine()
696 		mul_f256(t1, t1, P->z);  in p256_to_affine()
700 	 * Square-and-multiply. Apart from the squarings, we have a few  in p256_to_affine()
704 	memcpy(t2, P->z, sizeof P->z);  in p256_to_affine()
705 	for (i = 1; i < 256; i ++) {  in p256_to_affine()
717 			mul_f256(t2, t2, P->z);  in p256_to_affine()
726 	mul_f256(P->x, t1, P->x);  in p256_to_affine()
728 	mul_f256(P->y, t1, P->y);  in p256_to_affine()
729 	reduce_final_f256(P->x);  in p256_to_affine()
730 	reduce_final_f256(P->y);  in p256_to_affine()
736 	mul_f256(P->z, P->z, t2);  in p256_to_affine()
737 	reduce_final_f256(P->z);  in p256_to_affine()
741  * Double a point in P-256. This function works for all valid points,
751 	 *   m = 3*(x + z^2)*(x - z^2)  in p256_double()
752 	 *   x' = m^2 - 2*s  in p256_double()
753 	 *   y' = m*(s - x') - 8*y^4  in p256_double()
758 	 *   - If y = 0 then z' = 0. But there is no such point in P-256  in p256_double()
760 	 *   - If z = 0 then z' = 0.  in p256_double()
767 	square_f256(t1, Q->z);  in p256_double()
770 	 * Compute x+z^2 in t2 and x-z^2 in t1.  in p256_double()
772 	add_f256(t2, Q->x, t1);  in p256_double()
773 	sub_f256(t1, Q->x, t1);  in p256_double()
776 	 * Compute 3*(x+z^2)*(x-z^2) in t1.  in p256_double()
785 	square_f256(t3, Q->y);  in p256_double()
787 	mul_f256(t2, Q->x, t3);  in p256_double()
791 	 * Compute x' = m^2 - 2*s.  in p256_double()
793 	square_f256(Q->x, t1);  in p256_double()
794 	sub_f256(Q->x, Q->x, t2);  in p256_double()
795 	sub_f256(Q->x, Q->x, t2);  in p256_double()
800 	mul_f256(t4, Q->y, Q->z);  in p256_double()
801 	add_f256(Q->z, t4, t4);  in p256_double()
804 	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have  in p256_double()
807 	sub_f256(t2, t2, Q->x);  in p256_double()
808 	mul_f256(Q->y, t1, t2);  in p256_double()
811 	sub_f256(Q->y, Q->y, t4);  in p256_double()
819  *   - If P1 == 0 but P2 != 0
820  *   - If P1 != 0 but P2 == 0
821  *   - If P1 == P2
827  *   - P1 and P2 have the same Y coordinate
828  *   - P1 == 0 and P2 == 0
829  *   - The Y coordinate of one of the points is 0 and the other point is
834  * curve P-256.
839  *   - If the result is not the point at infinity, then it is correct.
840  *   - Otherwise, if the returned value is 1, then this is a case of
842  *   - Otherwise, P1 == P2, so a "double" operation should have been
855 	 *   h = u2 - u1  in p256_add()
856 	 *   r = s2 - s1  in p256_add()
857 	 *   x3 = r^2 - h^3 - 2 * u1 * h^2  in p256_add()
858 	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3  in p256_add()
868 	square_f256(t3, P2->z);  in p256_add()
869 	mul_f256(t1, P1->x, t3);  in p256_add()
870 	mul_f256(t4, P2->z, t3);  in p256_add()
871 	mul_f256(t3, P1->y, t4);  in p256_add()
876 	square_f256(t4, P1->z);  in p256_add()
877 	mul_f256(t2, P2->x, t4);  in p256_add()
878 	mul_f256(t5, P1->z, t4);  in p256_add()
879 	mul_f256(t4, P2->y, t5);  in p256_add()
882 	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).  in p256_add()
893 	ret = (ret | -ret) >> 31;  in p256_add()
903 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.  in p256_add()
905 	square_f256(P1->x, t4);  in p256_add()
906 	sub_f256(P1->x, P1->x, t5);  in p256_add()
907 	sub_f256(P1->x, P1->x, t6);  in p256_add()
908 	sub_f256(P1->x, P1->x, t6);  in p256_add()
911 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.  in p256_add()
913 	sub_f256(t6, t6, P1->x);  in p256_add()
914 	mul_f256(P1->y, t4, t6);  in p256_add()
916 	sub_f256(P1->y, P1->y, t1);  in p256_add()
921 	mul_f256(t1, P1->z, P2->z);  in p256_add()
922 	mul_f256(P1->z, t1, t2);  in p256_add()
929  * case when P2 is a non-zero point in affine coordinate.
933  *   - If P1 == 0
934  *   - If P1 == P2
940  *   - P1 and P2 have the same Y coordinate
941  *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
945  * curve P-256.
950  *   - If the result is not the point at infinity, then it is correct.
951  *   - Otherwise, if the returned value is 1, then this is a case of
953  *   - Otherwise, P1 == P2, so a "double" operation should have been
966 	 *   h = u2 - u1  in p256_add_mixed()
967 	 *   r = s2 - s1  in p256_add_mixed()
968 	 *   x3 = r^2 - h^3 - 2 * u1 * h^2  in p256_add_mixed()
969 	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3  in p256_add_mixed()
979 	memcpy(t1, P1->x, sizeof t1);  in p256_add_mixed()
980 	memcpy(t3, P1->y, sizeof t3);  in p256_add_mixed()
985 	square_f256(t4, P1->z);  in p256_add_mixed()
986 	mul_f256(t2, P2->x, t4);  in p256_add_mixed()
987 	mul_f256(t5, P1->z, t4);  in p256_add_mixed()
988 	mul_f256(t4, P2->y, t5);  in p256_add_mixed()
991 	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).  in p256_add_mixed()
1002 	ret = (ret | -ret) >> 31;  in p256_add_mixed()
1012 	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.  in p256_add_mixed()
1014 	square_f256(P1->x, t4);  in p256_add_mixed()
1015 	sub_f256(P1->x, P1->x, t5);  in p256_add_mixed()
1016 	sub_f256(P1->x, P1->x, t6);  in p256_add_mixed()
1017 	sub_f256(P1->x, P1->x, t6);  in p256_add_mixed()
1020 	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.  in p256_add_mixed()
1022 	sub_f256(t6, t6, P1->x);  in p256_add_mixed()
1023 	mul_f256(P1->y, t4, t6);  in p256_add_mixed()
1025 	sub_f256(P1->y, P1->y, t1);  in p256_add_mixed()
1030 	mul_f256(P1->z, P1->z, t2);  in p256_add_mixed()
1036  * Decode a P-256 point. This function does not support the point at
1040 p256_decode(p256_jacobian *P, const void *src, size_t len)  in p256_decode()  argument
1088 	memcpy(P->x, tx, sizeof tx);  in p256_decode()
1089 	memcpy(P->y, ty, sizeof ty);  in p256_decode()
1090 	memset(P->z, 0, sizeof P->z);  in p256_decode()
1091 	P->z[0] = 1;  in p256_decode()
1100 p256_encode(void *dst, const p256_jacobian *P)  in p256_encode()  argument
1106 	le30_to_be8(buf + 1, 32, P->x);  in p256_encode()
1107 	le30_to_be8(buf + 33, 32, P->y);  in p256_encode()
1116 p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen)  in p256_mul()  argument
1122 	 * We use a 2-bit window to handle multiplier bits by pairs.  in p256_mul()
1131 	P2 = *P;  in p256_mul()
1133 	P3 = *P;  in p256_mul()
1141 	while (xlen -- > 0) {  in p256_mul()
1144 		for (k = 6; k >= 0; k -= 2) {  in p256_mul()
1150 			T = *P;  in p256_mul()
1163 	*P = Q;  in p256_mul()
1169  * the point are encoded as 9 words of 30 bits each (little-endian
1266  * Lookup one of the Gwin[] values, by index. This is constant-time.
1279 		m = -EQ(idx, k + 1);  in lookup_Gwin()
1284 	memcpy(T->x, &xy[0], sizeof T->x);  in lookup_Gwin()
1285 	memcpy(T->y, &xy[9], sizeof T->y);  in lookup_Gwin()
1286 	memset(T->z, 0, sizeof T->z);  in lookup_Gwin()
1287 	T->z[0] = 1;  in lookup_Gwin()
1291  * Multiply the generator by an integer. The integer is assumed non-zero
1295 p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen)  in p256_mulgen()  argument
1301 	 * We use a 4-bit window to handle multiplier bits by groups  in p256_mulgen()
1303 	 * points in affine coordinates; we use a constant-time lookup.  in p256_mulgen()
1310 	while (xlen -- > 0) {  in p256_mulgen()
1335 	*P = Q;  in p256_mulgen()
1384 	p256_jacobian P;  in api_mul()  local
1390 	r = p256_decode(&P, G, Glen);  in api_mul()
1391 	p256_mul(&P, x, xlen);  in api_mul()
1392 	p256_to_affine(&P);  in api_mul()
1393 	p256_encode(G, &P);  in api_mul()
1401 	p256_jacobian P;  in api_mulgen()  local
1404 	p256_mulgen(&P, x, xlen);  in api_mulgen()
1405 	p256_to_affine(&P);  in api_mulgen()
1406 	p256_encode(R, &P);  in api_mulgen()
1415 	p256_jacobian P, Q;  in api_muladd()  local
1423 	r = p256_decode(&P, A, len);  in api_muladd()
1424 	p256_mul(&P, x, xlen);  in api_muladd()
1435 	t = p256_add(&P, &Q);  in api_muladd()
1436 	reduce_final_f256(P.z);  in api_muladd()
1439 		z |= P.z[i];  in api_muladd()
1445 	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we  in api_muladd()
1448 	 *   z = 0, t = 0   return P (normal addition)  in api_muladd()
1449 	 *   z = 0, t = 1   return P (normal addition)  in api_muladd()
1451 	 *   z = 1, t = 1   report an error (P+Q = 0)  in api_muladd()
1453 	CCOPY(z & ~t, &P, &Q, sizeof Q);  in api_muladd()
1454 	p256_to_affine(&P);  in api_muladd()
1455 	p256_encode(A, &P);  in api_muladd()