Lines Matching +full:40 +full:- +full:bit
29 * that right-shifting a signed negative integer copies the sign bit
30 * (arithmetic right-shift). This is "implementation-defined behaviour",
39 | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
45 * Convert an integer from unsigned big-endian encoding to a sequence of
46 * 13-bit words in little-endian order. The final "partial" word is
57 while (len -- > 0) { in be8_to_le13()
63 acc_len -= 13; in be8_to_le13()
70 * Convert an integer (13-bit words, little-endian) to unsigned
71 * big-endian encoding. The total encoding length is provided; all
82 while (len -- > 0) { in le13_to_be8()
89 acc_len -= 8; in le13_to_be8()
116 * mul20() multiplies two 260-bit integers together. Each word must fit
118 * receives 40 words. All overlaps allowed.
120 * square20() computes the square of a 260-bit integer. Each word must
122 * receives 40 words. All overlaps allowed.
131 * Two-level Karatsuba: turns a 20x20 multiplication into in mul20()
132 * nine 5x5 multiplications. We use 13-bit words but do not in mul20()
135 * - First Karatsuba decomposition turns the 20x20 mul on in mul20()
136 * 13-bit words into three 10x10 muls, two on 13-bit words in mul20()
137 * and one on 14-bit words. in mul20()
139 * - Second Karatsuba decomposition further splits these into: in mul20()
141 * * four 5x5 muls on 13-bit words in mul20()
142 * * four 5x5 muls on 14-bit words in mul20()
143 * * one 5x5 mul on 15-bit words in mul20()
145 * Highest word value is 8191, 16382 or 32764, for 13-bit, 14-bit in mul20()
146 * or 15-bit words, respectively. in mul20()
174 (dw)[5 * (d_off) + 0] -= (s1w)[5 * (s1_off) + 0] \ in mul20()
176 (dw)[5 * (d_off) + 1] -= (s1w)[5 * (s1_off) + 1] \ in mul20()
178 (dw)[5 * (d_off) + 2] -= (s1w)[5 * (s1_off) + 2] \ in mul20()
180 (dw)[5 * (d_off) + 3] -= (s1w)[5 * (s1_off) + 3] \ in mul20()
182 (dw)[5 * (d_off) + 4] -= (s1w)[5 * (s1_off) + 4] \ in mul20()
223 * each, so we can add product results together "as is" in 32-bit in mul20()
226 for (i = 0; i < 40; i += 5) { in mul20()
267 w[80 + 0] = MUL15(u[40 + 0], v[40 + 0]); in mul20()
268 w[80 + 1] = MUL15(u[40 + 0], v[40 + 1]) in mul20()
269 + MUL15(u[40 + 1], v[40 + 0]); in mul20()
270 w[80 + 2] = MUL15(u[40 + 0], v[40 + 2]) in mul20()
271 + MUL15(u[40 + 1], v[40 + 1]) in mul20()
272 + MUL15(u[40 + 2], v[40 + 0]); in mul20()
273 w[80 + 3] = MUL15(u[40 + 0], v[40 + 3]) in mul20()
274 + MUL15(u[40 + 1], v[40 + 2]) in mul20()
275 + MUL15(u[40 + 2], v[40 + 1]) in mul20()
276 + MUL15(u[40 + 3], v[40 + 0]); in mul20()
277 w[80 + 4] = MUL15(u[40 + 0], v[40 + 4]) in mul20()
278 + MUL15(u[40 + 1], v[40 + 3]) in mul20()
279 + MUL15(u[40 + 2], v[40 + 2]) in mul20()
280 + MUL15(u[40 + 3], v[40 + 1]); in mul20()
281 /* + MUL15(u[40 + 4], v[40 + 0]) */ in mul20()
282 w[80 + 5] = MUL15(u[40 + 1], v[40 + 4]) in mul20()
283 + MUL15(u[40 + 2], v[40 + 3]) in mul20()
284 + MUL15(u[40 + 3], v[40 + 2]) in mul20()
285 + MUL15(u[40 + 4], v[40 + 1]); in mul20()
286 w[80 + 6] = MUL15(u[40 + 2], v[40 + 4]) in mul20()
287 + MUL15(u[40 + 3], v[40 + 3]) in mul20()
288 + MUL15(u[40 + 4], v[40 + 2]); in mul20()
289 w[80 + 7] = MUL15(u[40 + 3], v[40 + 4]) in mul20()
290 + MUL15(u[40 + 4], v[40 + 3]); in mul20()
291 w[80 + 8] = MUL15(u[40 + 4], v[40 + 4]); in mul20()
295 w[80 + 4] += MUL15(u[40 + 4], v[40 + 0]); in mul20()
298 * The products on 14-bit words in slots 6 and 7 yield values in mul20()
301 * in a _signed_ 32-bit integer, i.e. 31 bits + a sign bit. in mul20()
303 * bit of reduction here. in mul20()
330 /* first-level recomposition */ in mul20()
343 cc = norm13(d, w, 40); in mul20()
990 * Modulus for field F256 (field for point coordinates in curve P-256).
999 * The 'b' curve equation coefficient for P-256.
1008 * Perform a "short reduction" in field F256 (field for curve P-256).
1020 d[14] -= x << 10; in reduce_f256()
1021 d[7] -= x << 5; in reduce_f256()
1027 * Perform a "final reduction" in field F256 (field for curve P-256).
1045 w = t[i] - F256[i] - cc; in reduce_final_f256()
1056 * 2^256-2^224+2^192+2^96-1 (for NIST curve P-256). Operands are arrays
1057 * of 20 words, each containing 13 bits of data, in little-endian order.
1058 * On input, upper word may be up to 13 bits (hence value up to 2^260-1);
1064 uint32_t t[40], cc; in mul_f256()
1078 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 in mul_f256()
1080 * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p in mul_f256()
1082 * For a word x at bit offset n (n >= 256), we have: in mul_f256()
1083 * x*2^n = x*2^(n-32) - x*2^(n-64) in mul_f256()
1084 * - x*2^(n - 160) + x*2^(n-256) mod p in mul_f256()
1089 for (i = 39; i >= 20; i --) { in mul_f256()
1093 t[i - 2] += ARSH(x, 6); in mul_f256()
1094 t[i - 3] += (x << 7) & 0x1FFF; in mul_f256()
1095 t[i - 4] -= ARSH(x, 12); in mul_f256()
1096 t[i - 5] -= (x << 1) & 0x1FFF; in mul_f256()
1097 t[i - 12] -= ARSH(x, 4); in mul_f256()
1098 t[i - 13] -= (x << 9) & 0x1FFF; in mul_f256()
1099 t[i - 19] += ARSH(x, 9); in mul_f256()
1100 t[i - 20] += (x << 4) & 0x1FFF; in mul_f256()
1106 * but not two much: worst case is the chain involving t[i - 3], in mul_f256()
1108 * starting values are 13-bit each, all words fit on 20 bits in mul_f256()
1109 * (21 to account for the sign bit). in mul_f256()
1116 * bits, and the values fit on 21 bits, values fit in 32-bit words, in mul_f256()
1122 t[14] -= cc << 10; in mul_f256()
1123 t[7] -= cc << 5; in mul_f256()
1135 t[0] -= cc; in mul_f256()
1138 t[17] -= cc << 3; in mul_f256()
1145 * Square an integer modulo 2^256-2^224+2^192+2^96-1 (for NIST curve
1146 * P-256). Operand is an array of 20 words, each containing 13 bits of
1147 * data, in little-endian order. On input, upper word may be up to 13
1148 * bits (hence value up to 2^260-1); on output, value fits on 257 bits
1154 uint32_t t[40], cc; in square_f256()
1167 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1 in square_f256()
1169 * 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p in square_f256()
1171 * For a word x at bit offset n (n >= 256), we have: in square_f256()
1172 * x*2^n = x*2^(n-32) - x*2^(n-64) in square_f256()
1173 * - x*2^(n - 160) + x*2^(n-256) mod p in square_f256()
1178 for (i = 39; i >= 20; i --) { in square_f256()
1182 t[i - 2] += ARSH(x, 6); in square_f256()
1183 t[i - 3] += (x << 7) & 0x1FFF; in square_f256()
1184 t[i - 4] -= ARSH(x, 12); in square_f256()
1185 t[i - 5] -= (x << 1) & 0x1FFF; in square_f256()
1186 t[i - 12] -= ARSH(x, 4); in square_f256()
1187 t[i - 13] -= (x << 9) & 0x1FFF; in square_f256()
1188 t[i - 19] += ARSH(x, 9); in square_f256()
1189 t[i - 20] += (x << 4) & 0x1FFF; in square_f256()
1195 * but not two much: worst case is the chain involving t[i - 3], in square_f256()
1197 * starting values are 13-bit each, all words fit on 20 bits in square_f256()
1198 * (21 to account for the sign bit). in square_f256()
1205 * bits, and the values fit on 21 bits, values fit in 32-bit words, in square_f256()
1211 t[14] -= cc << 10; in square_f256()
1212 t[7] -= cc << 5; in square_f256()
1224 t[0] -= cc; in square_f256()
1227 t[17] -= cc << 3; in square_f256()
1234 * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
1241 * Coordinates are represented in arrays of 32-bit integers, each holding
1253 * - If the point is the point at infinity, then all three coordinates
1255 * - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
1267 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is in p256_to_affine()
1268 * p-2. Exponent bit pattern (from high to low) is: in p256_to_affine()
1269 * - 32 bits of value 1 in p256_to_affine()
1270 * - 31 bits of value 0 in p256_to_affine()
1271 * - 1 bit of value 1 in p256_to_affine()
1272 * - 96 bits of value 0 in p256_to_affine()
1273 * - 94 bits of value 1 in p256_to_affine()
1274 * - 1 bit of value 0 in p256_to_affine()
1275 * - 1 bit of value 1 in p256_to_affine()
1276 * Thus, we precompute z^(2^31-1) to speed things up. in p256_to_affine()
1284 * A simple square-and-multiply for z^(2^31-1). We could save about in p256_to_affine()
1286 * this would require a bit more code, and extra stack buffers. in p256_to_affine()
1288 memcpy(t1, P->z, sizeof P->z); in p256_to_affine()
1291 mul_f256(t1, t1, P->z); in p256_to_affine()
1295 * Square-and-multiply. Apart from the squarings, we have a few in p256_to_affine()
1297 * for setting 1 bit, and by t1 for setting 31 bits. in p256_to_affine()
1299 memcpy(t2, P->z, sizeof P->z); in p256_to_affine()
1312 mul_f256(t2, t2, P->z); in p256_to_affine()
1321 mul_f256(P->x, t1, P->x); in p256_to_affine()
1323 mul_f256(P->y, t1, P->y); in p256_to_affine()
1324 reduce_final_f256(P->x); in p256_to_affine()
1325 reduce_final_f256(P->y); in p256_to_affine()
1331 mul_f256(P->z, P->z, t2); in p256_to_affine()
1332 reduce_final_f256(P->z); in p256_to_affine()
1336 * Double a point in P-256. This function works for all valid points,
1346 * m = 3*(x + z^2)*(x - z^2) in p256_double()
1347 * x' = m^2 - 2*s in p256_double()
1348 * y' = m*(s - x') - 8*y^4 in p256_double()
1353 * - If y = 0 then z' = 0. But there is no such point in P-256 in p256_double()
1355 * - If z = 0 then z' = 0. in p256_double()
1363 square_f256(t1, Q->z); in p256_double()
1366 * Compute x-z^2 in t2 and x+z^2 in t1. in p256_double()
1369 t2[i] = (F256[i] << 1) + Q->x[i] - t1[i]; in p256_double()
1370 t1[i] += Q->x[i]; in p256_double()
1376 * Compute 3*(x+z^2)*(x-z^2) in t1. in p256_double()
1387 square_f256(t3, Q->y); in p256_double()
1392 mul_f256(t2, Q->x, t3); in p256_double()
1400 * Compute x' = m^2 - 2*s. in p256_double()
1402 square_f256(Q->x, t1); in p256_double()
1404 Q->x[i] += (F256[i] << 2) - (t2[i] << 1); in p256_double()
1406 norm13(Q->x, Q->x, 20); in p256_double()
1407 reduce_f256(Q->x); in p256_double()
1412 mul_f256(t4, Q->y, Q->z); in p256_double()
1414 Q->z[i] = t4[i] << 1; in p256_double()
1416 norm13(Q->z, Q->z, 20); in p256_double()
1417 reduce_f256(Q->z); in p256_double()
1420 * Compute y' = m*(s - x') - 8*y^4. Note that we already have in p256_double()
1424 t2[i] += (F256[i] << 1) - Q->x[i]; in p256_double()
1427 mul_f256(Q->y, t1, t2); in p256_double()
1430 Q->y[i] += (F256[i] << 2) - (t4[i] << 1); in p256_double()
1432 norm13(Q->y, Q->y, 20); in p256_double()
1433 reduce_f256(Q->y); in p256_double()
1441 * - If P1 == 0 but P2 != 0
1442 * - If P1 != 0 but P2 == 0
1443 * - If P1 == P2
1449 * - P1 and P2 have the same Y coordinate
1450 * - P1 == 0 and P2 == 0
1451 * - The Y coordinate of one of the points is 0 and the other point is
1456 * curve P-256.
1461 * - If the result is not the point at infinity, then it is correct.
1462 * - Otherwise, if the returned value is 1, then this is a case of
1464 * - Otherwise, P1 == P2, so a "double" operation should have been
1477 * h = u2 - u1 in p256_add()
1478 * r = s2 - s1 in p256_add()
1479 * x3 = r^2 - h^3 - 2 * u1 * h^2 in p256_add()
1480 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 in p256_add()
1490 square_f256(t3, P2->z); in p256_add()
1491 mul_f256(t1, P1->x, t3); in p256_add()
1492 mul_f256(t4, P2->z, t3); in p256_add()
1493 mul_f256(t3, P1->y, t4); in p256_add()
1498 square_f256(t4, P1->z); in p256_add()
1499 mul_f256(t2, P2->x, t4); in p256_add()
1500 mul_f256(t5, P1->z, t4); in p256_add()
1501 mul_f256(t4, P2->y, t5); in p256_add()
1504 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). in p256_add()
1509 t2[i] += (F256[i] << 1) - t1[i]; in p256_add()
1510 t4[i] += (F256[i] << 1) - t3[i]; in p256_add()
1520 ret = (ret | -ret) >> 31; in p256_add()
1530 * Compute x3 = r^2 - h^3 - 2*u1*h^2. in p256_add()
1532 square_f256(P1->x, t4); in p256_add()
1534 P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1); in p256_add()
1536 norm13(P1->x, P1->x, 20); in p256_add()
1537 reduce_f256(P1->x); in p256_add()
1540 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. in p256_add()
1543 t6[i] += (F256[i] << 1) - P1->x[i]; in p256_add()
1546 mul_f256(P1->y, t4, t6); in p256_add()
1549 P1->y[i] += (F256[i] << 1) - t1[i]; in p256_add()
1551 norm13(P1->y, P1->y, 20); in p256_add()
1552 reduce_f256(P1->y); in p256_add()
1557 mul_f256(t1, P1->z, P2->z); in p256_add()
1558 mul_f256(P1->z, t1, t2); in p256_add()
1565 * case when P2 is a non-zero point in affine coordinate.
1569 * - If P1 == 0
1570 * - If P1 == P2
1576 * - P1 and P2 have the same Y coordinate
1577 * - The Y coordinate of P2 is 0 and P1 is the point at infinity.
1581 * curve P-256.
1586 * - If the result is not the point at infinity, then it is correct.
1587 * - Otherwise, if the returned value is 1, then this is a case of
1589 * - Otherwise, P1 == P2, so a "double" operation should have been
1602 * h = u2 - u1 in p256_add_mixed()
1603 * r = s2 - s1 in p256_add_mixed()
1604 * x3 = r^2 - h^3 - 2 * u1 * h^2 in p256_add_mixed()
1605 * y3 = r * (u1 * h^2 - x3) - s1 * h^3 in p256_add_mixed()
1615 memcpy(t1, P1->x, sizeof t1); in p256_add_mixed()
1616 memcpy(t3, P1->y, sizeof t3); in p256_add_mixed()
1621 square_f256(t4, P1->z); in p256_add_mixed()
1622 mul_f256(t2, P2->x, t4); in p256_add_mixed()
1623 mul_f256(t5, P1->z, t4); in p256_add_mixed()
1624 mul_f256(t4, P2->y, t5); in p256_add_mixed()
1627 * Compute h = h2 - u1 (in t2) and r = s2 - s1 (in t4). in p256_add_mixed()
1632 t2[i] += (F256[i] << 1) - t1[i]; in p256_add_mixed()
1633 t4[i] += (F256[i] << 1) - t3[i]; in p256_add_mixed()
1643 ret = (ret | -ret) >> 31; in p256_add_mixed()
1653 * Compute x3 = r^2 - h^3 - 2*u1*h^2. in p256_add_mixed()
1655 square_f256(P1->x, t4); in p256_add_mixed()
1657 P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1); in p256_add_mixed()
1659 norm13(P1->x, P1->x, 20); in p256_add_mixed()
1660 reduce_f256(P1->x); in p256_add_mixed()
1663 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3. in p256_add_mixed()
1666 t6[i] += (F256[i] << 1) - P1->x[i]; in p256_add_mixed()
1669 mul_f256(P1->y, t4, t6); in p256_add_mixed()
1672 P1->y[i] += (F256[i] << 1) - t1[i]; in p256_add_mixed()
1674 norm13(P1->y, P1->y, 20); in p256_add_mixed()
1675 reduce_f256(P1->y); in p256_add_mixed()
1680 mul_f256(P1->z, P1->z, t2); in p256_add_mixed()
1686 * Decode a P-256 point. This function does not support the point at
1705 * least significant bit of the Y coordinate), but it is explicitly in p256_decode()
1726 t1[i] += (F256[i] << 3) - MUL15(3, tx[i]) + P256_B[i] - t2[i]; in p256_decode()
1738 memcpy(P->x, tx, sizeof tx); in p256_decode()
1739 memcpy(P->y, ty, sizeof ty); in p256_decode()
1740 memset(P->z, 0, sizeof P->z); in p256_decode()
1741 P->z[0] = 1; in p256_decode()
1756 le13_to_be8(buf + 1, 32, P->x); in p256_encode()
1757 le13_to_be8(buf + 33, 32, P->y); in p256_encode()
1772 * We use a 2-bit window to handle multiplier bits by pairs. in p256_mul()
1791 while (xlen -- > 0) { in p256_mul()
1794 for (k = 6; k >= 0; k -= 2) { in p256_mul()
1819 * the point are encoded as 20 words of 13 bits each (little-endian
1820 * order); 13-bit words are then grouped 2-by-2 into 32-bit words
1821 * (little-endian order within each word).
1917 * Lookup one of the Gwin[] values, by index. This is constant-time.
1930 m = -EQ(idx, k + 1); in lookup_Gwin()
1936 T->x[(u << 1) + 0] = xy[u] & 0xFFFF; in lookup_Gwin()
1937 T->x[(u << 1) + 1] = xy[u] >> 16; in lookup_Gwin()
1938 T->y[(u << 1) + 0] = xy[u + 10] & 0xFFFF; in lookup_Gwin()
1939 T->y[(u << 1) + 1] = xy[u + 10] >> 16; in lookup_Gwin()
1941 memset(T->z, 0, sizeof T->z); in lookup_Gwin()
1942 T->z[0] = 1; in lookup_Gwin()
1946 * Multiply the generator by an integer. The integer is assumed non-zero
1956 * We use a 4-bit window to handle multiplier bits by groups in p256_mulgen()
1958 * points in affine coordinates; we use a constant-time lookup. in p256_mulgen()
1965 while (xlen -- > 0) { in p256_mulgen()