Lines Matching +full:20 +full:w

37 	for (u = 0; u < 20; u ++) {
40 for (u = 0; u < 20; u ++) {
48 for (u = 0; u < 20; u ++) {
49 uint32_t w;
52 w = x[u];
56 w <<= k;
60 tmp[35 - k] |= (unsigned char)w;
61 tmp[34 - k] |= (unsigned char)(w >> 8);
62 tmp[33 - k] |= (unsigned char)(w >> 16);
63 tmp[32 - k] |= (unsigned char)(w >> 24);
140 * value is the resulting carry. The source (w) and destination (d)
144 norm13(uint32_t *d, const uint32_t *w, size_t len) in norm13() argument
153 z = w[u] + cc; in norm13()
162 * on 13 bits; source operands use 20 words, destination operand
166 * fit on 13 bits; source operand uses 20 words, destination operand
176 * Two-level Karatsuba: turns a 20x20 multiplication into in mul20()
180 * - First Karatsuba decomposition turns the 20x20 mul on in mul20()
193 uint32_t u[45], v[45], w[90]; in mul20() local
231 #define CPR1(w, cprcc) do { \ in mul20() argument
232 uint32_t cprz = (w) + cprcc; \ in mul20()
233 (w) = cprz & 0x1FFF; \ in mul20()
252 memcpy(u, a, 20 * sizeof *a); in mul20()
259 memcpy(v, b, 20 * sizeof *b); in mul20()
272 w[(i << 1) + 0] = MUL15(u[i + 0], v[i + 0]); in mul20()
273 w[(i << 1) + 1] = MUL15(u[i + 0], v[i + 1]) in mul20()
275 w[(i << 1) + 2] = MUL15(u[i + 0], v[i + 2]) in mul20()
278 w[(i << 1) + 3] = MUL15(u[i + 0], v[i + 3]) in mul20()
282 w[(i << 1) + 4] = MUL15(u[i + 0], v[i + 4]) in mul20()
287 w[(i << 1) + 5] = MUL15(u[i + 1], v[i + 4]) in mul20()
291 w[(i << 1) + 6] = MUL15(u[i + 2], v[i + 4]) in mul20()
294 w[(i << 1) + 7] = MUL15(u[i + 3], v[i + 4]) in mul20()
296 w[(i << 1) + 8] = MUL15(u[i + 4], v[i + 4]); in mul20()
297 w[(i << 1) + 9] = 0; in mul20()
312 w[80 + 0] = MUL15(u[40 + 0], v[40 + 0]); in mul20()
313 w[80 + 1] = MUL15(u[40 + 0], v[40 + 1]) in mul20()
315 w[80 + 2] = MUL15(u[40 + 0], v[40 + 2]) in mul20()
318 w[80 + 3] = MUL15(u[40 + 0], v[40 + 3]) in mul20()
322 w[80 + 4] = MUL15(u[40 + 0], v[40 + 4]) in mul20()
327 w[80 + 5] = MUL15(u[40 + 1], v[40 + 4]) in mul20()
331 w[80 + 6] = MUL15(u[40 + 2], v[40 + 4]) in mul20()
334 w[80 + 7] = MUL15(u[40 + 3], v[40 + 4]) in mul20()
336 w[80 + 8] = MUL15(u[40 + 4], v[40 + 4]); in mul20()
338 CPR(w, 80); in mul20()
340 w[80 + 4] += MUL15(u[40 + 4], v[40 + 0]); in mul20()
350 CPR(w, 60); in mul20()
351 CPR(w, 70); in mul20()
358 ZSUB2F(w, 8, w, 0, w, 2); in mul20()
359 ZSUB2F(w, 9, w, 1, w, 3); in mul20()
360 ZADDT(w, 1, w, 8); in mul20()
361 ZADDT(w, 2, w, 9); in mul20()
364 ZSUB2F(w, 10, w, 4, w, 6); in mul20()
365 ZSUB2F(w, 11, w, 5, w, 7); in mul20()
366 ZADDT(w, 5, w, 10); in mul20()
367 ZADDT(w, 6, w, 11); in mul20()
370 ZSUB2F(w, 16, w, 12, w, 14); in mul20()
371 ZSUB2F(w, 17, w, 13, w, 15); in mul20()
372 ZADDT(w, 13, w, 16); in mul20()
373 ZADDT(w, 14, w, 17); in mul20()
376 ZSUB2F(w, 12, w, 0, w, 4); in mul20()
377 ZSUB2F(w, 13, w, 1, w, 5); in mul20()
378 ZSUB2F(w, 14, w, 2, w, 6); in mul20()
379 ZSUB2F(w, 15, w, 3, w, 7); in mul20()
380 ZADDT(w, 2, w, 12); in mul20()
381 ZADDT(w, 3, w, 13); in mul20()
382 ZADDT(w, 4, w, 14); in mul20()
383 ZADDT(w, 5, w, 15); in mul20()
388 cc = norm13(d, w, 40); in mul20()
621 t[20] = MUL15(a[ 1], b[19]) in mul20()
930 t[20] = MUL15(a[10], a[10]) in square20()
1046 uint32_t t[20]; in reduce_final_f255()
1052 for (i = 0; i < 20; i ++) { in reduce_final_f255()
1053 uint32_t w; in reduce_final_f255() local
1055 w = t[i] + cc; in reduce_final_f255()
1056 cc = w >> 13; in reduce_final_f255()
1057 t[i] = w & 0x1FFF; in reduce_final_f255()
1068 uint32_t t[40], cc, w; in f255_mulgen() local
1083 * Since the modulus is 2^255-19 and word 20 corresponds to in f255_mulgen()
1084 * offset 20*13 = 260, word 20+k must be added to word k with in f255_mulgen()
1092 w = t[x] + cc + MUL15(t[(x) + 20], 608); \ in f255_mulgen()
1093 t[x] = w & 0x1FFF; \ in f255_mulgen()
1094 cc = w >> 13; \ in f255_mulgen()
1120 cc = MUL15(w >> 8, 19); in f255_mulgen()
1124 w = t[x] + cc; \ in f255_mulgen()
1125 d[x] = w & 0x1FFF; \ in f255_mulgen()
1126 cc = w >> 13; \ in f255_mulgen()
1155 * Operands are arrays of 20 words, each containing 13 bits of data, in
1173 uint32_t cc, w; in f255_add() local
1176 for (i = 0; i < 20; i ++) { in f255_add()
1177 w = a[i] + b[i] + cc; in f255_add()
1178 d[i] = w & 0x1FFF; in f255_add()
1179 cc = w >> 13; in f255_add()
1181 cc = MUL15(w >> 8, 19); in f255_add()
1183 for (i = 0; i < 20; i ++) { in f255_add()
1184 w = d[i] + cc; in f255_add()
1185 d[i] = w & 0x1FFF; in f255_add()
1186 cc = w >> 13; in f255_add()
1202 uint32_t cc, w; in f255_sub() local
1205 for (i = 0; i < 20; i ++) { in f255_sub()
1206 w = a[i] - b[i] + cc; in f255_sub()
1207 d[i] = w & 0x1FFF; in f255_sub()
1208 cc = ARSH(w, 13); in f255_sub()
1210 cc = MUL15((w + 0x200) >> 8, 19); in f255_sub()
1212 for (i = 0; i < 20; i ++) { in f255_sub()
1213 w = d[i] + cc; in f255_sub()
1214 d[i] = w & 0x1FFF; in f255_sub()
1215 cc = w >> 13; in f255_sub()
1227 uint32_t cc, w; in f255_mul_a24() local
1230 for (i = 0; i < 20; i ++) { in f255_mul_a24()
1231 w = MUL15(a[i], 121665) + cc; in f255_mul_a24()
1232 d[i] = w & 0x1FFF; in f255_mul_a24()
1233 cc = w >> 13; in f255_mul_a24()
1235 cc = MUL15(w >> 8, 19); in f255_mul_a24()
1237 for (i = 0; i < 20; i ++) { in f255_mul_a24()
1238 w = d[i] + cc; in f255_mul_a24()
1239 d[i] = w & 0x1FFF; in f255_mul_a24()
1240 cc = w >> 13; in f255_mul_a24()
1288 for (i = 0; i < 20; i ++) { in cswap()
1303 uint32_t x1[20], x2[20], x3[20], z2[20], z3[20]; in api_mul()
1304 uint32_t a[20], aa[20], b[20], bb[20]; in api_mul()
1305 uint32_t c[20], d[20], e[20], da[20], cb[20]; in api_mul()