Lines Matching +full:- +full:10 +full:v

34 	 * Implementation notes: we split the 130-bit values into ten  in poly1305_inner()
35 * 13-bit words. This gives us some space for carries and allows in poly1305_inner()
36 * using only 32x32->32 multiplications, which are way faster than in poly1305_inner()
37 * 32x32->64 multiplications on the ARM Cortex-M0/M0+, and also in poly1305_inner()
38 * help in making constant-time code on the Cortex-M3. in poly1305_inner()
40 * Since we compute modulo 2^130-5, the "upper words" become in poly1305_inner()
45 * In each loop iteration, a[] and r[] words are 13-bit each, in poly1305_inner()
53 uint32_t b[10]; in poly1305_inner()
54 unsigned u, v; in poly1305_inner() local
58 * If there is a partial block, right-pad it with zeros. in poly1305_inner()
71 v = br_dec16le(buf); in poly1305_inner()
72 a[0] += v & 0x01FFF; in poly1305_inner()
73 v >>= 13; in poly1305_inner()
74 v |= buf[2] << 3; in poly1305_inner()
75 v |= buf[3] << 11; in poly1305_inner()
76 a[1] += v & 0x01FFF; in poly1305_inner()
77 v >>= 13; in poly1305_inner()
78 v |= buf[4] << 6; in poly1305_inner()
79 a[2] += v & 0x01FFF; in poly1305_inner()
80 v >>= 13; in poly1305_inner()
81 v |= buf[5] << 1; in poly1305_inner()
82 v |= buf[6] << 9; in poly1305_inner()
83 a[3] += v & 0x01FFF; in poly1305_inner()
84 v >>= 13; in poly1305_inner()
85 v |= buf[7] << 4; in poly1305_inner()
86 v |= buf[8] << 12; in poly1305_inner()
87 a[4] += v & 0x01FFF; in poly1305_inner()
88 v >>= 13; in poly1305_inner()
89 v |= buf[9] << 7; in poly1305_inner()
90 a[5] += v & 0x01FFF; in poly1305_inner()
91 v >>= 13; in poly1305_inner()
92 v |= buf[10] << 2; in poly1305_inner()
93 v |= buf[11] << 10; in poly1305_inner()
94 a[6] += v & 0x01FFF; in poly1305_inner()
95 v >>= 13; in poly1305_inner()
96 v |= buf[12] << 5; in poly1305_inner()
97 a[7] += v & 0x01FFF; in poly1305_inner()
98 v = br_dec16le(buf + 13); in poly1305_inner()
99 a[8] += v & 0x01FFF; in poly1305_inner()
100 v >>= 13; in poly1305_inner()
101 v |= buf[15] << 3; in poly1305_inner()
102 a[9] += v | 0x00800; in poly1305_inner()
108 * a 32-bit word and still have some room for carries. in poly1305_inner()
116 * (they are 5 times a 13-bit word) so the full summation in poly1305_inner()
117 * may yield values up to 46 times a 27-bit word, which in poly1305_inner()
118 * does not fit on a 32-bit word. To avoid that issue, we in poly1305_inner()
123 for (u = 0; u < 10; u ++) { in poly1305_inner()
127 + MUL15(a[0], r[u + 9 - 0]) in poly1305_inner()
128 + MUL15(a[1], r[u + 9 - 1]) in poly1305_inner()
129 + MUL15(a[2], r[u + 9 - 2]) in poly1305_inner()
130 + MUL15(a[3], r[u + 9 - 3]) in poly1305_inner()
131 + MUL15(a[4], r[u + 9 - 4]); in poly1305_inner()
136 for (u = 0; u < 10; u ++) { in poly1305_inner()
140 + MUL15(a[5], r[u + 9 - 5]) in poly1305_inner()
141 + MUL15(a[6], r[u + 9 - 6]) in poly1305_inner()
142 + MUL15(a[7], r[u + 9 - 7]) in poly1305_inner()
143 + MUL15(a[8], r[u + 9 - 8]) in poly1305_inner()
144 + MUL15(a[9], r[u + 9 - 9]); in poly1305_inner()
160 len -= 16; in poly1305_inner()
171 uint32_t z, r[19], acc[10], cc, ctl; in br_poly1305_ctmul32_run()
197 * Decode the 'r' value into 13-bit words, with the "clamping" in br_poly1305_ctmul32_run()
202 r[10] = z >> 13; in br_poly1305_ctmul32_run()
217 * Extend r[] with the 5x factor pre-applied. in br_poly1305_ctmul32_run()
220 r[i] = MUL15(5, r[i + 10]); in br_poly1305_ctmul32_run()
240 * and applying the '2^130 = -5 mod p' rule. Note that the output in br_poly1305_ctmul32_run()
246 for (i = 1; i < 10; i ++) { in br_poly1305_ctmul32_run()
256 * We may still have a value in the 2^130-5..2^130-1 range, in in br_poly1305_ctmul32_run()
258 * in constant-time, between 'acc' and 'acc-p', in br_poly1305_ctmul32_run()
261 for (i = 1; i < 10; i ++) { in br_poly1305_ctmul32_run()
264 acc[0] = MUX(ctl, acc[0] - 0x1FFB, acc[0]); in br_poly1305_ctmul32_run()
265 for (i = 1; i < 10; i ++) { in br_poly1305_ctmul32_run()
266 acc[i] &= ~(-ctl); in br_poly1305_ctmul32_run()
270 * Convert back the accumulator to 32-bit words, and add the in br_poly1305_ctmul32_run()
276 z = (z >> 16) + (acc[2] << 10) + br_dec16le(pkey + 18); in br_poly1305_ctmul32_run()
285 br_enc16le((unsigned char *)tag + 10, z & 0xFFFF); in br_poly1305_ctmul32_run()