Lines Matching +full:a +full:- +full:z

5  * a copy of this software and associated documentation files (the
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 poly1305_inner(uint32_t *a, const uint32_t *r, const void *data, size_t len) in poly1305_inner() argument
34 * Implementation notes: we split the 130-bit values into ten in poly1305_inner()
35 * 13-bit words. This gives us some space for carries and allows in poly1305_inner()
36 * using only 32x32->32 multiplications, which are way faster than in poly1305_inner()
37 * 32x32->64 multiplications on the ARM Cortex-M0/M0+, and also in poly1305_inner()
38 * help in making constant-time code on the Cortex-M3. in poly1305_inner()
40 * Since we compute modulo 2^130-5, the "upper words" become in poly1305_inner()
41 * low words with a factor of 5; that is, x*2^130 = x*5 mod p. in poly1305_inner()
45 * In each loop iteration, a[] and r[] words are 13-bit each, in poly1305_inner()
46 * except a[1] which may use 14 bits. in poly1305_inner()
55 uint32_t z, cc1, cc2; in poly1305_inner() local
58 * If there is a partial block, right-pad it with zeros. in poly1305_inner()
72 a[0] += v & 0x01FFF; in poly1305_inner()
76 a[1] += v & 0x01FFF; in poly1305_inner()
79 a[2] += v & 0x01FFF; in poly1305_inner()
83 a[3] += v & 0x01FFF; in poly1305_inner()
87 a[4] += v & 0x01FFF; in poly1305_inner()
90 a[5] += v & 0x01FFF; in poly1305_inner()
94 a[6] += v & 0x01FFF; in poly1305_inner()
97 a[7] += v & 0x01FFF; in poly1305_inner()
99 a[8] += v & 0x01FFF; in poly1305_inner()
102 a[9] += v | 0x00800; in poly1305_inner()
105 * At that point, all a[] values fit on 14 bits, while in poly1305_inner()
108 * a 32-bit word and still have some room for carries. in poly1305_inner()
112 * Now a[] contains words with values up to 14 bits each. in poly1305_inner()
116 * (they are 5 times a 13-bit word) so the full summation in poly1305_inner()
117 * may yield values up to 46 times a 27-bit word, which in poly1305_inner()
118 * does not fit on a 32-bit word. To avoid that issue, we in poly1305_inner()
119 * must split the loop below in two, with a carry in poly1305_inner()
127 + MUL15(a[0], r[u + 9 - 0]) in poly1305_inner()
128 + MUL15(a[1], r[u + 9 - 1]) in poly1305_inner()
129 + MUL15(a[2], r[u + 9 - 2]) in poly1305_inner()
130 + MUL15(a[3], r[u + 9 - 3]) in poly1305_inner()
131 + MUL15(a[4], r[u + 9 - 4]); in poly1305_inner()
140 + MUL15(a[5], r[u + 9 - 5]) in poly1305_inner()
141 + MUL15(a[6], r[u + 9 - 6]) in poly1305_inner()
142 + MUL15(a[7], r[u + 9 - 7]) in poly1305_inner()
143 + MUL15(a[8], r[u + 9 - 8]) in poly1305_inner()
144 + MUL15(a[9], r[u + 9 - 9]); in poly1305_inner()
148 memcpy(a, b, sizeof b); in poly1305_inner()
151 * The two carries "loop back" with a factor of 5. We in poly1305_inner()
152 * propagate them into a[0] and a[1]. in poly1305_inner()
154 z = cc1 + cc2; in poly1305_inner()
155 z += (z << 2) + a[0]; in poly1305_inner()
156 a[0] = z & 0x1FFF; in poly1305_inner()
157 a[1] += z >> 13; in poly1305_inner()
160 len -= 16; in poly1305_inner()
171 uint32_t z, r[19], acc[10], cc, ctl; in br_poly1305_ctmul32_run() local
197 * Decode the 'r' value into 13-bit words, with the "clamping" in br_poly1305_ctmul32_run()
200 z = br_dec32le(pkey) & 0x03FFFFFF; in br_poly1305_ctmul32_run()
201 r[9] = z & 0x1FFF; in br_poly1305_ctmul32_run()
202 r[10] = z >> 13; in br_poly1305_ctmul32_run()
203 z = (br_dec32le(pkey + 3) >> 2) & 0x03FFFF03; in br_poly1305_ctmul32_run()
204 r[11] = z & 0x1FFF; in br_poly1305_ctmul32_run()
205 r[12] = z >> 13; in br_poly1305_ctmul32_run()
206 z = (br_dec32le(pkey + 6) >> 4) & 0x03FFC0FF; in br_poly1305_ctmul32_run()
207 r[13] = z & 0x1FFF; in br_poly1305_ctmul32_run()
208 r[14] = z >> 13; in br_poly1305_ctmul32_run()
209 z = (br_dec32le(pkey + 9) >> 6) & 0x03F03FFF; in br_poly1305_ctmul32_run()
210 r[15] = z & 0x1FFF; in br_poly1305_ctmul32_run()
211 r[16] = z >> 13; in br_poly1305_ctmul32_run()
212 z = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF; in br_poly1305_ctmul32_run()
213 r[17] = z & 0x1FFF; in br_poly1305_ctmul32_run()
214 r[18] = z >> 13; in br_poly1305_ctmul32_run()
217 * Extend r[] with the 5x factor pre-applied. in br_poly1305_ctmul32_run()
240 * and applying the '2^130 = 5 mod p' rule. Note that the output in br_poly1305_ctmul32_run()
242 * acc[1] may be (very slightly) above 2^13. A single loop back in br_poly1305_ctmul32_run()
247 z = acc[i] + cc; in br_poly1305_ctmul32_run()
248 acc[i] = z & 0x1FFF; in br_poly1305_ctmul32_run()
249 cc = z >> 13; in br_poly1305_ctmul32_run()
251 z = acc[0] + cc + (cc << 2); in br_poly1305_ctmul32_run()
252 acc[0] = z & 0x1FFF; in br_poly1305_ctmul32_run()
253 acc[1] += z >> 13; in br_poly1305_ctmul32_run()
256 * We may still have a value in the 2^130-5..2^130-1 range, in in br_poly1305_ctmul32_run()
258 * in constant-time, between 'acc' and 'acc-p', in br_poly1305_ctmul32_run()
264 acc[0] = MUX(ctl, acc[0] - 0x1FFB, acc[0]); in br_poly1305_ctmul32_run()
266 acc[i] &= ~(-ctl); in br_poly1305_ctmul32_run()
270 * Convert back the accumulator to 32-bit words, and add the in br_poly1305_ctmul32_run()
274 z = acc[0] + (acc[1] << 13) + br_dec16le(pkey + 16); in br_poly1305_ctmul32_run()
275 br_enc16le((unsigned char *)tag, z & 0xFFFF); in br_poly1305_ctmul32_run()
276 z = (z >> 16) + (acc[2] << 10) + br_dec16le(pkey + 18); in br_poly1305_ctmul32_run()
277 br_enc16le((unsigned char *)tag + 2, z & 0xFFFF); in br_poly1305_ctmul32_run()
278 z = (z >> 16) + (acc[3] << 7) + br_dec16le(pkey + 20); in br_poly1305_ctmul32_run()
279 br_enc16le((unsigned char *)tag + 4, z & 0xFFFF); in br_poly1305_ctmul32_run()
280 z = (z >> 16) + (acc[4] << 4) + br_dec16le(pkey + 22); in br_poly1305_ctmul32_run()
281 br_enc16le((unsigned char *)tag + 6, z & 0xFFFF); in br_poly1305_ctmul32_run()
282 z = (z >> 16) + (acc[5] << 1) + (acc[6] << 14) + br_dec16le(pkey + 24); in br_poly1305_ctmul32_run()
283 br_enc16le((unsigned char *)tag + 8, z & 0xFFFF); in br_poly1305_ctmul32_run()
284 z = (z >> 16) + (acc[7] << 11) + br_dec16le(pkey + 26); in br_poly1305_ctmul32_run()
285 br_enc16le((unsigned char *)tag + 10, z & 0xFFFF); in br_poly1305_ctmul32_run()
286 z = (z >> 16) + (acc[8] << 8) + br_dec16le(pkey + 28); in br_poly1305_ctmul32_run()
287 br_enc16le((unsigned char *)tag + 12, z & 0xFFFF); in br_poly1305_ctmul32_run()
288 z = (z >> 16) + (acc[9] << 5) + br_dec16le(pkey + 30); in br_poly1305_ctmul32_run()
289 br_enc16le((unsigned char *)tag + 14, z & 0xFFFF); in br_poly1305_ctmul32_run()