1 /* 2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* 28 * This is a "reference" implementation of Poly1305 that uses the 29 * generic "i15" code for big integers. It is slow, but it handles all 30 * big-integer operations with generic code, thereby avoiding most 31 * tricky situations with carry propagation and modular reduction. 32 */ 33 34 /* 35 * Modulus: 2^130-5. 36 */ 37 static const uint16_t P1305[] = { 38 0x008A, 39 0x7FFB, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x03FF 40 }; 41 42 /* 43 * -p mod 2^15. 44 */ 45 #define P0I 0x4CCD 46 47 /* 48 * R^2 mod p, for conversion to Montgomery representation (R = 2^135, 49 * since we use 9 words of 15 bits each, and 15*9 = 135). 50 */ 51 static const uint16_t R2[] = { 52 0x008A, 53 0x6400, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 54 }; 55 56 /* 57 * Perform the inner processing of blocks for Poly1305. The "r" array 58 * is in Montgomery representation, while the "a" array is not. 59 */ 60 static void 61 poly1305_inner(uint16_t *a, const uint16_t *r, const void *data, size_t len) 62 { 63 const unsigned char *buf; 64 65 buf = data; 66 while (len > 0) { 67 unsigned char tmp[16], rev[16]; 68 uint16_t b[10]; 69 uint32_t ctl; 70 int i; 71 72 /* 73 * If there is a partial block, right-pad it with zeros. 74 */ 75 if (len < 16) { 76 memset(tmp, 0, sizeof tmp); 77 memcpy(tmp, buf, len); 78 buf = tmp; 79 len = 16; 80 } 81 82 /* 83 * Decode next block and apply the "high bit". Since 84 * decoding is little-endian, we must byte-swap the buffer. 85 */ 86 for (i = 0; i < 16; i ++) { 87 rev[i] = buf[15 - i]; 88 } 89 br_i15_decode_mod(b, rev, sizeof rev, P1305); 90 b[9] |= 0x0100; 91 92 /* 93 * Add the accumulator to the decoded block (modular 94 * addition). 95 */ 96 ctl = br_i15_add(b, a, 1); 97 ctl |= NOT(br_i15_sub(b, P1305, 0)); 98 br_i15_sub(b, P1305, ctl); 99 100 /* 101 * Multiply by r, result is the new accumulator value. 102 */ 103 br_i15_montymul(a, b, r, P1305, P0I); 104 105 buf += 16; 106 len -= 16; 107 } 108 } 109 110 /* 111 * Byteswap a 16-byte value. 112 */ 113 static void 114 byteswap16(unsigned char *buf) 115 { 116 int i; 117 118 for (i = 0; i < 8; i ++) { 119 unsigned x; 120 121 x = buf[i]; 122 buf[i] = buf[15 - i]; 123 buf[15 - i] = x; 124 } 125 } 126 127 /* see bearssl_block.h */ 128 void 129 br_poly1305_i15_run(const void *key, const void *iv, 130 void *data, size_t len, const void *aad, size_t aad_len, 131 void *tag, br_chacha20_run ichacha, int encrypt) 132 { 133 unsigned char pkey[32], foot[16]; 134 uint16_t t[10], r[10], acc[10]; 135 136 /* 137 * Compute the MAC key. The 'r' value is the first 16 bytes of 138 * pkey[]. 139 */ 140 memset(pkey, 0, sizeof pkey); 141 ichacha(key, iv, 0, pkey, sizeof pkey); 142 143 /* 144 * If encrypting, ChaCha20 must run first, followed by Poly1305. 145 * When decrypting, the operations are reversed. 146 */ 147 if (encrypt) { 148 ichacha(key, iv, 1, data, len); 149 } 150 151 /* 152 * Run Poly1305. We must process the AAD, then ciphertext, then 153 * the footer (with the lengths). Note that the AAD and ciphertext 154 * are meant to be padded with zeros up to the next multiple of 16, 155 * and the length of the footer is 16 bytes as well. 156 */ 157 158 /* 159 * Apply the "clamping" operation on the encoded 'r' value. 160 */ 161 pkey[ 3] &= 0x0F; 162 pkey[ 7] &= 0x0F; 163 pkey[11] &= 0x0F; 164 pkey[15] &= 0x0F; 165 pkey[ 4] &= 0xFC; 166 pkey[ 8] &= 0xFC; 167 pkey[12] &= 0xFC; 168 169 /* 170 * Decode the clamped 'r' value. Decoding should use little-endian 171 * so we must byteswap the value first. 172 */ 173 byteswap16(pkey); 174 br_i15_decode_mod(t, pkey, 16, P1305); 175 176 /* 177 * Convert 'r' to Montgomery representation. 178 */ 179 br_i15_montymul(r, t, R2, P1305, P0I); 180 181 /* 182 * Accumulator is 0. 183 */ 184 br_i15_zero(acc, 0x8A); 185 186 /* 187 * Process the additional authenticated data, ciphertext, and 188 * footer in due order. 189 */ 190 br_enc64le(foot, (uint64_t)aad_len); 191 br_enc64le(foot + 8, (uint64_t)len); 192 poly1305_inner(acc, r, aad, aad_len); 193 poly1305_inner(acc, r, data, len); 194 poly1305_inner(acc, r, foot, sizeof foot); 195 196 /* 197 * Decode the value 's'. Again, a byteswap is needed. 198 */ 199 byteswap16(pkey + 16); 200 br_i15_decode_mod(t, pkey + 16, 16, P1305); 201 202 /* 203 * Add the value 's' to the accumulator. That addition is done 204 * modulo 2^128, so we just ignore the carry. 205 */ 206 br_i15_add(acc, t, 1); 207 208 /* 209 * Encode the result (128 low bits) to the tag. Encoding should 210 * be little-endian. 211 */ 212 br_i15_encode(tag, 16, acc); 213 byteswap16(tag); 214 215 /* 216 * If decrypting, then ChaCha20 runs _after_ Poly1305. 217 */ 218 if (!encrypt) { 219 ichacha(key, iv, 1, data, len); 220 } 221 } 222