1 /* 2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* see bearssl_block.h */ 28 void 29 br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx, 30 const void *key, size_t len) 31 { 32 ctx->vtable = &br_aes_ct64_ctrcbc_vtable; 33 ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); 34 } 35 36 static void 37 xorbuf(void *dst, const void *src, size_t len) 38 { 39 unsigned char *d; 40 const unsigned char *s; 41 42 d = dst; 43 s = src; 44 while (len -- > 0) { 45 *d ++ ^= *s ++; 46 } 47 } 48 49 /* see bearssl_block.h */ 50 void 51 br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx, 52 void *ctr, void *data, size_t len) 53 { 54 unsigned char *buf; 55 unsigned char *ivbuf; 56 uint32_t iv0, iv1, iv2, iv3; 57 uint64_t sk_exp[120]; 58 59 br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); 60 61 /* 62 * We keep the counter as four 32-bit values, with big-endian 63 * convention, because that's what is expected for purposes of 64 * incrementing the counter value. 65 */ 66 ivbuf = ctr; 67 iv0 = br_dec32be(ivbuf + 0); 68 iv1 = br_dec32be(ivbuf + 4); 69 iv2 = br_dec32be(ivbuf + 8); 70 iv3 = br_dec32be(ivbuf + 12); 71 72 buf = data; 73 while (len > 0) { 74 uint64_t q[8]; 75 uint32_t w[16]; 76 unsigned char tmp[64]; 77 int i, j; 78 79 /* 80 * The bitslice implementation expects values in 81 * little-endian convention, so we have to byteswap them. 82 */ 83 j = (len >= 64) ? 16 : (int)(len >> 2); 84 for (i = 0; i < j; i += 4) { 85 uint32_t carry; 86 87 w[i + 0] = br_swap32(iv0); 88 w[i + 1] = br_swap32(iv1); 89 w[i + 2] = br_swap32(iv2); 90 w[i + 3] = br_swap32(iv3); 91 iv3 ++; 92 carry = ~(iv3 | -iv3) >> 31; 93 iv2 += carry; 94 carry &= -(~(iv2 | -iv2) >> 31); 95 iv1 += carry; 96 carry &= -(~(iv1 | -iv1) >> 31); 97 iv0 += carry; 98 } 99 memset(w + i, 0, (16 - i) * sizeof(uint32_t)); 100 101 for (i = 0; i < 4; i ++) { 102 br_aes_ct64_interleave_in( 103 &q[i], &q[i + 4], w + (i << 2)); 104 } 105 br_aes_ct64_ortho(q); 106 br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); 107 br_aes_ct64_ortho(q); 108 for (i = 0; i < 4; i ++) { 109 br_aes_ct64_interleave_out( 110 w + (i << 2), q[i], q[i + 4]); 111 } 112 113 br_range_enc32le(tmp, w, 16); 114 if (len <= 64) { 115 xorbuf(buf, tmp, len); 116 break; 117 } 118 xorbuf(buf, tmp, 64); 119 buf += 64; 120 len -= 64; 121 } 122 br_enc32be(ivbuf + 0, iv0); 123 br_enc32be(ivbuf + 4, iv1); 124 br_enc32be(ivbuf + 8, iv2); 125 br_enc32be(ivbuf + 12, iv3); 126 } 127 128 /* see bearssl_block.h */ 129 void 130 br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx, 131 void *cbcmac, const void *data, size_t len) 132 { 133 const unsigned char *buf; 134 uint32_t cm0, cm1, cm2, cm3; 135 uint64_t q[8]; 136 uint64_t sk_exp[120]; 137 138 br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); 139 140 cm0 = br_dec32le((unsigned char *)cbcmac + 0); 141 cm1 = br_dec32le((unsigned char *)cbcmac + 4); 142 cm2 = br_dec32le((unsigned char *)cbcmac + 8); 143 cm3 = br_dec32le((unsigned char *)cbcmac + 12); 144 145 buf = data; 146 memset(q, 0, sizeof q); 147 while (len > 0) { 148 uint32_t w[4]; 149 150 w[0] = cm0 ^ br_dec32le(buf + 0); 151 w[1] = cm1 ^ br_dec32le(buf + 4); 152 w[2] = cm2 ^ br_dec32le(buf + 8); 153 w[3] = cm3 ^ br_dec32le(buf + 12); 154 155 br_aes_ct64_interleave_in(&q[0], &q[4], w); 156 br_aes_ct64_ortho(q); 157 br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); 158 br_aes_ct64_ortho(q); 159 br_aes_ct64_interleave_out(w, q[0], q[4]); 160 161 cm0 = w[0]; 162 cm1 = w[1]; 163 cm2 = w[2]; 164 cm3 = w[3]; 165 buf += 16; 166 len -= 16; 167 } 168 169 br_enc32le((unsigned char *)cbcmac + 0, cm0); 170 br_enc32le((unsigned char *)cbcmac + 4, cm1); 171 br_enc32le((unsigned char *)cbcmac + 8, cm2); 172 br_enc32le((unsigned char *)cbcmac + 12, cm3); 173 } 174 175 /* see bearssl_block.h */ 176 void 177 br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx, 178 void *ctr, void *cbcmac, void *data, size_t len) 179 { 180 /* 181 * When encrypting, the CBC-MAC processing must be lagging by 182 * one block, since it operates on the encrypted values, so 183 * it must wait for that encryption to complete. 184 */ 185 186 unsigned char *buf; 187 unsigned char *ivbuf; 188 uint32_t iv0, iv1, iv2, iv3; 189 uint32_t cm0, cm1, cm2, cm3; 190 uint64_t sk_exp[120]; 191 uint64_t q[8]; 192 int first_iter; 193 194 br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); 195 196 /* 197 * We keep the counter as four 32-bit values, with big-endian 198 * convention, because that's what is expected for purposes of 199 * incrementing the counter value. 200 */ 201 ivbuf = ctr; 202 iv0 = br_dec32be(ivbuf + 0); 203 iv1 = br_dec32be(ivbuf + 4); 204 iv2 = br_dec32be(ivbuf + 8); 205 iv3 = br_dec32be(ivbuf + 12); 206 207 /* 208 * The current CBC-MAC value is kept in little-endian convention. 209 */ 210 cm0 = br_dec32le((unsigned char *)cbcmac + 0); 211 cm1 = br_dec32le((unsigned char *)cbcmac + 4); 212 cm2 = br_dec32le((unsigned char *)cbcmac + 8); 213 cm3 = br_dec32le((unsigned char *)cbcmac + 12); 214 215 buf = data; 216 first_iter = 1; 217 memset(q, 0, sizeof q); 218 while (len > 0) { 219 uint32_t w[8], carry; 220 221 /* 222 * The bitslice implementation expects values in 223 * little-endian convention, so we have to byteswap them. 224 */ 225 w[0] = br_swap32(iv0); 226 w[1] = br_swap32(iv1); 227 w[2] = br_swap32(iv2); 228 w[3] = br_swap32(iv3); 229 iv3 ++; 230 carry = ~(iv3 | -iv3) >> 31; 231 iv2 += carry; 232 carry &= -(~(iv2 | -iv2) >> 31); 233 iv1 += carry; 234 carry &= -(~(iv1 | -iv1) >> 31); 235 iv0 += carry; 236 237 /* 238 * The block for CBC-MAC. 239 */ 240 w[4] = cm0; 241 w[5] = cm1; 242 w[6] = cm2; 243 w[7] = cm3; 244 245 br_aes_ct64_interleave_in(&q[0], &q[4], w); 246 br_aes_ct64_interleave_in(&q[1], &q[5], w + 4); 247 br_aes_ct64_ortho(q); 248 br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); 249 br_aes_ct64_ortho(q); 250 br_aes_ct64_interleave_out(w, q[0], q[4]); 251 br_aes_ct64_interleave_out(w + 4, q[1], q[5]); 252 253 /* 254 * We do the XOR with the plaintext in 32-bit registers, 255 * so that the value are available for CBC-MAC processing 256 * as well. 257 */ 258 w[0] ^= br_dec32le(buf + 0); 259 w[1] ^= br_dec32le(buf + 4); 260 w[2] ^= br_dec32le(buf + 8); 261 w[3] ^= br_dec32le(buf + 12); 262 br_enc32le(buf + 0, w[0]); 263 br_enc32le(buf + 4, w[1]); 264 br_enc32le(buf + 8, w[2]); 265 br_enc32le(buf + 12, w[3]); 266 267 buf += 16; 268 len -= 16; 269 270 /* 271 * We set the cm* values to the block to encrypt in the 272 * next iteration. 273 */ 274 if (first_iter) { 275 first_iter = 0; 276 cm0 ^= w[0]; 277 cm1 ^= w[1]; 278 cm2 ^= w[2]; 279 cm3 ^= w[3]; 280 } else { 281 cm0 = w[0] ^ w[4]; 282 cm1 = w[1] ^ w[5]; 283 cm2 = w[2] ^ w[6]; 284 cm3 = w[3] ^ w[7]; 285 } 286 287 /* 288 * If this was the last iteration, then compute the 289 * extra block encryption to complete CBC-MAC. 290 */ 291 if (len == 0) { 292 w[0] = cm0; 293 w[1] = cm1; 294 w[2] = cm2; 295 w[3] = cm3; 296 br_aes_ct64_interleave_in(&q[0], &q[4], w); 297 br_aes_ct64_ortho(q); 298 br_aes_ct64_bitslice_encrypt( 299 ctx->num_rounds, sk_exp, q); 300 br_aes_ct64_ortho(q); 301 br_aes_ct64_interleave_out(w, q[0], q[4]); 302 cm0 = w[0]; 303 cm1 = w[1]; 304 cm2 = w[2]; 305 cm3 = w[3]; 306 break; 307 } 308 } 309 310 br_enc32be(ivbuf + 0, iv0); 311 br_enc32be(ivbuf + 4, iv1); 312 br_enc32be(ivbuf + 8, iv2); 313 br_enc32be(ivbuf + 12, iv3); 314 br_enc32le((unsigned char *)cbcmac + 0, cm0); 315 br_enc32le((unsigned char *)cbcmac + 4, cm1); 316 br_enc32le((unsigned char *)cbcmac + 8, cm2); 317 br_enc32le((unsigned char *)cbcmac + 12, cm3); 318 } 319 320 /* see bearssl_block.h */ 321 void 322 br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx, 323 void *ctr, void *cbcmac, void *data, size_t len) 324 { 325 unsigned char *buf; 326 unsigned char *ivbuf; 327 uint32_t iv0, iv1, iv2, iv3; 328 uint32_t cm0, cm1, cm2, cm3; 329 uint64_t sk_exp[120]; 330 uint64_t q[8]; 331 332 br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey); 333 334 /* 335 * We keep the counter as four 32-bit values, with big-endian 336 * convention, because that's what is expected for purposes of 337 * incrementing the counter value. 338 */ 339 ivbuf = ctr; 340 iv0 = br_dec32be(ivbuf + 0); 341 iv1 = br_dec32be(ivbuf + 4); 342 iv2 = br_dec32be(ivbuf + 8); 343 iv3 = br_dec32be(ivbuf + 12); 344 345 /* 346 * The current CBC-MAC value is kept in little-endian convention. 347 */ 348 cm0 = br_dec32le((unsigned char *)cbcmac + 0); 349 cm1 = br_dec32le((unsigned char *)cbcmac + 4); 350 cm2 = br_dec32le((unsigned char *)cbcmac + 8); 351 cm3 = br_dec32le((unsigned char *)cbcmac + 12); 352 353 buf = data; 354 memset(q, 0, sizeof q); 355 while (len > 0) { 356 uint32_t w[8], carry; 357 unsigned char tmp[16]; 358 359 /* 360 * The bitslice implementation expects values in 361 * little-endian convention, so we have to byteswap them. 362 */ 363 w[0] = br_swap32(iv0); 364 w[1] = br_swap32(iv1); 365 w[2] = br_swap32(iv2); 366 w[3] = br_swap32(iv3); 367 iv3 ++; 368 carry = ~(iv3 | -iv3) >> 31; 369 iv2 += carry; 370 carry &= -(~(iv2 | -iv2) >> 31); 371 iv1 += carry; 372 carry &= -(~(iv1 | -iv1) >> 31); 373 iv0 += carry; 374 375 /* 376 * The block for CBC-MAC. 377 */ 378 w[4] = cm0 ^ br_dec32le(buf + 0); 379 w[5] = cm1 ^ br_dec32le(buf + 4); 380 w[6] = cm2 ^ br_dec32le(buf + 8); 381 w[7] = cm3 ^ br_dec32le(buf + 12); 382 383 br_aes_ct64_interleave_in(&q[0], &q[4], w); 384 br_aes_ct64_interleave_in(&q[1], &q[5], w + 4); 385 br_aes_ct64_ortho(q); 386 br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q); 387 br_aes_ct64_ortho(q); 388 br_aes_ct64_interleave_out(w, q[0], q[4]); 389 br_aes_ct64_interleave_out(w + 4, q[1], q[5]); 390 391 br_enc32le(tmp + 0, w[0]); 392 br_enc32le(tmp + 4, w[1]); 393 br_enc32le(tmp + 8, w[2]); 394 br_enc32le(tmp + 12, w[3]); 395 xorbuf(buf, tmp, 16); 396 cm0 = w[4]; 397 cm1 = w[5]; 398 cm2 = w[6]; 399 cm3 = w[7]; 400 buf += 16; 401 len -= 16; 402 } 403 404 br_enc32be(ivbuf + 0, iv0); 405 br_enc32be(ivbuf + 4, iv1); 406 br_enc32be(ivbuf + 8, iv2); 407 br_enc32be(ivbuf + 12, iv3); 408 br_enc32le((unsigned char *)cbcmac + 0, cm0); 409 br_enc32le((unsigned char *)cbcmac + 4, cm1); 410 br_enc32le((unsigned char *)cbcmac + 8, cm2); 411 br_enc32le((unsigned char *)cbcmac + 12, cm3); 412 } 413 414 /* see bearssl_block.h */ 415 const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = { 416 sizeof(br_aes_ct64_ctrcbc_keys), 417 16, 418 4, 419 (void (*)(const br_block_ctrcbc_class **, const void *, size_t)) 420 &br_aes_ct64_ctrcbc_init, 421 (void (*)(const br_block_ctrcbc_class *const *, 422 void *, void *, void *, size_t)) 423 &br_aes_ct64_ctrcbc_encrypt, 424 (void (*)(const br_block_ctrcbc_class *const *, 425 void *, void *, void *, size_t)) 426 &br_aes_ct64_ctrcbc_decrypt, 427 (void (*)(const br_block_ctrcbc_class *const *, 428 void *, void *, size_t)) 429 &br_aes_ct64_ctrcbc_ctr, 430 (void (*)(const br_block_ctrcbc_class *const *, 431 void *, const void *, size_t)) 432 &br_aes_ct64_ctrcbc_mac 433 }; 434