1 /* 2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining 5 * a copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sublicense, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 25 #include "inner.h" 26 27 /* 28 * Implementation Notes 29 * ==================== 30 * 31 * The combined CTR + CBC-MAC functions can only handle full blocks, 32 * so some buffering is necessary. Moreover, EAX has a special padding 33 * rule for CBC-MAC, which implies that we cannot compute the MAC over 34 * the last received full block until we know whether we are at the 35 * end of the data or not. 36 * 37 * - 'ptr' contains a value from 1 to 16, which is the number of bytes 38 * accumulated in buf[] that still needs to be processed with the 39 * current OMAC computation. Beware that this can go to 16: a 40 * complete block cannot be processed until it is known whether it 41 * is the last block or not. However, it can never be 0, because 42 * OMAC^t works on an input that is at least one-block long. 43 * 44 * - When processing the message itself, CTR encryption/decryption is 45 * also done at the same time. The first 'ptr' bytes of buf[] then 46 * contains the encrypted bytes, while the last '16 - ptr' bytes of 47 * buf[] are the remnants of the stream block, to be used against 48 * the next input bytes, when available. 49 * 50 * - The current counter and running CBC-MAC values are kept in 'ctr' 51 * and 'cbcmac', respectively. 52 * 53 * - The derived keys for padding are kept in L2 and L4 (double and 54 * quadruple of Enc_K(0^n), in GF(2^128), respectively). 55 */ 56 57 /* 58 * Start an OMAC computation; the first block is the big-endian 59 * representation of the provided value ('val' must fit on one byte). 60 * We make it a delayed block because it may also be the last one, 61 */ 62 static void 63 omac_start(br_eax_context *ctx, unsigned val) 64 { 65 memset(ctx->cbcmac, 0, sizeof ctx->cbcmac); 66 memset(ctx->buf, 0, sizeof ctx->buf); 67 ctx->buf[15] = val; 68 ctx->ptr = 16; 69 } 70 71 /* 72 * Double a value in finite field GF(2^128), defined with modulus 73 * X^128+X^7+X^2+X+1. 74 */ 75 static void 76 double_gf128(unsigned char *dst, const unsigned char *src) 77 { 78 unsigned cc; 79 int i; 80 81 cc = 0x87 & -((unsigned)src[0] >> 7); 82 for (i = 15; i >= 0; i --) { 83 unsigned z; 84 85 z = (src[i] << 1) ^ cc; 86 cc = z >> 8; 87 dst[i] = (unsigned char)z; 88 } 89 } 90 91 /* 92 * Apply padding to the last block, currently in ctx->buf (with 93 * ctx->ptr bytes), and finalize OMAC computation. 94 */ 95 static void 96 do_pad(br_eax_context *ctx) 97 { 98 unsigned char *pad; 99 size_t ptr, u; 100 101 ptr = ctx->ptr; 102 if (ptr == 16) { 103 pad = ctx->L2; 104 } else { 105 ctx->buf[ptr ++] = 0x80; 106 memset(ctx->buf + ptr, 0x00, 16 - ptr); 107 pad = ctx->L4; 108 } 109 for (u = 0; u < sizeof ctx->buf; u ++) { 110 ctx->buf[u] ^= pad[u]; 111 } 112 (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf); 113 } 114 115 /* 116 * Apply CBC-MAC on the provided data, with buffering management. 117 * 118 * Upon entry, two situations are acceptable: 119 * 120 * ctx->ptr == 0: there is no data to process in ctx->buf 121 * ctx->ptr == 16: there is a full block of unprocessed data in ctx->buf 122 * 123 * Upon exit, ctx->ptr may be zero only if it was already zero on entry, 124 * and len == 0. In all other situations, ctx->ptr will be non-zero on 125 * exit (and may have value 16). 126 */ 127 static void 128 do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len) 129 { 130 size_t ptr; 131 132 if (len == 0) { 133 return; 134 } 135 ptr = len & (size_t)15; 136 if (ptr == 0) { 137 len -= 16; 138 ptr = 16; 139 } else { 140 len -= ptr; 141 } 142 if (ctx->ptr == 16) { 143 (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, 144 ctx->buf, sizeof ctx->buf); 145 } 146 (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, len); 147 memcpy(ctx->buf, (const unsigned char *)data + len, ptr); 148 ctx->ptr = ptr; 149 } 150 151 /* see bearssl_aead.h */ 152 void 153 br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx) 154 { 155 unsigned char tmp[16], iv[16]; 156 157 ctx->vtable = &br_eax_vtable; 158 ctx->bctx = bctx; 159 160 /* 161 * Encrypt a whole-zero block to compute L2 and L4. 162 */ 163 memset(tmp, 0, sizeof tmp); 164 memset(iv, 0, sizeof iv); 165 (*bctx)->ctr(bctx, iv, tmp, sizeof tmp); 166 double_gf128(ctx->L2, tmp); 167 double_gf128(ctx->L4, ctx->L2); 168 } 169 170 /* see bearssl_aead.h */ 171 void 172 br_eax_capture(const br_eax_context *ctx, br_eax_state *st) 173 { 174 /* 175 * We capture the three OMAC* states _after_ processing the 176 * initial block (assuming that nonce, message and AAD are 177 * all non-empty). 178 */ 179 int i; 180 181 memset(st->st, 0, sizeof st->st); 182 for (i = 0; i < 3; i ++) { 183 unsigned char tmp[16]; 184 185 memset(tmp, 0, sizeof tmp); 186 tmp[15] = (unsigned char)i; 187 (*ctx->bctx)->mac(ctx->bctx, st->st[i], tmp, sizeof tmp); 188 } 189 } 190 191 /* see bearssl_aead.h */ 192 void 193 br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len) 194 { 195 /* 196 * Process nonce with OMAC^0. 197 */ 198 omac_start(ctx, 0); 199 do_cbcmac_chunk(ctx, nonce, len); 200 do_pad(ctx); 201 memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac); 202 203 /* 204 * Start OMAC^1 for the AAD ("header" in the EAX specification). 205 */ 206 omac_start(ctx, 1); 207 208 /* 209 * We use ctx->head[0] as temporary flag to mark that we are 210 * using a "normal" reset(). 211 */ 212 ctx->head[0] = 0; 213 } 214 215 /* see bearssl_aead.h */ 216 void 217 br_eax_reset_pre_aad(br_eax_context *ctx, const br_eax_state *st, 218 const void *nonce, size_t len) 219 { 220 if (len == 0) { 221 omac_start(ctx, 0); 222 } else { 223 memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac); 224 ctx->ptr = 0; 225 do_cbcmac_chunk(ctx, nonce, len); 226 } 227 do_pad(ctx); 228 memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac); 229 230 memcpy(ctx->cbcmac, st->st[1], sizeof ctx->cbcmac); 231 ctx->ptr = 0; 232 233 memcpy(ctx->ctr, st->st[2], sizeof ctx->ctr); 234 235 /* 236 * We use ctx->head[0] as a flag to indicate that we use a 237 * a recorded state, with ctx->ctr containing the preprocessed 238 * first block for OMAC^2. 239 */ 240 ctx->head[0] = 1; 241 } 242 243 /* see bearssl_aead.h */ 244 void 245 br_eax_reset_post_aad(br_eax_context *ctx, const br_eax_state *st, 246 const void *nonce, size_t len) 247 { 248 if (len == 0) { 249 omac_start(ctx, 0); 250 } else { 251 memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac); 252 ctx->ptr = 0; 253 do_cbcmac_chunk(ctx, nonce, len); 254 } 255 do_pad(ctx); 256 memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac); 257 memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce); 258 259 memcpy(ctx->head, st->st[1], sizeof ctx->head); 260 261 memcpy(ctx->cbcmac, st->st[2], sizeof ctx->cbcmac); 262 ctx->ptr = 0; 263 } 264 265 /* see bearssl_aead.h */ 266 void 267 br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len) 268 { 269 size_t ptr; 270 271 ptr = ctx->ptr; 272 273 /* 274 * If there is a partial block, first complete it. 275 */ 276 if (ptr < 16) { 277 size_t clen; 278 279 clen = 16 - ptr; 280 if (len <= clen) { 281 memcpy(ctx->buf + ptr, data, len); 282 ctx->ptr = ptr + len; 283 return; 284 } 285 memcpy(ctx->buf + ptr, data, clen); 286 data = (const unsigned char *)data + clen; 287 len -= clen; 288 } 289 290 /* 291 * We now have a full block in buf[], and this is not the last 292 * block. 293 */ 294 do_cbcmac_chunk(ctx, data, len); 295 } 296 297 /* see bearssl_aead.h */ 298 void 299 br_eax_flip(br_eax_context *ctx) 300 { 301 int from_capture; 302 303 /* 304 * ctx->head[0] may be non-zero if the context was reset with 305 * a pre-AAD captured state. In that case, ctx->ctr[] contains 306 * the state for OMAC^2 _after_ processing the first block. 307 */ 308 from_capture = ctx->head[0]; 309 310 /* 311 * Complete the OMAC computation on the AAD. 312 */ 313 do_pad(ctx); 314 memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac); 315 316 /* 317 * Start OMAC^2 for the encrypted data. 318 * If the context was initialized from a captured state, then 319 * the OMAC^2 value is in the ctr[] array. 320 */ 321 if (from_capture) { 322 memcpy(ctx->cbcmac, ctx->ctr, sizeof ctx->cbcmac); 323 ctx->ptr = 0; 324 } else { 325 omac_start(ctx, 2); 326 } 327 328 /* 329 * Initial counter value for CTR is the processed nonce. 330 */ 331 memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce); 332 } 333 334 /* see bearssl_aead.h */ 335 void 336 br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len) 337 { 338 unsigned char *dbuf; 339 size_t ptr; 340 341 /* 342 * Ensure that there is actual data to process. 343 */ 344 if (len == 0) { 345 return; 346 } 347 348 dbuf = data; 349 ptr = ctx->ptr; 350 351 /* 352 * We may have ptr == 0 here if we initialized from a captured 353 * state. In that case, there is no partially consumed block 354 * or unprocessed data. 355 */ 356 if (ptr != 0 && ptr != 16) { 357 /* 358 * We have a partially consumed block. 359 */ 360 size_t u, clen; 361 362 clen = 16 - ptr; 363 if (len <= clen) { 364 clen = len; 365 } 366 if (encrypt) { 367 for (u = 0; u < clen; u ++) { 368 ctx->buf[ptr + u] ^= dbuf[u]; 369 } 370 memcpy(dbuf, ctx->buf + ptr, clen); 371 } else { 372 for (u = 0; u < clen; u ++) { 373 unsigned dx, sx; 374 375 sx = ctx->buf[ptr + u]; 376 dx = dbuf[u]; 377 ctx->buf[ptr + u] = dx; 378 dbuf[u] = sx ^ dx; 379 } 380 } 381 382 if (len <= clen) { 383 ctx->ptr = ptr + clen; 384 return; 385 } 386 dbuf += clen; 387 len -= clen; 388 } 389 390 /* 391 * We now have a complete encrypted block in buf[] that must still 392 * be processed with OMAC, and this is not the final buf. 393 * Exception: when ptr == 0, no block has been produced yet. 394 */ 395 if (ptr != 0) { 396 (*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, 397 ctx->buf, sizeof ctx->buf); 398 } 399 400 /* 401 * Do CTR encryption or decryption and CBC-MAC for all full blocks 402 * except the last. 403 */ 404 ptr = len & (size_t)15; 405 if (ptr == 0) { 406 len -= 16; 407 ptr = 16; 408 } else { 409 len -= ptr; 410 } 411 if (encrypt) { 412 (*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, 413 dbuf, len); 414 } else { 415 (*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac, 416 dbuf, len); 417 } 418 dbuf += len; 419 420 /* 421 * Compute next block of CTR stream, and use it to finish 422 * encrypting or decrypting the data. 423 */ 424 memset(ctx->buf, 0, sizeof ctx->buf); 425 (*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf); 426 if (encrypt) { 427 size_t u; 428 429 for (u = 0; u < ptr; u ++) { 430 ctx->buf[u] ^= dbuf[u]; 431 } 432 memcpy(dbuf, ctx->buf, ptr); 433 } else { 434 size_t u; 435 436 for (u = 0; u < ptr; u ++) { 437 unsigned dx, sx; 438 439 sx = ctx->buf[u]; 440 dx = dbuf[u]; 441 ctx->buf[u] = dx; 442 dbuf[u] = sx ^ dx; 443 } 444 } 445 ctx->ptr = ptr; 446 } 447 448 /* 449 * Complete tag computation. The final tag is written in ctx->cbcmac. 450 */ 451 static void 452 do_final(br_eax_context *ctx) 453 { 454 size_t u; 455 456 do_pad(ctx); 457 458 /* 459 * Authentication tag is the XOR of the three OMAC outputs for 460 * the nonce, AAD and encrypted data. 461 */ 462 for (u = 0; u < 16; u ++) { 463 ctx->cbcmac[u] ^= ctx->nonce[u] ^ ctx->head[u]; 464 } 465 } 466 467 /* see bearssl_aead.h */ 468 void 469 br_eax_get_tag(br_eax_context *ctx, void *tag) 470 { 471 do_final(ctx); 472 memcpy(tag, ctx->cbcmac, sizeof ctx->cbcmac); 473 } 474 475 /* see bearssl_aead.h */ 476 void 477 br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len) 478 { 479 do_final(ctx); 480 memcpy(tag, ctx->cbcmac, len); 481 } 482 483 /* see bearssl_aead.h */ 484 uint32_t 485 br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len) 486 { 487 unsigned char tmp[16]; 488 size_t u; 489 int x; 490 491 br_eax_get_tag(ctx, tmp); 492 x = 0; 493 for (u = 0; u < len; u ++) { 494 x |= tmp[u] ^ ((const unsigned char *)tag)[u]; 495 } 496 return EQ0(x); 497 } 498 499 /* see bearssl_aead.h */ 500 uint32_t 501 br_eax_check_tag(br_eax_context *ctx, const void *tag) 502 { 503 return br_eax_check_tag_trunc(ctx, tag, 16); 504 } 505 506 /* see bearssl_aead.h */ 507 const br_aead_class br_eax_vtable = { 508 16, 509 (void (*)(const br_aead_class **, const void *, size_t)) 510 &br_eax_reset, 511 (void (*)(const br_aead_class **, const void *, size_t)) 512 &br_eax_aad_inject, 513 (void (*)(const br_aead_class **)) 514 &br_eax_flip, 515 (void (*)(const br_aead_class **, int, void *, size_t)) 516 &br_eax_run, 517 (void (*)(const br_aead_class **, void *)) 518 &br_eax_get_tag, 519 (uint32_t (*)(const br_aead_class **, const void *)) 520 &br_eax_check_tag, 521 (void (*)(const br_aead_class **, void *, size_t)) 522 &br_eax_get_tag_trunc, 523 (uint32_t (*)(const br_aead_class **, const void *, size_t)) 524 &br_eax_check_tag_trunc 525 }; 526