/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <sys/simd.h>
#include <modes/gcm_impl.h>
#ifdef CAN_USE_GCM_ASM
#include <aes/aes_impl.h>
#endif

#define GHASH(c, d, t, o) \
        xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
        (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
        (uint64_t *)(void *)(t));
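
/*
 * Note: GHASH(c, d, t, o) XORs the 16-byte block d into c->gcm_ghash and
 * then stores the GF(2^128) product of that value and the hash subkey
 * c->gcm_H into t, using the carry-less multiplication routine of the
 * selected implementation o. In the common case t is c->gcm_ghash itself,
 * so the net effect is ghash = (ghash ^ d) * H.
 */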

/* Select GCM implementation */
#define IMPL_FASTEST    (UINT32_MAX)
#define IMPL_CYCLE      (UINT32_MAX-1)
#ifdef CAN_USE_GCM_ASM
#define IMPL_AVX        (UINT32_MAX-2)
#endif
#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;

#ifdef CAN_USE_GCM_ASM
/* Does the architecture we run on support the MOVBE instruction? */
boolean_t gcm_avx_can_use_movbe = B_FALSE;
/*
 * Whether to use the optimized openssl gcm and ghash implementations.
 * Set to true if module parameter icp_gcm_impl == "avx".
 */
static boolean_t gcm_use_avx = B_FALSE;
#define GCM_IMPL_USE_AVX        (*(volatile boolean_t *)&gcm_use_avx)

extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);

static inline boolean_t gcm_avx_will_work(void);
static inline void gcm_set_avx(boolean_t);
static inline boolean_t gcm_toggle_avx(void);
static inline size_t gcm_simd_get_htab_size(boolean_t);

static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
    crypto_data_t *, size_t);

static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
    size_t, size_t);
#endif /* ifdef CAN_USE_GCM_ASM */

/*
 * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
 * is done in another function.
 */
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
        if (ctx->gcm_use_avx == B_TRUE)
                return (gcm_mode_encrypt_contiguous_blocks_avx(
                    ctx, data, length, out, block_size));
#endif

        const gcm_impl_ops_t *gops;
        size_t remainder = length;
        size_t need = 0;
        uint8_t *datap = (uint8_t *)data;
        uint8_t *blockp;
        uint8_t *lastp;
        void *iov_or_mp;
        offset_t offset;
        uint8_t *out_data_1;
        uint8_t *out_data_2;
        size_t out_data_1_len;
        uint64_t counter;
        uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

        if (length + ctx->gcm_remainder_len < block_size) {
                /* accumulate bytes here and return */
                bcopy(datap,
                    (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
                    length);
                ctx->gcm_remainder_len += length;
                if (ctx->gcm_copy_to == NULL) {
                        ctx->gcm_copy_to = datap;
                }
                return (CRYPTO_SUCCESS);
        }

        lastp = (uint8_t *)ctx->gcm_cb;
        crypto_init_ptrs(out, &iov_or_mp, &offset);

        gops = gcm_impl_get_ops();
        do {
                /* Unprocessed data from last call. */
                if (ctx->gcm_remainder_len > 0) {
                        need = block_size - ctx->gcm_remainder_len;

                        if (need > remainder)
                                return (CRYPTO_DATA_LEN_RANGE);

                        bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
                            [ctx->gcm_remainder_len], need);

                        blockp = (uint8_t *)ctx->gcm_remainder;
                } else {
                        blockp = datap;
                }

                /*
                 * Increment counter. Counter bits are confined
                 * to the bottom 32 bits of the counter block.
                 */
                counter = ntohll(ctx->gcm_cb[1] & counter_mask);
                counter = htonll(counter + 1);
                counter &= counter_mask;
                ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

                encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
                    (uint8_t *)ctx->gcm_tmp);
                xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

                lastp = (uint8_t *)ctx->gcm_tmp;

                ctx->gcm_processed_data_len += block_size;

                crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
                    &out_data_1_len, &out_data_2, block_size);

                /* copy block to where it belongs */
                if (out_data_1_len == block_size) {
                        copy_block(lastp, out_data_1);
                } else {
                        bcopy(lastp, out_data_1, out_data_1_len);
                        if (out_data_2 != NULL) {
                                bcopy(lastp + out_data_1_len,
                                    out_data_2,
                                    block_size - out_data_1_len);
                        }
                }
                /* update offset */
                out->cd_offset += block_size;

                /* add ciphertext to the hash */
                GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);

                /* Update pointer to next block of data to be processed. */
                if (ctx->gcm_remainder_len != 0) {
                        datap += need;
                        ctx->gcm_remainder_len = 0;
                } else {
                        datap += block_size;
                }

                remainder = (size_t)&data[length] - (size_t)datap;

                /* Incomplete last block. */
                if (remainder > 0 && remainder < block_size) {
                        bcopy(datap, ctx->gcm_remainder, remainder);
                        ctx->gcm_remainder_len = remainder;
                        ctx->gcm_copy_to = datap;
                        goto out;
                }
                ctx->gcm_copy_to = NULL;

        } while (remainder > 0);
out:
        return (CRYPTO_SUCCESS);
}

int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
        (void) copy_block;
#ifdef CAN_USE_GCM_ASM
        if (ctx->gcm_use_avx == B_TRUE)
                return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif

        const gcm_impl_ops_t *gops;
        uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
        uint8_t *ghash, *macp = NULL;
        int i, rv;

        if (out->cd_length <
            (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
                return (CRYPTO_DATA_LEN_RANGE);
        }

        gops = gcm_impl_get_ops();
        ghash = (uint8_t *)ctx->gcm_ghash;

        if (ctx->gcm_remainder_len > 0) {
                uint64_t counter;
                uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

                /*
                 * Here is where we deal with data that is not a
                 * multiple of the block size.
                 */

                /*
                 * Increment counter.
                 */
                counter = ntohll(ctx->gcm_cb[1] & counter_mask);
                counter = htonll(counter + 1);
                counter &= counter_mask;
                ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

                encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
                    (uint8_t *)ctx->gcm_tmp);

                macp = (uint8_t *)ctx->gcm_remainder;
                bzero(macp + ctx->gcm_remainder_len,
                    block_size - ctx->gcm_remainder_len);

                /* XOR with counter block */
                for (i = 0; i < ctx->gcm_remainder_len; i++) {
                        macp[i] ^= tmpp[i];
                }

                /* add ciphertext to the hash */
                GHASH(ctx, macp, ghash, gops);

                ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
        }

        ctx->gcm_len_a_len_c[1] =
            htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
        GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
        encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
            (uint8_t *)ctx->gcm_J0);
        xor_block((uint8_t *)ctx->gcm_J0, ghash);

        if (ctx->gcm_remainder_len > 0) {
                rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
                if (rv != CRYPTO_SUCCESS)
                        return (rv);
        }
        out->cd_offset += ctx->gcm_remainder_len;
        ctx->gcm_remainder_len = 0;
        rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
        if (rv != CRYPTO_SUCCESS)
                return (rv);
        out->cd_offset += ctx->gcm_tag_len;

        return (CRYPTO_SUCCESS);
}

/*
 * This will only deal with decrypting the last block of the input that
 * might not be a multiple of block length.
 */
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
        uint8_t *datap, *outp, *counterp;
        uint64_t counter;
        uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
        int i;

        /*
         * Increment counter.
         * Counter bits are confined to the bottom 32 bits
         */
        counter = ntohll(ctx->gcm_cb[1] & counter_mask);
        counter = htonll(counter + 1);
        counter &= counter_mask;
        ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

        datap = (uint8_t *)ctx->gcm_remainder;
        outp = &((ctx->gcm_pt_buf)[index]);
        counterp = (uint8_t *)ctx->gcm_tmp;

        /* authentication tag */
        bzero((uint8_t *)ctx->gcm_tmp, block_size);
        bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);

        /* add ciphertext to the hash */
        GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());

        /* decrypt remaining ciphertext */
        encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);

        /* XOR with counter block */
        for (i = 0; i < ctx->gcm_remainder_len; i++) {
                outp[i] = datap[i] ^ counterp[i];
        }
}

int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
        (void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
            (void) xor_block;
        size_t new_len;
        uint8_t *new;

        /*
         * Copy contiguous ciphertext input blocks to plaintext buffer.
         * Ciphertext will be decrypted in the final.
         */
        if (length > 0) {
                new_len = ctx->gcm_pt_buf_len + length;
                new = vmem_alloc(new_len, ctx->gcm_kmflag);
                if (new == NULL) {
                        vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
                        ctx->gcm_pt_buf = NULL;
                        return (CRYPTO_HOST_MEMORY);
                }
                bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
                vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
                ctx->gcm_pt_buf = new;
                ctx->gcm_pt_buf_len = new_len;
                bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
                    length);
                ctx->gcm_processed_data_len += length;
        }

        ctx->gcm_remainder_len = 0;
        return (CRYPTO_SUCCESS);
}

int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
        if (ctx->gcm_use_avx == B_TRUE)
                return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif

        const gcm_impl_ops_t *gops;
        size_t pt_len;
        size_t remainder;
        uint8_t *ghash;
        uint8_t *blockp;
        uint8_t *cbp;
        uint64_t counter;
        uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
        int processed = 0, rv;

        ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);

        gops = gcm_impl_get_ops();
        pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
        ghash = (uint8_t *)ctx->gcm_ghash;
        blockp = ctx->gcm_pt_buf;
        remainder = pt_len;
        while (remainder > 0) {
                /* Incomplete last block */
                if (remainder < block_size) {
                        bcopy(blockp, ctx->gcm_remainder, remainder);
                        ctx->gcm_remainder_len = remainder;
                        /*
                         * not expecting any more ciphertext, just
                         * compute plaintext for the remaining input
                         */
                        gcm_decrypt_incomplete_block(ctx, block_size,
                            processed, encrypt_block, xor_block);
                        ctx->gcm_remainder_len = 0;
                        goto out;
                }
                /* add ciphertext to the hash */
                GHASH(ctx, blockp, ghash, gops);

                /*
                 * Increment counter.
                 * Counter bits are confined to the bottom 32 bits
                 */
                counter = ntohll(ctx->gcm_cb[1] & counter_mask);
                counter = htonll(counter + 1);
                counter &= counter_mask;
                ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

                cbp = (uint8_t *)ctx->gcm_tmp;
                encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);

                /* XOR with ciphertext */
                xor_block(cbp, blockp);

                processed += block_size;
                blockp += block_size;
                remainder -= block_size;
        }
out:
        ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
        GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
        encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
            (uint8_t *)ctx->gcm_J0);
        xor_block((uint8_t *)ctx->gcm_J0, ghash);

        /* compare the input authentication tag with what we calculated */
        if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
                /* They don't match */
                return (CRYPTO_INVALID_MAC);
        } else {
                rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
                if (rv != CRYPTO_SUCCESS)
                        return (rv);
                out->cd_offset += pt_len;
        }
        return (CRYPTO_SUCCESS);
}

static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
        size_t tag_len;

        /*
         * Check the length of the authentication tag (in bits).
         */
        tag_len = gcm_param->ulTagBits;
        switch (tag_len) {
        case 32:
        case 64:
        case 96:
        case 104:
        case 112:
        case 120:
        case 128:
                break;
        default:
                return (CRYPTO_MECHANISM_PARAM_INVALID);
        }

        if (gcm_param->ulIvLen == 0)
                return (CRYPTO_MECHANISM_PARAM_INVALID);

        return (CRYPTO_SUCCESS);
}

static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
        const gcm_impl_ops_t *gops;
        uint8_t *cb;
        ulong_t remainder = iv_len;
        ulong_t processed = 0;
        uint8_t *datap, *ghash;
        uint64_t len_a_len_c[2];

        gops = gcm_impl_get_ops();
        ghash = (uint8_t *)ctx->gcm_ghash;
        cb = (uint8_t *)ctx->gcm_cb;
        if (iv_len == 12) {
                bcopy(iv, cb, 12);
                cb[12] = 0;
                cb[13] = 0;
                cb[14] = 0;
                cb[15] = 1;
                /* J0 will be used again in the final */
                copy_block(cb, (uint8_t *)ctx->gcm_J0);
        } else {
                /* GHASH the IV */
                do {
                        if (remainder < block_size) {
                                bzero(cb, block_size);
                                bcopy(&(iv[processed]), cb, remainder);
                                datap = (uint8_t *)cb;
                                remainder = 0;
                        } else {
                                datap = (uint8_t *)(&(iv[processed]));
                                processed += block_size;
                                remainder -= block_size;
                        }
                        GHASH(ctx, datap, ghash, gops);
                } while (remainder > 0);

                len_a_len_c[0] = 0;
                len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
                GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);

                /* J0 will be used again in the final */
                copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
        }
}
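
/*
 * For reference (NIST SP 800-38D): the hash subkey is H = E_K(0^128) and
 * the pre-counter block is J0 = IV || 0^31 || 1 for 96-bit IVs; for any
 * other IV length, J0 = GHASH(IV padded to a block multiple || 0^64 ||
 * [len(IV)]_64), which is what gcm_format_initial_blocks() computes above.
 */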

static int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
        const gcm_impl_ops_t *gops;
        uint8_t *ghash, *datap, *authp;
        size_t remainder, processed;

        /* encrypt zero block to get subkey H */
        bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
        encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
            (uint8_t *)ctx->gcm_H);

        gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
            copy_block, xor_block);

        gops = gcm_impl_get_ops();
        authp = (uint8_t *)ctx->gcm_tmp;
        ghash = (uint8_t *)ctx->gcm_ghash;
        bzero(authp, block_size);
        bzero(ghash, block_size);

        processed = 0;
        remainder = auth_data_len;
        do {
                if (remainder < block_size) {
                        /*
                         * There's not a block full of data, pad rest of
                         * buffer with zero
                         */
                        bzero(authp, block_size);
                        bcopy(&(auth_data[processed]), authp, remainder);
                        datap = (uint8_t *)authp;
                        remainder = 0;
                } else {
                        datap = (uint8_t *)(&(auth_data[processed]));
                        processed += block_size;
                        remainder -= block_size;
                }

                /* add auth data to the hash */
                GHASH(ctx, datap, ghash, gops);

        } while (remainder > 0);

        return (CRYPTO_SUCCESS);
}

/*
 * The following function is called at encrypt or decrypt init time
 * for AES GCM mode.
 *
 * Init the GCM context struct. Handle the cycle and avx implementations here.
 */
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
        int rv;
        CK_AES_GCM_PARAMS *gcm_param;

        if (param != NULL) {
                gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

                if ((rv = gcm_validate_args(gcm_param)) != 0) {
                        return (rv);
                }

                gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
                gcm_ctx->gcm_tag_len >>= 3;
                gcm_ctx->gcm_processed_data_len = 0;

                /* these values are in bits */
                gcm_ctx->gcm_len_a_len_c[0]
                    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));

                rv = CRYPTO_SUCCESS;
                gcm_ctx->gcm_flags |= GCM_MODE;
        } else {
                return (CRYPTO_MECHANISM_PARAM_INVALID);
        }

#ifdef CAN_USE_GCM_ASM
        if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
                gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
        } else {
                /*
                 * Handle the "cycle" implementation by creating avx and
                 * non-avx contexts alternately.
                 */
                gcm_ctx->gcm_use_avx = gcm_toggle_avx();
                /*
                 * We don't handle byte swapped key schedules in the avx
                 * code path.
                 */
                aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
                if (ks->ops->needs_byteswap == B_TRUE) {
                        gcm_ctx->gcm_use_avx = B_FALSE;
                }
                /* Use the MOVBE and the BSWAP variants alternately. */
                if (gcm_ctx->gcm_use_avx == B_TRUE &&
                    zfs_movbe_available() == B_TRUE) {
                        (void) atomic_toggle_boolean_nv(
                            (volatile boolean_t *)&gcm_avx_can_use_movbe);
                }
        }
        /* Allocate Htab memory as needed. */
        if (gcm_ctx->gcm_use_avx == B_TRUE) {
                size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

                if (htab_len == 0) {
                        return (CRYPTO_MECHANISM_PARAM_INVALID);
                }
                gcm_ctx->gcm_htab_len = htab_len;
                gcm_ctx->gcm_Htable =
                    (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);

                if (gcm_ctx->gcm_Htable == NULL) {
                        return (CRYPTO_HOST_MEMORY);
                }
        }
        /* Avx and non avx context initialization differs from here on. */
        if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
                if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
                    gcm_param->pAAD, gcm_param->ulAADLen, block_size,
                    encrypt_block, copy_block, xor_block) != 0) {
                        rv = CRYPTO_MECHANISM_PARAM_INVALID;
                }
#ifdef CAN_USE_GCM_ASM
        } else {
                if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
                    gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
                        rv = CRYPTO_MECHANISM_PARAM_INVALID;
                }
        }
#endif /* ifdef CAN_USE_GCM_ASM */

        return (rv);
}

int
gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
        int rv;
        CK_AES_GMAC_PARAMS *gmac_param;

        if (param != NULL) {
                gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;

                gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
                gcm_ctx->gcm_processed_data_len = 0;

                /* these values are in bits */
                gcm_ctx->gcm_len_a_len_c[0]
                    = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));

                rv = CRYPTO_SUCCESS;
                gcm_ctx->gcm_flags |= GMAC_MODE;
        } else {
                return (CRYPTO_MECHANISM_PARAM_INVALID);
        }

#ifdef CAN_USE_GCM_ASM
        /*
         * Handle the "cycle" implementation by creating avx and non avx
         * contexts alternately.
         */
        if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
                gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
        } else {
                gcm_ctx->gcm_use_avx = gcm_toggle_avx();
        }
        /* We don't handle byte swapped key schedules in the avx code path. */
        aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
        if (ks->ops->needs_byteswap == B_TRUE) {
                gcm_ctx->gcm_use_avx = B_FALSE;
        }
        /* Allocate Htab memory as needed. */
        if (gcm_ctx->gcm_use_avx == B_TRUE) {
                size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

                if (htab_len == 0) {
                        return (CRYPTO_MECHANISM_PARAM_INVALID);
                }
                gcm_ctx->gcm_htab_len = htab_len;
                gcm_ctx->gcm_Htable =
                    (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);

                if (gcm_ctx->gcm_Htable == NULL) {
                        return (CRYPTO_HOST_MEMORY);
                }
        }

        /* Avx and non avx context initialization differs from here on. */
        if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
                if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
                    gmac_param->pAAD, gmac_param->ulAADLen, block_size,
                    encrypt_block, copy_block, xor_block) != 0) {
                        rv = CRYPTO_MECHANISM_PARAM_INVALID;
                }
#ifdef CAN_USE_GCM_ASM
        } else {
                if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
                    gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
                        rv = CRYPTO_MECHANISM_PARAM_INVALID;
                }
        }
#endif /* ifdef CAN_USE_GCM_ASM */

        return (rv);
}

void *
gcm_alloc_ctx(int kmflag)
{
        gcm_ctx_t *gcm_ctx;

        if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
                return (NULL);

        gcm_ctx->gcm_flags = GCM_MODE;
        return (gcm_ctx);
}

void *
gmac_alloc_ctx(int kmflag)
{
        gcm_ctx_t *gcm_ctx;

        if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
                return (NULL);

        gcm_ctx->gcm_flags = GMAC_MODE;
        return (gcm_ctx);
}

void
gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
{
        ctx->gcm_kmflag = kmflag;
}

/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
        .name = "fastest"
};

/* All compiled in implementations */
static const gcm_impl_ops_t *gcm_all_impl[] = {
        &gcm_generic_impl,
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
        &gcm_pclmulqdq_impl,
#endif
};

/* Indicate that benchmark has been completed */
static boolean_t gcm_impl_initialized = B_FALSE;

/* Hold all supported implementations */
static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];

/*
 * Returns the GCM operations for encrypt/decrypt/key setup. When a
 * SIMD implementation is not allowed in the current context, fall
 * back to the fastest generic implementation.
 */
const gcm_impl_ops_t *
gcm_impl_get_ops()
{
        if (!kfpu_allowed())
                return (&gcm_generic_impl);

        const gcm_impl_ops_t *ops = NULL;
        const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

        switch (impl) {
        case IMPL_FASTEST:
                ASSERT(gcm_impl_initialized);
                ops = &gcm_fastest_impl;
                break;
        case IMPL_CYCLE:
                /* Cycle through supported implementations */
                ASSERT(gcm_impl_initialized);
                ASSERT3U(gcm_supp_impl_cnt, >, 0);
                static size_t cycle_impl_idx = 0;
                size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
                ops = gcm_supp_impl[idx];
                break;
#ifdef CAN_USE_GCM_ASM
        case IMPL_AVX:
                /*
                 * Make sure that we return a valid implementation while
                 * switching to the avx implementation since there still
                 * may be unfinished non-avx contexts around.
                 */
                ops = &gcm_generic_impl;
                break;
#endif
        default:
                ASSERT3U(impl, <, gcm_supp_impl_cnt);
                ASSERT3U(gcm_supp_impl_cnt, >, 0);
                if (impl < ARRAY_SIZE(gcm_all_impl))
                        ops = gcm_supp_impl[impl];
                break;
        }

        ASSERT3P(ops, !=, NULL);

        return (ops);
}

/*
 * Initialize all supported implementations.
 */
void
gcm_impl_init(void)
{
        gcm_impl_ops_t *curr_impl;
        int i, c;

        /* Move supported implementations into gcm_supp_impls */
        for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
                curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];

                if (curr_impl->is_supported())
                        gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
        }
        gcm_supp_impl_cnt = c;

        /*
         * Set the fastest implementation given the assumption that the
         * hardware accelerated version is the fastest.
         */
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
        if (gcm_pclmulqdq_impl.is_supported()) {
                memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
                    sizeof (gcm_fastest_impl));
        } else
#endif
        {
                memcpy(&gcm_fastest_impl, &gcm_generic_impl,
                    sizeof (gcm_fastest_impl));
        }

        strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);

#ifdef CAN_USE_GCM_ASM
        /*
         * Use the avx implementation if it's available and the implementation
         * hasn't changed from its default value of fastest on module load.
         */
        if (gcm_avx_will_work()) {
#ifdef HAVE_MOVBE
                if (zfs_movbe_available() == B_TRUE) {
                        atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
                }
#endif
                if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
                        gcm_set_avx(B_TRUE);
                }
        }
#endif
        /* Finish initialization */
        atomic_swap_32(&icp_gcm_impl, user_sel_impl);
        gcm_impl_initialized = B_TRUE;
}

static const struct {
        char *name;
        uint32_t sel;
} gcm_impl_opts[] = {
        { "cycle",      IMPL_CYCLE },
        { "fastest",    IMPL_FASTEST },
#ifdef CAN_USE_GCM_ASM
        { "avx",        IMPL_AVX },
#endif
};

/*
 * Function sets desired gcm implementation.
 *
 * If we are called before init(), user preference will be saved in
 * user_sel_impl, and applied in later init() call. This occurs when the
 * module parameter is specified on module load. Otherwise, directly update
 * icp_gcm_impl.
 *
 * @val         Name of gcm implementation to use
 * @param       Unused.
 */
int
gcm_impl_set(const char *val)
{
        int err = -EINVAL;
        char req_name[GCM_IMPL_NAME_MAX];
        uint32_t impl = GCM_IMPL_READ(user_sel_impl);
        size_t i;

        /* sanitize input */
        i = strnlen(val, GCM_IMPL_NAME_MAX);
        if (i == 0 || i >= GCM_IMPL_NAME_MAX)
                return (err);

        strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
        while (i > 0 && isspace(req_name[i-1]))
                i--;
        req_name[i] = '\0';

        /* Check mandatory options */
        for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
                /* Ignore avx implementation if it won't work. */
                if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
                        continue;
                }
#endif
                if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
                        impl = gcm_impl_opts[i].sel;
                        err = 0;
                        break;
                }
        }

        /* check all supported impl if init() was already called */
        if (err != 0 && gcm_impl_initialized) {
                /* check all supported implementations */
                for (i = 0; i < gcm_supp_impl_cnt; i++) {
                        if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
                                impl = i;
                                err = 0;
                                break;
                        }
                }
        }
#ifdef CAN_USE_GCM_ASM
        /*
         * Use the avx implementation if available and the requested one is
         * avx or fastest.
         */
        if (gcm_avx_will_work() == B_TRUE &&
            (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
                gcm_set_avx(B_TRUE);
        } else {
                gcm_set_avx(B_FALSE);
        }
#endif

        if (err == 0) {
                if (gcm_impl_initialized)
                        atomic_swap_32(&icp_gcm_impl, impl);
                else
                        atomic_swap_32(&user_sel_impl, impl);
        }

        return (err);
}
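
/*
 * Example usage (hypothetical paths, depending on how the module is built
 * and packaged): the implementation can be selected at load time, e.g.
 * "modprobe icp icp_gcm_impl=avx", or at runtime by writing "cycle",
 * "fastest", "avx", or the name of a compiled-in implementation to the
 * icp_gcm_impl module parameter under /sys/module/.../parameters/.
 */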

#if defined(_KERNEL) && defined(__linux__)

static int
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
{
        return (gcm_impl_set(val));
}

static int
icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
{
        int i, cnt = 0;
        char *fmt;
        const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

        ASSERT(gcm_impl_initialized);

        /* list mandatory options */
        for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
                /* Ignore avx implementation if it won't work. */
                if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
                        continue;
                }
#endif
                fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
                cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name);
        }

        /* list all supported implementations */
        for (i = 0; i < gcm_supp_impl_cnt; i++) {
                fmt = (i == impl) ? "[%s] " : "%s ";
                cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name);
        }

        return (cnt);
}

module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
    NULL, 0644);
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(_KERNEL) && defined(__linux__) */

#ifdef CAN_USE_GCM_ASM
#define GCM_BLOCK_LEN 16
/*
 * The openssl asm routines are 6x aggregated and need that many bytes
 * at minimum.
 */
#define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
#define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
/*
 * Ensure the chunk size is reasonable since we are allocating a
 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
 */
#define GCM_AVX_MAX_CHUNK_SIZE \
        (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)

/* Clear the FPU registers since they hold sensitive internal state. */
#define clear_fpu_regs() clear_fpu_regs_avx()
#define GHASH_AVX(ctx, in, len) \
        gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
        in, len)

#define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)

/* Get the chunk size module parameter. */
#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size

/*
 * Module parameter: number of bytes to process at once while owning the FPU.
 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is
 * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
 */
static uint32_t gcm_avx_chunk_size =
        ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
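
/*
 * With GCM_AVX_MIN_DECRYPT_BYTES == 96, the default above works out to
 * (32768 / 96) * 96 = 32736 bytes, and GCM_AVX_MAX_CHUNK_SIZE to
 * (131072 / 96) * 96 = 131040 bytes.
 */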

extern void clear_fpu_regs_avx(void);
extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void aes_encrypt_intel(const uint32_t rk[], int nr,
    const uint32_t pt[4], uint32_t ct[4]);

extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
    const uint8_t *in, size_t len);

extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

static inline boolean_t
gcm_avx_will_work(void)
{
        /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
        return (kfpu_allowed() &&
            zfs_avx_available() && zfs_aes_available() &&
            zfs_pclmulqdq_available());
}

static inline void
gcm_set_avx(boolean_t val)
{
        if (gcm_avx_will_work() == B_TRUE) {
                atomic_swap_32(&gcm_use_avx, val);
        }
}

static inline boolean_t
gcm_toggle_avx(void)
{
        if (gcm_avx_will_work() == B_TRUE) {
                return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
        } else {
                return (B_FALSE);
        }
}

static inline size_t
gcm_simd_get_htab_size(boolean_t simd_mode)
{
        switch (simd_mode) {
        case B_TRUE:
                return (2 * 6 * 2 * sizeof (uint64_t));

        default:
                return (0);
        }
}

/*
 * Clear sensitive data in the context.
 *
 * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
 * ctx->gcm_Htable contain the hash sub key which protects authentication.
 *
 * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
 * a known plaintext attack; they consist of the IV and the first and last
 * counter respectively. Whether they should be cleared is debatable.
 */
static inline void
gcm_clear_ctx(gcm_ctx_t *ctx)
{
        bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
        bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
        bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
        bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
}

/* Increment the GCM counter block by n. */
static inline void
gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
{
        uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
        uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);

        counter = htonll(counter + n);
        counter &= counter_mask;
        ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
}
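
/*
 * Illustration: gcm_cb[1] holds the last eight bytes of the counter block
 * in memory (wire) order; the 32-bit counter field occupies its final four
 * bytes. counter_mask = ntohll(0x00000000ffffffffULL) selects exactly that
 * field in native byte order, ntohll()/htonll() convert it so the addition
 * carries correctly, and the upper bytes (the tail of the IV) stay
 * untouched. E.g. with a 12-byte IV the block ends in 00 00 00 01, and
 * after one increment it ends in 00 00 00 02.
 */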

/*
 * Encrypt multiple blocks of data in GCM mode.
 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
 * if possible. While processing a chunk the FPU is "locked".
 */
static int
gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
    size_t length, crypto_data_t *out, size_t block_size)
{
        size_t bleft = length;
        size_t need = 0;
        size_t done = 0;
        uint8_t *datap = (uint8_t *)data;
        size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
        const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
        uint64_t *ghash = ctx->gcm_ghash;
        uint64_t *cb = ctx->gcm_cb;
        uint8_t *ct_buf = NULL;
        uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
        int rv = CRYPTO_SUCCESS;

        ASSERT(block_size == GCM_BLOCK_LEN);
        /*
         * If the last call left an incomplete block, try to fill
         * it first.
         */
        if (ctx->gcm_remainder_len > 0) {
                need = block_size - ctx->gcm_remainder_len;
                if (length < need) {
                        /* Accumulate bytes here and return. */
                        bcopy(datap, (uint8_t *)ctx->gcm_remainder +
                            ctx->gcm_remainder_len, length);

                        ctx->gcm_remainder_len += length;
                        if (ctx->gcm_copy_to == NULL) {
                                ctx->gcm_copy_to = datap;
                        }
                        return (CRYPTO_SUCCESS);
                } else {
                        /* Complete incomplete block. */
                        bcopy(datap, (uint8_t *)ctx->gcm_remainder +
                            ctx->gcm_remainder_len, need);

                        ctx->gcm_copy_to = NULL;
                }
        }

        /* Allocate a buffer to encrypt to if there is enough input. */
        if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
                ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag);
                if (ct_buf == NULL) {
                        return (CRYPTO_HOST_MEMORY);
                }
        }

        /* If we completed an incomplete block, encrypt and write it out. */
        if (ctx->gcm_remainder_len > 0) {
                kfpu_begin();
                aes_encrypt_intel(key->encr_ks.ks32, key->nr,
                    (const uint32_t *)cb, (uint32_t *)tmp);

                gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
                GHASH_AVX(ctx, tmp, block_size);
                clear_fpu_regs();
                kfpu_end();
                rv = crypto_put_output_data(tmp, out, block_size);
                out->cd_offset += block_size;
                gcm_incr_counter_block(ctx);
                ctx->gcm_processed_data_len += block_size;
                bleft -= need;
                datap += need;
                ctx->gcm_remainder_len = 0;
        }

        /* Do the bulk encryption in chunk_size blocks. */
        for (; bleft >= chunk_size; bleft -= chunk_size) {
                kfpu_begin();
                done = aesni_gcm_encrypt(
                    datap, ct_buf, chunk_size, key, cb, ghash);

                clear_fpu_regs();
                kfpu_end();
                if (done != chunk_size) {
                        rv = CRYPTO_FAILED;
                        goto out_nofpu;
                }
                rv = crypto_put_output_data(ct_buf, out, chunk_size);
                if (rv != CRYPTO_SUCCESS) {
                        goto out_nofpu;
                }
                out->cd_offset += chunk_size;
                datap += chunk_size;
                ctx->gcm_processed_data_len += chunk_size;
        }
        /* Check if we are already done. */
        if (bleft == 0) {
                goto out_nofpu;
        }
        /* Bulk encrypt the remaining data. */
        kfpu_begin();
        if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
                done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
                if (done == 0) {
                        rv = CRYPTO_FAILED;
                        goto out;
                }
                rv = crypto_put_output_data(ct_buf, out, done);
                if (rv != CRYPTO_SUCCESS) {
                        goto out;
                }
                out->cd_offset += done;
                ctx->gcm_processed_data_len += done;
                datap += done;
                bleft -= done;

        }
        /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
        while (bleft > 0) {
                if (bleft < block_size) {
                        bcopy(datap, ctx->gcm_remainder, bleft);
                        ctx->gcm_remainder_len = bleft;
                        ctx->gcm_copy_to = datap;
                        goto out;
                }
                /* Encrypt, hash and write out. */
                aes_encrypt_intel(key->encr_ks.ks32, key->nr,
                    (const uint32_t *)cb, (uint32_t *)tmp);

                gcm_xor_avx(datap, tmp);
                GHASH_AVX(ctx, tmp, block_size);
                rv = crypto_put_output_data(tmp, out, block_size);
                if (rv != CRYPTO_SUCCESS) {
                        goto out;
                }
                out->cd_offset += block_size;
                gcm_incr_counter_block(ctx);
                ctx->gcm_processed_data_len += block_size;
                datap += block_size;
                bleft -= block_size;
        }
out:
        clear_fpu_regs();
        kfpu_end();
out_nofpu:
        if (ct_buf != NULL) {
                vmem_free(ct_buf, chunk_size);
        }
        return (rv);
}

/*
 * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual
 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
 */
static int
gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
        uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
        uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
        uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
        size_t rem_len = ctx->gcm_remainder_len;
        const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
        int aes_rounds = ((aes_key_t *)keysched)->nr;
        int rv;

        ASSERT(block_size == GCM_BLOCK_LEN);

        if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
                return (CRYPTO_DATA_LEN_RANGE);
        }

        kfpu_begin();
        /* Pad last incomplete block with zeros, encrypt and hash. */
        if (rem_len > 0) {
                uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
                const uint32_t *cb = (uint32_t *)ctx->gcm_cb;

                aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
                bzero(remainder + rem_len, block_size - rem_len);
                for (int i = 0; i < rem_len; i++) {
                        remainder[i] ^= tmp[i];
                }
                GHASH_AVX(ctx, remainder, block_size);
                ctx->gcm_processed_data_len += rem_len;
                /* No need to increment counter_block, it's the last block. */
        }
        /* Finish tag. */
        ctx->gcm_len_a_len_c[1] =
            htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
        GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
        aes_encrypt_intel(keysched, aes_rounds, J0, J0);

        gcm_xor_avx((uint8_t *)J0, ghash);
        clear_fpu_regs();
        kfpu_end();

        /* Output remainder. */
        if (rem_len > 0) {
                rv = crypto_put_output_data(remainder, out, rem_len);
                if (rv != CRYPTO_SUCCESS)
                        return (rv);
        }
        out->cd_offset += rem_len;
        ctx->gcm_remainder_len = 0;
        rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
        if (rv != CRYPTO_SUCCESS)
                return (rv);

        out->cd_offset += ctx->gcm_tag_len;
        /* Clear sensitive data in the context before returning. */
        gcm_clear_ctx(ctx);
        return (CRYPTO_SUCCESS);
}

/*
 * Finalize decryption: We have just accumulated ciphertext, so now we
 * decrypt it here in place.
 */
static int
gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
        ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
        ASSERT3U(block_size, ==, 16);

        size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
        size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
        uint8_t *datap = ctx->gcm_pt_buf;
        const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
        uint32_t *cb = (uint32_t *)ctx->gcm_cb;
        uint64_t *ghash = ctx->gcm_ghash;
        uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
        int rv = CRYPTO_SUCCESS;
        size_t bleft, done;

        /*
         * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
         * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple
         * of GCM_AVX_MIN_DECRYPT_BYTES.
         */
        for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
                kfpu_begin();
                done = aesni_gcm_decrypt(datap, datap, chunk_size,
                    (const void *)key, ctx->gcm_cb, ghash);
                clear_fpu_regs();
                kfpu_end();
                if (done != chunk_size) {
                        return (CRYPTO_FAILED);
                }
                datap += done;
        }
        /* Decrypt remainder, which is less than chunk size, in one go. */
        kfpu_begin();
        if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
                done = aesni_gcm_decrypt(datap, datap, bleft,
                    (const void *)key, ctx->gcm_cb, ghash);
                if (done == 0) {
                        clear_fpu_regs();
                        kfpu_end();
                        return (CRYPTO_FAILED);
                }
                datap += done;
                bleft -= done;
        }
        ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);

        /*
         * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
         * decrypt them block by block.
         */
        while (bleft > 0) {
                /* Incomplete last block. */
                if (bleft < block_size) {
                        uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;

                        bzero(lastb, block_size);
                        bcopy(datap, lastb, bleft);
                        /* The GCM processing. */
                        GHASH_AVX(ctx, lastb, block_size);
                        aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
                        for (size_t i = 0; i < bleft; i++) {
                                datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
                        }
                        break;
                }
                /* The GCM processing. */
                GHASH_AVX(ctx, datap, block_size);
                aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
                gcm_xor_avx((uint8_t *)tmp, datap);
                gcm_incr_counter_block(ctx);

                datap += block_size;
                bleft -= block_size;
        }
        if (rv != CRYPTO_SUCCESS) {
                clear_fpu_regs();
                kfpu_end();
                return (rv);
        }
        /* Decryption done, finish the tag. */
        ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
        GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
        aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
            (uint32_t *)ctx->gcm_J0);

        gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);

        /* We are done with the FPU, restore its state. */
        clear_fpu_regs();
        kfpu_end();

        /* Compare the input authentication tag with what we calculated. */
        if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
                /* They don't match. */
                return (CRYPTO_INVALID_MAC);
        }
        rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
        if (rv != CRYPTO_SUCCESS) {
                return (rv);
        }
        out->cd_offset += pt_len;
        gcm_clear_ctx(ctx);
        return (CRYPTO_SUCCESS);
}

/*
 * Initialize the GCM params H, Htable and the counter block. Save the
 * initial counter block.
 */
static int
gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size)
{
        uint8_t *cb = (uint8_t *)ctx->gcm_cb;
        uint64_t *H = ctx->gcm_H;
        const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
        int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
        uint8_t *datap = auth_data;
        size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
        size_t bleft;

        ASSERT(block_size == GCM_BLOCK_LEN);

        /* Init H (encrypt zero block) and create the initial counter block. */
        bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash));
        bzero(H, sizeof (ctx->gcm_H));
        kfpu_begin();
        aes_encrypt_intel(keysched, aes_rounds,
            (const uint32_t *)H, (uint32_t *)H);

        gcm_init_htab_avx(ctx->gcm_Htable, H);

        if (iv_len == 12) {
                bcopy(iv, cb, 12);
                cb[12] = 0;
                cb[13] = 0;
                cb[14] = 0;
                cb[15] = 1;
                /* We need the ICB later. */
                bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0));
        } else {
                /*
                 * Most consumers use 12 byte IVs, so it's OK to use the
                 * original routines for other IV sizes, just avoid nesting
                 * kfpu_begin calls.
                 */
                clear_fpu_regs();
                kfpu_end();
                gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
                    aes_copy_block, aes_xor_block);
                kfpu_begin();
        }

        /* Openssl post increments the counter, adjust for that. */
        gcm_incr_counter_block(ctx);

        /* Ghash AAD in chunk_size blocks. */
        for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
                GHASH_AVX(ctx, datap, chunk_size);
                datap += chunk_size;
                clear_fpu_regs();
                kfpu_end();
                kfpu_begin();
        }
        /* Ghash the remainder and handle possible incomplete GCM block. */
        if (bleft > 0) {
                size_t incomp = bleft % block_size;

                bleft -= incomp;
                if (bleft > 0) {
                        GHASH_AVX(ctx, datap, bleft);
                        datap += bleft;
                }
                if (incomp > 0) {
                        /* Zero pad and hash incomplete last block. */
                        uint8_t *authp = (uint8_t *)ctx->gcm_tmp;

                        bzero(authp, block_size);
                        bcopy(datap, authp, incomp);
                        GHASH_AVX(ctx, authp, block_size);
                }
        }
        clear_fpu_regs();
        kfpu_end();
        return (CRYPTO_SUCCESS);
}

#if defined(_KERNEL)
static int
icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
{
        unsigned long val;
        char val_rounded[16];
        int error = 0;

        error = kstrtoul(buf, 0, &val);
        if (error)
                return (error);

        val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

        if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
                return (-EINVAL);

        snprintf(val_rounded, 16, "%u", (uint32_t)val);
        error = param_set_uint(val_rounded, kp);
        return (error);
}

module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
    param_get_uint, &gcm_avx_chunk_size, 0644);

MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
    "How many bytes to process while owning the FPU");

#endif /* defined(_KERNEL) */
#endif /* ifdef CAN_USE_GCM_ASM */