/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <sys/simd.h>
#include <modes/gcm_impl.h>
#ifdef CAN_USE_GCM_ASM
#include <aes/aes_impl.h>
#endif

#define	GHASH(c, d, t, o) \
	xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
	(o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
	(uint64_t *)(void *)(t));
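/*
 * GHASH() XORs one 16-byte block (d) into c->gcm_ghash and then multiplies
 * the result by the hash subkey c->gcm_H in GF(2^128), using the mul()
 * routine of the selected implementation (o). The product is written to (t),
 * which at most call sites below is gcm_ghash itself.
 */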
/* Select GCM implementation */
#define	IMPL_FASTEST	(UINT32_MAX)
#define	IMPL_CYCLE	(UINT32_MAX-1)
#ifdef CAN_USE_GCM_ASM
#define	IMPL_AVX	(UINT32_MAX-2)
#endif
#define	GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;

#ifdef CAN_USE_GCM_ASM
/* Does the architecture we run on support the MOVBE instruction? */
boolean_t gcm_avx_can_use_movbe = B_FALSE;
/*
 * Whether to use the optimized openssl gcm and ghash implementations.
 * Set to true if module parameter icp_gcm_impl == "avx".
 */
static boolean_t gcm_use_avx = B_FALSE;
#define	GCM_IMPL_USE_AVX	(*(volatile boolean_t *)&gcm_use_avx)

extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);

static inline boolean_t gcm_avx_will_work(void);
static inline void gcm_set_avx(boolean_t);
static inline boolean_t gcm_toggle_avx(void);
static inline size_t gcm_simd_get_htab_size(boolean_t);

static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
    crypto_data_t *, size_t);

static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
    size_t, size_t);
#endif /* ifdef CAN_USE_GCM_ASM */

/*
 * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
 * is done in another function.
 */
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_mode_encrypt_contiguous_blocks_avx(
		    ctx, data, length, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t remainder = length;
	size_t need = 0;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		bcopy(datap,
		    (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    length);
		ctx->gcm_remainder_len += length;
		if (ctx->gcm_copy_to == NULL) {
			ctx->gcm_copy_to = datap;
		}
		return (CRYPTO_SUCCESS);
	}

	lastp = (uint8_t *)ctx->gcm_cb;
	crypto_init_ptrs(out, &iov_or_mp, &offset);

	gops = gcm_impl_get_ops();
	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder)
				return (CRYPTO_DATA_LEN_RANGE);

			bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
		    &out_data_1_len, &out_data_2, block_size);

		/* copy block to where it belongs */
		if (out_data_1_len == block_size) {
			copy_block(lastp, out_data_1);
		} else {
			bcopy(lastp, out_data_1, out_data_1_len);
			if (out_data_2 != NULL) {
				bcopy(lastp + out_data_1_len,
				    out_data_2,
				    block_size - out_data_1_len);
			}
		}
		/* update offset */
		out->cd_offset += block_size;

		/* add ciphertext to the hash */
		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			bcopy(datap, ctx->gcm_remainder, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	return (CRYPTO_SUCCESS);
}
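/*
 * Finish GCM encryption: encrypt and hash a possibly incomplete last block,
 * hash the lengths of AAD and ciphertext, encrypt J0 and XOR it into the
 * hash to form the tag, then write the remaining ciphertext and the tag to
 * 'out'.
 */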
/* ARGSUSED */
int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint8_t *ghash, *macp = NULL;
	int i, rv;

	if (out->cd_length <
	    (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;

	if (ctx->gcm_remainder_len > 0) {
		uint64_t counter;
		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

		/*
		 * Here is where we deal with data that is not a
		 * multiple of the block size.
		 */

		/*
		 * Increment counter.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);

		macp = (uint8_t *)ctx->gcm_remainder;
		bzero(macp + ctx->gcm_remainder_len,
		    block_size - ctx->gcm_remainder_len);

		/* XOR with counter block */
		for (i = 0; i < ctx->gcm_remainder_len; i++) {
			macp[i] ^= tmpp[i];
		}

		/* add ciphertext to the hash */
		GHASH(ctx, macp, ghash, gops);

		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
	}

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	if (ctx->gcm_remainder_len > 0) {
		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += ctx->gcm_remainder_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	out->cd_offset += ctx->gcm_tag_len;

	return (CRYPTO_SUCCESS);
}
/*
 * This will only deal with decrypting the last block of the input that
 * might not be a multiple of block length.
 */
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	uint8_t *datap, *outp, *counterp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int i;

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	datap = (uint8_t *)ctx->gcm_remainder;
	outp = &((ctx->gcm_pt_buf)[index]);
	counterp = (uint8_t *)ctx->gcm_tmp;

	/* authentication tag */
	bzero((uint8_t *)ctx->gcm_tmp, block_size);
	bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);

	/* add ciphertext to the hash */
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());

	/* decrypt remaining ciphertext */
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);

	/* XOR with counter block */
	for (i = 0; i < ctx->gcm_remainder_len; i++) {
		outp[i] = datap[i] ^ counterp[i];
	}
}

/* ARGSUSED */
int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	size_t new_len;
	uint8_t *new;

	/*
	 * Copy contiguous ciphertext input blocks to plaintext buffer.
	 * Ciphertext will be decrypted in the final.
	 */
	if (length > 0) {
		new_len = ctx->gcm_pt_buf_len + length;
		new = vmem_alloc(new_len, ctx->gcm_kmflag);
		if (new == NULL) {
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			ctx->gcm_pt_buf = NULL;
			return (CRYPTO_HOST_MEMORY);
		}
		bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
		vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		ctx->gcm_pt_buf = new;
		ctx->gcm_pt_buf_len = new_len;
		bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
		    length);
		ctx->gcm_processed_data_len += length;
	}

	ctx->gcm_remainder_len = 0;
	return (CRYPTO_SUCCESS);
}
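/*
 * Finish GCM decryption: decrypt the ciphertext accumulated in gcm_pt_buf
 * in place, hash the lengths, derive the tag and compare it with the tag
 * appended to the input. Only on a match is the plaintext copied to 'out'.
 */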
int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t pt_len;
	size_t remainder;
	uint8_t *ghash;
	uint8_t *blockp;
	uint8_t *cbp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int processed = 0, rv;

	ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);

	gops = gcm_impl_get_ops();
	pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	ghash = (uint8_t *)ctx->gcm_ghash;
	blockp = ctx->gcm_pt_buf;
	remainder = pt_len;
	while (remainder > 0) {
		/* Incomplete last block */
		if (remainder < block_size) {
			bcopy(blockp, ctx->gcm_remainder, remainder);
			ctx->gcm_remainder_len = remainder;
			/*
			 * not expecting any more ciphertext, just
			 * compute plaintext for the remaining input
			 */
			gcm_decrypt_incomplete_block(ctx, block_size,
			    processed, encrypt_block, xor_block);
			ctx->gcm_remainder_len = 0;
			goto out;
		}
		/* add ciphertext to the hash */
		GHASH(ctx, blockp, ghash, gops);

		/*
		 * Increment counter.
		 * Counter bits are confined to the bottom 32 bits
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		cbp = (uint8_t *)ctx->gcm_tmp;
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);

		/* XOR with ciphertext */
		xor_block(cbp, blockp);

		processed += block_size;
		blockp += block_size;
		remainder -= block_size;
	}
out:
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	/* compare the input authentication tag with what we calculated */
	if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match */
		return (CRYPTO_INVALID_MAC);
	} else {
		rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
		out->cd_offset += pt_len;
	}
	return (CRYPTO_SUCCESS);
}

static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
	size_t tag_len;

	/*
	 * Check the length of the authentication tag (in bits).
	 */
	tag_len = gcm_param->ulTagBits;
	switch (tag_len) {
	case 32:
	case 64:
	case 96:
	case 104:
	case 112:
	case 120:
	case 128:
		break;
	default:
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

	if (gcm_param->ulIvLen == 0)
		return (CRYPTO_MECHANISM_PARAM_INVALID);

	return (CRYPTO_SUCCESS);
}

static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *cb;
	ulong_t remainder = iv_len;
	ulong_t processed = 0;
	uint8_t *datap, *ghash;
	uint64_t len_a_len_c[2];

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;
	cb = (uint8_t *)ctx->gcm_cb;
	if (iv_len == 12) {
		bcopy(iv, cb, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV */
		do {
			if (remainder < block_size) {
				bzero(cb, block_size);
				bcopy(&(iv[processed]), cb, remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash, gops);
		} while (remainder > 0);

		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}
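/*
 * Initialize the GCM state: compute the hash subkey H by encrypting an
 * all-zero block, derive the initial counter block J0 from the IV via
 * gcm_format_initial_blocks(), and hash the AAD into gcm_ghash.
 */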
static int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	/* encrypt zero block to get subkey H */
	bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

	gops = gcm_impl_get_ops();
	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	bzero(authp, block_size);
	bzero(ghash, block_size);

	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There's not a block full of data, pad rest of
			 * buffer with zero
			 */
			bzero(authp, block_size);
			bcopy(&(auth_data[processed]), authp, remainder);
			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash, gops);

	} while (remainder > 0);

	return (CRYPTO_SUCCESS);
}

/*
 * The following function is called at encrypt or decrypt init time
 * for AES GCM mode.
 *
 * Init the GCM context struct. Handle the cycle and avx implementations here.
 */
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	int rv;
	CK_AES_GCM_PARAMS *gcm_param;

	if (param != NULL) {
		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

		if ((rv = gcm_validate_args(gcm_param)) != 0) {
			return (rv);
		}

		gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
		gcm_ctx->gcm_tag_len >>= 3;
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GCM_MODE;
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

#ifdef CAN_USE_GCM_ASM
	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		/*
		 * Handle the "cycle" implementation by creating avx and
		 * non-avx contexts alternately.
		 */
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
		/*
		 * We don't handle byte swapped key schedules in the avx
		 * code path.
		 */
		aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
		if (ks->ops->needs_byteswap == B_TRUE) {
			gcm_ctx->gcm_use_avx = B_FALSE;
		}
		/* Use the MOVBE and the BSWAP variants alternately. */
		if (gcm_ctx->gcm_use_avx == B_TRUE &&
		    zfs_movbe_available() == B_TRUE) {
			(void) atomic_toggle_boolean_nv(
			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
		}
	}
	/* Allocate Htab memory as needed. */
	if (gcm_ctx->gcm_use_avx == B_TRUE) {
		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

		if (htab_len == 0) {
			return (CRYPTO_MECHANISM_PARAM_INVALID);
		}
		gcm_ctx->gcm_htab_len = htab_len;
		gcm_ctx->gcm_Htable =
		    (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);

		if (gcm_ctx->gcm_Htable == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}
	/* Avx and non avx context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
		if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
		    gcm_param->pAAD, gcm_param->ulAADLen, block_size,
		    encrypt_block, copy_block, xor_block) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
		    gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}
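/*
 * Init the GMAC context struct. Like gcm_init_ctx() above, but uses the
 * fixed GMAC IV length and tag size (AES_GMAC_IV_LEN, AES_GMAC_TAG_BITS);
 * GMAC only authenticates the AAD, there is no payload to encrypt.
 */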
int
gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	int rv;
	CK_AES_GMAC_PARAMS *gmac_param;

	if (param != NULL) {
		gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;

		gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GMAC_MODE;
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

#ifdef CAN_USE_GCM_ASM
	/*
	 * Handle the "cycle" implementation by creating avx and non avx
	 * contexts alternately.
	 */
	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
	}
	/* We don't handle byte swapped key schedules in the avx code path. */
	aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
	if (ks->ops->needs_byteswap == B_TRUE) {
		gcm_ctx->gcm_use_avx = B_FALSE;
	}
	/* Allocate Htab memory as needed. */
	if (gcm_ctx->gcm_use_avx == B_TRUE) {
		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);

		if (htab_len == 0) {
			return (CRYPTO_MECHANISM_PARAM_INVALID);
		}
		gcm_ctx->gcm_htab_len = htab_len;
		gcm_ctx->gcm_Htable =
		    (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag);

		if (gcm_ctx->gcm_Htable == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}

	/* Avx and non avx context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
		if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
		    gmac_param->pAAD, gmac_param->ulAADLen, block_size,
		    encrypt_block, copy_block, xor_block) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
		    gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}
void *
gcm_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GCM_MODE;
	return (gcm_ctx);
}

void *
gmac_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GMAC_MODE;
	return (gcm_ctx);
}

void
gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
{
	ctx->gcm_kmflag = kmflag;
}

/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
	.name = "fastest"
};

/* All compiled in implementations */
const gcm_impl_ops_t *gcm_all_impl[] = {
	&gcm_generic_impl,
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	&gcm_pclmulqdq_impl,
#endif
};

/* Indicate that benchmark has been completed */
static boolean_t gcm_impl_initialized = B_FALSE;

/* Hold all supported implementations */
static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];

/*
 * Returns the GCM operations for encrypt/decrypt/key setup. When a
 * SIMD implementation is not allowed in the current context, fall
 * back to the generic implementation.
 */
const gcm_impl_ops_t *
gcm_impl_get_ops()
{
	if (!kfpu_allowed())
		return (&gcm_generic_impl);

	const gcm_impl_ops_t *ops = NULL;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	switch (impl) {
	case IMPL_FASTEST:
		ASSERT(gcm_impl_initialized);
		ops = &gcm_fastest_impl;
		break;
	case IMPL_CYCLE:
		/* Cycle through supported implementations */
		ASSERT(gcm_impl_initialized);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		static size_t cycle_impl_idx = 0;
		size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
		ops = gcm_supp_impl[idx];
		break;
#ifdef CAN_USE_GCM_ASM
	case IMPL_AVX:
		/*
		 * Make sure that we return a valid implementation while
		 * switching to the avx implementation since there still
		 * may be unfinished non-avx contexts around.
		 */
		ops = &gcm_generic_impl;
		break;
#endif
	default:
		ASSERT3U(impl, <, gcm_supp_impl_cnt);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		if (impl < ARRAY_SIZE(gcm_all_impl))
			ops = gcm_supp_impl[impl];
		break;
	}

	ASSERT3P(ops, !=, NULL);

	return (ops);
}
/*
 * Initialize all supported implementations.
 */
void
gcm_impl_init(void)
{
	gcm_impl_ops_t *curr_impl;
	int i, c;

	/* Move supported implementations into gcm_supp_impls */
	for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
		curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];

		if (curr_impl->is_supported())
			gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
	}
	gcm_supp_impl_cnt = c;

	/*
	 * Set the fastest implementation given the assumption that the
	 * hardware accelerated version is the fastest.
	 */
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	if (gcm_pclmulqdq_impl.is_supported()) {
		memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
		    sizeof (gcm_fastest_impl));
	} else
#endif
	{
		memcpy(&gcm_fastest_impl, &gcm_generic_impl,
		    sizeof (gcm_fastest_impl));
	}

	strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);

#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if it's available and the implementation
	 * hasn't changed from its default value of fastest on module load.
	 */
	if (gcm_avx_will_work()) {
#ifdef HAVE_MOVBE
		if (zfs_movbe_available() == B_TRUE) {
			atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
		}
#endif
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_set_avx(B_TRUE);
		}
	}
#endif
	/* Finish initialization */
	atomic_swap_32(&icp_gcm_impl, user_sel_impl);
	gcm_impl_initialized = B_TRUE;
}

static const struct {
	char *name;
	uint32_t sel;
} gcm_impl_opts[] = {
	{ "cycle",	IMPL_CYCLE },
	{ "fastest",	IMPL_FASTEST },
#ifdef CAN_USE_GCM_ASM
	{ "avx",	IMPL_AVX },
#endif
};

/*
 * Function sets desired gcm implementation.
 *
 * If we are called before init(), the user preference will be saved in
 * user_sel_impl and applied in the later init() call. This occurs when the
 * module parameter is specified on module load. Otherwise, directly update
 * icp_gcm_impl.
 *
 * @val		Name of gcm implementation to use
 * @param	Unused.
 */
int
gcm_impl_set(const char *val)
{
	int err = -EINVAL;
	char req_name[GCM_IMPL_NAME_MAX];
	uint32_t impl = GCM_IMPL_READ(user_sel_impl);
	size_t i;

	/* sanitize input */
	i = strnlen(val, GCM_IMPL_NAME_MAX);
	if (i == 0 || i >= GCM_IMPL_NAME_MAX)
		return (err);

	strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
	while (i > 0 && isspace(req_name[i-1]))
		i--;
	req_name[i] = '\0';

	/* Check mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
			impl = gcm_impl_opts[i].sel;
			err = 0;
			break;
		}
	}

	/* check all supported impl if init() was already called */
	if (err != 0 && gcm_impl_initialized) {
		/* check all supported implementations */
		for (i = 0; i < gcm_supp_impl_cnt; i++) {
			if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
				impl = i;
				err = 0;
				break;
			}
		}
	}
#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if available and the requested one is
	 * avx or fastest.
	 */
	if (gcm_avx_will_work() == B_TRUE &&
	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
		gcm_set_avx(B_TRUE);
	} else {
		gcm_set_avx(B_FALSE);
	}
#endif

	if (err == 0) {
		if (gcm_impl_initialized)
			atomic_swap_32(&icp_gcm_impl, impl);
		else
			atomic_swap_32(&user_sel_impl, impl);
	}

	return (err);
}
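/*
 * For example, assuming the ICP is loaded as its own "icp" kernel module
 * (the parameter may be exposed under a different module name in other
 * builds), the implementation can be selected at runtime with:
 *
 *	echo fastest > /sys/module/icp/parameters/icp_gcm_impl
 *
 * or at load time via the icp_gcm_impl module parameter registered with
 * module_param_call() below.
 */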
#if defined(_KERNEL) && defined(__linux__)

static int
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
{
	return (gcm_impl_set(val));
}

static int
icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
{
	int i, cnt = 0;
	char *fmt;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	ASSERT(gcm_impl_initialized);

	/* list mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
		cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name);
	}

	/* list all supported implementations */
	for (i = 0; i < gcm_supp_impl_cnt; i++) {
		fmt = (i == impl) ? "[%s] " : "%s ";
		cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name);
	}

	return (cnt);
}

module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
    NULL, 0644);
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(_KERNEL) && defined(__linux__) */

#ifdef CAN_USE_GCM_ASM
#define	GCM_BLOCK_LEN	16
/*
 * The openssl asm routines are 6x aggregated and need that many bytes
 * at minimum.
 */
#define	GCM_AVX_MIN_DECRYPT_BYTES	(GCM_BLOCK_LEN * 6)
#define	GCM_AVX_MIN_ENCRYPT_BYTES	(GCM_BLOCK_LEN * 6 * 3)
/*
 * Ensure the chunk size is reasonable since we are allocating a
 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
 */
#define	GCM_AVX_MAX_CHUNK_SIZE \
	(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
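/*
 * With GCM_BLOCK_LEN of 16, GCM_AVX_MIN_DECRYPT_BYTES works out to 96 and
 * GCM_AVX_MIN_ENCRYPT_BYTES to 288 bytes; GCM_AVX_MAX_CHUNK_SIZE is then
 * (131072 / 96) * 96 = 131040 bytes, i.e. just under 128 KiB.
 */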
/* Get the chunk size module parameter. */
#define	GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size

/* Clear the FPU registers since they hold sensitive internal state. */
#define	clear_fpu_regs() clear_fpu_regs_avx()
#define	GHASH_AVX(ctx, in, len) \
	gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
	in, len)

#define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)

/*
 * Module parameter: number of bytes to process at once while owning the FPU.
 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and
 * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
 */
static uint32_t gcm_avx_chunk_size =
	((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

extern void clear_fpu_regs_avx(void);
extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void aes_encrypt_intel(const uint32_t rk[], int nr,
    const uint32_t pt[4], uint32_t ct[4]);

extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
    const uint8_t *in, size_t len);

extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

static inline boolean_t
gcm_avx_will_work(void)
{
	/* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
	return (kfpu_allowed() &&
	    zfs_avx_available() && zfs_aes_available() &&
	    zfs_pclmulqdq_available());
}

static inline void
gcm_set_avx(boolean_t val)
{
	if (gcm_avx_will_work() == B_TRUE) {
		atomic_swap_32(&gcm_use_avx, val);
	}
}

static inline boolean_t
gcm_toggle_avx(void)
{
	if (gcm_avx_will_work() == B_TRUE) {
		return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
	} else {
		return (B_FALSE);
	}
}

static inline size_t
gcm_simd_get_htab_size(boolean_t simd_mode)
{
	switch (simd_mode) {
	case B_TRUE:
		return (2 * 6 * 2 * sizeof (uint64_t));

	default:
		return (0);
	}
}

/*
 * Clear sensitive data in the context.
 *
 * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
 * ctx->gcm_Htable contain the hash sub key which protects authentication.
 *
 * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
 * a known plaintext attack; they consist of the IV and the first and last
 * counter block, respectively. Whether they should be cleared is debatable.
 */
static inline void
gcm_clear_ctx(gcm_ctx_t *ctx)
{
	bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
	bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
	bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
	bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
}

/* Increment the GCM counter block by n. */
static inline void
gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
{
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);

	counter = htonll(counter + n);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
}
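/*
 * Only the last 32 bits of the 16-byte counter block take part in the
 * increment above; the upper 96 bits (the IV part) are preserved. For
 * instance, a counter block ending in 0x000000ff becomes one ending in
 * 0x00000100 after gcm_incr_counter_block(ctx), and the count wraps
 * modulo 2^32.
 */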
/*
 * Encrypt multiple blocks of data in GCM mode.
 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
 * if possible. While processing a chunk the FPU is "locked".
 */
static int
gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
    size_t length, crypto_data_t *out, size_t block_size)
{
	size_t bleft = length;
	size_t need = 0;
	size_t done = 0;
	uint8_t *datap = (uint8_t *)data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint64_t *ghash = ctx->gcm_ghash;
	uint64_t *cb = ctx->gcm_cb;
	uint8_t *ct_buf = NULL;
	uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;

	ASSERT(block_size == GCM_BLOCK_LEN);
	/*
	 * If the last call left an incomplete block, try to fill
	 * it first.
	 */
	if (ctx->gcm_remainder_len > 0) {
		need = block_size - ctx->gcm_remainder_len;
		if (length < need) {
			/* Accumulate bytes here and return. */
			bcopy(datap, (uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, length);

			ctx->gcm_remainder_len += length;
			if (ctx->gcm_copy_to == NULL) {
				ctx->gcm_copy_to = datap;
			}
			return (CRYPTO_SUCCESS);
		} else {
			/* Complete incomplete block. */
			bcopy(datap, (uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, need);

			ctx->gcm_copy_to = NULL;
		}
	}

	/* Allocate a buffer to encrypt to if there is enough input. */
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag);
		if (ct_buf == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}

	/* If we completed an incomplete block, encrypt and write it out. */
	if (ctx->gcm_remainder_len > 0) {
		kfpu_begin();
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		clear_fpu_regs();
		kfpu_end();
		rv = crypto_put_output_data(tmp, out, block_size);
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		bleft -= need;
		datap += need;
		ctx->gcm_remainder_len = 0;
	}

	/* Do the bulk encryption in chunk_size blocks. */
	for (; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_encrypt(
		    datap, ct_buf, chunk_size, key, cb, ghash);

		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			rv = CRYPTO_FAILED;
			goto out_nofpu;
		}
		rv = crypto_put_output_data(ct_buf, out, chunk_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out_nofpu;
		}
		out->cd_offset += chunk_size;
		datap += chunk_size;
		ctx->gcm_processed_data_len += chunk_size;
	}
	/* Check if we are already done. */
	if (bleft == 0) {
		goto out_nofpu;
	}
	/* Bulk encrypt the remaining data. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
		if (done == 0) {
			rv = CRYPTO_FAILED;
			goto out;
		}
		rv = crypto_put_output_data(ct_buf, out, done);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += done;
		ctx->gcm_processed_data_len += done;
		datap += done;
		bleft -= done;

	}
	/* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
	while (bleft > 0) {
		if (bleft < block_size) {
			bcopy(datap, ctx->gcm_remainder, bleft);
			ctx->gcm_remainder_len = bleft;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		/* Encrypt, hash and write out. */
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx(datap, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		rv = crypto_put_output_data(tmp, out, block_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		datap += block_size;
		bleft -= block_size;
	}
out:
	clear_fpu_regs();
	kfpu_end();
out_nofpu:
	if (ct_buf != NULL) {
		vmem_free(ct_buf, chunk_size);
	}
	return (rv);
}
/*
 * Finalize the encryption: Zero-fill, encrypt, hash and write out any
 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
 */
static int
gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
	size_t rem_len = ctx->gcm_remainder_len;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)keysched)->nr;
	int rv;

	ASSERT(block_size == GCM_BLOCK_LEN);

	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	kfpu_begin();
	/* Pad last incomplete block with zeros, encrypt and hash. */
	if (rem_len > 0) {
		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;

		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
		bzero(remainder + rem_len, block_size - rem_len);
		for (int i = 0; i < rem_len; i++) {
			remainder[i] ^= tmp[i];
		}
		GHASH_AVX(ctx, remainder, block_size);
		ctx->gcm_processed_data_len += rem_len;
		/* No need to increment counter_block, it's the last block. */
	}
	/* Finish tag. */
	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(keysched, aes_rounds, J0, J0);

	gcm_xor_avx((uint8_t *)J0, ghash);
	clear_fpu_regs();
	kfpu_end();

	/* Output remainder. */
	if (rem_len > 0) {
		rv = crypto_put_output_data(remainder, out, rem_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += rem_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);

	out->cd_offset += ctx->gcm_tag_len;
	/* Clear sensitive data in the context before returning. */
	gcm_clear_ctx(ctx);
	return (CRYPTO_SUCCESS);
}
/*
 * Finalize decryption: so far we have only accumulated the ciphertext, so
 * now we decrypt it here in place.
 */
static int
gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
	ASSERT3U(block_size, ==, 16);

	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	uint8_t *datap = ctx->gcm_pt_buf;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
	uint64_t *ghash = ctx->gcm_ghash;
	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;
	size_t bleft, done;

	/*
	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple
	 * of GCM_AVX_MIN_DECRYPT_BYTES.
	 */
	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_decrypt(datap, datap, chunk_size,
		    (const void *)key, ctx->gcm_cb, ghash);
		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			return (CRYPTO_FAILED);
		}
		datap += done;
	}
	/* Decrypt remainder, which is less than chunk size, in one go. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
		done = aesni_gcm_decrypt(datap, datap, bleft,
		    (const void *)key, ctx->gcm_cb, ghash);
		if (done == 0) {
			clear_fpu_regs();
			kfpu_end();
			return (CRYPTO_FAILED);
		}
		datap += done;
		bleft -= done;
	}
	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);

	/*
	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
	 * decrypt them block by block.
	 */
	while (bleft > 0) {
		/* Incomplete last block. */
		if (bleft < block_size) {
			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;

			bzero(lastb, block_size);
			bcopy(datap, lastb, bleft);
			/* The GCM processing. */
			GHASH_AVX(ctx, lastb, block_size);
			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
			for (size_t i = 0; i < bleft; i++) {
				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
			}
			break;
		}
		/* The GCM processing. */
		GHASH_AVX(ctx, datap, block_size);
		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
		gcm_xor_avx((uint8_t *)tmp, datap);
		gcm_incr_counter_block(ctx);

		datap += block_size;
		bleft -= block_size;
	}
	if (rv != CRYPTO_SUCCESS) {
		clear_fpu_regs();
		kfpu_end();
		return (rv);
	}
	/* Decryption done, finish the tag. */
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
	    (uint32_t *)ctx->gcm_J0);

	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);

	/* We are done with the FPU, restore its state. */
	clear_fpu_regs();
	kfpu_end();

	/* Compare the input authentication tag with what we calculated. */
	if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match. */
		return (CRYPTO_INVALID_MAC);
	}
	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
	if (rv != CRYPTO_SUCCESS) {
		return (rv);
	}
	out->cd_offset += pt_len;
	gcm_clear_ctx(ctx);
	return (CRYPTO_SUCCESS);
}
/*
 * Initialize the GCM params H, Htable and the counter block. Save the
 * initial counter block.
 */
static int
gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size)
{
	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
	uint64_t *H = ctx->gcm_H;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
	uint8_t *datap = auth_data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t bleft;

	ASSERT(block_size == GCM_BLOCK_LEN);

	/* Init H (encrypt zero block) and create the initial counter block. */
	bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash));
	bzero(H, sizeof (ctx->gcm_H));
	kfpu_begin();
	aes_encrypt_intel(keysched, aes_rounds,
	    (const uint32_t *)H, (uint32_t *)H);

	gcm_init_htab_avx(ctx->gcm_Htable, H);

	if (iv_len == 12) {
		bcopy(iv, cb, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* We need the ICB later. */
		bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0));
	} else {
		/*
		 * Most consumers use 12 byte IVs, so it's OK to use the
		 * original routines for other IV sizes, just avoid nesting
		 * kfpu_begin calls.
		 */
		clear_fpu_regs();
		kfpu_end();
		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
		    aes_copy_block, aes_xor_block);
		kfpu_begin();
	}

	/* Openssl post increments the counter, adjust for that. */
	gcm_incr_counter_block(ctx);

	/* Ghash AAD in chunk_size blocks. */
	for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
		GHASH_AVX(ctx, datap, chunk_size);
		datap += chunk_size;
		clear_fpu_regs();
		kfpu_end();
		kfpu_begin();
	}
	/* Ghash the remainder and handle possible incomplete GCM block. */
	if (bleft > 0) {
		size_t incomp = bleft % block_size;

		bleft -= incomp;
		if (bleft > 0) {
			GHASH_AVX(ctx, datap, bleft);
			datap += bleft;
		}
		if (incomp > 0) {
			/* Zero pad and hash incomplete last block. */
			uint8_t *authp = (uint8_t *)ctx->gcm_tmp;

			bzero(authp, block_size);
			bcopy(datap, authp, incomp);
			GHASH_AVX(ctx, authp, block_size);
		}
	}
	clear_fpu_regs();
	kfpu_end();
	return (CRYPTO_SUCCESS);
}

#if defined(_KERNEL)
static int
icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	char val_rounded[16];
	int error = 0;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

	if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
		return (-EINVAL);

	snprintf(val_rounded, 16, "%u", (uint32_t)val);
	error = param_set_uint(val_rounded, kp);
	return (error);
}

module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
    param_get_uint, &gcm_avx_chunk_size, 0644);

MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
    "How many bytes to process while owning the FPU");

#endif /* defined(_KERNEL) */
#endif /* ifdef CAN_USE_GCM_ASM */