/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <sys/simd.h>
#include <modes/gcm_impl.h>
#ifdef CAN_USE_GCM_ASM
#include <aes/aes_impl.h>
#endif

#define	GHASH(c, d, t, o) \
	xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
	(o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
	(uint64_t *)(void *)(t));
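
/*
 * The GHASH macro above performs one step of the GHASH function defined for
 * GCM: the block d is XORed into the running hash c->gcm_ghash and the
 * result is multiplied by the hash subkey c->gcm_H in GF(2^128).  The
 * product is written to t, which is c->gcm_ghash itself at most call sites
 * (gcm_format_initial_blocks() directs the final IV hash to c->gcm_J0
 * instead).
 */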

/* Select GCM implementation */
#define	IMPL_FASTEST	(UINT32_MAX)
#define	IMPL_CYCLE	(UINT32_MAX-1)
#ifdef CAN_USE_GCM_ASM
#define	IMPL_AVX	(UINT32_MAX-2)
#endif
#define	GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;

#ifdef CAN_USE_GCM_ASM
/* Does the architecture we run on support the MOVBE instruction? */
boolean_t gcm_avx_can_use_movbe = B_FALSE;
/*
 * Whether to use the optimized openssl gcm and ghash implementations.
 * Set to true if module parameter icp_gcm_impl == "avx".
 */
static boolean_t gcm_use_avx = B_FALSE;
#define	GCM_IMPL_USE_AVX	(*(volatile boolean_t *)&gcm_use_avx)

static inline boolean_t gcm_avx_will_work(void);
static inline void gcm_set_avx(boolean_t);
static inline boolean_t gcm_toggle_avx(void);
extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);

static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
    crypto_data_t *, size_t);

static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
    size_t, size_t);
#endif /* ifdef CAN_USE_GCM_ASM */

/*
 * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
 * is done in another function.
 */
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_mode_encrypt_contiguous_blocks_avx(
		    ctx, data, length, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t remainder = length;
	size_t need = 0;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		bcopy(datap,
		    (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    length);
		ctx->gcm_remainder_len += length;
		if (ctx->gcm_copy_to == NULL) {
			ctx->gcm_copy_to = datap;
		}
		return (CRYPTO_SUCCESS);
	}

	lastp = (uint8_t *)ctx->gcm_cb;
	crypto_init_ptrs(out, &iov_or_mp, &offset);

	gops = gcm_impl_get_ops();
	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder)
				return (CRYPTO_DATA_LEN_RANGE);

			bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
		    &out_data_1_len, &out_data_2, block_size);

		/* copy block to where it belongs */
		if (out_data_1_len == block_size) {
			copy_block(lastp, out_data_1);
		} else {
			bcopy(lastp, out_data_1, out_data_1_len);
			if (out_data_2 != NULL) {
				bcopy(lastp + out_data_1_len,
				    out_data_2,
				    block_size - out_data_1_len);
			}
		}
		/* update offset */
		out->cd_offset += block_size;

		/* add ciphertext to the hash */
		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			bcopy(datap, ctx->gcm_remainder, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	return (CRYPTO_SUCCESS);
}
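
/*
 * A note on the counter arithmetic used above and below: the 16 byte counter
 * block is stored as two big-endian uint64_t words in gcm_cb[].  Per the GCM
 * spec only the last 32 bits are incremented, so the low word gcm_cb[1] is
 * masked, converted to host order with ntohll(), incremented, converted back
 * and merged with the untouched upper bits.  For example, a counter block
 * ending in the bytes 00 00 00 ff becomes one ending in 00 00 01 00,
 * regardless of the host's endianness.  The avx code path does the same
 * thing in gcm_incr_counter_block_by() further down.
 */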

/* ARGSUSED */
int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint8_t *ghash, *macp = NULL;
	int i, rv;

	if (out->cd_length <
	    (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;

	if (ctx->gcm_remainder_len > 0) {
		uint64_t counter;
		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

		/*
		 * Here is where we deal with data that is not a
		 * multiple of the block size.
		 */

		/*
		 * Increment counter.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);

		macp = (uint8_t *)ctx->gcm_remainder;
		bzero(macp + ctx->gcm_remainder_len,
		    block_size - ctx->gcm_remainder_len);

		/* XOR with counter block */
		for (i = 0; i < ctx->gcm_remainder_len; i++) {
			macp[i] ^= tmpp[i];
		}

		/* add ciphertext to the hash */
		GHASH(ctx, macp, ghash, gops);

		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
	}

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	if (ctx->gcm_remainder_len > 0) {
		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += ctx->gcm_remainder_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	out->cd_offset += ctx->gcm_tag_len;

	return (CRYPTO_SUCCESS);
}

/*
 * This will only deal with decrypting the last block of the input that
 * might not be a multiple of block length.
 */
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	uint8_t *datap, *outp, *counterp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int i;

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	datap = (uint8_t *)ctx->gcm_remainder;
	outp = &((ctx->gcm_pt_buf)[index]);
	counterp = (uint8_t *)ctx->gcm_tmp;

	/* authentication tag */
	bzero((uint8_t *)ctx->gcm_tmp, block_size);
	bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);

	/* add ciphertext to the hash */
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());

	/* decrypt remaining ciphertext */
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);

	/* XOR with counter block */
	for (i = 0; i < ctx->gcm_remainder_len; i++) {
		outp[i] = datap[i] ^ counterp[i];
	}
}

/* ARGSUSED */
int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	size_t new_len;
	uint8_t *new;

	/*
	 * Copy contiguous ciphertext input blocks to plaintext buffer.
	 * Ciphertext will be decrypted in the final.
	 */
	if (length > 0) {
		new_len = ctx->gcm_pt_buf_len + length;
		new = vmem_alloc(new_len, ctx->gcm_kmflag);
		if (new == NULL) {
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			ctx->gcm_pt_buf = NULL;
			return (CRYPTO_HOST_MEMORY);
		}
		bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
		vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		ctx->gcm_pt_buf = new;
		ctx->gcm_pt_buf_len = new_len;
		bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
		    length);
		ctx->gcm_processed_data_len += length;
	}

	ctx->gcm_remainder_len = 0;
	return (CRYPTO_SUCCESS);
}

int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->gcm_use_avx == B_TRUE)
		return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t pt_len;
	size_t remainder;
	uint8_t *ghash;
	uint8_t *blockp;
	uint8_t *cbp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int processed = 0, rv;

	ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);

	gops = gcm_impl_get_ops();
	pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	ghash = (uint8_t *)ctx->gcm_ghash;
	blockp = ctx->gcm_pt_buf;
	remainder = pt_len;
	while (remainder > 0) {
		/* Incomplete last block */
		if (remainder < block_size) {
			bcopy(blockp, ctx->gcm_remainder, remainder);
			ctx->gcm_remainder_len = remainder;
			/*
			 * Not expecting any more ciphertext, just
			 * compute plaintext for the remaining input.
			 */
			gcm_decrypt_incomplete_block(ctx, block_size,
			    processed, encrypt_block, xor_block);
			ctx->gcm_remainder_len = 0;
			goto out;
		}
		/* add ciphertext to the hash */
		GHASH(ctx, blockp, ghash, gops);

		/*
		 * Increment counter.
		 * Counter bits are confined to the bottom 32 bits
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		cbp = (uint8_t *)ctx->gcm_tmp;
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);

		/* XOR with ciphertext */
		xor_block(cbp, blockp);

		processed += block_size;
		blockp += block_size;
		remainder -= block_size;
	}
out:
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	/* compare the input authentication tag with what we calculated */
	if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match */
		return (CRYPTO_INVALID_MAC);
	} else {
		rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
		out->cd_offset += pt_len;
	}
	return (CRYPTO_SUCCESS);
}

static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
	size_t tag_len;

	/*
	 * Check the length of the authentication tag (in bits).
	 */
	tag_len = gcm_param->ulTagBits;
	switch (tag_len) {
	case 32:
	case 64:
	case 96:
	case 104:
	case 112:
	case 120:
	case 128:
		break;
	default:
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

	if (gcm_param->ulIvLen == 0)
		return (CRYPTO_MECHANISM_PARAM_INVALID);

	return (CRYPTO_SUCCESS);
}
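
/*
 * The accepted tag lengths above (128, 120, 112, 104, 96, 64 and 32 bits)
 * are the ones permitted by NIST SP 800-38D; the two short lengths are only
 * intended for special-purpose applications.  ulTagBits is converted to
 * bytes in gcm_init_ctx() before it is used.
 */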

static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *cb;
	ulong_t remainder = iv_len;
	ulong_t processed = 0;
	uint8_t *datap, *ghash;
	uint64_t len_a_len_c[2];

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;
	cb = (uint8_t *)ctx->gcm_cb;
	if (iv_len == 12) {
		bcopy(iv, cb, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV */
		do {
			if (remainder < block_size) {
				bzero(cb, block_size);
				bcopy(&(iv[processed]), cb, remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash, gops);
		} while (remainder > 0);

		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}

static int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	/* encrypt zero block to get subkey H */
	bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

	gops = gcm_impl_get_ops();
	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	bzero(authp, block_size);
	bzero(ghash, block_size);

	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There's not a block full of data, pad rest of
			 * buffer with zero
			 */
			bzero(authp, block_size);
			bcopy(&(auth_data[processed]), authp, remainder);
			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash, gops);

	} while (remainder > 0);

	return (CRYPTO_SUCCESS);
}

/*
 * The following function is called at encrypt or decrypt init time
 * for AES GCM mode.
 *
 * Init the GCM context struct. Handle the cycle and avx implementations here.
 */
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	int rv;
	CK_AES_GCM_PARAMS *gcm_param;

	if (param != NULL) {
		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

		if ((rv = gcm_validate_args(gcm_param)) != 0) {
			return (rv);
		}

		gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
		gcm_ctx->gcm_tag_len >>= 3;
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GCM_MODE;
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

#ifdef CAN_USE_GCM_ASM
	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		/*
		 * Handle the "cycle" implementation by creating avx and
		 * non-avx contexts alternately.
		 */
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
		/*
		 * We don't handle byte swapped key schedules in the avx
		 * code path.
		 */
		aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
		if (ks->ops->needs_byteswap == B_TRUE) {
			gcm_ctx->gcm_use_avx = B_FALSE;
		}
		/* Use the MOVBE and the BSWAP variants alternately. */
		if (gcm_ctx->gcm_use_avx == B_TRUE &&
		    zfs_movbe_available() == B_TRUE) {
			(void) atomic_toggle_boolean_nv(
			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
		}
	}
	/* Avx and non-avx context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
		if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
		    gcm_param->pAAD, gcm_param->ulAADLen, block_size,
		    encrypt_block, copy_block, xor_block) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
		    gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}
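
/*
 * gmac_init_ctx() below mirrors gcm_init_ctx() for GMAC, i.e. GCM used as a
 * pure MAC: all input is treated as additional authenticated data, the IV
 * length is fixed (AES_GMAC_IV_LEN) and so is the tag length
 * (AES_GMAC_TAG_BITS), which is why no tag length validation is needed here.
 */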

int
gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	int rv;
	CK_AES_GMAC_PARAMS *gmac_param;

	if (param != NULL) {
		gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;

		gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GMAC_MODE;
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

#ifdef CAN_USE_GCM_ASM
	/*
	 * Handle the "cycle" implementation by creating avx and non-avx
	 * contexts alternately.
	 */
	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
	} else {
		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
	}
	/* We don't handle byte swapped key schedules in the avx code path. */
	aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
	if (ks->ops->needs_byteswap == B_TRUE) {
		gcm_ctx->gcm_use_avx = B_FALSE;
	}
	/* Avx and non-avx context initialization differs from here on. */
	if (gcm_ctx->gcm_use_avx == B_FALSE) {
#endif /* ifdef CAN_USE_GCM_ASM */
		if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
		    gmac_param->pAAD, gmac_param->ulAADLen, block_size,
		    encrypt_block, copy_block, xor_block) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
#ifdef CAN_USE_GCM_ASM
	} else {
		if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
		    gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
			rv = CRYPTO_MECHANISM_PARAM_INVALID;
		}
	}
#endif /* ifdef CAN_USE_GCM_ASM */

	return (rv);
}

void *
gcm_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GCM_MODE;
	return (gcm_ctx);
}

void *
gmac_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GMAC_MODE;
	return (gcm_ctx);
}

void
gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
{
	ctx->gcm_kmflag = kmflag;
}

/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
	.name = "fastest"
};

/* All compiled in implementations */
const gcm_impl_ops_t *gcm_all_impl[] = {
	&gcm_generic_impl,
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	&gcm_pclmulqdq_impl,
#endif
};

/* Indicate that benchmark has been completed */
static boolean_t gcm_impl_initialized = B_FALSE;

/* Hold all supported implementations */
static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];

/*
 * Returns the GCM operations for encrypt/decrypt/key setup.  When a
 * SIMD implementation is not allowed in the current context, fall back
 * to the fastest generic implementation.
 */
const gcm_impl_ops_t *
gcm_impl_get_ops()
{
	if (!kfpu_allowed())
		return (&gcm_generic_impl);

	const gcm_impl_ops_t *ops = NULL;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	switch (impl) {
	case IMPL_FASTEST:
		ASSERT(gcm_impl_initialized);
		ops = &gcm_fastest_impl;
		break;
	case IMPL_CYCLE:
		/* Cycle through supported implementations */
		ASSERT(gcm_impl_initialized);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		static size_t cycle_impl_idx = 0;
		size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
		ops = gcm_supp_impl[idx];
		break;
#ifdef CAN_USE_GCM_ASM
	case IMPL_AVX:
		/*
		 * Make sure that we return a valid implementation while
		 * switching to the avx implementation since there still
		 * may be unfinished non-avx contexts around.
		 */
		ops = &gcm_generic_impl;
		break;
#endif
	default:
		ASSERT3U(impl, <, gcm_supp_impl_cnt);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		if (impl < ARRAY_SIZE(gcm_all_impl))
			ops = gcm_supp_impl[impl];
		break;
	}

	ASSERT3P(ops, !=, NULL);

	return (ops);
}

/*
 * Initialize all supported implementations.
 */
void
gcm_impl_init(void)
{
	gcm_impl_ops_t *curr_impl;
	int i, c;

	/* Move supported implementations into gcm_supp_impls */
	for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
		curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];

		if (curr_impl->is_supported())
			gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
	}
	gcm_supp_impl_cnt = c;

	/*
	 * Set the fastest implementation given the assumption that the
	 * hardware accelerated version is the fastest.
	 */
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	if (gcm_pclmulqdq_impl.is_supported()) {
		memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
		    sizeof (gcm_fastest_impl));
	} else
#endif
	{
		memcpy(&gcm_fastest_impl, &gcm_generic_impl,
		    sizeof (gcm_fastest_impl));
	}

	strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);

#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if it's available and the implementation
	 * hasn't changed from its default value of fastest on module load.
	 */
	if (gcm_avx_will_work()) {
#ifdef HAVE_MOVBE
		if (zfs_movbe_available() == B_TRUE) {
			atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
		}
#endif
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_set_avx(B_TRUE);
		}
	}
#endif
	/* Finish initialization */
	atomic_swap_32(&icp_gcm_impl, user_sel_impl);
	gcm_impl_initialized = B_TRUE;
}

static const struct {
	char *name;
	uint32_t sel;
} gcm_impl_opts[] = {
	{ "cycle",	IMPL_CYCLE },
	{ "fastest",	IMPL_FASTEST },
#ifdef CAN_USE_GCM_ASM
	{ "avx",	IMPL_AVX },
#endif
};

/*
 * Function sets desired gcm implementation.
 *
 * If we are called before init(), the user preference will be saved in
 * user_sel_impl and applied in a later init() call.  This occurs when the
 * module parameter is specified on module load.  Otherwise, directly update
 * icp_gcm_impl.
 *
 * @val		Name of gcm implementation to use
 * @param	Unused.
 */
int
gcm_impl_set(const char *val)
{
	int err = -EINVAL;
	char req_name[GCM_IMPL_NAME_MAX];
	uint32_t impl = GCM_IMPL_READ(user_sel_impl);
	size_t i;

	/* sanitize input */
	i = strnlen(val, GCM_IMPL_NAME_MAX);
	if (i == 0 || i >= GCM_IMPL_NAME_MAX)
		return (err);

	strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
	while (i > 0 && isspace(req_name[i-1]))
		i--;
	req_name[i] = '\0';

	/* Check mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
			impl = gcm_impl_opts[i].sel;
			err = 0;
			break;
		}
	}

	/* check all supported impl if init() was already called */
	if (err != 0 && gcm_impl_initialized) {
		/* check all supported implementations */
		for (i = 0; i < gcm_supp_impl_cnt; i++) {
			if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
				impl = i;
				err = 0;
				break;
			}
		}
	}
#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if available and the requested one is
	 * avx or fastest.
	 */
	if (gcm_avx_will_work() == B_TRUE &&
	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
		gcm_set_avx(B_TRUE);
	} else {
		gcm_set_avx(B_FALSE);
	}
#endif

	if (err == 0) {
		if (gcm_impl_initialized)
			atomic_swap_32(&icp_gcm_impl, impl);
		else
			atomic_swap_32(&user_sel_impl, impl);
	}

	return (err);
}

#if defined(_KERNEL) && defined(__linux__)

static int
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
{
	return (gcm_impl_set(val));
}

static int
icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
{
	int i, cnt = 0;
	char *fmt;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	ASSERT(gcm_impl_initialized);

	/* list mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
		/* Ignore avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
		cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name);
	}

	/* list all supported implementations */
	for (i = 0; i < gcm_supp_impl_cnt; i++) {
		fmt = (i == impl) ? "[%s] " : "%s ";
		cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name);
	}

	return (cnt);
}

module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
    NULL, 0644);
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(_KERNEL) && defined(__linux__) */
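
/*
 * On Linux the parameter registered above can be inspected and changed at
 * runtime, typically through sysfs, e.g.:
 *
 *	cat /sys/module/icp/parameters/icp_gcm_impl
 *	echo avx > /sys/module/icp/parameters/icp_gcm_impl
 *
 * (Path shown for illustration; it depends on how the icp module is built
 * and loaded.)  Writes go through gcm_impl_set() above, so unknown or
 * unsupported names are rejected with EINVAL.
 */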

#ifdef CAN_USE_GCM_ASM
#define	GCM_BLOCK_LEN 16
/*
 * The openssl asm routines are 6x aggregated and need that many bytes
 * at minimum.
 */
#define	GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
#define	GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
/*
 * Ensure the chunk size is reasonable since we are allocating a
 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
 */
#define	GCM_AVX_MAX_CHUNK_SIZE \
	(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
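
/*
 * With GCM_BLOCK_LEN of 16 this works out to a 96 byte decryption minimum,
 * a 288 byte encryption minimum and a maximum chunk size of 131040 bytes
 * (128 KiB rounded down to a multiple of 96).  The default chunk size set
 * below is 32 KiB rounded down the same way, i.e. 32736 bytes.  It can be
 * tuned through the icp_gcm_avx_chunk_size module parameter registered at
 * the end of this file; values are rounded down to a multiple of
 * GCM_AVX_MIN_DECRYPT_BYTES and rejected if they fall outside the range
 * [GCM_AVX_MIN_ENCRYPT_BYTES, GCM_AVX_MAX_CHUNK_SIZE].
 */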

/* Get the chunk size module parameter. */
#define	GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size

/* Clear the FPU registers since they hold sensitive internal state. */
#define	clear_fpu_regs() clear_fpu_regs_avx()
#define	GHASH_AVX(ctx, in, len) \
    gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \
    in, len)

#define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)

/*
 * Module parameter: number of bytes to process at once while owning the FPU.
 * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is
 * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
 */
static uint32_t gcm_avx_chunk_size =
	((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

extern void clear_fpu_regs_avx(void);
extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void aes_encrypt_intel(const uint32_t rk[], int nr,
    const uint32_t pt[4], uint32_t ct[4]);

extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]);
extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2],
    const uint8_t *in, size_t len);

extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
    const void *, uint64_t *, uint64_t *);

static inline boolean_t
gcm_avx_will_work(void)
{
	/* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
	return (kfpu_allowed() &&
	    zfs_avx_available() && zfs_aes_available() &&
	    zfs_pclmulqdq_available());
}

static inline void
gcm_set_avx(boolean_t val)
{
	if (gcm_avx_will_work() == B_TRUE) {
		atomic_swap_32(&gcm_use_avx, val);
	}
}

static inline boolean_t
gcm_toggle_avx(void)
{
	if (gcm_avx_will_work() == B_TRUE) {
		return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
	} else {
		return (B_FALSE);
	}
}

/*
 * Clear sensitive data in the context.
 *
 * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
 * ctx->gcm_Htable contain the hash sub key which protects authentication.
 *
 * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
 * a known plaintext attack; they consist of the IV and the first and last
 * counter respectively. Whether they should be cleared is debatable.
 */
static inline void
gcm_clear_ctx(gcm_ctx_t *ctx)
{
	bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
	bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
	bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable));
	bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
	bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
}

/* Increment the GCM counter block by n. */
static inline void
gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
{
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);

	counter = htonll(counter + n);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
}

/*
 * Encrypt multiple blocks of data in GCM mode.
 * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
 * if possible. While processing a chunk the FPU is "locked".
 */
static int
gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
    size_t length, crypto_data_t *out, size_t block_size)
{
	size_t bleft = length;
	size_t need = 0;
	size_t done = 0;
	uint8_t *datap = (uint8_t *)data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint64_t *ghash = ctx->gcm_ghash;
	uint64_t *cb = ctx->gcm_cb;
	uint8_t *ct_buf = NULL;
	uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;

	ASSERT(block_size == GCM_BLOCK_LEN);
	/*
	 * If the last call left an incomplete block, try to fill
	 * it first.
	 */
	if (ctx->gcm_remainder_len > 0) {
		need = block_size - ctx->gcm_remainder_len;
		if (length < need) {
			/* Accumulate bytes here and return. */
			bcopy(datap, (uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, length);

			ctx->gcm_remainder_len += length;
			if (ctx->gcm_copy_to == NULL) {
				ctx->gcm_copy_to = datap;
			}
			return (CRYPTO_SUCCESS);
		} else {
			/* Complete incomplete block. */
			bcopy(datap, (uint8_t *)ctx->gcm_remainder +
			    ctx->gcm_remainder_len, need);

			ctx->gcm_copy_to = NULL;
		}
	}

	/* Allocate a buffer to encrypt to if there is enough input. */
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag);
		if (ct_buf == NULL) {
			return (CRYPTO_HOST_MEMORY);
		}
	}

	/* If we completed an incomplete block, encrypt and write it out. */
	if (ctx->gcm_remainder_len > 0) {
		kfpu_begin();
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		clear_fpu_regs();
		kfpu_end();
		rv = crypto_put_output_data(tmp, out, block_size);
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		bleft -= need;
		datap += need;
		ctx->gcm_remainder_len = 0;
	}

	/* Do the bulk encryption in chunk_size blocks. */
	for (; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_encrypt(
		    datap, ct_buf, chunk_size, key, cb, ghash);

		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			rv = CRYPTO_FAILED;
			goto out_nofpu;
		}
		rv = crypto_put_output_data(ct_buf, out, chunk_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out_nofpu;
		}
		out->cd_offset += chunk_size;
		datap += chunk_size;
		ctx->gcm_processed_data_len += chunk_size;
	}
	/* Check if we are already done. */
	if (bleft == 0) {
		goto out_nofpu;
	}
	/* Bulk encrypt the remaining data. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
		done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
		if (done == 0) {
			rv = CRYPTO_FAILED;
			goto out;
		}
		rv = crypto_put_output_data(ct_buf, out, done);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += done;
		ctx->gcm_processed_data_len += done;
		datap += done;
		bleft -= done;

	}
	/* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
	while (bleft > 0) {
		if (bleft < block_size) {
			bcopy(datap, ctx->gcm_remainder, bleft);
			ctx->gcm_remainder_len = bleft;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		/* Encrypt, hash and write out. */
		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
		    (const uint32_t *)cb, (uint32_t *)tmp);

		gcm_xor_avx(datap, tmp);
		GHASH_AVX(ctx, tmp, block_size);
		rv = crypto_put_output_data(tmp, out, block_size);
		if (rv != CRYPTO_SUCCESS) {
			goto out;
		}
		out->cd_offset += block_size;
		gcm_incr_counter_block(ctx);
		ctx->gcm_processed_data_len += block_size;
		datap += block_size;
		bleft -= block_size;
	}
out:
	clear_fpu_regs();
	kfpu_end();
out_nofpu:
	if (ct_buf != NULL) {
		vmem_free(ct_buf, chunk_size);
	}
	return (rv);
}
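
/*
 * A note on the kfpu_begin()/kfpu_end() pattern above: the FPU is claimed
 * and released per chunk_size chunk rather than once for the whole request,
 * so the time spent with preemption disabled is bounded by
 * gcm_avx_chunk_size.  The tail past the last full chunk is smaller than one
 * chunk and is therefore handled within a single FPU section as well.
 */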

/*
 * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual
 * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
 */
static int
gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
	size_t rem_len = ctx->gcm_remainder_len;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)keysched)->nr;
	int rv;

	ASSERT(block_size == GCM_BLOCK_LEN);

	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	kfpu_begin();
	/* Pad last incomplete block with zeros, encrypt and hash. */
	if (rem_len > 0) {
		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;

		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
		bzero(remainder + rem_len, block_size - rem_len);
		for (int i = 0; i < rem_len; i++) {
			remainder[i] ^= tmp[i];
		}
		GHASH_AVX(ctx, remainder, block_size);
		ctx->gcm_processed_data_len += rem_len;
		/* No need to increment counter_block, it's the last block. */
	}
	/* Finish tag. */
	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(keysched, aes_rounds, J0, J0);

	gcm_xor_avx((uint8_t *)J0, ghash);
	clear_fpu_regs();
	kfpu_end();

	/* Output remainder. */
	if (rem_len > 0) {
		rv = crypto_put_output_data(remainder, out, rem_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += rem_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);

	out->cd_offset += ctx->gcm_tag_len;
	/* Clear sensitive data in the context before returning. */
	gcm_clear_ctx(ctx);
	return (CRYPTO_SUCCESS);
}

/*
 * Finalize decryption: We have just accumulated ciphertext, so now we
 * decrypt it here in place.
 */
static int
gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
{
	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
	ASSERT3U(block_size, ==, 16);

	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	uint8_t *datap = ctx->gcm_pt_buf;
	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
	uint64_t *ghash = ctx->gcm_ghash;
	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
	int rv = CRYPTO_SUCCESS;
	size_t bleft, done;

	/*
	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple
	 * of GCM_AVX_MIN_DECRYPT_BYTES.
	 */
	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
		kfpu_begin();
		done = aesni_gcm_decrypt(datap, datap, chunk_size,
		    (const void *)key, ctx->gcm_cb, ghash);
		clear_fpu_regs();
		kfpu_end();
		if (done != chunk_size) {
			return (CRYPTO_FAILED);
		}
		datap += done;
	}
	/* Decrypt remainder, which is less than the chunk size, in one go. */
	kfpu_begin();
	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
		done = aesni_gcm_decrypt(datap, datap, bleft,
		    (const void *)key, ctx->gcm_cb, ghash);
		if (done == 0) {
			clear_fpu_regs();
			kfpu_end();
			return (CRYPTO_FAILED);
		}
		datap += done;
		bleft -= done;
	}
	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);

	/*
	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
	 * decrypt them block by block.
	 */
	while (bleft > 0) {
		/* Incomplete last block. */
		if (bleft < block_size) {
			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;

			bzero(lastb, block_size);
			bcopy(datap, lastb, bleft);
			/* The GCM processing. */
			GHASH_AVX(ctx, lastb, block_size);
			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
			for (size_t i = 0; i < bleft; i++) {
				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
			}
			break;
		}
		/* The GCM processing. */
		GHASH_AVX(ctx, datap, block_size);
		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
		gcm_xor_avx((uint8_t *)tmp, datap);
		gcm_incr_counter_block(ctx);

		datap += block_size;
		bleft -= block_size;
	}
	if (rv != CRYPTO_SUCCESS) {
		clear_fpu_regs();
		kfpu_end();
		return (rv);
	}
	/* Decryption done, finish the tag. */
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
	    (uint32_t *)ctx->gcm_J0);

	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);

	/* We are done with the FPU, restore its state. */
	clear_fpu_regs();
	kfpu_end();

	/* Compare the input authentication tag with what we calculated. */
	if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match. */
		return (CRYPTO_INVALID_MAC);
	}
	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
	if (rv != CRYPTO_SUCCESS) {
		return (rv);
	}
	out->cd_offset += pt_len;
	gcm_clear_ctx(ctx);
	return (CRYPTO_SUCCESS);
}

/*
 * Initialize the GCM params H, Htable and the counter block. Save the
 * initial counter block.
 */
static int
gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size)
{
	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
	uint64_t *H = ctx->gcm_H;
	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
	uint8_t *datap = auth_data;
	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
	size_t bleft;

	ASSERT(block_size == GCM_BLOCK_LEN);

	/* Init H (encrypt zero block) and create the initial counter block. */
	bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash));
	bzero(H, sizeof (ctx->gcm_H));
	kfpu_begin();
	aes_encrypt_intel(keysched, aes_rounds,
	    (const uint32_t *)H, (uint32_t *)H);

	gcm_init_htab_avx(ctx->gcm_Htable, H);

	if (iv_len == 12) {
		bcopy(iv, cb, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* We need the ICB later. */
		bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0));
	} else {
		/*
		 * Most consumers use 12 byte IVs, so it's OK to use the
		 * original routines for other IV sizes, just avoid nesting
		 * kfpu_begin calls.
		 */
		clear_fpu_regs();
		kfpu_end();
		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
		    aes_copy_block, aes_xor_block);
		kfpu_begin();
	}

	/* OpenSSL post-increments the counter, adjust for that. */
	gcm_incr_counter_block(ctx);

	/* Ghash AAD in chunk_size blocks. */
	for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
		GHASH_AVX(ctx, datap, chunk_size);
		datap += chunk_size;
		clear_fpu_regs();
		kfpu_end();
		kfpu_begin();
	}
	/* Ghash the remainder and handle possible incomplete GCM block. */
	if (bleft > 0) {
		size_t incomp = bleft % block_size;

		bleft -= incomp;
		if (bleft > 0) {
			GHASH_AVX(ctx, datap, bleft);
			datap += bleft;
		}
		if (incomp > 0) {
			/* Zero pad and hash incomplete last block. */
			uint8_t *authp = (uint8_t *)ctx->gcm_tmp;

			bzero(authp, block_size);
			bcopy(datap, authp, incomp);
			GHASH_AVX(ctx, authp, block_size);
		}
	}
	clear_fpu_regs();
	kfpu_end();
	return (CRYPTO_SUCCESS);
}

#if defined(_KERNEL)
static int
icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
{
	unsigned long val;
	char val_rounded[16];
	int error = 0;

	error = kstrtoul(buf, 0, &val);
	if (error)
		return (error);

	val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;

	if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
		return (-EINVAL);

	snprintf(val_rounded, 16, "%u", (uint32_t)val);
	error = param_set_uint(val_rounded, kp);
	return (error);
}

module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
    param_get_uint, &gcm_avx_chunk_size, 0644);

MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
    "How many bytes to process while owning the FPU");

#endif /* defined(_KERNEL) */
#endif /* ifdef CAN_USE_GCM_ASM */