// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Support for AES-NI and VAES instructions. This file contains glue code.
 * The real AES implementations are in aesni-intel_asm.S and other .S files.
 *
 * Copyright (C) 2008, Intel Corp.
 *    Author: Huang Ying <ying.huang@intel.com>
 *
 * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
 * interface for 64-bit kernels.
 *    Authors: Adrian Hoban <adrian.hoban@intel.com>
 *             Gabriele Paoloni <gabriele.paoloni@intel.com>
 *             Tadeusz Struk (tadeusz.struk@intel.com)
 *             Aidan O'Mahony (aidan.o.mahony@intel.com)
 *    Copyright (c) 2010, Intel Corporation.
 *
 * Copyright 2024 Google LLC
 */

#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/module.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
#include <crypto/xts.h>
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/jump_label.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/static_call.h>


#define AESNI_ALIGN		16
#define AESNI_ALIGN_ATTR	__attribute__ ((__aligned__(AESNI_ALIGN)))
#define AES_BLOCK_MASK		(~(AES_BLOCK_SIZE - 1))
#define AESNI_ALIGN_EXTRA	((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
#define CRYPTO_AES_CTX_SIZE	(sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
#define XTS_AES_CTX_SIZE	(sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)

struct aesni_xts_ctx {
	struct crypto_aes_ctx tweak_ctx AESNI_ALIGN_ATTR;
	struct crypto_aes_ctx crypt_ctx AESNI_ALIGN_ATTR;
};

static inline void *aes_align_addr(void *addr)
{
	if (crypto_tfm_ctx_alignment() >= AESNI_ALIGN)
		return addr;
	return PTR_ALIGN(addr, AESNI_ALIGN);
}

asmlinkage void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
			      unsigned int key_len);
asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len);
asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len);
asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
				  const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
				  const u8 *in, unsigned int len, u8 *iv);

asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);

asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);

#ifdef CONFIG_X86_64

asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc);

asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
		void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
93 void *keys, u8 *out, unsigned int num_bytes); 94 asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv, 95 void *keys, u8 *out, unsigned int num_bytes); 96 97 98 asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, 99 const void *keys, u8 *out, unsigned int num_bytes, 100 unsigned int byte_ctr); 101 102 asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, 103 const void *keys, u8 *out, unsigned int num_bytes, 104 unsigned int byte_ctr); 105 106 asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, 107 const void *keys, u8 *out, unsigned int num_bytes, 108 unsigned int byte_ctr); 109 #endif 110 111 static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) 112 { 113 return aes_align_addr(raw_ctx); 114 } 115 116 static inline struct aesni_xts_ctx *aes_xts_ctx(struct crypto_skcipher *tfm) 117 { 118 return aes_align_addr(crypto_skcipher_ctx(tfm)); 119 } 120 121 static int aes_set_key_common(struct crypto_aes_ctx *ctx, 122 const u8 *in_key, unsigned int key_len) 123 { 124 int err; 125 126 if (!crypto_simd_usable()) 127 return aes_expandkey(ctx, in_key, key_len); 128 129 err = aes_check_keylen(key_len); 130 if (err) 131 return err; 132 133 kernel_fpu_begin(); 134 aesni_set_key(ctx, in_key, key_len); 135 kernel_fpu_end(); 136 return 0; 137 } 138 139 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, 140 unsigned int key_len) 141 { 142 return aes_set_key_common(aes_ctx(crypto_tfm_ctx(tfm)), in_key, 143 key_len); 144 } 145 146 static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) 147 { 148 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); 149 150 if (!crypto_simd_usable()) { 151 aes_encrypt(ctx, dst, src); 152 } else { 153 kernel_fpu_begin(); 154 aesni_enc(ctx, dst, src); 155 kernel_fpu_end(); 156 } 157 } 158 159 static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) 160 { 161 struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); 162 163 if (!crypto_simd_usable()) { 164 aes_decrypt(ctx, dst, src); 165 } else { 166 kernel_fpu_begin(); 167 aesni_dec(ctx, dst, src); 168 kernel_fpu_end(); 169 } 170 } 171 172 static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, 173 unsigned int len) 174 { 175 return aes_set_key_common(aes_ctx(crypto_skcipher_ctx(tfm)), key, len); 176 } 177 178 static int ecb_encrypt(struct skcipher_request *req) 179 { 180 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 181 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); 182 struct skcipher_walk walk; 183 unsigned int nbytes; 184 int err; 185 186 err = skcipher_walk_virt(&walk, req, false); 187 188 while ((nbytes = walk.nbytes)) { 189 kernel_fpu_begin(); 190 aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, 191 nbytes & AES_BLOCK_MASK); 192 kernel_fpu_end(); 193 nbytes &= AES_BLOCK_SIZE - 1; 194 err = skcipher_walk_done(&walk, nbytes); 195 } 196 197 return err; 198 } 199 200 static int ecb_decrypt(struct skcipher_request *req) 201 { 202 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 203 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); 204 struct skcipher_walk walk; 205 unsigned int nbytes; 206 int err; 207 208 err = skcipher_walk_virt(&walk, req, false); 209 210 while ((nbytes = walk.nbytes)) { 211 kernel_fpu_begin(); 212 aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, 213 nbytes & AES_BLOCK_MASK); 214 kernel_fpu_end(); 215 nbytes &= AES_BLOCK_SIZE - 1; 216 err = skcipher_walk_done(&walk, nbytes); 217 } 218 219 return 
err; 220 } 221 222 static int cbc_encrypt(struct skcipher_request *req) 223 { 224 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 225 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); 226 struct skcipher_walk walk; 227 unsigned int nbytes; 228 int err; 229 230 err = skcipher_walk_virt(&walk, req, false); 231 232 while ((nbytes = walk.nbytes)) { 233 kernel_fpu_begin(); 234 aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, 235 nbytes & AES_BLOCK_MASK, walk.iv); 236 kernel_fpu_end(); 237 nbytes &= AES_BLOCK_SIZE - 1; 238 err = skcipher_walk_done(&walk, nbytes); 239 } 240 241 return err; 242 } 243 244 static int cbc_decrypt(struct skcipher_request *req) 245 { 246 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 247 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); 248 struct skcipher_walk walk; 249 unsigned int nbytes; 250 int err; 251 252 err = skcipher_walk_virt(&walk, req, false); 253 254 while ((nbytes = walk.nbytes)) { 255 kernel_fpu_begin(); 256 aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, 257 nbytes & AES_BLOCK_MASK, walk.iv); 258 kernel_fpu_end(); 259 nbytes &= AES_BLOCK_SIZE - 1; 260 err = skcipher_walk_done(&walk, nbytes); 261 } 262 263 return err; 264 } 265 266 static int cts_cbc_encrypt(struct skcipher_request *req) 267 { 268 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 269 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); 270 int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; 271 struct scatterlist *src = req->src, *dst = req->dst; 272 struct scatterlist sg_src[2], sg_dst[2]; 273 struct skcipher_request subreq; 274 struct skcipher_walk walk; 275 int err; 276 277 skcipher_request_set_tfm(&subreq, tfm); 278 skcipher_request_set_callback(&subreq, skcipher_request_flags(req), 279 NULL, NULL); 280 281 if (req->cryptlen <= AES_BLOCK_SIZE) { 282 if (req->cryptlen < AES_BLOCK_SIZE) 283 return -EINVAL; 284 cbc_blocks = 1; 285 } 286 287 if (cbc_blocks > 0) { 288 skcipher_request_set_crypt(&subreq, req->src, req->dst, 289 cbc_blocks * AES_BLOCK_SIZE, 290 req->iv); 291 292 err = cbc_encrypt(&subreq); 293 if (err) 294 return err; 295 296 if (req->cryptlen == AES_BLOCK_SIZE) 297 return 0; 298 299 dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); 300 if (req->dst != req->src) 301 dst = scatterwalk_ffwd(sg_dst, req->dst, 302 subreq.cryptlen); 303 } 304 305 /* handle ciphertext stealing */ 306 skcipher_request_set_crypt(&subreq, src, dst, 307 req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, 308 req->iv); 309 310 err = skcipher_walk_virt(&walk, &subreq, false); 311 if (err) 312 return err; 313 314 kernel_fpu_begin(); 315 aesni_cts_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, 316 walk.nbytes, walk.iv); 317 kernel_fpu_end(); 318 319 return skcipher_walk_done(&walk, 0); 320 } 321 322 static int cts_cbc_decrypt(struct skcipher_request *req) 323 { 324 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 325 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); 326 int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; 327 struct scatterlist *src = req->src, *dst = req->dst; 328 struct scatterlist sg_src[2], sg_dst[2]; 329 struct skcipher_request subreq; 330 struct skcipher_walk walk; 331 int err; 332 333 skcipher_request_set_tfm(&subreq, tfm); 334 skcipher_request_set_callback(&subreq, skcipher_request_flags(req), 335 NULL, NULL); 336 337 if (req->cryptlen <= AES_BLOCK_SIZE) { 338 if (req->cryptlen < AES_BLOCK_SIZE) 339 return -EINVAL; 340 
cbc_blocks = 1; 341 } 342 343 if (cbc_blocks > 0) { 344 skcipher_request_set_crypt(&subreq, req->src, req->dst, 345 cbc_blocks * AES_BLOCK_SIZE, 346 req->iv); 347 348 err = cbc_decrypt(&subreq); 349 if (err) 350 return err; 351 352 if (req->cryptlen == AES_BLOCK_SIZE) 353 return 0; 354 355 dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); 356 if (req->dst != req->src) 357 dst = scatterwalk_ffwd(sg_dst, req->dst, 358 subreq.cryptlen); 359 } 360 361 /* handle ciphertext stealing */ 362 skcipher_request_set_crypt(&subreq, src, dst, 363 req->cryptlen - cbc_blocks * AES_BLOCK_SIZE, 364 req->iv); 365 366 err = skcipher_walk_virt(&walk, &subreq, false); 367 if (err) 368 return err; 369 370 kernel_fpu_begin(); 371 aesni_cts_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, 372 walk.nbytes, walk.iv); 373 kernel_fpu_end(); 374 375 return skcipher_walk_done(&walk, 0); 376 } 377 378 #ifdef CONFIG_X86_64 379 static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, 380 const u8 *in, unsigned int len, u8 *iv) 381 { 382 /* 383 * based on key length, override with the by8 version 384 * of ctr mode encryption/decryption for improved performance 385 * aes_set_key_common() ensures that key length is one of 386 * {128,192,256} 387 */ 388 if (ctx->key_length == AES_KEYSIZE_128) 389 aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len); 390 else if (ctx->key_length == AES_KEYSIZE_192) 391 aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len); 392 else 393 aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len); 394 } 395 396 static int ctr_crypt(struct skcipher_request *req) 397 { 398 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 399 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); 400 u8 keystream[AES_BLOCK_SIZE]; 401 struct skcipher_walk walk; 402 unsigned int nbytes; 403 int err; 404 405 err = skcipher_walk_virt(&walk, req, false); 406 407 while ((nbytes = walk.nbytes) > 0) { 408 kernel_fpu_begin(); 409 if (nbytes & AES_BLOCK_MASK) 410 static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr, 411 walk.src.virt.addr, 412 nbytes & AES_BLOCK_MASK, 413 walk.iv); 414 nbytes &= ~AES_BLOCK_MASK; 415 416 if (walk.nbytes == walk.total && nbytes > 0) { 417 aesni_enc(ctx, keystream, walk.iv); 418 crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - nbytes, 419 walk.src.virt.addr + walk.nbytes - nbytes, 420 keystream, nbytes); 421 crypto_inc(walk.iv, AES_BLOCK_SIZE); 422 nbytes = 0; 423 } 424 kernel_fpu_end(); 425 err = skcipher_walk_done(&walk, nbytes); 426 } 427 return err; 428 } 429 430 static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out, 431 const u8 *in, unsigned int len, u8 *iv, 432 unsigned int byte_ctr) 433 { 434 if (ctx->key_length == AES_KEYSIZE_128) 435 aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len, 436 byte_ctr); 437 else if (ctx->key_length == AES_KEYSIZE_192) 438 aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len, 439 byte_ctr); 440 else 441 aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len, 442 byte_ctr); 443 } 444 445 static int xctr_crypt(struct skcipher_request *req) 446 { 447 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 448 struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm)); 449 u8 keystream[AES_BLOCK_SIZE]; 450 struct skcipher_walk walk; 451 unsigned int nbytes; 452 unsigned int byte_ctr = 0; 453 int err; 454 __le32 block[AES_BLOCK_SIZE / sizeof(__le32)]; 455 456 err = skcipher_walk_virt(&walk, req, false); 457 458 while ((nbytes = walk.nbytes) > 0) { 459 kernel_fpu_begin(); 460 if 
(nbytes & AES_BLOCK_MASK) 461 aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr, 462 walk.src.virt.addr, nbytes & AES_BLOCK_MASK, 463 walk.iv, byte_ctr); 464 nbytes &= ~AES_BLOCK_MASK; 465 byte_ctr += walk.nbytes - nbytes; 466 467 if (walk.nbytes == walk.total && nbytes > 0) { 468 memcpy(block, walk.iv, AES_BLOCK_SIZE); 469 block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE); 470 aesni_enc(ctx, keystream, (u8 *)block); 471 crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - 472 nbytes, walk.src.virt.addr + walk.nbytes 473 - nbytes, keystream, nbytes); 474 byte_ctr += nbytes; 475 nbytes = 0; 476 } 477 kernel_fpu_end(); 478 err = skcipher_walk_done(&walk, nbytes); 479 } 480 return err; 481 } 482 #endif 483 484 static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key, 485 unsigned int keylen) 486 { 487 struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); 488 int err; 489 490 err = xts_verify_key(tfm, key, keylen); 491 if (err) 492 return err; 493 494 keylen /= 2; 495 496 /* first half of xts-key is for crypt */ 497 err = aes_set_key_common(&ctx->crypt_ctx, key, keylen); 498 if (err) 499 return err; 500 501 /* second half of xts-key is for tweak */ 502 return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen); 503 } 504 505 typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key, 506 u8 iv[AES_BLOCK_SIZE]); 507 typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key, 508 const u8 *src, u8 *dst, unsigned int len, 509 u8 tweak[AES_BLOCK_SIZE]); 510 511 /* This handles cases where the source and/or destination span pages. */ 512 static noinline int 513 xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func) 514 { 515 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 516 const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); 517 int tail = req->cryptlen % AES_BLOCK_SIZE; 518 struct scatterlist sg_src[2], sg_dst[2]; 519 struct skcipher_request subreq; 520 struct skcipher_walk walk; 521 struct scatterlist *src, *dst; 522 int err; 523 524 /* 525 * If the message length isn't divisible by the AES block size, then 526 * separate off the last full block and the partial block. This ensures 527 * that they are processed in the same call to the assembly function, 528 * which is required for ciphertext stealing. 529 */ 530 if (tail) { 531 skcipher_request_set_tfm(&subreq, tfm); 532 skcipher_request_set_callback(&subreq, 533 skcipher_request_flags(req), 534 NULL, NULL); 535 skcipher_request_set_crypt(&subreq, req->src, req->dst, 536 req->cryptlen - tail - AES_BLOCK_SIZE, 537 req->iv); 538 req = &subreq; 539 } 540 541 err = skcipher_walk_virt(&walk, req, false); 542 543 while (walk.nbytes) { 544 kernel_fpu_begin(); 545 (*crypt_func)(&ctx->crypt_ctx, 546 walk.src.virt.addr, walk.dst.virt.addr, 547 walk.nbytes & ~(AES_BLOCK_SIZE - 1), req->iv); 548 kernel_fpu_end(); 549 err = skcipher_walk_done(&walk, 550 walk.nbytes & (AES_BLOCK_SIZE - 1)); 551 } 552 553 if (err || !tail) 554 return err; 555 556 /* Do ciphertext stealing with the last full block and partial block. 
*/ 557 558 dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); 559 if (req->dst != req->src) 560 dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); 561 562 skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, 563 req->iv); 564 565 err = skcipher_walk_virt(&walk, req, false); 566 if (err) 567 return err; 568 569 kernel_fpu_begin(); 570 (*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr, 571 walk.nbytes, req->iv); 572 kernel_fpu_end(); 573 574 return skcipher_walk_done(&walk, 0); 575 } 576 577 /* __always_inline to avoid indirect call in fastpath */ 578 static __always_inline int 579 xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv, 580 xts_crypt_func crypt_func) 581 { 582 struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 583 const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); 584 const unsigned int cryptlen = req->cryptlen; 585 struct scatterlist *src = req->src; 586 struct scatterlist *dst = req->dst; 587 588 if (unlikely(cryptlen < AES_BLOCK_SIZE)) 589 return -EINVAL; 590 591 kernel_fpu_begin(); 592 (*encrypt_iv)(&ctx->tweak_ctx, req->iv); 593 594 /* 595 * In practice, virtually all XTS plaintexts and ciphertexts are either 596 * 512 or 4096 bytes, aligned such that they don't span page boundaries. 597 * To optimize the performance of these cases, and also any other case 598 * where no page boundary is spanned, the below fast-path handles 599 * single-page sources and destinations as efficiently as possible. 600 */ 601 if (likely(src->length >= cryptlen && dst->length >= cryptlen && 602 src->offset + cryptlen <= PAGE_SIZE && 603 dst->offset + cryptlen <= PAGE_SIZE)) { 604 struct page *src_page = sg_page(src); 605 struct page *dst_page = sg_page(dst); 606 void *src_virt = kmap_local_page(src_page) + src->offset; 607 void *dst_virt = kmap_local_page(dst_page) + dst->offset; 608 609 (*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen, 610 req->iv); 611 kunmap_local(dst_virt); 612 kunmap_local(src_virt); 613 kernel_fpu_end(); 614 return 0; 615 } 616 kernel_fpu_end(); 617 return xts_crypt_slowpath(req, crypt_func); 618 } 619 620 static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, 621 u8 iv[AES_BLOCK_SIZE]) 622 { 623 aesni_enc(tweak_key, iv, iv); 624 } 625 626 static void aesni_xts_encrypt(const struct crypto_aes_ctx *key, 627 const u8 *src, u8 *dst, unsigned int len, 628 u8 tweak[AES_BLOCK_SIZE]) 629 { 630 aesni_xts_enc(key, dst, src, len, tweak); 631 } 632 633 static void aesni_xts_decrypt(const struct crypto_aes_ctx *key, 634 const u8 *src, u8 *dst, unsigned int len, 635 u8 tweak[AES_BLOCK_SIZE]) 636 { 637 aesni_xts_dec(key, dst, src, len, tweak); 638 } 639 640 static int xts_encrypt_aesni(struct skcipher_request *req) 641 { 642 return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_encrypt); 643 } 644 645 static int xts_decrypt_aesni(struct skcipher_request *req) 646 { 647 return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_decrypt); 648 } 649 650 static struct crypto_alg aesni_cipher_alg = { 651 .cra_name = "aes", 652 .cra_driver_name = "aes-aesni", 653 .cra_priority = 300, 654 .cra_flags = CRYPTO_ALG_TYPE_CIPHER, 655 .cra_blocksize = AES_BLOCK_SIZE, 656 .cra_ctxsize = CRYPTO_AES_CTX_SIZE, 657 .cra_module = THIS_MODULE, 658 .cra_u = { 659 .cipher = { 660 .cia_min_keysize = AES_MIN_KEY_SIZE, 661 .cia_max_keysize = AES_MAX_KEY_SIZE, 662 .cia_setkey = aes_set_key, 663 .cia_encrypt = aesni_encrypt, 664 .cia_decrypt = aesni_decrypt 665 } 666 } 667 }; 668 669 static struct skcipher_alg 
aesni_skciphers[] = { 670 { 671 .base = { 672 .cra_name = "__ecb(aes)", 673 .cra_driver_name = "__ecb-aes-aesni", 674 .cra_priority = 400, 675 .cra_flags = CRYPTO_ALG_INTERNAL, 676 .cra_blocksize = AES_BLOCK_SIZE, 677 .cra_ctxsize = CRYPTO_AES_CTX_SIZE, 678 .cra_module = THIS_MODULE, 679 }, 680 .min_keysize = AES_MIN_KEY_SIZE, 681 .max_keysize = AES_MAX_KEY_SIZE, 682 .setkey = aesni_skcipher_setkey, 683 .encrypt = ecb_encrypt, 684 .decrypt = ecb_decrypt, 685 }, { 686 .base = { 687 .cra_name = "__cbc(aes)", 688 .cra_driver_name = "__cbc-aes-aesni", 689 .cra_priority = 400, 690 .cra_flags = CRYPTO_ALG_INTERNAL, 691 .cra_blocksize = AES_BLOCK_SIZE, 692 .cra_ctxsize = CRYPTO_AES_CTX_SIZE, 693 .cra_module = THIS_MODULE, 694 }, 695 .min_keysize = AES_MIN_KEY_SIZE, 696 .max_keysize = AES_MAX_KEY_SIZE, 697 .ivsize = AES_BLOCK_SIZE, 698 .setkey = aesni_skcipher_setkey, 699 .encrypt = cbc_encrypt, 700 .decrypt = cbc_decrypt, 701 }, { 702 .base = { 703 .cra_name = "__cts(cbc(aes))", 704 .cra_driver_name = "__cts-cbc-aes-aesni", 705 .cra_priority = 400, 706 .cra_flags = CRYPTO_ALG_INTERNAL, 707 .cra_blocksize = AES_BLOCK_SIZE, 708 .cra_ctxsize = CRYPTO_AES_CTX_SIZE, 709 .cra_module = THIS_MODULE, 710 }, 711 .min_keysize = AES_MIN_KEY_SIZE, 712 .max_keysize = AES_MAX_KEY_SIZE, 713 .ivsize = AES_BLOCK_SIZE, 714 .walksize = 2 * AES_BLOCK_SIZE, 715 .setkey = aesni_skcipher_setkey, 716 .encrypt = cts_cbc_encrypt, 717 .decrypt = cts_cbc_decrypt, 718 #ifdef CONFIG_X86_64 719 }, { 720 .base = { 721 .cra_name = "__ctr(aes)", 722 .cra_driver_name = "__ctr-aes-aesni", 723 .cra_priority = 400, 724 .cra_flags = CRYPTO_ALG_INTERNAL, 725 .cra_blocksize = 1, 726 .cra_ctxsize = CRYPTO_AES_CTX_SIZE, 727 .cra_module = THIS_MODULE, 728 }, 729 .min_keysize = AES_MIN_KEY_SIZE, 730 .max_keysize = AES_MAX_KEY_SIZE, 731 .ivsize = AES_BLOCK_SIZE, 732 .chunksize = AES_BLOCK_SIZE, 733 .setkey = aesni_skcipher_setkey, 734 .encrypt = ctr_crypt, 735 .decrypt = ctr_crypt, 736 #endif 737 }, { 738 .base = { 739 .cra_name = "__xts(aes)", 740 .cra_driver_name = "__xts-aes-aesni", 741 .cra_priority = 401, 742 .cra_flags = CRYPTO_ALG_INTERNAL, 743 .cra_blocksize = AES_BLOCK_SIZE, 744 .cra_ctxsize = XTS_AES_CTX_SIZE, 745 .cra_module = THIS_MODULE, 746 }, 747 .min_keysize = 2 * AES_MIN_KEY_SIZE, 748 .max_keysize = 2 * AES_MAX_KEY_SIZE, 749 .ivsize = AES_BLOCK_SIZE, 750 .walksize = 2 * AES_BLOCK_SIZE, 751 .setkey = xts_setkey_aesni, 752 .encrypt = xts_encrypt_aesni, 753 .decrypt = xts_decrypt_aesni, 754 } 755 }; 756 757 static 758 struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)]; 759 760 #ifdef CONFIG_X86_64 761 /* 762 * XCTR does not have a non-AVX implementation, so it must be enabled 763 * conditionally. 
764 */ 765 static struct skcipher_alg aesni_xctr = { 766 .base = { 767 .cra_name = "__xctr(aes)", 768 .cra_driver_name = "__xctr-aes-aesni", 769 .cra_priority = 400, 770 .cra_flags = CRYPTO_ALG_INTERNAL, 771 .cra_blocksize = 1, 772 .cra_ctxsize = CRYPTO_AES_CTX_SIZE, 773 .cra_module = THIS_MODULE, 774 }, 775 .min_keysize = AES_MIN_KEY_SIZE, 776 .max_keysize = AES_MAX_KEY_SIZE, 777 .ivsize = AES_BLOCK_SIZE, 778 .chunksize = AES_BLOCK_SIZE, 779 .setkey = aesni_skcipher_setkey, 780 .encrypt = xctr_crypt, 781 .decrypt = xctr_crypt, 782 }; 783 784 static struct simd_skcipher_alg *aesni_simd_xctr; 785 786 asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key, 787 u8 iv[AES_BLOCK_SIZE]); 788 789 #define DEFINE_XTS_ALG(suffix, driver_name, priority) \ 790 \ 791 asmlinkage void \ 792 aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ 793 u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ 794 asmlinkage void \ 795 aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src, \ 796 u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \ 797 \ 798 static int xts_encrypt_##suffix(struct skcipher_request *req) \ 799 { \ 800 return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_encrypt_##suffix); \ 801 } \ 802 \ 803 static int xts_decrypt_##suffix(struct skcipher_request *req) \ 804 { \ 805 return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix); \ 806 } \ 807 \ 808 static struct skcipher_alg aes_xts_alg_##suffix = { \ 809 .base = { \ 810 .cra_name = "__xts(aes)", \ 811 .cra_driver_name = "__" driver_name, \ 812 .cra_priority = priority, \ 813 .cra_flags = CRYPTO_ALG_INTERNAL, \ 814 .cra_blocksize = AES_BLOCK_SIZE, \ 815 .cra_ctxsize = XTS_AES_CTX_SIZE, \ 816 .cra_module = THIS_MODULE, \ 817 }, \ 818 .min_keysize = 2 * AES_MIN_KEY_SIZE, \ 819 .max_keysize = 2 * AES_MAX_KEY_SIZE, \ 820 .ivsize = AES_BLOCK_SIZE, \ 821 .walksize = 2 * AES_BLOCK_SIZE, \ 822 .setkey = xts_setkey_aesni, \ 823 .encrypt = xts_encrypt_##suffix, \ 824 .decrypt = xts_decrypt_##suffix, \ 825 }; \ 826 \ 827 static struct simd_skcipher_alg *aes_xts_simdalg_##suffix 828 829 DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500); 830 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) 831 DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600); 832 DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700); 833 DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800); 834 #endif 835 836 /* The common part of the x86_64 AES-GCM key struct */ 837 struct aes_gcm_key { 838 /* Expanded AES key and the AES key length in bytes */ 839 struct crypto_aes_ctx aes_key; 840 841 /* RFC4106 nonce (used only by the rfc4106 algorithms) */ 842 u32 rfc4106_nonce; 843 }; 844 845 /* Key struct used by the AES-NI implementations of AES-GCM */ 846 struct aes_gcm_key_aesni { 847 /* 848 * Common part of the key. The assembly code requires 16-byte alignment 849 * for the round keys; we get this by them being located at the start of 850 * the struct and the whole struct being 16-byte aligned. 851 */ 852 struct aes_gcm_key base; 853 854 /* 855 * Powers of the hash key H^8 through H^1. These are 128-bit values. 856 * They all have an extra factor of x^-1 and are byte-reversed. 16-byte 857 * alignment is required by the assembly code. 858 */ 859 u64 h_powers[8][2] __aligned(16); 860 861 /* 862 * h_powers_xored[i] contains the two 64-bit halves of h_powers[i] XOR'd 863 * together. It's used for Karatsuba multiplication. 16-byte alignment 864 * is required by the assembly code. 
865 */ 866 u64 h_powers_xored[8] __aligned(16); 867 868 /* 869 * H^1 times x^64 (and also the usual extra factor of x^-1). 16-byte 870 * alignment is required by the assembly code. 871 */ 872 u64 h_times_x64[2] __aligned(16); 873 }; 874 #define AES_GCM_KEY_AESNI(key) \ 875 container_of((key), struct aes_gcm_key_aesni, base) 876 #define AES_GCM_KEY_AESNI_SIZE \ 877 (sizeof(struct aes_gcm_key_aesni) + (15 & ~(CRYPTO_MINALIGN - 1))) 878 879 /* Key struct used by the VAES + AVX10 implementations of AES-GCM */ 880 struct aes_gcm_key_avx10 { 881 /* 882 * Common part of the key. The assembly code prefers 16-byte alignment 883 * for the round keys; we get this by them being located at the start of 884 * the struct and the whole struct being 64-byte aligned. 885 */ 886 struct aes_gcm_key base; 887 888 /* 889 * Powers of the hash key H^16 through H^1. These are 128-bit values. 890 * They all have an extra factor of x^-1 and are byte-reversed. This 891 * array is aligned to a 64-byte boundary to make it naturally aligned 892 * for 512-bit loads, which can improve performance. (The assembly code 893 * doesn't *need* the alignment; this is just an optimization.) 894 */ 895 u64 h_powers[16][2] __aligned(64); 896 897 /* Three padding blocks required by the assembly code */ 898 u64 padding[3][2]; 899 }; 900 #define AES_GCM_KEY_AVX10(key) \ 901 container_of((key), struct aes_gcm_key_avx10, base) 902 #define AES_GCM_KEY_AVX10_SIZE \ 903 (sizeof(struct aes_gcm_key_avx10) + (63 & ~(CRYPTO_MINALIGN - 1))) 904 905 /* 906 * These flags are passed to the AES-GCM helper functions to specify the 907 * specific version of AES-GCM (RFC4106 or not), whether it's encryption or 908 * decryption, and which assembly functions should be called. Assembly 909 * functions are selected using flags instead of function pointers to avoid 910 * indirect calls (which are very expensive on x86) regardless of inlining. 911 */ 912 #define FLAG_RFC4106 BIT(0) 913 #define FLAG_ENC BIT(1) 914 #define FLAG_AVX BIT(2) 915 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) 916 # define FLAG_AVX10_256 BIT(3) 917 # define FLAG_AVX10_512 BIT(4) 918 #else 919 /* 920 * This should cause all calls to the AVX10 assembly functions to be 921 * optimized out, avoiding the need to ifdef each call individually. 922 */ 923 # define FLAG_AVX10_256 0 924 # define FLAG_AVX10_512 0 925 #endif 926 927 static inline struct aes_gcm_key * 928 aes_gcm_key_get(struct crypto_aead *tfm, int flags) 929 { 930 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) 931 return PTR_ALIGN(crypto_aead_ctx(tfm), 64); 932 else 933 return PTR_ALIGN(crypto_aead_ctx(tfm), 16); 934 } 935 936 asmlinkage void 937 aes_gcm_precompute_aesni(struct aes_gcm_key_aesni *key); 938 asmlinkage void 939 aes_gcm_precompute_aesni_avx(struct aes_gcm_key_aesni *key); 940 asmlinkage void 941 aes_gcm_precompute_vaes_avx10_256(struct aes_gcm_key_avx10 *key); 942 asmlinkage void 943 aes_gcm_precompute_vaes_avx10_512(struct aes_gcm_key_avx10 *key); 944 945 static void aes_gcm_precompute(struct aes_gcm_key *key, int flags) 946 { 947 /* 948 * To make things a bit easier on the assembly side, the AVX10 949 * implementations use the same key format. Therefore, a single 950 * function using 256-bit vectors would suffice here. However, it's 951 * straightforward to provide a 512-bit one because of how the assembly 952 * code is structured, and it works nicely because the total size of the 953 * key powers is a multiple of 512 bits. So we take advantage of that. 
954 * 955 * A similar situation applies to the AES-NI implementations. 956 */ 957 if (flags & FLAG_AVX10_512) 958 aes_gcm_precompute_vaes_avx10_512(AES_GCM_KEY_AVX10(key)); 959 else if (flags & FLAG_AVX10_256) 960 aes_gcm_precompute_vaes_avx10_256(AES_GCM_KEY_AVX10(key)); 961 else if (flags & FLAG_AVX) 962 aes_gcm_precompute_aesni_avx(AES_GCM_KEY_AESNI(key)); 963 else 964 aes_gcm_precompute_aesni(AES_GCM_KEY_AESNI(key)); 965 } 966 967 asmlinkage void 968 aes_gcm_aad_update_aesni(const struct aes_gcm_key_aesni *key, 969 u8 ghash_acc[16], const u8 *aad, int aadlen); 970 asmlinkage void 971 aes_gcm_aad_update_aesni_avx(const struct aes_gcm_key_aesni *key, 972 u8 ghash_acc[16], const u8 *aad, int aadlen); 973 asmlinkage void 974 aes_gcm_aad_update_vaes_avx10(const struct aes_gcm_key_avx10 *key, 975 u8 ghash_acc[16], const u8 *aad, int aadlen); 976 977 static void aes_gcm_aad_update(const struct aes_gcm_key *key, u8 ghash_acc[16], 978 const u8 *aad, int aadlen, int flags) 979 { 980 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) 981 aes_gcm_aad_update_vaes_avx10(AES_GCM_KEY_AVX10(key), ghash_acc, 982 aad, aadlen); 983 else if (flags & FLAG_AVX) 984 aes_gcm_aad_update_aesni_avx(AES_GCM_KEY_AESNI(key), ghash_acc, 985 aad, aadlen); 986 else 987 aes_gcm_aad_update_aesni(AES_GCM_KEY_AESNI(key), ghash_acc, 988 aad, aadlen); 989 } 990 991 asmlinkage void 992 aes_gcm_enc_update_aesni(const struct aes_gcm_key_aesni *key, 993 const u32 le_ctr[4], u8 ghash_acc[16], 994 const u8 *src, u8 *dst, int datalen); 995 asmlinkage void 996 aes_gcm_enc_update_aesni_avx(const struct aes_gcm_key_aesni *key, 997 const u32 le_ctr[4], u8 ghash_acc[16], 998 const u8 *src, u8 *dst, int datalen); 999 asmlinkage void 1000 aes_gcm_enc_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key, 1001 const u32 le_ctr[4], u8 ghash_acc[16], 1002 const u8 *src, u8 *dst, int datalen); 1003 asmlinkage void 1004 aes_gcm_enc_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key, 1005 const u32 le_ctr[4], u8 ghash_acc[16], 1006 const u8 *src, u8 *dst, int datalen); 1007 1008 asmlinkage void 1009 aes_gcm_dec_update_aesni(const struct aes_gcm_key_aesni *key, 1010 const u32 le_ctr[4], u8 ghash_acc[16], 1011 const u8 *src, u8 *dst, int datalen); 1012 asmlinkage void 1013 aes_gcm_dec_update_aesni_avx(const struct aes_gcm_key_aesni *key, 1014 const u32 le_ctr[4], u8 ghash_acc[16], 1015 const u8 *src, u8 *dst, int datalen); 1016 asmlinkage void 1017 aes_gcm_dec_update_vaes_avx10_256(const struct aes_gcm_key_avx10 *key, 1018 const u32 le_ctr[4], u8 ghash_acc[16], 1019 const u8 *src, u8 *dst, int datalen); 1020 asmlinkage void 1021 aes_gcm_dec_update_vaes_avx10_512(const struct aes_gcm_key_avx10 *key, 1022 const u32 le_ctr[4], u8 ghash_acc[16], 1023 const u8 *src, u8 *dst, int datalen); 1024 1025 /* __always_inline to optimize out the branches based on @flags */ 1026 static __always_inline void 1027 aes_gcm_update(const struct aes_gcm_key *key, 1028 const u32 le_ctr[4], u8 ghash_acc[16], 1029 const u8 *src, u8 *dst, int datalen, int flags) 1030 { 1031 if (flags & FLAG_ENC) { 1032 if (flags & FLAG_AVX10_512) 1033 aes_gcm_enc_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key), 1034 le_ctr, ghash_acc, 1035 src, dst, datalen); 1036 else if (flags & FLAG_AVX10_256) 1037 aes_gcm_enc_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key), 1038 le_ctr, ghash_acc, 1039 src, dst, datalen); 1040 else if (flags & FLAG_AVX) 1041 aes_gcm_enc_update_aesni_avx(AES_GCM_KEY_AESNI(key), 1042 le_ctr, ghash_acc, 1043 src, dst, datalen); 1044 else 1045 
aes_gcm_enc_update_aesni(AES_GCM_KEY_AESNI(key), le_ctr, 1046 ghash_acc, src, dst, datalen); 1047 } else { 1048 if (flags & FLAG_AVX10_512) 1049 aes_gcm_dec_update_vaes_avx10_512(AES_GCM_KEY_AVX10(key), 1050 le_ctr, ghash_acc, 1051 src, dst, datalen); 1052 else if (flags & FLAG_AVX10_256) 1053 aes_gcm_dec_update_vaes_avx10_256(AES_GCM_KEY_AVX10(key), 1054 le_ctr, ghash_acc, 1055 src, dst, datalen); 1056 else if (flags & FLAG_AVX) 1057 aes_gcm_dec_update_aesni_avx(AES_GCM_KEY_AESNI(key), 1058 le_ctr, ghash_acc, 1059 src, dst, datalen); 1060 else 1061 aes_gcm_dec_update_aesni(AES_GCM_KEY_AESNI(key), 1062 le_ctr, ghash_acc, 1063 src, dst, datalen); 1064 } 1065 } 1066 1067 asmlinkage void 1068 aes_gcm_enc_final_aesni(const struct aes_gcm_key_aesni *key, 1069 const u32 le_ctr[4], u8 ghash_acc[16], 1070 u64 total_aadlen, u64 total_datalen); 1071 asmlinkage void 1072 aes_gcm_enc_final_aesni_avx(const struct aes_gcm_key_aesni *key, 1073 const u32 le_ctr[4], u8 ghash_acc[16], 1074 u64 total_aadlen, u64 total_datalen); 1075 asmlinkage void 1076 aes_gcm_enc_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, 1077 const u32 le_ctr[4], u8 ghash_acc[16], 1078 u64 total_aadlen, u64 total_datalen); 1079 1080 /* __always_inline to optimize out the branches based on @flags */ 1081 static __always_inline void 1082 aes_gcm_enc_final(const struct aes_gcm_key *key, 1083 const u32 le_ctr[4], u8 ghash_acc[16], 1084 u64 total_aadlen, u64 total_datalen, int flags) 1085 { 1086 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) 1087 aes_gcm_enc_final_vaes_avx10(AES_GCM_KEY_AVX10(key), 1088 le_ctr, ghash_acc, 1089 total_aadlen, total_datalen); 1090 else if (flags & FLAG_AVX) 1091 aes_gcm_enc_final_aesni_avx(AES_GCM_KEY_AESNI(key), 1092 le_ctr, ghash_acc, 1093 total_aadlen, total_datalen); 1094 else 1095 aes_gcm_enc_final_aesni(AES_GCM_KEY_AESNI(key), 1096 le_ctr, ghash_acc, 1097 total_aadlen, total_datalen); 1098 } 1099 1100 asmlinkage bool __must_check 1101 aes_gcm_dec_final_aesni(const struct aes_gcm_key_aesni *key, 1102 const u32 le_ctr[4], const u8 ghash_acc[16], 1103 u64 total_aadlen, u64 total_datalen, 1104 const u8 tag[16], int taglen); 1105 asmlinkage bool __must_check 1106 aes_gcm_dec_final_aesni_avx(const struct aes_gcm_key_aesni *key, 1107 const u32 le_ctr[4], const u8 ghash_acc[16], 1108 u64 total_aadlen, u64 total_datalen, 1109 const u8 tag[16], int taglen); 1110 asmlinkage bool __must_check 1111 aes_gcm_dec_final_vaes_avx10(const struct aes_gcm_key_avx10 *key, 1112 const u32 le_ctr[4], const u8 ghash_acc[16], 1113 u64 total_aadlen, u64 total_datalen, 1114 const u8 tag[16], int taglen); 1115 1116 /* __always_inline to optimize out the branches based on @flags */ 1117 static __always_inline bool __must_check 1118 aes_gcm_dec_final(const struct aes_gcm_key *key, const u32 le_ctr[4], 1119 u8 ghash_acc[16], u64 total_aadlen, u64 total_datalen, 1120 u8 tag[16], int taglen, int flags) 1121 { 1122 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) 1123 return aes_gcm_dec_final_vaes_avx10(AES_GCM_KEY_AVX10(key), 1124 le_ctr, ghash_acc, 1125 total_aadlen, total_datalen, 1126 tag, taglen); 1127 else if (flags & FLAG_AVX) 1128 return aes_gcm_dec_final_aesni_avx(AES_GCM_KEY_AESNI(key), 1129 le_ctr, ghash_acc, 1130 total_aadlen, total_datalen, 1131 tag, taglen); 1132 else 1133 return aes_gcm_dec_final_aesni(AES_GCM_KEY_AESNI(key), 1134 le_ctr, ghash_acc, 1135 total_aadlen, total_datalen, 1136 tag, taglen); 1137 } 1138 1139 /* 1140 * This is the Integrity Check Value (aka the authentication tag) length and can 1141 * be 8, 
12 or 16 bytes long. 1142 */ 1143 static int common_rfc4106_set_authsize(struct crypto_aead *aead, 1144 unsigned int authsize) 1145 { 1146 switch (authsize) { 1147 case 8: 1148 case 12: 1149 case 16: 1150 break; 1151 default: 1152 return -EINVAL; 1153 } 1154 1155 return 0; 1156 } 1157 1158 static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, 1159 unsigned int authsize) 1160 { 1161 switch (authsize) { 1162 case 4: 1163 case 8: 1164 case 12: 1165 case 13: 1166 case 14: 1167 case 15: 1168 case 16: 1169 break; 1170 default: 1171 return -EINVAL; 1172 } 1173 1174 return 0; 1175 } 1176 1177 /* 1178 * This is the setkey function for the x86_64 implementations of AES-GCM. It 1179 * saves the RFC4106 nonce if applicable, expands the AES key, and precomputes 1180 * powers of the hash key. 1181 * 1182 * To comply with the crypto_aead API, this has to be usable in no-SIMD context. 1183 * For that reason, this function includes a portable C implementation of the 1184 * needed logic. However, the portable C implementation is very slow, taking 1185 * about the same time as encrypting 37 KB of data. To be ready for users that 1186 * may set a key even somewhat frequently, we therefore also include a SIMD 1187 * assembly implementation, expanding the AES key using AES-NI and precomputing 1188 * the hash key powers using PCLMULQDQ or VPCLMULQDQ. 1189 */ 1190 static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key, 1191 unsigned int keylen, int flags) 1192 { 1193 struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags); 1194 int err; 1195 1196 if (flags & FLAG_RFC4106) { 1197 if (keylen < 4) 1198 return -EINVAL; 1199 keylen -= 4; 1200 key->rfc4106_nonce = get_unaligned_be32(raw_key + keylen); 1201 } 1202 1203 /* The assembly code assumes the following offsets. 
*/ 1204 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_enc) != 0); 1205 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_length) != 480); 1206 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496); 1207 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624); 1208 BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688); 1209 BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_enc) != 0); 1210 BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, base.aes_key.key_length) != 480); 1211 BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, h_powers) != 512); 1212 BUILD_BUG_ON(offsetof(struct aes_gcm_key_avx10, padding) != 768); 1213 1214 if (likely(crypto_simd_usable())) { 1215 err = aes_check_keylen(keylen); 1216 if (err) 1217 return err; 1218 kernel_fpu_begin(); 1219 aesni_set_key(&key->aes_key, raw_key, keylen); 1220 aes_gcm_precompute(key, flags); 1221 kernel_fpu_end(); 1222 } else { 1223 static const u8 x_to_the_minus1[16] __aligned(__alignof__(be128)) = { 1224 [0] = 0xc2, [15] = 1 1225 }; 1226 static const u8 x_to_the_63[16] __aligned(__alignof__(be128)) = { 1227 [7] = 1, 1228 }; 1229 be128 h1 = {}; 1230 be128 h; 1231 int i; 1232 1233 err = aes_expandkey(&key->aes_key, raw_key, keylen); 1234 if (err) 1235 return err; 1236 1237 /* Encrypt the all-zeroes block to get the hash key H^1 */ 1238 aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1); 1239 1240 /* Compute H^1 * x^-1 */ 1241 h = h1; 1242 gf128mul_lle(&h, (const be128 *)x_to_the_minus1); 1243 1244 /* Compute the needed key powers */ 1245 if (flags & (FLAG_AVX10_256 | FLAG_AVX10_512)) { 1246 struct aes_gcm_key_avx10 *k = AES_GCM_KEY_AVX10(key); 1247 1248 for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) { 1249 k->h_powers[i][0] = be64_to_cpu(h.b); 1250 k->h_powers[i][1] = be64_to_cpu(h.a); 1251 gf128mul_lle(&h, &h1); 1252 } 1253 memset(k->padding, 0, sizeof(k->padding)); 1254 } else { 1255 struct aes_gcm_key_aesni *k = AES_GCM_KEY_AESNI(key); 1256 1257 for (i = ARRAY_SIZE(k->h_powers) - 1; i >= 0; i--) { 1258 k->h_powers[i][0] = be64_to_cpu(h.b); 1259 k->h_powers[i][1] = be64_to_cpu(h.a); 1260 k->h_powers_xored[i] = k->h_powers[i][0] ^ 1261 k->h_powers[i][1]; 1262 gf128mul_lle(&h, &h1); 1263 } 1264 gf128mul_lle(&h1, (const be128 *)x_to_the_63); 1265 k->h_times_x64[0] = be64_to_cpu(h1.b); 1266 k->h_times_x64[1] = be64_to_cpu(h1.a); 1267 } 1268 } 1269 return 0; 1270 } 1271 1272 /* 1273 * Initialize @ghash_acc, then pass all @assoclen bytes of associated data 1274 * (a.k.a. additional authenticated data) from @sg_src through the GHASH update 1275 * assembly function. kernel_fpu_begin() must have already been called. 1276 */ 1277 static void gcm_process_assoc(const struct aes_gcm_key *key, u8 ghash_acc[16], 1278 struct scatterlist *sg_src, unsigned int assoclen, 1279 int flags) 1280 { 1281 struct scatter_walk walk; 1282 /* 1283 * The assembly function requires that the length of any non-last 1284 * segment of associated data be a multiple of 16 bytes, so this 1285 * function does the buffering needed to achieve that. 
1286 */ 1287 unsigned int pos = 0; 1288 u8 buf[16]; 1289 1290 memset(ghash_acc, 0, 16); 1291 scatterwalk_start(&walk, sg_src); 1292 1293 while (assoclen) { 1294 unsigned int len_this_page = scatterwalk_clamp(&walk, assoclen); 1295 void *mapped = scatterwalk_map(&walk); 1296 const void *src = mapped; 1297 unsigned int len; 1298 1299 assoclen -= len_this_page; 1300 scatterwalk_advance(&walk, len_this_page); 1301 if (unlikely(pos)) { 1302 len = min(len_this_page, 16 - pos); 1303 memcpy(&buf[pos], src, len); 1304 pos += len; 1305 src += len; 1306 len_this_page -= len; 1307 if (pos < 16) 1308 goto next; 1309 aes_gcm_aad_update(key, ghash_acc, buf, 16, flags); 1310 pos = 0; 1311 } 1312 len = len_this_page; 1313 if (unlikely(assoclen)) /* Not the last segment yet? */ 1314 len = round_down(len, 16); 1315 aes_gcm_aad_update(key, ghash_acc, src, len, flags); 1316 src += len; 1317 len_this_page -= len; 1318 if (unlikely(len_this_page)) { 1319 memcpy(buf, src, len_this_page); 1320 pos = len_this_page; 1321 } 1322 next: 1323 scatterwalk_unmap(mapped); 1324 scatterwalk_pagedone(&walk, 0, assoclen); 1325 if (need_resched()) { 1326 kernel_fpu_end(); 1327 kernel_fpu_begin(); 1328 } 1329 } 1330 if (unlikely(pos)) 1331 aes_gcm_aad_update(key, ghash_acc, buf, pos, flags); 1332 } 1333 1334 1335 /* __always_inline to optimize out the branches based on @flags */ 1336 static __always_inline int 1337 gcm_crypt(struct aead_request *req, int flags) 1338 { 1339 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 1340 const struct aes_gcm_key *key = aes_gcm_key_get(tfm, flags); 1341 unsigned int assoclen = req->assoclen; 1342 struct skcipher_walk walk; 1343 unsigned int nbytes; 1344 u8 ghash_acc[16]; /* GHASH accumulator */ 1345 u32 le_ctr[4]; /* Counter in little-endian format */ 1346 int taglen; 1347 int err; 1348 1349 /* Initialize the counter and determine the associated data length. */ 1350 le_ctr[0] = 2; 1351 if (flags & FLAG_RFC4106) { 1352 if (unlikely(assoclen != 16 && assoclen != 20)) 1353 return -EINVAL; 1354 assoclen -= 8; 1355 le_ctr[1] = get_unaligned_be32(req->iv + 4); 1356 le_ctr[2] = get_unaligned_be32(req->iv + 0); 1357 le_ctr[3] = key->rfc4106_nonce; /* already byte-swapped */ 1358 } else { 1359 le_ctr[1] = get_unaligned_be32(req->iv + 8); 1360 le_ctr[2] = get_unaligned_be32(req->iv + 4); 1361 le_ctr[3] = get_unaligned_be32(req->iv + 0); 1362 } 1363 1364 /* Begin walking through the plaintext or ciphertext. */ 1365 if (flags & FLAG_ENC) 1366 err = skcipher_walk_aead_encrypt(&walk, req, false); 1367 else 1368 err = skcipher_walk_aead_decrypt(&walk, req, false); 1369 1370 /* 1371 * Since the AES-GCM assembly code requires that at least three assembly 1372 * functions be called to process any message (this is needed to support 1373 * incremental updates cleanly), to reduce overhead we try to do all 1374 * three calls in the same kernel FPU section if possible. We close the 1375 * section and start a new one if there are multiple data segments or if 1376 * rescheduling is needed while processing the associated data. 1377 */ 1378 kernel_fpu_begin(); 1379 1380 /* Pass the associated data through GHASH. */ 1381 gcm_process_assoc(key, ghash_acc, req->src, assoclen, flags); 1382 1383 /* En/decrypt the data and pass the ciphertext through GHASH. */ 1384 while ((nbytes = walk.nbytes) != 0) { 1385 if (unlikely(nbytes < walk.total)) { 1386 /* 1387 * Non-last segment. In this case, the assembly 1388 * function requires that the length be a multiple of 16 1389 * (AES_BLOCK_SIZE) bytes. 
The needed buffering of up 1390 * to 16 bytes is handled by the skcipher_walk. Here we 1391 * just need to round down to a multiple of 16. 1392 */ 1393 nbytes = round_down(nbytes, AES_BLOCK_SIZE); 1394 aes_gcm_update(key, le_ctr, ghash_acc, 1395 walk.src.virt.addr, walk.dst.virt.addr, 1396 nbytes, flags); 1397 le_ctr[0] += nbytes / AES_BLOCK_SIZE; 1398 kernel_fpu_end(); 1399 err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 1400 kernel_fpu_begin(); 1401 } else { 1402 /* Last segment: process all remaining data. */ 1403 aes_gcm_update(key, le_ctr, ghash_acc, 1404 walk.src.virt.addr, walk.dst.virt.addr, 1405 nbytes, flags); 1406 err = skcipher_walk_done(&walk, 0); 1407 /* 1408 * The low word of the counter isn't used by the 1409 * finalize, so there's no need to increment it here. 1410 */ 1411 } 1412 } 1413 if (err) 1414 goto out; 1415 1416 /* Finalize */ 1417 taglen = crypto_aead_authsize(tfm); 1418 if (flags & FLAG_ENC) { 1419 /* Finish computing the auth tag. */ 1420 aes_gcm_enc_final(key, le_ctr, ghash_acc, assoclen, 1421 req->cryptlen, flags); 1422 1423 /* Store the computed auth tag in the dst scatterlist. */ 1424 scatterwalk_map_and_copy(ghash_acc, req->dst, req->assoclen + 1425 req->cryptlen, taglen, 1); 1426 } else { 1427 unsigned int datalen = req->cryptlen - taglen; 1428 u8 tag[16]; 1429 1430 /* Get the transmitted auth tag from the src scatterlist. */ 1431 scatterwalk_map_and_copy(tag, req->src, req->assoclen + datalen, 1432 taglen, 0); 1433 /* 1434 * Finish computing the auth tag and compare it to the 1435 * transmitted one. The assembly function does the actual tag 1436 * comparison. Here, just check the boolean result. 1437 */ 1438 if (!aes_gcm_dec_final(key, le_ctr, ghash_acc, assoclen, 1439 datalen, tag, taglen, flags)) 1440 err = -EBADMSG; 1441 } 1442 out: 1443 kernel_fpu_end(); 1444 return err; 1445 } 1446 1447 #define DEFINE_GCM_ALGS(suffix, flags, generic_driver_name, rfc_driver_name, \ 1448 ctxsize, priority) \ 1449 \ 1450 static int gcm_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \ 1451 unsigned int keylen) \ 1452 { \ 1453 return gcm_setkey(tfm, raw_key, keylen, (flags)); \ 1454 } \ 1455 \ 1456 static int gcm_encrypt_##suffix(struct aead_request *req) \ 1457 { \ 1458 return gcm_crypt(req, (flags) | FLAG_ENC); \ 1459 } \ 1460 \ 1461 static int gcm_decrypt_##suffix(struct aead_request *req) \ 1462 { \ 1463 return gcm_crypt(req, (flags)); \ 1464 } \ 1465 \ 1466 static int rfc4106_setkey_##suffix(struct crypto_aead *tfm, const u8 *raw_key, \ 1467 unsigned int keylen) \ 1468 { \ 1469 return gcm_setkey(tfm, raw_key, keylen, (flags) | FLAG_RFC4106); \ 1470 } \ 1471 \ 1472 static int rfc4106_encrypt_##suffix(struct aead_request *req) \ 1473 { \ 1474 return gcm_crypt(req, (flags) | FLAG_RFC4106 | FLAG_ENC); \ 1475 } \ 1476 \ 1477 static int rfc4106_decrypt_##suffix(struct aead_request *req) \ 1478 { \ 1479 return gcm_crypt(req, (flags) | FLAG_RFC4106); \ 1480 } \ 1481 \ 1482 static struct aead_alg aes_gcm_algs_##suffix[] = { { \ 1483 .setkey = gcm_setkey_##suffix, \ 1484 .setauthsize = generic_gcmaes_set_authsize, \ 1485 .encrypt = gcm_encrypt_##suffix, \ 1486 .decrypt = gcm_decrypt_##suffix, \ 1487 .ivsize = GCM_AES_IV_SIZE, \ 1488 .chunksize = AES_BLOCK_SIZE, \ 1489 .maxauthsize = 16, \ 1490 .base = { \ 1491 .cra_name = "__gcm(aes)", \ 1492 .cra_driver_name = "__" generic_driver_name, \ 1493 .cra_priority = (priority), \ 1494 .cra_flags = CRYPTO_ALG_INTERNAL, \ 1495 .cra_blocksize = 1, \ 1496 .cra_ctxsize = (ctxsize), \ 1497 .cra_module = THIS_MODULE, \ 
1498 }, \ 1499 }, { \ 1500 .setkey = rfc4106_setkey_##suffix, \ 1501 .setauthsize = common_rfc4106_set_authsize, \ 1502 .encrypt = rfc4106_encrypt_##suffix, \ 1503 .decrypt = rfc4106_decrypt_##suffix, \ 1504 .ivsize = GCM_RFC4106_IV_SIZE, \ 1505 .chunksize = AES_BLOCK_SIZE, \ 1506 .maxauthsize = 16, \ 1507 .base = { \ 1508 .cra_name = "__rfc4106(gcm(aes))", \ 1509 .cra_driver_name = "__" rfc_driver_name, \ 1510 .cra_priority = (priority), \ 1511 .cra_flags = CRYPTO_ALG_INTERNAL, \ 1512 .cra_blocksize = 1, \ 1513 .cra_ctxsize = (ctxsize), \ 1514 .cra_module = THIS_MODULE, \ 1515 }, \ 1516 } }; \ 1517 \ 1518 static struct simd_aead_alg *aes_gcm_simdalgs_##suffix[2] \ 1519 1520 /* aes_gcm_algs_aesni */ 1521 DEFINE_GCM_ALGS(aesni, /* no flags */ 0, 1522 "generic-gcm-aesni", "rfc4106-gcm-aesni", 1523 AES_GCM_KEY_AESNI_SIZE, 400); 1524 1525 /* aes_gcm_algs_aesni_avx */ 1526 DEFINE_GCM_ALGS(aesni_avx, FLAG_AVX, 1527 "generic-gcm-aesni-avx", "rfc4106-gcm-aesni-avx", 1528 AES_GCM_KEY_AESNI_SIZE, 500); 1529 1530 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) 1531 /* aes_gcm_algs_vaes_avx10_256 */ 1532 DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256, 1533 "generic-gcm-vaes-avx10_256", "rfc4106-gcm-vaes-avx10_256", 1534 AES_GCM_KEY_AVX10_SIZE, 700); 1535 1536 /* aes_gcm_algs_vaes_avx10_512 */ 1537 DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512, 1538 "generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512", 1539 AES_GCM_KEY_AVX10_SIZE, 800); 1540 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ 1541 1542 /* 1543 * This is a list of CPU models that are known to suffer from downclocking when 1544 * zmm registers (512-bit vectors) are used. On these CPUs, the AES mode 1545 * implementations with zmm registers won't be used by default. Implementations 1546 * with ymm registers (256-bit vectors) will be used by default instead. 1547 */ 1548 static const struct x86_cpu_id zmm_exclusion_list[] = { 1549 X86_MATCH_VFM(INTEL_SKYLAKE_X, 0), 1550 X86_MATCH_VFM(INTEL_ICELAKE_X, 0), 1551 X86_MATCH_VFM(INTEL_ICELAKE_D, 0), 1552 X86_MATCH_VFM(INTEL_ICELAKE, 0), 1553 X86_MATCH_VFM(INTEL_ICELAKE_L, 0), 1554 X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0), 1555 X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0), 1556 X86_MATCH_VFM(INTEL_TIGERLAKE, 0), 1557 /* Allow Rocket Lake and later, and Sapphire Rapids and later. */ 1558 /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). 
*/ 1559 {}, 1560 }; 1561 1562 static int __init register_avx_algs(void) 1563 { 1564 int err; 1565 1566 if (!boot_cpu_has(X86_FEATURE_AVX)) 1567 return 0; 1568 err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1, 1569 &aes_xts_simdalg_aesni_avx); 1570 if (err) 1571 return err; 1572 err = simd_register_aeads_compat(aes_gcm_algs_aesni_avx, 1573 ARRAY_SIZE(aes_gcm_algs_aesni_avx), 1574 aes_gcm_simdalgs_aesni_avx); 1575 if (err) 1576 return err; 1577 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) 1578 if (!boot_cpu_has(X86_FEATURE_AVX2) || 1579 !boot_cpu_has(X86_FEATURE_VAES) || 1580 !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) || 1581 !boot_cpu_has(X86_FEATURE_PCLMULQDQ) || 1582 !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) 1583 return 0; 1584 err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1, 1585 &aes_xts_simdalg_vaes_avx2); 1586 if (err) 1587 return err; 1588 1589 if (!boot_cpu_has(X86_FEATURE_AVX512BW) || 1590 !boot_cpu_has(X86_FEATURE_AVX512VL) || 1591 !boot_cpu_has(X86_FEATURE_BMI2) || 1592 !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | 1593 XFEATURE_MASK_AVX512, NULL)) 1594 return 0; 1595 1596 err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1, 1597 &aes_xts_simdalg_vaes_avx10_256); 1598 if (err) 1599 return err; 1600 err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_256, 1601 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256), 1602 aes_gcm_simdalgs_vaes_avx10_256); 1603 if (err) 1604 return err; 1605 1606 if (x86_match_cpu(zmm_exclusion_list)) { 1607 int i; 1608 1609 aes_xts_alg_vaes_avx10_512.base.cra_priority = 1; 1610 for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++) 1611 aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1; 1612 } 1613 1614 err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1, 1615 &aes_xts_simdalg_vaes_avx10_512); 1616 if (err) 1617 return err; 1618 err = simd_register_aeads_compat(aes_gcm_algs_vaes_avx10_512, 1619 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512), 1620 aes_gcm_simdalgs_vaes_avx10_512); 1621 if (err) 1622 return err; 1623 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */ 1624 return 0; 1625 } 1626 1627 static void unregister_avx_algs(void) 1628 { 1629 if (aes_xts_simdalg_aesni_avx) 1630 simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1, 1631 &aes_xts_simdalg_aesni_avx); 1632 if (aes_gcm_simdalgs_aesni_avx[0]) 1633 simd_unregister_aeads(aes_gcm_algs_aesni_avx, 1634 ARRAY_SIZE(aes_gcm_algs_aesni_avx), 1635 aes_gcm_simdalgs_aesni_avx); 1636 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ) 1637 if (aes_xts_simdalg_vaes_avx2) 1638 simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1, 1639 &aes_xts_simdalg_vaes_avx2); 1640 if (aes_xts_simdalg_vaes_avx10_256) 1641 simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1, 1642 &aes_xts_simdalg_vaes_avx10_256); 1643 if (aes_gcm_simdalgs_vaes_avx10_256[0]) 1644 simd_unregister_aeads(aes_gcm_algs_vaes_avx10_256, 1645 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256), 1646 aes_gcm_simdalgs_vaes_avx10_256); 1647 if (aes_xts_simdalg_vaes_avx10_512) 1648 simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1, 1649 &aes_xts_simdalg_vaes_avx10_512); 1650 if (aes_gcm_simdalgs_vaes_avx10_512[0]) 1651 simd_unregister_aeads(aes_gcm_algs_vaes_avx10_512, 1652 ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512), 1653 aes_gcm_simdalgs_vaes_avx10_512); 1654 #endif 1655 } 1656 #else /* CONFIG_X86_64 */ 1657 static struct aead_alg aes_gcm_algs_aesni[0]; 1658 static struct simd_aead_alg *aes_gcm_simdalgs_aesni[0]; 1659 1660 static int 
__init register_avx_algs(void)
{
	return 0;
}

static void unregister_avx_algs(void)
{
}
#endif /* !CONFIG_X86_64 */

static const struct x86_cpu_id aesni_cpu_id[] = {
	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);

static int __init aesni_init(void)
{
	int err;

	if (!x86_match_cpu(aesni_cpu_id))
		return -ENODEV;
#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_AVX)) {
		/* optimize performance of ctr mode encryption transform */
		static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
		pr_info("AES CTR mode by8 optimization enabled\n");
	}
#endif /* CONFIG_X86_64 */

	err = crypto_register_alg(&aesni_cipher_alg);
	if (err)
		return err;

	err = simd_register_skciphers_compat(aesni_skciphers,
					     ARRAY_SIZE(aesni_skciphers),
					     aesni_simd_skciphers);
	if (err)
		goto unregister_cipher;

	err = simd_register_aeads_compat(aes_gcm_algs_aesni,
					 ARRAY_SIZE(aes_gcm_algs_aesni),
					 aes_gcm_simdalgs_aesni);
	if (err)
		goto unregister_skciphers;

#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_AVX))
		err = simd_register_skciphers_compat(&aesni_xctr, 1,
						     &aesni_simd_xctr);
	if (err)
		goto unregister_aeads;
#endif /* CONFIG_X86_64 */

	err = register_avx_algs();
	if (err)
		goto unregister_avx;

	return 0;

unregister_avx:
	unregister_avx_algs();
#ifdef CONFIG_X86_64
	if (aesni_simd_xctr)
		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
unregister_aeads:
#endif /* CONFIG_X86_64 */
	simd_unregister_aeads(aes_gcm_algs_aesni,
			      ARRAY_SIZE(aes_gcm_algs_aesni),
			      aes_gcm_simdalgs_aesni);
unregister_skciphers:
	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
				  aesni_simd_skciphers);
unregister_cipher:
	crypto_unregister_alg(&aesni_cipher_alg);
	return err;
}

static void __exit aesni_exit(void)
{
	simd_unregister_aeads(aes_gcm_algs_aesni,
			      ARRAY_SIZE(aes_gcm_algs_aesni),
			      aes_gcm_simdalgs_aesni);
	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
				  aesni_simd_skciphers);
	crypto_unregister_alg(&aesni_cipher_alg);
#ifdef CONFIG_X86_64
	if (boot_cpu_has(X86_FEATURE_AVX))
		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
#endif /* CONFIG_X86_64 */
	unregister_avx_algs();
}

late_initcall(aesni_init);
module_exit(aesni_exit);

MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("aes");
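
/*
 * Editor's note -- illustrative usage sketch, not part of the original
 * driver.  It shows how a kernel caller would reach the XTS implementations
 * registered above through the generic crypto API: requesting "xts(aes)"
 * lets the crypto core pick the highest-priority provider the CPU supports
 * (e.g. xts-aes-vaes-avx10_512 at priority 800, falling back to
 * xts-aes-aesni).  The function name and buffer handling are hypothetical;
 * @buf must be in linear kernel memory (e.g. kmalloc'ed), and @keylen is the
 * combined XTS key length (32, 48 or 64 bytes).  Kept under "#if 0" so it is
 * never built.
 */
#if 0
static int example_xts_encrypt_buffer(u8 *buf, unsigned int len,
				      const u8 *key, unsigned int keylen,
				      u8 iv[AES_BLOCK_SIZE])
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int err;

	tfm = crypto_alloc_skcipher("xts(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, keylen);
	if (err)
		goto free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto free_tfm;
	}

	sg_init_one(&sg, buf, len);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
					   CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, len, iv);

	/* In-place encryption; the IV is the 16-byte XTS tweak. */
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}
#endif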
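
/*
 * Editor's note -- a minimal sketch (not part of the original driver) of the
 * split that ctr_crypt() performs on each walk segment: whole blocks go to
 * the by8 assembly through the static call, and a trailing partial block is
 * handled by encrypting one extra keystream block and XOR'ing it in.  The
 * function name is hypothetical; it assumes CONFIG_X86_64 and a single
 * contiguous buffer, whereas the real code does this inside the
 * skcipher_walk loop.
 */
#if 0
static void example_ctr_segment(struct crypto_aes_ctx *ctx, u8 *dst,
				const u8 *src, unsigned int nbytes,
				u8 iv[AES_BLOCK_SIZE])
{
	unsigned int full = nbytes & AES_BLOCK_MASK;	   /* multiple of 16 */
	unsigned int tail = nbytes & (AES_BLOCK_SIZE - 1); /* 0..15 bytes */
	u8 keystream[AES_BLOCK_SIZE];

	kernel_fpu_begin();
	if (full)
		/* The assembly also advances the counter in @iv. */
		static_call(aesni_ctr_enc_tfm)(ctx, dst, src, full, iv);
	if (tail) {
		aesni_enc(ctx, keystream, iv);
		crypto_xor_cpy(dst + full, src + full, keystream, tail);
		crypto_inc(iv, AES_BLOCK_SIZE);
	}
	kernel_fpu_end();
}
#endif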
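
/*
 * Editor's note -- sketch (not part of the original driver) of the counter
 * initialization at the top of gcm_crypt().  The 32-bit block counter
 * (le_ctr[0]) starts at 2 because counter block 1 is reserved for computing
 * the authentication tag, and the IV words are stored in little-endian order
 * so the assembly can increment the counter cheaply.  The function name is
 * hypothetical; the logic mirrors the RFC4106 and plain-GCM branches above.
 */
#if 0
static void example_gcm_init_le_ctr(u32 le_ctr[4], const u8 *iv,
				    u32 rfc4106_nonce, bool rfc4106)
{
	le_ctr[0] = 2;
	if (rfc4106) {
		/* 8-byte per-request IV; the 4-byte nonce came with the key */
		le_ctr[1] = get_unaligned_be32(iv + 4);
		le_ctr[2] = get_unaligned_be32(iv + 0);
		le_ctr[3] = rfc4106_nonce;	/* already byte-swapped */
	} else {
		/* standard 12-byte GCM IV from the request */
		le_ctr[1] = get_unaligned_be32(iv + 8);
		le_ctr[2] = get_unaligned_be32(iv + 4);
		le_ctr[3] = get_unaligned_be32(iv + 0);
	}
}
#endif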
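
/*
 * Editor's note -- sketch (not part of the original driver) of the block
 * accounting used by cts_cbc_encrypt()/cts_cbc_decrypt(): everything except
 * the last full block and the partial block is plain CBC, and the remainder
 * (17..32 bytes) is handed to the CTS assembly in one call.  A message of
 * exactly one block is plain CBC with no stealing.  The helper name is
 * hypothetical and it assumes the caller already rejected
 * cryptlen < AES_BLOCK_SIZE.
 */
#if 0
static void example_cts_split(unsigned int cryptlen,
			      unsigned int *cbc_bytes, unsigned int *cts_bytes)
{
	int cbc_blocks = DIV_ROUND_UP(cryptlen, AES_BLOCK_SIZE) - 2;

	/* A single-block message is plain CBC; no stealing is needed. */
	if (cryptlen == AES_BLOCK_SIZE)
		cbc_blocks = 1;

	*cbc_bytes = (cbc_blocks > 0) ? cbc_blocks * AES_BLOCK_SIZE : 0;
	*cts_bytes = cryptlen - *cbc_bytes;	/* 0, or 17..32 bytes */
}
#endif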