// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
 *
 * Copyright (C) 2015 - 2018 Linaro Ltd.
 * Copyright (C) 2023 Google LLC.
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
#include <crypto/gf128mul.h>
#include <crypto/ghash.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/errno.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/unaligned.h>

MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("ghash");
MODULE_ALIAS_CRYPTO("gcm(aes)");
MODULE_ALIAS_CRYPTO("rfc4106(gcm(aes))");

#define RFC4106_NONCE_SIZE	4

struct ghash_key {
	be128	k;
	u64	h[][2];
};

struct gcm_key {
	u64	h[4][2];
	u32	rk[AES_MAX_KEYLENGTH_U32];
	int	rounds;
	u8	nonce[];	// for RFC4106 nonce
};

struct arm_ghash_desc_ctx {
	u64 digest[GHASH_DIGEST_SIZE / sizeof(u64)];
};

asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
				       u64 const h[][2], const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
				      u64 const h[][2], const char *head);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);

static int ghash_init(struct shash_desc *desc)
{
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	*ctx = (struct arm_ghash_desc_ctx){};
	return 0;
}

static void ghash_do_update(int blocks, u64 dg[], const char *src,
			    struct ghash_key *key, const char *head)
{
	kernel_neon_begin();
	if (static_branch_likely(&use_p64))
		pmull_ghash_update_p64(blocks, dg, src, key->h, head);
	else
		pmull_ghash_update_p8(blocks, dg, src, key->h, head);
	kernel_neon_end();
}

static int ghash_update(struct shash_desc *desc, const u8 *src,
			unsigned int len)
{
	struct ghash_key *key = crypto_shash_ctx(desc->tfm);
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	int blocks;

	blocks = len / GHASH_BLOCK_SIZE;
	ghash_do_update(blocks, ctx->digest, src, key, NULL);
	return len - blocks * GHASH_BLOCK_SIZE;
}

static int ghash_export(struct shash_desc *desc, void *out)
{
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	u8 *dst = out;

	put_unaligned_be64(ctx->digest[1], dst);
	put_unaligned_be64(ctx->digest[0], dst + 8);
	return 0;
}

static int ghash_import(struct shash_desc *desc, const void *in)
{
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);
	const u8 *src = in;

	ctx->digest[1] = get_unaligned_be64(src);
	ctx->digest[0] = get_unaligned_be64(src + 8);
	return 0;
}

static int ghash_finup(struct shash_desc *desc, const u8 *src,
		       unsigned int len, u8 *dst)
{
	struct ghash_key *key = crypto_shash_ctx(desc->tfm);
	struct arm_ghash_desc_ctx *ctx = shash_desc_ctx(desc);

	if (len) {
		u8 buf[GHASH_BLOCK_SIZE] = {};

		memcpy(buf, src, len);
		ghash_do_update(1, ctx->digest, buf, key, NULL);
		memzero_explicit(buf, sizeof(buf));
	}
	return ghash_export(desc, dst);
}

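/*
 * ghash_reflect() turns the big-endian hash key into the representation used
 * by the PMULL assembly: the 128-bit value is shifted left by one bit (i.e.
 * multiplied by x), and if the top bit shifts out, the result is reduced by
 * folding the GHASH polynomial back in: the carry sets the low bit and the
 * high word is XORed with the 0xc2 reduction constant.
 */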
static void ghash_reflect(u64 h[], const be128 *k)
{
	u64 carry = be64_to_cpu(k->a) >> 63;

	h[0] = (be64_to_cpu(k->b) << 1) | carry;
	h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);

	if (carry)
		h[1] ^= 0xc200000000000000UL;
}

static int ghash_setkey(struct crypto_shash *tfm,
			const u8 *inkey, unsigned int keylen)
{
	struct ghash_key *key = crypto_shash_ctx(tfm);

	if (keylen != GHASH_BLOCK_SIZE)
		return -EINVAL;

	/* needed for the fallback */
	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
	ghash_reflect(key->h[0], &key->k);

	if (static_branch_likely(&use_p64)) {
		be128 h = key->k;

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[1], &h);

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[2], &h);

		gf128mul_lle(&h, &key->k);
		ghash_reflect(key->h[3], &h);
	}
	return 0;
}

static struct shash_alg ghash_alg = {
	.digestsize		= GHASH_DIGEST_SIZE,
	.init			= ghash_init,
	.update			= ghash_update,
	.finup			= ghash_finup,
	.setkey			= ghash_setkey,
	.export			= ghash_export,
	.import			= ghash_import,
	.descsize		= sizeof(struct arm_ghash_desc_ctx),
	.statesize		= sizeof(struct ghash_desc_ctx),

	.base.cra_name		= "ghash",
	.base.cra_driver_name	= "ghash-ce",
	.base.cra_priority	= 300,
	.base.cra_flags		= CRYPTO_AHASH_ALG_BLOCK_ONLY,
	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct ghash_key) + sizeof(u64[2]),
	.base.cra_module	= THIS_MODULE,
};

asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], const char *src,
				  struct gcm_key const *k, char *dst,
				  const char *iv, int rounds, u32 counter);

asmlinkage void pmull_gcm_enc_final(int blocks, u64 dg[], char *tag,
				    struct gcm_key const *k, char *head,
				    const char *iv, int rounds, u32 counter);

asmlinkage void pmull_gcm_decrypt(int bytes, u64 dg[], const char *src,
				  struct gcm_key const *k, char *dst,
				  const char *iv, int rounds, u32 counter);

asmlinkage int pmull_gcm_dec_final(int bytes, u64 dg[], char *tag,
				   struct gcm_key const *k, char *head,
				   const char *iv, int rounds, u32 counter,
				   const char *otag, int authsize);

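/*
 * Derive the GHASH key H by encrypting an all-zero block with the AES key,
 * then store H, H^2, H^3 and H^4 in ghash_reflect()ed form.  Keeping the
 * higher powers around lets the PMULL assembly fold up to four blocks into a
 * single GHASH reduction.
 */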
static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
			  unsigned int keylen)
{
	struct gcm_key *ctx = crypto_aead_ctx(tfm);
	struct aes_enckey aes_key;
	be128 h, k;
	int ret;

	ret = aes_prepareenckey(&aes_key, inkey, keylen);
	if (ret)
		return -EINVAL;

	aes_encrypt(&aes_key, (u8 *)&k, (u8[AES_BLOCK_SIZE]){});

	/*
	 * Note: this assumes that the arm implementation of the AES library
	 * stores the standard round keys in k.rndkeys.
	 */
	memcpy(ctx->rk, aes_key.k.rndkeys, sizeof(ctx->rk));
	ctx->rounds = 6 + keylen / 4;

	memzero_explicit(&aes_key, sizeof(aes_key));

	ghash_reflect(ctx->h[0], &k);

	h = k;
	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[1], &h);

	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[2], &h);

	gf128mul_lle(&h, &k);
	ghash_reflect(ctx->h[3], &h);

	return 0;
}

static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	return crypto_gcm_check_authsize(authsize);
}

static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
			   int *buf_count, struct gcm_key *ctx)
{
	if (*buf_count > 0) {
		int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);

		memcpy(&buf[*buf_count], src, buf_added);

		*buf_count += buf_added;
		src += buf_added;
		count -= buf_added;
	}

	if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
		int blocks = count / GHASH_BLOCK_SIZE;

		pmull_ghash_update_p64(blocks, dg, src, ctx->h,
				       *buf_count ? buf : NULL);

		src += blocks * GHASH_BLOCK_SIZE;
		count %= GHASH_BLOCK_SIZE;
		*buf_count = 0;
	}

	if (count > 0) {
		memcpy(buf, src, count);
		*buf_count = count;
	}
}

static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 buf[GHASH_BLOCK_SIZE];
	struct scatter_walk walk;
	int buf_count = 0;

	scatterwalk_start(&walk, req->src);

	do {
		unsigned int n;

		n = scatterwalk_next(&walk, len);
		gcm_update_mac(dg, walk.addr, n, buf, &buf_count, ctx);
		scatterwalk_done_src(&walk, n);

		if (unlikely(len / SZ_4K > (len - n) / SZ_4K)) {
			kernel_neon_end();
			kernel_neon_begin();
		}

		len -= n;
	} while (len);

	if (buf_count) {
		memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
		pmull_ghash_update_p64(1, dg, buf, ctx->h, NULL);
	}
}

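/*
 * With a 96-bit IV, GCM reserves counter value 1 for the keystream block that
 * encrypts the authentication tag, so the counter for the actual data starts
 * at 2.  Any associated data is folded into dg[] first, before the ciphertext
 * blocks are processed.
 */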
static int gcm_encrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	u32 counter = 2;
	u64 dg[2] = {};
	be128 lengths;
	const u8 *src;
	u8 *tag, *dst;
	int tail, err;

	err = skcipher_walk_aead_encrypt(&walk, req, false);

	kernel_neon_begin();

	if (assoclen)
		gcm_calculate_auth_mac(req, dg, assoclen);

	src = walk.src.virt.addr;
	dst = walk.dst.virt.addr;

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		int nblocks = walk.nbytes / AES_BLOCK_SIZE;

		pmull_gcm_encrypt(nblocks, dg, src, ctx, dst, iv,
				  ctx->rounds, counter);
		counter += nblocks;

		if (walk.nbytes == walk.total) {
			src += nblocks * AES_BLOCK_SIZE;
			dst += nblocks * AES_BLOCK_SIZE;
			break;
		}

		kernel_neon_end();

		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
		if (err)
			return err;

		src = walk.src.virt.addr;
		dst = walk.dst.virt.addr;

		kernel_neon_begin();
	}

	lengths.a = cpu_to_be64(assoclen * 8);
	lengths.b = cpu_to_be64(req->cryptlen * 8);

	tag = (u8 *)&lengths;
	tail = walk.nbytes % AES_BLOCK_SIZE;

	/*
	 * Bounce via a buffer unless we are encrypting in place and src/dst
	 * are not pointing to the start of the walk buffer. In that case, we
	 * can do a NEON load/xor/store sequence in place as long as we move
	 * the plain/ciphertext and keystream to the start of the register. If
	 * not, do a memcpy() to the end of the buffer so we can reuse the same
	 * logic.
	 */
	if (unlikely(tail && (tail == walk.nbytes || src != dst)))
		src = memcpy(buf + sizeof(buf) - tail, src, tail);

	pmull_gcm_enc_final(tail, dg, tag, ctx, (u8 *)src, iv,
			    ctx->rounds, counter);
	kernel_neon_end();

	if (unlikely(tail && src != dst))
		memcpy(dst, src, tail);

	if (walk.nbytes) {
		err = skcipher_walk_done(&walk, 0);
		if (err)
			return err;
	}

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
				 crypto_aead_authsize(aead), 1);

	return 0;
}

static int gcm_decrypt(struct aead_request *req, const u8 *iv, u32 assoclen)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	int authsize = crypto_aead_authsize(aead);
	struct skcipher_walk walk;
	u8 otag[AES_BLOCK_SIZE];
	u8 buf[AES_BLOCK_SIZE];
	u32 counter = 2;
	u64 dg[2] = {};
	be128 lengths;
	const u8 *src;
	u8 *tag, *dst;
	int tail, err, ret;

	scatterwalk_map_and_copy(otag, req->src,
				 req->assoclen + req->cryptlen - authsize,
				 authsize, 0);

	err = skcipher_walk_aead_decrypt(&walk, req, false);

	kernel_neon_begin();

	if (assoclen)
		gcm_calculate_auth_mac(req, dg, assoclen);

	src = walk.src.virt.addr;
	dst = walk.dst.virt.addr;

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		int nblocks = walk.nbytes / AES_BLOCK_SIZE;

		pmull_gcm_decrypt(nblocks, dg, src, ctx, dst, iv,
				  ctx->rounds, counter);
		counter += nblocks;

		if (walk.nbytes == walk.total) {
			src += nblocks * AES_BLOCK_SIZE;
			dst += nblocks * AES_BLOCK_SIZE;
			break;
		}

		kernel_neon_end();

		err = skcipher_walk_done(&walk,
					 walk.nbytes % AES_BLOCK_SIZE);
		if (err)
			return err;

		src = walk.src.virt.addr;
		dst = walk.dst.virt.addr;

		kernel_neon_begin();
	}

	lengths.a = cpu_to_be64(assoclen * 8);
	lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);

	tag = (u8 *)&lengths;
	tail = walk.nbytes % AES_BLOCK_SIZE;

	if (unlikely(tail && (tail == walk.nbytes || src != dst)))
		src = memcpy(buf + sizeof(buf) - tail, src, tail);

	ret = pmull_gcm_dec_final(tail, dg, tag, ctx, (u8 *)src, iv,
				  ctx->rounds, counter, otag, authsize);
	kernel_neon_end();

	if (unlikely(tail && src != dst))
		memcpy(dst, src, tail);

	if (walk.nbytes) {
		err = skcipher_walk_done(&walk, 0);
		if (err)
			return err;
	}

	return ret ? -EBADMSG : 0;
}

static int gcm_aes_encrypt(struct aead_request *req)
{
	return gcm_encrypt(req, req->iv, req->assoclen);
}

static int gcm_aes_decrypt(struct aead_request *req)
{
	return gcm_decrypt(req, req->iv, req->assoclen);
}

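/*
 * RFC 4106 (GCM-ESP) keys carry a 4-byte salt after the AES key material.
 * The salt is combined with the 8-byte explicit IV from each request to form
 * the 12-byte GCM nonce.  The explicit IV bytes are also counted in
 * req->assoclen, so GCM_RFC4106_IV_SIZE is subtracted again so that only the
 * actual associated data is authenticated.
 */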
static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
			  unsigned int keylen)
{
	struct gcm_key *ctx = crypto_aead_ctx(tfm);
	int err;

	keylen -= RFC4106_NONCE_SIZE;
	err = gcm_aes_setkey(tfm, inkey, keylen);
	if (err)
		return err;

	memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
	return 0;
}

static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
	return crypto_rfc4106_check_authsize(authsize);
}

static int rfc4106_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 iv[GCM_AES_IV_SIZE];

	memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
	memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);

	return crypto_ipsec_check_assoclen(req->assoclen) ?:
	       gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}

static int rfc4106_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct gcm_key *ctx = crypto_aead_ctx(aead);
	u8 iv[GCM_AES_IV_SIZE];

	memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
	memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);

	return crypto_ipsec_check_assoclen(req->assoclen) ?:
	       gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}

static struct aead_alg gcm_aes_algs[] = {{
	.ivsize			= GCM_AES_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= gcm_aes_setkey,
	.setauthsize		= gcm_aes_setauthsize,
	.encrypt		= gcm_aes_encrypt,
	.decrypt		= gcm_aes_decrypt,

	.base.cra_name		= "gcm(aes)",
	.base.cra_driver_name	= "gcm-aes-ce",
	.base.cra_priority	= 400,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_key),
	.base.cra_module	= THIS_MODULE,
}, {
	.ivsize			= GCM_RFC4106_IV_SIZE,
	.chunksize		= AES_BLOCK_SIZE,
	.maxauthsize		= AES_BLOCK_SIZE,
	.setkey			= rfc4106_setkey,
	.setauthsize		= rfc4106_setauthsize,
	.encrypt		= rfc4106_encrypt,
	.decrypt		= rfc4106_decrypt,

	.base.cra_name		= "rfc4106(gcm(aes))",
	.base.cra_driver_name	= "rfc4106-gcm-aes-ce",
	.base.cra_priority	= 400,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct gcm_key) + RFC4106_NONCE_SIZE,
	.base.cra_module	= THIS_MODULE,
}};

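/*
 * The GHASH fallback path only needs NEON (vmull.p8), while the GCM AEADs
 * require the vmull.p64 PMULL extension.  When PMULL is available, the GCM
 * algorithms are registered and the ghash context is grown by three extra
 * key entries to hold H^2..H^4.
 */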
static int __init ghash_ce_mod_init(void)
{
	int err;

	if (!(elf_hwcap & HWCAP_NEON))
		return -ENODEV;

	if (elf_hwcap2 & HWCAP2_PMULL) {
		err = crypto_register_aeads(gcm_aes_algs,
					    ARRAY_SIZE(gcm_aes_algs));
		if (err)
			return err;
		ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
		static_branch_enable(&use_p64);
	}

	err = crypto_register_shash(&ghash_alg);
	if (err)
		goto err_aead;

	return 0;

err_aead:
	if (elf_hwcap2 & HWCAP2_PMULL)
		crypto_unregister_aeads(gcm_aes_algs,
					ARRAY_SIZE(gcm_aes_algs));
	return err;
}

static void __exit ghash_ce_mod_exit(void)
{
	crypto_unregister_shash(&ghash_alg);
	if (elf_hwcap2 & HWCAP2_PMULL)
		crypto_unregister_aeads(gcm_aes_algs,
					ARRAY_SIZE(gcm_aes_algs));
}

module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);