// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated GHASH implementation with ARMv8 PMULL instructions.
 *
 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/gcm.h>
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("ghash");

#define GHASH_BLOCK_SIZE	16
#define GHASH_DIGEST_SIZE	16

#define RFC4106_NONCE_SIZE	4

struct ghash_key {
        be128 k;
        u64 h[][2];
};

struct ghash_desc_ctx {
        u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
        u8 buf[GHASH_BLOCK_SIZE];
        u32 count;
};

struct gcm_aes_ctx {
        struct crypto_aes_ctx aes_key;
        u8 nonce[RFC4106_NONCE_SIZE];
        struct ghash_key ghash_key;
};

asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
                                       u64 const h[][2], const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
                                      u64 const h[][2], const char *head);

asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
                                  u64 const h[][2], u64 dg[], u8 ctr[],
                                  u32 const rk[], int rounds, u8 tag[]);
asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
                                 u64 const h[][2], u64 dg[], u8 ctr[],
                                 u32 const rk[], int rounds, const u8 l[],
                                 const u8 tag[], u64 authsize);

static int ghash_init(struct shash_desc *desc)
{
        struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);

        *ctx = (struct ghash_desc_ctx){};
        return 0;
}

static void ghash_do_update(int blocks, u64 dg[], const char *src,
                            struct ghash_key *key, const char *head)
{
        be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };

        do {
                const u8 *in = src;

                if (head) {
                        in = head;
                        blocks++;
                        head = NULL;
                } else {
                        src += GHASH_BLOCK_SIZE;
                }

                crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
                gf128mul_lle(&dst, &key->k);
        } while (--blocks);

        dg[0] = be64_to_cpu(dst.b);
        dg[1] = be64_to_cpu(dst.a);
}

static __always_inline
void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
                          struct ghash_key *key, const char *head,
                          void (*simd_update)(int blocks, u64 dg[],
                                              const char *src,
                                              u64 const h[][2],
                                              const char *head))
{
        if (likely(crypto_simd_usable())) {
                kernel_neon_begin();
                simd_update(blocks, dg, src, key->h, head);
                kernel_neon_end();
        } else {
                ghash_do_update(blocks, dg, src, key, head);
        }
}

/* avoid hogging the CPU for too long */
#define MAX_BLOCKS	(SZ_64K / GHASH_BLOCK_SIZE)
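/*
 * shash .update callback: buffer any partial input and feed complete
 * GHASH blocks to the SIMD helper in chunks of at most MAX_BLOCKS per
 * call, so each kernel_neon_begin()/end() section stays short.
 */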
static int ghash_update(struct shash_desc *desc, const u8 *src,
                        unsigned int len)
{
        struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
        unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

        ctx->count += len;

        if ((partial + len) >= GHASH_BLOCK_SIZE) {
                struct ghash_key *key = crypto_shash_ctx(desc->tfm);
                int blocks;

                if (partial) {
                        int p = GHASH_BLOCK_SIZE - partial;

                        memcpy(ctx->buf + partial, src, p);
                        src += p;
                        len -= p;
                }

                blocks = len / GHASH_BLOCK_SIZE;
                len %= GHASH_BLOCK_SIZE;

                do {
                        int chunk = min(blocks, MAX_BLOCKS);

                        ghash_do_simd_update(chunk, ctx->digest, src, key,
                                             partial ? ctx->buf : NULL,
                                             pmull_ghash_update_p8);

                        blocks -= chunk;
                        src += chunk * GHASH_BLOCK_SIZE;
                        partial = 0;
                } while (unlikely(blocks > 0));
        }
        if (len)
                memcpy(ctx->buf + partial, src, len);
        return 0;
}

static int ghash_final(struct shash_desc *desc, u8 *dst)
{
        struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
        unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;

        if (partial) {
                struct ghash_key *key = crypto_shash_ctx(desc->tfm);

                memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);

                ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL,
                                     pmull_ghash_update_p8);
        }
        put_unaligned_be64(ctx->digest[1], dst);
        put_unaligned_be64(ctx->digest[0], dst + 8);

        memzero_explicit(ctx, sizeof(*ctx));
        return 0;
}

static void ghash_reflect(u64 h[], const be128 *k)
{
        u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;

        h[0] = (be64_to_cpu(k->b) << 1) | carry;
        h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);

        if (carry)
                h[1] ^= 0xc200000000000000UL;
}

static int ghash_setkey(struct crypto_shash *tfm,
                        const u8 *inkey, unsigned int keylen)
{
        struct ghash_key *key = crypto_shash_ctx(tfm);

        if (keylen != GHASH_BLOCK_SIZE)
                return -EINVAL;

        /* needed for the fallback */
        memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);

        ghash_reflect(key->h[0], &key->k);
        return 0;
}

static struct shash_alg ghash_alg = {
        .base.cra_name = "ghash",
        .base.cra_driver_name = "ghash-neon",
        .base.cra_priority = 150,
        .base.cra_blocksize = GHASH_BLOCK_SIZE,
        .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
        .base.cra_module = THIS_MODULE,

        .digestsize = GHASH_DIGEST_SIZE,
        .init = ghash_init,
        .update = ghash_update,
        .final = ghash_final,
        .setkey = ghash_setkey,
        .descsize = sizeof(struct ghash_desc_ctx),
};

static int num_rounds(struct crypto_aes_ctx *ctx)
{
        /*
         * # of rounds specified by AES:
         * 128 bit key		10 rounds
         * 192 bit key		12 rounds
         * 256 bit key		14 rounds
         * => n byte key	=> 6 + (n/4) rounds
         */
        return 6 + ctx->key_length / 4;
}

static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey,
                          unsigned int keylen)
{
        struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
        u8 key[GHASH_BLOCK_SIZE];
        be128 h;
        int ret;

        ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
        if (ret)
                return -EINVAL;

        aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});

        /* needed for the fallback */
        memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);

        ghash_reflect(ctx->ghash_key.h[0], &ctx->ghash_key.k);

        h = ctx->ghash_key.k;
        gf128mul_lle(&h, &ctx->ghash_key.k);
        ghash_reflect(ctx->ghash_key.h[1], &h);

        gf128mul_lle(&h, &ctx->ghash_key.k);
        ghash_reflect(ctx->ghash_key.h[2], &h);

        gf128mul_lle(&h, &ctx->ghash_key.k);
        ghash_reflect(ctx->ghash_key.h[3], &h);

        return 0;
}

static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
        return crypto_gcm_check_authsize(authsize);
}
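/*
 * Helper for gcm_calculate_auth_mac(): coalesce associated data into a
 * partial-block buffer and pass complete GHASH blocks to the PMULL
 * based update routine.
 */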
static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
                           int *buf_count, struct gcm_aes_ctx *ctx)
{
        if (*buf_count > 0) {
                int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count);

                memcpy(&buf[*buf_count], src, buf_added);

                *buf_count += buf_added;
                src += buf_added;
                count -= buf_added;
        }

        if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
                int blocks = count / GHASH_BLOCK_SIZE;

                ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key,
                                     *buf_count ? buf : NULL,
                                     pmull_ghash_update_p64);

                src += blocks * GHASH_BLOCK_SIZE;
                count %= GHASH_BLOCK_SIZE;
                *buf_count = 0;
        }

        if (count > 0) {
                memcpy(buf, src, count);
                *buf_count = count;
        }
}

static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len)
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
        u8 buf[GHASH_BLOCK_SIZE];
        struct scatter_walk walk;
        int buf_count = 0;

        scatterwalk_start(&walk, req->src);

        do {
                u32 n = scatterwalk_clamp(&walk, len);
                u8 *p;

                if (!n) {
                        scatterwalk_start(&walk, sg_next(walk.sg));
                        n = scatterwalk_clamp(&walk, len);
                }
                p = scatterwalk_map(&walk);

                gcm_update_mac(dg, p, n, buf, &buf_count, ctx);
                len -= n;

                scatterwalk_unmap(p);
                scatterwalk_advance(&walk, n);
                scatterwalk_done(&walk, 0, len);
        } while (len);

        if (buf_count) {
                memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
                ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
                                     pmull_ghash_update_p64);
        }
}

static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen)
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
        int nrounds = num_rounds(&ctx->aes_key);
        struct skcipher_walk walk;
        u8 buf[AES_BLOCK_SIZE];
        u64 dg[2] = {};
        be128 lengths;
        u8 *tag;
        int err;

        lengths.a = cpu_to_be64(assoclen * 8);
        lengths.b = cpu_to_be64(req->cryptlen * 8);

        if (assoclen)
                gcm_calculate_auth_mac(req, dg, assoclen);

        put_unaligned_be32(2, iv + GCM_AES_IV_SIZE);

        err = skcipher_walk_aead_encrypt(&walk, req, false);

        do {
                const u8 *src = walk.src.virt.addr;
                u8 *dst = walk.dst.virt.addr;
                int nbytes = walk.nbytes;

                tag = (u8 *)&lengths;

                if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
                        src = dst = memcpy(buf + sizeof(buf) - nbytes,
                                           src, nbytes);
                } else if (nbytes < walk.total) {
                        nbytes &= ~(AES_BLOCK_SIZE - 1);
                        tag = NULL;
                }

                kernel_neon_begin();
                pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
                                  dg, iv, ctx->aes_key.key_enc, nrounds,
                                  tag);
                kernel_neon_end();

                if (unlikely(!nbytes))
                        break;

                if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
                        memcpy(walk.dst.virt.addr,
                               buf + sizeof(buf) - nbytes, nbytes);

                err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
        } while (walk.nbytes);

        if (err)
                return err;

        /* copy authtag to end of dst */
        scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
                                 crypto_aead_authsize(aead), 1);

        return 0;
}
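/*
 * Decrypt path: the expected tag is copied from the end of the source
 * scatterlist up front and handed to the asm code together with the
 * computed lengths block; a nonzero return from pmull_gcm_decrypt()
 * means the tags did not match and is reported as -EBADMSG.
 */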
static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen)
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
        unsigned int authsize = crypto_aead_authsize(aead);
        int nrounds = num_rounds(&ctx->aes_key);
        struct skcipher_walk walk;
        u8 otag[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u64 dg[2] = {};
        be128 lengths;
        u8 *tag;
        int ret;
        int err;

        lengths.a = cpu_to_be64(assoclen * 8);
        lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);

        if (assoclen)
                gcm_calculate_auth_mac(req, dg, assoclen);

        put_unaligned_be32(2, iv + GCM_AES_IV_SIZE);

        scatterwalk_map_and_copy(otag, req->src,
                                 req->assoclen + req->cryptlen - authsize,
                                 authsize, 0);

        err = skcipher_walk_aead_decrypt(&walk, req, false);

        do {
                const u8 *src = walk.src.virt.addr;
                u8 *dst = walk.dst.virt.addr;
                int nbytes = walk.nbytes;

                tag = (u8 *)&lengths;

                if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
                        src = dst = memcpy(buf + sizeof(buf) - nbytes,
                                           src, nbytes);
                } else if (nbytes < walk.total) {
                        nbytes &= ~(AES_BLOCK_SIZE - 1);
                        tag = NULL;
                }

                kernel_neon_begin();
                ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
                                        dg, iv, ctx->aes_key.key_enc,
                                        nrounds, tag, otag, authsize);
                kernel_neon_end();

                if (unlikely(!nbytes))
                        break;

                if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
                        memcpy(walk.dst.virt.addr,
                               buf + sizeof(buf) - nbytes, nbytes);

                err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
        } while (walk.nbytes);

        if (err)
                return err;

        return ret ? -EBADMSG : 0;
}

static int gcm_aes_encrypt(struct aead_request *req)
{
        u8 iv[AES_BLOCK_SIZE];

        memcpy(iv, req->iv, GCM_AES_IV_SIZE);
        return gcm_encrypt(req, iv, req->assoclen);
}

static int gcm_aes_decrypt(struct aead_request *req)
{
        u8 iv[AES_BLOCK_SIZE];

        memcpy(iv, req->iv, GCM_AES_IV_SIZE);
        return gcm_decrypt(req, iv, req->assoclen);
}

static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
                          unsigned int keylen)
{
        struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
        int err;

        keylen -= RFC4106_NONCE_SIZE;
        err = gcm_aes_setkey(tfm, inkey, keylen);
        if (err)
                return err;

        memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
        return 0;
}

static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
        return crypto_rfc4106_check_authsize(authsize);
}

static int rfc4106_encrypt(struct aead_request *req)
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
        u8 iv[AES_BLOCK_SIZE];

        memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
        memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);

        return crypto_ipsec_check_assoclen(req->assoclen) ?:
               gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}

static int rfc4106_decrypt(struct aead_request *req)
{
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
        u8 iv[AES_BLOCK_SIZE];

        memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
        memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);

        return crypto_ipsec_check_assoclen(req->assoclen) ?:
               gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE);
}
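/*
 * Two AEAD variants are registered when PMULL is available: plain
 * gcm(aes) and rfc4106(gcm(aes)), which keeps a 4-byte nonce in the key
 * and takes an 8-byte IV per request.
 */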
static struct aead_alg gcm_aes_algs[] = {{
        .ivsize = GCM_AES_IV_SIZE,
        .chunksize = AES_BLOCK_SIZE,
        .maxauthsize = AES_BLOCK_SIZE,
        .setkey = gcm_aes_setkey,
        .setauthsize = gcm_aes_setauthsize,
        .encrypt = gcm_aes_encrypt,
        .decrypt = gcm_aes_decrypt,

        .base.cra_name = "gcm(aes)",
        .base.cra_driver_name = "gcm-aes-ce",
        .base.cra_priority = 300,
        .base.cra_blocksize = 1,
        .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
                            4 * sizeof(u64[2]),
        .base.cra_module = THIS_MODULE,
}, {
        .ivsize = GCM_RFC4106_IV_SIZE,
        .chunksize = AES_BLOCK_SIZE,
        .maxauthsize = AES_BLOCK_SIZE,
        .setkey = rfc4106_setkey,
        .setauthsize = rfc4106_setauthsize,
        .encrypt = rfc4106_encrypt,
        .decrypt = rfc4106_decrypt,

        .base.cra_name = "rfc4106(gcm(aes))",
        .base.cra_driver_name = "rfc4106-gcm-aes-ce",
        .base.cra_priority = 300,
        .base.cra_blocksize = 1,
        .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
                            4 * sizeof(u64[2]),
        .base.cra_module = THIS_MODULE,
}};

static int __init ghash_ce_mod_init(void)
{
        if (!cpu_have_named_feature(ASIMD))
                return -ENODEV;

        if (cpu_have_named_feature(PMULL))
                return crypto_register_aeads(gcm_aes_algs,
                                             ARRAY_SIZE(gcm_aes_algs));

        return crypto_register_shash(&ghash_alg);
}

static void __exit ghash_ce_mod_exit(void)
{
        if (cpu_have_named_feature(PMULL))
                crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs));
        else
                crypto_unregister_shash(&ghash_alg);
}

static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = {
        { cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);

module_init(ghash_ce_mod_init);
module_exit(ghash_ce_mod_exit);