1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Accelerated GHASH implementation with ARMv8 PMULL instructions. 4 * 5 * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> 6 */ 7 8 #include <asm/neon.h> 9 #include <asm/simd.h> 10 #include <linux/unaligned.h> 11 #include <crypto/aes.h> 12 #include <crypto/gcm.h> 13 #include <crypto/algapi.h> 14 #include <crypto/b128ops.h> 15 #include <crypto/gf128mul.h> 16 #include <crypto/internal/aead.h> 17 #include <crypto/internal/hash.h> 18 #include <crypto/internal/simd.h> 19 #include <crypto/internal/skcipher.h> 20 #include <crypto/scatterwalk.h> 21 #include <linux/cpufeature.h> 22 #include <linux/crypto.h> 23 #include <linux/module.h> 24 25 MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions"); 26 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 27 MODULE_LICENSE("GPL v2"); 28 MODULE_ALIAS_CRYPTO("ghash"); 29 30 #define GHASH_BLOCK_SIZE 16 31 #define GHASH_DIGEST_SIZE 16 32 33 #define RFC4106_NONCE_SIZE 4 34 35 struct ghash_key { 36 be128 k; 37 u64 h[][2]; 38 }; 39 40 struct ghash_desc_ctx { 41 u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)]; 42 u8 buf[GHASH_BLOCK_SIZE]; 43 u32 count; 44 }; 45 46 struct gcm_aes_ctx { 47 struct crypto_aes_ctx aes_key; 48 u8 nonce[RFC4106_NONCE_SIZE]; 49 struct ghash_key ghash_key; 50 }; 51 52 asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src, 53 u64 const h[][2], const char *head); 54 55 asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, 56 u64 const h[][2], const char *head); 57 58 asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[], 59 u64 const h[][2], u64 dg[], u8 ctr[], 60 u32 const rk[], int rounds, u8 tag[]); 61 asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], 62 u64 const h[][2], u64 dg[], u8 ctr[], 63 u32 const rk[], int rounds, const u8 l[], 64 const u8 tag[], u64 authsize); 65 66 static int ghash_init(struct shash_desc *desc) 67 { 68 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); 69 70 *ctx = (struct ghash_desc_ctx){}; 71 return 0; 72 } 73 74 static void ghash_do_update(int blocks, u64 dg[], const char *src, 75 struct ghash_key *key, const char *head) 76 { 77 be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) }; 78 79 do { 80 const u8 *in = src; 81 82 if (head) { 83 in = head; 84 blocks++; 85 head = NULL; 86 } else { 87 src += GHASH_BLOCK_SIZE; 88 } 89 90 crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE); 91 gf128mul_lle(&dst, &key->k); 92 } while (--blocks); 93 94 dg[0] = be64_to_cpu(dst.b); 95 dg[1] = be64_to_cpu(dst.a); 96 } 97 98 static __always_inline 99 void ghash_do_simd_update(int blocks, u64 dg[], const char *src, 100 struct ghash_key *key, const char *head, 101 void (*simd_update)(int blocks, u64 dg[], 102 const char *src, 103 u64 const h[][2], 104 const char *head)) 105 { 106 if (likely(crypto_simd_usable())) { 107 kernel_neon_begin(); 108 simd_update(blocks, dg, src, key->h, head); 109 kernel_neon_end(); 110 } else { 111 ghash_do_update(blocks, dg, src, key, head); 112 } 113 } 114 115 /* avoid hogging the CPU for too long */ 116 #define MAX_BLOCKS (SZ_64K / GHASH_BLOCK_SIZE) 117 118 static int ghash_update(struct shash_desc *desc, const u8 *src, 119 unsigned int len) 120 { 121 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); 122 unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; 123 124 ctx->count += len; 125 126 if ((partial + len) >= GHASH_BLOCK_SIZE) { 127 struct ghash_key *key = crypto_shash_ctx(desc->tfm); 128 int blocks; 129 130 if (partial) { 131 int p = GHASH_BLOCK_SIZE - partial; 132 133 memcpy(ctx->buf + partial, src, p); 134 src += p; 135 len -= p; 136 } 137 138 blocks = len / GHASH_BLOCK_SIZE; 139 len %= GHASH_BLOCK_SIZE; 140 141 do { 142 int chunk = min(blocks, MAX_BLOCKS); 143 144 ghash_do_simd_update(chunk, ctx->digest, src, key, 145 partial ? ctx->buf : NULL, 146 pmull_ghash_update_p8); 147 148 blocks -= chunk; 149 src += chunk * GHASH_BLOCK_SIZE; 150 partial = 0; 151 } while (unlikely(blocks > 0)); 152 } 153 if (len) 154 memcpy(ctx->buf + partial, src, len); 155 return 0; 156 } 157 158 static int ghash_final(struct shash_desc *desc, u8 *dst) 159 { 160 struct ghash_desc_ctx *ctx = shash_desc_ctx(desc); 161 unsigned int partial = ctx->count % GHASH_BLOCK_SIZE; 162 163 if (partial) { 164 struct ghash_key *key = crypto_shash_ctx(desc->tfm); 165 166 memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial); 167 168 ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL, 169 pmull_ghash_update_p8); 170 } 171 put_unaligned_be64(ctx->digest[1], dst); 172 put_unaligned_be64(ctx->digest[0], dst + 8); 173 174 memzero_explicit(ctx, sizeof(*ctx)); 175 return 0; 176 } 177 178 static void ghash_reflect(u64 h[], const be128 *k) 179 { 180 u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0; 181 182 h[0] = (be64_to_cpu(k->b) << 1) | carry; 183 h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63); 184 185 if (carry) 186 h[1] ^= 0xc200000000000000UL; 187 } 188 189 static int ghash_setkey(struct crypto_shash *tfm, 190 const u8 *inkey, unsigned int keylen) 191 { 192 struct ghash_key *key = crypto_shash_ctx(tfm); 193 194 if (keylen != GHASH_BLOCK_SIZE) 195 return -EINVAL; 196 197 /* needed for the fallback */ 198 memcpy(&key->k, inkey, GHASH_BLOCK_SIZE); 199 200 ghash_reflect(key->h[0], &key->k); 201 return 0; 202 } 203 204 static struct shash_alg ghash_alg = { 205 .base.cra_name = "ghash", 206 .base.cra_driver_name = "ghash-neon", 207 .base.cra_priority = 150, 208 .base.cra_blocksize = GHASH_BLOCK_SIZE, 209 .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]), 210 .base.cra_module = THIS_MODULE, 211 212 .digestsize = GHASH_DIGEST_SIZE, 213 .init = ghash_init, 214 .update = ghash_update, 215 .final = ghash_final, 216 .setkey = ghash_setkey, 217 .descsize = sizeof(struct ghash_desc_ctx), 218 }; 219 220 static int num_rounds(struct crypto_aes_ctx *ctx) 221 { 222 /* 223 * # of rounds specified by AES: 224 * 128 bit key 10 rounds 225 * 192 bit key 12 rounds 226 * 256 bit key 14 rounds 227 * => n byte key => 6 + (n/4) rounds 228 */ 229 return 6 + ctx->key_length / 4; 230 } 231 232 static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *inkey, 233 unsigned int keylen) 234 { 235 struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm); 236 u8 key[GHASH_BLOCK_SIZE]; 237 be128 h; 238 int ret; 239 240 ret = aes_expandkey(&ctx->aes_key, inkey, keylen); 241 if (ret) 242 return -EINVAL; 243 244 aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){}); 245 246 /* needed for the fallback */ 247 memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE); 248 249 ghash_reflect(ctx->ghash_key.h[0], &ctx->ghash_key.k); 250 251 h = ctx->ghash_key.k; 252 gf128mul_lle(&h, &ctx->ghash_key.k); 253 ghash_reflect(ctx->ghash_key.h[1], &h); 254 255 gf128mul_lle(&h, &ctx->ghash_key.k); 256 ghash_reflect(ctx->ghash_key.h[2], &h); 257 258 gf128mul_lle(&h, &ctx->ghash_key.k); 259 ghash_reflect(ctx->ghash_key.h[3], &h); 260 261 return 0; 262 } 263 264 static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize) 265 { 266 return crypto_gcm_check_authsize(authsize); 267 } 268 269 static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[], 270 int *buf_count, struct gcm_aes_ctx *ctx) 271 { 272 if (*buf_count > 0) { 273 int buf_added = min(count, GHASH_BLOCK_SIZE - *buf_count); 274 275 memcpy(&buf[*buf_count], src, buf_added); 276 277 *buf_count += buf_added; 278 src += buf_added; 279 count -= buf_added; 280 } 281 282 if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) { 283 int blocks = count / GHASH_BLOCK_SIZE; 284 285 ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key, 286 *buf_count ? buf : NULL, 287 pmull_ghash_update_p64); 288 289 src += blocks * GHASH_BLOCK_SIZE; 290 count %= GHASH_BLOCK_SIZE; 291 *buf_count = 0; 292 } 293 294 if (count > 0) { 295 memcpy(buf, src, count); 296 *buf_count = count; 297 } 298 } 299 300 static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[], u32 len) 301 { 302 struct crypto_aead *aead = crypto_aead_reqtfm(req); 303 struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 304 u8 buf[GHASH_BLOCK_SIZE]; 305 struct scatter_walk walk; 306 int buf_count = 0; 307 308 scatterwalk_start(&walk, req->src); 309 310 do { 311 unsigned int n; 312 313 n = scatterwalk_next(&walk, len); 314 gcm_update_mac(dg, walk.addr, n, buf, &buf_count, ctx); 315 scatterwalk_done_src(&walk, n); 316 len -= n; 317 } while (len); 318 319 if (buf_count) { 320 memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count); 321 ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL, 322 pmull_ghash_update_p64); 323 } 324 } 325 326 static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen) 327 { 328 struct crypto_aead *aead = crypto_aead_reqtfm(req); 329 struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 330 int nrounds = num_rounds(&ctx->aes_key); 331 struct skcipher_walk walk; 332 u8 buf[AES_BLOCK_SIZE]; 333 u64 dg[2] = {}; 334 be128 lengths; 335 u8 *tag; 336 int err; 337 338 lengths.a = cpu_to_be64(assoclen * 8); 339 lengths.b = cpu_to_be64(req->cryptlen * 8); 340 341 if (assoclen) 342 gcm_calculate_auth_mac(req, dg, assoclen); 343 344 put_unaligned_be32(2, iv + GCM_AES_IV_SIZE); 345 346 err = skcipher_walk_aead_encrypt(&walk, req, false); 347 348 do { 349 const u8 *src = walk.src.virt.addr; 350 u8 *dst = walk.dst.virt.addr; 351 int nbytes = walk.nbytes; 352 353 tag = (u8 *)&lengths; 354 355 if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { 356 src = dst = memcpy(buf + sizeof(buf) - nbytes, 357 src, nbytes); 358 } else if (nbytes < walk.total) { 359 nbytes &= ~(AES_BLOCK_SIZE - 1); 360 tag = NULL; 361 } 362 363 kernel_neon_begin(); 364 pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h, 365 dg, iv, ctx->aes_key.key_enc, nrounds, 366 tag); 367 kernel_neon_end(); 368 369 if (unlikely(!nbytes)) 370 break; 371 372 if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) 373 memcpy(walk.dst.virt.addr, 374 buf + sizeof(buf) - nbytes, nbytes); 375 376 err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 377 } while (walk.nbytes); 378 379 if (err) 380 return err; 381 382 /* copy authtag to end of dst */ 383 scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen, 384 crypto_aead_authsize(aead), 1); 385 386 return 0; 387 } 388 389 static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen) 390 { 391 struct crypto_aead *aead = crypto_aead_reqtfm(req); 392 struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 393 unsigned int authsize = crypto_aead_authsize(aead); 394 int nrounds = num_rounds(&ctx->aes_key); 395 struct skcipher_walk walk; 396 u8 otag[AES_BLOCK_SIZE]; 397 u8 buf[AES_BLOCK_SIZE]; 398 u64 dg[2] = {}; 399 be128 lengths; 400 u8 *tag; 401 int ret; 402 int err; 403 404 lengths.a = cpu_to_be64(assoclen * 8); 405 lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8); 406 407 if (assoclen) 408 gcm_calculate_auth_mac(req, dg, assoclen); 409 410 put_unaligned_be32(2, iv + GCM_AES_IV_SIZE); 411 412 scatterwalk_map_and_copy(otag, req->src, 413 req->assoclen + req->cryptlen - authsize, 414 authsize, 0); 415 416 err = skcipher_walk_aead_decrypt(&walk, req, false); 417 418 do { 419 const u8 *src = walk.src.virt.addr; 420 u8 *dst = walk.dst.virt.addr; 421 int nbytes = walk.nbytes; 422 423 tag = (u8 *)&lengths; 424 425 if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) { 426 src = dst = memcpy(buf + sizeof(buf) - nbytes, 427 src, nbytes); 428 } else if (nbytes < walk.total) { 429 nbytes &= ~(AES_BLOCK_SIZE - 1); 430 tag = NULL; 431 } 432 433 kernel_neon_begin(); 434 ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, 435 dg, iv, ctx->aes_key.key_enc, 436 nrounds, tag, otag, authsize); 437 kernel_neon_end(); 438 439 if (unlikely(!nbytes)) 440 break; 441 442 if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) 443 memcpy(walk.dst.virt.addr, 444 buf + sizeof(buf) - nbytes, nbytes); 445 446 err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 447 } while (walk.nbytes); 448 449 if (err) 450 return err; 451 452 return ret ? -EBADMSG : 0; 453 } 454 455 static int gcm_aes_encrypt(struct aead_request *req) 456 { 457 u8 iv[AES_BLOCK_SIZE]; 458 459 memcpy(iv, req->iv, GCM_AES_IV_SIZE); 460 return gcm_encrypt(req, iv, req->assoclen); 461 } 462 463 static int gcm_aes_decrypt(struct aead_request *req) 464 { 465 u8 iv[AES_BLOCK_SIZE]; 466 467 memcpy(iv, req->iv, GCM_AES_IV_SIZE); 468 return gcm_decrypt(req, iv, req->assoclen); 469 } 470 471 static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey, 472 unsigned int keylen) 473 { 474 struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm); 475 int err; 476 477 keylen -= RFC4106_NONCE_SIZE; 478 err = gcm_aes_setkey(tfm, inkey, keylen); 479 if (err) 480 return err; 481 482 memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE); 483 return 0; 484 } 485 486 static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize) 487 { 488 return crypto_rfc4106_check_authsize(authsize); 489 } 490 491 static int rfc4106_encrypt(struct aead_request *req) 492 { 493 struct crypto_aead *aead = crypto_aead_reqtfm(req); 494 struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 495 u8 iv[AES_BLOCK_SIZE]; 496 497 memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); 498 memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); 499 500 return crypto_ipsec_check_assoclen(req->assoclen) ?: 501 gcm_encrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE); 502 } 503 504 static int rfc4106_decrypt(struct aead_request *req) 505 { 506 struct crypto_aead *aead = crypto_aead_reqtfm(req); 507 struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead); 508 u8 iv[AES_BLOCK_SIZE]; 509 510 memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE); 511 memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE); 512 513 return crypto_ipsec_check_assoclen(req->assoclen) ?: 514 gcm_decrypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE); 515 } 516 517 static struct aead_alg gcm_aes_algs[] = {{ 518 .ivsize = GCM_AES_IV_SIZE, 519 .chunksize = AES_BLOCK_SIZE, 520 .maxauthsize = AES_BLOCK_SIZE, 521 .setkey = gcm_aes_setkey, 522 .setauthsize = gcm_aes_setauthsize, 523 .encrypt = gcm_aes_encrypt, 524 .decrypt = gcm_aes_decrypt, 525 526 .base.cra_name = "gcm(aes)", 527 .base.cra_driver_name = "gcm-aes-ce", 528 .base.cra_priority = 300, 529 .base.cra_blocksize = 1, 530 .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) + 531 4 * sizeof(u64[2]), 532 .base.cra_module = THIS_MODULE, 533 }, { 534 .ivsize = GCM_RFC4106_IV_SIZE, 535 .chunksize = AES_BLOCK_SIZE, 536 .maxauthsize = AES_BLOCK_SIZE, 537 .setkey = rfc4106_setkey, 538 .setauthsize = rfc4106_setauthsize, 539 .encrypt = rfc4106_encrypt, 540 .decrypt = rfc4106_decrypt, 541 542 .base.cra_name = "rfc4106(gcm(aes))", 543 .base.cra_driver_name = "rfc4106-gcm-aes-ce", 544 .base.cra_priority = 300, 545 .base.cra_blocksize = 1, 546 .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) + 547 4 * sizeof(u64[2]), 548 .base.cra_module = THIS_MODULE, 549 }}; 550 551 static int __init ghash_ce_mod_init(void) 552 { 553 if (!cpu_have_named_feature(ASIMD)) 554 return -ENODEV; 555 556 if (cpu_have_named_feature(PMULL)) 557 return crypto_register_aeads(gcm_aes_algs, 558 ARRAY_SIZE(gcm_aes_algs)); 559 560 return crypto_register_shash(&ghash_alg); 561 } 562 563 static void __exit ghash_ce_mod_exit(void) 564 { 565 if (cpu_have_named_feature(PMULL)) 566 crypto_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs)); 567 else 568 crypto_unregister_shash(&ghash_alg); 569 } 570 571 static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = { 572 { cpu_feature(PMULL) }, { } 573 }; 574 MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature); 575 576 module_init(ghash_ce_mod_init); 577 module_exit(ghash_ce_mod_exit); 578