/*
 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
 * Copyright (c) 2021, Intel Corporation. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/*
 * This file contains 2 AES-GCM wrapper implementations from OpenSSL, using
 * AES-NI and VAES extensions respectively. These were ported from
 * cipher_aes_gcm_hw_aesni.inc and cipher_aes_gcm_hw_vaes_avx512.inc. The
 * AES-NI implementation makes use of a generic C implementation for partial
 * blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined.
 */

#include <sys/endian.h>
#include <sys/systm.h>

#include <crypto/openssl/ossl.h>
#include <crypto/openssl/ossl_aes_gcm.h>
#include <crypto/openssl/ossl_cipher.h>

#include <opencrypto/cryptodev.h>

_Static_assert(
    sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
    "ossl_gcm_context too large");

void aesni_set_encrypt_key(const void *key, int bits, void *ctx);

static void
gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
	KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
	    ("%s: invalid key length %zu", __func__, keylen));

	memset(&ctx->gcm, 0, sizeof(ctx->gcm));
	memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
	aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
	ctx->ops->init(ctx, key, keylen);
}

static void
gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
{
	(void)ctx->ops->finish(ctx, NULL, 0);
	memcpy(tag, ctx->gcm.Xi.c, len);
}
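
/*
 * Both implementations are exposed through struct ossl_aes_gcm_ops and
 * selected by the ossl_aes_gcm_setkey_*() entry points at the end of this
 * file; the caller is expected to pick the variant matching the CPU's
 * capabilities.  As an illustration only (buffer names, lengths and error
 * handling below are hypothetical/omitted), a one-shot encrypt-and-tag
 * sequence looks like:
 *
 *	ossl_aes_gcm_setkey_avx512(key, 256, ctx);
 *	ctx->ops->setiv(ctx, iv, AES_GCM_IV_LEN);
 *	ctx->ops->aad(ctx, aad, aadlen);
 *	ctx->ops->encrypt(ctx, plaintext, ciphertext, ptlen);
 *	ctx->ops->tag(ctx, tag, taglen);
 *
 * Decryption uses ops->decrypt() and ops->finish(), the latter comparing
 * the computed tag against the expected one.
 */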

void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
    const unsigned char *iv, size_t ivlen);
void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
    size_t len);
void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
    unsigned int *pblocklen, const unsigned char *in, size_t len,
    unsigned char *out);
void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
    unsigned int *pblocklen, const unsigned char *in, size_t len,
    unsigned char *out);
void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);

static void
gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
	ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
}

static void
gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
    size_t len)
{
	KASSERT(len == AES_GCM_IV_LEN,
	    ("%s: invalid IV length %zu", __func__, len));

	ctx->gcm.Yi.u[0] = 0;		/* Current counter */
	ctx->gcm.Yi.u[1] = 0;
	ctx->gcm.Xi.u[0] = 0;		/* AAD hash */
	ctx->gcm.Xi.u[1] = 0;
	ctx->gcm.len.u[0] = 0;		/* AAD length */
	ctx->gcm.len.u[1] = 0;		/* Message length */
	ctx->gcm.ares = 0;
	ctx->gcm.mres = 0;

	ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
}

static int
gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
    size_t len)
{
	uint64_t alen = ctx->gcm.len.u[0];
	size_t lenblks;
	unsigned int ares;

	/* Bad sequence: call of AAD update after message processing */
	if (ctx->gcm.len.u[1])
		return -2;

	alen += len;
	/* AAD is limited by 2^64 bits, thus 2^61 bytes */
	if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
		return -1;
	ctx->gcm.len.u[0] = alen;

	ares = ctx->gcm.ares;
	/* Partial AAD block left from previous AAD update calls */
	if (ares > 0) {
		/*
		 * Fill partial block buffer till full block
		 * (note, the hash is stored reflected)
		 */
		while (ares > 0 && len > 0) {
			ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
			--len;
			ares = (ares + 1) % AES_BLOCK_LEN;
		}
		/* Full block gathered */
		if (ares == 0) {
			ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
		} else { /* no more AAD */
			ctx->gcm.ares = ares;
			return 0;
		}
	}

	/* Bulk AAD processing */
	lenblks = len & ((size_t)(-AES_BLOCK_LEN));
	if (lenblks > 0) {
		ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
		aad += lenblks;
		len -= lenblks;
	}

	/* Add remaining AAD to the hash (note, the hash is stored reflected) */
	if (len > 0) {
		ares = (unsigned int)len;
		for (size_t i = 0; i < len; ++i)
			ctx->gcm.Xi.c[15 - i] ^= aad[i];
	}

	ctx->gcm.ares = ares;

	return 0;
}

static int
_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len, bool encrypt)
{
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;

	ctx->gcm.len.u[1] = mlen;

	/* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
	if (ctx->gcm.ares > 0) {
		ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
		ctx->gcm.ares = 0;
	}

	if (encrypt) {
		ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
		    in, len, out);
	} else {
		ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
		    in, len, out);
	}

	return 0;
}

static int
gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	return _gcm_encrypt_avx512(ctx, in, out, len, true);
}

static int
gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	return _gcm_encrypt_avx512(ctx, in, out, len, false);
}

static int
gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
    size_t len)
{
	unsigned int *res = &ctx->gcm.mres;

	/* Finalize AAD processing */
	if (ctx->gcm.ares > 0)
		res = &ctx->gcm.ares;

	ossl_aes_gcm_finalize_avx512(ctx, *res);

	ctx->gcm.ares = ctx->gcm.mres = 0;

	if (tag != NULL)
		return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
	return 0;
}

static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
	.init = gcm_init_avx512,
	.setiv = gcm_setiv_avx512,
	.aad = gcm_aad_avx512,
	.encrypt = gcm_encrypt_avx512,
	.decrypt = gcm_decrypt_avx512,
	.finish = gcm_finish_avx512,
	.tag = gcm_tag,
};

size_t aesni_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
    const void *key, unsigned char ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
    const void *key, unsigned char ivec[16], uint64_t *Xi);
void aesni_encrypt(const unsigned char *in, unsigned char *out, void *ks);
void aesni_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
    size_t blocks, void *ks, const unsigned char *iv);

void gcm_init_avx(__uint128_t Htable[16], uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const __uint128_t Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
    size_t len);

static void
gcm_init_aesni(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
	aesni_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks);

#if BYTE_ORDER == LITTLE_ENDIAN
	ctx->gcm.H.u[0] = bswap64(ctx->gcm.H.u[0]);
	ctx->gcm.H.u[1] = bswap64(ctx->gcm.H.u[1]);
#endif

	gcm_init_avx(ctx->gcm.Htable, ctx->gcm.H.u);
}

static void
gcm_setiv_aesni(struct ossl_gcm_context *ctx, const unsigned char *iv,
    size_t len)
{
	uint32_t ctr;

	KASSERT(len == AES_GCM_IV_LEN,
	    ("%s: invalid IV length %zu", __func__, len));

	ctx->gcm.len.u[0] = 0;
	ctx->gcm.len.u[1] = 0;
	ctx->gcm.ares = ctx->gcm.mres = 0;

	memcpy(ctx->gcm.Yi.c, iv, len);
	ctx->gcm.Yi.c[12] = 0;
	ctx->gcm.Yi.c[13] = 0;
	ctx->gcm.Yi.c[14] = 0;
	ctx->gcm.Yi.c[15] = 1;
	ctr = 1;

	ctx->gcm.Xi.u[0] = 0;
	ctx->gcm.Xi.u[1] = 0;

	aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks);
	ctr++;

#if BYTE_ORDER == LITTLE_ENDIAN
	ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
	ctx->gcm.Yi.d[3] = ctr;
#endif
}

static int
gcm_aad_aesni(struct ossl_gcm_context *ctx, const unsigned char *aad,
    size_t len)
{
	size_t i;
	unsigned int n;
	uint64_t alen = ctx->gcm.len.u[0];

	if (ctx->gcm.len.u[1])
		return -2;

	alen += len;
	if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
		return -1;
	ctx->gcm.len.u[0] = alen;

	n = ctx->gcm.ares;
	if (n) {
		while (n && len) {
			ctx->gcm.Xi.c[n] ^= *(aad++);
			--len;
			n = (n + 1) % 16;
		}
		if (n == 0)
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
		else {
			ctx->gcm.ares = n;
			return 0;
		}
	}
	if ((i = (len & (size_t)-AES_BLOCK_LEN))) {
		gcm_ghash_avx(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i);
		aad += i;
		len -= i;
	}
	if (len) {
		n = (unsigned int)len;
		for (i = 0; i < len; ++i)
			ctx->gcm.Xi.c[i] ^= aad[i];
	}

	ctx->gcm.ares = n;
	return 0;
}

static int
gcm_encrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	unsigned int n, ctr, mres;
	size_t i;
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;
	ctx->gcm.len.u[1] = mlen;

	mres = ctx->gcm.mres;

	if (ctx->gcm.ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
		ctx->gcm.ares = 0;
	}

#if BYTE_ORDER == LITTLE_ENDIAN
	ctr = bswap32(ctx->gcm.Yi.d[3]);
#else
	ctr = ctx->gcm.Yi.d[3];
#endif

	n = mres % 16;
	for (i = 0; i < len; ++i) {
		if (n == 0) {
			aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
			    &ctx->aes_ks);
			++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
			ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
			ctx->gcm.Yi.d[3] = ctr;
#endif
		}
		ctx->gcm.Xi.c[n] ^= out[i] = in[i] ^ ctx->gcm.EKi.c[n];
		mres = n = (n + 1) % 16;
		if (n == 0)
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
	}

	ctx->gcm.mres = mres;
	return 0;
}

static int
gcm_encrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	unsigned int n, ctr, mres;
	size_t i;
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;
	ctx->gcm.len.u[1] = mlen;

	mres = ctx->gcm.mres;

	if (ctx->gcm.ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
		ctx->gcm.ares = 0;
	}

#if BYTE_ORDER == LITTLE_ENDIAN
	ctr = bswap32(ctx->gcm.Yi.d[3]);
#else
	ctr = ctx->gcm.Yi.d[3];
#endif

	n = mres % 16;
	if (n) {
		while (n && len) {
			ctx->gcm.Xi.c[n] ^= *(out++) = *(in++) ^ ctx->gcm.EKi.c[n];
			--len;
			n = (n + 1) % 16;
		}
		if (n == 0) {
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
			mres = 0;
		} else {
			ctx->gcm.mres = n;
			return 0;
		}
	}
	if ((i = (len & (size_t)-16))) {
		size_t j = i / 16;

		aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
		ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
		ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
		ctx->gcm.Yi.d[3] = ctr;
#endif
		in += i;
		len -= i;
		while (j--) {
			for (i = 0; i < 16; ++i)
				ctx->gcm.Xi.c[i] ^= out[i];
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
			out += 16;
		}
	}
	if (len) {
		aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
		++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
		ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
		ctx->gcm.Yi.d[3] = ctr;
#endif
		while (len--) {
			ctx->gcm.Xi.c[mres++] ^= out[n] = in[n] ^ ctx->gcm.EKi.c[n];
			++n;
		}
	}

	ctx->gcm.mres = mres;
	return 0;
}
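
/*
 * Encryption is split into three stages: the generic byte-at-a-time path
 * above completes any partial keystream block left by a previous call, the
 * stitched AES-NI/GHASH assembly then handles as much aligned bulk data as
 * it can (returning the number of bytes it actually processed), and the
 * CTR32 path absorbs whatever tail remains.
 */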

static int
gcm_encrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	size_t bulk = 0, res;
	int error;

	res = (AES_BLOCK_LEN - ctx->gcm.mres) % AES_BLOCK_LEN;
	if ((error = gcm_encrypt(ctx, in, out, res)) != 0)
		return error;

	bulk = aesni_gcm_encrypt(in + res, out + res, len - res,
	    &ctx->aes_ks, ctx->gcm.Yi.c, ctx->gcm.Xi.u);
	ctx->gcm.len.u[1] += bulk;
	bulk += res;

	if ((error = gcm_encrypt_ctr32(ctx, in + bulk, out + bulk,
	    len - bulk)) != 0)
		return error;

	return 0;
}

static int
gcm_decrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	unsigned int n, ctr, mres;
	size_t i;
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;
	ctx->gcm.len.u[1] = mlen;

	mres = ctx->gcm.mres;

	if (ctx->gcm.ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
		ctx->gcm.ares = 0;
	}

#if BYTE_ORDER == LITTLE_ENDIAN
	ctr = bswap32(ctx->gcm.Yi.d[3]);
#else
	ctr = ctx->gcm.Yi.d[3];
#endif

	n = mres % 16;
	for (i = 0; i < len; ++i) {
		uint8_t c;
		if (n == 0) {
			aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
			    &ctx->aes_ks);
			++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
			ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
			ctx->gcm.Yi.d[3] = ctr;
#endif
		}
		c = in[i];
		out[i] = c ^ ctx->gcm.EKi.c[n];
		ctx->gcm.Xi.c[n] ^= c;
		mres = n = (n + 1) % 16;
		if (n == 0)
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
	}

	ctx->gcm.mres = mres;
	return 0;
}
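
/*
 * The bulk loop below GHASHes the ciphertext blocks before running the
 * counter-mode transform over them: for decryption the hash is taken over
 * the ciphertext, and hashing first also keeps in-place operation
 * (out == in) correct.
 */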

static int
gcm_decrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	unsigned int n, ctr, mres;
	size_t i;
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;
	ctx->gcm.len.u[1] = mlen;

	mres = ctx->gcm.mres;

	if (ctx->gcm.ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
		ctx->gcm.ares = 0;
	}

#if BYTE_ORDER == LITTLE_ENDIAN
	ctr = bswap32(ctx->gcm.Yi.d[3]);
#else
	ctr = ctx->gcm.Yi.d[3];
#endif

	n = mres % 16;
	if (n) {
		while (n && len) {
			uint8_t c = *(in++);
			*(out++) = c ^ ctx->gcm.EKi.c[n];
			ctx->gcm.Xi.c[n] ^= c;
			--len;
			n = (n + 1) % 16;
		}
		if (n == 0) {
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
			mres = 0;
		} else {
			ctx->gcm.mres = n;
			return 0;
		}
	}
	if ((i = (len & (size_t)-16))) {
		size_t j = i / 16;

		while (j--) {
			size_t k;
			for (k = 0; k < 16; ++k)
				ctx->gcm.Xi.c[k] ^= in[k];
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
			in += 16;
		}
		j = i / 16;
		in -= i;
		aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
		ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
		ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
		ctx->gcm.Yi.d[3] = ctr;
#endif
		out += i;
		in += i;
		len -= i;
	}
	if (len) {
		aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
		++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
		ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
		ctx->gcm.Yi.d[3] = ctr;
#endif
		while (len--) {
			uint8_t c = in[n];
			ctx->gcm.Xi.c[mres++] ^= c;
			out[n] = c ^ ctx->gcm.EKi.c[n];
			++n;
		}
	}

	ctx->gcm.mres = mres;
	return 0;
}

static int
gcm_decrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	size_t bulk = 0, res;
	int error;

	res = (AES_BLOCK_LEN - ctx->gcm.mres) % AES_BLOCK_LEN;
	if ((error = gcm_decrypt(ctx, in, out, res)) != 0)
		return error;

	bulk = aesni_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks,
	    ctx->gcm.Yi.c, ctx->gcm.Xi.u);
	ctx->gcm.len.u[1] += bulk;
	bulk += res;

	if ((error = gcm_decrypt_ctr32(ctx, in + bulk, out + bulk, len - bulk)) != 0)
		return error;

	return 0;
}

static int
gcm_finish_aesni(struct ossl_gcm_context *ctx, const unsigned char *tag,
    size_t len)
{
	uint64_t alen = ctx->gcm.len.u[0] << 3;
	uint64_t clen = ctx->gcm.len.u[1] << 3;

	if (ctx->gcm.mres || ctx->gcm.ares)
		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);

#if BYTE_ORDER == LITTLE_ENDIAN
	alen = bswap64(alen);
	clen = bswap64(clen);
#endif

	ctx->gcm.Xi.u[0] ^= alen;
	ctx->gcm.Xi.u[1] ^= clen;
	gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);

	ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0];
	ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];

	if (tag != NULL)
		return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
	return 0;
}

static const struct ossl_aes_gcm_ops gcm_ops_aesni = {
	.init = gcm_init_aesni,
	.setiv = gcm_setiv_aesni,
	.aad = gcm_aad_aesni,
	.encrypt = gcm_encrypt_aesni,
	.decrypt = gcm_decrypt_aesni,
	.finish = gcm_finish_aesni,
	.tag = gcm_tag,
};

int ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen, void *_ctx);

int
ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen,
    void *_ctx)
{
	struct ossl_gcm_context *ctx;

	ctx = _ctx;
	ctx->ops = &gcm_ops_aesni;
	gcm_init(ctx, key, klen);
	return (0);
}

int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);

int
ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
    void *_ctx)
{
	struct ossl_gcm_context *ctx;

	ctx = _ctx;
	ctx->ops = &gcm_ops_avx512;
	gcm_init(ctx, key, klen);
	return (0);
}