/*
 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
 * Copyright (c) 2021, Intel Corporation. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/*
 * This file contains two AES-GCM wrapper implementations from OpenSSL, using
 * AES-NI and VAES extensions respectively.  These were ported from
 * cipher_aes_gcm_hw_aesni.inc and cipher_aes_gcm_hw_vaes_avx512.inc.  The
 * AES-NI implementation makes use of a generic C implementation for partial
 * blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined.
 */

#include <sys/endian.h>
#include <sys/systm.h>

#include <crypto/openssl/ossl.h>
#include <crypto/openssl/ossl_aes_gcm.h>
#include <crypto/openssl/ossl_cipher.h>

#include <opencrypto/cryptodev.h>

_Static_assert(
    sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context),
    "ossl_gcm_context too large");

void aesni_set_encrypt_key(const void *key, int bits, void *ctx);

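/*
 * Common key setup: expand the AES key schedule with AES-NI (keylen is given
 * in bits) and let the selected implementation derive its GHASH key material.
 */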
static void
gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
	KASSERT(keylen == 128 || keylen == 192 || keylen == 256,
	    ("%s: invalid key length %zu", __func__, keylen));

	memset(&ctx->gcm, 0, sizeof(ctx->gcm));
	memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks));
	aesni_set_encrypt_key(key, keylen, &ctx->aes_ks);
	ctx->ops->init(ctx, key, keylen);
}

static void
gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len)
{
	(void)ctx->ops->finish(ctx, NULL, 0);
	memcpy(tag, ctx->gcm.Xi.c, len);
}

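/*
 * Entry points into the VAES/AVX-512 assembly.  Xi is the running GHASH
 * value; *pblocklen carries the byte count of a buffered partial block
 * between encrypt/decrypt calls.
 */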
void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx);
void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx);
void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx,
    const unsigned char *iv, size_t ivlen);
void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad,
    size_t len);
void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx,
    unsigned int *pblocklen, const unsigned char *in, size_t len,
    unsigned char *out);
void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx,
    unsigned int *pblocklen, const unsigned char *in, size_t len,
    unsigned char *out);
void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen);

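/*
 * key/keylen are unused here: gcm_init() has already expanded the AES key
 * schedule, and the assembly derives its hash key material from that.
 */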
static void
gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
	ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm);
}

static void
gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv,
    size_t len)
{
	KASSERT(len == AES_GCM_IV_LEN,
	    ("%s: invalid IV length %zu", __func__, len));

	ctx->gcm.Yi.u[0] = 0;		/* Current counter */
	ctx->gcm.Yi.u[1] = 0;
	ctx->gcm.Xi.u[0] = 0;		/* AAD hash */
	ctx->gcm.Xi.u[1] = 0;
	ctx->gcm.len.u[0] = 0;		/* AAD length */
	ctx->gcm.len.u[1] = 0;		/* Message length */
	ctx->gcm.ares = 0;
	ctx->gcm.mres = 0;

	ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len);
}

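/*
 * Hash additional authenticated data.  The AVX-512 code keeps the GHASH
 * accumulator byte-reversed ("reflected"), so partial-block bytes are folded
 * in from the end of Xi.
 */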
static int
gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad,
    size_t len)
{
	uint64_t alen = ctx->gcm.len.u[0];
	size_t lenblks;
	unsigned int ares;

	/* Bad sequence: AAD update called after message processing began */
	if (ctx->gcm.len.u[1])
		return -2;

	alen += len;
	/* AAD is limited by 2^64 bits, thus 2^61 bytes */
	if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
		return -1;
	ctx->gcm.len.u[0] = alen;

	ares = ctx->gcm.ares;
	/* Partial AAD block left from previous AAD update calls */
	if (ares > 0) {
		/*
		 * Fill the partial block buffer until a full block is
		 * gathered (note, the hash is stored reflected)
		 */
		while (ares > 0 && len > 0) {
			ctx->gcm.Xi.c[15 - ares] ^= *(aad++);
			--len;
			ares = (ares + 1) % AES_BLOCK_LEN;
		}
		/* Full block gathered */
		if (ares == 0) {
			ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
		} else { /* no more AAD */
			ctx->gcm.ares = ares;
			return 0;
		}
	}

	/* Bulk AAD processing */
	lenblks = len & ((size_t)(-AES_BLOCK_LEN));
	if (lenblks > 0) {
		ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks);
		aad += lenblks;
		len -= lenblks;
	}

	/* Add remaining AAD to the hash (note, the hash is stored reflected) */
	if (len > 0) {
		ares = (unsigned int)len;
		for (size_t i = 0; i < len; ++i)
			ctx->gcm.Xi.c[15 - i] ^= aad[i];
	}

	ctx->gcm.ares = ares;

	return 0;
}

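/*
 * Encrypt or decrypt the message body.  The total length is capped at
 * 2^36 - 32 bytes, i.e., NIST SP 800-38D's limit of 2^39 - 256 bits of
 * plaintext per invocation.
 */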
static int
_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len, bool encrypt)
{
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;

	ctx->gcm.len.u[1] = mlen;

	/* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */
	if (ctx->gcm.ares > 0) {
		ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx);
		ctx->gcm.ares = 0;
	}

	if (encrypt) {
		ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
		    in, len, out);
	} else {
		ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres,
		    in, len, out);
	}

	return 0;
}

static int
gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	return _gcm_encrypt_avx512(ctx, in, out, len, true);
}

static int
gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	return _gcm_encrypt_avx512(ctx, in, out, len, false);
}

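/*
 * Complete the tag: the assembly flushes any buffered partial block (AAD or
 * message), folds in the lengths, and leaves the result in Xi.  A supplied
 * tag is compared in constant time.
 */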
static int
gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag,
    size_t len)
{
	unsigned int *res = &ctx->gcm.mres;

	/* Finalize AAD processing */
	if (ctx->gcm.ares > 0)
		res = &ctx->gcm.ares;

	ossl_aes_gcm_finalize_avx512(ctx, *res);

	ctx->gcm.ares = ctx->gcm.mres = 0;

	if (tag != NULL)
		return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
	return 0;
}

static const struct ossl_aes_gcm_ops gcm_ops_avx512 = {
	.init = gcm_init_avx512,
	.setiv = gcm_setiv_avx512,
	.aad = gcm_aad_avx512,
	.encrypt = gcm_encrypt_avx512,
	.decrypt = gcm_decrypt_avx512,
	.finish = gcm_finish_avx512,
	.tag = gcm_tag,
};

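/*
 * Assembly routines used by the AES-NI implementation.  aesni_gcm_encrypt()
 * and aesni_gcm_decrypt() are the stitched AES-CTR/GHASH loops; they process
 * what they can in large multi-block strides and return the number of bytes
 * handled.  The remainder is covered by the CTR32 and single-block helpers.
 */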
size_t aesni_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len,
    const void *key, unsigned char ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len,
    const void *key, unsigned char ivec[16], uint64_t *Xi);
void aesni_encrypt(const unsigned char *in, unsigned char *out, void *ks);
void aesni_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
    size_t blocks, void *ks, const unsigned char *iv);

void gcm_init_avx(__uint128_t Htable[16], uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const __uint128_t Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const __uint128_t Htable[16], const void *in,
    size_t len);

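/*
 * Derive the GHASH key H = E_K(0^128) (gcm_init() zeroed the context, so H
 * starts out as the all-zero block) and precompute the table used by the
 * AVX GHASH routines.
 */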
static void
gcm_init_aesni(struct ossl_gcm_context *ctx, const void *key, size_t keylen)
{
	aesni_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks);

#if BYTE_ORDER == LITTLE_ENDIAN
	ctx->gcm.H.u[0] = bswap64(ctx->gcm.H.u[0]);
	ctx->gcm.H.u[1] = bswap64(ctx->gcm.H.u[1]);
#endif

	gcm_init_avx(ctx->gcm.Htable, ctx->gcm.H.u);
}

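/*
 * With a 96-bit IV the initial counter block Y0 is IV || 0^31 || 1.  EK0,
 * the encryption of Y0, is saved to mask the final tag; message processing
 * starts at counter value 2.
 */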
static void
gcm_setiv_aesni(struct ossl_gcm_context *ctx, const unsigned char *iv,
    size_t len)
{
	uint32_t ctr;

	KASSERT(len == AES_GCM_IV_LEN,
	    ("%s: invalid IV length %zu", __func__, len));

	ctx->gcm.len.u[0] = 0;
	ctx->gcm.len.u[1] = 0;
	ctx->gcm.ares = ctx->gcm.mres = 0;

	memcpy(ctx->gcm.Yi.c, iv, len);
	ctx->gcm.Yi.c[12] = 0;
	ctx->gcm.Yi.c[13] = 0;
	ctx->gcm.Yi.c[14] = 0;
	ctx->gcm.Yi.c[15] = 1;
	ctr = 1;

	ctx->gcm.Xi.u[0] = 0;
	ctx->gcm.Xi.u[1] = 0;

	aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks);
	ctr++;

#if BYTE_ORDER == LITTLE_ENDIAN
	ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
	ctx->gcm.Yi.d[3] = ctr;
#endif
}

static int
gcm_aad_aesni(struct ossl_gcm_context *ctx, const unsigned char *aad,
    size_t len)
{
	size_t i;
	unsigned int n;
	uint64_t alen = ctx->gcm.len.u[0];

	if (ctx->gcm.len.u[1])
		return -2;

	alen += len;
	if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len))
		return -1;
	ctx->gcm.len.u[0] = alen;

	n = ctx->gcm.ares;
	if (n) {
		while (n && len) {
			ctx->gcm.Xi.c[n] ^= *(aad++);
			--len;
			n = (n + 1) % 16;
		}
		if (n == 0)
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
		else {
			ctx->gcm.ares = n;
			return 0;
		}
	}
	if ((i = (len & (size_t)-AES_BLOCK_LEN))) {
		gcm_ghash_avx(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i);
		aad += i;
		len -= i;
	}
	if (len) {
		n = (unsigned int)len;
		for (i = 0; i < len; ++i)
			ctx->gcm.Xi.c[i] ^= aad[i];
	}

	ctx->gcm.ares = n;
	return 0;
}

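/*
 * Byte-at-a-time path taken from the generic gcm128.c implementation; the
 * AES-NI wrapper uses it to consume a leading partial block before handing
 * the bulk of the data to the assembly routines.
 */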
static int
gcm_encrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	unsigned int n, ctr, mres;
	size_t i;
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;
	ctx->gcm.len.u[1] = mlen;

	mres = ctx->gcm.mres;

	if (ctx->gcm.ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
		ctx->gcm.ares = 0;
	}

#if BYTE_ORDER == LITTLE_ENDIAN
	ctr = bswap32(ctx->gcm.Yi.d[3]);
#else
	ctr = ctx->gcm.Yi.d[3];
#endif

	n = mres % 16;
	for (i = 0; i < len; ++i) {
		if (n == 0) {
			aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
			    &ctx->aes_ks);
			++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
			ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
			ctx->gcm.Yi.d[3] = ctr;
#endif
		}
		ctx->gcm.Xi.c[n] ^= out[i] = in[i] ^ ctx->gcm.EKi.c[n];
		mres = n = (n + 1) % 16;
		if (n == 0)
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
	}

	ctx->gcm.mres = mres;
	return 0;
}

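/*
 * CTR32-based tail path: whole blocks are encrypted with
 * aesni_ctr32_encrypt_blocks() and then folded into GHASH one block at a
 * time; a trailing partial block's keystream and length are kept in
 * EKi/mres for later calls.
 */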
static int
gcm_encrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	unsigned int n, ctr, mres;
	size_t i;
	uint64_t mlen = ctx->gcm.len.u[1];

	mlen += len;
	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
		return -1;
	ctx->gcm.len.u[1] = mlen;

	mres = ctx->gcm.mres;

	if (ctx->gcm.ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
		ctx->gcm.ares = 0;
	}

#if BYTE_ORDER == LITTLE_ENDIAN
	ctr = bswap32(ctx->gcm.Yi.d[3]);
#else
	ctr = ctx->gcm.Yi.d[3];
#endif

	n = mres % 16;
	if (n) {
		while (n && len) {
			ctx->gcm.Xi.c[n] ^= *(out++) = *(in++) ^ ctx->gcm.EKi.c[n];
			--len;
			n = (n + 1) % 16;
		}
		if (n == 0) {
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
			mres = 0;
		} else {
			ctx->gcm.mres = n;
			return 0;
		}
	}
	if ((i = (len & (size_t)-16))) {
		size_t j = i / 16;

		aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
		ctr += (unsigned int)j;
#if BYTE_ORDER == LITTLE_ENDIAN
		ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
		ctx->gcm.Yi.d[3] = ctr;
#endif
		in += i;
		len -= i;
		while (j--) {
			for (i = 0; i < 16; ++i)
				ctx->gcm.Xi.c[i] ^= out[i];
			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
			out += 16;
		}
	}
	if (len) {
		aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
		++ctr;
#if BYTE_ORDER == LITTLE_ENDIAN
		ctx->gcm.Yi.d[3] = bswap32(ctr);
#else
		ctx->gcm.Yi.d[3] = ctr;
#endif
		while (len--) {
			ctx->gcm.Xi.c[mres++] ^= out[n] = in[n] ^ ctx->gcm.EKi.c[n];
			++n;
		}
	}

	ctx->gcm.mres = mres;
	return 0;
}

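/*
 * Encrypt: top up any buffered partial block, hand off as much as possible
 * to the bulk assembly routine (which may process fewer bytes than
 * requested), and finish the remainder with the CTR32 path.
 */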
static int
gcm_encrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
    unsigned char *out, size_t len)
{
	size_t bulk = 0, res;
	int error;

	res = MIN(len, (AES_BLOCK_LEN - ctx->gcm.mres) % AES_BLOCK_LEN);
	if ((error = gcm_encrypt(ctx, in, out, res)) != 0)
		return error;

	bulk = aesni_gcm_encrypt(in + res, out + res, len - res,
	    &ctx->aes_ks, ctx->gcm.Yi.c, ctx->gcm.Xi.u);
	ctx->gcm.len.u[1] += bulk;
	bulk += res;

	if ((error = gcm_encrypt_ctr32(ctx, in + bulk, out + bulk,
	    len - bulk)) != 0)
		return error;

	return 0;
}

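/*
 * Decryption counterpart of gcm_encrypt(): each ciphertext byte is folded
 * into GHASH before it is decrypted.
 */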
479 gcm_decrypt(struct ossl_gcm_context *ctx, const unsigned char *in,
480     unsigned char *out, size_t len)
481 {
482 	unsigned int n, ctr, mres;
483 	size_t i;
484 	uint64_t mlen = ctx->gcm.len.u[1];
485 
486 	mlen += len;
487 	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
488 		return -1;
489 	ctx->gcm.len.u[1] = mlen;
490 
491 	mres = ctx->gcm.mres;
492 
493 	if (ctx->gcm.ares) {
494 		/* First call to encrypt finalizes GHASH(AAD) */
495 		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
496 		ctx->gcm.ares = 0;
497 	}
498 
499 #if BYTE_ORDER == LITTLE_ENDIAN
500 	ctr = bswap32(ctx->gcm.Yi.d[3]);
501 #else
502 	ctr = ctx->gcm.Yi.d[3];
503 #endif
504 
505 	n = mres % 16;
506 	for (i = 0; i < len; ++i) {
507 		uint8_t c;
508 		if (n == 0) {
509 			aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c,
510 			    &ctx->aes_ks);
511 			++ctr;
512 #if BYTE_ORDER == LITTLE_ENDIAN
513 			ctx->gcm.Yi.d[3] = bswap32(ctr);
514 #else
515 			ctx->gcm.Yi.d[3] = ctr;
516 #endif
517 		}
518 		c = in[i];
519 		out[i] = c ^ ctx->gcm.EKi.c[n];
520 		ctx->gcm.Xi.c[n] ^= c;
521 		mres = n = (n + 1) % 16;
522 		if (n == 0)
523 			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
524 	}
525 
526 	ctx->gcm.mres = mres;
527 	return 0;
528 }
529 
530 static int
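/*
 * Decryption counterpart of gcm_encrypt_ctr32(): full ciphertext blocks are
 * hashed first and then decrypted in a single aesni_ctr32_encrypt_blocks()
 * call.
 */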
531 gcm_decrypt_ctr32(struct ossl_gcm_context *ctx, const unsigned char *in,
532     unsigned char *out, size_t len)
533 {
534 	unsigned int n, ctr, mres;
535 	size_t i;
536 	uint64_t mlen = ctx->gcm.len.u[1];
537 
538 	mlen += len;
539 	if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len))
540 		return -1;
541 	ctx->gcm.len.u[1] = mlen;
542 
543 	mres = ctx->gcm.mres;
544 
545 	if (ctx->gcm.ares) {
546 		/* First call to decrypt finalizes GHASH(AAD) */
547 		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
548 		ctx->gcm.ares = 0;
549 	}
550 
551 #if BYTE_ORDER == LITTLE_ENDIAN
552 	ctr = bswap32(ctx->gcm.Yi.d[3]);
553 #else
554 	ctr = ctx->gcm.Yi.d[3];
555 #endif
556 
557 	n = mres % 16;
558 	if (n) {
559 		while (n && len) {
560 			uint8_t c = *(in++);
561 			*(out++) = c ^ ctx->gcm.EKi.c[n];
562 			ctx->gcm.Xi.c[n] ^= c;
563 			--len;
564 			n = (n + 1) % 16;
565 		}
566 		if (n == 0) {
567 			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
568 			mres = 0;
569 		} else {
570 			ctx->gcm.mres = n;
571 			return 0;
572 		}
573 	}
574 	if ((i = (len & (size_t)-16))) {
575 		size_t j = i / 16;
576 
577 		while (j--) {
578 			size_t k;
579 			for (k = 0; k < 16; ++k)
580 				ctx->gcm.Xi.c[k] ^= in[k];
581 			gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);
582 			in += 16;
583 		}
584 		j = i / 16;
585 		in -= i;
586 		aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c);
587 		ctr += (unsigned int)j;
588 #if BYTE_ORDER == LITTLE_ENDIAN
589 		ctx->gcm.Yi.d[3] = bswap32(ctr);
590 #else
591 		ctx->gcm.Yi.d[3] = ctr;
592 #endif
593 		out += i;
594 		in += i;
595 		len -= i;
596 	}
597 	if (len) {
598 		aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks);
599 		++ctr;
600 #if BYTE_ORDER == LITTLE_ENDIAN
601 		ctx->gcm.Yi.d[3] = bswap32(ctr);
602 #else
603 		ctx->gcm.Yi.d[3] = ctr;
604 #endif
605 		while (len--) {
606 			uint8_t c = in[n];
607 			ctx->gcm.Xi.c[mres++] ^= c;
608 			out[n] = c ^ ctx->gcm.EKi.c[n];
609 			++n;
610 		}
611 	}
612 
613 	ctx->gcm.mres = mres;
614 	return 0;
615 }
616 
617 static int
618 gcm_decrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in,
619     unsigned char *out, size_t len)
620 {
621 	size_t bulk = 0, res;
622 	int error;
623 
624 	res = MIN(len, (AES_BLOCK_LEN - ctx->gcm.mres) % AES_BLOCK_LEN);
625 	if ((error = gcm_decrypt(ctx, in, out, res)) != 0)
626 		return error;
627 
628 	bulk = aesni_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks,
629 	    ctx->gcm.Yi.c, ctx->gcm.Xi.u);
630 	ctx->gcm.len.u[1] += bulk;
631 	bulk += res;
632 
633 	if ((error = gcm_decrypt_ctr32(ctx, in + bulk, out + bulk, len - bulk)) != 0)
634 		return error;
635 
636 	return 0;
637 }
638 
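/*
 * Flush any pending GHASH input, fold in the AAD and ciphertext bit lengths,
 * and mask the result with EK0 to form the tag in Xi.
 */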
static int
gcm_finish_aesni(struct ossl_gcm_context *ctx, const unsigned char *tag,
    size_t len)
{
	uint64_t alen = ctx->gcm.len.u[0] << 3;
	uint64_t clen = ctx->gcm.len.u[1] << 3;

	if (ctx->gcm.mres || ctx->gcm.ares)
		gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);

#if BYTE_ORDER == LITTLE_ENDIAN
	alen = bswap64(alen);
	clen = bswap64(clen);
#endif

	ctx->gcm.Xi.u[0] ^= alen;
	ctx->gcm.Xi.u[1] ^= clen;
	gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable);

	ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0];
	ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1];

	if (tag != NULL)
		return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len);
	return 0;
}

static const struct ossl_aes_gcm_ops gcm_ops_aesni = {
	.init = gcm_init_aesni,
	.setiv = gcm_setiv_aesni,
	.aad = gcm_aad_aesni,
	.encrypt = gcm_encrypt_aesni,
	.decrypt = gcm_decrypt_aesni,
	.finish = gcm_finish_aesni,
	.tag = gcm_tag,
};

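/*
 * Set-key entry points.  The caller is expected to pick one of these based
 * on the CPU features available; they differ only in which ops vector gets
 * installed.
 */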
int ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen, void *_ctx);

int
ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen,
    void *_ctx)
{
	struct ossl_gcm_context *ctx;

	ctx = _ctx;
	ctx->ops = &gcm_ops_aesni;
	gcm_init(ctx, key, klen);
	return (0);
}

int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx);

int
ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen,
    void *_ctx)
{
	struct ossl_gcm_context *ctx;

	ctx = _ctx;
	ctx->ops = &gcm_ops_avx512;
	gcm_init(ctx, key, klen);
	return (0);
}