xref: /freebsd/sys/contrib/openzfs/module/icp/algs/modes/gcm.c (revision 53a2e2635ab2d17bed1de7b4e0d782dd23ceb6ea)
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/cmn_err.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/icp.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#include <sys/simd.h>
#include <modes/gcm_impl.h>
#ifdef CAN_USE_GCM_ASM
#include <aes/aes_impl.h>
#endif
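/*
 * GHASH performs one authentication step per NIST SP 800-38D: it folds
 * the 16-byte block (d) into the running hash, computing
 * (t) = ((c)->gcm_ghash ^ (d)) * (c)->gcm_H in GF(2^128), with the
 * carry-less multiplication supplied by the selected implementation (o).
 */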
#define	GHASH(c, d, t, o) \
	xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
	(o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
	(uint64_t *)(void *)(t));

/* Select GCM implementation */
#define	IMPL_FASTEST	(UINT32_MAX)
#define	IMPL_CYCLE	(UINT32_MAX-1)
#ifdef CAN_USE_GCM_ASM
#define	IMPL_AVX	(UINT32_MAX-2)
#if CAN_USE_GCM_ASM >= 2
#define	IMPL_AVX2	(UINT32_MAX-3)
#endif
#endif
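/* Force a volatile read so runtime changes of the selector are observed. */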
#define	GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_gcm_impl = IMPL_FASTEST;
static uint32_t user_sel_impl = IMPL_FASTEST;

#ifdef CAN_USE_GCM_ASM
/* Does the architecture we run on support the MOVBE instruction? */
boolean_t gcm_avx_can_use_movbe = B_FALSE;
/*
 * Whether to use the optimized openssl gcm and ghash implementations.
 */
static gcm_impl gcm_impl_used = GCM_IMPL_GENERIC;
#define	GCM_IMPL_USED	(*(volatile gcm_impl *)&gcm_impl_used)

extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *);

static inline boolean_t gcm_avx_will_work(void);
static inline boolean_t gcm_avx2_will_work(void);
static inline void gcm_use_impl(gcm_impl impl);
static inline gcm_impl gcm_toggle_impl(void);

static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
    crypto_data_t *, size_t);

static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
static int gcm_init_avx(gcm_ctx_t *, const uint8_t *, size_t, const uint8_t *,
    size_t, size_t);
#endif /* ifdef CAN_USE_GCM_ASM */

/*
 * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
 * is done in another function.
 */
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->impl != GCM_IMPL_GENERIC)
		return (gcm_mode_encrypt_contiguous_blocks_avx(
		    ctx, data, length, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t remainder = length;
	size_t need = 0;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    datap,
		    length);
		ctx->gcm_remainder_len += length;
		if (ctx->gcm_copy_to == NULL) {
			ctx->gcm_copy_to = datap;
		}
		return (CRYPTO_SUCCESS);
	}

	crypto_init_ptrs(out, &iov_or_mp, &offset);

	gops = gcm_impl_get_ops();
	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder)
				return (CRYPTO_DATA_LEN_RANGE);

			memcpy(&((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], datap, need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
		    &out_data_1_len, &out_data_2, block_size);

		/* copy block to where it belongs */
		if (out_data_1_len == block_size) {
			copy_block(lastp, out_data_1);
		} else {
			memcpy(out_data_1, lastp, out_data_1_len);
			if (out_data_2 != NULL) {
				memcpy(out_data_2,
				    lastp + out_data_1_len,
				    block_size - out_data_1_len);
			}
		}
		/* update offset */
		out->cd_offset += block_size;

		/* add ciphertext to the hash */
		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			memcpy(ctx->gcm_remainder, datap, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	return (CRYPTO_SUCCESS);
}

int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) copy_block;
#ifdef CAN_USE_GCM_ASM
	if (ctx->impl != GCM_IMPL_GENERIC)
		return (gcm_encrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint8_t *ghash, *macp = NULL;
	int i, rv;

	if (out->cd_length <
	    (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
		return (CRYPTO_DATA_LEN_RANGE);
	}

	gops = gcm_impl_get_ops();
	ghash = (uint8_t *)ctx->gcm_ghash;

	if (ctx->gcm_remainder_len > 0) {
		uint64_t counter;
		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

		/*
		 * Here is where we deal with data that is not a
		 * multiple of the block size.
		 */

		/*
		 * Increment counter.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);

		macp = (uint8_t *)ctx->gcm_remainder;
		memset(macp + ctx->gcm_remainder_len, 0,
		    block_size - ctx->gcm_remainder_len);

		/* XOR with counter block */
		for (i = 0; i < ctx->gcm_remainder_len; i++) {
			macp[i] ^= tmpp[i];
		}

		/* add ciphertext to the hash */
		GHASH(ctx, macp, ghash, gops);

		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
	}

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	if (ctx->gcm_remainder_len > 0) {
		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
	}
	out->cd_offset += ctx->gcm_remainder_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		return (rv);
	out->cd_offset += ctx->gcm_tag_len;

	return (CRYPTO_SUCCESS);
}

/*
 * This only deals with decrypting the last block of the input, which
 * might not be a multiple of the block length.
 */
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	uint8_t *datap, *outp, *counterp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int i;

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	datap = (uint8_t *)ctx->gcm_remainder;
	outp = &((ctx->gcm_pt_buf)[index]);
	counterp = (uint8_t *)ctx->gcm_tmp;

	/* authentication tag */
	memset((uint8_t *)ctx->gcm_tmp, 0, block_size);
	memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len);

	/* add ciphertext to the hash */
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());

	/* decrypt remaining ciphertext */
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);

	/* XOR with counter block */
	for (i = 0; i < ctx->gcm_remainder_len; i++) {
		outp[i] = datap[i] ^ counterp[i];
	}
}

int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	(void) out, (void) block_size, (void) encrypt_block, (void) copy_block,
	    (void) xor_block;
	size_t new_len;
	uint8_t *new;

	/*
	 * Copy contiguous ciphertext input blocks to the plaintext buffer.
	 * The ciphertext is decrypted in the final call, once the whole
	 * message (including the tag) has been accumulated.
	 */
	if (length > 0) {
		new_len = ctx->gcm_pt_buf_len + length;
		new = vmem_alloc(new_len, KM_SLEEP);
		if (new == NULL) {
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			ctx->gcm_pt_buf = NULL;
			return (CRYPTO_HOST_MEMORY);
		}

		if (ctx->gcm_pt_buf != NULL) {
			memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
			vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
		} else {
			ASSERT0(ctx->gcm_pt_buf_len);
		}

		ctx->gcm_pt_buf = new;
		ctx->gcm_pt_buf_len = new_len;
		memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
		    length);
		ctx->gcm_processed_data_len += length;
	}

	ctx->gcm_remainder_len = 0;
	return (CRYPTO_SUCCESS);
}

int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
#ifdef CAN_USE_GCM_ASM
	if (ctx->impl != GCM_IMPL_GENERIC)
		return (gcm_decrypt_final_avx(ctx, out, block_size));
#endif

	const gcm_impl_ops_t *gops;
	size_t pt_len;
	size_t remainder;
	uint8_t *ghash;
	uint8_t *blockp;
	uint8_t *cbp;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int processed = 0, rv;

	ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);

	gops = gcm_impl_get_ops();
	pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
	ghash = (uint8_t *)ctx->gcm_ghash;
	blockp = ctx->gcm_pt_buf;
	remainder = pt_len;
	while (remainder > 0) {
		/* Incomplete last block */
		if (remainder < block_size) {
			memcpy(ctx->gcm_remainder, blockp, remainder);
			ctx->gcm_remainder_len = remainder;
			/*
			 * Not expecting any more ciphertext, just
			 * compute plaintext for the remaining input.
			 */
			gcm_decrypt_incomplete_block(ctx, block_size,
			    processed, encrypt_block, xor_block);
			ctx->gcm_remainder_len = 0;
			goto out;
		}
		/* add ciphertext to the hash */
		GHASH(ctx, blockp, ghash, gops);

		/*
		 * Increment counter.
		 * Counter bits are confined to the bottom 32 bits
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		cbp = (uint8_t *)ctx->gcm_tmp;
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);

		/* XOR with ciphertext */
		xor_block(cbp, blockp);

		processed += block_size;
		blockp += block_size;
		remainder -= block_size;
	}
out:
	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	/* compare the input authentication tag with what we calculated */
	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
		/* They don't match */
		return (CRYPTO_INVALID_MAC);
	} else {
		rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
		if (rv != CRYPTO_SUCCESS)
			return (rv);
		out->cd_offset += pt_len;
	}
	return (CRYPTO_SUCCESS);
}
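/*
 * Check that the requested GCM parameters are sane: valid tag lengths
 * are 4, 8, or 12-16 bytes, and the IV must not be empty.  For
 * illustration only: ZFS encryption typically uses ulIvLen = 12 and
 * ulTagBits = 128.
 */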
456eda14cbcSMatt Macy static int
gcm_validate_args(CK_AES_GCM_PARAMS * gcm_param)457eda14cbcSMatt Macy gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
458eda14cbcSMatt Macy {
459eda14cbcSMatt Macy 	size_t tag_len;
460eda14cbcSMatt Macy 
461eda14cbcSMatt Macy 	/*
462eda14cbcSMatt Macy 	 * Check the length of the authentication tag (in bits).
463eda14cbcSMatt Macy 	 */
464eda14cbcSMatt Macy 	tag_len = gcm_param->ulTagBits;
465eda14cbcSMatt Macy 	switch (tag_len) {
466eda14cbcSMatt Macy 	case 32:
467eda14cbcSMatt Macy 	case 64:
468eda14cbcSMatt Macy 	case 96:
469eda14cbcSMatt Macy 	case 104:
470eda14cbcSMatt Macy 	case 112:
471eda14cbcSMatt Macy 	case 120:
472eda14cbcSMatt Macy 	case 128:
473eda14cbcSMatt Macy 		break;
474eda14cbcSMatt Macy 	default:
475eda14cbcSMatt Macy 		return (CRYPTO_MECHANISM_PARAM_INVALID);
476eda14cbcSMatt Macy 	}
477eda14cbcSMatt Macy 
478eda14cbcSMatt Macy 	if (gcm_param->ulIvLen == 0)
479eda14cbcSMatt Macy 		return (CRYPTO_MECHANISM_PARAM_INVALID);
480eda14cbcSMatt Macy 
481eda14cbcSMatt Macy 	return (CRYPTO_SUCCESS);
482eda14cbcSMatt Macy }
483eda14cbcSMatt Macy 
484eda14cbcSMatt Macy static void
gcm_format_initial_blocks(const uint8_t * iv,ulong_t iv_len,gcm_ctx_t * ctx,size_t block_size,void (* copy_block)(uint8_t *,uint8_t *),void (* xor_block)(uint8_t *,uint8_t *))4852a58b312SMartin Matuska gcm_format_initial_blocks(const uint8_t *iv, ulong_t iv_len,
486eda14cbcSMatt Macy     gcm_ctx_t *ctx, size_t block_size,
487eda14cbcSMatt Macy     void (*copy_block)(uint8_t *, uint8_t *),
488eda14cbcSMatt Macy     void (*xor_block)(uint8_t *, uint8_t *))
489eda14cbcSMatt Macy {
490eda14cbcSMatt Macy 	const gcm_impl_ops_t *gops;
491eda14cbcSMatt Macy 	uint8_t *cb;
492eda14cbcSMatt Macy 	ulong_t remainder = iv_len;
493eda14cbcSMatt Macy 	ulong_t processed = 0;
494eda14cbcSMatt Macy 	uint8_t *datap, *ghash;
495eda14cbcSMatt Macy 	uint64_t len_a_len_c[2];
496eda14cbcSMatt Macy 
497eda14cbcSMatt Macy 	gops = gcm_impl_get_ops();
498eda14cbcSMatt Macy 	ghash = (uint8_t *)ctx->gcm_ghash;
499eda14cbcSMatt Macy 	cb = (uint8_t *)ctx->gcm_cb;
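	/*
	 * Per NIST SP 800-38D: for a 96-bit (12 byte) IV,
	 * J0 = IV || 0^31 || 1.  For any other IV length,
	 * J0 = GHASH(IV padded with zeros to a full block, followed by a
	 * block carrying len(IV) in bits).
	 */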
	if (iv_len == 12) {
		memcpy(cb, iv, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV */
		do {
			if (remainder < block_size) {
				memset(cb, 0, block_size);
				memcpy(cb, &(iv[processed]), remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash, gops);
		} while (remainder > 0);

		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}

static int
gcm_init(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
    const uint8_t *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	const gcm_impl_ops_t *gops;
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	/* encrypt zero block to get subkey H */
	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

	gops = gcm_impl_get_ops();
	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	memset(authp, 0, block_size);
	memset(ghash, 0, block_size);

	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There's less than a full block of data; pad
			 * the rest of the buffer with zeros.
			 */

			if (auth_data != NULL) {
				memset(authp, 0, block_size);
				memcpy(authp, &(auth_data[processed]),
				    remainder);
			} else {
				ASSERT0(remainder);
			}

			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash, gops);

	} while (remainder > 0);

	return (CRYPTO_SUCCESS);
}

/*
 * Init the GCM context struct. Handle the cycle and avx implementations here.
 */
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param,
    size_t block_size, int (*encrypt_block)(const void *, const uint8_t *,
    uint8_t *), void (*copy_block)(uint8_t *, uint8_t *),
    void (*xor_block)(uint8_t *, uint8_t *))
{
	CK_AES_GCM_PARAMS *gcm_param;
	int rv = CRYPTO_SUCCESS;
	size_t tag_len, iv_len;

	if (param != NULL) {
		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

		/* GCM mode. */
		if ((rv = gcm_validate_args(gcm_param)) != 0) {
			return (rv);
		}
		gcm_ctx->gcm_flags |= GCM_MODE;

		size_t tbits = gcm_param->ulTagBits;
		tag_len = CRYPTO_BITS2BYTES(tbits);
		iv_len = gcm_param->ulIvLen;

		gcm_ctx->gcm_tag_len = tag_len;
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
	} else {
		return (CRYPTO_MECHANISM_PARAM_INVALID);
	}

	const uint8_t *iv = (const uint8_t *)gcm_param->pIv;
	const uint8_t *aad = (const uint8_t *)gcm_param->pAAD;
	size_t aad_len = gcm_param->ulAADLen;

#ifdef CAN_USE_GCM_ASM
	boolean_t needs_bswap =
	    ((aes_key_t *)gcm_ctx->gcm_keysched)->ops->needs_byteswap;

	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
		gcm_ctx->impl = GCM_IMPL_USED;
	} else {
		/*
		 * Handle the "cycle" implementation by creating different
		 * contexts, one per implementation.
		 */
		gcm_ctx->impl = gcm_toggle_impl();

		/* The AVX impl. doesn't handle byte swapped key schedules. */
		if (needs_bswap == B_TRUE) {
			gcm_ctx->impl = GCM_IMPL_GENERIC;
		}
		/*
		 * If this is an AVX context, use the MOVBE and the BSWAP
		 * variants alternately.
		 */
		if (gcm_ctx->impl == GCM_IMPL_AVX &&
		    zfs_movbe_available() == B_TRUE) {
			(void) atomic_toggle_boolean_nv(
			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
		}
	}
	/*
	 * We don't handle byte swapped key schedules in the avx code path,
	 * but they could still be created by the generic aes implementation.
	 * Make sure not to use them, since we would corrupt data if we did.
	 */
	if (gcm_ctx->impl != GCM_IMPL_GENERIC && needs_bswap == B_TRUE) {
		gcm_ctx->impl = GCM_IMPL_GENERIC;

		cmn_err_once(CE_WARN,
		    "ICP: Can't use the aes generic or cycle implementations "
		    "in combination with the gcm avx or avx2-vaes "
		    "implementation!");
		cmn_err_once(CE_WARN,
		    "ICP: Falling back to a compatible implementation, "
		    "aes-gcm performance will likely be degraded.");
		cmn_err_once(CE_WARN,
		    "ICP: Choose at least the x86_64 aes implementation to "
		    "restore performance.");
	}

	/*
	 * The AVX implementations use an Htable whose size depends on the
	 * selected implementation.
	 */
	if (gcm_ctx->impl != GCM_IMPL_GENERIC) {
		rv = gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len,
		    block_size);
	}
	else
#endif /* ifdef CAN_USE_GCM_ASM */
	if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size,
	    encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) {
		rv = CRYPTO_MECHANISM_PARAM_INVALID;
	}

	return (rv);
}

void *
gcm_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
		return (NULL);

	gcm_ctx->gcm_flags = GCM_MODE;
	return (gcm_ctx);
}

/* GCM implementation that contains the fastest methods */
static gcm_impl_ops_t gcm_fastest_impl = {
	.name = "fastest"
};

/* All compiled-in implementations */
static const gcm_impl_ops_t *gcm_all_impl[] = {
	&gcm_generic_impl,
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	&gcm_pclmulqdq_impl,
#endif
};

/* Indicates that the benchmark has been completed */
static boolean_t gcm_impl_initialized = B_FALSE;

/* Hold all supported implementations */
static size_t gcm_supp_impl_cnt = 0;
static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];

/*
 * Returns the GCM operations for encrypt/decrypt/key setup.  When a
 * SIMD implementation is not allowed in the current context, fall
 * back to the fastest generic implementation.
 */
const gcm_impl_ops_t *
gcm_impl_get_ops(void)
{
	if (!kfpu_allowed())
		return (&gcm_generic_impl);

	const gcm_impl_ops_t *ops = NULL;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	switch (impl) {
	case IMPL_FASTEST:
		ASSERT(gcm_impl_initialized);
		ops = &gcm_fastest_impl;
		break;
	case IMPL_CYCLE:
		/* Cycle through supported implementations */
		ASSERT(gcm_impl_initialized);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		static size_t cycle_impl_idx = 0;
		size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
		ops = gcm_supp_impl[idx];
		break;
#ifdef CAN_USE_GCM_ASM
	case IMPL_AVX:
#if CAN_USE_GCM_ASM >= 2
	case IMPL_AVX2:
#endif
		/*
		 * Make sure that we return a valid implementation while
		 * switching to the avx implementation since there still
		 * may be unfinished non-avx contexts around.
		 */
		ops = &gcm_generic_impl;
		break;
#endif
	default:
		ASSERT3U(impl, <, gcm_supp_impl_cnt);
		ASSERT3U(gcm_supp_impl_cnt, >, 0);
		if (impl < ARRAY_SIZE(gcm_all_impl))
			ops = gcm_supp_impl[impl];
		break;
	}

	ASSERT3P(ops, !=, NULL);

	return (ops);
}

/*
 * Initialize all supported implementations.
 */
void
gcm_impl_init(void)
{
	gcm_impl_ops_t *curr_impl;
	int i, c;

	/* Move supported implementations into gcm_supp_impl */
	for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
		curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];

		if (curr_impl->is_supported())
			gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
	}
	gcm_supp_impl_cnt = c;

	/*
	 * Set the fastest implementation given the assumption that the
	 * hardware accelerated version is the fastest.
	 */
#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
	if (gcm_pclmulqdq_impl.is_supported()) {
		memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
		    sizeof (gcm_fastest_impl));
	} else
#endif
	{
		memcpy(&gcm_fastest_impl, &gcm_generic_impl,
		    sizeof (gcm_fastest_impl));
	}

	strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);

#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if it's available and the implementation
	 * hasn't changed from its default value of fastest on module load.
	 */
#if CAN_USE_GCM_ASM >= 2
	if (gcm_avx2_will_work()) {
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_use_impl(GCM_IMPL_AVX2);
		}
	} else
#endif
	if (gcm_avx_will_work()) {
#ifdef HAVE_MOVBE
		if (zfs_movbe_available() == B_TRUE) {
			atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
		}
#endif
		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
			gcm_use_impl(GCM_IMPL_AVX);
		}
	}
#endif
	/* Finish initialization */
	atomic_swap_32(&icp_gcm_impl, user_sel_impl);
	gcm_impl_initialized = B_TRUE;
}

static const struct {
	const char *name;
	uint32_t sel;
} gcm_impl_opts[] = {
		{ "cycle",	IMPL_CYCLE },
		{ "fastest",	IMPL_FASTEST },
#ifdef CAN_USE_GCM_ASM
		{ "avx",	IMPL_AVX },
		{ "avx2-vaes",	IMPL_AVX2 },
#endif
};

/*
 * Set the desired gcm implementation.
 *
 * If we are called before init(), the user preference is saved in
 * user_sel_impl and applied in the later init() call. This occurs when the
 * module parameter is specified on module load. Otherwise, icp_gcm_impl is
 * updated directly.
 *
 * @val		Name of gcm implementation to use
 * @param	Unused.
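 *
 * Example (Linux, assuming the usual sysfs module parameter path):
 *	echo fastest > /sys/module/icp/parameters/icp_gcm_impl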
 */
int
gcm_impl_set(const char *val)
{
	int err = -EINVAL;
	char req_name[GCM_IMPL_NAME_MAX];
	uint32_t impl = GCM_IMPL_READ(user_sel_impl);
	size_t i;

	/* sanitize input */
	i = strnlen(val, GCM_IMPL_NAME_MAX);
	if (i == 0 || i >= GCM_IMPL_NAME_MAX)
		return (err);

	strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
	while (i > 0 && isspace(req_name[i-1]))
		i--;
	req_name[i] = '\0';

	/* Check mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
#if CAN_USE_GCM_ASM >= 2
		/* Ignore the avx2 implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX2 &&
		    !gcm_avx2_will_work()) {
			continue;
		}
#endif
		/* Ignore the avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
			impl = gcm_impl_opts[i].sel;
			err = 0;
			break;
		}
	}

	/* check all supported impl if init() was already called */
	if (err != 0 && gcm_impl_initialized) {
		/* check all supported implementations */
		for (i = 0; i < gcm_supp_impl_cnt; i++) {
			if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
				impl = i;
				err = 0;
				break;
			}
		}
	}
#ifdef CAN_USE_GCM_ASM
	/*
	 * Use the avx implementation if available and the requested one is
	 * avx or fastest.
	 */
#if CAN_USE_GCM_ASM >= 2
	if (gcm_avx2_will_work() == B_TRUE &&
	    (impl == IMPL_AVX2 || impl == IMPL_FASTEST)) {
		gcm_use_impl(GCM_IMPL_AVX2);
	} else
#endif
	if (gcm_avx_will_work() == B_TRUE &&
	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
		gcm_use_impl(GCM_IMPL_AVX);
	} else {
		gcm_use_impl(GCM_IMPL_GENERIC);
	}
#endif

	if (err == 0) {
		if (gcm_impl_initialized)
			atomic_swap_32(&icp_gcm_impl, impl);
		else
			atomic_swap_32(&user_sel_impl, impl);
	}

	return (err);
}

#if defined(_KERNEL) && defined(__linux__)

static int
icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
{
	return (gcm_impl_set(val));
}

static int
icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
{
	int i, cnt = 0;
	char *fmt;
	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);

	/* list mandatory options */
	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
#ifdef CAN_USE_GCM_ASM
#if CAN_USE_GCM_ASM >= 2
		/* Ignore the avx2 implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX2 &&
		    !gcm_avx2_will_work()) {
			continue;
		}
#endif
		/* Ignore the avx implementation if it won't work. */
		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
			continue;
		}
#endif
		fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
		    gcm_impl_opts[i].name);
	}

	/* list all supported implementations */
	for (i = 0; i < gcm_supp_impl_cnt; i++) {
		fmt = (i == impl) ? "[%s] " : "%s ";
		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
		    gcm_supp_impl[i]->name);
	}

	return (cnt);
}

module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
    NULL, 0644);
MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
#endif /* defined(_KERNEL) && defined(__linux__) */

#ifdef CAN_USE_GCM_ASM
#define	GCM_BLOCK_LEN 16
/*
 * The openssl asm routines are 6x aggregated and need that many bytes
 * at minimum.
 */
#define	GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
#define	GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
/*
 * Ensure the chunk size is reasonable since we are allocating a
 * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
 */
#define	GCM_AVX_MAX_CHUNK_SIZE \
	(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
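/*
 * With GCM_AVX_MIN_DECRYPT_BYTES = 96 this evaluates to
 * (131072 / 96) * 96 = 131040 bytes, i.e. 128 KiB rounded down to a
 * multiple of the 6-block aggregation width.
 */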
/* Clear the FPU registers since they hold sensitive internal state. */
#define	clear_fpu_regs() clear_fpu_regs_avx()

#define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)

/* Get the chunk size module parameter. */
#define	GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size

/*
 * Module parameter: number of bytes to process at once while owning the FPU.
 * Rounded down to the nearest GCM_AVX_MIN_DECRYPT_BYTES byte boundary and
 * guaranteed to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
 */
static uint32_t gcm_avx_chunk_size =
	((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
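/* The default therefore works out to (32768 / 96) * 96 = 32736 bytes. */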
/*
 * GCM definitions: uint128_t is copied from include/crypto/modes.h
 * Avoiding u128 because it is already defined in kernel sources.
 */
typedef struct {
	uint64_t hi, lo;
} uint128_t;

extern void ASMABI clear_fpu_regs_avx(void);
extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst);
extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr,
    const uint32_t pt[4], uint32_t ct[4]);

extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
#if CAN_USE_GCM_ASM >= 2
extern void ASMABI gcm_init_vpclmulqdq_avx2(uint128_t Htable[16],
    const uint64_t H[2]);
#endif
extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
    const uint8_t *in, size_t len);
#if CAN_USE_GCM_ASM >= 2
extern void ASMABI gcm_ghash_vpclmulqdq_avx2(uint64_t ghash[2],
    const uint64_t *Htable, const uint8_t *in, size_t len);
#endif
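/*
 * Dispatch one GHASH update to the SIMD routine matching the context's
 * bound implementation; generic contexts must never reach this path.
 */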
GHASH_AVX(gcm_ctx_t * ctx,const uint8_t * in,size_t len)1050*53a2e263SMartin Matuska static inline void GHASH_AVX(gcm_ctx_t *ctx, const uint8_t *in, size_t len)
1051*53a2e263SMartin Matuska {
1052*53a2e263SMartin Matuska 	switch (ctx->impl) {
1053*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1054*53a2e263SMartin Matuska 		case GCM_IMPL_AVX2:
1055*53a2e263SMartin Matuska 			gcm_ghash_vpclmulqdq_avx2(ctx->gcm_ghash,
1056*53a2e263SMartin Matuska 			    (const uint64_t *)ctx->gcm_Htable, in, len);
1057*53a2e263SMartin Matuska 			break;
1058*53a2e263SMartin Matuska #endif
1059eda14cbcSMatt Macy 
1060*53a2e263SMartin Matuska 		case GCM_IMPL_AVX:
1061*53a2e263SMartin Matuska 			gcm_ghash_avx(ctx->gcm_ghash,
1062*53a2e263SMartin Matuska 			    (const uint64_t *)ctx->gcm_Htable, in, len);
1063*53a2e263SMartin Matuska 			break;
1064*53a2e263SMartin Matuska 
1065*53a2e263SMartin Matuska 		default:
1066*53a2e263SMartin Matuska 			VERIFY(B_FALSE);
1067*53a2e263SMartin Matuska 	}
1068*53a2e263SMartin Matuska }
1069*53a2e263SMartin Matuska 
1070*53a2e263SMartin Matuska typedef size_t ASMABI aesni_gcm_encrypt_impl(const uint8_t *, uint8_t *,
1071*53a2e263SMartin Matuska     size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *);
107215f0b8c3SMartin Matuska extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
1073eda14cbcSMatt Macy     const void *, uint64_t *, uint64_t *);
1074*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1075*53a2e263SMartin Matuska extern void ASMABI aes_gcm_enc_update_vaes_avx2(const uint8_t *in,
1076*53a2e263SMartin Matuska     uint8_t *out, size_t len, const void *key, const uint8_t ivec[16],
1077*53a2e263SMartin Matuska     const uint128_t Htable[16], uint8_t Xi[16]);
1078*53a2e263SMartin Matuska #endif
1079eda14cbcSMatt Macy 
1080*53a2e263SMartin Matuska typedef size_t ASMABI aesni_gcm_decrypt_impl(const uint8_t *, uint8_t *,
1081*53a2e263SMartin Matuska     size_t, const void *, uint64_t *, const uint64_t *Htable, uint64_t *);
108215f0b8c3SMartin Matuska extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
1083eda14cbcSMatt Macy     const void *, uint64_t *, uint64_t *);
1084*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1085*53a2e263SMartin Matuska extern void ASMABI aes_gcm_dec_update_vaes_avx2(const uint8_t *in,
1086*53a2e263SMartin Matuska     uint8_t *out, size_t len, const void *key, const uint8_t ivec[16],
1087*53a2e263SMartin Matuska     const uint128_t Htable[16], uint8_t Xi[16]);
1088*53a2e263SMartin Matuska #endif
1089*53a2e263SMartin Matuska 
1090*53a2e263SMartin Matuska static inline boolean_t
1091*53a2e263SMartin Matuska gcm_avx2_will_work(void)
1092*53a2e263SMartin Matuska {
1093*53a2e263SMartin Matuska 	return (kfpu_allowed() &&
1094*53a2e263SMartin Matuska 	    zfs_avx2_available() && zfs_vaes_available() &&
1095*53a2e263SMartin Matuska 	    zfs_vpclmulqdq_available());
1096*53a2e263SMartin Matuska }
1097eda14cbcSMatt Macy 
1098eda14cbcSMatt Macy static inline boolean_t
1099eda14cbcSMatt Macy gcm_avx_will_work(void)
1100eda14cbcSMatt Macy {
1101eda14cbcSMatt Macy 	/* AVX should imply AES-NI and PCLMULQDQ, but make sure anyhow. */
1102eda14cbcSMatt Macy 	return (kfpu_allowed() &&
1103eda14cbcSMatt Macy 	    zfs_avx_available() && zfs_aes_available() &&
1104eda14cbcSMatt Macy 	    zfs_pclmulqdq_available());
1105eda14cbcSMatt Macy }
1106eda14cbcSMatt Macy 
1107eda14cbcSMatt Macy static inline void
1108*53a2e263SMartin Matuska gcm_use_impl(gcm_impl impl)
1109eda14cbcSMatt Macy {
1110*53a2e263SMartin Matuska 	switch (impl) {
1111*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1112*53a2e263SMartin Matuska 		case GCM_IMPL_AVX2:
1113*53a2e263SMartin Matuska 			if (gcm_avx2_will_work() == B_TRUE) {
1114*53a2e263SMartin Matuska 				atomic_swap_32(&gcm_impl_used, impl);
1115*53a2e263SMartin Matuska 				return;
1116*53a2e263SMartin Matuska 			}
1117*53a2e263SMartin Matuska 
1118*53a2e263SMartin Matuska 			zfs_fallthrough;
1119*53a2e263SMartin Matuska #endif
1120*53a2e263SMartin Matuska 
1121*53a2e263SMartin Matuska 		case GCM_IMPL_AVX:
1122eda14cbcSMatt Macy 			if (gcm_avx_will_work() == B_TRUE) {
1123*53a2e263SMartin Matuska 				atomic_swap_32(&gcm_impl_used, impl);
1124*53a2e263SMartin Matuska 				return;
1125*53a2e263SMartin Matuska 			}
1126*53a2e263SMartin Matuska 
1127*53a2e263SMartin Matuska 			zfs_fallthrough;
1128*53a2e263SMartin Matuska 
1129*53a2e263SMartin Matuska 		default:
1130*53a2e263SMartin Matuska 			atomic_swap_32(&gcm_impl_used, GCM_IMPL_GENERIC);
1131eda14cbcSMatt Macy 	}
1132eda14cbcSMatt Macy }
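
/*
 * Note: the zfs_fallthrough cascade in gcm_use_impl() above degrades
 * gracefully. Requesting AVX2 on a CPU lacking VAES/VPCLMULQDQ falls
 * back to AVX, and requesting AVX without AES-NI/PCLMULQDQ support
 * falls back to the generic implementation.
 */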
1133eda14cbcSMatt Macy 
1134eda14cbcSMatt Macy static inline boolean_t
1135*53a2e263SMartin Matuska gcm_impl_will_work(gcm_impl impl)
1136eda14cbcSMatt Macy {
1137*53a2e263SMartin Matuska 	switch (impl) {
1138*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1139*53a2e263SMartin Matuska 		case GCM_IMPL_AVX2:
1140*53a2e263SMartin Matuska 			return (gcm_avx2_will_work());
1141*53a2e263SMartin Matuska #endif
1142eda14cbcSMatt Macy 
1143*53a2e263SMartin Matuska 		case GCM_IMPL_AVX:
1144*53a2e263SMartin Matuska 			return (gcm_avx_will_work());
11457877fdebSMatt Macy 
11467877fdebSMatt Macy 		default:
1147*53a2e263SMartin Matuska 			return (B_TRUE);
11487877fdebSMatt Macy 	}
11497877fdebSMatt Macy }
11507877fdebSMatt Macy 
1151*53a2e263SMartin Matuska static inline gcm_impl
1152*53a2e263SMartin Matuska gcm_toggle_impl(void)
1153*53a2e263SMartin Matuska {
1154*53a2e263SMartin Matuska 	gcm_impl current_impl, new_impl;
1155*53a2e263SMartin Matuska 	do { /* handle races */
1156*53a2e263SMartin Matuska 		current_impl = atomic_load_32(&gcm_impl_used);
1157*53a2e263SMartin Matuska 		new_impl = current_impl;
1158*53a2e263SMartin Matuska 		while (B_TRUE) { /* handle incompatible implementations */
1159*53a2e263SMartin Matuska 			new_impl = (new_impl + 1) % GCM_IMPL_MAX;
1160*53a2e263SMartin Matuska 			if (gcm_impl_will_work(new_impl)) {
1161*53a2e263SMartin Matuska 				break;
1162*53a2e263SMartin Matuska 			}
1163*53a2e263SMartin Matuska 		}
1164*53a2e263SMartin Matuska 
1165*53a2e263SMartin Matuska 	} while (atomic_cas_32(&gcm_impl_used, current_impl, new_impl) !=
1166*53a2e263SMartin Matuska 	    current_impl);
1167*53a2e263SMartin Matuska 
1168*53a2e263SMartin Matuska 	return (new_impl);
1169*53a2e263SMartin Matuska }
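
/*
 * A sketch of how the loop above stays race-free without a lock
 * (assuming it can be invoked concurrently, e.g. once per context setup
 * when cycling implementations): each iteration re-reads the currently
 * published implementation, advances modulo GCM_IMPL_MAX past any back
 * end the CPU cannot run, and publishes the result with atomic_cas_32().
 * If another thread toggled in the meantime, the CAS fails and the whole
 * selection is recomputed against the fresh value.
 */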
1170*53a2e263SMartin Matuska 
1171eda14cbcSMatt Macy 
1172eda14cbcSMatt Macy /* Increment the GCM counter block by n. */
1173eda14cbcSMatt Macy static inline void
1174eda14cbcSMatt Macy gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
1175eda14cbcSMatt Macy {
1176eda14cbcSMatt Macy 	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
1177eda14cbcSMatt Macy 	uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);
1178eda14cbcSMatt Macy 
1179eda14cbcSMatt Macy 	counter = htonll(counter + n);
1180eda14cbcSMatt Macy 	counter &= counter_mask;
1181eda14cbcSMatt Macy 	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
1182eda14cbcSMatt Macy }
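
/*
 * Worked example for the masking above: the last four bytes of the
 * counter block hold a big-endian 32-bit counter (inc32() in NIST SP
 * 800-38D terms). With gcm_cb[1] ending in ff ff ff ff, incrementing by
 * one wraps those bytes to zero while the preceding IV-derived bytes
 * are untouched:
 *
 *	before: .. .. .. .. ff ff ff ff
 *	after:  .. .. .. .. 00 00 00 00
 */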
1183eda14cbcSMatt Macy 
1184*53a2e263SMartin Matuska static size_t aesni_gcm_encrypt_avx(const uint8_t *in, uint8_t *out,
1185*53a2e263SMartin Matuska     size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
1186*53a2e263SMartin Matuska     uint64_t *Xip)
1187*53a2e263SMartin Matuska {
1188*53a2e263SMartin Matuska 	(void) Htable;
1189*53a2e263SMartin Matuska 	return (aesni_gcm_encrypt(in, out, len, key, iv, Xip));
1190*53a2e263SMartin Matuska }
1191*53a2e263SMartin Matuska 
1192*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1193*53a2e263SMartin Matuska // kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
1194*53a2e263SMartin Matuska // bits of a |size_t|.
1195*53a2e263SMartin Matuska // This is from boringssl/crypto/fipsmodule/aes/gcm.cc.inc
1196*53a2e263SMartin Matuska static const size_t kSizeTWithoutLower4Bits = (size_t)-16;
1197*53a2e263SMartin Matuska 
1198*53a2e263SMartin Matuska /* The following CRYPTO methods are from boringssl/crypto/internal.h */
1199*53a2e263SMartin Matuska static inline uint32_t CRYPTO_bswap4(uint32_t x) {
1200*53a2e263SMartin Matuska 	return (__builtin_bswap32(x));
1201*53a2e263SMartin Matuska }
1202*53a2e263SMartin Matuska 
1203*53a2e263SMartin Matuska static inline uint32_t CRYPTO_load_u32_be(const void *in) {
1204*53a2e263SMartin Matuska 	uint32_t v;
1205*53a2e263SMartin Matuska 	memcpy(&v, in, sizeof (v));
1206*53a2e263SMartin Matuska 	return (CRYPTO_bswap4(v));
1207*53a2e263SMartin Matuska }
1208*53a2e263SMartin Matuska 
1209*53a2e263SMartin Matuska static inline void CRYPTO_store_u32_be(void *out, uint32_t v) {
1210*53a2e263SMartin Matuska 	v = CRYPTO_bswap4(v);
1211*53a2e263SMartin Matuska 	memcpy(out, &v, sizeof (v));
1212*53a2e263SMartin Matuska }
1213*53a2e263SMartin Matuska 
1214*53a2e263SMartin Matuska static size_t aesni_gcm_encrypt_avx2(const uint8_t *in, uint8_t *out,
1215*53a2e263SMartin Matuska     size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
1216*53a2e263SMartin Matuska     uint64_t *Xip)
1217*53a2e263SMartin Matuska {
1218*53a2e263SMartin Matuska 	uint8_t *ivec = (uint8_t *)iv;
1219*53a2e263SMartin Matuska 	len &= kSizeTWithoutLower4Bits;
1220*53a2e263SMartin Matuska 	aes_gcm_enc_update_vaes_avx2(in, out, len, key, ivec,
1221*53a2e263SMartin Matuska 	    (const uint128_t *)Htable, (uint8_t *)Xip);
1222*53a2e263SMartin Matuska 	CRYPTO_store_u32_be(&ivec[12],
1223*53a2e263SMartin Matuska 	    CRYPTO_load_u32_be(&ivec[12]) + len / 16);
1224*53a2e263SMartin Matuska 	return (len);
1225*53a2e263SMartin Matuska }
1226*53a2e263SMartin Matuska #endif /* if CAN_USE_GCM_ASM >= 2 */
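
/*
 * Why the AVX2 wrapper above differs from aesni_gcm_encrypt_avx(): the
 * BoringSSL VAES routine only processes whole 16-byte blocks and does
 * not write an updated counter back. The wrapper therefore masks len
 * down to a multiple of 16 with kSizeTWithoutLower4Bits and bumps the
 * big-endian 32-bit counter word at ivec[12..15] by len / 16 blocks
 * itself, keeping ctx->gcm_cb consistent for any block-by-block tail
 * processing that follows.
 */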
1227*53a2e263SMartin Matuska 
1228eda14cbcSMatt Macy /*
1229eda14cbcSMatt Macy  * Encrypt multiple blocks of data in GCM mode.
1230eda14cbcSMatt Macy  * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
1231eda14cbcSMatt Macy  * if possible. While processing a chunk the FPU is "locked".
1232eda14cbcSMatt Macy  */
1233eda14cbcSMatt Macy static int
1234eda14cbcSMatt Macy gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
1235eda14cbcSMatt Macy     size_t length, crypto_data_t *out, size_t block_size)
1236eda14cbcSMatt Macy {
1237eda14cbcSMatt Macy 	size_t bleft = length;
1238eda14cbcSMatt Macy 	size_t need = 0;
1239eda14cbcSMatt Macy 	size_t done = 0;
1240eda14cbcSMatt Macy 	uint8_t *datap = (uint8_t *)data;
1241eda14cbcSMatt Macy 	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1242*53a2e263SMartin Matuska 	aesni_gcm_encrypt_impl *encrypt_blocks =
1243*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1244*53a2e263SMartin Matuska 	    ctx->impl == GCM_IMPL_AVX2 ?
1245*53a2e263SMartin Matuska 	    aesni_gcm_encrypt_avx2 :
1246*53a2e263SMartin Matuska #endif
1247*53a2e263SMartin Matuska 	    aesni_gcm_encrypt_avx;
1248eda14cbcSMatt Macy 	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1249eda14cbcSMatt Macy 	uint64_t *ghash = ctx->gcm_ghash;
1250*53a2e263SMartin Matuska 	uint64_t *htable = ctx->gcm_Htable;
1251eda14cbcSMatt Macy 	uint64_t *cb = ctx->gcm_cb;
1252eda14cbcSMatt Macy 	uint8_t *ct_buf = NULL;
1253eda14cbcSMatt Macy 	uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1254eda14cbcSMatt Macy 	int rv = CRYPTO_SUCCESS;
1255eda14cbcSMatt Macy 
1256eda14cbcSMatt Macy 	ASSERT(block_size == GCM_BLOCK_LEN);
12572a58b312SMartin Matuska 	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
12582a58b312SMartin Matuska 	    B_FALSE);
1259eda14cbcSMatt Macy 	/*
1260eda14cbcSMatt Macy 	 * If the last call left an incomplete block, try to fill
1261eda14cbcSMatt Macy 	 * it first.
1262eda14cbcSMatt Macy 	 */
1263eda14cbcSMatt Macy 	if (ctx->gcm_remainder_len > 0) {
1264eda14cbcSMatt Macy 		need = block_size - ctx->gcm_remainder_len;
1265eda14cbcSMatt Macy 		if (length < need) {
1266eda14cbcSMatt Macy 			/* Accumulate bytes here and return. */
1267da5137abSMartin Matuska 			memcpy((uint8_t *)ctx->gcm_remainder +
1268da5137abSMartin Matuska 			    ctx->gcm_remainder_len, datap, length);
1269eda14cbcSMatt Macy 
1270eda14cbcSMatt Macy 			ctx->gcm_remainder_len += length;
1271eda14cbcSMatt Macy 			if (ctx->gcm_copy_to == NULL) {
1272eda14cbcSMatt Macy 				ctx->gcm_copy_to = datap;
1273eda14cbcSMatt Macy 			}
1274eda14cbcSMatt Macy 			return (CRYPTO_SUCCESS);
1275eda14cbcSMatt Macy 		} else {
1276eda14cbcSMatt Macy 			/* Complete incomplete block. */
1277da5137abSMartin Matuska 			memcpy((uint8_t *)ctx->gcm_remainder +
1278da5137abSMartin Matuska 			    ctx->gcm_remainder_len, datap, need);
1279eda14cbcSMatt Macy 
1280eda14cbcSMatt Macy 			ctx->gcm_copy_to = NULL;
1281eda14cbcSMatt Macy 		}
1282eda14cbcSMatt Macy 	}
1283eda14cbcSMatt Macy 
1284eda14cbcSMatt Macy 	/* Allocate a buffer to encrypt to if there is enough input. */
1285eda14cbcSMatt Macy 	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1286c03c5b1cSMartin Matuska 		ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
1287eda14cbcSMatt Macy 		if (ct_buf == NULL) {
1288eda14cbcSMatt Macy 			return (CRYPTO_HOST_MEMORY);
1289eda14cbcSMatt Macy 		}
1290eda14cbcSMatt Macy 	}
1291eda14cbcSMatt Macy 
1292eda14cbcSMatt Macy 	/* If we completed an incomplete block, encrypt and write it out. */
1293eda14cbcSMatt Macy 	if (ctx->gcm_remainder_len > 0) {
1294eda14cbcSMatt Macy 		kfpu_begin();
1295eda14cbcSMatt Macy 		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1296eda14cbcSMatt Macy 		    (const uint32_t *)cb, (uint32_t *)tmp);
1297eda14cbcSMatt Macy 
1298eda14cbcSMatt Macy 		gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
1299eda14cbcSMatt Macy 		GHASH_AVX(ctx, tmp, block_size);
1300eda14cbcSMatt Macy 		clear_fpu_regs();
1301eda14cbcSMatt Macy 		kfpu_end();
1302eda14cbcSMatt Macy 		rv = crypto_put_output_data(tmp, out, block_size);
1303eda14cbcSMatt Macy 		out->cd_offset += block_size;
1304eda14cbcSMatt Macy 		gcm_incr_counter_block(ctx);
1305eda14cbcSMatt Macy 		ctx->gcm_processed_data_len += block_size;
1306eda14cbcSMatt Macy 		bleft -= need;
1307eda14cbcSMatt Macy 		datap += need;
1308eda14cbcSMatt Macy 		ctx->gcm_remainder_len = 0;
1309eda14cbcSMatt Macy 	}
1310eda14cbcSMatt Macy 
1311eda14cbcSMatt Macy 	/* Do the bulk encryption in chunk_size blocks. */
1312eda14cbcSMatt Macy 	for (; bleft >= chunk_size; bleft -= chunk_size) {
1313eda14cbcSMatt Macy 		kfpu_begin();
1314*53a2e263SMartin Matuska 		done = encrypt_blocks(
1315*53a2e263SMartin Matuska 		    datap, ct_buf, chunk_size, key, cb, htable, ghash);
1316eda14cbcSMatt Macy 
1317eda14cbcSMatt Macy 		clear_fpu_regs();
1318eda14cbcSMatt Macy 		kfpu_end();
1319eda14cbcSMatt Macy 		if (done != chunk_size) {
1320eda14cbcSMatt Macy 			rv = CRYPTO_FAILED;
1321eda14cbcSMatt Macy 			goto out_nofpu;
1322eda14cbcSMatt Macy 		}
1323eda14cbcSMatt Macy 		rv = crypto_put_output_data(ct_buf, out, chunk_size);
1324eda14cbcSMatt Macy 		if (rv != CRYPTO_SUCCESS) {
1325eda14cbcSMatt Macy 			goto out_nofpu;
1326eda14cbcSMatt Macy 		}
1327eda14cbcSMatt Macy 		out->cd_offset += chunk_size;
1328eda14cbcSMatt Macy 		datap += chunk_size;
1329eda14cbcSMatt Macy 		ctx->gcm_processed_data_len += chunk_size;
1330eda14cbcSMatt Macy 	}
1331eda14cbcSMatt Macy 	/* Check if we are already done. */
1332eda14cbcSMatt Macy 	if (bleft == 0) {
1333eda14cbcSMatt Macy 		goto out_nofpu;
1334eda14cbcSMatt Macy 	}
1335eda14cbcSMatt Macy 	/* Bulk encrypt the remaining data. */
1336eda14cbcSMatt Macy 	kfpu_begin();
1337eda14cbcSMatt Macy 	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
1338*53a2e263SMartin Matuska 		done = encrypt_blocks(datap, ct_buf, bleft, key, cb, htable,
1339*53a2e263SMartin Matuska 		    ghash);
1340eda14cbcSMatt Macy 		if (done == 0) {
1341eda14cbcSMatt Macy 			rv = CRYPTO_FAILED;
1342eda14cbcSMatt Macy 			goto out;
1343eda14cbcSMatt Macy 		}
1344eda14cbcSMatt Macy 		rv = crypto_put_output_data(ct_buf, out, done);
1345eda14cbcSMatt Macy 		if (rv != CRYPTO_SUCCESS) {
1346eda14cbcSMatt Macy 			goto out;
1347eda14cbcSMatt Macy 		}
1348eda14cbcSMatt Macy 		out->cd_offset += done;
1349eda14cbcSMatt Macy 		ctx->gcm_processed_data_len += done;
1350eda14cbcSMatt Macy 		datap += done;
1351eda14cbcSMatt Macy 		bleft -= done;
1352eda14cbcSMatt Macy 
1353eda14cbcSMatt Macy 	}
1354eda14cbcSMatt Macy 	/* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
1355eda14cbcSMatt Macy 	while (bleft > 0) {
1356eda14cbcSMatt Macy 		if (bleft < block_size) {
1357da5137abSMartin Matuska 			memcpy(ctx->gcm_remainder, datap, bleft);
1358eda14cbcSMatt Macy 			ctx->gcm_remainder_len = bleft;
1359eda14cbcSMatt Macy 			ctx->gcm_copy_to = datap;
1360eda14cbcSMatt Macy 			goto out;
1361eda14cbcSMatt Macy 		}
1362eda14cbcSMatt Macy 		/* Encrypt, hash and write out. */
1363eda14cbcSMatt Macy 		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
1364eda14cbcSMatt Macy 		    (const uint32_t *)cb, (uint32_t *)tmp);
1365eda14cbcSMatt Macy 
1366eda14cbcSMatt Macy 		gcm_xor_avx(datap, tmp);
1367eda14cbcSMatt Macy 		GHASH_AVX(ctx, tmp, block_size);
1368eda14cbcSMatt Macy 		rv = crypto_put_output_data(tmp, out, block_size);
1369eda14cbcSMatt Macy 		if (rv != CRYPTO_SUCCESS) {
1370eda14cbcSMatt Macy 			goto out;
1371eda14cbcSMatt Macy 		}
1372eda14cbcSMatt Macy 		out->cd_offset += block_size;
1373eda14cbcSMatt Macy 		gcm_incr_counter_block(ctx);
1374eda14cbcSMatt Macy 		ctx->gcm_processed_data_len += block_size;
1375eda14cbcSMatt Macy 		datap += block_size;
1376eda14cbcSMatt Macy 		bleft -= block_size;
1377eda14cbcSMatt Macy 	}
1378eda14cbcSMatt Macy out:
1379eda14cbcSMatt Macy 	clear_fpu_regs();
1380eda14cbcSMatt Macy 	kfpu_end();
1381eda14cbcSMatt Macy out_nofpu:
1382eda14cbcSMatt Macy 	if (ct_buf != NULL) {
1383eda14cbcSMatt Macy 		vmem_free(ct_buf, chunk_size);
1384eda14cbcSMatt Macy 	}
1385eda14cbcSMatt Macy 	return (rv);
1386eda14cbcSMatt Macy }
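
/*
 * FPU discipline in the function above: every stretch of SIMD work is
 * bracketed by kfpu_begin()/kfpu_end(), and clear_fpu_regs() runs before
 * each kfpu_end() so key material does not linger in the register file.
 * Limiting each bracket to GCM_CHUNK_SIZE_READ bytes bounds how long the
 * FPU is held at a time.
 */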
1387eda14cbcSMatt Macy 
1388eda14cbcSMatt Macy /*
1389eda14cbcSMatt Macy  * Finalize the encryption: Zero fill, encrypt, hash and write out any
1390eda14cbcSMatt Macy  * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
1391eda14cbcSMatt Macy  */
1392eda14cbcSMatt Macy static int
1393eda14cbcSMatt Macy gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
1394eda14cbcSMatt Macy {
1395eda14cbcSMatt Macy 	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
1396eda14cbcSMatt Macy 	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
1397eda14cbcSMatt Macy 	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
1398eda14cbcSMatt Macy 	size_t rem_len = ctx->gcm_remainder_len;
1399eda14cbcSMatt Macy 	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
1400eda14cbcSMatt Macy 	int aes_rounds = ((aes_key_t *)keysched)->nr;
1401eda14cbcSMatt Macy 	int rv;
1402eda14cbcSMatt Macy 
1403eda14cbcSMatt Macy 	ASSERT(block_size == GCM_BLOCK_LEN);
14042a58b312SMartin Matuska 	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
14052a58b312SMartin Matuska 	    B_FALSE);
1406eda14cbcSMatt Macy 
1407eda14cbcSMatt Macy 	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
1408eda14cbcSMatt Macy 		return (CRYPTO_DATA_LEN_RANGE);
1409eda14cbcSMatt Macy 	}
1410eda14cbcSMatt Macy 
1411eda14cbcSMatt Macy 	kfpu_begin();
1412eda14cbcSMatt Macy 	/* Pad last incomplete block with zeros, encrypt and hash. */
1413eda14cbcSMatt Macy 	if (rem_len > 0) {
1414eda14cbcSMatt Macy 		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
1415eda14cbcSMatt Macy 		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
1416eda14cbcSMatt Macy 
1417eda14cbcSMatt Macy 		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
1418da5137abSMartin Matuska 		memset(remainder + rem_len, 0, block_size - rem_len);
1419eda14cbcSMatt Macy 		for (int i = 0; i < rem_len; i++) {
1420eda14cbcSMatt Macy 			remainder[i] ^= tmp[i];
1421eda14cbcSMatt Macy 		}
1422eda14cbcSMatt Macy 		GHASH_AVX(ctx, remainder, block_size);
1423eda14cbcSMatt Macy 		ctx->gcm_processed_data_len += rem_len;
1424eda14cbcSMatt Macy 		/* No need to increment counter_block, it's the last block. */
1425eda14cbcSMatt Macy 	}
1426eda14cbcSMatt Macy 	/* Finish tag. */
1427eda14cbcSMatt Macy 	ctx->gcm_len_a_len_c[1] =
1428eda14cbcSMatt Macy 	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
1429eda14cbcSMatt Macy 	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
1430eda14cbcSMatt Macy 	aes_encrypt_intel(keysched, aes_rounds, J0, J0);
1431eda14cbcSMatt Macy 
1432eda14cbcSMatt Macy 	gcm_xor_avx((uint8_t *)J0, ghash);
1433eda14cbcSMatt Macy 	clear_fpu_regs();
1434eda14cbcSMatt Macy 	kfpu_end();
1435eda14cbcSMatt Macy 
1436eda14cbcSMatt Macy 	/* Output remainder. */
1437eda14cbcSMatt Macy 	if (rem_len > 0) {
1438eda14cbcSMatt Macy 		rv = crypto_put_output_data(remainder, out, rem_len);
1439eda14cbcSMatt Macy 		if (rv != CRYPTO_SUCCESS)
1440eda14cbcSMatt Macy 			return (rv);
1441eda14cbcSMatt Macy 	}
1442eda14cbcSMatt Macy 	out->cd_offset += rem_len;
1443eda14cbcSMatt Macy 	ctx->gcm_remainder_len = 0;
1444eda14cbcSMatt Macy 	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
1445eda14cbcSMatt Macy 	if (rv != CRYPTO_SUCCESS)
1446eda14cbcSMatt Macy 		return (rv);
1447eda14cbcSMatt Macy 
1448eda14cbcSMatt Macy 	out->cd_offset += ctx->gcm_tag_len;
1449eda14cbcSMatt Macy 	return (CRYPTO_SUCCESS);
1450eda14cbcSMatt Macy }
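
/*
 * For reference, the tag produced above is the one defined in NIST SP
 * 800-38D:
 *
 *	S = GHASH_H(A || pad || C || pad || [len(A)]64 || [len(C)]64)
 *	T = MSB_t(E(K, J0) xor S)
 *
 * The AAD contribution to S was already folded in by gcm_init_avx(), so
 * only the length block and the final XOR with the encrypted J0 remain
 * here; the tag ends up in ctx->gcm_ghash.
 */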
1451eda14cbcSMatt Macy 
1452*53a2e263SMartin Matuska static size_t aesni_gcm_decrypt_avx(const uint8_t *in, uint8_t *out,
1453*53a2e263SMartin Matuska     size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
1454*53a2e263SMartin Matuska     uint64_t *Xip)
1455*53a2e263SMartin Matuska {
1456*53a2e263SMartin Matuska 	(void) Htable;
1457*53a2e263SMartin Matuska 	return (aesni_gcm_decrypt(in, out, len, key, iv, Xip));
1458*53a2e263SMartin Matuska }
1459*53a2e263SMartin Matuska 
1460*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1461*53a2e263SMartin Matuska static size_t aesni_gcm_decrypt_avx2(const uint8_t *in, uint8_t *out,
1462*53a2e263SMartin Matuska     size_t len, const void *key, uint64_t *iv, const uint64_t *Htable,
1463*53a2e263SMartin Matuska     uint64_t *Xip)
1464*53a2e263SMartin Matuska {
1465*53a2e263SMartin Matuska 	uint8_t *ivec = (uint8_t *)iv;
1466*53a2e263SMartin Matuska 	len &= kSizeTWithoutLower4Bits;
1467*53a2e263SMartin Matuska 	aes_gcm_dec_update_vaes_avx2(in, out, len, key, ivec,
1468*53a2e263SMartin Matuska 	    (const uint128_t *)Htable, (uint8_t *)Xip);
1469*53a2e263SMartin Matuska 	CRYPTO_store_u32_be(&ivec[12],
1470*53a2e263SMartin Matuska 	    CRYPTO_load_u32_be(&ivec[12]) + len / 16);
1471*53a2e263SMartin Matuska 	return (len);
1472*53a2e263SMartin Matuska }
1473*53a2e263SMartin Matuska #endif /* if CAN_USE_GCM_ASM >= 2 */
1474*53a2e263SMartin Matuska 
1475eda14cbcSMatt Macy /*
1476eda14cbcSMatt Macy  * Finalize decryption: so far we have only accumulated the ciphertext,
1477eda14cbcSMatt Macy  * so now we decrypt it in place.
1478eda14cbcSMatt Macy  */
1479eda14cbcSMatt Macy static int
1480eda14cbcSMatt Macy gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
1481eda14cbcSMatt Macy {
1482eda14cbcSMatt Macy 	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
1483eda14cbcSMatt Macy 	ASSERT3U(block_size, ==, 16);
14842a58b312SMartin Matuska 	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
14852a58b312SMartin Matuska 	    B_FALSE);
1486eda14cbcSMatt Macy 
1487eda14cbcSMatt Macy 	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1488*53a2e263SMartin Matuska 	aesni_gcm_decrypt_impl *decrypt_blocks =
1489*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1490*53a2e263SMartin Matuska 	    ctx->impl == GCM_IMPL_AVX2 ?
1491*53a2e263SMartin Matuska 	    aesni_gcm_decrypt_avx2 :
1492*53a2e263SMartin Matuska #endif
1493*53a2e263SMartin Matuska 	    aesni_gcm_decrypt_avx;
1494eda14cbcSMatt Macy 	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
1495eda14cbcSMatt Macy 	uint8_t *datap = ctx->gcm_pt_buf;
1496eda14cbcSMatt Macy 	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
1497eda14cbcSMatt Macy 	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
1498*53a2e263SMartin Matuska 	uint64_t *htable = ctx->gcm_Htable;
1499eda14cbcSMatt Macy 	uint64_t *ghash = ctx->gcm_ghash;
1500eda14cbcSMatt Macy 	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
1501eda14cbcSMatt Macy 	int rv = CRYPTO_SUCCESS;
1502eda14cbcSMatt Macy 	size_t bleft, done;
1503eda14cbcSMatt Macy 
1504eda14cbcSMatt Macy 	/*
1505eda14cbcSMatt Macy 	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
1506eda14cbcSMatt Macy 	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of
1507eda14cbcSMatt Macy 	 * GCM_AVX_MIN_DECRYPT_BYTES.
1508eda14cbcSMatt Macy 	 */
1509eda14cbcSMatt Macy 	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
1510eda14cbcSMatt Macy 		kfpu_begin();
1511*53a2e263SMartin Matuska 		done = decrypt_blocks(datap, datap, chunk_size,
1512*53a2e263SMartin Matuska 		    (const void *)key, ctx->gcm_cb, htable, ghash);
1513eda14cbcSMatt Macy 		clear_fpu_regs();
1514eda14cbcSMatt Macy 		kfpu_end();
1515eda14cbcSMatt Macy 		if (done != chunk_size) {
1516eda14cbcSMatt Macy 			return (CRYPTO_FAILED);
1517eda14cbcSMatt Macy 		}
1518eda14cbcSMatt Macy 		datap += done;
1519eda14cbcSMatt Macy 	}
152016038816SMartin Matuska 	/* Decrypt remainder, which is less than chunk size, in one go. */
1521eda14cbcSMatt Macy 	kfpu_begin();
1522eda14cbcSMatt Macy 	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
1523*53a2e263SMartin Matuska 		done = decrypt_blocks(datap, datap, bleft,
1524*53a2e263SMartin Matuska 		    (const void *)key, ctx->gcm_cb, htable, ghash);
1525eda14cbcSMatt Macy 		if (done == 0) {
1526eda14cbcSMatt Macy 			clear_fpu_regs();
1527eda14cbcSMatt Macy 			kfpu_end();
1528eda14cbcSMatt Macy 			return (CRYPTO_FAILED);
1529eda14cbcSMatt Macy 		}
1530eda14cbcSMatt Macy 		datap += done;
1531eda14cbcSMatt Macy 		bleft -= done;
1532eda14cbcSMatt Macy 	}
1533eda14cbcSMatt Macy 	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
1534eda14cbcSMatt Macy 
1535eda14cbcSMatt Macy 	/*
153616038816SMartin Matuska 	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
1537eda14cbcSMatt Macy 	 * decrypt them block by block.
1538eda14cbcSMatt Macy 	 */
1539eda14cbcSMatt Macy 	while (bleft > 0) {
1540eda14cbcSMatt Macy 		/* Incomplete last block. */
1541eda14cbcSMatt Macy 		if (bleft < block_size) {
1542eda14cbcSMatt Macy 			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
1543eda14cbcSMatt Macy 
1544da5137abSMartin Matuska 			memset(lastb, 0, block_size);
1545da5137abSMartin Matuska 			memcpy(lastb, datap, bleft);
1546eda14cbcSMatt Macy 			/* The GCM processing. */
1547eda14cbcSMatt Macy 			GHASH_AVX(ctx, lastb, block_size);
1548eda14cbcSMatt Macy 			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
1549eda14cbcSMatt Macy 			for (size_t i = 0; i < bleft; i++) {
1550eda14cbcSMatt Macy 				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
1551eda14cbcSMatt Macy 			}
1552eda14cbcSMatt Macy 			break;
1553eda14cbcSMatt Macy 		}
1554eda14cbcSMatt Macy 		/* The GCM processing. */
1555eda14cbcSMatt Macy 		GHASH_AVX(ctx, datap, block_size);
1556eda14cbcSMatt Macy 		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
1557eda14cbcSMatt Macy 		gcm_xor_avx((uint8_t *)tmp, datap);
1558eda14cbcSMatt Macy 		gcm_incr_counter_block(ctx);
1559eda14cbcSMatt Macy 
1560eda14cbcSMatt Macy 		datap += block_size;
1561eda14cbcSMatt Macy 		bleft -= block_size;
1562eda14cbcSMatt Macy 	}
1563eda14cbcSMatt Macy 	if (rv != CRYPTO_SUCCESS) {
1564eda14cbcSMatt Macy 		clear_fpu_regs();
1565eda14cbcSMatt Macy 		kfpu_end();
1566eda14cbcSMatt Macy 		return (rv);
1567eda14cbcSMatt Macy 	}
1568eda14cbcSMatt Macy 	/* Decryption done, finish the tag. */
1569eda14cbcSMatt Macy 	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
1570eda14cbcSMatt Macy 	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
1571eda14cbcSMatt Macy 	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
1572eda14cbcSMatt Macy 	    (uint32_t *)ctx->gcm_J0);
1573eda14cbcSMatt Macy 
1574eda14cbcSMatt Macy 	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
1575eda14cbcSMatt Macy 
1576eda14cbcSMatt Macy 	/* We are done with the FPU, restore its state. */
1577eda14cbcSMatt Macy 	clear_fpu_regs();
1578eda14cbcSMatt Macy 	kfpu_end();
1579eda14cbcSMatt Macy 
1580eda14cbcSMatt Macy 	/* Compare the input authentication tag with what we calculated. */
1581da5137abSMartin Matuska 	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
1582eda14cbcSMatt Macy 		/* They don't match. */
1583eda14cbcSMatt Macy 		return (CRYPTO_INVALID_MAC);
1584eda14cbcSMatt Macy 	}
1585eda14cbcSMatt Macy 	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
1586eda14cbcSMatt Macy 	if (rv != CRYPTO_SUCCESS) {
1587eda14cbcSMatt Macy 		return (rv);
1588eda14cbcSMatt Macy 	}
1589eda14cbcSMatt Macy 	out->cd_offset += pt_len;
1590eda14cbcSMatt Macy 	return (CRYPTO_SUCCESS);
1591eda14cbcSMatt Macy }
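
/*
 * Note the ordering above: the entire ciphertext is decrypted in place
 * in gcm_pt_buf and the tag is verified before any plaintext is handed
 * to the caller through crypto_put_output_data(), so a message with a
 * forged tag never releases plaintext.
 */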
1592eda14cbcSMatt Macy 
1593eda14cbcSMatt Macy /*
1594eda14cbcSMatt Macy  * Initialize the GCM params H, Htable and the counter block. Save the
1595eda14cbcSMatt Macy  * initial counter block.
1596eda14cbcSMatt Macy  */
1597eda14cbcSMatt Macy static int
15982a58b312SMartin Matuska gcm_init_avx(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len,
15992a58b312SMartin Matuska     const uint8_t *auth_data, size_t auth_data_len, size_t block_size)
1600eda14cbcSMatt Macy {
1601eda14cbcSMatt Macy 	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
1602eda14cbcSMatt Macy 	uint64_t *H = ctx->gcm_H;
1603eda14cbcSMatt Macy 	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
1604eda14cbcSMatt Macy 	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
16052a58b312SMartin Matuska 	const uint8_t *datap = auth_data;
1606eda14cbcSMatt Macy 	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
1607eda14cbcSMatt Macy 	size_t bleft;
1608eda14cbcSMatt Macy 
1609eda14cbcSMatt Macy 	ASSERT(block_size == GCM_BLOCK_LEN);
16102a58b312SMartin Matuska 	ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==,
16112a58b312SMartin Matuska 	    B_FALSE);
1612eda14cbcSMatt Macy 
1613*53a2e263SMartin Matuska 	size_t htab_len = 0;
1614*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1615*53a2e263SMartin Matuska 	if (ctx->impl == GCM_IMPL_AVX2) {
1616*53a2e263SMartin Matuska 		/*
1617*53a2e263SMartin Matuska 		 * BoringSSL's API specifies uint128_t[16] for htab, but only
1618*53a2e263SMartin Matuska 		 * uint128_t[12] are used.
1619*53a2e263SMartin Matuska 		 * See https://github.com/google/boringssl/blob/
1620*53a2e263SMartin Matuska 		 * 813840dd094f9e9c1b00a7368aa25e656554221f1/crypto/fipsmodule/
1621*53a2e263SMartin Matuska 		 * modes/asm/aes-gcm-avx2-x86_64.pl#L198-L200
1622*53a2e263SMartin Matuska 		 */
1623*53a2e263SMartin Matuska 		htab_len = (2 * 8 * sizeof (uint128_t));
1624*53a2e263SMartin Matuska 	} else
1625*53a2e263SMartin Matuska #endif /* CAN_USE_GCM_ASM >= 2 */
1626*53a2e263SMartin Matuska 	{
1627*53a2e263SMartin Matuska 		htab_len = (2 * 6 * sizeof (uint128_t));
1628*53a2e263SMartin Matuska 	}
1629*53a2e263SMartin Matuska 
1630*53a2e263SMartin Matuska 	ctx->gcm_Htable = kmem_alloc(htab_len, KM_SLEEP);
1631*53a2e263SMartin Matuska 	if (ctx->gcm_Htable == NULL) {
1632*53a2e263SMartin Matuska 		return (CRYPTO_HOST_MEMORY);
1633*53a2e263SMartin Matuska 	}
1634*53a2e263SMartin Matuska 
1635eda14cbcSMatt Macy 	/* Init H (encrypt zero block) and create the initial counter block. */
1636da5137abSMartin Matuska 	memset(H, 0, sizeof (ctx->gcm_H));
1637eda14cbcSMatt Macy 	kfpu_begin();
1638eda14cbcSMatt Macy 	aes_encrypt_intel(keysched, aes_rounds,
1639eda14cbcSMatt Macy 	    (const uint32_t *)H, (uint32_t *)H);
1640eda14cbcSMatt Macy 
1641*53a2e263SMartin Matuska #if CAN_USE_GCM_ASM >= 2
1642*53a2e263SMartin Matuska 	if (ctx->impl == GCM_IMPL_AVX2) {
1643*53a2e263SMartin Matuska 		gcm_init_vpclmulqdq_avx2((uint128_t *)ctx->gcm_Htable, H);
1644*53a2e263SMartin Matuska 	} else
1645*53a2e263SMartin Matuska #endif /* if CAN_USE_GCM_ASM >= 2 */
1646*53a2e263SMartin Matuska 	{
1647eda14cbcSMatt Macy 		gcm_init_htab_avx(ctx->gcm_Htable, H);
1648*53a2e263SMartin Matuska 	}
1649eda14cbcSMatt Macy 
1650eda14cbcSMatt Macy 	if (iv_len == 12) {
1651da5137abSMartin Matuska 		memcpy(cb, iv, 12);
1652eda14cbcSMatt Macy 		cb[12] = 0;
1653eda14cbcSMatt Macy 		cb[13] = 0;
1654eda14cbcSMatt Macy 		cb[14] = 0;
1655eda14cbcSMatt Macy 		cb[15] = 1;
1656eda14cbcSMatt Macy 		/* We need the ICB later. */
1657da5137abSMartin Matuska 		memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
1658eda14cbcSMatt Macy 	} else {
1659eda14cbcSMatt Macy 		/*
1660eda14cbcSMatt Macy 		 * Most consumers use 12 byte IVs, so it's OK to use the
1661eda14cbcSMatt Macy 		 * original routines for other IV sizes, just avoid nesting
1662eda14cbcSMatt Macy 		 * kfpu_begin calls.
1663eda14cbcSMatt Macy 		 */
1664eda14cbcSMatt Macy 		clear_fpu_regs();
1665eda14cbcSMatt Macy 		kfpu_end();
1666eda14cbcSMatt Macy 		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
1667eda14cbcSMatt Macy 		    aes_copy_block, aes_xor_block);
1668eda14cbcSMatt Macy 		kfpu_begin();
1669eda14cbcSMatt Macy 	}
1670eda14cbcSMatt Macy 
1671d2a8fad3SMartin Matuska 	memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
1672d2a8fad3SMartin Matuska 
1673eda14cbcSMatt Macy 	/* Openssl post increments the counter, adjust for that. */
1674eda14cbcSMatt Macy 	gcm_incr_counter_block(ctx);
1675eda14cbcSMatt Macy 
1676eda14cbcSMatt Macy 	/* Ghash AAD in chunk_size blocks. */
1677eda14cbcSMatt Macy 	for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
1678eda14cbcSMatt Macy 		GHASH_AVX(ctx, datap, chunk_size);
1679eda14cbcSMatt Macy 		datap += chunk_size;
1680eda14cbcSMatt Macy 		clear_fpu_regs();
1681eda14cbcSMatt Macy 		kfpu_end();
1682eda14cbcSMatt Macy 		kfpu_begin();
1683eda14cbcSMatt Macy 	}
1684eda14cbcSMatt Macy 	/* Ghash the remainder and handle possible incomplete GCM block. */
1685eda14cbcSMatt Macy 	if (bleft > 0) {
1686eda14cbcSMatt Macy 		size_t incomp = bleft % block_size;
1687eda14cbcSMatt Macy 
1688eda14cbcSMatt Macy 		bleft -= incomp;
1689eda14cbcSMatt Macy 		if (bleft > 0) {
1690eda14cbcSMatt Macy 			GHASH_AVX(ctx, datap, bleft);
1691eda14cbcSMatt Macy 			datap += bleft;
1692eda14cbcSMatt Macy 		}
1693eda14cbcSMatt Macy 		if (incomp > 0) {
1694eda14cbcSMatt Macy 			/* Zero pad and hash incomplete last block. */
1695eda14cbcSMatt Macy 			uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
1696eda14cbcSMatt Macy 
1697da5137abSMartin Matuska 			memset(authp, 0, block_size);
1698da5137abSMartin Matuska 			memcpy(authp, datap, incomp);
1699eda14cbcSMatt Macy 			GHASH_AVX(ctx, authp, block_size);
1700eda14cbcSMatt Macy 		}
1701eda14cbcSMatt Macy 	}
1702eda14cbcSMatt Macy 	clear_fpu_regs();
1703eda14cbcSMatt Macy 	kfpu_end();
1704eda14cbcSMatt Macy 	return (CRYPTO_SUCCESS);
1705eda14cbcSMatt Macy }
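
/*
 * Summary of the setup above in NIST SP 800-38D terms:
 *
 *	H  = E(K, 0^128)				(hash subkey)
 *	J0 = IV || 0^31 || 1				(96-bit IV fast path)
 *	J0 = GHASH_H(IV || pad || [len(IV)]64)		(other IV lengths)
 *
 * The counter block then starts at inc32(J0) because the OpenSSL derived
 * assembly post-increments the counter, and the AAD is hashed into
 * gcm_ghash in GCM_CHUNK_SIZE_READ sized FPU sections.
 */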
1706eda14cbcSMatt Macy 
1707eda14cbcSMatt Macy #if defined(_KERNEL)
1708eda14cbcSMatt Macy static int
1709eda14cbcSMatt Macy icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
1710eda14cbcSMatt Macy {
1711eda14cbcSMatt Macy 	unsigned long val;
1712eda14cbcSMatt Macy 	char val_rounded[16];
1713eda14cbcSMatt Macy 	int error = 0;
1714eda14cbcSMatt Macy 
1715eda14cbcSMatt Macy 	error = kstrtoul(buf, 0, &val);
1716eda14cbcSMatt Macy 	if (error)
1717eda14cbcSMatt Macy 		return (error);
1718eda14cbcSMatt Macy 
1719eda14cbcSMatt Macy 	val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
1720eda14cbcSMatt Macy 
1721eda14cbcSMatt Macy 	if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
1722eda14cbcSMatt Macy 		return (-EINVAL);
1723eda14cbcSMatt Macy 
1724eda14cbcSMatt Macy 	snprintf(val_rounded, 16, "%u", (uint32_t)val);
1725eda14cbcSMatt Macy 	error = param_set_uint(val_rounded, kp);
1726eda14cbcSMatt Macy 	return (error);
1727eda14cbcSMatt Macy }
1728eda14cbcSMatt Macy 
1729eda14cbcSMatt Macy module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
1730eda14cbcSMatt Macy     param_get_uint, &gcm_avx_chunk_size, 0644);
1731eda14cbcSMatt Macy 
1732eda14cbcSMatt Macy MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
1733eda14cbcSMatt Macy 	"How many bytes to process while owning the FPU");
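
/*
 * Tuning sketch (sysfs path assumed, not taken from this file): on Linux
 * the chunk size can be changed at runtime, e.g.
 *
 *	echo 32736 > /sys/module/icp/parameters/icp_gcm_avx_chunk_size
 *
 * The handler above rounds the value down to a multiple of
 * GCM_AVX_MIN_DECRYPT_BYTES and rejects results outside
 * [GCM_AVX_MIN_ENCRYPT_BYTES, GCM_AVX_MAX_CHUNK_SIZE] with -EINVAL.
 */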
1734eda14cbcSMatt Macy 
1735eda14cbcSMatt Macy #endif /* defined(_KERNEL) */
1736eda14cbcSMatt Macy #endif /* ifdef CAN_USE_GCM_ASM */