xref: /linux/arch/x86/crypto/aesni-intel_glue.c (revision ee8287e068a3995b0f8001dd6931e221dfb7c530)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
 * Support for Intel AES-NI instructions. This file contains the glue
 * code; the real AES implementation is in aesni-intel_asm.S.
5  *
6  * Copyright (C) 2008, Intel Corp.
7  *    Author: Huang Ying <ying.huang@intel.com>
8  *
9  * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
10  * interface for 64-bit kernels.
11  *    Authors: Adrian Hoban <adrian.hoban@intel.com>
12  *             Gabriele Paoloni <gabriele.paoloni@intel.com>
13  *             Tadeusz Struk (tadeusz.struk@intel.com)
14  *             Aidan O'Mahony (aidan.o.mahony@intel.com)
15  *    Copyright (c) 2010, Intel Corporation.
16  */
17 
18 #include <linux/hardirq.h>
19 #include <linux/types.h>
20 #include <linux/module.h>
21 #include <linux/err.h>
22 #include <crypto/algapi.h>
23 #include <crypto/aes.h>
24 #include <crypto/ctr.h>
25 #include <crypto/b128ops.h>
26 #include <crypto/gcm.h>
27 #include <crypto/xts.h>
28 #include <asm/cpu_device_id.h>
29 #include <asm/simd.h>
30 #include <crypto/scatterwalk.h>
31 #include <crypto/internal/aead.h>
32 #include <crypto/internal/simd.h>
33 #include <crypto/internal/skcipher.h>
34 #include <linux/jump_label.h>
35 #include <linux/workqueue.h>
36 #include <linux/spinlock.h>
37 #include <linux/static_call.h>
38 
39 
/* Alignment, in bytes, applied to the key and context data in this file. */
#define AESNI_ALIGN	16
/* Member attribute that forces AESNI_ALIGN-byte alignment. */
#define AESNI_ALIGN_ATTR __attribute__ ((__aligned__(AESNI_ALIGN)))
/* ANDing a byte count with this rounds it down to whole AES blocks. */
#define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE - 1))
/*
 * Slack added to the context sizes below.  aes_align_addr() may round the
 * context pointer up to an AESNI_ALIGN boundary, so the allocation has to
 * leave room for that adjustment.
 */
#define AESNI_ALIGN_EXTRA ((AESNI_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
/* Context size for a single AES key schedule plus alignment slack. */
#define CRYPTO_AES_CTX_SIZE (sizeof(struct crypto_aes_ctx) + AESNI_ALIGN_EXTRA)
/* Context size for the XTS key pair plus alignment slack. */
#define XTS_AES_CTX_SIZE (sizeof(struct aesni_xts_ctx) + AESNI_ALIGN_EXTRA)
46 
/*
 * RFC4106 AES-GCM per-transform context.
 *
 * This data is stored at the end of the crypto_tfm struct.
 * It's a type of per "session" data storage location.
 * This needs to be 16 byte aligned; aesni_rfc4106_gcm_ctx_get() returns the
 * aligned pointer.
 */
struct aesni_rfc4106_gcm_ctx {
	/* hash subkey, written by aes_gcm_derive_hash_subkey() at setkey time */
	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
	/* AES key schedule filled in by aes_set_key_common() */
	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
	/* last 4 bytes of the supplied key; copied into the first 4 IV bytes */
	u8 nonce[4];
};
56 
/*
 * AES-GCM context without the RFC4106 nonce: the same two leading members as
 * struct aesni_rfc4106_gcm_ctx and nothing else.  Obtained through
 * generic_gcmaes_ctx_get().
 */
struct generic_gcmaes_ctx {
	/* hash subkey, 16-byte aligned */
	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
	/* expanded AES key, 16-byte aligned */
	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
};
61 
/*
 * XTS per-transform context holding the two AES key schedules.  Obtained
 * through aes_xts_ctx(), which returns the aligned pointer.
 */
struct aesni_xts_ctx {
	/* keyed from the second half of the XTS key; used to encrypt the IV */
	struct crypto_aes_ctx tweak_ctx AESNI_ALIGN_ATTR;
	/* keyed from the first half of the XTS key; used on the data */
	struct crypto_aes_ctx crypt_ctx AESNI_ALIGN_ATTR;
};
66 
/* Size, in bytes, of the block-sized members of struct gcm_context_data. */
#define GCM_BLOCK_LEN 16

/*
 * Per-request GCM working state handed to the aesni_gcm_*() init, update and
 * finalize routines.  gcmaes_crypt_by_sg() carves it out of an on-stack
 * buffer aligned to AESNI_ALIGN; the C code visible here does not access the
 * members directly.
 * NOTE(review): the member order and sizes are presumably relied upon by the
 * assembly routines -- confirm before changing the layout.
 */
struct gcm_context_data {
	/* init, update and finalize context data */
	u8 aad_hash[GCM_BLOCK_LEN];
	u64 aad_length;
	u64 in_length;
	u8 partial_block_enc_key[GCM_BLOCK_LEN];
	u8 orig_IV[GCM_BLOCK_LEN];
	u8 current_counter[GCM_BLOCK_LEN];
	u64 partial_block_len;
	u64 unused;
	u8 hash_keys[GCM_BLOCK_LEN * 16];
};
81 
82 static inline void *aes_align_addr(void *addr)
83 {
84 	if (crypto_tfm_ctx_alignment() >= AESNI_ALIGN)
85 		return addr;
86 	return PTR_ALIGN(addr, AESNI_ALIGN);
87 }
88 
/*
 * AES-NI primitives, declared asmlinkage.  The callers visible in this part
 * of the file invoke them between kernel_fpu_begin() and kernel_fpu_end().
 *
 * The bulk routines take (ctx, out, in, len[, iv]).  Callers pass len as a
 * byte count; the ECB and CBC callers mask it down to whole AES blocks first.
 */
asmlinkage void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
			      unsigned int key_len);
/* Single-block encrypt and decrypt. */
asmlinkage void aesni_enc(const void *ctx, u8 *out, const u8 *in);
asmlinkage void aesni_dec(const void *ctx, u8 *out, const u8 *in);
/* ECB mode. */
asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len);
asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len);
/* CBC mode. */
asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
/* CBC with ciphertext stealing; used for the final blocks of a CTS request. */
asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
				  const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
				  const u8 *in, unsigned int len, u8 *iv);
105 
/*
 * Payload-size thresholds, in bytes, used by gcmaes_crypt_by_sg(): the AVX
 * gen2 / gen4 GCM routines are only picked for requests at least this large,
 * and only when the matching static key is enabled.
 */
#define AVX_GEN2_OPTSIZE 640
#define AVX_GEN4_OPTSIZE 4096
108 
/*
 * XTS bulk routines.  Note the (out, in) argument order; the
 * aesni_xts_encrypt()/aesni_xts_decrypt() adapters translate from the
 * (src, dst) order of xts_crypt_func.  The iv argument receives the tweak.
 */
asmlinkage void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);

asmlinkage void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
114 
115 #ifdef CONFIG_X86_64
116 
/* CTR-mode bulk routine; initial target of the aesni_ctr_enc_tfm static call. */
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
			      const u8 *in, unsigned int len, u8 *iv);
/*
 * ctr_crypt() dispatches through this static call.
 * NOTE(review): presumably retargeted to aesni_ctr_enc_avx_tfm() during module
 * init when AVX is usable -- that code is not visible here.
 */
DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc);
120 
/*
 * Scatter / Gather routines, with args similar to above.
 *
 * These are the baseline GCM routines that gcmaes_crypt_by_sg() uses when
 * neither AVX variant is selected: one init call, one update call per walk
 * step, then one finalize call that writes the tag.
 */
asmlinkage void aesni_gcm_init(void *ctx,
			       struct gcm_context_data *gdata,
			       u8 *iv,
			       u8 *hash_subkey, const u8 *aad,
			       unsigned long aad_len);
asmlinkage void aesni_gcm_enc_update(void *ctx,
				     struct gcm_context_data *gdata, u8 *out,
				     const u8 *in, unsigned long plaintext_len);
asmlinkage void aesni_gcm_dec_update(void *ctx,
				     struct gcm_context_data *gdata, u8 *out,
				     const u8 *in,
				     unsigned long ciphertext_len);
asmlinkage void aesni_gcm_finalize(void *ctx,
				   struct gcm_context_data *gdata,
				   u8 *auth_tag, unsigned long auth_tag_len);
137 
/*
 * "by8" AVX CTR routines, one per AES key size.  Argument order differs from
 * aesni_ctr_enc(): (in, iv, keys, out, num_bytes).  aesni_ctr_enc_avx_tfm()
 * selects among them by ctx->key_length.
 */
asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
		void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
		void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
		void *keys, u8 *out, unsigned int num_bytes);


/*
 * "by8" AVX XCTR routines, one per AES key size.  byte_ctr is the number of
 * bytes already processed in the request, as tracked by xctr_crypt().
 * aesni_xctr_enc_avx_tfm() selects among them by ctx->key_length.
 */
asmlinkage void aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv,
	const void *keys, u8 *out, unsigned int num_bytes,
	unsigned int byte_ctr);

asmlinkage void aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv,
	const void *keys, u8 *out, unsigned int num_bytes,
	unsigned int byte_ctr);

asmlinkage void aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv,
	const void *keys, u8 *out, unsigned int num_bytes,
	unsigned int byte_ctr);
157 
/*
 * AVX "gen2" GCM routines; same calling convention as the baseline
 * aesni_gcm_*() set.  gcmaes_crypt_by_sg() uses them when the gcm_use_avx
 * static key is enabled and the payload is at least AVX_GEN2_OPTSIZE bytes.
 *
 * aesni_gcm_init_avx_gen2():
 *   my_ctx_data - context data (callers pass the expanded AES key)
 *   hash_subkey - the Hash sub key input. Data starts on a 16-byte boundary.
 */
asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data,
					struct gcm_context_data *gdata,
					u8 *iv,
					u8 *hash_subkey,
					const u8 *aad,
					unsigned long aad_len);

asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx,
				     struct gcm_context_data *gdata, u8 *out,
				     const u8 *in, unsigned long plaintext_len);
asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx,
				     struct gcm_context_data *gdata, u8 *out,
				     const u8 *in,
				     unsigned long ciphertext_len);
asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
				   struct gcm_context_data *gdata,
				   u8 *auth_tag, unsigned long auth_tag_len);
180 
/*
 * AVX "gen4" GCM routines; same calling convention as the baseline
 * aesni_gcm_*() set.  gcmaes_crypt_by_sg() uses them when the gcm_use_avx2
 * static key is enabled and the payload is at least AVX_GEN4_OPTSIZE bytes.
 *
 * aesni_gcm_init_avx_gen4():
 *   my_ctx_data - context data (callers pass the expanded AES key)
 *   hash_subkey - the Hash sub key input. Data starts on a 16-byte boundary.
 */
asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data,
					struct gcm_context_data *gdata,
					u8 *iv,
					u8 *hash_subkey,
					const u8 *aad,
					unsigned long aad_len);

asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx,
				     struct gcm_context_data *gdata, u8 *out,
				     const u8 *in, unsigned long plaintext_len);
asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx,
				     struct gcm_context_data *gdata, u8 *out,
				     const u8 *in,
				     unsigned long ciphertext_len);
asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
				   struct gcm_context_data *gdata,
				   u8 *auth_tag, unsigned long auth_tag_len);
203 
/*
 * Static keys, both initially false, that gate the AVX gen2 (gcm_use_avx) and
 * AVX gen4 (gcm_use_avx2) GCM code paths in gcmaes_crypt_by_sg().
 * NOTE(review): presumably enabled during module init from CPU feature
 * checks -- that code is not visible here.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2);
206 
/* Return the aligned RFC4106 GCM context that belongs to @tfm. */
static inline struct aesni_rfc4106_gcm_ctx *
aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
{
	void *raw_ctx = crypto_aead_ctx(tfm);

	return aes_align_addr(raw_ctx);
}
212 
/* Return the aligned generic AES-GCM context that belongs to @tfm. */
static inline struct generic_gcmaes_ctx *
generic_gcmaes_ctx_get(struct crypto_aead *tfm)
{
	void *raw_ctx = crypto_aead_ctx(tfm);

	return aes_align_addr(raw_ctx);
}
218 #endif
219 
/* Interpret @raw_ctx as an AES key schedule, after alignment fix-up. */
static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
{
	struct crypto_aes_ctx *aligned_ctx = aes_align_addr(raw_ctx);

	return aligned_ctx;
}
224 
/* Return the aligned XTS context (tweak and crypt keys) that belongs to @tfm. */
static inline struct aesni_xts_ctx *aes_xts_ctx(struct crypto_skcipher *tfm)
{
	void *raw_ctx = crypto_skcipher_ctx(tfm);

	return aes_align_addr(raw_ctx);
}
229 
230 static int aes_set_key_common(struct crypto_aes_ctx *ctx,
231 			      const u8 *in_key, unsigned int key_len)
232 {
233 	int err;
234 
235 	if (!crypto_simd_usable())
236 		return aes_expandkey(ctx, in_key, key_len);
237 
238 	err = aes_check_keylen(key_len);
239 	if (err)
240 		return err;
241 
242 	kernel_fpu_begin();
243 	aesni_set_key(ctx, in_key, key_len);
244 	kernel_fpu_end();
245 	return 0;
246 }
247 
248 static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
249 		       unsigned int key_len)
250 {
251 	return aes_set_key_common(aes_ctx(crypto_tfm_ctx(tfm)), in_key,
252 				  key_len);
253 }
254 
255 static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
256 {
257 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
258 
259 	if (!crypto_simd_usable()) {
260 		aes_encrypt(ctx, dst, src);
261 	} else {
262 		kernel_fpu_begin();
263 		aesni_enc(ctx, dst, src);
264 		kernel_fpu_end();
265 	}
266 }
267 
268 static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
269 {
270 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
271 
272 	if (!crypto_simd_usable()) {
273 		aes_decrypt(ctx, dst, src);
274 	} else {
275 		kernel_fpu_begin();
276 		aesni_dec(ctx, dst, src);
277 		kernel_fpu_end();
278 	}
279 }
280 
281 static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
282 			         unsigned int len)
283 {
284 	return aes_set_key_common(aes_ctx(crypto_skcipher_ctx(tfm)), key, len);
285 }
286 
287 static int ecb_encrypt(struct skcipher_request *req)
288 {
289 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
290 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
291 	struct skcipher_walk walk;
292 	unsigned int nbytes;
293 	int err;
294 
295 	err = skcipher_walk_virt(&walk, req, false);
296 
297 	while ((nbytes = walk.nbytes)) {
298 		kernel_fpu_begin();
299 		aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
300 			      nbytes & AES_BLOCK_MASK);
301 		kernel_fpu_end();
302 		nbytes &= AES_BLOCK_SIZE - 1;
303 		err = skcipher_walk_done(&walk, nbytes);
304 	}
305 
306 	return err;
307 }
308 
309 static int ecb_decrypt(struct skcipher_request *req)
310 {
311 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
312 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
313 	struct skcipher_walk walk;
314 	unsigned int nbytes;
315 	int err;
316 
317 	err = skcipher_walk_virt(&walk, req, false);
318 
319 	while ((nbytes = walk.nbytes)) {
320 		kernel_fpu_begin();
321 		aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
322 			      nbytes & AES_BLOCK_MASK);
323 		kernel_fpu_end();
324 		nbytes &= AES_BLOCK_SIZE - 1;
325 		err = skcipher_walk_done(&walk, nbytes);
326 	}
327 
328 	return err;
329 }
330 
331 static int cbc_encrypt(struct skcipher_request *req)
332 {
333 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
334 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
335 	struct skcipher_walk walk;
336 	unsigned int nbytes;
337 	int err;
338 
339 	err = skcipher_walk_virt(&walk, req, false);
340 
341 	while ((nbytes = walk.nbytes)) {
342 		kernel_fpu_begin();
343 		aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
344 			      nbytes & AES_BLOCK_MASK, walk.iv);
345 		kernel_fpu_end();
346 		nbytes &= AES_BLOCK_SIZE - 1;
347 		err = skcipher_walk_done(&walk, nbytes);
348 	}
349 
350 	return err;
351 }
352 
353 static int cbc_decrypt(struct skcipher_request *req)
354 {
355 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
356 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
357 	struct skcipher_walk walk;
358 	unsigned int nbytes;
359 	int err;
360 
361 	err = skcipher_walk_virt(&walk, req, false);
362 
363 	while ((nbytes = walk.nbytes)) {
364 		kernel_fpu_begin();
365 		aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
366 			      nbytes & AES_BLOCK_MASK, walk.iv);
367 		kernel_fpu_end();
368 		nbytes &= AES_BLOCK_SIZE - 1;
369 		err = skcipher_walk_done(&walk, nbytes);
370 	}
371 
372 	return err;
373 }
374 
375 static int cts_cbc_encrypt(struct skcipher_request *req)
376 {
377 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
378 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
379 	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
380 	struct scatterlist *src = req->src, *dst = req->dst;
381 	struct scatterlist sg_src[2], sg_dst[2];
382 	struct skcipher_request subreq;
383 	struct skcipher_walk walk;
384 	int err;
385 
386 	skcipher_request_set_tfm(&subreq, tfm);
387 	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
388 				      NULL, NULL);
389 
390 	if (req->cryptlen <= AES_BLOCK_SIZE) {
391 		if (req->cryptlen < AES_BLOCK_SIZE)
392 			return -EINVAL;
393 		cbc_blocks = 1;
394 	}
395 
396 	if (cbc_blocks > 0) {
397 		skcipher_request_set_crypt(&subreq, req->src, req->dst,
398 					   cbc_blocks * AES_BLOCK_SIZE,
399 					   req->iv);
400 
401 		err = cbc_encrypt(&subreq);
402 		if (err)
403 			return err;
404 
405 		if (req->cryptlen == AES_BLOCK_SIZE)
406 			return 0;
407 
408 		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
409 		if (req->dst != req->src)
410 			dst = scatterwalk_ffwd(sg_dst, req->dst,
411 					       subreq.cryptlen);
412 	}
413 
414 	/* handle ciphertext stealing */
415 	skcipher_request_set_crypt(&subreq, src, dst,
416 				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
417 				   req->iv);
418 
419 	err = skcipher_walk_virt(&walk, &subreq, false);
420 	if (err)
421 		return err;
422 
423 	kernel_fpu_begin();
424 	aesni_cts_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
425 			  walk.nbytes, walk.iv);
426 	kernel_fpu_end();
427 
428 	return skcipher_walk_done(&walk, 0);
429 }
430 
431 static int cts_cbc_decrypt(struct skcipher_request *req)
432 {
433 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
434 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
435 	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
436 	struct scatterlist *src = req->src, *dst = req->dst;
437 	struct scatterlist sg_src[2], sg_dst[2];
438 	struct skcipher_request subreq;
439 	struct skcipher_walk walk;
440 	int err;
441 
442 	skcipher_request_set_tfm(&subreq, tfm);
443 	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
444 				      NULL, NULL);
445 
446 	if (req->cryptlen <= AES_BLOCK_SIZE) {
447 		if (req->cryptlen < AES_BLOCK_SIZE)
448 			return -EINVAL;
449 		cbc_blocks = 1;
450 	}
451 
452 	if (cbc_blocks > 0) {
453 		skcipher_request_set_crypt(&subreq, req->src, req->dst,
454 					   cbc_blocks * AES_BLOCK_SIZE,
455 					   req->iv);
456 
457 		err = cbc_decrypt(&subreq);
458 		if (err)
459 			return err;
460 
461 		if (req->cryptlen == AES_BLOCK_SIZE)
462 			return 0;
463 
464 		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
465 		if (req->dst != req->src)
466 			dst = scatterwalk_ffwd(sg_dst, req->dst,
467 					       subreq.cryptlen);
468 	}
469 
470 	/* handle ciphertext stealing */
471 	skcipher_request_set_crypt(&subreq, src, dst,
472 				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
473 				   req->iv);
474 
475 	err = skcipher_walk_virt(&walk, &subreq, false);
476 	if (err)
477 		return err;
478 
479 	kernel_fpu_begin();
480 	aesni_cts_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
481 			  walk.nbytes, walk.iv);
482 	kernel_fpu_end();
483 
484 	return skcipher_walk_done(&walk, 0);
485 }
486 
487 #ifdef CONFIG_X86_64
488 static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
489 			      const u8 *in, unsigned int len, u8 *iv)
490 {
491 	/*
492 	 * based on key length, override with the by8 version
493 	 * of ctr mode encryption/decryption for improved performance
494 	 * aes_set_key_common() ensures that key length is one of
495 	 * {128,192,256}
496 	 */
497 	if (ctx->key_length == AES_KEYSIZE_128)
498 		aes_ctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len);
499 	else if (ctx->key_length == AES_KEYSIZE_192)
500 		aes_ctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len);
501 	else
502 		aes_ctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len);
503 }
504 
505 static int ctr_crypt(struct skcipher_request *req)
506 {
507 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
508 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
509 	u8 keystream[AES_BLOCK_SIZE];
510 	struct skcipher_walk walk;
511 	unsigned int nbytes;
512 	int err;
513 
514 	err = skcipher_walk_virt(&walk, req, false);
515 
516 	while ((nbytes = walk.nbytes) > 0) {
517 		kernel_fpu_begin();
518 		if (nbytes & AES_BLOCK_MASK)
519 			static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr,
520 						       walk.src.virt.addr,
521 						       nbytes & AES_BLOCK_MASK,
522 						       walk.iv);
523 		nbytes &= ~AES_BLOCK_MASK;
524 
525 		if (walk.nbytes == walk.total && nbytes > 0) {
526 			aesni_enc(ctx, keystream, walk.iv);
527 			crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - nbytes,
528 				       walk.src.virt.addr + walk.nbytes - nbytes,
529 				       keystream, nbytes);
530 			crypto_inc(walk.iv, AES_BLOCK_SIZE);
531 			nbytes = 0;
532 		}
533 		kernel_fpu_end();
534 		err = skcipher_walk_done(&walk, nbytes);
535 	}
536 	return err;
537 }
538 
539 static void aesni_xctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
540 				   const u8 *in, unsigned int len, u8 *iv,
541 				   unsigned int byte_ctr)
542 {
543 	if (ctx->key_length == AES_KEYSIZE_128)
544 		aes_xctr_enc_128_avx_by8(in, iv, (void *)ctx, out, len,
545 					 byte_ctr);
546 	else if (ctx->key_length == AES_KEYSIZE_192)
547 		aes_xctr_enc_192_avx_by8(in, iv, (void *)ctx, out, len,
548 					 byte_ctr);
549 	else
550 		aes_xctr_enc_256_avx_by8(in, iv, (void *)ctx, out, len,
551 					 byte_ctr);
552 }
553 
554 static int xctr_crypt(struct skcipher_request *req)
555 {
556 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
557 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
558 	u8 keystream[AES_BLOCK_SIZE];
559 	struct skcipher_walk walk;
560 	unsigned int nbytes;
561 	unsigned int byte_ctr = 0;
562 	int err;
563 	__le32 block[AES_BLOCK_SIZE / sizeof(__le32)];
564 
565 	err = skcipher_walk_virt(&walk, req, false);
566 
567 	while ((nbytes = walk.nbytes) > 0) {
568 		kernel_fpu_begin();
569 		if (nbytes & AES_BLOCK_MASK)
570 			aesni_xctr_enc_avx_tfm(ctx, walk.dst.virt.addr,
571 				walk.src.virt.addr, nbytes & AES_BLOCK_MASK,
572 				walk.iv, byte_ctr);
573 		nbytes &= ~AES_BLOCK_MASK;
574 		byte_ctr += walk.nbytes - nbytes;
575 
576 		if (walk.nbytes == walk.total && nbytes > 0) {
577 			memcpy(block, walk.iv, AES_BLOCK_SIZE);
578 			block[0] ^= cpu_to_le32(1 + byte_ctr / AES_BLOCK_SIZE);
579 			aesni_enc(ctx, keystream, (u8 *)block);
580 			crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes -
581 				       nbytes, walk.src.virt.addr + walk.nbytes
582 				       - nbytes, keystream, nbytes);
583 			byte_ctr += nbytes;
584 			nbytes = 0;
585 		}
586 		kernel_fpu_end();
587 		err = skcipher_walk_done(&walk, nbytes);
588 	}
589 	return err;
590 }
591 
592 static int aes_gcm_derive_hash_subkey(const struct crypto_aes_ctx *aes_key,
593 				      u8 hash_subkey[AES_BLOCK_SIZE])
594 {
595 	static const u8 zeroes[AES_BLOCK_SIZE];
596 
597 	aes_encrypt(aes_key, hash_subkey, zeroes);
598 	return 0;
599 }
600 
601 static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
602 				  unsigned int key_len)
603 {
604 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
605 
606 	if (key_len < 4)
607 		return -EINVAL;
608 
609 	/*Account for 4 byte nonce at the end.*/
610 	key_len -= 4;
611 
612 	memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce));
613 
614 	return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?:
615 	       aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded,
616 					  ctx->hash_subkey);
617 }
618 
/*
 * Validate the Integrity Check Value (authentication tag) length for
 * RFC4106: only 8, 12 or 16 bytes are accepted.
 *
 * Returns 0 for an accepted length and -EINVAL otherwise.  @aead is unused.
 */
static int common_rfc4106_set_authsize(struct crypto_aead *aead,
				       unsigned int authsize)
{
	if (authsize == 8 || authsize == 12 || authsize == 16)
		return 0;

	return -EINVAL;
}
635 
/*
 * Validate the authentication tag length for generic AES-GCM: 4, 8, or any
 * value from 12 through 16 bytes is accepted.
 *
 * Returns 0 for an accepted length and -EINVAL otherwise.  @tfm is unused.
 */
static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
				       unsigned int authsize)
{
	if (authsize == 4 || authsize == 8 ||
	    (authsize >= 12 && authsize <= 16))
		return 0;

	return -EINVAL;
}
654 
/*
 * Run one AES-GCM request over the request's scatterlists and compute the tag.
 *
 * @enc:          true to encrypt, false to decrypt
 * @req:          AEAD request supplying src, dst and cryptlen
 * @assoclen:     number of associated-data bytes to authenticate, taken from
 *                the start of req->src
 * @hash_subkey:  hash subkey handed to the init routine
 * @iv:           IV block handed to the init routine
 * @aes_ctx:      expanded AES key, passed through to the assembly routines
 * @auth_tag:     buffer that receives the computed tag
 * @auth_tag_len: tag length in bytes
 *
 * The tag is only computed here; writing it out (encrypt) or comparing it
 * (decrypt) is left to the caller.
 *
 * Returns 0 on success, -ENOMEM if the associated data had to be copied and
 * the allocation failed, or the error reported by the scatterlist walk.
 */
static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
			      unsigned int assoclen, u8 *hash_subkey,
			      u8 *iv, void *aes_ctx, u8 *auth_tag,
			      unsigned long auth_tag_len)
{
	/* On-stack GCM state, over-allocated so it can be aligned to AESNI_ALIGN. */
	u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8);
	struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN);
	unsigned long left = req->cryptlen;
	struct scatter_walk assoc_sg_walk;
	struct skcipher_walk walk;
	bool do_avx, do_avx2;
	u8 *assocmem = NULL;
	u8 *assoc;
	int err;

	/* For decryption req->cryptlen includes the tag; exclude it from the payload size. */
	if (!enc)
		left -= auth_tag_len;

	/* The AVX variants are only used for payloads above their size thresholds. */
	do_avx = (left >= AVX_GEN2_OPTSIZE);
	do_avx2 = (left >= AVX_GEN4_OPTSIZE);

	/* Linearize assoc, if not already linear */
	if (req->src->length >= assoclen && req->src->length) {
		/* The first scatterlist entry holds all of it: map it in place. */
		scatterwalk_start(&assoc_sg_walk, req->src);
		assoc = scatterwalk_map(&assoc_sg_walk);
	} else {
		gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
			      GFP_KERNEL : GFP_ATOMIC;

		/* assoc can be any length, so must be on heap */
		assocmem = kmalloc(assoclen, flags);
		if (unlikely(!assocmem))
			return -ENOMEM;
		assoc = assocmem;

		scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
	}

	/* Init: hand the IV, hash subkey and associated data to the chosen variant. */
	kernel_fpu_begin();
	if (static_branch_likely(&gcm_use_avx2) && do_avx2)
		aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc,
					assoclen);
	else if (static_branch_likely(&gcm_use_avx) && do_avx)
		aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc,
					assoclen);
	else
		aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
	kernel_fpu_end();

	/* Release the associated data: unmap the mapping or free the copy. */
	if (!assocmem)
		scatterwalk_unmap(assoc);
	else
		kfree(assocmem);

	err = enc ? skcipher_walk_aead_encrypt(&walk, req, false)
		  : skcipher_walk_aead_decrypt(&walk, req, false);

	/* Update: process each walk step with the same variant chosen for init. */
	while (walk.nbytes > 0) {
		kernel_fpu_begin();
		if (static_branch_likely(&gcm_use_avx2) && do_avx2) {
			if (enc)
				aesni_gcm_enc_update_avx_gen4(aes_ctx, data,
							      walk.dst.virt.addr,
							      walk.src.virt.addr,
							      walk.nbytes);
			else
				aesni_gcm_dec_update_avx_gen4(aes_ctx, data,
							      walk.dst.virt.addr,
							      walk.src.virt.addr,
							      walk.nbytes);
		} else if (static_branch_likely(&gcm_use_avx) && do_avx) {
			if (enc)
				aesni_gcm_enc_update_avx_gen2(aes_ctx, data,
							      walk.dst.virt.addr,
							      walk.src.virt.addr,
							      walk.nbytes);
			else
				aesni_gcm_dec_update_avx_gen2(aes_ctx, data,
							      walk.dst.virt.addr,
							      walk.src.virt.addr,
							      walk.nbytes);
		} else if (enc) {
			aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr,
					     walk.src.virt.addr, walk.nbytes);
		} else {
			aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr,
					     walk.src.virt.addr, walk.nbytes);
		}
		kernel_fpu_end();

		/* All walk.nbytes were passed to the update routine; nothing is left over. */
		err = skcipher_walk_done(&walk, 0);
	}

	if (err)
		return err;

	/* Finalize: have the chosen variant write the tag into auth_tag. */
	kernel_fpu_begin();
	if (static_branch_likely(&gcm_use_avx2) && do_avx2)
		aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag,
					    auth_tag_len);
	else if (static_branch_likely(&gcm_use_avx) && do_avx)
		aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag,
					    auth_tag_len);
	else
		aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len);
	kernel_fpu_end();

	return 0;
}
764 
765 static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
766 			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
767 {
768 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
769 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
770 	u8 auth_tag[16];
771 	int err;
772 
773 	err = gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, aes_ctx,
774 				 auth_tag, auth_tag_len);
775 	if (err)
776 		return err;
777 
778 	scatterwalk_map_and_copy(auth_tag, req->dst,
779 				 req->assoclen + req->cryptlen,
780 				 auth_tag_len, 1);
781 	return 0;
782 }
783 
784 static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
785 			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
786 {
787 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
788 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
789 	u8 auth_tag_msg[16];
790 	u8 auth_tag[16];
791 	int err;
792 
793 	err = gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, aes_ctx,
794 				 auth_tag, auth_tag_len);
795 	if (err)
796 		return err;
797 
798 	/* Copy out original auth_tag */
799 	scatterwalk_map_and_copy(auth_tag_msg, req->src,
800 				 req->assoclen + req->cryptlen - auth_tag_len,
801 				 auth_tag_len, 0);
802 
803 	/* Compare generated tag with passed in tag. */
804 	if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) {
805 		memzero_explicit(auth_tag, sizeof(auth_tag));
806 		return -EBADMSG;
807 	}
808 	return 0;
809 }
810 
811 static int helper_rfc4106_encrypt(struct aead_request *req)
812 {
813 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
814 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
815 	void *aes_ctx = &(ctx->aes_key_expanded);
816 	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
817 	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
818 	unsigned int i;
819 	__be32 counter = cpu_to_be32(1);
820 
821 	/* Assuming we are supporting rfc4106 64-bit extended */
822 	/* sequence numbers We need to have the AAD length equal */
823 	/* to 16 or 20 bytes */
824 	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
825 		return -EINVAL;
826 
827 	/* IV below built */
828 	for (i = 0; i < 4; i++)
829 		*(iv+i) = ctx->nonce[i];
830 	for (i = 0; i < 8; i++)
831 		*(iv+4+i) = req->iv[i];
832 	*((__be32 *)(iv+12)) = counter;
833 
834 	return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
835 			      aes_ctx);
836 }
837 
838 static int helper_rfc4106_decrypt(struct aead_request *req)
839 {
840 	__be32 counter = cpu_to_be32(1);
841 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
842 	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
843 	void *aes_ctx = &(ctx->aes_key_expanded);
844 	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
845 	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
846 	unsigned int i;
847 
848 	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
849 		return -EINVAL;
850 
851 	/* Assuming we are supporting rfc4106 64-bit extended */
852 	/* sequence numbers We need to have the AAD length */
853 	/* equal to 16 or 20 bytes */
854 
855 	/* IV below built */
856 	for (i = 0; i < 4; i++)
857 		*(iv+i) = ctx->nonce[i];
858 	for (i = 0; i < 8; i++)
859 		*(iv+4+i) = req->iv[i];
860 	*((__be32 *)(iv+12)) = counter;
861 
862 	return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
863 			      aes_ctx);
864 }
865 #endif
866 
867 static int xts_setkey_aesni(struct crypto_skcipher *tfm, const u8 *key,
868 			    unsigned int keylen)
869 {
870 	struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
871 	int err;
872 
873 	err = xts_verify_key(tfm, key, keylen);
874 	if (err)
875 		return err;
876 
877 	keylen /= 2;
878 
879 	/* first half of xts-key is for crypt */
880 	err = aes_set_key_common(&ctx->crypt_ctx, key, keylen);
881 	if (err)
882 		return err;
883 
884 	/* second half of xts-key is for tweak */
885 	return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen);
886 }
887 
/*
 * Callback that turns the request IV into the initial XTS tweak, in place,
 * using the tweak key.  xts_crypt() calls it once per request.
 */
typedef void (*xts_encrypt_iv_func)(const struct crypto_aes_ctx *tweak_key,
				    u8 iv[AES_BLOCK_SIZE]);
/*
 * Callback that encrypts or decrypts len bytes from src to dst with the data
 * key, given the current tweak.
 * NOTE(review): the tweak buffer is presumably updated by the callee so that
 * successive calls continue where the previous one stopped -- confirm against
 * the assembly.
 */
typedef void (*xts_crypt_func)(const struct crypto_aes_ctx *key,
			       const u8 *src, u8 *dst, unsigned int len,
			       u8 tweak[AES_BLOCK_SIZE]);
893 
/*
 * This handles cases where the source and/or destination span pages.
 *
 * @req:        the XTS request; xts_crypt() has already passed req->iv
 *              through the encrypt_iv callback
 * @crypt_func: routine that processes data with the crypt key and the tweak
 *
 * The data is walked step by step.  If the length is not a multiple of the
 * AES block size, the walk covers everything except the last full block and
 * the partial block, and those two are then processed together in one final
 * call.
 *
 * Returns 0 on success or the error reported by the scatterlist walk.
 */
static noinline int
xts_crypt_slowpath(struct skcipher_request *req, xts_crypt_func crypt_func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
	/* Number of bytes in the trailing partial block, 0 if none. */
	int tail = req->cryptlen % AES_BLOCK_SIZE;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct skcipher_walk walk;
	struct scatterlist *src, *dst;
	int err;

	/*
	 * If the message length isn't divisible by the AES block size, then
	 * separate off the last full block and the partial block.  This ensures
	 * that they are processed in the same call to the assembly function,
	 * which is required for ciphertext stealing.
	 */
	if (tail) {
		skcipher_request_set_tfm(&subreq, tfm);
		skcipher_request_set_callback(&subreq,
					      skcipher_request_flags(req),
					      NULL, NULL);
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   req->cryptlen - tail - AES_BLOCK_SIZE,
					   req->iv);
		/* From here on req refers to the shortened sub-request. */
		req = &subreq;
	}

	err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes) {
		/* Process the whole blocks of this step; hand any remainder back. */
		kernel_fpu_begin();
		(*crypt_func)(&ctx->crypt_ctx,
			      walk.src.virt.addr, walk.dst.virt.addr,
			      walk.nbytes & ~(AES_BLOCK_SIZE - 1), req->iv);
		kernel_fpu_end();
		err = skcipher_walk_done(&walk,
					 walk.nbytes & (AES_BLOCK_SIZE - 1));
	}

	if (err || !tail)
		return err;

	/* Do ciphertext stealing with the last full block and partial block. */

	/* req is the sub-request here, so req->cryptlen is the part already done. */
	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
	if (req->dst != req->src)
		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);

	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
				   req->iv);

	err = skcipher_walk_virt(&walk, req, false);
	if (err)
		return err;

	kernel_fpu_begin();
	(*crypt_func)(&ctx->crypt_ctx, walk.src.virt.addr, walk.dst.virt.addr,
		      walk.nbytes, req->iv);
	kernel_fpu_end();

	return skcipher_walk_done(&walk, 0);
}
959 
/*
 * Common XTS encrypt/decrypt driver.
 *
 * @req:        the XTS request
 * @encrypt_iv: callback that converts req->iv into the initial tweak in place
 * @crypt_func: callback that processes data with the crypt key and the tweak
 *
 * The IV is encrypted with the tweak key first.  If the first source and
 * destination scatterlist entries each hold the whole message without
 * crossing a page boundary, the data is processed in a single call;
 * otherwise the work is passed to xts_crypt_slowpath().
 *
 * Returns -EINVAL if the request is shorter than one AES block, 0 on the
 * fast path, or the result of xts_crypt_slowpath().
 *
 * __always_inline to avoid indirect call in fastpath
 */
static __always_inline int
xts_crypt(struct skcipher_request *req, xts_encrypt_iv_func encrypt_iv,
	  xts_crypt_func crypt_func)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm);
	const unsigned int cryptlen = req->cryptlen;
	struct scatterlist *src = req->src;
	struct scatterlist *dst = req->dst;

	if (unlikely(cryptlen < AES_BLOCK_SIZE))
		return -EINVAL;

	/* The FPU section opened here is closed on both exits below. */
	kernel_fpu_begin();
	(*encrypt_iv)(&ctx->tweak_ctx, req->iv);

	/*
	 * In practice, virtually all XTS plaintexts and ciphertexts are either
	 * 512 or 4096 bytes, aligned such that they don't span page boundaries.
	 * To optimize the performance of these cases, and also any other case
	 * where no page boundary is spanned, the below fast-path handles
	 * single-page sources and destinations as efficiently as possible.
	 */
	if (likely(src->length >= cryptlen && dst->length >= cryptlen &&
		   src->offset + cryptlen <= PAGE_SIZE &&
		   dst->offset + cryptlen <= PAGE_SIZE)) {
		struct page *src_page = sg_page(src);
		struct page *dst_page = sg_page(dst);
		void *src_virt = kmap_local_page(src_page) + src->offset;
		void *dst_virt = kmap_local_page(dst_page) + dst->offset;

		(*crypt_func)(&ctx->crypt_ctx, src_virt, dst_virt, cryptlen,
			      req->iv);
		/* Unmap in the reverse order of mapping. */
		kunmap_local(dst_virt);
		kunmap_local(src_virt);
		kernel_fpu_end();
		return 0;
	}
	/* The slow path opens its own FPU sections around each assembly call. */
	kernel_fpu_end();
	return xts_crypt_slowpath(req, crypt_func);
}
1002 
1003 static void aesni_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
1004 				 u8 iv[AES_BLOCK_SIZE])
1005 {
1006 	aesni_enc(tweak_key, iv, iv);
1007 }
1008 
1009 static void aesni_xts_encrypt(const struct crypto_aes_ctx *key,
1010 			      const u8 *src, u8 *dst, unsigned int len,
1011 			      u8 tweak[AES_BLOCK_SIZE])
1012 {
1013 	aesni_xts_enc(key, dst, src, len, tweak);
1014 }
1015 
1016 static void aesni_xts_decrypt(const struct crypto_aes_ctx *key,
1017 			      const u8 *src, u8 *dst, unsigned int len,
1018 			      u8 tweak[AES_BLOCK_SIZE])
1019 {
1020 	aesni_xts_dec(key, dst, src, len, tweak);
1021 }
1022 
1023 static int xts_encrypt_aesni(struct skcipher_request *req)
1024 {
1025 	return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_encrypt);
1026 }
1027 
1028 static int xts_decrypt_aesni(struct skcipher_request *req)
1029 {
1030 	return xts_crypt(req, aesni_xts_encrypt_iv, aesni_xts_decrypt);
1031 }
1032 
1033 static struct crypto_alg aesni_cipher_alg = {
1034 	.cra_name		= "aes",
1035 	.cra_driver_name	= "aes-aesni",
1036 	.cra_priority		= 300,
1037 	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
1038 	.cra_blocksize		= AES_BLOCK_SIZE,
1039 	.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
1040 	.cra_module		= THIS_MODULE,
1041 	.cra_u	= {
1042 		.cipher	= {
1043 			.cia_min_keysize	= AES_MIN_KEY_SIZE,
1044 			.cia_max_keysize	= AES_MAX_KEY_SIZE,
1045 			.cia_setkey		= aes_set_key,
1046 			.cia_encrypt		= aesni_encrypt,
1047 			.cia_decrypt		= aesni_decrypt
1048 		}
1049 	}
1050 };
1051 
1052 static struct skcipher_alg aesni_skciphers[] = {
1053 	{
1054 		.base = {
1055 			.cra_name		= "__ecb(aes)",
1056 			.cra_driver_name	= "__ecb-aes-aesni",
1057 			.cra_priority		= 400,
1058 			.cra_flags		= CRYPTO_ALG_INTERNAL,
1059 			.cra_blocksize		= AES_BLOCK_SIZE,
1060 			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
1061 			.cra_module		= THIS_MODULE,
1062 		},
1063 		.min_keysize	= AES_MIN_KEY_SIZE,
1064 		.max_keysize	= AES_MAX_KEY_SIZE,
1065 		.setkey		= aesni_skcipher_setkey,
1066 		.encrypt	= ecb_encrypt,
1067 		.decrypt	= ecb_decrypt,
1068 	}, {
1069 		.base = {
1070 			.cra_name		= "__cbc(aes)",
1071 			.cra_driver_name	= "__cbc-aes-aesni",
1072 			.cra_priority		= 400,
1073 			.cra_flags		= CRYPTO_ALG_INTERNAL,
1074 			.cra_blocksize		= AES_BLOCK_SIZE,
1075 			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
1076 			.cra_module		= THIS_MODULE,
1077 		},
1078 		.min_keysize	= AES_MIN_KEY_SIZE,
1079 		.max_keysize	= AES_MAX_KEY_SIZE,
1080 		.ivsize		= AES_BLOCK_SIZE,
1081 		.setkey		= aesni_skcipher_setkey,
1082 		.encrypt	= cbc_encrypt,
1083 		.decrypt	= cbc_decrypt,
1084 	}, {
1085 		.base = {
1086 			.cra_name		= "__cts(cbc(aes))",
1087 			.cra_driver_name	= "__cts-cbc-aes-aesni",
1088 			.cra_priority		= 400,
1089 			.cra_flags		= CRYPTO_ALG_INTERNAL,
1090 			.cra_blocksize		= AES_BLOCK_SIZE,
1091 			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
1092 			.cra_module		= THIS_MODULE,
1093 		},
1094 		.min_keysize	= AES_MIN_KEY_SIZE,
1095 		.max_keysize	= AES_MAX_KEY_SIZE,
1096 		.ivsize		= AES_BLOCK_SIZE,
1097 		.walksize	= 2 * AES_BLOCK_SIZE,
1098 		.setkey		= aesni_skcipher_setkey,
1099 		.encrypt	= cts_cbc_encrypt,
1100 		.decrypt	= cts_cbc_decrypt,
1101 #ifdef CONFIG_X86_64
1102 	}, {
1103 		.base = {
1104 			.cra_name		= "__ctr(aes)",
1105 			.cra_driver_name	= "__ctr-aes-aesni",
1106 			.cra_priority		= 400,
1107 			.cra_flags		= CRYPTO_ALG_INTERNAL,
1108 			.cra_blocksize		= 1,
1109 			.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
1110 			.cra_module		= THIS_MODULE,
1111 		},
1112 		.min_keysize	= AES_MIN_KEY_SIZE,
1113 		.max_keysize	= AES_MAX_KEY_SIZE,
1114 		.ivsize		= AES_BLOCK_SIZE,
1115 		.chunksize	= AES_BLOCK_SIZE,
1116 		.setkey		= aesni_skcipher_setkey,
1117 		.encrypt	= ctr_crypt,
1118 		.decrypt	= ctr_crypt,
1119 #endif
1120 	}, {
1121 		.base = {
1122 			.cra_name		= "__xts(aes)",
1123 			.cra_driver_name	= "__xts-aes-aesni",
1124 			.cra_priority		= 401,
1125 			.cra_flags		= CRYPTO_ALG_INTERNAL,
1126 			.cra_blocksize		= AES_BLOCK_SIZE,
1127 			.cra_ctxsize		= XTS_AES_CTX_SIZE,
1128 			.cra_module		= THIS_MODULE,
1129 		},
1130 		.min_keysize	= 2 * AES_MIN_KEY_SIZE,
1131 		.max_keysize	= 2 * AES_MAX_KEY_SIZE,
1132 		.ivsize		= AES_BLOCK_SIZE,
1133 		.walksize	= 2 * AES_BLOCK_SIZE,
1134 		.setkey		= xts_setkey_aesni,
1135 		.encrypt	= xts_encrypt_aesni,
1136 		.decrypt	= xts_decrypt_aesni,
1137 	}
1138 };
1139 
1140 static
1141 struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
1142 
1143 #ifdef CONFIG_X86_64
1144 /*
1145  * XCTR does not have a non-AVX implementation, so it must be enabled
1146  * conditionally.
1147  */
1148 static struct skcipher_alg aesni_xctr = {
1149 	.base = {
1150 		.cra_name		= "__xctr(aes)",
1151 		.cra_driver_name	= "__xctr-aes-aesni",
1152 		.cra_priority		= 400,
1153 		.cra_flags		= CRYPTO_ALG_INTERNAL,
1154 		.cra_blocksize		= 1,
1155 		.cra_ctxsize		= CRYPTO_AES_CTX_SIZE,
1156 		.cra_module		= THIS_MODULE,
1157 	},
1158 	.min_keysize	= AES_MIN_KEY_SIZE,
1159 	.max_keysize	= AES_MAX_KEY_SIZE,
1160 	.ivsize		= AES_BLOCK_SIZE,
1161 	.chunksize	= AES_BLOCK_SIZE,
1162 	.setkey		= aesni_skcipher_setkey,
1163 	.encrypt	= xctr_crypt,
1164 	.decrypt	= xctr_crypt,
1165 };
1166 
1167 static struct simd_skcipher_alg *aesni_simd_xctr;
1168 
1169 asmlinkage void aes_xts_encrypt_iv(const struct crypto_aes_ctx *tweak_key,
1170 				   u8 iv[AES_BLOCK_SIZE]);
1171 
1172 #define DEFINE_XTS_ALG(suffix, driver_name, priority)			       \
1173 									       \
1174 asmlinkage void								       \
1175 aes_xts_encrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src,      \
1176 			 u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \
1177 asmlinkage void								       \
1178 aes_xts_decrypt_##suffix(const struct crypto_aes_ctx *key, const u8 *src,      \
1179 			 u8 *dst, unsigned int len, u8 tweak[AES_BLOCK_SIZE]); \
1180 									       \
1181 static int xts_encrypt_##suffix(struct skcipher_request *req)		       \
1182 {									       \
1183 	return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_encrypt_##suffix);   \
1184 }									       \
1185 									       \
1186 static int xts_decrypt_##suffix(struct skcipher_request *req)		       \
1187 {									       \
1188 	return xts_crypt(req, aes_xts_encrypt_iv, aes_xts_decrypt_##suffix);   \
1189 }									       \
1190 									       \
1191 static struct skcipher_alg aes_xts_alg_##suffix = {			       \
1192 	.base = {							       \
1193 		.cra_name		= "__xts(aes)",			       \
1194 		.cra_driver_name	= "__" driver_name,		       \
1195 		.cra_priority		= priority,			       \
1196 		.cra_flags		= CRYPTO_ALG_INTERNAL,		       \
1197 		.cra_blocksize		= AES_BLOCK_SIZE,		       \
1198 		.cra_ctxsize		= XTS_AES_CTX_SIZE,		       \
1199 		.cra_module		= THIS_MODULE,			       \
1200 	},								       \
1201 	.min_keysize	= 2 * AES_MIN_KEY_SIZE,				       \
1202 	.max_keysize	= 2 * AES_MAX_KEY_SIZE,				       \
1203 	.ivsize		= AES_BLOCK_SIZE,				       \
1204 	.walksize	= 2 * AES_BLOCK_SIZE,				       \
1205 	.setkey		= xts_setkey_aesni,				       \
1206 	.encrypt	= xts_encrypt_##suffix,				       \
1207 	.decrypt	= xts_decrypt_##suffix,				       \
1208 };									       \
1209 									       \
1210 static struct simd_skcipher_alg *aes_xts_simdalg_##suffix
1211 
1212 DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500);
1213 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
1214 DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600);
1215 DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700);
1216 DEFINE_XTS_ALG(vaes_avx10_512, "xts-aes-vaes-avx10_512", 800);
1217 #endif
1218 
1219 /*
1220  * This is a list of CPU models that are known to suffer from downclocking when
1221  * zmm registers (512-bit vectors) are used.  On these CPUs, the AES-XTS
1222  * implementation with zmm registers won't be used by default.  An
1223  * implementation with ymm registers (256-bit vectors) will be used instead.
1224  */
1225 static const struct x86_cpu_id zmm_exclusion_list[] = {
1226 	X86_MATCH_VFM(INTEL_SKYLAKE_X,		0),
1227 	X86_MATCH_VFM(INTEL_ICELAKE_X,		0),
1228 	X86_MATCH_VFM(INTEL_ICELAKE_D,		0),
1229 	X86_MATCH_VFM(INTEL_ICELAKE,		0),
1230 	X86_MATCH_VFM(INTEL_ICELAKE_L,		0),
1231 	X86_MATCH_VFM(INTEL_ICELAKE_NNPI,	0),
1232 	X86_MATCH_VFM(INTEL_TIGERLAKE_L,	0),
1233 	X86_MATCH_VFM(INTEL_TIGERLAKE,		0),
1234 	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
1235 	/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
1236 	{},
1237 };
1238 
1239 static int __init register_xts_algs(void)
1240 {
1241 	int err;
1242 
1243 	if (!boot_cpu_has(X86_FEATURE_AVX))
1244 		return 0;
1245 	err = simd_register_skciphers_compat(&aes_xts_alg_aesni_avx, 1,
1246 					     &aes_xts_simdalg_aesni_avx);
1247 	if (err)
1248 		return err;
1249 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
1250 	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
1251 	    !boot_cpu_has(X86_FEATURE_VAES) ||
1252 	    !boot_cpu_has(X86_FEATURE_VPCLMULQDQ) ||
1253 	    !boot_cpu_has(X86_FEATURE_PCLMULQDQ) ||
1254 	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
1255 		return 0;
1256 	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1,
1257 					     &aes_xts_simdalg_vaes_avx2);
1258 	if (err)
1259 		return err;
1260 
1261 	if (!boot_cpu_has(X86_FEATURE_AVX512BW) ||
1262 	    !boot_cpu_has(X86_FEATURE_AVX512VL) ||
1263 	    !boot_cpu_has(X86_FEATURE_BMI2) ||
1264 	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
1265 			       XFEATURE_MASK_AVX512, NULL))
1266 		return 0;
1267 
1268 	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1,
1269 					     &aes_xts_simdalg_vaes_avx10_256);
1270 	if (err)
1271 		return err;
1272 
1273 	if (x86_match_cpu(zmm_exclusion_list))
1274 		aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
1275 
1276 	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_512, 1,
1277 					     &aes_xts_simdalg_vaes_avx10_512);
1278 	if (err)
1279 		return err;
1280 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
1281 	return 0;
1282 }
1283 
1284 static void unregister_xts_algs(void)
1285 {
1286 	if (aes_xts_simdalg_aesni_avx)
1287 		simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1,
1288 					  &aes_xts_simdalg_aesni_avx);
1289 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
1290 	if (aes_xts_simdalg_vaes_avx2)
1291 		simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1,
1292 					  &aes_xts_simdalg_vaes_avx2);
1293 	if (aes_xts_simdalg_vaes_avx10_256)
1294 		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1,
1295 					  &aes_xts_simdalg_vaes_avx10_256);
1296 	if (aes_xts_simdalg_vaes_avx10_512)
1297 		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_512, 1,
1298 					  &aes_xts_simdalg_vaes_avx10_512);
1299 #endif
1300 }
1301 #else /* CONFIG_X86_64 */
1302 static int __init register_xts_algs(void)
1303 {
1304 	return 0;
1305 }
1306 
/* Without CONFIG_X86_64 nothing was registered, so there is nothing to undo. */
static void
unregister_xts_algs(void)
{
}
1310 #endif /* !CONFIG_X86_64 */
1311 
1312 #ifdef CONFIG_X86_64
1313 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
1314 				  unsigned int key_len)
1315 {
1316 	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead);
1317 
1318 	return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?:
1319 	       aes_gcm_derive_hash_subkey(&ctx->aes_key_expanded,
1320 					  ctx->hash_subkey);
1321 }
1322 
1323 static int generic_gcmaes_encrypt(struct aead_request *req)
1324 {
1325 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
1326 	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
1327 	void *aes_ctx = &(ctx->aes_key_expanded);
1328 	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
1329 	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
1330 	__be32 counter = cpu_to_be32(1);
1331 
1332 	memcpy(iv, req->iv, 12);
1333 	*((__be32 *)(iv+12)) = counter;
1334 
1335 	return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv,
1336 			      aes_ctx);
1337 }
1338 
1339 static int generic_gcmaes_decrypt(struct aead_request *req)
1340 {
1341 	__be32 counter = cpu_to_be32(1);
1342 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
1343 	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
1344 	void *aes_ctx = &(ctx->aes_key_expanded);
1345 	u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
1346 	u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
1347 
1348 	memcpy(iv, req->iv, 12);
1349 	*((__be32 *)(iv+12)) = counter;
1350 
1351 	return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv,
1352 			      aes_ctx);
1353 }
1354 
1355 static struct aead_alg aesni_aeads[] = { {
1356 	.setkey			= common_rfc4106_set_key,
1357 	.setauthsize		= common_rfc4106_set_authsize,
1358 	.encrypt		= helper_rfc4106_encrypt,
1359 	.decrypt		= helper_rfc4106_decrypt,
1360 	.ivsize			= GCM_RFC4106_IV_SIZE,
1361 	.maxauthsize		= 16,
1362 	.base = {
1363 		.cra_name		= "__rfc4106(gcm(aes))",
1364 		.cra_driver_name	= "__rfc4106-gcm-aesni",
1365 		.cra_priority		= 400,
1366 		.cra_flags		= CRYPTO_ALG_INTERNAL,
1367 		.cra_blocksize		= 1,
1368 		.cra_ctxsize		= sizeof(struct aesni_rfc4106_gcm_ctx),
1369 		.cra_alignmask		= 0,
1370 		.cra_module		= THIS_MODULE,
1371 	},
1372 }, {
1373 	.setkey			= generic_gcmaes_set_key,
1374 	.setauthsize		= generic_gcmaes_set_authsize,
1375 	.encrypt		= generic_gcmaes_encrypt,
1376 	.decrypt		= generic_gcmaes_decrypt,
1377 	.ivsize			= GCM_AES_IV_SIZE,
1378 	.maxauthsize		= 16,
1379 	.base = {
1380 		.cra_name		= "__gcm(aes)",
1381 		.cra_driver_name	= "__generic-gcm-aesni",
1382 		.cra_priority		= 400,
1383 		.cra_flags		= CRYPTO_ALG_INTERNAL,
1384 		.cra_blocksize		= 1,
1385 		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
1386 		.cra_alignmask		= 0,
1387 		.cra_module		= THIS_MODULE,
1388 	},
1389 } };
1390 #else
1391 static struct aead_alg aesni_aeads[0];
1392 #endif
1393 
1394 static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];
1395 
1396 static const struct x86_cpu_id aesni_cpu_id[] = {
1397 	X86_MATCH_FEATURE(X86_FEATURE_AES, NULL),
1398 	{}
1399 };
1400 MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
1401 
1402 static int __init aesni_init(void)
1403 {
1404 	int err;
1405 
1406 	if (!x86_match_cpu(aesni_cpu_id))
1407 		return -ENODEV;
1408 #ifdef CONFIG_X86_64
1409 	if (boot_cpu_has(X86_FEATURE_AVX2)) {
1410 		pr_info("AVX2 version of gcm_enc/dec engaged.\n");
1411 		static_branch_enable(&gcm_use_avx);
1412 		static_branch_enable(&gcm_use_avx2);
1413 	} else
1414 	if (boot_cpu_has(X86_FEATURE_AVX)) {
1415 		pr_info("AVX version of gcm_enc/dec engaged.\n");
1416 		static_branch_enable(&gcm_use_avx);
1417 	} else {
1418 		pr_info("SSE version of gcm_enc/dec engaged.\n");
1419 	}
1420 	if (boot_cpu_has(X86_FEATURE_AVX)) {
1421 		/* optimize performance of ctr mode encryption transform */
1422 		static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
1423 		pr_info("AES CTR mode by8 optimization enabled\n");
1424 	}
1425 #endif /* CONFIG_X86_64 */
1426 
1427 	err = crypto_register_alg(&aesni_cipher_alg);
1428 	if (err)
1429 		return err;
1430 
1431 	err = simd_register_skciphers_compat(aesni_skciphers,
1432 					     ARRAY_SIZE(aesni_skciphers),
1433 					     aesni_simd_skciphers);
1434 	if (err)
1435 		goto unregister_cipher;
1436 
1437 	err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads),
1438 					 aesni_simd_aeads);
1439 	if (err)
1440 		goto unregister_skciphers;
1441 
1442 #ifdef CONFIG_X86_64
1443 	if (boot_cpu_has(X86_FEATURE_AVX))
1444 		err = simd_register_skciphers_compat(&aesni_xctr, 1,
1445 						     &aesni_simd_xctr);
1446 	if (err)
1447 		goto unregister_aeads;
1448 #endif /* CONFIG_X86_64 */
1449 
1450 	err = register_xts_algs();
1451 	if (err)
1452 		goto unregister_xts;
1453 
1454 	return 0;
1455 
1456 unregister_xts:
1457 	unregister_xts_algs();
1458 #ifdef CONFIG_X86_64
1459 	if (aesni_simd_xctr)
1460 		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
1461 unregister_aeads:
1462 #endif /* CONFIG_X86_64 */
1463 	simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
1464 				aesni_simd_aeads);
1465 
1466 unregister_skciphers:
1467 	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
1468 				  aesni_simd_skciphers);
1469 unregister_cipher:
1470 	crypto_unregister_alg(&aesni_cipher_alg);
1471 	return err;
1472 }
1473 
1474 static void __exit aesni_exit(void)
1475 {
1476 	simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
1477 			      aesni_simd_aeads);
1478 	simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
1479 				  aesni_simd_skciphers);
1480 	crypto_unregister_alg(&aesni_cipher_alg);
1481 #ifdef CONFIG_X86_64
1482 	if (boot_cpu_has(X86_FEATURE_AVX))
1483 		simd_unregister_skciphers(&aesni_xctr, 1, &aesni_simd_xctr);
1484 #endif /* CONFIG_X86_64 */
1485 	unregister_xts_algs();
1486 }
1487 
1488 late_initcall(aesni_init);
1489 module_exit(aesni_exit);
1490 
1491 MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized");
1492 MODULE_LICENSE("GPL");
1493 MODULE_ALIAS_CRYPTO("aes");
1494