/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// Copyright 2025 Google LLC
//
// Author: Eric Biggers <ebiggers@google.com>
//
// This file is dual-licensed, meaning that you can use it under your choice of
// either of the following two licenses:
//
// Licensed under the Apache License 2.0 (the "License").  You may obtain a copy
// of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// or
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//------------------------------------------------------------------------------
//
// This file contains x86_64 assembly implementations of AES-CTR and AES-XCTR
// using the following sets of CPU features:
//	- AES-NI && AVX
//	- VAES && AVX2
//	- VAES && (AVX10/256 || (AVX512BW && AVX512VL)) && BMI2
//	- VAES && (AVX10/512 || (AVX512BW && AVX512VL)) && BMI2
//
// See the function definitions at the bottom of the file for more information.

#include <linux/linkage.h>
#include <linux/cfi_types.h>

.section .rodata
.p2align 4

.Lbswap_mask:
	.octa	0x000102030405060708090a0b0c0d0e0f

.Lctr_pattern:
	.quad	0, 0
.Lone:
	.quad	1, 0
.Ltwo:
	.quad	2, 0
	.quad	3, 0

.Lfour:
	.quad	4, 0

.text

// Move a vector between memory and a register.
// The register operand must be in the first 16 vector registers.
.macro	_vmovdqu	src, dst
.if VL < 64
	vmovdqu		\src, \dst
.else
	vmovdqu8	\src, \dst
.endif
.endm

// Move a vector between registers.
// The registers must be in the first 16 vector registers.
.macro	_vmovdqa	src, dst
.if VL < 64
	vmovdqa		\src, \dst
.else
	vmovdqa64	\src, \dst
.endif
.endm

// Broadcast a 128-bit value from memory to all 128-bit lanes of a vector
// register.  The register operand must be in the first 16 vector registers.
.macro	_vbroadcast128	src, dst
.if VL == 16
	vmovdqu		\src, \dst
.elseif VL == 32
	vbroadcasti128	\src, \dst
.else
	vbroadcasti32x4	\src, \dst
.endif
.endm

// XOR two vectors together.
// Any register operands must be in the first 16 vector registers.
.macro	_vpxor	src1, src2, dst
.if VL < 64
	vpxor		\src1, \src2, \dst
.else
	vpxord		\src1, \src2, \dst
.endif
.endm

// Load 1 <= %ecx <= 15 bytes from the pointer \src into the xmm register \dst
// and zeroize any remaining bytes.  Clobbers %rax, %rcx, and \tmp{64,32}.
.macro	_load_partial_block	src, dst, tmp64, tmp32
	sub		$8, %ecx		// LEN - 8
	jle		.Lle8\@

	// Load 9 <= LEN <= 15 bytes.
	vmovq		(\src), \dst		// Load first 8 bytes
	mov		(\src, %rcx), %rax	// Load last 8 bytes
	neg		%ecx
	shl		$3, %ecx
	shr		%cl, %rax		// Discard overlapping bytes
	vpinsrq		$1, %rax, \dst, \dst
	jmp		.Ldone\@

.Lle8\@:
	add		$4, %ecx		// LEN - 4
	jl		.Llt4\@

	// Load 4 <= LEN <= 8 bytes.
	mov		(\src), %eax		// Load first 4 bytes
	mov		(\src, %rcx), \tmp32	// Load last 4 bytes
	jmp		.Lcombine\@

.Llt4\@:
	// Load 1 <= LEN <= 3 bytes.
	add		$2, %ecx		// LEN - 2
	movzbl		(\src), %eax		// Load first byte
	jl		.Lmovq\@
	movzwl		(\src, %rcx), \tmp32	// Load last 2 bytes
.Lcombine\@:
	shl		$3, %ecx
	shl		%cl, \tmp64
	or		\tmp64, %rax		// Combine the two parts
.Lmovq\@:
	vmovq		%rax, \dst
.Ldone\@:
.endm

// Store 1 <= %ecx <= 15 bytes from the xmm register \src to the pointer \dst.
// Clobbers %rax, %rcx, and \tmp{64,32}.
.macro	_store_partial_block	src, dst, tmp64, tmp32
	sub		$8, %ecx		// LEN - 8
	jl		.Llt8\@

	// Store 8 <= LEN <= 15 bytes.
	vpextrq		$1, \src, %rax
	mov		%ecx, \tmp32
	shl		$3, %ecx
	ror		%cl, %rax
	mov		%rax, (\dst, \tmp64)	// Store last LEN - 8 bytes
	vmovq		\src, (\dst)		// Store first 8 bytes
	jmp		.Ldone\@

.Llt8\@:
	add		$4, %ecx		// LEN - 4
	jl		.Llt4\@

	// Store 4 <= LEN <= 7 bytes.
	vpextrd		$1, \src, %eax
	mov		%ecx, \tmp32
	shl		$3, %ecx
	ror		%cl, %eax
	mov		%eax, (\dst, \tmp64)	// Store last LEN - 4 bytes
	vmovd		\src, (\dst)		// Store first 4 bytes
	jmp		.Ldone\@

.Llt4\@:
	// Store 1 <= LEN <= 3 bytes.
	vpextrb		$0, \src, 0(\dst)
	cmp		$-2, %ecx		// LEN - 4 == -2, i.e. LEN == 2?
	jl		.Ldone\@
	vpextrb		$1, \src, 1(\dst)
	je		.Ldone\@
	vpextrb		$2, \src, 2(\dst)
.Ldone\@:
.endm

// Prepare the next two vectors of AES inputs in AESDATA\i0 and AESDATA\i1, and
// XOR each with the zero-th round key.  Also update LE_CTR if !\final.
.macro	_prepare_2_ctr_vecs	is_xctr, i0, i1, final=0
.if \is_xctr
  .if USE_AVX10
	_vmovdqa	LE_CTR, AESDATA\i0
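	// The ternary logic immediate 0x96 selects a three-way XOR, so the
	// vpternlogd below computes AESDATA\i0 ^= XCTR_IV ^ RNDKEY0 (and
	// likewise for AESDATA\i1 further down).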
	vpternlogd	$0x96, XCTR_IV, RNDKEY0, AESDATA\i0
  .else
	vpxor		XCTR_IV, LE_CTR, AESDATA\i0
	vpxor		RNDKEY0, AESDATA\i0, AESDATA\i0
  .endif
	vpaddq		LE_CTR_INC1, LE_CTR, AESDATA\i1

  .if USE_AVX10
	vpternlogd	$0x96, XCTR_IV, RNDKEY0, AESDATA\i1
  .else
	vpxor		XCTR_IV, AESDATA\i1, AESDATA\i1
	vpxor		RNDKEY0, AESDATA\i1, AESDATA\i1
  .endif
.else
	vpshufb		BSWAP_MASK, LE_CTR, AESDATA\i0
	_vpxor		RNDKEY0, AESDATA\i0, AESDATA\i0
	vpaddq		LE_CTR_INC1, LE_CTR, AESDATA\i1
	vpshufb		BSWAP_MASK, AESDATA\i1, AESDATA\i1
	_vpxor		RNDKEY0, AESDATA\i1, AESDATA\i1
.endif
.if !\final
	vpaddq		LE_CTR_INC2, LE_CTR, LE_CTR
.endif
.endm

// Do all AES rounds on the data in the given AESDATA vectors, excluding the
// zero-th and last rounds.
.macro	_aesenc_loop	vecs:vararg
	mov		KEY, %rax
1:
	_vbroadcast128	(%rax), RNDKEY
.irp i, \vecs
	vaesenc		RNDKEY, AESDATA\i, AESDATA\i
.endr
	add		$16, %rax
	cmp		%rax, RNDKEYLAST_PTR
	jne		1b
.endm

// Finalize the keystream blocks in the given AESDATA vectors by doing the last
// AES round, then XOR those keystream blocks with the corresponding data.
// Reduce latency by doing the XOR before the vaesenclast, utilizing the
// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a).
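// (This identity holds because the last AES round ends with AddRoundKey, which
// is simply an XOR with the round key, so the extra XOR with b can be folded
// into the key operand.)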
.macro	_aesenclast_and_xor	vecs:vararg
.irp i, \vecs
	_vpxor		\i*VL(SRC), RNDKEYLAST, RNDKEY
	vaesenclast	RNDKEY, AESDATA\i, AESDATA\i
.endr
.irp i, \vecs
	_vmovdqu	AESDATA\i, \i*VL(DST)
.endr
.endm

// XOR the keystream blocks in the specified AESDATA vectors with the
// corresponding data.
.macro	_xor_data	vecs:vararg
.irp i, \vecs
	_vpxor		\i*VL(SRC), AESDATA\i, AESDATA\i
.endr
.irp i, \vecs
	_vmovdqu	AESDATA\i, \i*VL(DST)
.endr
.endm

.macro	_aes_ctr_crypt		is_xctr

	// Define register aliases V0-V15 that map to the xmm, ymm, or zmm
	// registers according to the selected Vector Length (VL).
.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
  .if VL == 16
	.set	V\i,		%xmm\i
  .elseif VL == 32
	.set	V\i,		%ymm\i
  .elseif VL == 64
	.set	V\i,		%zmm\i
  .else
	.error "Unsupported Vector Length (VL)"
  .endif
.endr

	// Function arguments
	.set	KEY,		%rdi	// Initially points to the start of the
					// crypto_aes_ctx, then is advanced to
					// point to the index 1 round key
	.set	KEY32,		%edi	// Available as temp register after all
					// keystream blocks have been generated
	.set	SRC,		%rsi	// Pointer to next source data
	.set	DST,		%rdx	// Pointer to next destination data
	.set	LEN,		%ecx	// Remaining length in bytes.
					// Note: _load_partial_block relies on
					// this being in %ecx.
	.set	LEN64,		%rcx	// Zero-extend LEN before using!
	.set	LEN8,		%cl
.if \is_xctr
	.set	XCTR_IV_PTR,	%r8	// const u8 iv[AES_BLOCK_SIZE];
	.set	XCTR_CTR,	%r9	// u64 ctr;
.else
	.set	LE_CTR_PTR,	%r8	// const u64 le_ctr[2];
.endif

	// Additional local variables
	.set	RNDKEYLAST_PTR,	%r10
	.set	AESDATA0,	V0
	.set	AESDATA0_XMM,	%xmm0
	.set	AESDATA1,	V1
	.set	AESDATA1_XMM,	%xmm1
	.set	AESDATA2,	V2
	.set	AESDATA3,	V3
	.set	AESDATA4,	V4
	.set	AESDATA5,	V5
	.set	AESDATA6,	V6
	.set	AESDATA7,	V7
.if \is_xctr
	.set	XCTR_IV,	V8
.else
	.set	BSWAP_MASK,	V8
.endif
	.set	LE_CTR,		V9
	.set	LE_CTR_XMM,	%xmm9
	.set	LE_CTR_INC1,	V10
	.set	LE_CTR_INC2,	V11
	.set	RNDKEY0,	V12
	.set	RNDKEYLAST,	V13
	.set	RNDKEY,		V14

	// Create the first vector of counters.
.if \is_xctr
  .if VL == 16
	vmovq		XCTR_CTR, LE_CTR
  .elseif VL == 32
	vmovq		XCTR_CTR, LE_CTR_XMM
	inc		XCTR_CTR
	vmovq		XCTR_CTR, AESDATA0_XMM
	vinserti128	$1, AESDATA0_XMM, LE_CTR, LE_CTR
  .else
	vpbroadcastq	XCTR_CTR, LE_CTR
	vpsrldq		$8, LE_CTR, LE_CTR
	vpaddq		.Lctr_pattern(%rip), LE_CTR, LE_CTR
  .endif
	_vbroadcast128	(XCTR_IV_PTR), XCTR_IV
.else
	_vbroadcast128	(LE_CTR_PTR), LE_CTR
  .if VL > 16
	vpaddq		.Lctr_pattern(%rip), LE_CTR, LE_CTR
  .endif
	_vbroadcast128	.Lbswap_mask(%rip), BSWAP_MASK
.endif

.if VL == 16
	_vbroadcast128	.Lone(%rip), LE_CTR_INC1
.elseif VL == 32
	_vbroadcast128	.Ltwo(%rip), LE_CTR_INC1
.else
	_vbroadcast128	.Lfour(%rip), LE_CTR_INC1
.endif
	vpsllq		$1, LE_CTR_INC1, LE_CTR_INC2

	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
	movl		480(KEY), %eax

	// Compute the pointer to the last round key.
	lea		6*16(KEY, %rax, 4), RNDKEYLAST_PTR
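	// (The AES round count is keylen/4 + 6, so the last round key is at
	// byte offset 16*(keylen/4 + 6) = 4*keylen + 6*16 from the start of
	// the key schedule, hence the addressing above.)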

	// Load the zero-th and last round keys.
	_vbroadcast128	(KEY), RNDKEY0
	_vbroadcast128	(RNDKEYLAST_PTR), RNDKEYLAST

	// Make KEY point to the first round key.
	add		$16, KEY

	// This is the main loop, which encrypts 8 vectors of data at a time.
	add		$-8*VL, LEN
	jl		.Lloop_8x_done\@
.Lloop_8x\@:
	_prepare_2_ctr_vecs	\is_xctr, 0, 1
	_prepare_2_ctr_vecs	\is_xctr, 2, 3
	_prepare_2_ctr_vecs	\is_xctr, 4, 5
	_prepare_2_ctr_vecs	\is_xctr, 6, 7
	_aesenc_loop	0,1,2,3,4,5,6,7
	_aesenclast_and_xor 0,1,2,3,4,5,6,7
	sub		$-8*VL, SRC
	sub		$-8*VL, DST
	add		$-8*VL, LEN
	jge		.Lloop_8x\@
.Lloop_8x_done\@:
	sub		$-8*VL, LEN
	jz		.Ldone\@

	// 1 <= LEN < 8*VL.  Generate 2, 4, or 8 more vectors of keystream
	// blocks, depending on the remaining LEN.

	_prepare_2_ctr_vecs	\is_xctr, 0, 1
	_prepare_2_ctr_vecs	\is_xctr, 2, 3
	cmp		$4*VL, LEN
	jle		.Lenc_tail_atmost4vecs\@

	// 4*VL < LEN < 8*VL.  Generate 8 vectors of keystream blocks.  Use the
	// first 4 to XOR 4 full vectors of data.  Then XOR the remaining data.
	_prepare_2_ctr_vecs	\is_xctr, 4, 5
	_prepare_2_ctr_vecs	\is_xctr, 6, 7, final=1
	_aesenc_loop	0,1,2,3,4,5,6,7
	_aesenclast_and_xor 0,1,2,3
	vaesenclast	RNDKEYLAST, AESDATA4, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA5, AESDATA1
	vaesenclast	RNDKEYLAST, AESDATA6, AESDATA2
	vaesenclast	RNDKEYLAST, AESDATA7, AESDATA3
	sub		$-4*VL, SRC
	sub		$-4*VL, DST
	add		$-4*VL, LEN
	cmp		$1*VL-1, LEN
	jle		.Lxor_tail_partial_vec_0\@
	_xor_data	0
	cmp		$2*VL-1, LEN
	jle		.Lxor_tail_partial_vec_1\@
	_xor_data	1
	cmp		$3*VL-1, LEN
	jle		.Lxor_tail_partial_vec_2\@
	_xor_data	2
	cmp		$4*VL-1, LEN
	jle		.Lxor_tail_partial_vec_3\@
	_xor_data	3
	jmp		.Ldone\@

.Lenc_tail_atmost4vecs\@:
	cmp		$2*VL, LEN
	jle		.Lenc_tail_atmost2vecs\@

	// 2*VL < LEN <= 4*VL.  Generate 4 vectors of keystream blocks.  Use the
	// first 2 to XOR 2 full vectors of data.  Then XOR the remaining data.
	_aesenc_loop	0,1,2,3
	_aesenclast_and_xor 0,1
	vaesenclast	RNDKEYLAST, AESDATA2, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA3, AESDATA1
	sub		$-2*VL, SRC
	sub		$-2*VL, DST
	add		$-2*VL, LEN
	jmp		.Lxor_tail_upto2vecs\@

.Lenc_tail_atmost2vecs\@:
	// 1 <= LEN <= 2*VL.  Generate 2 vectors of keystream blocks.  Then XOR
	// the remaining data.
	_aesenc_loop	0,1
	vaesenclast	RNDKEYLAST, AESDATA0, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA1, AESDATA1

.Lxor_tail_upto2vecs\@:
	cmp		$1*VL-1, LEN
	jle		.Lxor_tail_partial_vec_0\@
	_xor_data	0
	cmp		$2*VL-1, LEN
	jle		.Lxor_tail_partial_vec_1\@
	_xor_data	1
	jmp		.Ldone\@

.Lxor_tail_partial_vec_1\@:
	add		$-1*VL, LEN
	jz		.Ldone\@
	sub		$-1*VL, SRC
	sub		$-1*VL, DST
	_vmovdqa	AESDATA1, AESDATA0
	jmp		.Lxor_tail_partial_vec_0\@

.Lxor_tail_partial_vec_2\@:
	add		$-2*VL, LEN
	jz		.Ldone\@
	sub		$-2*VL, SRC
	sub		$-2*VL, DST
	_vmovdqa	AESDATA2, AESDATA0
	jmp		.Lxor_tail_partial_vec_0\@

.Lxor_tail_partial_vec_3\@:
	add		$-3*VL, LEN
	jz		.Ldone\@
	sub		$-3*VL, SRC
	sub		$-3*VL, DST
	_vmovdqa	AESDATA3, AESDATA0

.Lxor_tail_partial_vec_0\@:
	// XOR the remaining 1 <= LEN < VL bytes.  It's easy if masked
	// loads/stores are available; otherwise it's a bit harder...
.if USE_AVX10
  .if VL <= 32
	mov		$-1, %eax
	bzhi		LEN, %eax, %eax
	kmovd		%eax, %k1
  .else
	mov		$-1, %rax
	bzhi		LEN64, %rax, %rax
	kmovq		%rax, %k1
  .endif
	vmovdqu8	(SRC), AESDATA1{%k1}{z}
	_vpxor		AESDATA1, AESDATA0, AESDATA0
	vmovdqu8	AESDATA0, (DST){%k1}
.else
  .if VL == 32
	cmp		$16, LEN
	jl		1f
	vpxor		(SRC), AESDATA0_XMM, AESDATA1_XMM
	vmovdqu		AESDATA1_XMM, (DST)
	add		$16, SRC
	add		$16, DST
	sub		$16, LEN
	jz		.Ldone\@
	vextracti128	$1, AESDATA0, AESDATA0_XMM
1:
  .endif
	mov		LEN, %r10d
	_load_partial_block	SRC, AESDATA1_XMM, KEY, KEY32
	vpxor		AESDATA1_XMM, AESDATA0_XMM, AESDATA0_XMM
	mov		%r10d, %ecx
	_store_partial_block	AESDATA0_XMM, DST, KEY, KEY32
.endif

.Ldone\@:
.if VL > 16
	vzeroupper
.endif
	RET
.endm

// Below are the definitions of the functions generated by the above macro.
// They have the following prototypes:
//
// void aes_ctr64_crypt_##suffix(const struct crypto_aes_ctx *key,
//				 const u8 *src, u8 *dst, int len,
//				 const u64 le_ctr[2]);
//
// void aes_xctr_crypt_##suffix(const struct crypto_aes_ctx *key,
//				const u8 *src, u8 *dst, int len,
//				const u8 iv[AES_BLOCK_SIZE], u64 ctr);
//
// Both functions generate |len| bytes of keystream, XOR it with the data from
// |src|, and write the result to |dst|.  On non-final calls, |len| must be a
// multiple of 16.  On the final call, |len| can be any value.
//
// aes_ctr64_crypt_* implement "regular" CTR, where the keystream is generated
// from a 128-bit big endian counter that increments by 1 for each AES block.
// HOWEVER, to keep the assembly code simple, some of the counter management is
// left to the caller.  aes_ctr64_crypt_* take the counter in little endian
// form, only increment the low 64 bits internally, do the conversion to big
// endian internally, and don't write the updated counter back to memory.  The
// caller is responsible for converting the starting IV to the little endian
// le_ctr, detecting the (very rare) case of a carry out of the low 64 bits
// being needed and splitting at that point with a carry done in between, and
// updating le_ctr after each part if the message is multi-part.
//
// aes_xctr_crypt_* implement XCTR as specified in "Length-preserving encryption
// with HCTR2" (https://eprint.iacr.org/2021/1441.pdf).  XCTR is an
// easier-to-implement variant of CTR that uses little endian byte order and
// eliminates carries.  |ctr| is the per-message block counter starting at 1.
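//
// For illustration only, here is a minimal sketch (not the actual kernel glue
// code) of how a caller of the aes_ctr64_crypt_* functions might do the little
// endian conversion and the rare low-64-bit carry split described above.  The
// helper name ctr64_crypt_chunk() is hypothetical:
//
//	static void ctr64_crypt_chunk(const struct crypto_aes_ctx *key,
//				      const u8 *src, u8 *dst, int len,
//				      const u8 iv[16] /* big endian IV */)
//	{
//		u64 le_ctr[2];
//		u64 nblocks = DIV_ROUND_UP(len, 16);
//		u64 blocks_before_wrap;
//
//		/* Convert the big endian IV to the little endian le_ctr[]. */
//		le_ctr[0] = get_unaligned_be64(&iv[8]);
//		le_ctr[1] = get_unaligned_be64(&iv[0]);
//
//		/* Blocks remaining before the low 64 counter bits wrap. */
//		blocks_before_wrap = 0 - le_ctr[0];	/* == 2^64 - le_ctr[0] */
//		if (blocks_before_wrap != 0 && nblocks > blocks_before_wrap) {
//			int part = blocks_before_wrap * 16;
//
//			/* First part: a multiple of 16 bytes, as required. */
//			aes_ctr64_crypt_aesni_avx(key, src, dst, part, le_ctr);
//			src += part;
//			dst += part;
//			len -= part;
//			/* Do the carry into the high 64 bits by hand. */
//			le_ctr[0] = 0;
//			le_ctr[1]++;
//		}
//		aes_ctr64_crypt_aesni_avx(key, src, dst, len, le_ctr);
//	}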

.set	VL, 16
.set	USE_AVX10, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_aesni_avx)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_aesni_avx)
SYM_TYPED_FUNC_START(aes_xctr_crypt_aesni_avx)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_aesni_avx)

#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set	VL, 32
.set	USE_AVX10, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx2)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx2)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx2)

.set	VL, 32
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_256)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_256)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_256)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_256)

.set	VL, 64
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_512)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_512)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_512)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_512)
#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ