/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// Copyright 2025 Google LLC
//
// Author: Eric Biggers <ebiggers@google.com>
//
// This file is dual-licensed, meaning that you can use it under your choice of
// either of the following two licenses:
//
// Licensed under the Apache License 2.0 (the "License").  You may obtain a copy
// of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// or
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//------------------------------------------------------------------------------
//
// This file contains x86_64 assembly implementations of AES-CTR and AES-XCTR
// using the following sets of CPU features:
//	- AES-NI && AVX
//	- VAES && AVX2
//	- VAES && (AVX10/256 || (AVX512BW && AVX512VL)) && BMI2
//	- VAES && (AVX10/512 || (AVX512BW && AVX512VL)) && BMI2
//
// See the function definitions at the bottom of the file for more information.

#include <linux/linkage.h>
#include <linux/cfi_types.h>

.section .rodata
.p2align 4

.Lbswap_mask:
	.octa	0x000102030405060708090a0b0c0d0e0f

.Lctr_pattern:
	.quad	0, 0
.Lone:
	.quad	1, 0
.Ltwo:
	.quad	2, 0
	.quad	3, 0

.Lfour:
	.quad	4, 0

.text

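// The _vmovdqu, _vmovdqa, _vbroadcast128, and _vpxor wrapper macros below
// abstract over the vector length VL (16, 32, or 64 bytes).  For VL == 16 or
// 32 they expand to VEX-encoded AVX/AVX2 instructions (e.g. _vpxor => vpxor),
// while for VL == 64 they expand to the EVEX-encoded AVX-512 forms (e.g.
// _vpxor => vpxord), since the VEX encodings cannot operate on zmm registers.
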
// Move a vector between memory and a register.
// The register operand must be in the first 16 vector registers.
.macro	_vmovdqu	src, dst
.if VL < 64
	vmovdqu		\src, \dst
.else
	vmovdqu8	\src, \dst
.endif
.endm

// Move a vector between registers.
// The registers must be in the first 16 vector registers.
.macro	_vmovdqa	src, dst
.if VL < 64
	vmovdqa		\src, \dst
.else
	vmovdqa64	\src, \dst
.endif
.endm

// Broadcast a 128-bit value from memory to all 128-bit lanes of a vector
// register.  The register operand must be in the first 16 vector registers.
.macro	_vbroadcast128	src, dst
.if VL == 16
	vmovdqu		\src, \dst
.elseif VL == 32
	vbroadcasti128	\src, \dst
.else
	vbroadcasti32x4	\src, \dst
.endif
.endm

// XOR two vectors together.
// Any register operands must be in the first 16 vector registers.
.macro	_vpxor	src1, src2, dst
.if VL < 64
	vpxor		\src1, \src2, \dst
.else
	vpxord		\src1, \src2, \dst
.endif
.endm

// Load 1 <= %ecx <= 15 bytes from the pointer \src into the xmm register \dst
// and zeroize any remaining bytes.  Clobbers %rax, %rcx, and \tmp{64,32}.
.macro	_load_partial_block	src, dst, tmp64, tmp32
	sub		$8, %ecx		// LEN - 8
	jle		.Lle8\@

	// Load 9 <= LEN <= 15 bytes.
	vmovq		(\src), \dst		// Load first 8 bytes
	mov		(\src, %rcx), %rax	// Load last 8 bytes
	neg		%ecx
	shl		$3, %ecx
	shr		%cl, %rax		// Discard overlapping bytes
	vpinsrq		$1, %rax, \dst, \dst
	jmp		.Ldone\@

.Lle8\@:
	add		$4, %ecx		// LEN - 4
	jl		.Llt4\@

	// Load 4 <= LEN <= 8 bytes.
	mov		(\src), %eax		// Load first 4 bytes
	mov		(\src, %rcx), \tmp32	// Load last 4 bytes
	jmp		.Lcombine\@

.Llt4\@:
	// Load 1 <= LEN <= 3 bytes.
	add		$2, %ecx		// LEN - 2
	movzbl		(\src), %eax		// Load first byte
	jl		.Lmovq\@
	movzwl		(\src, %rcx), \tmp32	// Load last 2 bytes
.Lcombine\@:
	shl		$3, %ecx
	shl		%cl, \tmp64
	or		\tmp64, %rax		// Combine the two parts
.Lmovq\@:
	vmovq		%rax, \dst
.Ldone\@:
.endm

// Store 1 <= %ecx <= 15 bytes from the xmm register \src to the pointer \dst.
// Clobbers %rax, %rcx, and \tmp{64,32}.
.macro	_store_partial_block	src, dst, tmp64, tmp32
	sub		$8, %ecx		// LEN - 8
	jl		.Llt8\@

	// Store 8 <= LEN <= 15 bytes.
	vpextrq		$1, \src, %rax
	mov		%ecx, \tmp32
	shl		$3, %ecx
	ror		%cl, %rax
	mov		%rax, (\dst, \tmp64)	// Store last LEN - 8 bytes
	vmovq		\src, (\dst)		// Store first 8 bytes
	jmp		.Ldone\@

.Llt8\@:
	add		$4, %ecx		// LEN - 4
	jl		.Llt4\@

	// Store 4 <= LEN <= 7 bytes.
	vpextrd		$1, \src, %eax
	mov		%ecx, \tmp32
	shl		$3, %ecx
	ror		%cl, %eax
	mov		%eax, (\dst, \tmp64)	// Store last LEN - 4 bytes
	vmovd		\src, (\dst)		// Store first 4 bytes
	jmp		.Ldone\@

.Llt4\@:
	// Store 1 <= LEN <= 3 bytes.
	vpextrb		$0, \src, 0(\dst)
	cmp		$-2, %ecx		// LEN - 4 == -2, i.e. LEN == 2?
	jl		.Ldone\@
	vpextrb		$1, \src, 1(\dst)
	je		.Ldone\@
	vpextrb		$2, \src, 2(\dst)
.Ldone\@:
.endm

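// As an illustrative sketch only (this helper is hypothetical and is not part
// of this file or its C glue code), the overlapping-load approach used by
// _load_partial_block for the 9 <= LEN <= 15 case corresponds roughly to the
// following little endian C code:
//
//	static void load_partial_block_9_to_15(const u8 *src, size_t len,
//					       u8 dst[16])
//	{
//		u64 lo, hi;
//
//		memcpy(&lo, src, 8);		/* first 8 bytes */
//		memcpy(&hi, src + len - 8, 8);	/* last 8 bytes, overlapping */
//		hi >>= 8 * (16 - len);		/* shift out the overlap */
//		memcpy(dst, &lo, 8);
//		memcpy(dst + 8, &hi, 8);	/* bytes [len, 16) end up 0 */
//	}
//
// The 4-8 and 1-3 byte cases use the same first-part/last-part idea with
// 4-byte and 1-2 byte loads respectively.
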
// Prepare the next two vectors of AES inputs in AESDATA\i0 and AESDATA\i1, and
// XOR each with the zero-th round key.  Also update LE_CTR if !\final.
.macro	_prepare_2_ctr_vecs	is_xctr, i0, i1, final=0
.if \is_xctr
  .if USE_AVX10
	_vmovdqa	LE_CTR, AESDATA\i0
	vpternlogd	$0x96, XCTR_IV, RNDKEY0, AESDATA\i0
  .else
	vpxor		XCTR_IV, LE_CTR, AESDATA\i0
	vpxor		RNDKEY0, AESDATA\i0, AESDATA\i0
  .endif
	vpaddq		LE_CTR_INC1, LE_CTR, AESDATA\i1

  .if USE_AVX10
	vpternlogd	$0x96, XCTR_IV, RNDKEY0, AESDATA\i1
  .else
	vpxor		XCTR_IV, AESDATA\i1, AESDATA\i1
	vpxor		RNDKEY0, AESDATA\i1, AESDATA\i1
  .endif
.else
	vpshufb		BSWAP_MASK, LE_CTR, AESDATA\i0
	_vpxor		RNDKEY0, AESDATA\i0, AESDATA\i0
	vpaddq		LE_CTR_INC1, LE_CTR, AESDATA\i1
	vpshufb		BSWAP_MASK, AESDATA\i1, AESDATA\i1
	_vpxor		RNDKEY0, AESDATA\i1, AESDATA\i1
.endif
.if !\final
	vpaddq		LE_CTR_INC2, LE_CTR, LE_CTR
.endif
.endm

// Do all AES rounds on the data in the given AESDATA vectors, excluding the
// zero-th and last rounds.
.macro	_aesenc_loop	vecs:vararg
	mov		KEY, %rax
1:
	_vbroadcast128	(%rax), RNDKEY
.irp i, \vecs
	vaesenc		RNDKEY, AESDATA\i, AESDATA\i
.endr
	add		$16, %rax
	cmp		%rax, RNDKEYLAST_PTR
	jne		1b
.endm

// Finalize the keystream blocks in the given AESDATA vectors by doing the last
// AES round, then XOR those keystream blocks with the corresponding data.
// Reduce latency by doing the XOR before the vaesenclast, utilizing the
// property vaesenclast(key, a) ^ b == vaesenclast(key ^ b, a).
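// (This property holds because the last AES round consists of SubBytes and
// ShiftRows, which don't involve the round key, followed by an XOR with the
// round key.  That is, vaesenclast(key, a) = f(a) ^ key for a key-independent
// function f, and therefore vaesenclast(key, a) ^ b = f(a) ^ (key ^ b) =
// vaesenclast(key ^ b, a).)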
.macro	_aesenclast_and_xor	vecs:vararg
.irp i, \vecs
	_vpxor		\i*VL(SRC), RNDKEYLAST, RNDKEY
	vaesenclast	RNDKEY, AESDATA\i, AESDATA\i
.endr
.irp i, \vecs
	_vmovdqu	AESDATA\i, \i*VL(DST)
.endr
.endm

// XOR the keystream blocks in the specified AESDATA vectors with the
// corresponding data.
.macro	_xor_data	vecs:vararg
.irp i, \vecs
	_vpxor		\i*VL(SRC), AESDATA\i, AESDATA\i
.endr
.irp i, \vecs
	_vmovdqu	AESDATA\i, \i*VL(DST)
.endr
.endm

.macro	_aes_ctr_crypt	is_xctr

	// Define register aliases V0-V15 that map to the xmm, ymm, or zmm
	// registers according to the selected Vector Length (VL).
.irp i, 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
	.if VL == 16
		.set	V\i, %xmm\i
	.elseif VL == 32
		.set	V\i, %ymm\i
	.elseif VL == 64
		.set	V\i, %zmm\i
	.else
		.error "Unsupported Vector Length (VL)"
	.endif
.endr

	// Function arguments
	.set	KEY,		%rdi	// Initially points to the start of the
					// crypto_aes_ctx, then is advanced to
					// point to the index 1 round key
	.set	KEY32,		%edi	// Available as temp register after all
					// keystream blocks have been generated
	.set	SRC,		%rsi	// Pointer to next source data
	.set	DST,		%rdx	// Pointer to next destination data
	.set	LEN,		%ecx	// Remaining length in bytes.
					// Note: _load_partial_block relies on
					// this being in %ecx.
	.set	LEN64,		%rcx	// Zero-extend LEN before using!
	.set	LEN8,		%cl
.if \is_xctr
	.set	XCTR_IV_PTR,	%r8	// const u8 iv[AES_BLOCK_SIZE];
	.set	XCTR_CTR,	%r9	// u64 ctr;
.else
	.set	LE_CTR_PTR,	%r8	// const u64 le_ctr[2];
.endif

	// Additional local variables
	.set	RNDKEYLAST_PTR,	%r10
	.set	AESDATA0,	V0
	.set	AESDATA0_XMM,	%xmm0
	.set	AESDATA1,	V1
	.set	AESDATA1_XMM,	%xmm1
	.set	AESDATA2,	V2
	.set	AESDATA3,	V3
	.set	AESDATA4,	V4
	.set	AESDATA5,	V5
	.set	AESDATA6,	V6
	.set	AESDATA7,	V7
.if \is_xctr
	.set	XCTR_IV,	V8
.else
	.set	BSWAP_MASK,	V8
.endif
	.set	LE_CTR,		V9
	.set	LE_CTR_XMM,	%xmm9
	.set	LE_CTR_INC1,	V10
	.set	LE_CTR_INC2,	V11
	.set	RNDKEY0,	V12
	.set	RNDKEYLAST,	V13
	.set	RNDKEY,		V14

	// Create the first vector of counters.
.if \is_xctr
  .if VL == 16
	vmovq		XCTR_CTR, LE_CTR
  .elseif VL == 32
	vmovq		XCTR_CTR, LE_CTR_XMM
	inc		XCTR_CTR
	vmovq		XCTR_CTR, AESDATA0_XMM
	vinserti128	$1, AESDATA0_XMM, LE_CTR, LE_CTR
  .else
	vpbroadcastq	XCTR_CTR, LE_CTR
	vpsrldq		$8, LE_CTR, LE_CTR
	vpaddq		.Lctr_pattern(%rip), LE_CTR, LE_CTR
  .endif
	_vbroadcast128	(XCTR_IV_PTR), XCTR_IV
.else
	_vbroadcast128	(LE_CTR_PTR), LE_CTR
  .if VL > 16
	vpaddq		.Lctr_pattern(%rip), LE_CTR, LE_CTR
  .endif
	_vbroadcast128	.Lbswap_mask(%rip), BSWAP_MASK
.endif

.if VL == 16
	_vbroadcast128	.Lone(%rip), LE_CTR_INC1
.elseif VL == 32
	_vbroadcast128	.Ltwo(%rip), LE_CTR_INC1
.else
	_vbroadcast128	.Lfour(%rip), LE_CTR_INC1
.endif
	vpsllq		$1, LE_CTR_INC1, LE_CTR_INC2

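	// At this point each 128-bit lane of LE_CTR holds the little endian
	// counter for one keystream block: lane i contains the starting
	// counter value plus i, incremented in the low 64 bits only.
	// LE_CTR_INC1 and LE_CTR_INC2 add VL/16 and 2*VL/16 respectively to
	// the low 64 bits of each lane, i.e. they advance the counters by one
	// and two vectors' worth of blocks.
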
	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
	movl		480(KEY), %eax

	// Compute the pointer to the last round key.  The number of AES rounds
	// is the key length in bytes divided by 4, plus 6, and each round key
	// is 16 bytes, so the last round key is at offset 4*keylen + 6*16.
	lea		6*16(KEY, %rax, 4), RNDKEYLAST_PTR

	// Load the zero-th and last round keys.
	_vbroadcast128	(KEY), RNDKEY0
	_vbroadcast128	(RNDKEYLAST_PTR), RNDKEYLAST

	// Make KEY point to the first round key.
	add		$16, KEY

	// This is the main loop, which encrypts 8 vectors of data at a time.
	add		$-8*VL, LEN
	jl		.Lloop_8x_done\@
.Lloop_8x\@:
	_prepare_2_ctr_vecs	\is_xctr, 0, 1
	_prepare_2_ctr_vecs	\is_xctr, 2, 3
	_prepare_2_ctr_vecs	\is_xctr, 4, 5
	_prepare_2_ctr_vecs	\is_xctr, 6, 7
	_aesenc_loop	0,1,2,3,4,5,6,7
	_aesenclast_and_xor	0,1,2,3,4,5,6,7
	sub		$-8*VL, SRC
	sub		$-8*VL, DST
	add		$-8*VL, LEN
	jge		.Lloop_8x\@
.Lloop_8x_done\@:
	sub		$-8*VL, LEN
	jz		.Ldone\@

	// 1 <= LEN < 8*VL.  Generate 2, 4, or 8 more vectors of keystream
	// blocks, depending on the remaining LEN.

	_prepare_2_ctr_vecs	\is_xctr, 0, 1
	_prepare_2_ctr_vecs	\is_xctr, 2, 3
	cmp		$4*VL, LEN
	jle		.Lenc_tail_atmost4vecs\@

	// 4*VL < LEN < 8*VL.  Generate 8 vectors of keystream blocks.  Use the
	// first 4 to XOR 4 full vectors of data.  Then XOR the remaining data.
	_prepare_2_ctr_vecs	\is_xctr, 4, 5
	_prepare_2_ctr_vecs	\is_xctr, 6, 7, final=1
	_aesenc_loop	0,1,2,3,4,5,6,7
	_aesenclast_and_xor	0,1,2,3
	vaesenclast	RNDKEYLAST, AESDATA4, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA5, AESDATA1
	vaesenclast	RNDKEYLAST, AESDATA6, AESDATA2
	vaesenclast	RNDKEYLAST, AESDATA7, AESDATA3
	sub		$-4*VL, SRC
	sub		$-4*VL, DST
	add		$-4*VL, LEN
	cmp		$1*VL-1, LEN
	jle		.Lxor_tail_partial_vec_0\@
	_xor_data	0
	cmp		$2*VL-1, LEN
	jle		.Lxor_tail_partial_vec_1\@
	_xor_data	1
	cmp		$3*VL-1, LEN
	jle		.Lxor_tail_partial_vec_2\@
	_xor_data	2
	cmp		$4*VL-1, LEN
	jle		.Lxor_tail_partial_vec_3\@
	_xor_data	3
	jmp		.Ldone\@

.Lenc_tail_atmost4vecs\@:
	cmp		$2*VL, LEN
	jle		.Lenc_tail_atmost2vecs\@

	// 2*VL < LEN <= 4*VL.  Generate 4 vectors of keystream blocks.  Use the
	// first 2 to XOR 2 full vectors of data.  Then XOR the remaining data.
	_aesenc_loop	0,1,2,3
	_aesenclast_and_xor	0,1
	vaesenclast	RNDKEYLAST, AESDATA2, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA3, AESDATA1
	sub		$-2*VL, SRC
	sub		$-2*VL, DST
	add		$-2*VL, LEN
	jmp		.Lxor_tail_upto2vecs\@

.Lenc_tail_atmost2vecs\@:
	// 1 <= LEN <= 2*VL.  Generate 2 vectors of keystream blocks.  Then XOR
	// the remaining data.
	_aesenc_loop	0,1
	vaesenclast	RNDKEYLAST, AESDATA0, AESDATA0
	vaesenclast	RNDKEYLAST, AESDATA1, AESDATA1

.Lxor_tail_upto2vecs\@:
	cmp		$1*VL-1, LEN
	jle		.Lxor_tail_partial_vec_0\@
	_xor_data	0
	cmp		$2*VL-1, LEN
	jle		.Lxor_tail_partial_vec_1\@
	_xor_data	1
	jmp		.Ldone\@

.Lxor_tail_partial_vec_1\@:
	add		$-1*VL, LEN
	jz		.Ldone\@
	sub		$-1*VL, SRC
	sub		$-1*VL, DST
	_vmovdqa	AESDATA1, AESDATA0
	jmp		.Lxor_tail_partial_vec_0\@

.Lxor_tail_partial_vec_2\@:
	add		$-2*VL, LEN
	jz		.Ldone\@
	sub		$-2*VL, SRC
	sub		$-2*VL, DST
	_vmovdqa	AESDATA2, AESDATA0
	jmp		.Lxor_tail_partial_vec_0\@

.Lxor_tail_partial_vec_3\@:
	add		$-3*VL, LEN
	jz		.Ldone\@
	sub		$-3*VL, SRC
	sub		$-3*VL, DST
	_vmovdqa	AESDATA3, AESDATA0

.Lxor_tail_partial_vec_0\@:
	// XOR the remaining 1 <= LEN < VL bytes.  It's easy if masked
	// loads/stores are available; otherwise it's a bit harder...
.if USE_AVX10
  .if VL <= 32
	mov		$-1, %eax
	bzhi		LEN, %eax, %eax
	kmovd		%eax, %k1
  .else
	mov		$-1, %rax
	bzhi		LEN64, %rax, %rax
	kmovq		%rax, %k1
  .endif
	vmovdqu8	(SRC), AESDATA1{%k1}{z}
	_vpxor		AESDATA1, AESDATA0, AESDATA0
	vmovdqu8	AESDATA0, (DST){%k1}
.else
  .if VL == 32
	cmp		$16, LEN
	jl		1f
	vpxor		(SRC), AESDATA0_XMM, AESDATA1_XMM
	vmovdqu		AESDATA1_XMM, (DST)
	add		$16, SRC
	add		$16, DST
	sub		$16, LEN
	jz		.Ldone\@
	vextracti128	$1, AESDATA0, AESDATA0_XMM
1:
  .endif
	mov		LEN, %r10d
	_load_partial_block	SRC, AESDATA1_XMM, KEY, KEY32
	vpxor		AESDATA1_XMM, AESDATA0_XMM, AESDATA0_XMM
	mov		%r10d, %ecx
	_store_partial_block	AESDATA0_XMM, DST, KEY, KEY32
.endif

.Ldone\@:
.if VL > 16
	vzeroupper
.endif
	RET
.endm

// Below are the definitions of the functions generated by the above macro.
// They have the following prototypes:
//
// void aes_ctr64_crypt_##suffix(const struct crypto_aes_ctx *key,
//				 const u8 *src, u8 *dst, int len,
//				 const u64 le_ctr[2]);
//
// void aes_xctr_crypt_##suffix(const struct crypto_aes_ctx *key,
//				const u8 *src, u8 *dst, int len,
//				const u8 iv[AES_BLOCK_SIZE], u64 ctr);
//
// Both functions generate |len| bytes of keystream, XOR it with the data from
// |src|, and write the result to |dst|.  On non-final calls, |len| must be a
// multiple of 16.  On the final call, |len| can be any value.
//
// aes_ctr64_crypt_* implement "regular" CTR, where the keystream is generated
// from a 128-bit big endian counter that increments by 1 for each AES block.
// HOWEVER, to keep the assembly code simple, some of the counter management is
// left to the caller.  aes_ctr64_crypt_* take the counter in little endian
// form, only increment the low 64 bits internally, do the conversion to big
// endian internally, and don't write the updated counter back to memory.  The
// caller is responsible for converting the starting IV to the little endian
// le_ctr, detecting the (very rare) case where a carry out of the low 64 bits
// is needed and splitting the message at that point with a carry done in
// between, and updating le_ctr after each part if the message is multi-part.
//
// aes_xctr_crypt_* implement XCTR as specified in "Length-preserving
// encryption with HCTR2" (https://eprint.iacr.org/2021/1441.pdf).  XCTR is an
// easier-to-implement variant of CTR that uses little endian byte order and
// eliminates carries.  |ctr| is the per-message block counter starting at 1.
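//
// As a rough, hypothetical illustration of the caller-side counter handling
// described above (this is a sketch, not the actual kernel glue code; the
// helper names and structure are assumptions), a single-part caller for the
// AES-NI/AVX variant might look like:
//
//	static void ctr64_crypt_one_part(const struct crypto_aes_ctx *key,
//					 const u8 *src, u8 *dst, int len,
//					 const u8 iv[16])
//	{
//		u64 le_ctr[2];
//		u64 nblocks = DIV_ROUND_UP(len, 16);
//
//		/* Convert the big endian IV to a little endian counter pair. */
//		le_ctr[0] = get_unaligned_be64(&iv[8]);
//		le_ctr[1] = get_unaligned_be64(&iv[0]);
//
//		if (le_ctr[0] != 0 && nblocks > 0 - le_ctr[0]) {
//			/* Rare case: the low 64 bits wrap in this message. */
//			int part = (0 - le_ctr[0]) * 16;
//
//			aes_ctr64_crypt_aesni_avx(key, src, dst, part, le_ctr);
//			src += part;
//			dst += part;
//			len -= part;
//			le_ctr[0] = 0;
//			le_ctr[1]++;	/* the carry the assembly doesn't do */
//		}
//		aes_ctr64_crypt_aesni_avx(key, src, dst, len, le_ctr);
//	}
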
.set	VL, 16
.set	USE_AVX10, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_aesni_avx)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_aesni_avx)
SYM_TYPED_FUNC_START(aes_xctr_crypt_aesni_avx)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_aesni_avx)

#if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
.set	VL, 32
.set	USE_AVX10, 0
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx2)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx2)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx2)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx2)

.set	VL, 32
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_256)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_256)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_256)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_256)

.set	VL, 64
.set	USE_AVX10, 1
SYM_TYPED_FUNC_START(aes_ctr64_crypt_vaes_avx10_512)
	_aes_ctr_crypt	0
SYM_FUNC_END(aes_ctr64_crypt_vaes_avx10_512)
SYM_TYPED_FUNC_START(aes_xctr_crypt_vaes_avx10_512)
	_aes_ctr_crypt	1
SYM_FUNC_END(aes_xctr_crypt_vaes_avx10_512)
#endif // CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ