/* Do not modify. This file is auto-generated from aesni-mb-x86_64.pl. */
/*
 * NOTE(review): the only functional difference from the generated text is
 * the lane-3 input prefetch in .Loop_enc4x (see the comment there).  The
 * same correction presumably belongs in the generator script named above,
 * otherwise regenerating this file will undo it.
 */
.text

/*
 * aesni_multi_cbc_encrypt
 *
 * CBC-encrypts independent streams four lanes at a time with AES-NI.
 *
 * Inputs, as consumed by the code below:
 *   %rdi  array of 40-byte lane descriptors:
 *            +0   input pointer
 *            +8   output pointer
 *            +16  32-bit signed count of 16-byte blocks
 *            +24  16-byte chaining value (IV)
 *   %rsi  key schedule: round key 0 at +0, one round key every 16 bytes,
 *         and a 32-bit word at +240 that selects the number of rounds
 *         (below 11: 10 rounds, equal to 11: 12 rounds, above 11: 14).
 *   %edx  number of four-lane groups to process.  When it is >= 2 and
 *         bit 28 (268435456) of OPENSSL_ia32cap_P+4 is set, control is
 *         transferred to _avx_cbc_enc_shortcut instead.
 *
 * Stack frame (64-byte aligned):
 *    0(%rsp)      16-byte sink that absorbs stores of finished lanes
 *   16(%rsp)      caller's %rsp (also described by the .cfi_escape)
 *   24(%rsp)      remaining four-lane groups
 *   32..47(%rsp)  remaining block count of each of the four lanes
 *
 * Registers inside the loops:
 *   %r8-%r11   input pointers       %r12-%r15  output pointers
 *   %rbx       byte offset into every stream
 *   %rbp       sink pointer, biased by -%rbx
 *   %edx       blocks left = largest lane count
 *   %xmm2-5    cipher state         %xmm6-9    next plaintext blocks
 *   %xmm10/11  lane counters / decrement mask
 *   %xmm12     round key 0          %xmm0/1    round keys in flight
 *
 * %rbx, %rbp and %r12-%r15 are saved on entry and restored on exit.
 * The AES instructions are emitted as .byte sequences:
 *   102,15,56,220,x = aesenc, 102,15,56,221,x = aesenclast.
 */
.globl	aesni_multi_cbc_encrypt
.type	aesni_multi_cbc_encrypt,@function
.align	32
aesni_multi_cbc_encrypt:
.cfi_startproc
	cmpl	$2,%edx				/* fewer than two groups: 4-lane code */
	jb	.Lenc_non_avx
	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
	testl	$268435456,%ecx			/* bit 28 of the capability word */
	jnz	_avx_cbc_enc_shortcut
	jmp	.Lenc_non_avx
.align	16
.Lenc_non_avx:
	movq	%rsp,%rax			/* %rax = frame anchor (caller's %rsp) */
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp			/* 64-byte aligned scratch frame */
	movq	%rax,16(%rsp)			/* remember the caller's %rsp */
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc4x_body:
	movdqu	(%rsi),%xmm12			/* round key 0 */
	leaq	120(%rsi),%rsi			/* bias key pointer; offsets below are N-120 */
	leaq	80(%rdi),%rdi			/* bias descriptor pointer to the group's middle */

.Lenc4x_loop_grande:
	movl	%edx,24(%rsp)			/* park the group counter */
	xorl	%edx,%edx			/* %edx becomes the largest lane count */

	/* lane 0 */
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx

	movdqu	-56(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8			/* count <= 0: read from the stack instead */

	/* lane 1 */
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx

	movdqu	-16(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9

	/* lane 2 */
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx

	movdqu	24(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10

	/* lane 3 */
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx

	movdqu	64(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	testl	%edx,%edx			/* nothing to do in this group? */
	jz	.Lenc4x_done

	/* state = IV ^ round key 0 ^ first plaintext block, per lane */
	movups	16-120(%rsi),%xmm1
	pxor	%xmm12,%xmm2
	movups	32-120(%rsi),%xmm0
	pxor	%xmm12,%xmm3
	movl	240-120(%rsi),%eax		/* round selector; overwrites the frame anchor */
	pxor	%xmm12,%xmm4
	movdqu	(%r8),%xmm6
	pxor	%xmm12,%xmm5
	movdqu	(%r9),%xmm7
	pxor	%xmm6,%xmm2
	movdqu	(%r10),%xmm8
	pxor	%xmm7,%xmm3
	movdqu	(%r11),%xmm9
	pxor	%xmm8,%xmm4
	pxor	%xmm9,%xmm5
	movdqa	32(%rsp),%xmm10			/* four lane counters */
	xorq	%rbx,%rbx
	jmp	.Loop_enc4x

.align	32
.Loop_enc4x:
	addq	$16,%rbx			/* offset of the next block */
	leaq	16(%rsp),%rbp
	movl	$1,%ecx
	subq	%rbx,%rbp			/* sink: (%rbp,%rbx) == 16(%rsp) */

.byte	102,15,56,220,209			/* aesenc %xmm1,%xmm2 */
	prefetcht0	31(%r8,%rbx,1)
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,220,217			/* aesenc %xmm1,%xmm3 */
	prefetcht0	31(%r10,%rbx,1)
	/*
	 * Fixed: this used to repeat the %r10 prefetch, leaving lane 3's
	 * input (%r11) as the only stream never prefetched.  Hint only;
	 * results are unaffected.
	 */
	prefetcht0	31(%r11,%rbx,1)
.byte	102,15,56,220,225			/* aesenc %xmm1,%xmm4 */
.byte	102,15,56,220,233			/* aesenc %xmm1,%xmm5 */
	movups	48-120(%rsi),%xmm1
	/*
	 * Per lane: flags = 1 - remaining count.  With at most one block
	 * left the input pointer is redirected to the sink; with none left
	 * the output pointer is redirected as well.
	 */
	cmpl	32(%rsp),%ecx
.byte	102,15,56,220,208			/* aesenc %xmm0,%xmm2 */
.byte	102,15,56,220,216			/* aesenc %xmm0,%xmm3 */
.byte	102,15,56,220,224			/* aesenc %xmm0,%xmm4 */
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
.byte	102,15,56,220,232			/* aesenc %xmm0,%xmm5 */
	movups	-56(%rsi),%xmm0
	cmpl	36(%rsp),%ecx
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,220,233
	movups	-40(%rsi),%xmm1
	cmpl	40(%rsp),%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,220,232
	movups	-24(%rsi),%xmm0
	cmpl	44(%rsp),%ecx
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,220,233
	movups	-8(%rsi),%xmm1
	movdqa	%xmm10,%xmm11
.byte	102,15,56,220,208
	prefetcht0	15(%r12,%rbx,1)
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,220,216
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	128-120(%rsi),%xmm0
	pxor	%xmm12,%xmm12			/* borrow %xmm12 as a zero vector */

.byte	102,15,56,220,209
	pcmpgtd	%xmm12,%xmm11			/* -1 in every lane whose count is > 0 */
	movdqu	-120(%rsi),%xmm12		/* restore round key 0 */
.byte	102,15,56,220,217
	paddd	%xmm11,%xmm10			/* decrement the still-active counters */
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	144-120(%rsi),%xmm1

	cmpl	$11,%eax			/* flags feed both jb and je below */

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	160-120(%rsi),%xmm0

	jb	.Lenc4x_tail			/* 10-round schedule */

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	176-120(%rsi),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	192-120(%rsi),%xmm0

	je	.Lenc4x_tail			/* 12-round schedule */

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	208-120(%rsi),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	224-120(%rsi),%xmm0
	jmp	.Lenc4x_tail

.align	32
.Lenc4x_tail:
	/* last two rounds, interleaved with loading the next plaintext */
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movdqu	(%r8,%rbx,1),%xmm6
	movdqu	16-120(%rsi),%xmm1		/* reload the first two round keys */

.byte	102,15,56,221,208			/* aesenclast %xmm0,%xmm2 */
	movdqu	(%r9,%rbx,1),%xmm7
	pxor	%xmm12,%xmm6
.byte	102,15,56,221,216			/* aesenclast %xmm0,%xmm3 */
	movdqu	(%r10,%rbx,1),%xmm8
	pxor	%xmm12,%xmm7
.byte	102,15,56,221,224			/* aesenclast %xmm0,%xmm4 */
	movdqu	(%r11,%rbx,1),%xmm9
	pxor	%xmm12,%xmm8
.byte	102,15,56,221,232			/* aesenclast %xmm0,%xmm5 */
	movdqu	32-120(%rsi),%xmm0
	pxor	%xmm12,%xmm9

	/* store ciphertext, then chain it into the next block's state */
	movups	%xmm2,-16(%r12,%rbx,1)
	pxor	%xmm6,%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	pxor	%xmm7,%xmm3
	movups	%xmm4,-16(%r14,%rbx,1)
	pxor	%xmm8,%xmm4
	movups	%xmm5,-16(%r15,%rbx,1)
	pxor	%xmm9,%xmm5

	decl	%edx
	jnz	.Loop_enc4x

	movq	16(%rsp),%rax			/* recover the frame anchor */
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx			/* recover the group counter */

	leaq	160(%rdi),%rdi			/* next four descriptors */
	decl	%edx
	jnz	.Lenc4x_loop_grande

.Lenc4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc4x_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt

/*
 * aesni_multi_cbc_decrypt
 *
 * CBC-decrypts independent streams four lanes at a time with AES-NI.
 * Arguments, descriptor layout, stack frame and loop registers are the
 * same as read by aesni_multi_cbc_encrypt above, with these differences:
 *   - %xmm2-5 hold the ciphertext block being decrypted (pre-XORed with
 *     round key 0) and %xmm6-9 hold the previous ciphertext block (the
 *     IV for the first block);
 *   - the previous block is XORed into the last round key so that the
 *     final aesdeclast also performs the CBC XOR;
 *   - the AVX-capable dispatch target is _avx_cbc_dec_shortcut.
 * AES instructions are emitted as .byte sequences:
 *   102,15,56,222,x = aesdec, 102,[65,]15,56,223,x = aesdeclast.
 */
.globl	aesni_multi_cbc_decrypt
.type	aesni_multi_cbc_decrypt,@function
.align	32
aesni_multi_cbc_decrypt:
.cfi_startproc
	cmpl	$2,%edx				/* fewer than two groups: 4-lane code */
	jb	.Ldec_non_avx
	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
	testl	$268435456,%ecx			/* bit 28 of the capability word */
	jnz	_avx_cbc_dec_shortcut
	jmp	.Ldec_non_avx
.align	16
.Ldec_non_avx:
	movq	%rsp,%rax			/* %rax = frame anchor (caller's %rsp) */
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp			/* 64-byte aligned scratch frame */
	movq	%rax,16(%rsp)			/* remember the caller's %rsp */
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec4x_body:
	movdqu	(%rsi),%xmm12			/* round key 0 */
	leaq	120(%rsi),%rsi			/* bias key pointer; offsets below are N-120 */
	leaq	80(%rdi),%rdi			/* bias descriptor pointer to the group's middle */

.Ldec4x_loop_grande:
	movl	%edx,24(%rsp)			/* park the group counter */
	xorl	%edx,%edx			/* %edx becomes the largest lane count */

	/* lane 0 */
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx

	movdqu	-56(%rdi),%xmm6
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8			/* count <= 0: read from the stack instead */

	/* lane 1 */
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx

	movdqu	-16(%rdi),%xmm7
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9

	/* lane 2 */
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx

	movdqu	24(%rdi),%xmm8
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10

	/* lane 3 */
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx

	movdqu	64(%rdi),%xmm9
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	testl	%edx,%edx			/* nothing to do in this group? */
	jz	.Ldec4x_done

	/* state = first ciphertext block ^ round key 0, per lane */
	movups	16-120(%rsi),%xmm1
	movups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax		/* round selector; overwrites the frame anchor */
	movdqu	(%r8),%xmm2
	movdqu	(%r9),%xmm3
	pxor	%xmm12,%xmm2
	movdqu	(%r10),%xmm4
	pxor	%xmm12,%xmm3
	movdqu	(%r11),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5
	movdqa	32(%rsp),%xmm10			/* four lane counters */
	xorq	%rbx,%rbx
	jmp	.Loop_dec4x

.align	32
.Loop_dec4x:
	addq	$16,%rbx			/* offset of the next block */
	leaq	16(%rsp),%rbp
	movl	$1,%ecx
	subq	%rbx,%rbp			/* sink: (%rbp,%rbx) == 16(%rsp) */

.byte	102,15,56,222,209			/* aesdec %xmm1,%xmm2 */
	prefetcht0	31(%r8,%rbx,1)
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,222,217			/* aesdec %xmm1,%xmm3 */
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)
.byte	102,15,56,222,225			/* aesdec %xmm1,%xmm4 */
.byte	102,15,56,222,233			/* aesdec %xmm1,%xmm5 */
	movups	48-120(%rsi),%xmm1
	/*
	 * Per lane: flags = 1 - remaining count.  With at most one block
	 * left the input pointer is redirected to the sink; with none left
	 * the output pointer is redirected as well.
	 */
	cmpl	32(%rsp),%ecx
.byte	102,15,56,222,208			/* aesdec %xmm0,%xmm2 */
.byte	102,15,56,222,216			/* aesdec %xmm0,%xmm3 */
.byte	102,15,56,222,224			/* aesdec %xmm0,%xmm4 */
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
.byte	102,15,56,222,232			/* aesdec %xmm0,%xmm5 */
	movups	-56(%rsi),%xmm0
	cmpl	36(%rsp),%ecx
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,222,233
	movups	-40(%rsi),%xmm1
	cmpl	40(%rsp),%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,222,232
	movups	-24(%rsi),%xmm0
	cmpl	44(%rsp),%ecx
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,222,233
	movups	-8(%rsi),%xmm1
	movdqa	%xmm10,%xmm11
.byte	102,15,56,222,208
	prefetcht0	15(%r12,%rbx,1)
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,222,216
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	128-120(%rsi),%xmm0
	pxor	%xmm12,%xmm12			/* borrow %xmm12 as a zero vector */

.byte	102,15,56,222,209
	pcmpgtd	%xmm12,%xmm11			/* -1 in every lane whose count is > 0 */
	movdqu	-120(%rsi),%xmm12		/* restore round key 0 */
.byte	102,15,56,222,217
	paddd	%xmm11,%xmm10			/* decrement the still-active counters */
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	144-120(%rsi),%xmm1

	cmpl	$11,%eax			/* flags feed both jb and je below */

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	160-120(%rsi),%xmm0

	jb	.Ldec4x_tail			/* 10-round schedule */

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	176-120(%rsi),%xmm1

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	192-120(%rsi),%xmm0

	je	.Ldec4x_tail			/* 12-round schedule */

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	208-120(%rsi),%xmm1

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	224-120(%rsi),%xmm0
	jmp	.Ldec4x_tail

.align	32
.Ldec4x_tail:
	/* fold the previous ciphertext into the last round key (%xmm0) */
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,222,233
	movdqu	16-120(%rsi),%xmm1		/* reload the first two round keys */
	pxor	%xmm0,%xmm8
	pxor	%xmm0,%xmm9
	movdqu	32-120(%rsi),%xmm0

.byte	102,15,56,223,214			/* aesdeclast %xmm6,%xmm2 */
.byte	102,15,56,223,223			/* aesdeclast %xmm7,%xmm3 */
	movdqu	-16(%r8,%rbx,1),%xmm6		/* block just consumed = next chaining value */
	movdqu	-16(%r9,%rbx,1),%xmm7
.byte	102,65,15,56,223,224			/* aesdeclast %xmm8,%xmm4 */
.byte	102,65,15,56,223,233			/* aesdeclast %xmm9,%xmm5 */
	movdqu	-16(%r10,%rbx,1),%xmm8
	movdqu	-16(%r11,%rbx,1),%xmm9

	/* store plaintext, then load the next ciphertext ^ round key 0 */
	movups	%xmm2,-16(%r12,%rbx,1)
	movdqu	(%r8,%rbx,1),%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	movdqu	(%r9,%rbx,1),%xmm3
	pxor	%xmm12,%xmm2
	movups	%xmm4,-16(%r14,%rbx,1)
	movdqu	(%r10,%rbx,1),%xmm4
	pxor	%xmm12,%xmm3
	movups	%xmm5,-16(%r15,%rbx,1)
	movdqu	(%r11,%rbx,1),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5

	decl	%edx
	jnz	.Loop_dec4x

	movq	16(%rsp),%rax			/* recover the frame anchor */
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx			/* recover the group counter */

	leaq	160(%rdi),%rdi			/* next four descriptors */
	decl	%edx
	jnz	.Ldec4x_loop_grande

.Ldec4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec4x_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
577bc3d5698SJohn Baldwin.type aesni_multi_cbc_encrypt_avx,@function 578bc3d5698SJohn Baldwin.align 32 579bc3d5698SJohn Baldwinaesni_multi_cbc_encrypt_avx: 580bc3d5698SJohn Baldwin.cfi_startproc 581bc3d5698SJohn Baldwin_avx_cbc_enc_shortcut: 582bc3d5698SJohn Baldwin movq %rsp,%rax 583bc3d5698SJohn Baldwin.cfi_def_cfa_register %rax 584bc3d5698SJohn Baldwin pushq %rbx 585bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 586bc3d5698SJohn Baldwin pushq %rbp 587bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 588bc3d5698SJohn Baldwin pushq %r12 589bc3d5698SJohn Baldwin.cfi_offset %r12,-32 590bc3d5698SJohn Baldwin pushq %r13 591bc3d5698SJohn Baldwin.cfi_offset %r13,-40 592bc3d5698SJohn Baldwin pushq %r14 593bc3d5698SJohn Baldwin.cfi_offset %r14,-48 594bc3d5698SJohn Baldwin pushq %r15 595bc3d5698SJohn Baldwin.cfi_offset %r15,-56 596bc3d5698SJohn Baldwin 597bc3d5698SJohn Baldwin 598bc3d5698SJohn Baldwin 599bc3d5698SJohn Baldwin 600bc3d5698SJohn Baldwin 601bc3d5698SJohn Baldwin 602bc3d5698SJohn Baldwin 603bc3d5698SJohn Baldwin 604bc3d5698SJohn Baldwin subq $192,%rsp 605bc3d5698SJohn Baldwin andq $-128,%rsp 606bc3d5698SJohn Baldwin movq %rax,16(%rsp) 607bc3d5698SJohn Baldwin.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 608bc3d5698SJohn Baldwin 609bc3d5698SJohn Baldwin.Lenc8x_body: 610bc3d5698SJohn Baldwin vzeroupper 611bc3d5698SJohn Baldwin vmovdqu (%rsi),%xmm15 612bc3d5698SJohn Baldwin leaq 120(%rsi),%rsi 613bc3d5698SJohn Baldwin leaq
160(%rdi),%rdi 614bc3d5698SJohn Baldwin shrl $1,%edx 615bc3d5698SJohn Baldwin 616bc3d5698SJohn Baldwin.Lenc8x_loop_grande: 617bc3d5698SJohn Baldwin 618bc3d5698SJohn Baldwin xorl %edx,%edx 619*c0855eaaSJohn Baldwin 620bc3d5698SJohn Baldwin movl -144(%rdi),%ecx 621*c0855eaaSJohn Baldwin 622bc3d5698SJohn Baldwin movq -160(%rdi),%r8 623bc3d5698SJohn Baldwin cmpl %edx,%ecx 624*c0855eaaSJohn Baldwin 625bc3d5698SJohn Baldwin movq -152(%rdi),%rbx 626bc3d5698SJohn Baldwin cmovgl %ecx,%edx 627bc3d5698SJohn Baldwin testl %ecx,%ecx 628*c0855eaaSJohn Baldwin 629bc3d5698SJohn Baldwin vmovdqu -136(%rdi),%xmm2 630bc3d5698SJohn Baldwin movl %ecx,32(%rsp) 631bc3d5698SJohn Baldwin cmovleq %rsp,%r8 632bc3d5698SJohn Baldwin subq %r8,%rbx 633bc3d5698SJohn Baldwin movq %rbx,64(%rsp) 634*c0855eaaSJohn Baldwin 635bc3d5698SJohn Baldwin movl -104(%rdi),%ecx 636*c0855eaaSJohn Baldwin 637bc3d5698SJohn Baldwin movq -120(%rdi),%r9 638bc3d5698SJohn Baldwin cmpl %edx,%ecx 639*c0855eaaSJohn Baldwin 640bc3d5698SJohn Baldwin movq -112(%rdi),%rbp 641bc3d5698SJohn Baldwin cmovgl %ecx,%edx 642bc3d5698SJohn Baldwin testl %ecx,%ecx 643*c0855eaaSJohn Baldwin 644bc3d5698SJohn Baldwin vmovdqu -96(%rdi),%xmm3 645bc3d5698SJohn Baldwin movl %ecx,36(%rsp) 646bc3d5698SJohn Baldwin cmovleq %rsp,%r9 647bc3d5698SJohn Baldwin subq %r9,%rbp 648bc3d5698SJohn Baldwin movq %rbp,72(%rsp) 649*c0855eaaSJohn Baldwin 650bc3d5698SJohn Baldwin movl -64(%rdi),%ecx 651*c0855eaaSJohn Baldwin 652bc3d5698SJohn Baldwin movq -80(%rdi),%r10 653bc3d5698SJohn Baldwin cmpl %edx,%ecx 654*c0855eaaSJohn Baldwin 655bc3d5698SJohn Baldwin movq -72(%rdi),%rbp 656bc3d5698SJohn Baldwin cmovgl %ecx,%edx 657bc3d5698SJohn Baldwin testl %ecx,%ecx 658*c0855eaaSJohn Baldwin 659bc3d5698SJohn Baldwin vmovdqu -56(%rdi),%xmm4 660bc3d5698SJohn Baldwin movl %ecx,40(%rsp) 661bc3d5698SJohn Baldwin cmovleq %rsp,%r10 662bc3d5698SJohn Baldwin subq %r10,%rbp 663bc3d5698SJohn Baldwin movq %rbp,80(%rsp) 664*c0855eaaSJohn Baldwin 665bc3d5698SJohn Baldwin movl 
-24(%rdi),%ecx 666*c0855eaaSJohn Baldwin 667bc3d5698SJohn Baldwin movq -40(%rdi),%r11 668bc3d5698SJohn Baldwin cmpl %edx,%ecx 669*c0855eaaSJohn Baldwin 670bc3d5698SJohn Baldwin movq -32(%rdi),%rbp 671bc3d5698SJohn Baldwin cmovgl %ecx,%edx 672bc3d5698SJohn Baldwin testl %ecx,%ecx 673*c0855eaaSJohn Baldwin 674bc3d5698SJohn Baldwin vmovdqu -16(%rdi),%xmm5 675bc3d5698SJohn Baldwin movl %ecx,44(%rsp) 676bc3d5698SJohn Baldwin cmovleq %rsp,%r11 677bc3d5698SJohn Baldwin subq %r11,%rbp 678bc3d5698SJohn Baldwin movq %rbp,88(%rsp) 679*c0855eaaSJohn Baldwin 680bc3d5698SJohn Baldwin movl 16(%rdi),%ecx 681*c0855eaaSJohn Baldwin 682bc3d5698SJohn Baldwin movq 0(%rdi),%r12 683bc3d5698SJohn Baldwin cmpl %edx,%ecx 684*c0855eaaSJohn Baldwin 685bc3d5698SJohn Baldwin movq 8(%rdi),%rbp 686bc3d5698SJohn Baldwin cmovgl %ecx,%edx 687bc3d5698SJohn Baldwin testl %ecx,%ecx 688*c0855eaaSJohn Baldwin 689bc3d5698SJohn Baldwin vmovdqu 24(%rdi),%xmm6 690bc3d5698SJohn Baldwin movl %ecx,48(%rsp) 691bc3d5698SJohn Baldwin cmovleq %rsp,%r12 692bc3d5698SJohn Baldwin subq %r12,%rbp 693bc3d5698SJohn Baldwin movq %rbp,96(%rsp) 694*c0855eaaSJohn Baldwin 695bc3d5698SJohn Baldwin movl 56(%rdi),%ecx 696*c0855eaaSJohn Baldwin 697bc3d5698SJohn Baldwin movq 40(%rdi),%r13 698bc3d5698SJohn Baldwin cmpl %edx,%ecx 699*c0855eaaSJohn Baldwin 700bc3d5698SJohn Baldwin movq 48(%rdi),%rbp 701bc3d5698SJohn Baldwin cmovgl %ecx,%edx 702bc3d5698SJohn Baldwin testl %ecx,%ecx 703*c0855eaaSJohn Baldwin 704bc3d5698SJohn Baldwin vmovdqu 64(%rdi),%xmm7 705bc3d5698SJohn Baldwin movl %ecx,52(%rsp) 706bc3d5698SJohn Baldwin cmovleq %rsp,%r13 707bc3d5698SJohn Baldwin subq %r13,%rbp 708bc3d5698SJohn Baldwin movq %rbp,104(%rsp) 709*c0855eaaSJohn Baldwin 710bc3d5698SJohn Baldwin movl 96(%rdi),%ecx 711*c0855eaaSJohn Baldwin 712bc3d5698SJohn Baldwin movq 80(%rdi),%r14 713bc3d5698SJohn Baldwin cmpl %edx,%ecx 714*c0855eaaSJohn Baldwin 715bc3d5698SJohn Baldwin movq 88(%rdi),%rbp 716bc3d5698SJohn Baldwin cmovgl %ecx,%edx 717bc3d5698SJohn Baldwin 
testl %ecx,%ecx 718*c0855eaaSJohn Baldwin 719bc3d5698SJohn Baldwin vmovdqu 104(%rdi),%xmm8 720bc3d5698SJohn Baldwin movl %ecx,56(%rsp) 721bc3d5698SJohn Baldwin cmovleq %rsp,%r14 722bc3d5698SJohn Baldwin subq %r14,%rbp 723bc3d5698SJohn Baldwin movq %rbp,112(%rsp) 724*c0855eaaSJohn Baldwin 725bc3d5698SJohn Baldwin movl 136(%rdi),%ecx 726*c0855eaaSJohn Baldwin 727bc3d5698SJohn Baldwin movq 120(%rdi),%r15 728bc3d5698SJohn Baldwin cmpl %edx,%ecx 729*c0855eaaSJohn Baldwin 730bc3d5698SJohn Baldwin movq 128(%rdi),%rbp 731bc3d5698SJohn Baldwin cmovgl %ecx,%edx 732bc3d5698SJohn Baldwin testl %ecx,%ecx 733*c0855eaaSJohn Baldwin 734bc3d5698SJohn Baldwin vmovdqu 144(%rdi),%xmm9 735bc3d5698SJohn Baldwin movl %ecx,60(%rsp) 736bc3d5698SJohn Baldwin cmovleq %rsp,%r15 737bc3d5698SJohn Baldwin subq %r15,%rbp 738bc3d5698SJohn Baldwin movq %rbp,120(%rsp) 739bc3d5698SJohn Baldwin testl %edx,%edx 740bc3d5698SJohn Baldwin jz .Lenc8x_done 741bc3d5698SJohn Baldwin 742bc3d5698SJohn Baldwin vmovups 16-120(%rsi),%xmm1 743bc3d5698SJohn Baldwin vmovups 32-120(%rsi),%xmm0 744bc3d5698SJohn Baldwin movl 240-120(%rsi),%eax 745bc3d5698SJohn Baldwin 746bc3d5698SJohn Baldwin vpxor (%r8),%xmm15,%xmm10 747bc3d5698SJohn Baldwin leaq 128(%rsp),%rbp 748bc3d5698SJohn Baldwin vpxor (%r9),%xmm15,%xmm11 749bc3d5698SJohn Baldwin vpxor (%r10),%xmm15,%xmm12 750bc3d5698SJohn Baldwin vpxor (%r11),%xmm15,%xmm13 751bc3d5698SJohn Baldwin vpxor %xmm10,%xmm2,%xmm2 752bc3d5698SJohn Baldwin vpxor (%r12),%xmm15,%xmm10 753bc3d5698SJohn Baldwin vpxor %xmm11,%xmm3,%xmm3 754bc3d5698SJohn Baldwin vpxor (%r13),%xmm15,%xmm11 755bc3d5698SJohn Baldwin vpxor %xmm12,%xmm4,%xmm4 756bc3d5698SJohn Baldwin vpxor (%r14),%xmm15,%xmm12 757bc3d5698SJohn Baldwin vpxor %xmm13,%xmm5,%xmm5 758bc3d5698SJohn Baldwin vpxor (%r15),%xmm15,%xmm13 759bc3d5698SJohn Baldwin vpxor %xmm10,%xmm6,%xmm6 760bc3d5698SJohn Baldwin movl $1,%ecx 761bc3d5698SJohn Baldwin vpxor %xmm11,%xmm7,%xmm7 762bc3d5698SJohn Baldwin vpxor %xmm12,%xmm8,%xmm8 763bc3d5698SJohn 
Baldwin vpxor %xmm13,%xmm9,%xmm9 764bc3d5698SJohn Baldwin jmp .Loop_enc8x 765bc3d5698SJohn Baldwin 766bc3d5698SJohn Baldwin.align 32 767bc3d5698SJohn Baldwin.Loop_enc8x: 768bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm2,%xmm2 769bc3d5698SJohn Baldwin cmpl 32+0(%rsp),%ecx 770bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm3,%xmm3 771bc3d5698SJohn Baldwin prefetcht0 31(%r8) 772bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm4,%xmm4 773bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm5,%xmm5 774bc3d5698SJohn Baldwin leaq (%r8,%rbx,1),%rbx 775bc3d5698SJohn Baldwin cmovgeq %rsp,%r8 776bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm6,%xmm6 777bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 778bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm7,%xmm7 779bc3d5698SJohn Baldwin subq %r8,%rbx 780bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm8,%xmm8 781bc3d5698SJohn Baldwin vpxor 16(%r8),%xmm15,%xmm10 782bc3d5698SJohn Baldwin movq %rbx,64+0(%rsp) 783bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm9,%xmm9 784bc3d5698SJohn Baldwin vmovups -72(%rsi),%xmm1 785bc3d5698SJohn Baldwin leaq 16(%r8,%rbx,1),%r8 786bc3d5698SJohn Baldwin vmovdqu %xmm10,0(%rbp) 787bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm2,%xmm2 788bc3d5698SJohn Baldwin cmpl 32+4(%rsp),%ecx 789bc3d5698SJohn Baldwin movq 64+8(%rsp),%rbx 790bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm3,%xmm3 791bc3d5698SJohn Baldwin prefetcht0 31(%r9) 792bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm4,%xmm4 793bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm5,%xmm5 794bc3d5698SJohn Baldwin leaq (%r9,%rbx,1),%rbx 795bc3d5698SJohn Baldwin cmovgeq %rsp,%r9 796bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm6,%xmm6 797bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 798bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm7,%xmm7 799bc3d5698SJohn Baldwin subq %r9,%rbx 800bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm8,%xmm8 801bc3d5698SJohn Baldwin vpxor 16(%r9),%xmm15,%xmm11 802bc3d5698SJohn Baldwin movq %rbx,64+8(%rsp) 803bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm9,%xmm9 804bc3d5698SJohn Baldwin vmovups -56(%rsi),%xmm0 805bc3d5698SJohn Baldwin leaq 16(%r9,%rbx,1),%r9 
806bc3d5698SJohn Baldwin vmovdqu %xmm11,16(%rbp) 807bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm2,%xmm2 808bc3d5698SJohn Baldwin cmpl 32+8(%rsp),%ecx 809bc3d5698SJohn Baldwin movq 64+16(%rsp),%rbx 810bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm3,%xmm3 811bc3d5698SJohn Baldwin prefetcht0 31(%r10) 812bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm4,%xmm4 813bc3d5698SJohn Baldwin prefetcht0 15(%r8) 814bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm5,%xmm5 815bc3d5698SJohn Baldwin leaq (%r10,%rbx,1),%rbx 816bc3d5698SJohn Baldwin cmovgeq %rsp,%r10 817bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm6,%xmm6 818bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 819bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm7,%xmm7 820bc3d5698SJohn Baldwin subq %r10,%rbx 821bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm8,%xmm8 822bc3d5698SJohn Baldwin vpxor 16(%r10),%xmm15,%xmm12 823bc3d5698SJohn Baldwin movq %rbx,64+16(%rsp) 824bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm9,%xmm9 825bc3d5698SJohn Baldwin vmovups -40(%rsi),%xmm1 826bc3d5698SJohn Baldwin leaq 16(%r10,%rbx,1),%r10 827bc3d5698SJohn Baldwin vmovdqu %xmm12,32(%rbp) 828bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm2,%xmm2 829bc3d5698SJohn Baldwin cmpl 32+12(%rsp),%ecx 830bc3d5698SJohn Baldwin movq 64+24(%rsp),%rbx 831bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm3,%xmm3 832bc3d5698SJohn Baldwin prefetcht0 31(%r11) 833bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm4,%xmm4 834bc3d5698SJohn Baldwin prefetcht0 15(%r9) 835bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm5,%xmm5 836bc3d5698SJohn Baldwin leaq (%r11,%rbx,1),%rbx 837bc3d5698SJohn Baldwin cmovgeq %rsp,%r11 838bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm6,%xmm6 839bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 840bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm7,%xmm7 841bc3d5698SJohn Baldwin subq %r11,%rbx 842bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm8,%xmm8 843bc3d5698SJohn Baldwin vpxor 16(%r11),%xmm15,%xmm13 844bc3d5698SJohn Baldwin movq %rbx,64+24(%rsp) 845bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm9,%xmm9 846bc3d5698SJohn Baldwin vmovups -24(%rsi),%xmm0 847bc3d5698SJohn 
Baldwin leaq 16(%r11,%rbx,1),%r11 848bc3d5698SJohn Baldwin vmovdqu %xmm13,48(%rbp) 849bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm2,%xmm2 850bc3d5698SJohn Baldwin cmpl 32+16(%rsp),%ecx 851bc3d5698SJohn Baldwin movq 64+32(%rsp),%rbx 852bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm3,%xmm3 853bc3d5698SJohn Baldwin prefetcht0 31(%r12) 854bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm4,%xmm4 855bc3d5698SJohn Baldwin prefetcht0 15(%r10) 856bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm5,%xmm5 857bc3d5698SJohn Baldwin leaq (%r12,%rbx,1),%rbx 858bc3d5698SJohn Baldwin cmovgeq %rsp,%r12 859bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm6,%xmm6 860bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 861bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm7,%xmm7 862bc3d5698SJohn Baldwin subq %r12,%rbx 863bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm8,%xmm8 864bc3d5698SJohn Baldwin vpxor 16(%r12),%xmm15,%xmm10 865bc3d5698SJohn Baldwin movq %rbx,64+32(%rsp) 866bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm9,%xmm9 867bc3d5698SJohn Baldwin vmovups -8(%rsi),%xmm1 868bc3d5698SJohn Baldwin leaq 16(%r12,%rbx,1),%r12 869bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm2,%xmm2 870bc3d5698SJohn Baldwin cmpl 32+20(%rsp),%ecx 871bc3d5698SJohn Baldwin movq 64+40(%rsp),%rbx 872bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm3,%xmm3 873bc3d5698SJohn Baldwin prefetcht0 31(%r13) 874bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm4,%xmm4 875bc3d5698SJohn Baldwin prefetcht0 15(%r11) 876bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm5,%xmm5 877bc3d5698SJohn Baldwin leaq (%rbx,%r13,1),%rbx 878bc3d5698SJohn Baldwin cmovgeq %rsp,%r13 879bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm6,%xmm6 880bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 881bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm7,%xmm7 882bc3d5698SJohn Baldwin subq %r13,%rbx 883bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm8,%xmm8 884bc3d5698SJohn Baldwin vpxor 16(%r13),%xmm15,%xmm11 885bc3d5698SJohn Baldwin movq %rbx,64+40(%rsp) 886bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm9,%xmm9 887bc3d5698SJohn Baldwin vmovups 8(%rsi),%xmm0 888bc3d5698SJohn Baldwin leaq 
16(%r13,%rbx,1),%r13 889bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm2,%xmm2 890bc3d5698SJohn Baldwin cmpl 32+24(%rsp),%ecx 891bc3d5698SJohn Baldwin movq 64+48(%rsp),%rbx 892bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm3,%xmm3 893bc3d5698SJohn Baldwin prefetcht0 31(%r14) 894bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm4,%xmm4 895bc3d5698SJohn Baldwin prefetcht0 15(%r12) 896bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm5,%xmm5 897bc3d5698SJohn Baldwin leaq (%r14,%rbx,1),%rbx 898bc3d5698SJohn Baldwin cmovgeq %rsp,%r14 899bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm6,%xmm6 900bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 901bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm7,%xmm7 902bc3d5698SJohn Baldwin subq %r14,%rbx 903bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm8,%xmm8 904bc3d5698SJohn Baldwin vpxor 16(%r14),%xmm15,%xmm12 905bc3d5698SJohn Baldwin movq %rbx,64+48(%rsp) 906bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm9,%xmm9 907bc3d5698SJohn Baldwin vmovups 24(%rsi),%xmm1 908bc3d5698SJohn Baldwin leaq 16(%r14,%rbx,1),%r14 909bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm2,%xmm2 910bc3d5698SJohn Baldwin cmpl 32+28(%rsp),%ecx 911bc3d5698SJohn Baldwin movq 64+56(%rsp),%rbx 912bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm3,%xmm3 913bc3d5698SJohn Baldwin prefetcht0 31(%r15) 914bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm4,%xmm4 915bc3d5698SJohn Baldwin prefetcht0 15(%r13) 916bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm5,%xmm5 917bc3d5698SJohn Baldwin leaq (%r15,%rbx,1),%rbx 918bc3d5698SJohn Baldwin cmovgeq %rsp,%r15 919bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm6,%xmm6 920bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 921bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm7,%xmm7 922bc3d5698SJohn Baldwin subq %r15,%rbx 923bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm8,%xmm8 924bc3d5698SJohn Baldwin vpxor 16(%r15),%xmm15,%xmm13 925bc3d5698SJohn Baldwin movq %rbx,64+56(%rsp) 926bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm9,%xmm9 927bc3d5698SJohn Baldwin vmovups 40(%rsi),%xmm0 928bc3d5698SJohn Baldwin leaq 16(%r15,%rbx,1),%r15 929bc3d5698SJohn Baldwin vmovdqu 
32(%rsp),%xmm14 930bc3d5698SJohn Baldwin prefetcht0 15(%r14) 931bc3d5698SJohn Baldwin prefetcht0 15(%r15) 932bc3d5698SJohn Baldwin cmpl $11,%eax 933bc3d5698SJohn Baldwin jb .Lenc8x_tail 934bc3d5698SJohn Baldwin 935bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm2,%xmm2 936bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm3,%xmm3 937bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm4,%xmm4 938bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm5,%xmm5 939bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm6,%xmm6 940bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm7,%xmm7 941bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm8,%xmm8 942bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm9,%xmm9 943bc3d5698SJohn Baldwin vmovups 176-120(%rsi),%xmm1 944bc3d5698SJohn Baldwin 945bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm2,%xmm2 946bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm3,%xmm3 947bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm4,%xmm4 948bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm5,%xmm5 949bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm6,%xmm6 950bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm7,%xmm7 951bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm8,%xmm8 952bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm9,%xmm9 953bc3d5698SJohn Baldwin vmovups 192-120(%rsi),%xmm0 954bc3d5698SJohn Baldwin je .Lenc8x_tail 955bc3d5698SJohn Baldwin 956bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm2,%xmm2 957bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm3,%xmm3 958bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm4,%xmm4 959bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm5,%xmm5 960bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm6,%xmm6 961bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm7,%xmm7 962bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm8,%xmm8 963bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm9,%xmm9 964bc3d5698SJohn Baldwin vmovups 208-120(%rsi),%xmm1 965bc3d5698SJohn Baldwin 966bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm2,%xmm2 967bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm3,%xmm3 968bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm4,%xmm4 969bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm5,%xmm5 970bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm6,%xmm6 971bc3d5698SJohn Baldwin vaesenc 
%xmm0,%xmm7,%xmm7 972bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm8,%xmm8 973bc3d5698SJohn Baldwin vaesenc %xmm0,%xmm9,%xmm9 974bc3d5698SJohn Baldwin vmovups 224-120(%rsi),%xmm0 975bc3d5698SJohn Baldwin 976bc3d5698SJohn Baldwin.Lenc8x_tail: 977bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm2,%xmm2 978bc3d5698SJohn Baldwin vpxor %xmm15,%xmm15,%xmm15 979bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm3,%xmm3 980bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm4,%xmm4 981bc3d5698SJohn Baldwin vpcmpgtd %xmm15,%xmm14,%xmm15 982bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm5,%xmm5 983bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm6,%xmm6 984bc3d5698SJohn Baldwin vpaddd %xmm14,%xmm15,%xmm15 985bc3d5698SJohn Baldwin vmovdqu 48(%rsp),%xmm14 986bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm7,%xmm7 987bc3d5698SJohn Baldwin movq 64(%rsp),%rbx 988bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm8,%xmm8 989bc3d5698SJohn Baldwin vaesenc %xmm1,%xmm9,%xmm9 990bc3d5698SJohn Baldwin vmovups 16-120(%rsi),%xmm1 991bc3d5698SJohn Baldwin 992bc3d5698SJohn Baldwin vaesenclast %xmm0,%xmm2,%xmm2 993bc3d5698SJohn Baldwin vmovdqa %xmm15,32(%rsp) 994bc3d5698SJohn Baldwin vpxor %xmm15,%xmm15,%xmm15 995bc3d5698SJohn Baldwin vaesenclast %xmm0,%xmm3,%xmm3 996bc3d5698SJohn Baldwin vaesenclast %xmm0,%xmm4,%xmm4 997bc3d5698SJohn Baldwin vpcmpgtd %xmm15,%xmm14,%xmm15 998bc3d5698SJohn Baldwin vaesenclast %xmm0,%xmm5,%xmm5 999bc3d5698SJohn Baldwin vaesenclast %xmm0,%xmm6,%xmm6 1000bc3d5698SJohn Baldwin vpaddd %xmm15,%xmm14,%xmm14 1001bc3d5698SJohn Baldwin vmovdqu -120(%rsi),%xmm15 1002bc3d5698SJohn Baldwin vaesenclast %xmm0,%xmm7,%xmm7 1003bc3d5698SJohn Baldwin vaesenclast %xmm0,%xmm8,%xmm8 1004bc3d5698SJohn Baldwin vmovdqa %xmm14,48(%rsp) 1005bc3d5698SJohn Baldwin vaesenclast %xmm0,%xmm9,%xmm9 1006bc3d5698SJohn Baldwin vmovups 32-120(%rsi),%xmm0 1007bc3d5698SJohn Baldwin 1008bc3d5698SJohn Baldwin vmovups %xmm2,-16(%r8) 1009bc3d5698SJohn Baldwin subq %rbx,%r8 1010bc3d5698SJohn Baldwin vpxor 0(%rbp),%xmm2,%xmm2 1011bc3d5698SJohn Baldwin vmovups %xmm3,-16(%r9) 
1012bc3d5698SJohn Baldwin subq 72(%rsp),%r9 1013bc3d5698SJohn Baldwin vpxor 16(%rbp),%xmm3,%xmm3 1014bc3d5698SJohn Baldwin vmovups %xmm4,-16(%r10) 1015bc3d5698SJohn Baldwin subq 80(%rsp),%r10 1016bc3d5698SJohn Baldwin vpxor 32(%rbp),%xmm4,%xmm4 1017bc3d5698SJohn Baldwin vmovups %xmm5,-16(%r11) 1018bc3d5698SJohn Baldwin subq 88(%rsp),%r11 1019bc3d5698SJohn Baldwin vpxor 48(%rbp),%xmm5,%xmm5 1020bc3d5698SJohn Baldwin vmovups %xmm6,-16(%r12) 1021bc3d5698SJohn Baldwin subq 96(%rsp),%r12 1022bc3d5698SJohn Baldwin vpxor %xmm10,%xmm6,%xmm6 1023bc3d5698SJohn Baldwin vmovups %xmm7,-16(%r13) 1024bc3d5698SJohn Baldwin subq 104(%rsp),%r13 1025bc3d5698SJohn Baldwin vpxor %xmm11,%xmm7,%xmm7 1026bc3d5698SJohn Baldwin vmovups %xmm8,-16(%r14) 1027bc3d5698SJohn Baldwin subq 112(%rsp),%r14 1028bc3d5698SJohn Baldwin vpxor %xmm12,%xmm8,%xmm8 1029bc3d5698SJohn Baldwin vmovups %xmm9,-16(%r15) 1030bc3d5698SJohn Baldwin subq 120(%rsp),%r15 1031bc3d5698SJohn Baldwin vpxor %xmm13,%xmm9,%xmm9 1032bc3d5698SJohn Baldwin 1033bc3d5698SJohn Baldwin decl %edx 1034bc3d5698SJohn Baldwin jnz .Loop_enc8x 1035bc3d5698SJohn Baldwin 1036bc3d5698SJohn Baldwin movq 16(%rsp),%rax 1037bc3d5698SJohn Baldwin.cfi_def_cfa %rax,8 1038bc3d5698SJohn Baldwin 1039bc3d5698SJohn Baldwin 1040bc3d5698SJohn Baldwin 1041bc3d5698SJohn Baldwin 1042bc3d5698SJohn Baldwin 1043bc3d5698SJohn Baldwin.Lenc8x_done: 1044bc3d5698SJohn Baldwin vzeroupper 1045bc3d5698SJohn Baldwin movq -48(%rax),%r15 1046bc3d5698SJohn Baldwin.cfi_restore %r15 1047bc3d5698SJohn Baldwin movq -40(%rax),%r14 1048bc3d5698SJohn Baldwin.cfi_restore %r14 1049bc3d5698SJohn Baldwin movq -32(%rax),%r13 1050bc3d5698SJohn Baldwin.cfi_restore %r13 1051bc3d5698SJohn Baldwin movq -24(%rax),%r12 1052bc3d5698SJohn Baldwin.cfi_restore %r12 1053bc3d5698SJohn Baldwin movq -16(%rax),%rbp 1054bc3d5698SJohn Baldwin.cfi_restore %rbp 1055bc3d5698SJohn Baldwin movq -8(%rax),%rbx 1056bc3d5698SJohn Baldwin.cfi_restore %rbx 1057bc3d5698SJohn Baldwin leaq (%rax),%rsp 1058bc3d5698SJohn 
Baldwin.cfi_def_cfa_register %rsp 1059bc3d5698SJohn Baldwin.Lenc8x_epilogue: 1060bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1061bc3d5698SJohn Baldwin.cfi_endproc 1062bc3d5698SJohn Baldwin.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx 1063bc3d5698SJohn Baldwin 1064bc3d5698SJohn Baldwin.type aesni_multi_cbc_decrypt_avx,@function 1065bc3d5698SJohn Baldwin.align 32 1066bc3d5698SJohn Baldwinaesni_multi_cbc_decrypt_avx: 1067bc3d5698SJohn Baldwin.cfi_startproc 1068bc3d5698SJohn Baldwin_avx_cbc_dec_shortcut: 1069bc3d5698SJohn Baldwin movq %rsp,%rax 1070bc3d5698SJohn Baldwin.cfi_def_cfa_register %rax 1071bc3d5698SJohn Baldwin pushq %rbx 1072bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 1073bc3d5698SJohn Baldwin pushq %rbp 1074bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 1075bc3d5698SJohn Baldwin pushq %r12 1076bc3d5698SJohn Baldwin.cfi_offset %r12,-32 1077bc3d5698SJohn Baldwin pushq %r13 1078bc3d5698SJohn Baldwin.cfi_offset %r13,-40 1079bc3d5698SJohn Baldwin pushq %r14 1080bc3d5698SJohn Baldwin.cfi_offset %r14,-48 1081bc3d5698SJohn Baldwin pushq %r15 1082bc3d5698SJohn Baldwin.cfi_offset %r15,-56 1083bc3d5698SJohn Baldwin 1084bc3d5698SJohn Baldwin 1085bc3d5698SJohn Baldwin 1086bc3d5698SJohn Baldwin 1087bc3d5698SJohn Baldwin 1088bc3d5698SJohn Baldwin 1089bc3d5698SJohn Baldwin 1090bc3d5698SJohn Baldwin 1091bc3d5698SJohn Baldwin 1092bc3d5698SJohn Baldwin subq $256,%rsp 1093bc3d5698SJohn Baldwin andq $-256,%rsp 1094bc3d5698SJohn Baldwin subq $192,%rsp 1095bc3d5698SJohn Baldwin movq %rax,16(%rsp) 1096bc3d5698SJohn Baldwin.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08 1097bc3d5698SJohn Baldwin 1098bc3d5698SJohn Baldwin.Ldec8x_body: 1099bc3d5698SJohn Baldwin vzeroupper 1100bc3d5698SJohn Baldwin vmovdqu (%rsi),%xmm15 1101bc3d5698SJohn Baldwin leaq 120(%rsi),%rsi 1102bc3d5698SJohn Baldwin leaq 160(%rdi),%rdi 1103bc3d5698SJohn Baldwin shrl $1,%edx 1104bc3d5698SJohn Baldwin 1105bc3d5698SJohn Baldwin.Ldec8x_loop_grande: 1106bc3d5698SJohn Baldwin 1107bc3d5698SJohn Baldwin xorl 
%edx,%edx 1108*c0855eaaSJohn Baldwin 1109bc3d5698SJohn Baldwin movl -144(%rdi),%ecx 1110*c0855eaaSJohn Baldwin 1111bc3d5698SJohn Baldwin movq -160(%rdi),%r8 1112bc3d5698SJohn Baldwin cmpl %edx,%ecx 1113*c0855eaaSJohn Baldwin 1114bc3d5698SJohn Baldwin movq -152(%rdi),%rbx 1115bc3d5698SJohn Baldwin cmovgl %ecx,%edx 1116bc3d5698SJohn Baldwin testl %ecx,%ecx 1117*c0855eaaSJohn Baldwin 1118bc3d5698SJohn Baldwin vmovdqu -136(%rdi),%xmm2 1119bc3d5698SJohn Baldwin movl %ecx,32(%rsp) 1120bc3d5698SJohn Baldwin cmovleq %rsp,%r8 1121bc3d5698SJohn Baldwin subq %r8,%rbx 1122bc3d5698SJohn Baldwin movq %rbx,64(%rsp) 1123bc3d5698SJohn Baldwin vmovdqu %xmm2,192(%rsp) 1124*c0855eaaSJohn Baldwin 1125bc3d5698SJohn Baldwin movl -104(%rdi),%ecx 1126*c0855eaaSJohn Baldwin 1127bc3d5698SJohn Baldwin movq -120(%rdi),%r9 1128bc3d5698SJohn Baldwin cmpl %edx,%ecx 1129*c0855eaaSJohn Baldwin 1130bc3d5698SJohn Baldwin movq -112(%rdi),%rbp 1131bc3d5698SJohn Baldwin cmovgl %ecx,%edx 1132bc3d5698SJohn Baldwin testl %ecx,%ecx 1133*c0855eaaSJohn Baldwin 1134bc3d5698SJohn Baldwin vmovdqu -96(%rdi),%xmm3 1135bc3d5698SJohn Baldwin movl %ecx,36(%rsp) 1136bc3d5698SJohn Baldwin cmovleq %rsp,%r9 1137bc3d5698SJohn Baldwin subq %r9,%rbp 1138bc3d5698SJohn Baldwin movq %rbp,72(%rsp) 1139bc3d5698SJohn Baldwin vmovdqu %xmm3,208(%rsp) 1140*c0855eaaSJohn Baldwin 1141bc3d5698SJohn Baldwin movl -64(%rdi),%ecx 1142*c0855eaaSJohn Baldwin 1143bc3d5698SJohn Baldwin movq -80(%rdi),%r10 1144bc3d5698SJohn Baldwin cmpl %edx,%ecx 1145*c0855eaaSJohn Baldwin 1146bc3d5698SJohn Baldwin movq -72(%rdi),%rbp 1147bc3d5698SJohn Baldwin cmovgl %ecx,%edx 1148bc3d5698SJohn Baldwin testl %ecx,%ecx 1149*c0855eaaSJohn Baldwin 1150bc3d5698SJohn Baldwin vmovdqu -56(%rdi),%xmm4 1151bc3d5698SJohn Baldwin movl %ecx,40(%rsp) 1152bc3d5698SJohn Baldwin cmovleq %rsp,%r10 1153bc3d5698SJohn Baldwin subq %r10,%rbp 1154bc3d5698SJohn Baldwin movq %rbp,80(%rsp) 1155bc3d5698SJohn Baldwin vmovdqu %xmm4,224(%rsp) 1156*c0855eaaSJohn Baldwin 1157bc3d5698SJohn 
Baldwin movl -24(%rdi),%ecx 1158*c0855eaaSJohn Baldwin 1159bc3d5698SJohn Baldwin movq -40(%rdi),%r11 1160bc3d5698SJohn Baldwin cmpl %edx,%ecx 1161*c0855eaaSJohn Baldwin 1162bc3d5698SJohn Baldwin movq -32(%rdi),%rbp 1163bc3d5698SJohn Baldwin cmovgl %ecx,%edx 1164bc3d5698SJohn Baldwin testl %ecx,%ecx 1165*c0855eaaSJohn Baldwin 1166bc3d5698SJohn Baldwin vmovdqu -16(%rdi),%xmm5 1167bc3d5698SJohn Baldwin movl %ecx,44(%rsp) 1168bc3d5698SJohn Baldwin cmovleq %rsp,%r11 1169bc3d5698SJohn Baldwin subq %r11,%rbp 1170bc3d5698SJohn Baldwin movq %rbp,88(%rsp) 1171bc3d5698SJohn Baldwin vmovdqu %xmm5,240(%rsp) 1172*c0855eaaSJohn Baldwin 1173bc3d5698SJohn Baldwin movl 16(%rdi),%ecx 1174*c0855eaaSJohn Baldwin 1175bc3d5698SJohn Baldwin movq 0(%rdi),%r12 1176bc3d5698SJohn Baldwin cmpl %edx,%ecx 1177*c0855eaaSJohn Baldwin 1178bc3d5698SJohn Baldwin movq 8(%rdi),%rbp 1179bc3d5698SJohn Baldwin cmovgl %ecx,%edx 1180bc3d5698SJohn Baldwin testl %ecx,%ecx 1181*c0855eaaSJohn Baldwin 1182bc3d5698SJohn Baldwin vmovdqu 24(%rdi),%xmm6 1183bc3d5698SJohn Baldwin movl %ecx,48(%rsp) 1184bc3d5698SJohn Baldwin cmovleq %rsp,%r12 1185bc3d5698SJohn Baldwin subq %r12,%rbp 1186bc3d5698SJohn Baldwin movq %rbp,96(%rsp) 1187bc3d5698SJohn Baldwin vmovdqu %xmm6,256(%rsp) 1188*c0855eaaSJohn Baldwin 1189bc3d5698SJohn Baldwin movl 56(%rdi),%ecx 1190*c0855eaaSJohn Baldwin 1191bc3d5698SJohn Baldwin movq 40(%rdi),%r13 1192bc3d5698SJohn Baldwin cmpl %edx,%ecx 1193*c0855eaaSJohn Baldwin 1194bc3d5698SJohn Baldwin movq 48(%rdi),%rbp 1195bc3d5698SJohn Baldwin cmovgl %ecx,%edx 1196bc3d5698SJohn Baldwin testl %ecx,%ecx 1197*c0855eaaSJohn Baldwin 1198bc3d5698SJohn Baldwin vmovdqu 64(%rdi),%xmm7 1199bc3d5698SJohn Baldwin movl %ecx,52(%rsp) 1200bc3d5698SJohn Baldwin cmovleq %rsp,%r13 1201bc3d5698SJohn Baldwin subq %r13,%rbp 1202bc3d5698SJohn Baldwin movq %rbp,104(%rsp) 1203bc3d5698SJohn Baldwin vmovdqu %xmm7,272(%rsp) 1204*c0855eaaSJohn Baldwin 1205bc3d5698SJohn Baldwin movl 96(%rdi),%ecx 1206*c0855eaaSJohn Baldwin 
1207bc3d5698SJohn Baldwin movq 80(%rdi),%r14 1208bc3d5698SJohn Baldwin cmpl %edx,%ecx 1209*c0855eaaSJohn Baldwin 1210bc3d5698SJohn Baldwin movq 88(%rdi),%rbp 1211bc3d5698SJohn Baldwin cmovgl %ecx,%edx 1212bc3d5698SJohn Baldwin testl %ecx,%ecx 1213*c0855eaaSJohn Baldwin 1214bc3d5698SJohn Baldwin vmovdqu 104(%rdi),%xmm8 1215bc3d5698SJohn Baldwin movl %ecx,56(%rsp) 1216bc3d5698SJohn Baldwin cmovleq %rsp,%r14 1217bc3d5698SJohn Baldwin subq %r14,%rbp 1218bc3d5698SJohn Baldwin movq %rbp,112(%rsp) 1219bc3d5698SJohn Baldwin vmovdqu %xmm8,288(%rsp) 1220*c0855eaaSJohn Baldwin 1221bc3d5698SJohn Baldwin movl 136(%rdi),%ecx 1222*c0855eaaSJohn Baldwin 1223bc3d5698SJohn Baldwin movq 120(%rdi),%r15 1224bc3d5698SJohn Baldwin cmpl %edx,%ecx 1225*c0855eaaSJohn Baldwin 1226bc3d5698SJohn Baldwin movq 128(%rdi),%rbp 1227bc3d5698SJohn Baldwin cmovgl %ecx,%edx 1228bc3d5698SJohn Baldwin testl %ecx,%ecx 1229*c0855eaaSJohn Baldwin 1230bc3d5698SJohn Baldwin vmovdqu 144(%rdi),%xmm9 1231bc3d5698SJohn Baldwin movl %ecx,60(%rsp) 1232bc3d5698SJohn Baldwin cmovleq %rsp,%r15 1233bc3d5698SJohn Baldwin subq %r15,%rbp 1234bc3d5698SJohn Baldwin movq %rbp,120(%rsp) 1235bc3d5698SJohn Baldwin vmovdqu %xmm9,304(%rsp) 1236bc3d5698SJohn Baldwin testl %edx,%edx 1237bc3d5698SJohn Baldwin jz .Ldec8x_done 1238bc3d5698SJohn Baldwin 1239bc3d5698SJohn Baldwin vmovups 16-120(%rsi),%xmm1 1240bc3d5698SJohn Baldwin vmovups 32-120(%rsi),%xmm0 1241bc3d5698SJohn Baldwin movl 240-120(%rsi),%eax 1242bc3d5698SJohn Baldwin leaq 192+128(%rsp),%rbp 1243bc3d5698SJohn Baldwin 1244bc3d5698SJohn Baldwin vmovdqu (%r8),%xmm2 1245bc3d5698SJohn Baldwin vmovdqu (%r9),%xmm3 1246bc3d5698SJohn Baldwin vmovdqu (%r10),%xmm4 1247bc3d5698SJohn Baldwin vmovdqu (%r11),%xmm5 1248bc3d5698SJohn Baldwin vmovdqu (%r12),%xmm6 1249bc3d5698SJohn Baldwin vmovdqu (%r13),%xmm7 1250bc3d5698SJohn Baldwin vmovdqu (%r14),%xmm8 1251bc3d5698SJohn Baldwin vmovdqu (%r15),%xmm9 1252bc3d5698SJohn Baldwin vmovdqu %xmm2,0(%rbp) 1253bc3d5698SJohn Baldwin vpxor 
%xmm15,%xmm2,%xmm2 1254bc3d5698SJohn Baldwin vmovdqu %xmm3,16(%rbp) 1255bc3d5698SJohn Baldwin vpxor %xmm15,%xmm3,%xmm3 1256bc3d5698SJohn Baldwin vmovdqu %xmm4,32(%rbp) 1257bc3d5698SJohn Baldwin vpxor %xmm15,%xmm4,%xmm4 1258bc3d5698SJohn Baldwin vmovdqu %xmm5,48(%rbp) 1259bc3d5698SJohn Baldwin vpxor %xmm15,%xmm5,%xmm5 1260bc3d5698SJohn Baldwin vmovdqu %xmm6,64(%rbp) 1261bc3d5698SJohn Baldwin vpxor %xmm15,%xmm6,%xmm6 1262bc3d5698SJohn Baldwin vmovdqu %xmm7,80(%rbp) 1263bc3d5698SJohn Baldwin vpxor %xmm15,%xmm7,%xmm7 1264bc3d5698SJohn Baldwin vmovdqu %xmm8,96(%rbp) 1265bc3d5698SJohn Baldwin vpxor %xmm15,%xmm8,%xmm8 1266bc3d5698SJohn Baldwin vmovdqu %xmm9,112(%rbp) 1267bc3d5698SJohn Baldwin vpxor %xmm15,%xmm9,%xmm9 1268bc3d5698SJohn Baldwin xorq $0x80,%rbp 1269bc3d5698SJohn Baldwin movl $1,%ecx 1270bc3d5698SJohn Baldwin jmp .Loop_dec8x 1271bc3d5698SJohn Baldwin 1272bc3d5698SJohn Baldwin.align 32 1273bc3d5698SJohn Baldwin.Loop_dec8x: 1274bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm2,%xmm2 1275bc3d5698SJohn Baldwin cmpl 32+0(%rsp),%ecx 1276bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm3,%xmm3 1277bc3d5698SJohn Baldwin prefetcht0 31(%r8) 1278bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm4,%xmm4 1279bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm5,%xmm5 1280bc3d5698SJohn Baldwin leaq (%r8,%rbx,1),%rbx 1281bc3d5698SJohn Baldwin cmovgeq %rsp,%r8 1282bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm6,%xmm6 1283bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 1284bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm7,%xmm7 1285bc3d5698SJohn Baldwin subq %r8,%rbx 1286bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm8,%xmm8 1287bc3d5698SJohn Baldwin vmovdqu 16(%r8),%xmm10 1288bc3d5698SJohn Baldwin movq %rbx,64+0(%rsp) 1289bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm9,%xmm9 1290bc3d5698SJohn Baldwin vmovups -72(%rsi),%xmm1 1291bc3d5698SJohn Baldwin leaq 16(%r8,%rbx,1),%r8 1292bc3d5698SJohn Baldwin vmovdqu %xmm10,128(%rsp) 1293bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm2,%xmm2 1294bc3d5698SJohn Baldwin cmpl 32+4(%rsp),%ecx 1295bc3d5698SJohn Baldwin 
movq 64+8(%rsp),%rbx 1296bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm3,%xmm3 1297bc3d5698SJohn Baldwin prefetcht0 31(%r9) 1298bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm4,%xmm4 1299bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm5,%xmm5 1300bc3d5698SJohn Baldwin leaq (%r9,%rbx,1),%rbx 1301bc3d5698SJohn Baldwin cmovgeq %rsp,%r9 1302bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm6,%xmm6 1303bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 1304bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm7,%xmm7 1305bc3d5698SJohn Baldwin subq %r9,%rbx 1306bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm8,%xmm8 1307bc3d5698SJohn Baldwin vmovdqu 16(%r9),%xmm11 1308bc3d5698SJohn Baldwin movq %rbx,64+8(%rsp) 1309bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm9,%xmm9 1310bc3d5698SJohn Baldwin vmovups -56(%rsi),%xmm0 1311bc3d5698SJohn Baldwin leaq 16(%r9,%rbx,1),%r9 1312bc3d5698SJohn Baldwin vmovdqu %xmm11,144(%rsp) 1313bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm2,%xmm2 1314bc3d5698SJohn Baldwin cmpl 32+8(%rsp),%ecx 1315bc3d5698SJohn Baldwin movq 64+16(%rsp),%rbx 1316bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm3,%xmm3 1317bc3d5698SJohn Baldwin prefetcht0 31(%r10) 1318bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm4,%xmm4 1319bc3d5698SJohn Baldwin prefetcht0 15(%r8) 1320bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm5,%xmm5 1321bc3d5698SJohn Baldwin leaq (%r10,%rbx,1),%rbx 1322bc3d5698SJohn Baldwin cmovgeq %rsp,%r10 1323bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm6,%xmm6 1324bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 1325bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm7,%xmm7 1326bc3d5698SJohn Baldwin subq %r10,%rbx 1327bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm8,%xmm8 1328bc3d5698SJohn Baldwin vmovdqu 16(%r10),%xmm12 1329bc3d5698SJohn Baldwin movq %rbx,64+16(%rsp) 1330bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm9,%xmm9 1331bc3d5698SJohn Baldwin vmovups -40(%rsi),%xmm1 1332bc3d5698SJohn Baldwin leaq 16(%r10,%rbx,1),%r10 1333bc3d5698SJohn Baldwin vmovdqu %xmm12,160(%rsp) 1334bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm2,%xmm2 1335bc3d5698SJohn Baldwin cmpl 32+12(%rsp),%ecx 
1336bc3d5698SJohn Baldwin movq 64+24(%rsp),%rbx 1337bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm3,%xmm3 1338bc3d5698SJohn Baldwin prefetcht0 31(%r11) 1339bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm4,%xmm4 1340bc3d5698SJohn Baldwin prefetcht0 15(%r9) 1341bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm5,%xmm5 1342bc3d5698SJohn Baldwin leaq (%r11,%rbx,1),%rbx 1343bc3d5698SJohn Baldwin cmovgeq %rsp,%r11 1344bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm6,%xmm6 1345bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 1346bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm7,%xmm7 1347bc3d5698SJohn Baldwin subq %r11,%rbx 1348bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm8,%xmm8 1349bc3d5698SJohn Baldwin vmovdqu 16(%r11),%xmm13 1350bc3d5698SJohn Baldwin movq %rbx,64+24(%rsp) 1351bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm9,%xmm9 1352bc3d5698SJohn Baldwin vmovups -24(%rsi),%xmm0 1353bc3d5698SJohn Baldwin leaq 16(%r11,%rbx,1),%r11 1354bc3d5698SJohn Baldwin vmovdqu %xmm13,176(%rsp) 1355bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm2,%xmm2 1356bc3d5698SJohn Baldwin cmpl 32+16(%rsp),%ecx 1357bc3d5698SJohn Baldwin movq 64+32(%rsp),%rbx 1358bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm3,%xmm3 1359bc3d5698SJohn Baldwin prefetcht0 31(%r12) 1360bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm4,%xmm4 1361bc3d5698SJohn Baldwin prefetcht0 15(%r10) 1362bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm5,%xmm5 1363bc3d5698SJohn Baldwin leaq (%r12,%rbx,1),%rbx 1364bc3d5698SJohn Baldwin cmovgeq %rsp,%r12 1365bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm6,%xmm6 1366bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 1367bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm7,%xmm7 1368bc3d5698SJohn Baldwin subq %r12,%rbx 1369bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm8,%xmm8 1370bc3d5698SJohn Baldwin vmovdqu 16(%r12),%xmm10 1371bc3d5698SJohn Baldwin movq %rbx,64+32(%rsp) 1372bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm9,%xmm9 1373bc3d5698SJohn Baldwin vmovups -8(%rsi),%xmm1 1374bc3d5698SJohn Baldwin leaq 16(%r12,%rbx,1),%r12 1375bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm2,%xmm2 1376bc3d5698SJohn Baldwin cmpl 
32+20(%rsp),%ecx 1377bc3d5698SJohn Baldwin movq 64+40(%rsp),%rbx 1378bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm3,%xmm3 1379bc3d5698SJohn Baldwin prefetcht0 31(%r13) 1380bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm4,%xmm4 1381bc3d5698SJohn Baldwin prefetcht0 15(%r11) 1382bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm5,%xmm5 1383bc3d5698SJohn Baldwin leaq (%rbx,%r13,1),%rbx 1384bc3d5698SJohn Baldwin cmovgeq %rsp,%r13 1385bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm6,%xmm6 1386bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 1387bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm7,%xmm7 1388bc3d5698SJohn Baldwin subq %r13,%rbx 1389bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm8,%xmm8 1390bc3d5698SJohn Baldwin vmovdqu 16(%r13),%xmm11 1391bc3d5698SJohn Baldwin movq %rbx,64+40(%rsp) 1392bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm9,%xmm9 1393bc3d5698SJohn Baldwin vmovups 8(%rsi),%xmm0 1394bc3d5698SJohn Baldwin leaq 16(%r13,%rbx,1),%r13 1395bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm2,%xmm2 1396bc3d5698SJohn Baldwin cmpl 32+24(%rsp),%ecx 1397bc3d5698SJohn Baldwin movq 64+48(%rsp),%rbx 1398bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm3,%xmm3 1399bc3d5698SJohn Baldwin prefetcht0 31(%r14) 1400bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm4,%xmm4 1401bc3d5698SJohn Baldwin prefetcht0 15(%r12) 1402bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm5,%xmm5 1403bc3d5698SJohn Baldwin leaq (%r14,%rbx,1),%rbx 1404bc3d5698SJohn Baldwin cmovgeq %rsp,%r14 1405bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm6,%xmm6 1406bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 1407bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm7,%xmm7 1408bc3d5698SJohn Baldwin subq %r14,%rbx 1409bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm8,%xmm8 1410bc3d5698SJohn Baldwin vmovdqu 16(%r14),%xmm12 1411bc3d5698SJohn Baldwin movq %rbx,64+48(%rsp) 1412bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm9,%xmm9 1413bc3d5698SJohn Baldwin vmovups 24(%rsi),%xmm1 1414bc3d5698SJohn Baldwin leaq 16(%r14,%rbx,1),%r14 1415bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm2,%xmm2 1416bc3d5698SJohn Baldwin cmpl 32+28(%rsp),%ecx 1417bc3d5698SJohn 
Baldwin movq 64+56(%rsp),%rbx 1418bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm3,%xmm3 1419bc3d5698SJohn Baldwin prefetcht0 31(%r15) 1420bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm4,%xmm4 1421bc3d5698SJohn Baldwin prefetcht0 15(%r13) 1422bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm5,%xmm5 1423bc3d5698SJohn Baldwin leaq (%r15,%rbx,1),%rbx 1424bc3d5698SJohn Baldwin cmovgeq %rsp,%r15 1425bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm6,%xmm6 1426bc3d5698SJohn Baldwin cmovgq %rsp,%rbx 1427bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm7,%xmm7 1428bc3d5698SJohn Baldwin subq %r15,%rbx 1429bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm8,%xmm8 1430bc3d5698SJohn Baldwin vmovdqu 16(%r15),%xmm13 1431bc3d5698SJohn Baldwin movq %rbx,64+56(%rsp) 1432bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm9,%xmm9 1433bc3d5698SJohn Baldwin vmovups 40(%rsi),%xmm0 1434bc3d5698SJohn Baldwin leaq 16(%r15,%rbx,1),%r15 1435bc3d5698SJohn Baldwin vmovdqu 32(%rsp),%xmm14 1436bc3d5698SJohn Baldwin prefetcht0 15(%r14) 1437bc3d5698SJohn Baldwin prefetcht0 15(%r15) 1438bc3d5698SJohn Baldwin cmpl $11,%eax 1439bc3d5698SJohn Baldwin jb .Ldec8x_tail 1440bc3d5698SJohn Baldwin 1441bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm2,%xmm2 1442bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm3,%xmm3 1443bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm4,%xmm4 1444bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm5,%xmm5 1445bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm6,%xmm6 1446bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm7,%xmm7 1447bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm8,%xmm8 1448bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm9,%xmm9 1449bc3d5698SJohn Baldwin vmovups 176-120(%rsi),%xmm1 1450bc3d5698SJohn Baldwin 1451bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm2,%xmm2 1452bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm3,%xmm3 1453bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm4,%xmm4 1454bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm5,%xmm5 1455bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm6,%xmm6 1456bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm7,%xmm7 1457bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm8,%xmm8 1458bc3d5698SJohn Baldwin 
vaesdec %xmm0,%xmm9,%xmm9 1459bc3d5698SJohn Baldwin vmovups 192-120(%rsi),%xmm0 1460bc3d5698SJohn Baldwin je .Ldec8x_tail 1461bc3d5698SJohn Baldwin 1462bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm2,%xmm2 1463bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm3,%xmm3 1464bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm4,%xmm4 1465bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm5,%xmm5 1466bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm6,%xmm6 1467bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm7,%xmm7 1468bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm8,%xmm8 1469bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm9,%xmm9 1470bc3d5698SJohn Baldwin vmovups 208-120(%rsi),%xmm1 1471bc3d5698SJohn Baldwin 1472bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm2,%xmm2 1473bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm3,%xmm3 1474bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm4,%xmm4 1475bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm5,%xmm5 1476bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm6,%xmm6 1477bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm7,%xmm7 1478bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm8,%xmm8 1479bc3d5698SJohn Baldwin vaesdec %xmm0,%xmm9,%xmm9 1480bc3d5698SJohn Baldwin vmovups 224-120(%rsi),%xmm0 1481bc3d5698SJohn Baldwin 1482bc3d5698SJohn Baldwin.Ldec8x_tail: 1483bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm2,%xmm2 1484bc3d5698SJohn Baldwin vpxor %xmm15,%xmm15,%xmm15 1485bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm3,%xmm3 1486bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm4,%xmm4 1487bc3d5698SJohn Baldwin vpcmpgtd %xmm15,%xmm14,%xmm15 1488bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm5,%xmm5 1489bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm6,%xmm6 1490bc3d5698SJohn Baldwin vpaddd %xmm14,%xmm15,%xmm15 1491bc3d5698SJohn Baldwin vmovdqu 48(%rsp),%xmm14 1492bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm7,%xmm7 1493bc3d5698SJohn Baldwin movq 64(%rsp),%rbx 1494bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm8,%xmm8 1495bc3d5698SJohn Baldwin vaesdec %xmm1,%xmm9,%xmm9 1496bc3d5698SJohn Baldwin vmovups 16-120(%rsi),%xmm1 1497bc3d5698SJohn Baldwin 1498bc3d5698SJohn Baldwin vaesdeclast %xmm0,%xmm2,%xmm2 
1499bc3d5698SJohn Baldwin vmovdqa %xmm15,32(%rsp) 1500bc3d5698SJohn Baldwin vpxor %xmm15,%xmm15,%xmm15 1501bc3d5698SJohn Baldwin vaesdeclast %xmm0,%xmm3,%xmm3 1502bc3d5698SJohn Baldwin vpxor 0(%rbp),%xmm2,%xmm2 1503bc3d5698SJohn Baldwin vaesdeclast %xmm0,%xmm4,%xmm4 1504bc3d5698SJohn Baldwin vpxor 16(%rbp),%xmm3,%xmm3 1505bc3d5698SJohn Baldwin vpcmpgtd %xmm15,%xmm14,%xmm15 1506bc3d5698SJohn Baldwin vaesdeclast %xmm0,%xmm5,%xmm5 1507bc3d5698SJohn Baldwin vpxor 32(%rbp),%xmm4,%xmm4 1508bc3d5698SJohn Baldwin vaesdeclast %xmm0,%xmm6,%xmm6 1509bc3d5698SJohn Baldwin vpxor 48(%rbp),%xmm5,%xmm5 1510bc3d5698SJohn Baldwin vpaddd %xmm15,%xmm14,%xmm14 1511bc3d5698SJohn Baldwin vmovdqu -120(%rsi),%xmm15 1512bc3d5698SJohn Baldwin vaesdeclast %xmm0,%xmm7,%xmm7 1513bc3d5698SJohn Baldwin vpxor 64(%rbp),%xmm6,%xmm6 1514bc3d5698SJohn Baldwin vaesdeclast %xmm0,%xmm8,%xmm8 1515bc3d5698SJohn Baldwin vpxor 80(%rbp),%xmm7,%xmm7 1516bc3d5698SJohn Baldwin vmovdqa %xmm14,48(%rsp) 1517bc3d5698SJohn Baldwin vaesdeclast %xmm0,%xmm9,%xmm9 1518bc3d5698SJohn Baldwin vpxor 96(%rbp),%xmm8,%xmm8 1519bc3d5698SJohn Baldwin vmovups 32-120(%rsi),%xmm0 1520bc3d5698SJohn Baldwin 1521bc3d5698SJohn Baldwin vmovups %xmm2,-16(%r8) 1522bc3d5698SJohn Baldwin subq %rbx,%r8 1523bc3d5698SJohn Baldwin vmovdqu 128+0(%rsp),%xmm2 1524bc3d5698SJohn Baldwin vpxor 112(%rbp),%xmm9,%xmm9 1525bc3d5698SJohn Baldwin vmovups %xmm3,-16(%r9) 1526bc3d5698SJohn Baldwin subq 72(%rsp),%r9 1527bc3d5698SJohn Baldwin vmovdqu %xmm2,0(%rbp) 1528bc3d5698SJohn Baldwin vpxor %xmm15,%xmm2,%xmm2 1529bc3d5698SJohn Baldwin vmovdqu 128+16(%rsp),%xmm3 1530bc3d5698SJohn Baldwin vmovups %xmm4,-16(%r10) 1531bc3d5698SJohn Baldwin subq 80(%rsp),%r10 1532bc3d5698SJohn Baldwin vmovdqu %xmm3,16(%rbp) 1533bc3d5698SJohn Baldwin vpxor %xmm15,%xmm3,%xmm3 1534bc3d5698SJohn Baldwin vmovdqu 128+32(%rsp),%xmm4 1535bc3d5698SJohn Baldwin vmovups %xmm5,-16(%r11) 1536bc3d5698SJohn Baldwin subq 88(%rsp),%r11 1537bc3d5698SJohn Baldwin vmovdqu %xmm4,32(%rbp) 
1538bc3d5698SJohn Baldwin vpxor %xmm15,%xmm4,%xmm4 1539bc3d5698SJohn Baldwin vmovdqu 128+48(%rsp),%xmm5 1540bc3d5698SJohn Baldwin vmovups %xmm6,-16(%r12) 1541bc3d5698SJohn Baldwin subq 96(%rsp),%r12 1542bc3d5698SJohn Baldwin vmovdqu %xmm5,48(%rbp) 1543bc3d5698SJohn Baldwin vpxor %xmm15,%xmm5,%xmm5 1544bc3d5698SJohn Baldwin vmovdqu %xmm10,64(%rbp) 1545bc3d5698SJohn Baldwin vpxor %xmm10,%xmm15,%xmm6 1546bc3d5698SJohn Baldwin vmovups %xmm7,-16(%r13) 1547bc3d5698SJohn Baldwin subq 104(%rsp),%r13 1548bc3d5698SJohn Baldwin vmovdqu %xmm11,80(%rbp) 1549bc3d5698SJohn Baldwin vpxor %xmm11,%xmm15,%xmm7 1550bc3d5698SJohn Baldwin vmovups %xmm8,-16(%r14) 1551bc3d5698SJohn Baldwin subq 112(%rsp),%r14 1552bc3d5698SJohn Baldwin vmovdqu %xmm12,96(%rbp) 1553bc3d5698SJohn Baldwin vpxor %xmm12,%xmm15,%xmm8 1554bc3d5698SJohn Baldwin vmovups %xmm9,-16(%r15) 1555bc3d5698SJohn Baldwin subq 120(%rsp),%r15 1556bc3d5698SJohn Baldwin vmovdqu %xmm13,112(%rbp) 1557bc3d5698SJohn Baldwin vpxor %xmm13,%xmm15,%xmm9 1558bc3d5698SJohn Baldwin 1559bc3d5698SJohn Baldwin xorq $128,%rbp 1560bc3d5698SJohn Baldwin decl %edx 1561bc3d5698SJohn Baldwin jnz .Loop_dec8x 1562bc3d5698SJohn Baldwin 1563bc3d5698SJohn Baldwin movq 16(%rsp),%rax 1564bc3d5698SJohn Baldwin.cfi_def_cfa %rax,8 1565bc3d5698SJohn Baldwin 1566bc3d5698SJohn Baldwin 1567bc3d5698SJohn Baldwin 1568bc3d5698SJohn Baldwin 1569bc3d5698SJohn Baldwin 1570bc3d5698SJohn Baldwin.Ldec8x_done: 1571bc3d5698SJohn Baldwin vzeroupper 1572bc3d5698SJohn Baldwin movq -48(%rax),%r15 1573bc3d5698SJohn Baldwin.cfi_restore %r15 1574bc3d5698SJohn Baldwin movq -40(%rax),%r14 1575bc3d5698SJohn Baldwin.cfi_restore %r14 1576bc3d5698SJohn Baldwin movq -32(%rax),%r13 1577bc3d5698SJohn Baldwin.cfi_restore %r13 1578bc3d5698SJohn Baldwin movq -24(%rax),%r12 1579bc3d5698SJohn Baldwin.cfi_restore %r12 1580bc3d5698SJohn Baldwin movq -16(%rax),%rbp 1581bc3d5698SJohn Baldwin.cfi_restore %rbp 1582bc3d5698SJohn Baldwin movq -8(%rax),%rbx 1583bc3d5698SJohn Baldwin.cfi_restore %rbx 
/* Final instructions of aesni_multi_cbc_decrypt_avx, then the .note.gnu.property trailer emitted after the function. NOTE(review): the number-hash-author tokens in front of each statement look like blame-view residue fused into the text - confirm against the generated file. */ 1584bc3d5698SJohn Baldwin leaq (%rax),%rsp /* set rsp to the address held in rax */ 1585bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp /* unwind info: the CFA is computed from rsp again */ 1586bc3d5698SJohn Baldwin.Ldec8x_epilogue: 1587bc3d5698SJohn Baldwin .byte 0xf3,0xc3 /* hand-encoded return: F3 prefix byte followed by C3 (ret) */ 1588bc3d5698SJohn Baldwin.cfi_endproc 1589bc3d5698SJohn Baldwin.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx /* symbol size = current location minus the function label */ 1590*c0855eaaSJohn Baldwin .section ".note.gnu.property", "a" /* note section with the a (allocatable) flag */ 1591*c0855eaaSJohn Baldwin .p2align 3 /* align to 8 bytes */ 1592*c0855eaaSJohn Baldwin .long 1f - 0f /* first header word: byte count between labels 0 and 1, i.e. the length of the name below */ 1593*c0855eaaSJohn Baldwin .long 4f - 1f /* second header word: byte count between labels 1 and 4, i.e. the length of the descriptor below */ 1594*c0855eaaSJohn Baldwin .long 5 /* third header word: note type 5 - presumably NT_GNU_PROPERTY_TYPE_0, TODO confirm against the ABI */ 1595*c0855eaaSJohn Baldwin0: 1596*c0855eaaSJohn Baldwin # "GNU" encoded with .byte, since .asciz isn't supported 1597*c0855eaaSJohn Baldwin # on Solaris. 1598*c0855eaaSJohn Baldwin .byte 0x47 /* ASCII G */ 1599*c0855eaaSJohn Baldwin .byte 0x4e /* ASCII N */ 1600*c0855eaaSJohn Baldwin .byte 0x55 /* ASCII U */ 1601*c0855eaaSJohn Baldwin .byte 0 /* NUL terminator of the name */ 1602*c0855eaaSJohn Baldwin1: 1603*c0855eaaSJohn Baldwin .p2align 3 /* pad the name so the descriptor starts 8-byte aligned */ 1604*c0855eaaSJohn Baldwin .long 0xc0000002 /* property type - presumably GNU_PROPERTY_X86_FEATURE_1_AND, TODO confirm against the x86-64 psABI */ 1605*c0855eaaSJohn Baldwin .long 3f - 2f /* property data size: byte count between labels 2 and 3 */ 1606*c0855eaaSJohn Baldwin2: 1607*c0855eaaSJohn Baldwin .long 3 /* property value 3 - presumably the IBT (1) and SHSTK (2) feature bits, TODO confirm */ 1608*c0855eaaSJohn Baldwin3: 1609*c0855eaaSJohn Baldwin .p2align 3 /* pad the property entry to 8 bytes */ 1610*c0855eaaSJohn Baldwin4: 1611