/* Do not modify. This file is auto-generated from aesni-mb-x86_64.pl. */
.text

.globl aesni_multi_cbc_encrypt
.type aesni_multi_cbc_encrypt,@function
.align 32
aesni_multi_cbc_encrypt:
.cfi_startproc
	cmpl $2,%edx
	jb .Lenc_non_avx
	movl OPENSSL_ia32cap_P+4(%rip),%ecx
	testl $268435456,%ecx
	jnz _avx_cbc_enc_shortcut
	jmp .Lenc_non_avx
.align 16
.Lenc_non_avx:
	movq %rsp,%rax
.cfi_def_cfa_register %rax
	pushq %rbx
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_offset %rbp,-24
	pushq %r12
.cfi_offset %r12,-32
	pushq %r13
.cfi_offset %r13,-40
	pushq %r14
.cfi_offset %r14,-48
	pushq %r15
.cfi_offset %r15,-56

	subq $48,%rsp
	andq $-64,%rsp
	movq %rax,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc4x_body:
	movdqu (%rsi),%xmm12
	leaq 120(%rsi),%rsi
	leaq 80(%rdi),%rdi

.Lenc4x_loop_grande:
	movl %edx,24(%rsp)
	xorl %edx,%edx

	movl -64(%rdi),%ecx
	movq -80(%rdi),%r8
	cmpl %edx,%ecx
	movq -72(%rdi),%r12
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	movdqu -56(%rdi),%xmm2
	movl %ecx,32(%rsp)
	cmovleq %rsp,%r8

	movl -24(%rdi),%ecx
	movq -40(%rdi),%r9
	cmpl %edx,%ecx
	movq -32(%rdi),%r13
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	movdqu -16(%rdi),%xmm3
	movl %ecx,36(%rsp)
	cmovleq %rsp,%r9

	movl 16(%rdi),%ecx
	movq 0(%rdi),%r10
	cmpl %edx,%ecx
	movq 8(%rdi),%r14
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	movdqu 24(%rdi),%xmm4
	movl %ecx,40(%rsp)
	cmovleq %rsp,%r10

	movl 56(%rdi),%ecx
	movq 40(%rdi),%r11
	cmpl %edx,%ecx
	movq 48(%rdi),%r15
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	movdqu 64(%rdi),%xmm5
	movl %ecx,44(%rsp)
	cmovleq %rsp,%r11
	testl %edx,%edx
	jz .Lenc4x_done

	movups 16-120(%rsi),%xmm1
	pxor %xmm12,%xmm2
	movups 32-120(%rsi),%xmm0
	pxor %xmm12,%xmm3
	movl 240-120(%rsi),%eax
	pxor %xmm12,%xmm4
	movdqu (%r8),%xmm6
	pxor %xmm12,%xmm5
	movdqu (%r9),%xmm7
	pxor %xmm6,%xmm2
	movdqu (%r10),%xmm8
	pxor %xmm7,%xmm3
	movdqu (%r11),%xmm9
	pxor %xmm8,%xmm4
	pxor %xmm9,%xmm5
	movdqa 32(%rsp),%xmm10
	xorq %rbx,%rbx
	jmp .Loop_enc4x

.align 32
.Loop_enc4x:
	addq $16,%rbx
	leaq 16(%rsp),%rbp
	movl $1,%ecx
	subq %rbx,%rbp

.byte 102,15,56,220,209
	prefetcht0 31(%r8,%rbx,1)
	prefetcht0 31(%r9,%rbx,1)
.byte 102,15,56,220,217
	prefetcht0 31(%r10,%rbx,1)
	prefetcht0 31(%r10,%rbx,1)
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups 48-120(%rsi),%xmm1
	cmpl 32(%rsp),%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
	cmovgeq %rbp,%r8
	cmovgq %rbp,%r12
.byte 102,15,56,220,232
	movups -56(%rsi),%xmm0
	cmpl 36(%rsp),%ecx
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
	cmovgeq %rbp,%r9
	cmovgq %rbp,%r13
.byte 102,15,56,220,233
	movups -40(%rsi),%xmm1
	cmpl 40(%rsp),%ecx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
	cmovgeq %rbp,%r10
	cmovgq %rbp,%r14
.byte 102,15,56,220,232
	movups -24(%rsi),%xmm0
	cmpl 44(%rsp),%ecx
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
	cmovgeq %rbp,%r11
	cmovgq %rbp,%r15
.byte 102,15,56,220,233
	movups -8(%rsi),%xmm1
	movdqa %xmm10,%xmm11
.byte 102,15,56,220,208
	prefetcht0 15(%r12,%rbx,1)
	prefetcht0 15(%r13,%rbx,1)
.byte 102,15,56,220,216
	prefetcht0 15(%r14,%rbx,1)
	prefetcht0 15(%r15,%rbx,1)
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movups 128-120(%rsi),%xmm0
	pxor %xmm12,%xmm12

.byte 102,15,56,220,209
	pcmpgtd %xmm12,%xmm11
	movdqu -120(%rsi),%xmm12
.byte 102,15,56,220,217
	paddd %xmm11,%xmm10
	movdqa %xmm10,32(%rsp)
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups 144-120(%rsi),%xmm1

	cmpl $11,%eax

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movups 160-120(%rsi),%xmm0

	jb .Lenc4x_tail

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups 176-120(%rsi),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movups 192-120(%rsi),%xmm0

	je .Lenc4x_tail

.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movups 208-120(%rsi),%xmm1

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
	movups 224-120(%rsi),%xmm0
	jmp .Lenc4x_tail

.align 32
.Lenc4x_tail:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
	movdqu (%r8,%rbx,1),%xmm6
	movdqu 16-120(%rsi),%xmm1

.byte 102,15,56,221,208
	movdqu (%r9,%rbx,1),%xmm7
	pxor %xmm12,%xmm6
.byte 102,15,56,221,216
	movdqu (%r10,%rbx,1),%xmm8
	pxor %xmm12,%xmm7
.byte 102,15,56,221,224
	movdqu (%r11,%rbx,1),%xmm9
	pxor %xmm12,%xmm8
.byte 102,15,56,221,232
	movdqu 32-120(%rsi),%xmm0
	pxor %xmm12,%xmm9

	movups %xmm2,-16(%r12,%rbx,1)
	pxor %xmm6,%xmm2
	movups %xmm3,-16(%r13,%rbx,1)
	pxor %xmm7,%xmm3
	movups %xmm4,-16(%r14,%rbx,1)
	pxor %xmm8,%xmm4
	movups %xmm5,-16(%r15,%rbx,1)
	pxor %xmm9,%xmm5

	decl %edx
	jnz .Loop_enc4x

	movq 16(%rsp),%rax
.cfi_def_cfa %rax,8
	movl 24(%rsp),%edx

	leaq 160(%rdi),%rdi
	decl %edx
	jnz .Lenc4x_loop_grande

.Lenc4x_done:
	movq -48(%rax),%r15
.cfi_restore %r15
	movq -40(%rax),%r14
.cfi_restore %r14
	movq -32(%rax),%r13
.cfi_restore %r13
	movq -24(%rax),%r12
.cfi_restore %r12
	movq -16(%rax),%rbp
.cfi_restore %rbp
	movq -8(%rax),%rbx
.cfi_restore %rbx
	leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lenc4x_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt

.globl aesni_multi_cbc_decrypt
.type aesni_multi_cbc_decrypt,@function
.align 32
aesni_multi_cbc_decrypt:
.cfi_startproc
	cmpl $2,%edx
	jb .Ldec_non_avx
	movl OPENSSL_ia32cap_P+4(%rip),%ecx
	testl $268435456,%ecx
	jnz _avx_cbc_dec_shortcut
	jmp .Ldec_non_avx
.align 16
.Ldec_non_avx:
	movq %rsp,%rax
.cfi_def_cfa_register %rax
	pushq %rbx
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_offset %rbp,-24
	pushq %r12
.cfi_offset %r12,-32
	pushq %r13
.cfi_offset %r13,-40
	pushq %r14
.cfi_offset %r14,-48
	pushq %r15
.cfi_offset %r15,-56

	subq $48,%rsp
	andq $-64,%rsp
	movq %rax,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec4x_body:
	movdqu (%rsi),%xmm12
	leaq 120(%rsi),%rsi
	leaq 80(%rdi),%rdi

.Ldec4x_loop_grande:
	movl %edx,24(%rsp)
	xorl %edx,%edx

	movl -64(%rdi),%ecx
	movq -80(%rdi),%r8
	cmpl %edx,%ecx
	movq -72(%rdi),%r12
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	movdqu -56(%rdi),%xmm6
	movl %ecx,32(%rsp)
	cmovleq %rsp,%r8

	movl -24(%rdi),%ecx
	movq -40(%rdi),%r9
	cmpl %edx,%ecx
	movq -32(%rdi),%r13
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	movdqu -16(%rdi),%xmm7
	movl %ecx,36(%rsp)
	cmovleq %rsp,%r9

	movl 16(%rdi),%ecx
	movq 0(%rdi),%r10
	cmpl %edx,%ecx
	movq 8(%rdi),%r14
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	movdqu 24(%rdi),%xmm8
	movl %ecx,40(%rsp)
	cmovleq %rsp,%r10

	movl 56(%rdi),%ecx
	movq 40(%rdi),%r11
	cmpl %edx,%ecx
	movq 48(%rdi),%r15
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	movdqu 64(%rdi),%xmm9
	movl %ecx,44(%rsp)
	cmovleq %rsp,%r11
	testl %edx,%edx
	jz .Ldec4x_done

	movups 16-120(%rsi),%xmm1
	movups 32-120(%rsi),%xmm0
	movl 240-120(%rsi),%eax
	movdqu (%r8),%xmm2
	movdqu (%r9),%xmm3
	pxor %xmm12,%xmm2
	movdqu (%r10),%xmm4
	pxor %xmm12,%xmm3
	movdqu (%r11),%xmm5
	pxor %xmm12,%xmm4
	pxor %xmm12,%xmm5
	movdqa 32(%rsp),%xmm10
	xorq %rbx,%rbx
	jmp .Loop_dec4x

.align 32
.Loop_dec4x:
	addq $16,%rbx
	leaq 16(%rsp),%rbp
	movl $1,%ecx
	subq %rbx,%rbp

.byte 102,15,56,222,209
	prefetcht0 31(%r8,%rbx,1)
	prefetcht0 31(%r9,%rbx,1)
.byte 102,15,56,222,217
	prefetcht0 31(%r10,%rbx,1)
	prefetcht0 31(%r11,%rbx,1)
.byte 102,15,56,222,225
.byte 102,15,56,222,233
	movups 48-120(%rsi),%xmm1
	cmpl 32(%rsp),%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
	cmovgeq %rbp,%r8
	cmovgq %rbp,%r12
.byte 102,15,56,222,232
	movups -56(%rsi),%xmm0
	cmpl 36(%rsp),%ecx
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
	cmovgeq %rbp,%r9
	cmovgq %rbp,%r13
.byte 102,15,56,222,233
	movups -40(%rsi),%xmm1
	cmpl 40(%rsp),%ecx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
	cmovgeq %rbp,%r10
	cmovgq %rbp,%r14
.byte 102,15,56,222,232
	movups -24(%rsi),%xmm0
	cmpl 44(%rsp),%ecx
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
	cmovgeq %rbp,%r11
	cmovgq %rbp,%r15
.byte 102,15,56,222,233
	movups -8(%rsi),%xmm1
	movdqa %xmm10,%xmm11
.byte 102,15,56,222,208
	prefetcht0 15(%r12,%rbx,1)
	prefetcht0 15(%r13,%rbx,1)
.byte 102,15,56,222,216
	prefetcht0 15(%r14,%rbx,1)
	prefetcht0 15(%r15,%rbx,1)
.byte 102,15,56,222,224
.byte 102,15,56,222,232
	movups 128-120(%rsi),%xmm0
	pxor %xmm12,%xmm12

.byte 102,15,56,222,209
	pcmpgtd %xmm12,%xmm11
	movdqu -120(%rsi),%xmm12
.byte 102,15,56,222,217
	paddd %xmm11,%xmm10
	movdqa %xmm10,32(%rsp)
.byte 102,15,56,222,225
.byte 102,15,56,222,233
	movups 144-120(%rsi),%xmm1

	cmpl $11,%eax

.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
	movups 160-120(%rsi),%xmm0

	jb .Ldec4x_tail

.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
	movups 176-120(%rsi),%xmm1

.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
	movups 192-120(%rsi),%xmm0

	je .Ldec4x_tail

.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
	movups 208-120(%rsi),%xmm1

.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
	movups 224-120(%rsi),%xmm0
	jmp .Ldec4x_tail

.align 32
.Ldec4x_tail:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
	pxor %xmm0,%xmm6
	pxor %xmm0,%xmm7
.byte 102,15,56,222,233
	movdqu 16-120(%rsi),%xmm1
	pxor %xmm0,%xmm8
	pxor %xmm0,%xmm9
	movdqu 32-120(%rsi),%xmm0

.byte 102,15,56,223,214
.byte 102,15,56,223,223
	movdqu -16(%r8,%rbx,1),%xmm6
	movdqu -16(%r9,%rbx,1),%xmm7
.byte 102,65,15,56,223,224
.byte 102,65,15,56,223,233
	movdqu -16(%r10,%rbx,1),%xmm8
	movdqu -16(%r11,%rbx,1),%xmm9

	movups %xmm2,-16(%r12,%rbx,1)
	movdqu (%r8,%rbx,1),%xmm2
	movups %xmm3,-16(%r13,%rbx,1)
	movdqu (%r9,%rbx,1),%xmm3
	pxor %xmm12,%xmm2
	movups %xmm4,-16(%r14,%rbx,1)
	movdqu (%r10,%rbx,1),%xmm4
	pxor %xmm12,%xmm3
	movups %xmm5,-16(%r15,%rbx,1)
	movdqu (%r11,%rbx,1),%xmm5
	pxor %xmm12,%xmm4
	pxor %xmm12,%xmm5

	decl %edx
	jnz .Loop_dec4x

	movq 16(%rsp),%rax
.cfi_def_cfa %rax,8
	movl 24(%rsp),%edx

	leaq 160(%rdi),%rdi
	decl %edx
	jnz .Ldec4x_loop_grande

.Ldec4x_done:
	movq -48(%rax),%r15
.cfi_restore %r15
	movq -40(%rax),%r14
.cfi_restore %r14
	movq -32(%rax),%r13
.cfi_restore %r13
	movq -24(%rax),%r12
.cfi_restore %r12
	movq -16(%rax),%rbp
.cfi_restore %rbp
	movq -8(%rax),%rbx
.cfi_restore %rbx
	leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Ldec4x_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
.type aesni_multi_cbc_encrypt_avx,@function
.align 32
aesni_multi_cbc_encrypt_avx:
.cfi_startproc
_avx_cbc_enc_shortcut:
	movq %rsp,%rax
.cfi_def_cfa_register %rax
	pushq %rbx
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_offset %rbp,-24
	pushq %r12
.cfi_offset %r12,-32
	pushq %r13
.cfi_offset %r13,-40
	pushq %r14
.cfi_offset %r14,-48
	pushq %r15
.cfi_offset %r15,-56

	subq $192,%rsp
	andq $-128,%rsp
	movq %rax,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc8x_body:
	vzeroupper
	vmovdqu (%rsi),%xmm15
	leaq 120(%rsi),%rsi
	leaq 160(%rdi),%rdi
	shrl $1,%edx

.Lenc8x_loop_grande:

	xorl %edx,%edx

	movl -144(%rdi),%ecx

	movq -160(%rdi),%r8
	cmpl %edx,%ecx

	movq -152(%rdi),%rbx
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu -136(%rdi),%xmm2
	movl %ecx,32(%rsp)
	cmovleq %rsp,%r8
	subq %r8,%rbx
	movq %rbx,64(%rsp)

	movl -104(%rdi),%ecx

	movq -120(%rdi),%r9
	cmpl %edx,%ecx

	movq -112(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu -96(%rdi),%xmm3
	movl %ecx,36(%rsp)
	cmovleq %rsp,%r9
	subq %r9,%rbp
	movq %rbp,72(%rsp)

	movl -64(%rdi),%ecx

	movq -80(%rdi),%r10
	cmpl %edx,%ecx

	movq -72(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu -56(%rdi),%xmm4
	movl %ecx,40(%rsp)
	cmovleq %rsp,%r10
	subq %r10,%rbp
	movq %rbp,80(%rsp)

	movl -24(%rdi),%ecx

	movq -40(%rdi),%r11
	cmpl %edx,%ecx

	movq -32(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu -16(%rdi),%xmm5
	movl %ecx,44(%rsp)
	cmovleq %rsp,%r11
	subq %r11,%rbp
	movq %rbp,88(%rsp)

	movl 16(%rdi),%ecx

	movq 0(%rdi),%r12
	cmpl %edx,%ecx

	movq 8(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu 24(%rdi),%xmm6
	movl %ecx,48(%rsp)
	cmovleq %rsp,%r12
	subq %r12,%rbp
	movq %rbp,96(%rsp)

	movl 56(%rdi),%ecx

	movq 40(%rdi),%r13
	cmpl %edx,%ecx

	movq 48(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu 64(%rdi),%xmm7
	movl %ecx,52(%rsp)
	cmovleq %rsp,%r13
	subq %r13,%rbp
	movq %rbp,104(%rsp)

	movl 96(%rdi),%ecx

	movq 80(%rdi),%r14
	cmpl %edx,%ecx

	movq 88(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu 104(%rdi),%xmm8
	movl %ecx,56(%rsp)
	cmovleq %rsp,%r14
	subq %r14,%rbp
	movq %rbp,112(%rsp)

	movl 136(%rdi),%ecx

	movq 120(%rdi),%r15
	cmpl %edx,%ecx

	movq 128(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu 144(%rdi),%xmm9
	movl %ecx,60(%rsp)
	cmovleq %rsp,%r15
	subq %r15,%rbp
	movq %rbp,120(%rsp)
	testl %edx,%edx
	jz .Lenc8x_done

	vmovups 16-120(%rsi),%xmm1
	vmovups 32-120(%rsi),%xmm0
	movl 240-120(%rsi),%eax

	vpxor (%r8),%xmm15,%xmm10
	leaq 128(%rsp),%rbp
	vpxor (%r9),%xmm15,%xmm11
	vpxor (%r10),%xmm15,%xmm12
	vpxor (%r11),%xmm15,%xmm13
	vpxor %xmm10,%xmm2,%xmm2
	vpxor (%r12),%xmm15,%xmm10
	vpxor %xmm11,%xmm3,%xmm3
	vpxor (%r13),%xmm15,%xmm11
	vpxor %xmm12,%xmm4,%xmm4
	vpxor (%r14),%xmm15,%xmm12
	vpxor %xmm13,%xmm5,%xmm5
	vpxor (%r15),%xmm15,%xmm13
	vpxor %xmm10,%xmm6,%xmm6
	movl $1,%ecx
	vpxor %xmm11,%xmm7,%xmm7
	vpxor %xmm12,%xmm8,%xmm8
	vpxor %xmm13,%xmm9,%xmm9
	jmp .Loop_enc8x

.align 32
.Loop_enc8x:
	vaesenc %xmm1,%xmm2,%xmm2
	cmpl 32+0(%rsp),%ecx
	vaesenc %xmm1,%xmm3,%xmm3
	prefetcht0 31(%r8)
	vaesenc %xmm1,%xmm4,%xmm4
	vaesenc %xmm1,%xmm5,%xmm5
	leaq (%r8,%rbx,1),%rbx
	cmovgeq %rsp,%r8
	vaesenc %xmm1,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesenc %xmm1,%xmm7,%xmm7
	subq %r8,%rbx
	vaesenc %xmm1,%xmm8,%xmm8
	vpxor 16(%r8),%xmm15,%xmm10
	movq %rbx,64+0(%rsp)
	vaesenc %xmm1,%xmm9,%xmm9
	vmovups -72(%rsi),%xmm1
	leaq 16(%r8,%rbx,1),%r8
	vmovdqu %xmm10,0(%rbp)
	vaesenc %xmm0,%xmm2,%xmm2
	cmpl 32+4(%rsp),%ecx
	movq 64+8(%rsp),%rbx
	vaesenc %xmm0,%xmm3,%xmm3
	prefetcht0 31(%r9)
	vaesenc %xmm0,%xmm4,%xmm4
	vaesenc %xmm0,%xmm5,%xmm5
	leaq (%r9,%rbx,1),%rbx
	cmovgeq %rsp,%r9
	vaesenc %xmm0,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesenc %xmm0,%xmm7,%xmm7
	subq %r9,%rbx
	vaesenc %xmm0,%xmm8,%xmm8
	vpxor 16(%r9),%xmm15,%xmm11
	movq %rbx,64+8(%rsp)
	vaesenc %xmm0,%xmm9,%xmm9
	vmovups -56(%rsi),%xmm0
	leaq 16(%r9,%rbx,1),%r9
	vmovdqu %xmm11,16(%rbp)
	vaesenc %xmm1,%xmm2,%xmm2
	cmpl 32+8(%rsp),%ecx
	movq 64+16(%rsp),%rbx
	vaesenc %xmm1,%xmm3,%xmm3
	prefetcht0 31(%r10)
	vaesenc %xmm1,%xmm4,%xmm4
	prefetcht0 15(%r8)
	vaesenc %xmm1,%xmm5,%xmm5
	leaq (%r10,%rbx,1),%rbx
	cmovgeq %rsp,%r10
	vaesenc %xmm1,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesenc %xmm1,%xmm7,%xmm7
	subq %r10,%rbx
	vaesenc %xmm1,%xmm8,%xmm8
	vpxor 16(%r10),%xmm15,%xmm12
	movq %rbx,64+16(%rsp)
	vaesenc %xmm1,%xmm9,%xmm9
	vmovups -40(%rsi),%xmm1
	leaq 16(%r10,%rbx,1),%r10
	vmovdqu %xmm12,32(%rbp)
	vaesenc %xmm0,%xmm2,%xmm2
	cmpl 32+12(%rsp),%ecx
	movq 64+24(%rsp),%rbx
	vaesenc %xmm0,%xmm3,%xmm3
	prefetcht0 31(%r11)
	vaesenc %xmm0,%xmm4,%xmm4
	prefetcht0 15(%r9)
	vaesenc %xmm0,%xmm5,%xmm5
	leaq (%r11,%rbx,1),%rbx
	cmovgeq %rsp,%r11
	vaesenc %xmm0,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesenc %xmm0,%xmm7,%xmm7
	subq %r11,%rbx
	vaesenc %xmm0,%xmm8,%xmm8
	vpxor 16(%r11),%xmm15,%xmm13
	movq %rbx,64+24(%rsp)
	vaesenc %xmm0,%xmm9,%xmm9
	vmovups -24(%rsi),%xmm0
	leaq 16(%r11,%rbx,1),%r11
	vmovdqu %xmm13,48(%rbp)
	vaesenc %xmm1,%xmm2,%xmm2
	cmpl 32+16(%rsp),%ecx
	movq 64+32(%rsp),%rbx
	vaesenc %xmm1,%xmm3,%xmm3
	prefetcht0 31(%r12)
	vaesenc %xmm1,%xmm4,%xmm4
	prefetcht0 15(%r10)
	vaesenc %xmm1,%xmm5,%xmm5
	leaq (%r12,%rbx,1),%rbx
	cmovgeq %rsp,%r12
	vaesenc %xmm1,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesenc %xmm1,%xmm7,%xmm7
	subq %r12,%rbx
	vaesenc %xmm1,%xmm8,%xmm8
	vpxor 16(%r12),%xmm15,%xmm10
	movq %rbx,64+32(%rsp)
	vaesenc %xmm1,%xmm9,%xmm9
	vmovups -8(%rsi),%xmm1
	leaq 16(%r12,%rbx,1),%r12
	vaesenc %xmm0,%xmm2,%xmm2
	cmpl 32+20(%rsp),%ecx
	movq 64+40(%rsp),%rbx
	vaesenc %xmm0,%xmm3,%xmm3
	prefetcht0 31(%r13)
	vaesenc %xmm0,%xmm4,%xmm4
	prefetcht0 15(%r11)
	vaesenc %xmm0,%xmm5,%xmm5
	leaq (%rbx,%r13,1),%rbx
	cmovgeq %rsp,%r13
	vaesenc %xmm0,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesenc %xmm0,%xmm7,%xmm7
	subq %r13,%rbx
	vaesenc %xmm0,%xmm8,%xmm8
	vpxor 16(%r13),%xmm15,%xmm11
	movq %rbx,64+40(%rsp)
	vaesenc %xmm0,%xmm9,%xmm9
	vmovups 8(%rsi),%xmm0
	leaq 16(%r13,%rbx,1),%r13
	vaesenc %xmm1,%xmm2,%xmm2
	cmpl 32+24(%rsp),%ecx
	movq 64+48(%rsp),%rbx
	vaesenc %xmm1,%xmm3,%xmm3
	prefetcht0 31(%r14)
	vaesenc %xmm1,%xmm4,%xmm4
	prefetcht0 15(%r12)
	vaesenc %xmm1,%xmm5,%xmm5
	leaq (%r14,%rbx,1),%rbx
	cmovgeq %rsp,%r14
	vaesenc %xmm1,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesenc %xmm1,%xmm7,%xmm7
	subq %r14,%rbx
	vaesenc %xmm1,%xmm8,%xmm8
	vpxor 16(%r14),%xmm15,%xmm12
	movq %rbx,64+48(%rsp)
	vaesenc %xmm1,%xmm9,%xmm9
	vmovups 24(%rsi),%xmm1
	leaq 16(%r14,%rbx,1),%r14
	vaesenc %xmm0,%xmm2,%xmm2
	cmpl 32+28(%rsp),%ecx
	movq 64+56(%rsp),%rbx
	vaesenc %xmm0,%xmm3,%xmm3
	prefetcht0 31(%r15)
	vaesenc %xmm0,%xmm4,%xmm4
	prefetcht0 15(%r13)
	vaesenc %xmm0,%xmm5,%xmm5
	leaq (%r15,%rbx,1),%rbx
	cmovgeq %rsp,%r15
	vaesenc %xmm0,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesenc %xmm0,%xmm7,%xmm7
	subq %r15,%rbx
	vaesenc %xmm0,%xmm8,%xmm8
	vpxor 16(%r15),%xmm15,%xmm13
	movq %rbx,64+56(%rsp)
	vaesenc %xmm0,%xmm9,%xmm9
	vmovups 40(%rsi),%xmm0
	leaq 16(%r15,%rbx,1),%r15
	vmovdqu 32(%rsp),%xmm14
	prefetcht0 15(%r14)
	prefetcht0 15(%r15)
	cmpl $11,%eax
	jb .Lenc8x_tail

	vaesenc %xmm1,%xmm2,%xmm2
	vaesenc %xmm1,%xmm3,%xmm3
	vaesenc %xmm1,%xmm4,%xmm4
	vaesenc %xmm1,%xmm5,%xmm5
	vaesenc %xmm1,%xmm6,%xmm6
	vaesenc %xmm1,%xmm7,%xmm7
	vaesenc %xmm1,%xmm8,%xmm8
	vaesenc %xmm1,%xmm9,%xmm9
	vmovups 176-120(%rsi),%xmm1

	vaesenc %xmm0,%xmm2,%xmm2
	vaesenc %xmm0,%xmm3,%xmm3
	vaesenc %xmm0,%xmm4,%xmm4
	vaesenc %xmm0,%xmm5,%xmm5
	vaesenc %xmm0,%xmm6,%xmm6
	vaesenc %xmm0,%xmm7,%xmm7
	vaesenc %xmm0,%xmm8,%xmm8
	vaesenc %xmm0,%xmm9,%xmm9
	vmovups 192-120(%rsi),%xmm0
	je .Lenc8x_tail

	vaesenc %xmm1,%xmm2,%xmm2
	vaesenc %xmm1,%xmm3,%xmm3
	vaesenc %xmm1,%xmm4,%xmm4
	vaesenc %xmm1,%xmm5,%xmm5
	vaesenc %xmm1,%xmm6,%xmm6
	vaesenc %xmm1,%xmm7,%xmm7
	vaesenc %xmm1,%xmm8,%xmm8
	vaesenc %xmm1,%xmm9,%xmm9
	vmovups 208-120(%rsi),%xmm1

	vaesenc %xmm0,%xmm2,%xmm2
	vaesenc %xmm0,%xmm3,%xmm3
	vaesenc %xmm0,%xmm4,%xmm4
	vaesenc %xmm0,%xmm5,%xmm5
	vaesenc %xmm0,%xmm6,%xmm6
	vaesenc %xmm0,%xmm7,%xmm7
	vaesenc %xmm0,%xmm8,%xmm8
	vaesenc %xmm0,%xmm9,%xmm9
	vmovups 224-120(%rsi),%xmm0

.Lenc8x_tail:
	vaesenc %xmm1,%xmm2,%xmm2
	vpxor %xmm15,%xmm15,%xmm15
	vaesenc %xmm1,%xmm3,%xmm3
	vaesenc %xmm1,%xmm4,%xmm4
	vpcmpgtd %xmm15,%xmm14,%xmm15
	vaesenc %xmm1,%xmm5,%xmm5
	vaesenc %xmm1,%xmm6,%xmm6
	vpaddd %xmm14,%xmm15,%xmm15
	vmovdqu 48(%rsp),%xmm14
	vaesenc %xmm1,%xmm7,%xmm7
	movq 64(%rsp),%rbx
	vaesenc %xmm1,%xmm8,%xmm8
	vaesenc %xmm1,%xmm9,%xmm9
	vmovups 16-120(%rsi),%xmm1

	vaesenclast %xmm0,%xmm2,%xmm2
	vmovdqa %xmm15,32(%rsp)
	vpxor %xmm15,%xmm15,%xmm15
	vaesenclast %xmm0,%xmm3,%xmm3
	vaesenclast %xmm0,%xmm4,%xmm4
	vpcmpgtd %xmm15,%xmm14,%xmm15
	vaesenclast %xmm0,%xmm5,%xmm5
	vaesenclast %xmm0,%xmm6,%xmm6
	vpaddd %xmm15,%xmm14,%xmm14
	vmovdqu -120(%rsi),%xmm15
	vaesenclast %xmm0,%xmm7,%xmm7
	vaesenclast %xmm0,%xmm8,%xmm8
	vmovdqa %xmm14,48(%rsp)
	vaesenclast %xmm0,%xmm9,%xmm9
	vmovups 32-120(%rsi),%xmm0

	vmovups %xmm2,-16(%r8)
	subq %rbx,%r8
	vpxor 0(%rbp),%xmm2,%xmm2
	vmovups %xmm3,-16(%r9)
	subq 72(%rsp),%r9
	vpxor 16(%rbp),%xmm3,%xmm3
	vmovups %xmm4,-16(%r10)
	subq 80(%rsp),%r10
	vpxor 32(%rbp),%xmm4,%xmm4
	vmovups %xmm5,-16(%r11)
	subq 88(%rsp),%r11
	vpxor 48(%rbp),%xmm5,%xmm5
	vmovups %xmm6,-16(%r12)
	subq 96(%rsp),%r12
	vpxor %xmm10,%xmm6,%xmm6
	vmovups %xmm7,-16(%r13)
	subq 104(%rsp),%r13
	vpxor %xmm11,%xmm7,%xmm7
	vmovups %xmm8,-16(%r14)
	subq 112(%rsp),%r14
	vpxor %xmm12,%xmm8,%xmm8
	vmovups %xmm9,-16(%r15)
	subq 120(%rsp),%r15
	vpxor %xmm13,%xmm9,%xmm9

	decl %edx
	jnz .Loop_enc8x

	movq 16(%rsp),%rax
.cfi_def_cfa %rax,8

.Lenc8x_done:
	vzeroupper
	movq -48(%rax),%r15
.cfi_restore %r15
	movq -40(%rax),%r14
.cfi_restore %r14
	movq -32(%rax),%r13
.cfi_restore %r13
	movq -24(%rax),%r12
.cfi_restore %r12
	movq -16(%rax),%rbp
.cfi_restore %rbp
	movq -8(%rax),%rbx
.cfi_restore %rbx
	leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lenc8x_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx

.type aesni_multi_cbc_decrypt_avx,@function
.align 32
aesni_multi_cbc_decrypt_avx:
.cfi_startproc
_avx_cbc_dec_shortcut:
	movq %rsp,%rax
.cfi_def_cfa_register %rax
	pushq %rbx
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_offset %rbp,-24
	pushq %r12
.cfi_offset %r12,-32
	pushq %r13
.cfi_offset %r13,-40
	pushq %r14
.cfi_offset %r14,-48
	pushq %r15
.cfi_offset %r15,-56

	subq $256,%rsp
	andq $-256,%rsp
	subq $192,%rsp
	movq %rax,16(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec8x_body:
	vzeroupper
	vmovdqu (%rsi),%xmm15
	leaq 120(%rsi),%rsi
	leaq 160(%rdi),%rdi
	shrl $1,%edx

.Ldec8x_loop_grande:

	xorl %edx,%edx

	movl -144(%rdi),%ecx

	movq -160(%rdi),%r8
	cmpl %edx,%ecx

	movq -152(%rdi),%rbx
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu -136(%rdi),%xmm2
	movl %ecx,32(%rsp)
	cmovleq %rsp,%r8
	subq %r8,%rbx
	movq %rbx,64(%rsp)
	vmovdqu %xmm2,192(%rsp)

	movl -104(%rdi),%ecx

	movq -120(%rdi),%r9
	cmpl %edx,%ecx

	movq -112(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu -96(%rdi),%xmm3
	movl %ecx,36(%rsp)
	cmovleq %rsp,%r9
	subq %r9,%rbp
	movq %rbp,72(%rsp)
	vmovdqu %xmm3,208(%rsp)

	movl -64(%rdi),%ecx

	movq -80(%rdi),%r10
	cmpl %edx,%ecx

	movq -72(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu -56(%rdi),%xmm4
	movl %ecx,40(%rsp)
	cmovleq %rsp,%r10
	subq %r10,%rbp
	movq %rbp,80(%rsp)
	vmovdqu %xmm4,224(%rsp)

	movl -24(%rdi),%ecx

	movq -40(%rdi),%r11
	cmpl %edx,%ecx

	movq -32(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu -16(%rdi),%xmm5
	movl %ecx,44(%rsp)
	cmovleq %rsp,%r11
	subq %r11,%rbp
	movq %rbp,88(%rsp)
	vmovdqu %xmm5,240(%rsp)

	movl 16(%rdi),%ecx

	movq 0(%rdi),%r12
	cmpl %edx,%ecx

	movq 8(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu 24(%rdi),%xmm6
	movl %ecx,48(%rsp)
	cmovleq %rsp,%r12
	subq %r12,%rbp
	movq %rbp,96(%rsp)
	vmovdqu %xmm6,256(%rsp)

	movl 56(%rdi),%ecx

	movq 40(%rdi),%r13
	cmpl %edx,%ecx

	movq 48(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu 64(%rdi),%xmm7
	movl %ecx,52(%rsp)
	cmovleq %rsp,%r13
	subq %r13,%rbp
	movq %rbp,104(%rsp)
	vmovdqu %xmm7,272(%rsp)

	movl 96(%rdi),%ecx

	movq 80(%rdi),%r14
	cmpl %edx,%ecx

	movq 88(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu 104(%rdi),%xmm8
	movl %ecx,56(%rsp)
	cmovleq %rsp,%r14
	subq %r14,%rbp
	movq %rbp,112(%rsp)
	vmovdqu %xmm8,288(%rsp)

	movl 136(%rdi),%ecx

	movq 120(%rdi),%r15
	cmpl %edx,%ecx

	movq 128(%rdi),%rbp
	cmovgl %ecx,%edx
	testl %ecx,%ecx

	vmovdqu 144(%rdi),%xmm9
	movl %ecx,60(%rsp)
	cmovleq %rsp,%r15
	subq %r15,%rbp
	movq %rbp,120(%rsp)
	vmovdqu %xmm9,304(%rsp)
	testl %edx,%edx
	jz .Ldec8x_done

	vmovups 16-120(%rsi),%xmm1
	vmovups 32-120(%rsi),%xmm0
	movl 240-120(%rsi),%eax
	leaq 192+128(%rsp),%rbp

	vmovdqu (%r8),%xmm2
	vmovdqu (%r9),%xmm3
	vmovdqu (%r10),%xmm4
	vmovdqu (%r11),%xmm5
	vmovdqu (%r12),%xmm6
	vmovdqu (%r13),%xmm7
	vmovdqu (%r14),%xmm8
	vmovdqu (%r15),%xmm9
	vmovdqu %xmm2,0(%rbp)
	vpxor %xmm15,%xmm2,%xmm2
	vmovdqu %xmm3,16(%rbp)
	vpxor %xmm15,%xmm3,%xmm3
	vmovdqu %xmm4,32(%rbp)
	vpxor %xmm15,%xmm4,%xmm4
	vmovdqu %xmm5,48(%rbp)
	vpxor %xmm15,%xmm5,%xmm5
	vmovdqu %xmm6,64(%rbp)
	vpxor %xmm15,%xmm6,%xmm6
	vmovdqu %xmm7,80(%rbp)
	vpxor %xmm15,%xmm7,%xmm7
	vmovdqu %xmm8,96(%rbp)
	vpxor %xmm15,%xmm8,%xmm8
	vmovdqu %xmm9,112(%rbp)
	vpxor %xmm15,%xmm9,%xmm9
	xorq $0x80,%rbp
	movl $1,%ecx
	jmp .Loop_dec8x

.align 32
.Loop_dec8x:
	vaesdec %xmm1,%xmm2,%xmm2
	cmpl 32+0(%rsp),%ecx
	vaesdec %xmm1,%xmm3,%xmm3
	prefetcht0 31(%r8)
	vaesdec %xmm1,%xmm4,%xmm4
	vaesdec %xmm1,%xmm5,%xmm5
	leaq (%r8,%rbx,1),%rbx
	cmovgeq %rsp,%r8
	vaesdec %xmm1,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesdec %xmm1,%xmm7,%xmm7
	subq %r8,%rbx
	vaesdec %xmm1,%xmm8,%xmm8
	vmovdqu 16(%r8),%xmm10
	movq %rbx,64+0(%rsp)
	vaesdec %xmm1,%xmm9,%xmm9
	vmovups -72(%rsi),%xmm1
	leaq 16(%r8,%rbx,1),%r8
	vmovdqu %xmm10,128(%rsp)
	vaesdec %xmm0,%xmm2,%xmm2
	cmpl 32+4(%rsp),%ecx
	movq 64+8(%rsp),%rbx
	vaesdec %xmm0,%xmm3,%xmm3
	prefetcht0 31(%r9)
	vaesdec %xmm0,%xmm4,%xmm4
	vaesdec %xmm0,%xmm5,%xmm5
	leaq (%r9,%rbx,1),%rbx
	cmovgeq %rsp,%r9
	vaesdec %xmm0,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesdec %xmm0,%xmm7,%xmm7
	subq %r9,%rbx
	vaesdec %xmm0,%xmm8,%xmm8
	vmovdqu 16(%r9),%xmm11
	movq %rbx,64+8(%rsp)
	vaesdec %xmm0,%xmm9,%xmm9
	vmovups -56(%rsi),%xmm0
	leaq 16(%r9,%rbx,1),%r9
	vmovdqu %xmm11,144(%rsp)
	vaesdec %xmm1,%xmm2,%xmm2
	cmpl 32+8(%rsp),%ecx
	movq 64+16(%rsp),%rbx
	vaesdec %xmm1,%xmm3,%xmm3
	prefetcht0 31(%r10)
	vaesdec %xmm1,%xmm4,%xmm4
	prefetcht0 15(%r8)
	vaesdec %xmm1,%xmm5,%xmm5
	leaq (%r10,%rbx,1),%rbx
	cmovgeq %rsp,%r10
	vaesdec %xmm1,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesdec %xmm1,%xmm7,%xmm7
	subq %r10,%rbx
	vaesdec %xmm1,%xmm8,%xmm8
	vmovdqu 16(%r10),%xmm12
	movq %rbx,64+16(%rsp)
	vaesdec %xmm1,%xmm9,%xmm9
	vmovups -40(%rsi),%xmm1
	leaq 16(%r10,%rbx,1),%r10
	vmovdqu %xmm12,160(%rsp)
	vaesdec %xmm0,%xmm2,%xmm2
	cmpl 32+12(%rsp),%ecx
	movq 64+24(%rsp),%rbx
	vaesdec %xmm0,%xmm3,%xmm3
	prefetcht0 31(%r11)
	vaesdec %xmm0,%xmm4,%xmm4
	prefetcht0 15(%r9)
	vaesdec %xmm0,%xmm5,%xmm5
	leaq (%r11,%rbx,1),%rbx
	cmovgeq %rsp,%r11
	vaesdec %xmm0,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesdec %xmm0,%xmm7,%xmm7
	subq %r11,%rbx
	vaesdec %xmm0,%xmm8,%xmm8
	vmovdqu 16(%r11),%xmm13
	movq %rbx,64+24(%rsp)
	vaesdec %xmm0,%xmm9,%xmm9
	vmovups -24(%rsi),%xmm0
	leaq 16(%r11,%rbx,1),%r11
	vmovdqu %xmm13,176(%rsp)
	vaesdec %xmm1,%xmm2,%xmm2
	cmpl 32+16(%rsp),%ecx
	movq 64+32(%rsp),%rbx
	vaesdec %xmm1,%xmm3,%xmm3
	prefetcht0 31(%r12)
	vaesdec %xmm1,%xmm4,%xmm4
	prefetcht0 15(%r10)
	vaesdec %xmm1,%xmm5,%xmm5
	leaq (%r12,%rbx,1),%rbx
	cmovgeq %rsp,%r12
	vaesdec %xmm1,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesdec %xmm1,%xmm7,%xmm7
	subq %r12,%rbx
	vaesdec %xmm1,%xmm8,%xmm8
	vmovdqu 16(%r12),%xmm10
	movq %rbx,64+32(%rsp)
	vaesdec %xmm1,%xmm9,%xmm9
	vmovups -8(%rsi),%xmm1
	leaq 16(%r12,%rbx,1),%r12
	vaesdec %xmm0,%xmm2,%xmm2
	cmpl 32+20(%rsp),%ecx
	movq 64+40(%rsp),%rbx
	vaesdec %xmm0,%xmm3,%xmm3
	prefetcht0 31(%r13)
	vaesdec %xmm0,%xmm4,%xmm4
	prefetcht0 15(%r11)
	vaesdec %xmm0,%xmm5,%xmm5
	leaq (%rbx,%r13,1),%rbx
	cmovgeq %rsp,%r13
	vaesdec %xmm0,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesdec %xmm0,%xmm7,%xmm7
	subq %r13,%rbx
	vaesdec %xmm0,%xmm8,%xmm8
	vmovdqu 16(%r13),%xmm11
	movq %rbx,64+40(%rsp)
	vaesdec %xmm0,%xmm9,%xmm9
	vmovups 8(%rsi),%xmm0
	leaq 16(%r13,%rbx,1),%r13
	vaesdec %xmm1,%xmm2,%xmm2
	cmpl 32+24(%rsp),%ecx
	movq 64+48(%rsp),%rbx
	vaesdec %xmm1,%xmm3,%xmm3
	prefetcht0 31(%r14)
	vaesdec %xmm1,%xmm4,%xmm4
	prefetcht0 15(%r12)
	vaesdec %xmm1,%xmm5,%xmm5
	leaq (%r14,%rbx,1),%rbx
	cmovgeq %rsp,%r14
	vaesdec %xmm1,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesdec %xmm1,%xmm7,%xmm7
	subq %r14,%rbx
	vaesdec %xmm1,%xmm8,%xmm8
	vmovdqu 16(%r14),%xmm12
	movq %rbx,64+48(%rsp)
	vaesdec %xmm1,%xmm9,%xmm9
	vmovups 24(%rsi),%xmm1
	leaq 16(%r14,%rbx,1),%r14
	vaesdec %xmm0,%xmm2,%xmm2
	cmpl 32+28(%rsp),%ecx
	movq 64+56(%rsp),%rbx
	vaesdec %xmm0,%xmm3,%xmm3
	prefetcht0 31(%r15)
	vaesdec %xmm0,%xmm4,%xmm4
	prefetcht0 15(%r13)
	vaesdec %xmm0,%xmm5,%xmm5
	leaq (%r15,%rbx,1),%rbx
	cmovgeq %rsp,%r15
	vaesdec %xmm0,%xmm6,%xmm6
	cmovgq %rsp,%rbx
	vaesdec %xmm0,%xmm7,%xmm7
	subq %r15,%rbx
	vaesdec %xmm0,%xmm8,%xmm8
	vmovdqu 16(%r15),%xmm13
	movq %rbx,64+56(%rsp)
	vaesdec %xmm0,%xmm9,%xmm9
	vmovups 40(%rsi),%xmm0
	leaq 16(%r15,%rbx,1),%r15
	vmovdqu 32(%rsp),%xmm14
	prefetcht0 15(%r14)
	prefetcht0 15(%r15)
	cmpl $11,%eax
	jb .Ldec8x_tail

	vaesdec %xmm1,%xmm2,%xmm2
	vaesdec %xmm1,%xmm3,%xmm3
	vaesdec %xmm1,%xmm4,%xmm4
	vaesdec %xmm1,%xmm5,%xmm5
	vaesdec %xmm1,%xmm6,%xmm6
	vaesdec %xmm1,%xmm7,%xmm7
	vaesdec %xmm1,%xmm8,%xmm8
	vaesdec %xmm1,%xmm9,%xmm9
	vmovups 176-120(%rsi),%xmm1

	vaesdec %xmm0,%xmm2,%xmm2
	vaesdec %xmm0,%xmm3,%xmm3
	vaesdec %xmm0,%xmm4,%xmm4
	vaesdec %xmm0,%xmm5,%xmm5
	vaesdec %xmm0,%xmm6,%xmm6
	vaesdec %xmm0,%xmm7,%xmm7
	vaesdec %xmm0,%xmm8,%xmm8
	vaesdec %xmm0,%xmm9,%xmm9
	vmovups 192-120(%rsi),%xmm0
	je .Ldec8x_tail

	vaesdec %xmm1,%xmm2,%xmm2
	vaesdec %xmm1,%xmm3,%xmm3
	vaesdec %xmm1,%xmm4,%xmm4
	vaesdec %xmm1,%xmm5,%xmm5
	vaesdec %xmm1,%xmm6,%xmm6
	vaesdec %xmm1,%xmm7,%xmm7
	vaesdec %xmm1,%xmm8,%xmm8
	vaesdec %xmm1,%xmm9,%xmm9
	vmovups 208-120(%rsi),%xmm1

	vaesdec %xmm0,%xmm2,%xmm2
	vaesdec %xmm0,%xmm3,%xmm3
	vaesdec %xmm0,%xmm4,%xmm4
	vaesdec %xmm0,%xmm5,%xmm5
	vaesdec %xmm0,%xmm6,%xmm6
	vaesdec %xmm0,%xmm7,%xmm7
	vaesdec %xmm0,%xmm8,%xmm8
	vaesdec %xmm0,%xmm9,%xmm9
	vmovups 224-120(%rsi),%xmm0

.Ldec8x_tail:
	vaesdec %xmm1,%xmm2,%xmm2
	vpxor %xmm15,%xmm15,%xmm15
	vaesdec %xmm1,%xmm3,%xmm3
	vaesdec %xmm1,%xmm4,%xmm4
	vpcmpgtd %xmm15,%xmm14,%xmm15
	vaesdec %xmm1,%xmm5,%xmm5
	vaesdec %xmm1,%xmm6,%xmm6
	vpaddd %xmm14,%xmm15,%xmm15
	vmovdqu 48(%rsp),%xmm14
	vaesdec %xmm1,%xmm7,%xmm7
	movq 64(%rsp),%rbx
	vaesdec %xmm1,%xmm8,%xmm8
	vaesdec %xmm1,%xmm9,%xmm9
	vmovups 16-120(%rsi),%xmm1

	vaesdeclast %xmm0,%xmm2,%xmm2
	vmovdqa %xmm15,32(%rsp)
	vpxor %xmm15,%xmm15,%xmm15
	vaesdeclast %xmm0,%xmm3,%xmm3
	vpxor 0(%rbp),%xmm2,%xmm2
	vaesdeclast %xmm0,%xmm4,%xmm4
	vpxor 16(%rbp),%xmm3,%xmm3
	vpcmpgtd %xmm15,%xmm14,%xmm15
	vaesdeclast %xmm0,%xmm5,%xmm5
	vpxor 32(%rbp),%xmm4,%xmm4
	vaesdeclast %xmm0,%xmm6,%xmm6
	vpxor 48(%rbp),%xmm5,%xmm5
	vpaddd %xmm15,%xmm14,%xmm14
	vmovdqu -120(%rsi),%xmm15
	vaesdeclast %xmm0,%xmm7,%xmm7
	vpxor 64(%rbp),%xmm6,%xmm6
	vaesdeclast %xmm0,%xmm8,%xmm8
	vpxor 80(%rbp),%xmm7,%xmm7
	vmovdqa %xmm14,48(%rsp)
	vaesdeclast %xmm0,%xmm9,%xmm9
	vpxor 96(%rbp),%xmm8,%xmm8
	vmovups 32-120(%rsi),%xmm0

	vmovups %xmm2,-16(%r8)
	subq %rbx,%r8
	vmovdqu 128+0(%rsp),%xmm2
	vpxor 112(%rbp),%xmm9,%xmm9
	vmovups %xmm3,-16(%r9)
	subq 72(%rsp),%r9
	vmovdqu %xmm2,0(%rbp)
	vpxor %xmm15,%xmm2,%xmm2
	vmovdqu 128+16(%rsp),%xmm3
	vmovups %xmm4,-16(%r10)
	subq 80(%rsp),%r10
	vmovdqu %xmm3,16(%rbp)
	vpxor %xmm15,%xmm3,%xmm3
	vmovdqu 128+32(%rsp),%xmm4
	vmovups %xmm5,-16(%r11)
	subq 88(%rsp),%r11
	vmovdqu %xmm4,32(%rbp)
	vpxor %xmm15,%xmm4,%xmm4
	vmovdqu 128+48(%rsp),%xmm5
	vmovups %xmm6,-16(%r12)
	subq 96(%rsp),%r12
	vmovdqu %xmm5,48(%rbp)
	vpxor %xmm15,%xmm5,%xmm5
	vmovdqu %xmm10,64(%rbp)
	vpxor %xmm10,%xmm15,%xmm6
	vmovups %xmm7,-16(%r13)
	subq 104(%rsp),%r13
	vmovdqu %xmm11,80(%rbp)
	vpxor %xmm11,%xmm15,%xmm7
	vmovups %xmm8,-16(%r14)
	subq 112(%rsp),%r14
	vmovdqu %xmm12,96(%rbp)
	vpxor %xmm12,%xmm15,%xmm8
	vmovups %xmm9,-16(%r15)
	subq 120(%rsp),%r15
	vmovdqu %xmm13,112(%rbp)
	vpxor %xmm13,%xmm15,%xmm9

	xorq $128,%rbp
	decl %edx
	jnz .Loop_dec8x

	movq 16(%rsp),%rax
.cfi_def_cfa %rax,8

.Ldec8x_done:
	vzeroupper
	movq -48(%rax),%r15
.cfi_restore %r15
	movq -40(%rax),%r14
.cfi_restore %r14
	movq -32(%rax),%r13
.cfi_restore %r13
	movq -24(%rax),%r12
.cfi_restore %r12
	movq -16(%rax),%rbp
.cfi_restore %rbp
	movq -8(%rax),%rbx
.cfi_restore %rbx
	leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Ldec8x_epilogue:
	.byte 0xf3,0xc3
.cfi_endproc
.size aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4: