/* Do not modify. This file is auto-generated from aesni-mb-x86_64.pl. */
.text

.globl	aesni_multi_cbc_encrypt
.type	aesni_multi_cbc_encrypt,@function
.align	32
aesni_multi_cbc_encrypt:
.cfi_startproc
	cmpl	$2,%edx
	jb	.Lenc_non_avx
	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
	testl	$268435456,%ecx
	jnz	_avx_cbc_enc_shortcut
	jmp	.Lenc_non_avx
.align	16
.Lenc_non_avx:
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp
	movq	%rax,16(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc4x_body:
	movdqu	(%rsi),%xmm12
	leaq	120(%rsi),%rsi
	leaq	80(%rdi),%rdi

.Lenc4x_loop_grande:
	movl	%edx,24(%rsp)
	xorl	%edx,%edx
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	testl	%edx,%edx
	jz	.Lenc4x_done

	movups	16-120(%rsi),%xmm1
	pxor	%xmm12,%xmm2
	movups	32-120(%rsi),%xmm0
	pxor	%xmm12,%xmm3
	movl	240-120(%rsi),%eax
	pxor	%xmm12,%xmm4
	movdqu	(%r8),%xmm6
	pxor	%xmm12,%xmm5
	movdqu	(%r9),%xmm7
	pxor	%xmm6,%xmm2
	movdqu	(%r10),%xmm8
	pxor	%xmm7,%xmm3
	movdqu	(%r11),%xmm9
	pxor	%xmm8,%xmm4
	pxor	%xmm9,%xmm5
	movdqa	32(%rsp),%xmm10
	xorq	%rbx,%rbx
	jmp	.Loop_enc4x

.align	32
.Loop_enc4x:
	addq	$16,%rbx
	leaq	16(%rsp),%rbp
	movl	$1,%ecx
	subq	%rbx,%rbp

.byte	102,15,56,220,209
	prefetcht0	31(%r8,%rbx,1)
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,220,217
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r10,%rbx,1)
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	48-120(%rsi),%xmm1
	cmpl	32(%rsp),%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
.byte	102,15,56,220,232
	movups	-56(%rsi),%xmm0
	cmpl	36(%rsp),%ecx
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,220,233
	movups	-40(%rsi),%xmm1
	cmpl	40(%rsp),%ecx
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,220,232
	movups	-24(%rsi),%xmm0
	cmpl	44(%rsp),%ecx
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,220,233
	movups	-8(%rsi),%xmm1
	movdqa	%xmm10,%xmm11
.byte	102,15,56,220,208
	prefetcht0	15(%r12,%rbx,1)
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,220,216
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	128-120(%rsi),%xmm0
	pxor	%xmm12,%xmm12

.byte	102,15,56,220,209
	pcmpgtd	%xmm12,%xmm11
	movdqu	-120(%rsi),%xmm12
.byte	102,15,56,220,217
	paddd	%xmm11,%xmm10
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	144-120(%rsi),%xmm1

	cmpl	$11,%eax

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	160-120(%rsi),%xmm0

	jb	.Lenc4x_tail

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	176-120(%rsi),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	192-120(%rsi),%xmm0

	je	.Lenc4x_tail

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	208-120(%rsi),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	224-120(%rsi),%xmm0
	jmp	.Lenc4x_tail

.align	32
.Lenc4x_tail:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movdqu	(%r8,%rbx,1),%xmm6
	movdqu	16-120(%rsi),%xmm1

.byte	102,15,56,221,208
	movdqu	(%r9,%rbx,1),%xmm7
	pxor	%xmm12,%xmm6
.byte	102,15,56,221,216
	movdqu	(%r10,%rbx,1),%xmm8
	pxor	%xmm12,%xmm7
.byte	102,15,56,221,224
	movdqu	(%r11,%rbx,1),%xmm9
	pxor	%xmm12,%xmm8
.byte	102,15,56,221,232
	movdqu	32-120(%rsi),%xmm0
	pxor	%xmm12,%xmm9

	movups	%xmm2,-16(%r12,%rbx,1)
	pxor	%xmm6,%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	pxor	%xmm7,%xmm3
	movups	%xmm4,-16(%r14,%rbx,1)
	pxor	%xmm8,%xmm4
	movups	%xmm5,-16(%r15,%rbx,1)
	pxor	%xmm9,%xmm5

	decl	%edx
	jnz	.Loop_enc4x

	movq	16(%rsp),%rax
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx

	leaq	160(%rdi),%rdi
	decl	%edx
	jnz	.Lenc4x_loop_grande

.Lenc4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc4x_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt

.globl	aesni_multi_cbc_decrypt
.type	aesni_multi_cbc_decrypt,@function
.align	32
aesni_multi_cbc_decrypt:
.cfi_startproc
	cmpl	$2,%edx
	jb	.Ldec_non_avx
	movl	OPENSSL_ia32cap_P+4(%rip),%ecx
	testl	$268435456,%ecx
	jnz	_avx_cbc_dec_shortcut
	jmp	.Ldec_non_avx
.align	16
.Ldec_non_avx:
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp
	movq	%rax,16(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec4x_body:
	movdqu	(%rsi),%xmm12
	leaq	120(%rsi),%rsi
	leaq	80(%rdi),%rdi

.Ldec4x_loop_grande:
	movl	%edx,24(%rsp)
	xorl	%edx,%edx
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm6
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm7
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm8
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm9
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	testl	%edx,%edx
	jz	.Ldec4x_done

	movups	16-120(%rsi),%xmm1
	movups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax
	movdqu	(%r8),%xmm2
	movdqu	(%r9),%xmm3
	pxor	%xmm12,%xmm2
	movdqu	(%r10),%xmm4
	pxor	%xmm12,%xmm3
	movdqu	(%r11),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5
	movdqa	32(%rsp),%xmm10
	xorq	%rbx,%rbx
	jmp	.Loop_dec4x

.align	32
.Loop_dec4x:
	addq	$16,%rbx
	leaq	16(%rsp),%rbp
	movl	$1,%ecx
	subq	%rbx,%rbp

.byte	102,15,56,222,209
	prefetcht0	31(%r8,%rbx,1)
	prefetcht0	31(%r9,%rbx,1)
.byte	102,15,56,222,217
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	48-120(%rsi),%xmm1
	cmpl	32(%rsp),%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
.byte	102,15,56,222,232
	movups	-56(%rsi),%xmm0
	cmpl	36(%rsp),%ecx
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
.byte	102,15,56,222,233
	movups	-40(%rsi),%xmm1
	cmpl	40(%rsp),%ecx
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
.byte	102,15,56,222,232
	movups	-24(%rsi),%xmm0
	cmpl	44(%rsp),%ecx
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
.byte	102,15,56,222,233
	movups	-8(%rsi),%xmm1
	movdqa	%xmm10,%xmm11
.byte	102,15,56,222,208
	prefetcht0	15(%r12,%rbx,1)
	prefetcht0	15(%r13,%rbx,1)
.byte	102,15,56,222,216
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	128-120(%rsi),%xmm0
	pxor	%xmm12,%xmm12

.byte	102,15,56,222,209
	pcmpgtd	%xmm12,%xmm11
	movdqu	-120(%rsi),%xmm12
.byte	102,15,56,222,217
	paddd	%xmm11,%xmm10
	movdqa	%xmm10,32(%rsp)
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	144-120(%rsi),%xmm1

	cmpl	$11,%eax

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	160-120(%rsi),%xmm0

	jb	.Ldec4x_tail

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	176-120(%rsi),%xmm1

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	192-120(%rsi),%xmm0

	je	.Ldec4x_tail

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	208-120(%rsi),%xmm1

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	224-120(%rsi),%xmm0
	jmp	.Ldec4x_tail

.align	32
.Ldec4x_tail:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
.byte	102,15,56,222,233
	movdqu	16-120(%rsi),%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm0,%xmm9
	movdqu	32-120(%rsi),%xmm0

.byte	102,15,56,223,214
.byte	102,15,56,223,223
	movdqu	-16(%r8,%rbx,1),%xmm6
	movdqu	-16(%r9,%rbx,1),%xmm7
.byte	102,65,15,56,223,224
.byte	102,65,15,56,223,233
	movdqu	-16(%r10,%rbx,1),%xmm8
	movdqu	-16(%r11,%rbx,1),%xmm9

	movups	%xmm2,-16(%r12,%rbx,1)
	movdqu	(%r8,%rbx,1),%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	movdqu	(%r9,%rbx,1),%xmm3
	pxor	%xmm12,%xmm2
	movups	%xmm4,-16(%r14,%rbx,1)
	movdqu	(%r10,%rbx,1),%xmm4
	pxor	%xmm12,%xmm3
	movups	%xmm5,-16(%r15,%rbx,1)
	movdqu	(%r11,%rbx,1),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5

	decl	%edx
	jnz	.Loop_dec4x

	movq	16(%rsp),%rax
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx

	leaq	160(%rdi),%rdi
	decl	%edx
	jnz	.Ldec4x_loop_grande

.Ldec4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec4x_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
.type	aesni_multi_cbc_encrypt_avx,@function
.align	32
aesni_multi_cbc_encrypt_avx:
.cfi_startproc
_avx_cbc_enc_shortcut:
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$192,%rsp
	andq	$-128,%rsp
	movq	%rax,16(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc8x_body:
	vzeroupper
	vmovdqu	(%rsi),%xmm15
	leaq	120(%rsi),%rsi
	leaq	160(%rdi),%rdi
	shrl	$1,%edx

.Lenc8x_loop_grande:

	xorl	%edx,%edx
	movl	-144(%rdi),%ecx
	movq	-160(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-152(%rdi),%rbx
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-136(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	subq	%r8,%rbx
	movq	%rbx,64(%rsp)
	movl	-104(%rdi),%ecx
	movq	-120(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-112(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-96(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	subq	%r9,%rbp
	movq	%rbp,72(%rsp)
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r10
	cmpl	%edx,%ecx
	movq	-72(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-56(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	subq	%r10,%rbp
	movq	%rbp,80(%rsp)
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	-32(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-16(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	subq	%r11,%rbp
	movq	%rbp,88(%rsp)
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r12
	cmpl	%edx,%ecx
	movq	8(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	24(%rdi),%xmm6
	movl	%ecx,48(%rsp)
	cmovleq	%rsp,%r12
	subq	%r12,%rbp
	movq	%rbp,96(%rsp)
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r13
	cmpl	%edx,%ecx
	movq	48(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	64(%rdi),%xmm7
	movl	%ecx,52(%rsp)
	cmovleq	%rsp,%r13
	subq	%r13,%rbp
	movq	%rbp,104(%rsp)
	movl	96(%rdi),%ecx
	movq	80(%rdi),%r14
	cmpl	%edx,%ecx
	movq	88(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	104(%rdi),%xmm8
	movl	%ecx,56(%rsp)
	cmovleq	%rsp,%r14
	subq	%r14,%rbp
	movq	%rbp,112(%rsp)
	movl	136(%rdi),%ecx
	movq	120(%rdi),%r15
	cmpl	%edx,%ecx
	movq	128(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	144(%rdi),%xmm9
	movl	%ecx,60(%rsp)
	cmovleq	%rsp,%r15
	subq	%r15,%rbp
	movq	%rbp,120(%rsp)
	testl	%edx,%edx
	jz	.Lenc8x_done

	vmovups	16-120(%rsi),%xmm1
	vmovups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax

	vpxor	(%r8),%xmm15,%xmm10
	leaq	128(%rsp),%rbp
	vpxor	(%r9),%xmm15,%xmm11
	vpxor	(%r10),%xmm15,%xmm12
	vpxor	(%r11),%xmm15,%xmm13
	vpxor	%xmm10,%xmm2,%xmm2
	vpxor	(%r12),%xmm15,%xmm10
	vpxor	%xmm11,%xmm3,%xmm3
	vpxor	(%r13),%xmm15,%xmm11
	vpxor	%xmm12,%xmm4,%xmm4
	vpxor	(%r14),%xmm15,%xmm12
	vpxor	%xmm13,%xmm5,%xmm5
	vpxor	(%r15),%xmm15,%xmm13
	vpxor	%xmm10,%xmm6,%xmm6
	movl	$1,%ecx
	vpxor	%xmm11,%xmm7,%xmm7
	vpxor	%xmm12,%xmm8,%xmm8
	vpxor	%xmm13,%xmm9,%xmm9
	jmp	.Loop_enc8x

.align	32
.Loop_enc8x:
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+0(%rsp),%ecx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r8)
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r8,%rbx,1),%rbx
	cmovgeq	%rsp,%r8
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r8,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r8),%xmm15,%xmm10
	movq	%rbx,64+0(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-72(%rsi),%xmm1
	leaq	16(%r8,%rbx,1),%r8
	vmovdqu	%xmm10,0(%rbp)
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+4(%rsp),%ecx
	movq	64+8(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r9)
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r9,%rbx,1),%rbx
	cmovgeq	%rsp,%r9
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r9,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r9),%xmm15,%xmm11
	movq	%rbx,64+8(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	-56(%rsi),%xmm0
	leaq	16(%r9,%rbx,1),%r9
	vmovdqu	%xmm11,16(%rbp)
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+8(%rsp),%ecx
	movq	64+16(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r10)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r8)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r10,%rbx,1),%rbx
	cmovgeq	%rsp,%r10
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r10,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r10),%xmm15,%xmm12
	movq	%rbx,64+16(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-40(%rsi),%xmm1
	leaq	16(%r10,%rbx,1),%r10
	vmovdqu	%xmm12,32(%rbp)
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+12(%rsp),%ecx
	movq	64+24(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r11)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r9)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r11,%rbx,1),%rbx
	cmovgeq	%rsp,%r11
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r11,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r11),%xmm15,%xmm13
	movq	%rbx,64+24(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	-24(%rsi),%xmm0
	leaq	16(%r11,%rbx,1),%r11
	vmovdqu	%xmm13,48(%rbp)
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+16(%rsp),%ecx
	movq	64+32(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r12)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r10)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r12,%rbx,1),%rbx
	cmovgeq	%rsp,%r12
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r12,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r12),%xmm15,%xmm10
	movq	%rbx,64+32(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	-8(%rsi),%xmm1
	leaq	16(%r12,%rbx,1),%r12
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+20(%rsp),%ecx
	movq	64+40(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r13)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r11)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%rbx,%r13,1),%rbx
	cmovgeq	%rsp,%r13
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r13,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r13),%xmm15,%xmm11
	movq	%rbx,64+40(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	8(%rsi),%xmm0
	leaq	16(%r13,%rbx,1),%r13
	vaesenc	%xmm1,%xmm2,%xmm2
	cmpl	32+24(%rsp),%ecx
	movq	64+48(%rsp),%rbx
	vaesenc	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r14)
	vaesenc	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r12)
	vaesenc	%xmm1,%xmm5,%xmm5
	leaq	(%r14,%rbx,1),%rbx
	cmovgeq	%rsp,%r14
	vaesenc	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm1,%xmm7,%xmm7
	subq	%r14,%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vpxor	16(%r14),%xmm15,%xmm12
	movq	%rbx,64+48(%rsp)
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	24(%rsi),%xmm1
	leaq	16(%r14,%rbx,1),%r14
	vaesenc	%xmm0,%xmm2,%xmm2
	cmpl	32+28(%rsp),%ecx
	movq	64+56(%rsp),%rbx
	vaesenc	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r15)
	vaesenc	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r13)
	vaesenc	%xmm0,%xmm5,%xmm5
	leaq	(%r15,%rbx,1),%rbx
	cmovgeq	%rsp,%r15
	vaesenc	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesenc	%xmm0,%xmm7,%xmm7
	subq	%r15,%rbx
	vaesenc	%xmm0,%xmm8,%xmm8
	vpxor	16(%r15),%xmm15,%xmm13
	movq	%rbx,64+56(%rsp)
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	40(%rsi),%xmm0
	leaq	16(%r15,%rbx,1),%r15
	vmovdqu	32(%rsp),%xmm14
	prefetcht0	15(%r14)
	prefetcht0	15(%r15)
	cmpl	$11,%eax
	jb	.Lenc8x_tail

	vaesenc	%xmm1,%xmm2,%xmm2
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vaesenc	%xmm1,%xmm7,%xmm7
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	176-120(%rsi),%xmm1

	vaesenc	%xmm0,%xmm2,%xmm2
	vaesenc	%xmm0,%xmm3,%xmm3
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	vaesenc	%xmm0,%xmm6,%xmm6
	vaesenc	%xmm0,%xmm7,%xmm7
	vaesenc	%xmm0,%xmm8,%xmm8
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	192-120(%rsi),%xmm0
	je	.Lenc8x_tail

	vaesenc	%xmm1,%xmm2,%xmm2
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vaesenc	%xmm1,%xmm7,%xmm7
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	208-120(%rsi),%xmm1

	vaesenc	%xmm0,%xmm2,%xmm2
	vaesenc	%xmm0,%xmm3,%xmm3
	vaesenc	%xmm0,%xmm4,%xmm4
	vaesenc	%xmm0,%xmm5,%xmm5
	vaesenc	%xmm0,%xmm6,%xmm6
	vaesenc	%xmm0,%xmm7,%xmm7
	vaesenc	%xmm0,%xmm8,%xmm8
	vaesenc	%xmm0,%xmm9,%xmm9
	vmovups	224-120(%rsi),%xmm0

.Lenc8x_tail:
	vaesenc	%xmm1,%xmm2,%xmm2
	vpxor	%xmm15,%xmm15,%xmm15
	vaesenc	%xmm1,%xmm3,%xmm3
	vaesenc	%xmm1,%xmm4,%xmm4
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesenc	%xmm1,%xmm5,%xmm5
	vaesenc	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm14,%xmm15,%xmm15
	vmovdqu	48(%rsp),%xmm14
	vaesenc	%xmm1,%xmm7,%xmm7
	movq	64(%rsp),%rbx
	vaesenc	%xmm1,%xmm8,%xmm8
	vaesenc	%xmm1,%xmm9,%xmm9
	vmovups	16-120(%rsi),%xmm1

	vaesenclast	%xmm0,%xmm2,%xmm2
	vmovdqa	%xmm15,32(%rsp)
	vpxor	%xmm15,%xmm15,%xmm15
	vaesenclast	%xmm0,%xmm3,%xmm3
	vaesenclast	%xmm0,%xmm4,%xmm4
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesenclast	%xmm0,%xmm5,%xmm5
	vaesenclast	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm15,%xmm14,%xmm14
	vmovdqu	-120(%rsi),%xmm15
	vaesenclast	%xmm0,%xmm7,%xmm7
	vaesenclast	%xmm0,%xmm8,%xmm8
	vmovdqa	%xmm14,48(%rsp)
	vaesenclast	%xmm0,%xmm9,%xmm9
	vmovups	32-120(%rsi),%xmm0

	vmovups	%xmm2,-16(%r8)
	subq	%rbx,%r8
	vpxor	0(%rbp),%xmm2,%xmm2
	vmovups	%xmm3,-16(%r9)
	subq	72(%rsp),%r9
	vpxor	16(%rbp),%xmm3,%xmm3
	vmovups	%xmm4,-16(%r10)
	subq	80(%rsp),%r10
	vpxor	32(%rbp),%xmm4,%xmm4
	vmovups	%xmm5,-16(%r11)
	subq	88(%rsp),%r11
	vpxor	48(%rbp),%xmm5,%xmm5
	vmovups	%xmm6,-16(%r12)
	subq	96(%rsp),%r12
	vpxor	%xmm10,%xmm6,%xmm6
	vmovups	%xmm7,-16(%r13)
	subq	104(%rsp),%r13
	vpxor	%xmm11,%xmm7,%xmm7
	vmovups	%xmm8,-16(%r14)
	subq	112(%rsp),%r14
	vpxor	%xmm12,%xmm8,%xmm8
	vmovups	%xmm9,-16(%r15)
	subq	120(%rsp),%r15
	vpxor	%xmm13,%xmm9,%xmm9

	decl	%edx
	jnz	.Loop_enc8x

	movq	16(%rsp),%rax
.cfi_def_cfa	%rax,8

.Lenc8x_done:
	vzeroupper
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc8x_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_multi_cbc_encrypt_avx,.-aesni_multi_cbc_encrypt_avx

.type	aesni_multi_cbc_decrypt_avx,@function
.align	32
aesni_multi_cbc_decrypt_avx:
.cfi_startproc
_avx_cbc_dec_shortcut:
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$256,%rsp
	andq	$-256,%rsp
	subq	$192,%rsp
	movq	%rax,16(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec8x_body:
	vzeroupper
	vmovdqu	(%rsi),%xmm15
	leaq	120(%rsi),%rsi
	leaq	160(%rdi),%rdi
	shrl	$1,%edx

.Ldec8x_loop_grande:

	xorl	%edx,%edx
	movl	-144(%rdi),%ecx
	movq	-160(%rdi),%r8
	cmpl	%edx,%ecx
	movq	-152(%rdi),%rbx
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-136(%rdi),%xmm2
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8
	subq	%r8,%rbx
	movq	%rbx,64(%rsp)
	vmovdqu	%xmm2,192(%rsp)
	movl	-104(%rdi),%ecx
	movq	-120(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-112(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-96(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9
	subq	%r9,%rbp
	movq	%rbp,72(%rsp)
	vmovdqu	%xmm3,208(%rsp)
	movl	-64(%rdi),%ecx
	movq	-80(%rdi),%r10
	cmpl	%edx,%ecx
	movq	-72(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-56(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10
	subq	%r10,%rbp
	movq	%rbp,80(%rsp)
	vmovdqu	%xmm4,224(%rsp)
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	-32(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	-16(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11
	subq	%r11,%rbp
	movq	%rbp,88(%rsp)
	vmovdqu	%xmm5,240(%rsp)
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r12
	cmpl	%edx,%ecx
	movq	8(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	24(%rdi),%xmm6
	movl	%ecx,48(%rsp)
	cmovleq	%rsp,%r12
	subq	%r12,%rbp
	movq	%rbp,96(%rsp)
	vmovdqu	%xmm6,256(%rsp)
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r13
	cmpl	%edx,%ecx
	movq	48(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	64(%rdi),%xmm7
	movl	%ecx,52(%rsp)
	cmovleq	%rsp,%r13
	subq	%r13,%rbp
	movq	%rbp,104(%rsp)
	vmovdqu	%xmm7,272(%rsp)
	movl	96(%rdi),%ecx
	movq	80(%rdi),%r14
	cmpl	%edx,%ecx
	movq	88(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	104(%rdi),%xmm8
	movl	%ecx,56(%rsp)
	cmovleq	%rsp,%r14
	subq	%r14,%rbp
	movq	%rbp,112(%rsp)
	vmovdqu	%xmm8,288(%rsp)
	movl	136(%rdi),%ecx
	movq	120(%rdi),%r15
	cmpl	%edx,%ecx
	movq	128(%rdi),%rbp
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	vmovdqu	144(%rdi),%xmm9
	movl	%ecx,60(%rsp)
	cmovleq	%rsp,%r15
	subq	%r15,%rbp
	movq	%rbp,120(%rsp)
	vmovdqu	%xmm9,304(%rsp)
	testl	%edx,%edx
	jz	.Ldec8x_done

	vmovups	16-120(%rsi),%xmm1
	vmovups	32-120(%rsi),%xmm0
	movl	240-120(%rsi),%eax
	leaq	192+128(%rsp),%rbp

	vmovdqu	(%r8),%xmm2
	vmovdqu	(%r9),%xmm3
	vmovdqu	(%r10),%xmm4
	vmovdqu	(%r11),%xmm5
	vmovdqu	(%r12),%xmm6
	vmovdqu	(%r13),%xmm7
	vmovdqu	(%r14),%xmm8
	vmovdqu	(%r15),%xmm9
	vmovdqu	%xmm2,0(%rbp)
	vpxor	%xmm15,%xmm2,%xmm2
	vmovdqu	%xmm3,16(%rbp)
	vpxor	%xmm15,%xmm3,%xmm3
	vmovdqu	%xmm4,32(%rbp)
	vpxor	%xmm15,%xmm4,%xmm4
	vmovdqu	%xmm5,48(%rbp)
	vpxor	%xmm15,%xmm5,%xmm5
	vmovdqu	%xmm6,64(%rbp)
	vpxor	%xmm15,%xmm6,%xmm6
	vmovdqu	%xmm7,80(%rbp)
	vpxor	%xmm15,%xmm7,%xmm7
	vmovdqu	%xmm8,96(%rbp)
	vpxor	%xmm15,%xmm8,%xmm8
	vmovdqu	%xmm9,112(%rbp)
	vpxor	%xmm15,%xmm9,%xmm9
	xorq	$0x80,%rbp
	movl	$1,%ecx
	jmp	.Loop_dec8x

.align	32
.Loop_dec8x:
	vaesdec	%xmm1,%xmm2,%xmm2
	cmpl	32+0(%rsp),%ecx
	vaesdec	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r8)
	vaesdec	%xmm1,%xmm4,%xmm4
	vaesdec	%xmm1,%xmm5,%xmm5
	leaq	(%r8,%rbx,1),%rbx
	cmovgeq	%rsp,%r8
	vaesdec	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm1,%xmm7,%xmm7
	subq	%r8,%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vmovdqu	16(%r8),%xmm10
	movq	%rbx,64+0(%rsp)
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	-72(%rsi),%xmm1
	leaq	16(%r8,%rbx,1),%r8
	vmovdqu	%xmm10,128(%rsp)
	vaesdec	%xmm0,%xmm2,%xmm2
	cmpl	32+4(%rsp),%ecx
	movq	64+8(%rsp),%rbx
	vaesdec	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r9)
	vaesdec	%xmm0,%xmm4,%xmm4
	vaesdec	%xmm0,%xmm5,%xmm5
	leaq	(%r9,%rbx,1),%rbx
	cmovgeq	%rsp,%r9
	vaesdec	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm0,%xmm7,%xmm7
	subq	%r9,%rbx
	vaesdec	%xmm0,%xmm8,%xmm8
	vmovdqu	16(%r9),%xmm11
	movq	%rbx,64+8(%rsp)
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	-56(%rsi),%xmm0
	leaq	16(%r9,%rbx,1),%r9
	vmovdqu	%xmm11,144(%rsp)
	vaesdec	%xmm1,%xmm2,%xmm2
	cmpl	32+8(%rsp),%ecx
	movq	64+16(%rsp),%rbx
	vaesdec	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r10)
	vaesdec	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r8)
	vaesdec	%xmm1,%xmm5,%xmm5
	leaq	(%r10,%rbx,1),%rbx
	cmovgeq	%rsp,%r10
	vaesdec	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm1,%xmm7,%xmm7
	subq	%r10,%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vmovdqu	16(%r10),%xmm12
	movq	%rbx,64+16(%rsp)
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	-40(%rsi),%xmm1
	leaq	16(%r10,%rbx,1),%r10
	vmovdqu	%xmm12,160(%rsp)
	vaesdec	%xmm0,%xmm2,%xmm2
	cmpl	32+12(%rsp),%ecx
	movq	64+24(%rsp),%rbx
	vaesdec	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r11)
	vaesdec	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r9)
	vaesdec	%xmm0,%xmm5,%xmm5
	leaq	(%r11,%rbx,1),%rbx
	cmovgeq	%rsp,%r11
	vaesdec	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm0,%xmm7,%xmm7
	subq	%r11,%rbx
	vaesdec	%xmm0,%xmm8,%xmm8
	vmovdqu	16(%r11),%xmm13
	movq	%rbx,64+24(%rsp)
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	-24(%rsi),%xmm0
	leaq	16(%r11,%rbx,1),%r11
	vmovdqu	%xmm13,176(%rsp)
	vaesdec	%xmm1,%xmm2,%xmm2
	cmpl	32+16(%rsp),%ecx
	movq	64+32(%rsp),%rbx
	vaesdec	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r12)
	vaesdec	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r10)
	vaesdec	%xmm1,%xmm5,%xmm5
	leaq	(%r12,%rbx,1),%rbx
	cmovgeq	%rsp,%r12
	vaesdec	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm1,%xmm7,%xmm7
	subq	%r12,%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vmovdqu	16(%r12),%xmm10
	movq	%rbx,64+32(%rsp)
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	-8(%rsi),%xmm1
	leaq	16(%r12,%rbx,1),%r12
	vaesdec	%xmm0,%xmm2,%xmm2
	cmpl	32+20(%rsp),%ecx
	movq	64+40(%rsp),%rbx
	vaesdec	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r13)
	vaesdec	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r11)
	vaesdec	%xmm0,%xmm5,%xmm5
	leaq	(%rbx,%r13,1),%rbx
	cmovgeq	%rsp,%r13
	vaesdec	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm0,%xmm7,%xmm7
	subq	%r13,%rbx
	vaesdec	%xmm0,%xmm8,%xmm8
	vmovdqu	16(%r13),%xmm11
	movq	%rbx,64+40(%rsp)
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	8(%rsi),%xmm0
	leaq	16(%r13,%rbx,1),%r13
	vaesdec	%xmm1,%xmm2,%xmm2
	cmpl	32+24(%rsp),%ecx
	movq	64+48(%rsp),%rbx
	vaesdec	%xmm1,%xmm3,%xmm3
	prefetcht0	31(%r14)
	vaesdec	%xmm1,%xmm4,%xmm4
	prefetcht0	15(%r12)
	vaesdec	%xmm1,%xmm5,%xmm5
	leaq	(%r14,%rbx,1),%rbx
	cmovgeq	%rsp,%r14
	vaesdec	%xmm1,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm1,%xmm7,%xmm7
	subq	%r14,%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vmovdqu	16(%r14),%xmm12
	movq	%rbx,64+48(%rsp)
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	24(%rsi),%xmm1
	leaq	16(%r14,%rbx,1),%r14
	vaesdec	%xmm0,%xmm2,%xmm2
	cmpl	32+28(%rsp),%ecx
	movq	64+56(%rsp),%rbx
	vaesdec	%xmm0,%xmm3,%xmm3
	prefetcht0	31(%r15)
	vaesdec	%xmm0,%xmm4,%xmm4
	prefetcht0	15(%r13)
	vaesdec	%xmm0,%xmm5,%xmm5
	leaq	(%r15,%rbx,1),%rbx
	cmovgeq	%rsp,%r15
	vaesdec	%xmm0,%xmm6,%xmm6
	cmovgq	%rsp,%rbx
	vaesdec	%xmm0,%xmm7,%xmm7
	subq	%r15,%rbx
	vaesdec	%xmm0,%xmm8,%xmm8
	vmovdqu	16(%r15),%xmm13
	movq	%rbx,64+56(%rsp)
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	40(%rsi),%xmm0
	leaq	16(%r15,%rbx,1),%r15
	vmovdqu	32(%rsp),%xmm14
	prefetcht0	15(%r14)
	prefetcht0	15(%r15)
	cmpl	$11,%eax
	jb	.Ldec8x_tail

	vaesdec	%xmm1,%xmm2,%xmm2
	vaesdec	%xmm1,%xmm3,%xmm3
	vaesdec	%xmm1,%xmm4,%xmm4
	vaesdec	%xmm1,%xmm5,%xmm5
	vaesdec	%xmm1,%xmm6,%xmm6
	vaesdec	%xmm1,%xmm7,%xmm7
	vaesdec	%xmm1,%xmm8,%xmm8
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	176-120(%rsi),%xmm1

	vaesdec	%xmm0,%xmm2,%xmm2
	vaesdec	%xmm0,%xmm3,%xmm3
	vaesdec	%xmm0,%xmm4,%xmm4
	vaesdec	%xmm0,%xmm5,%xmm5
	vaesdec	%xmm0,%xmm6,%xmm6
	vaesdec	%xmm0,%xmm7,%xmm7
	vaesdec	%xmm0,%xmm8,%xmm8
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	192-120(%rsi),%xmm0
	je	.Ldec8x_tail

	vaesdec	%xmm1,%xmm2,%xmm2
	vaesdec	%xmm1,%xmm3,%xmm3
	vaesdec	%xmm1,%xmm4,%xmm4
	vaesdec	%xmm1,%xmm5,%xmm5
	vaesdec	%xmm1,%xmm6,%xmm6
	vaesdec	%xmm1,%xmm7,%xmm7
	vaesdec	%xmm1,%xmm8,%xmm8
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	208-120(%rsi),%xmm1

	vaesdec	%xmm0,%xmm2,%xmm2
	vaesdec	%xmm0,%xmm3,%xmm3
	vaesdec	%xmm0,%xmm4,%xmm4
	vaesdec	%xmm0,%xmm5,%xmm5
	vaesdec	%xmm0,%xmm6,%xmm6
	vaesdec	%xmm0,%xmm7,%xmm7
	vaesdec	%xmm0,%xmm8,%xmm8
	vaesdec	%xmm0,%xmm9,%xmm9
	vmovups	224-120(%rsi),%xmm0

.Ldec8x_tail:
	vaesdec	%xmm1,%xmm2,%xmm2
	vpxor	%xmm15,%xmm15,%xmm15
	vaesdec	%xmm1,%xmm3,%xmm3
	vaesdec	%xmm1,%xmm4,%xmm4
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesdec	%xmm1,%xmm5,%xmm5
	vaesdec	%xmm1,%xmm6,%xmm6
	vpaddd	%xmm14,%xmm15,%xmm15
	vmovdqu	48(%rsp),%xmm14
	vaesdec	%xmm1,%xmm7,%xmm7
	movq	64(%rsp),%rbx
	vaesdec	%xmm1,%xmm8,%xmm8
	vaesdec	%xmm1,%xmm9,%xmm9
	vmovups	16-120(%rsi),%xmm1

	vaesdeclast	%xmm0,%xmm2,%xmm2
	vmovdqa	%xmm15,32(%rsp)
	vpxor	%xmm15,%xmm15,%xmm15
	vaesdeclast	%xmm0,%xmm3,%xmm3
	vpxor	0(%rbp),%xmm2,%xmm2
	vaesdeclast	%xmm0,%xmm4,%xmm4
	vpxor	16(%rbp),%xmm3,%xmm3
	vpcmpgtd	%xmm15,%xmm14,%xmm15
	vaesdeclast	%xmm0,%xmm5,%xmm5
	vpxor	32(%rbp),%xmm4,%xmm4
	vaesdeclast	%xmm0,%xmm6,%xmm6
	vpxor	48(%rbp),%xmm5,%xmm5
	vpaddd	%xmm15,%xmm14,%xmm14
	vmovdqu	-120(%rsi),%xmm15
	vaesdeclast	%xmm0,%xmm7,%xmm7
	vpxor	64(%rbp),%xmm6,%xmm6
	vaesdeclast	%xmm0,%xmm8,%xmm8
	vpxor	80(%rbp),%xmm7,%xmm7
	vmovdqa	%xmm14,48(%rsp)
	vaesdeclast	%xmm0,%xmm9,%xmm9
	vpxor	96(%rbp),%xmm8,%xmm8
	vmovups	32-120(%rsi),%xmm0

	vmovups	%xmm2,-16(%r8)
	subq	%rbx,%r8
	vmovdqu	128+0(%rsp),%xmm2
	vpxor	112(%rbp),%xmm9,%xmm9
	vmovups	%xmm3,-16(%r9)
	subq	72(%rsp),%r9
	vmovdqu	%xmm2,0(%rbp)
	vpxor	%xmm15,%xmm2,%xmm2
	vmovdqu	128+16(%rsp),%xmm3
	vmovups	%xmm4,-16(%r10)
	subq	80(%rsp),%r10
	vmovdqu	%xmm3,16(%rbp)
	vpxor	%xmm15,%xmm3,%xmm3
	vmovdqu	128+32(%rsp),%xmm4
	vmovups	%xmm5,-16(%r11)
	subq	88(%rsp),%r11
	vmovdqu	%xmm4,32(%rbp)
	vpxor	%xmm15,%xmm4,%xmm4
	vmovdqu	128+48(%rsp),%xmm5
	vmovups	%xmm6,-16(%r12)
	subq	96(%rsp),%r12
	vmovdqu	%xmm5,48(%rbp)
	vpxor	%xmm15,%xmm5,%xmm5
	vmovdqu	%xmm10,64(%rbp)
	vpxor	%xmm10,%xmm15,%xmm6
	vmovups	%xmm7,-16(%r13)
	subq	104(%rsp),%r13
	vmovdqu	%xmm11,80(%rbp)
	vpxor	%xmm11,%xmm15,%xmm7
	vmovups	%xmm8,-16(%r14)
	subq	112(%rsp),%r14
	vmovdqu	%xmm12,96(%rbp)
	vpxor	%xmm12,%xmm15,%xmm8
	vmovups	%xmm9,-16(%r15)
	subq	120(%rsp),%r15
	vmovdqu	%xmm13,112(%rbp)
	vpxor	%xmm13,%xmm15,%xmm9

	xorq	$128,%rbp
	decl	%edx
	jnz	.Loop_dec8x

	movq	16(%rsp),%rax
.cfi_def_cfa	%rax,8

.Ldec8x_done:
	vzeroupper
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec8x_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_multi_cbc_decrypt_avx,.-aesni_multi_cbc_decrypt_avx