1/* Do not modify. This file is auto-generated from aesni-x86_64.pl. */ 2.text 3 4.globl aesni_encrypt 5.type aesni_encrypt,@function 6.align 16 7aesni_encrypt: 8.cfi_startproc 9 movups (%rdi),%xmm2 10 movl 240(%rdx),%eax 11 movups (%rdx),%xmm0 12 movups 16(%rdx),%xmm1 13 leaq 32(%rdx),%rdx 14 xorps %xmm0,%xmm2 15.Loop_enc1_1: 16.byte 102,15,56,220,209 17 decl %eax 18 movups (%rdx),%xmm1 19 leaq 16(%rdx),%rdx 20 jnz .Loop_enc1_1 21.byte 102,15,56,221,209 22 pxor %xmm0,%xmm0 23 pxor %xmm1,%xmm1 24 movups %xmm2,(%rsi) 25 pxor %xmm2,%xmm2 26 .byte 0xf3,0xc3 27.cfi_endproc 28.size aesni_encrypt,.-aesni_encrypt 29 30.globl aesni_decrypt 31.type aesni_decrypt,@function 32.align 16 33aesni_decrypt: 34.cfi_startproc 35 movups (%rdi),%xmm2 36 movl 240(%rdx),%eax 37 movups (%rdx),%xmm0 38 movups 16(%rdx),%xmm1 39 leaq 32(%rdx),%rdx 40 xorps %xmm0,%xmm2 41.Loop_dec1_2: 42.byte 102,15,56,222,209 43 decl %eax 44 movups (%rdx),%xmm1 45 leaq 16(%rdx),%rdx 46 jnz .Loop_dec1_2 47.byte 102,15,56,223,209 48 pxor %xmm0,%xmm0 49 pxor %xmm1,%xmm1 50 movups %xmm2,(%rsi) 51 pxor %xmm2,%xmm2 52 .byte 0xf3,0xc3 53.cfi_endproc 54.size aesni_decrypt, .-aesni_decrypt 55.type _aesni_encrypt2,@function 56.align 16 57_aesni_encrypt2: 58.cfi_startproc 59 movups (%rcx),%xmm0 60 shll $4,%eax 61 movups 16(%rcx),%xmm1 62 xorps %xmm0,%xmm2 63 xorps %xmm0,%xmm3 64 movups 32(%rcx),%xmm0 65 leaq 32(%rcx,%rax,1),%rcx 66 negq %rax 67 addq $16,%rax 68 69.Lenc_loop2: 70.byte 102,15,56,220,209 71.byte 102,15,56,220,217 72 movups (%rcx,%rax,1),%xmm1 73 addq $32,%rax 74.byte 102,15,56,220,208 75.byte 102,15,56,220,216 76 movups -16(%rcx,%rax,1),%xmm0 77 jnz .Lenc_loop2 78 79.byte 102,15,56,220,209 80.byte 102,15,56,220,217 81.byte 102,15,56,221,208 82.byte 102,15,56,221,216 83 .byte 0xf3,0xc3 84.cfi_endproc 85.size _aesni_encrypt2,.-_aesni_encrypt2 86.type _aesni_decrypt2,@function 87.align 16 88_aesni_decrypt2: 89.cfi_startproc 90 movups (%rcx),%xmm0 91 shll $4,%eax 92 movups 16(%rcx),%xmm1 93 xorps %xmm0,%xmm2 
94 xorps %xmm0,%xmm3 95 movups 32(%rcx),%xmm0 96 leaq 32(%rcx,%rax,1),%rcx 97 negq %rax 98 addq $16,%rax 99 100.Ldec_loop2: 101.byte 102,15,56,222,209 102.byte 102,15,56,222,217 103 movups (%rcx,%rax,1),%xmm1 104 addq $32,%rax 105.byte 102,15,56,222,208 106.byte 102,15,56,222,216 107 movups -16(%rcx,%rax,1),%xmm0 108 jnz .Ldec_loop2 109 110.byte 102,15,56,222,209 111.byte 102,15,56,222,217 112.byte 102,15,56,223,208 113.byte 102,15,56,223,216 114 .byte 0xf3,0xc3 115.cfi_endproc 116.size _aesni_decrypt2,.-_aesni_decrypt2 117.type _aesni_encrypt3,@function 118.align 16 119_aesni_encrypt3: 120.cfi_startproc 121 movups (%rcx),%xmm0 122 shll $4,%eax 123 movups 16(%rcx),%xmm1 124 xorps %xmm0,%xmm2 125 xorps %xmm0,%xmm3 126 xorps %xmm0,%xmm4 127 movups 32(%rcx),%xmm0 128 leaq 32(%rcx,%rax,1),%rcx 129 negq %rax 130 addq $16,%rax 131 132.Lenc_loop3: 133.byte 102,15,56,220,209 134.byte 102,15,56,220,217 135.byte 102,15,56,220,225 136 movups (%rcx,%rax,1),%xmm1 137 addq $32,%rax 138.byte 102,15,56,220,208 139.byte 102,15,56,220,216 140.byte 102,15,56,220,224 141 movups -16(%rcx,%rax,1),%xmm0 142 jnz .Lenc_loop3 143 144.byte 102,15,56,220,209 145.byte 102,15,56,220,217 146.byte 102,15,56,220,225 147.byte 102,15,56,221,208 148.byte 102,15,56,221,216 149.byte 102,15,56,221,224 150 .byte 0xf3,0xc3 151.cfi_endproc 152.size _aesni_encrypt3,.-_aesni_encrypt3 153.type _aesni_decrypt3,@function 154.align 16 155_aesni_decrypt3: 156.cfi_startproc 157 movups (%rcx),%xmm0 158 shll $4,%eax 159 movups 16(%rcx),%xmm1 160 xorps %xmm0,%xmm2 161 xorps %xmm0,%xmm3 162 xorps %xmm0,%xmm4 163 movups 32(%rcx),%xmm0 164 leaq 32(%rcx,%rax,1),%rcx 165 negq %rax 166 addq $16,%rax 167 168.Ldec_loop3: 169.byte 102,15,56,222,209 170.byte 102,15,56,222,217 171.byte 102,15,56,222,225 172 movups (%rcx,%rax,1),%xmm1 173 addq $32,%rax 174.byte 102,15,56,222,208 175.byte 102,15,56,222,216 176.byte 102,15,56,222,224 177 movups -16(%rcx,%rax,1),%xmm0 178 jnz .Ldec_loop3 179 180.byte 102,15,56,222,209 181.byte 
102,15,56,222,217 182.byte 102,15,56,222,225 183.byte 102,15,56,223,208 184.byte 102,15,56,223,216 185.byte 102,15,56,223,224 186 .byte 0xf3,0xc3 187.cfi_endproc 188.size _aesni_decrypt3,.-_aesni_decrypt3 189.type _aesni_encrypt4,@function 190.align 16 191_aesni_encrypt4: 192.cfi_startproc 193 movups (%rcx),%xmm0 194 shll $4,%eax 195 movups 16(%rcx),%xmm1 196 xorps %xmm0,%xmm2 197 xorps %xmm0,%xmm3 198 xorps %xmm0,%xmm4 199 xorps %xmm0,%xmm5 200 movups 32(%rcx),%xmm0 201 leaq 32(%rcx,%rax,1),%rcx 202 negq %rax 203.byte 0x0f,0x1f,0x00 204 addq $16,%rax 205 206.Lenc_loop4: 207.byte 102,15,56,220,209 208.byte 102,15,56,220,217 209.byte 102,15,56,220,225 210.byte 102,15,56,220,233 211 movups (%rcx,%rax,1),%xmm1 212 addq $32,%rax 213.byte 102,15,56,220,208 214.byte 102,15,56,220,216 215.byte 102,15,56,220,224 216.byte 102,15,56,220,232 217 movups -16(%rcx,%rax,1),%xmm0 218 jnz .Lenc_loop4 219 220.byte 102,15,56,220,209 221.byte 102,15,56,220,217 222.byte 102,15,56,220,225 223.byte 102,15,56,220,233 224.byte 102,15,56,221,208 225.byte 102,15,56,221,216 226.byte 102,15,56,221,224 227.byte 102,15,56,221,232 228 .byte 0xf3,0xc3 229.cfi_endproc 230.size _aesni_encrypt4,.-_aesni_encrypt4 231.type _aesni_decrypt4,@function 232.align 16 233_aesni_decrypt4: 234.cfi_startproc 235 movups (%rcx),%xmm0 236 shll $4,%eax 237 movups 16(%rcx),%xmm1 238 xorps %xmm0,%xmm2 239 xorps %xmm0,%xmm3 240 xorps %xmm0,%xmm4 241 xorps %xmm0,%xmm5 242 movups 32(%rcx),%xmm0 243 leaq 32(%rcx,%rax,1),%rcx 244 negq %rax 245.byte 0x0f,0x1f,0x00 246 addq $16,%rax 247 248.Ldec_loop4: 249.byte 102,15,56,222,209 250.byte 102,15,56,222,217 251.byte 102,15,56,222,225 252.byte 102,15,56,222,233 253 movups (%rcx,%rax,1),%xmm1 254 addq $32,%rax 255.byte 102,15,56,222,208 256.byte 102,15,56,222,216 257.byte 102,15,56,222,224 258.byte 102,15,56,222,232 259 movups -16(%rcx,%rax,1),%xmm0 260 jnz .Ldec_loop4 261 262.byte 102,15,56,222,209 263.byte 102,15,56,222,217 264.byte 102,15,56,222,225 265.byte 102,15,56,222,233 
266.byte 102,15,56,223,208 267.byte 102,15,56,223,216 268.byte 102,15,56,223,224 269.byte 102,15,56,223,232 270 .byte 0xf3,0xc3 271.cfi_endproc 272.size _aesni_decrypt4,.-_aesni_decrypt4 273.type _aesni_encrypt6,@function 274.align 16 275_aesni_encrypt6: 276.cfi_startproc 277 movups (%rcx),%xmm0 278 shll $4,%eax 279 movups 16(%rcx),%xmm1 280 xorps %xmm0,%xmm2 281 pxor %xmm0,%xmm3 282 pxor %xmm0,%xmm4 283.byte 102,15,56,220,209 284 leaq 32(%rcx,%rax,1),%rcx 285 negq %rax 286.byte 102,15,56,220,217 287 pxor %xmm0,%xmm5 288 pxor %xmm0,%xmm6 289.byte 102,15,56,220,225 290 pxor %xmm0,%xmm7 291 movups (%rcx,%rax,1),%xmm0 292 addq $16,%rax 293 jmp .Lenc_loop6_enter 294.align 16 295.Lenc_loop6: 296.byte 102,15,56,220,209 297.byte 102,15,56,220,217 298.byte 102,15,56,220,225 299.Lenc_loop6_enter: 300.byte 102,15,56,220,233 301.byte 102,15,56,220,241 302.byte 102,15,56,220,249 303 movups (%rcx,%rax,1),%xmm1 304 addq $32,%rax 305.byte 102,15,56,220,208 306.byte 102,15,56,220,216 307.byte 102,15,56,220,224 308.byte 102,15,56,220,232 309.byte 102,15,56,220,240 310.byte 102,15,56,220,248 311 movups -16(%rcx,%rax,1),%xmm0 312 jnz .Lenc_loop6 313 314.byte 102,15,56,220,209 315.byte 102,15,56,220,217 316.byte 102,15,56,220,225 317.byte 102,15,56,220,233 318.byte 102,15,56,220,241 319.byte 102,15,56,220,249 320.byte 102,15,56,221,208 321.byte 102,15,56,221,216 322.byte 102,15,56,221,224 323.byte 102,15,56,221,232 324.byte 102,15,56,221,240 325.byte 102,15,56,221,248 326 .byte 0xf3,0xc3 327.cfi_endproc 328.size _aesni_encrypt6,.-_aesni_encrypt6 329.type _aesni_decrypt6,@function 330.align 16 331_aesni_decrypt6: 332.cfi_startproc 333 movups (%rcx),%xmm0 334 shll $4,%eax 335 movups 16(%rcx),%xmm1 336 xorps %xmm0,%xmm2 337 pxor %xmm0,%xmm3 338 pxor %xmm0,%xmm4 339.byte 102,15,56,222,209 340 leaq 32(%rcx,%rax,1),%rcx 341 negq %rax 342.byte 102,15,56,222,217 343 pxor %xmm0,%xmm5 344 pxor %xmm0,%xmm6 345.byte 102,15,56,222,225 346 pxor %xmm0,%xmm7 347 movups (%rcx,%rax,1),%xmm0 348 addq 
$16,%rax 349 jmp .Ldec_loop6_enter 350.align 16 351.Ldec_loop6: 352.byte 102,15,56,222,209 353.byte 102,15,56,222,217 354.byte 102,15,56,222,225 355.Ldec_loop6_enter: 356.byte 102,15,56,222,233 357.byte 102,15,56,222,241 358.byte 102,15,56,222,249 359 movups (%rcx,%rax,1),%xmm1 360 addq $32,%rax 361.byte 102,15,56,222,208 362.byte 102,15,56,222,216 363.byte 102,15,56,222,224 364.byte 102,15,56,222,232 365.byte 102,15,56,222,240 366.byte 102,15,56,222,248 367 movups -16(%rcx,%rax,1),%xmm0 368 jnz .Ldec_loop6 369 370.byte 102,15,56,222,209 371.byte 102,15,56,222,217 372.byte 102,15,56,222,225 373.byte 102,15,56,222,233 374.byte 102,15,56,222,241 375.byte 102,15,56,222,249 376.byte 102,15,56,223,208 377.byte 102,15,56,223,216 378.byte 102,15,56,223,224 379.byte 102,15,56,223,232 380.byte 102,15,56,223,240 381.byte 102,15,56,223,248 382 .byte 0xf3,0xc3 383.cfi_endproc 384.size _aesni_decrypt6,.-_aesni_decrypt6 385.type _aesni_encrypt8,@function 386.align 16 387_aesni_encrypt8: 388.cfi_startproc 389 movups (%rcx),%xmm0 390 shll $4,%eax 391 movups 16(%rcx),%xmm1 392 xorps %xmm0,%xmm2 393 xorps %xmm0,%xmm3 394 pxor %xmm0,%xmm4 395 pxor %xmm0,%xmm5 396 pxor %xmm0,%xmm6 397 leaq 32(%rcx,%rax,1),%rcx 398 negq %rax 399.byte 102,15,56,220,209 400 pxor %xmm0,%xmm7 401 pxor %xmm0,%xmm8 402.byte 102,15,56,220,217 403 pxor %xmm0,%xmm9 404 movups (%rcx,%rax,1),%xmm0 405 addq $16,%rax 406 jmp .Lenc_loop8_inner 407.align 16 408.Lenc_loop8: 409.byte 102,15,56,220,209 410.byte 102,15,56,220,217 411.Lenc_loop8_inner: 412.byte 102,15,56,220,225 413.byte 102,15,56,220,233 414.byte 102,15,56,220,241 415.byte 102,15,56,220,249 416.byte 102,68,15,56,220,193 417.byte 102,68,15,56,220,201 418.Lenc_loop8_enter: 419 movups (%rcx,%rax,1),%xmm1 420 addq $32,%rax 421.byte 102,15,56,220,208 422.byte 102,15,56,220,216 423.byte 102,15,56,220,224 424.byte 102,15,56,220,232 425.byte 102,15,56,220,240 426.byte 102,15,56,220,248 427.byte 102,68,15,56,220,192 428.byte 102,68,15,56,220,200 429 movups 
-16(%rcx,%rax,1),%xmm0 430 jnz .Lenc_loop8 431 432.byte 102,15,56,220,209 433.byte 102,15,56,220,217 434.byte 102,15,56,220,225 435.byte 102,15,56,220,233 436.byte 102,15,56,220,241 437.byte 102,15,56,220,249 438.byte 102,68,15,56,220,193 439.byte 102,68,15,56,220,201 440.byte 102,15,56,221,208 441.byte 102,15,56,221,216 442.byte 102,15,56,221,224 443.byte 102,15,56,221,232 444.byte 102,15,56,221,240 445.byte 102,15,56,221,248 446.byte 102,68,15,56,221,192 447.byte 102,68,15,56,221,200 448 .byte 0xf3,0xc3 449.cfi_endproc 450.size _aesni_encrypt8,.-_aesni_encrypt8 451.type _aesni_decrypt8,@function 452.align 16 453_aesni_decrypt8: 454.cfi_startproc 455 movups (%rcx),%xmm0 456 shll $4,%eax 457 movups 16(%rcx),%xmm1 458 xorps %xmm0,%xmm2 459 xorps %xmm0,%xmm3 460 pxor %xmm0,%xmm4 461 pxor %xmm0,%xmm5 462 pxor %xmm0,%xmm6 463 leaq 32(%rcx,%rax,1),%rcx 464 negq %rax 465.byte 102,15,56,222,209 466 pxor %xmm0,%xmm7 467 pxor %xmm0,%xmm8 468.byte 102,15,56,222,217 469 pxor %xmm0,%xmm9 470 movups (%rcx,%rax,1),%xmm0 471 addq $16,%rax 472 jmp .Ldec_loop8_inner 473.align 16 474.Ldec_loop8: 475.byte 102,15,56,222,209 476.byte 102,15,56,222,217 477.Ldec_loop8_inner: 478.byte 102,15,56,222,225 479.byte 102,15,56,222,233 480.byte 102,15,56,222,241 481.byte 102,15,56,222,249 482.byte 102,68,15,56,222,193 483.byte 102,68,15,56,222,201 484.Ldec_loop8_enter: 485 movups (%rcx,%rax,1),%xmm1 486 addq $32,%rax 487.byte 102,15,56,222,208 488.byte 102,15,56,222,216 489.byte 102,15,56,222,224 490.byte 102,15,56,222,232 491.byte 102,15,56,222,240 492.byte 102,15,56,222,248 493.byte 102,68,15,56,222,192 494.byte 102,68,15,56,222,200 495 movups -16(%rcx,%rax,1),%xmm0 496 jnz .Ldec_loop8 497 498.byte 102,15,56,222,209 499.byte 102,15,56,222,217 500.byte 102,15,56,222,225 501.byte 102,15,56,222,233 502.byte 102,15,56,222,241 503.byte 102,15,56,222,249 504.byte 102,68,15,56,222,193 505.byte 102,68,15,56,222,201 506.byte 102,15,56,223,208 507.byte 102,15,56,223,216 508.byte 102,15,56,223,224 
509.byte 102,15,56,223,232 510.byte 102,15,56,223,240 511.byte 102,15,56,223,248 512.byte 102,68,15,56,223,192 513.byte 102,68,15,56,223,200 514 .byte 0xf3,0xc3 515.cfi_endproc 516.size _aesni_decrypt8,.-_aesni_decrypt8 517.globl aesni_ecb_encrypt 518.type aesni_ecb_encrypt,@function 519.align 16 520aesni_ecb_encrypt: 521.cfi_startproc 522 andq $-16,%rdx 523 jz .Lecb_ret 524 525 movl 240(%rcx),%eax 526 movups (%rcx),%xmm0 527 movq %rcx,%r11 528 movl %eax,%r10d 529 testl %r8d,%r8d 530 jz .Lecb_decrypt 531 532 cmpq $0x80,%rdx 533 jb .Lecb_enc_tail 534 535 movdqu (%rdi),%xmm2 536 movdqu 16(%rdi),%xmm3 537 movdqu 32(%rdi),%xmm4 538 movdqu 48(%rdi),%xmm5 539 movdqu 64(%rdi),%xmm6 540 movdqu 80(%rdi),%xmm7 541 movdqu 96(%rdi),%xmm8 542 movdqu 112(%rdi),%xmm9 543 leaq 128(%rdi),%rdi 544 subq $0x80,%rdx 545 jmp .Lecb_enc_loop8_enter 546.align 16 547.Lecb_enc_loop8: 548 movups %xmm2,(%rsi) 549 movq %r11,%rcx 550 movdqu (%rdi),%xmm2 551 movl %r10d,%eax 552 movups %xmm3,16(%rsi) 553 movdqu 16(%rdi),%xmm3 554 movups %xmm4,32(%rsi) 555 movdqu 32(%rdi),%xmm4 556 movups %xmm5,48(%rsi) 557 movdqu 48(%rdi),%xmm5 558 movups %xmm6,64(%rsi) 559 movdqu 64(%rdi),%xmm6 560 movups %xmm7,80(%rsi) 561 movdqu 80(%rdi),%xmm7 562 movups %xmm8,96(%rsi) 563 movdqu 96(%rdi),%xmm8 564 movups %xmm9,112(%rsi) 565 leaq 128(%rsi),%rsi 566 movdqu 112(%rdi),%xmm9 567 leaq 128(%rdi),%rdi 568.Lecb_enc_loop8_enter: 569 570 call _aesni_encrypt8 571 572 subq $0x80,%rdx 573 jnc .Lecb_enc_loop8 574 575 movups %xmm2,(%rsi) 576 movq %r11,%rcx 577 movups %xmm3,16(%rsi) 578 movl %r10d,%eax 579 movups %xmm4,32(%rsi) 580 movups %xmm5,48(%rsi) 581 movups %xmm6,64(%rsi) 582 movups %xmm7,80(%rsi) 583 movups %xmm8,96(%rsi) 584 movups %xmm9,112(%rsi) 585 leaq 128(%rsi),%rsi 586 addq $0x80,%rdx 587 jz .Lecb_ret 588 589.Lecb_enc_tail: 590 movups (%rdi),%xmm2 591 cmpq $0x20,%rdx 592 jb .Lecb_enc_one 593 movups 16(%rdi),%xmm3 594 je .Lecb_enc_two 595 movups 32(%rdi),%xmm4 596 cmpq $0x40,%rdx 597 jb .Lecb_enc_three 598 movups 
48(%rdi),%xmm5 599 je .Lecb_enc_four 600 movups 64(%rdi),%xmm6 601 cmpq $0x60,%rdx 602 jb .Lecb_enc_five 603 movups 80(%rdi),%xmm7 604 je .Lecb_enc_six 605 movdqu 96(%rdi),%xmm8 606 xorps %xmm9,%xmm9 607 call _aesni_encrypt8 608 movups %xmm2,(%rsi) 609 movups %xmm3,16(%rsi) 610 movups %xmm4,32(%rsi) 611 movups %xmm5,48(%rsi) 612 movups %xmm6,64(%rsi) 613 movups %xmm7,80(%rsi) 614 movups %xmm8,96(%rsi) 615 jmp .Lecb_ret 616.align 16 617.Lecb_enc_one: 618 movups (%rcx),%xmm0 619 movups 16(%rcx),%xmm1 620 leaq 32(%rcx),%rcx 621 xorps %xmm0,%xmm2 622.Loop_enc1_3: 623.byte 102,15,56,220,209 624 decl %eax 625 movups (%rcx),%xmm1 626 leaq 16(%rcx),%rcx 627 jnz .Loop_enc1_3 628.byte 102,15,56,221,209 629 movups %xmm2,(%rsi) 630 jmp .Lecb_ret 631.align 16 632.Lecb_enc_two: 633 call _aesni_encrypt2 634 movups %xmm2,(%rsi) 635 movups %xmm3,16(%rsi) 636 jmp .Lecb_ret 637.align 16 638.Lecb_enc_three: 639 call _aesni_encrypt3 640 movups %xmm2,(%rsi) 641 movups %xmm3,16(%rsi) 642 movups %xmm4,32(%rsi) 643 jmp .Lecb_ret 644.align 16 645.Lecb_enc_four: 646 call _aesni_encrypt4 647 movups %xmm2,(%rsi) 648 movups %xmm3,16(%rsi) 649 movups %xmm4,32(%rsi) 650 movups %xmm5,48(%rsi) 651 jmp .Lecb_ret 652.align 16 653.Lecb_enc_five: 654 xorps %xmm7,%xmm7 655 call _aesni_encrypt6 656 movups %xmm2,(%rsi) 657 movups %xmm3,16(%rsi) 658 movups %xmm4,32(%rsi) 659 movups %xmm5,48(%rsi) 660 movups %xmm6,64(%rsi) 661 jmp .Lecb_ret 662.align 16 663.Lecb_enc_six: 664 call _aesni_encrypt6 665 movups %xmm2,(%rsi) 666 movups %xmm3,16(%rsi) 667 movups %xmm4,32(%rsi) 668 movups %xmm5,48(%rsi) 669 movups %xmm6,64(%rsi) 670 movups %xmm7,80(%rsi) 671 jmp .Lecb_ret 672 673.align 16 674.Lecb_decrypt: 675 cmpq $0x80,%rdx 676 jb .Lecb_dec_tail 677 678 movdqu (%rdi),%xmm2 679 movdqu 16(%rdi),%xmm3 680 movdqu 32(%rdi),%xmm4 681 movdqu 48(%rdi),%xmm5 682 movdqu 64(%rdi),%xmm6 683 movdqu 80(%rdi),%xmm7 684 movdqu 96(%rdi),%xmm8 685 movdqu 112(%rdi),%xmm9 686 leaq 128(%rdi),%rdi 687 subq $0x80,%rdx 688 jmp 
.Lecb_dec_loop8_enter 689.align 16 690.Lecb_dec_loop8: 691 movups %xmm2,(%rsi) 692 movq %r11,%rcx 693 movdqu (%rdi),%xmm2 694 movl %r10d,%eax 695 movups %xmm3,16(%rsi) 696 movdqu 16(%rdi),%xmm3 697 movups %xmm4,32(%rsi) 698 movdqu 32(%rdi),%xmm4 699 movups %xmm5,48(%rsi) 700 movdqu 48(%rdi),%xmm5 701 movups %xmm6,64(%rsi) 702 movdqu 64(%rdi),%xmm6 703 movups %xmm7,80(%rsi) 704 movdqu 80(%rdi),%xmm7 705 movups %xmm8,96(%rsi) 706 movdqu 96(%rdi),%xmm8 707 movups %xmm9,112(%rsi) 708 leaq 128(%rsi),%rsi 709 movdqu 112(%rdi),%xmm9 710 leaq 128(%rdi),%rdi 711.Lecb_dec_loop8_enter: 712 713 call _aesni_decrypt8 714 715 movups (%r11),%xmm0 716 subq $0x80,%rdx 717 jnc .Lecb_dec_loop8 718 719 movups %xmm2,(%rsi) 720 pxor %xmm2,%xmm2 721 movq %r11,%rcx 722 movups %xmm3,16(%rsi) 723 pxor %xmm3,%xmm3 724 movl %r10d,%eax 725 movups %xmm4,32(%rsi) 726 pxor %xmm4,%xmm4 727 movups %xmm5,48(%rsi) 728 pxor %xmm5,%xmm5 729 movups %xmm6,64(%rsi) 730 pxor %xmm6,%xmm6 731 movups %xmm7,80(%rsi) 732 pxor %xmm7,%xmm7 733 movups %xmm8,96(%rsi) 734 pxor %xmm8,%xmm8 735 movups %xmm9,112(%rsi) 736 pxor %xmm9,%xmm9 737 leaq 128(%rsi),%rsi 738 addq $0x80,%rdx 739 jz .Lecb_ret 740 741.Lecb_dec_tail: 742 movups (%rdi),%xmm2 743 cmpq $0x20,%rdx 744 jb .Lecb_dec_one 745 movups 16(%rdi),%xmm3 746 je .Lecb_dec_two 747 movups 32(%rdi),%xmm4 748 cmpq $0x40,%rdx 749 jb .Lecb_dec_three 750 movups 48(%rdi),%xmm5 751 je .Lecb_dec_four 752 movups 64(%rdi),%xmm6 753 cmpq $0x60,%rdx 754 jb .Lecb_dec_five 755 movups 80(%rdi),%xmm7 756 je .Lecb_dec_six 757 movups 96(%rdi),%xmm8 758 movups (%rcx),%xmm0 759 xorps %xmm9,%xmm9 760 call _aesni_decrypt8 761 movups %xmm2,(%rsi) 762 pxor %xmm2,%xmm2 763 movups %xmm3,16(%rsi) 764 pxor %xmm3,%xmm3 765 movups %xmm4,32(%rsi) 766 pxor %xmm4,%xmm4 767 movups %xmm5,48(%rsi) 768 pxor %xmm5,%xmm5 769 movups %xmm6,64(%rsi) 770 pxor %xmm6,%xmm6 771 movups %xmm7,80(%rsi) 772 pxor %xmm7,%xmm7 773 movups %xmm8,96(%rsi) 774 pxor %xmm8,%xmm8 775 pxor %xmm9,%xmm9 776 jmp .Lecb_ret 
777.align 16 778.Lecb_dec_one: 779 movups (%rcx),%xmm0 780 movups 16(%rcx),%xmm1 781 leaq 32(%rcx),%rcx 782 xorps %xmm0,%xmm2 783.Loop_dec1_4: 784.byte 102,15,56,222,209 785 decl %eax 786 movups (%rcx),%xmm1 787 leaq 16(%rcx),%rcx 788 jnz .Loop_dec1_4 789.byte 102,15,56,223,209 790 movups %xmm2,(%rsi) 791 pxor %xmm2,%xmm2 792 jmp .Lecb_ret 793.align 16 794.Lecb_dec_two: 795 call _aesni_decrypt2 796 movups %xmm2,(%rsi) 797 pxor %xmm2,%xmm2 798 movups %xmm3,16(%rsi) 799 pxor %xmm3,%xmm3 800 jmp .Lecb_ret 801.align 16 802.Lecb_dec_three: 803 call _aesni_decrypt3 804 movups %xmm2,(%rsi) 805 pxor %xmm2,%xmm2 806 movups %xmm3,16(%rsi) 807 pxor %xmm3,%xmm3 808 movups %xmm4,32(%rsi) 809 pxor %xmm4,%xmm4 810 jmp .Lecb_ret 811.align 16 812.Lecb_dec_four: 813 call _aesni_decrypt4 814 movups %xmm2,(%rsi) 815 pxor %xmm2,%xmm2 816 movups %xmm3,16(%rsi) 817 pxor %xmm3,%xmm3 818 movups %xmm4,32(%rsi) 819 pxor %xmm4,%xmm4 820 movups %xmm5,48(%rsi) 821 pxor %xmm5,%xmm5 822 jmp .Lecb_ret 823.align 16 824.Lecb_dec_five: 825 xorps %xmm7,%xmm7 826 call _aesni_decrypt6 827 movups %xmm2,(%rsi) 828 pxor %xmm2,%xmm2 829 movups %xmm3,16(%rsi) 830 pxor %xmm3,%xmm3 831 movups %xmm4,32(%rsi) 832 pxor %xmm4,%xmm4 833 movups %xmm5,48(%rsi) 834 pxor %xmm5,%xmm5 835 movups %xmm6,64(%rsi) 836 pxor %xmm6,%xmm6 837 pxor %xmm7,%xmm7 838 jmp .Lecb_ret 839.align 16 840.Lecb_dec_six: 841 call _aesni_decrypt6 842 movups %xmm2,(%rsi) 843 pxor %xmm2,%xmm2 844 movups %xmm3,16(%rsi) 845 pxor %xmm3,%xmm3 846 movups %xmm4,32(%rsi) 847 pxor %xmm4,%xmm4 848 movups %xmm5,48(%rsi) 849 pxor %xmm5,%xmm5 850 movups %xmm6,64(%rsi) 851 pxor %xmm6,%xmm6 852 movups %xmm7,80(%rsi) 853 pxor %xmm7,%xmm7 854 855.Lecb_ret: 856 xorps %xmm0,%xmm0 857 pxor %xmm1,%xmm1 858 .byte 0xf3,0xc3 859.cfi_endproc 860.size aesni_ecb_encrypt,.-aesni_ecb_encrypt 861.globl aesni_ccm64_encrypt_blocks 862.type aesni_ccm64_encrypt_blocks,@function 863.align 16 864aesni_ccm64_encrypt_blocks: 865.cfi_startproc 866 movl 240(%rcx),%eax 867 movdqu 
(%r8),%xmm6 868 movdqa .Lincrement64(%rip),%xmm9 869 movdqa .Lbswap_mask(%rip),%xmm7 870 871 shll $4,%eax 872 movl $16,%r10d 873 leaq 0(%rcx),%r11 874 movdqu (%r9),%xmm3 875 movdqa %xmm6,%xmm2 876 leaq 32(%rcx,%rax,1),%rcx 877.byte 102,15,56,0,247 878 subq %rax,%r10 879 jmp .Lccm64_enc_outer 880.align 16 881.Lccm64_enc_outer: 882 movups (%r11),%xmm0 883 movq %r10,%rax 884 movups (%rdi),%xmm8 885 886 xorps %xmm0,%xmm2 887 movups 16(%r11),%xmm1 888 xorps %xmm8,%xmm0 889 xorps %xmm0,%xmm3 890 movups 32(%r11),%xmm0 891 892.Lccm64_enc2_loop: 893.byte 102,15,56,220,209 894.byte 102,15,56,220,217 895 movups (%rcx,%rax,1),%xmm1 896 addq $32,%rax 897.byte 102,15,56,220,208 898.byte 102,15,56,220,216 899 movups -16(%rcx,%rax,1),%xmm0 900 jnz .Lccm64_enc2_loop 901.byte 102,15,56,220,209 902.byte 102,15,56,220,217 903 paddq %xmm9,%xmm6 904 decq %rdx 905.byte 102,15,56,221,208 906.byte 102,15,56,221,216 907 908 leaq 16(%rdi),%rdi 909 xorps %xmm2,%xmm8 910 movdqa %xmm6,%xmm2 911 movups %xmm8,(%rsi) 912.byte 102,15,56,0,215 913 leaq 16(%rsi),%rsi 914 jnz .Lccm64_enc_outer 915 916 pxor %xmm0,%xmm0 917 pxor %xmm1,%xmm1 918 pxor %xmm2,%xmm2 919 movups %xmm3,(%r9) 920 pxor %xmm3,%xmm3 921 pxor %xmm8,%xmm8 922 pxor %xmm6,%xmm6 923 .byte 0xf3,0xc3 924.cfi_endproc 925.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks 926.globl aesni_ccm64_decrypt_blocks 927.type aesni_ccm64_decrypt_blocks,@function 928.align 16 929aesni_ccm64_decrypt_blocks: 930.cfi_startproc 931 movl 240(%rcx),%eax 932 movups (%r8),%xmm6 933 movdqu (%r9),%xmm3 934 movdqa .Lincrement64(%rip),%xmm9 935 movdqa .Lbswap_mask(%rip),%xmm7 936 937 movaps %xmm6,%xmm2 938 movl %eax,%r10d 939 movq %rcx,%r11 940.byte 102,15,56,0,247 941 movups (%rcx),%xmm0 942 movups 16(%rcx),%xmm1 943 leaq 32(%rcx),%rcx 944 xorps %xmm0,%xmm2 945.Loop_enc1_5: 946.byte 102,15,56,220,209 947 decl %eax 948 movups (%rcx),%xmm1 949 leaq 16(%rcx),%rcx 950 jnz .Loop_enc1_5 951.byte 102,15,56,221,209 952 shll $4,%r10d 953 movl $16,%eax 954 
movups (%rdi),%xmm8 955 paddq %xmm9,%xmm6 956 leaq 16(%rdi),%rdi 957 subq %r10,%rax 958 leaq 32(%r11,%r10,1),%rcx 959 movq %rax,%r10 960 jmp .Lccm64_dec_outer 961.align 16 962.Lccm64_dec_outer: 963 xorps %xmm2,%xmm8 964 movdqa %xmm6,%xmm2 965 movups %xmm8,(%rsi) 966 leaq 16(%rsi),%rsi 967.byte 102,15,56,0,215 968 969 subq $1,%rdx 970 jz .Lccm64_dec_break 971 972 movups (%r11),%xmm0 973 movq %r10,%rax 974 movups 16(%r11),%xmm1 975 xorps %xmm0,%xmm8 976 xorps %xmm0,%xmm2 977 xorps %xmm8,%xmm3 978 movups 32(%r11),%xmm0 979 jmp .Lccm64_dec2_loop 980.align 16 981.Lccm64_dec2_loop: 982.byte 102,15,56,220,209 983.byte 102,15,56,220,217 984 movups (%rcx,%rax,1),%xmm1 985 addq $32,%rax 986.byte 102,15,56,220,208 987.byte 102,15,56,220,216 988 movups -16(%rcx,%rax,1),%xmm0 989 jnz .Lccm64_dec2_loop 990 movups (%rdi),%xmm8 991 paddq %xmm9,%xmm6 992.byte 102,15,56,220,209 993.byte 102,15,56,220,217 994.byte 102,15,56,221,208 995.byte 102,15,56,221,216 996 leaq 16(%rdi),%rdi 997 jmp .Lccm64_dec_outer 998 999.align 16 1000.Lccm64_dec_break: 1001 1002 movl 240(%r11),%eax 1003 movups (%r11),%xmm0 1004 movups 16(%r11),%xmm1 1005 xorps %xmm0,%xmm8 1006 leaq 32(%r11),%r11 1007 xorps %xmm8,%xmm3 1008.Loop_enc1_6: 1009.byte 102,15,56,220,217 1010 decl %eax 1011 movups (%r11),%xmm1 1012 leaq 16(%r11),%r11 1013 jnz .Loop_enc1_6 1014.byte 102,15,56,221,217 1015 pxor %xmm0,%xmm0 1016 pxor %xmm1,%xmm1 1017 pxor %xmm2,%xmm2 1018 movups %xmm3,(%r9) 1019 pxor %xmm3,%xmm3 1020 pxor %xmm8,%xmm8 1021 pxor %xmm6,%xmm6 1022 .byte 0xf3,0xc3 1023.cfi_endproc 1024.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks 1025.globl aesni_ctr32_encrypt_blocks 1026.type aesni_ctr32_encrypt_blocks,@function 1027.align 16 1028aesni_ctr32_encrypt_blocks: 1029.cfi_startproc 1030 cmpq $1,%rdx 1031 jne .Lctr32_bulk 1032 1033 1034 1035 movups (%r8),%xmm2 1036 movups (%rdi),%xmm3 1037 movl 240(%rcx),%edx 1038 movups (%rcx),%xmm0 1039 movups 16(%rcx),%xmm1 1040 leaq 32(%rcx),%rcx 1041 xorps %xmm0,%xmm2 
1042.Loop_enc1_7: 1043.byte 102,15,56,220,209 1044 decl %edx 1045 movups (%rcx),%xmm1 1046 leaq 16(%rcx),%rcx 1047 jnz .Loop_enc1_7 1048.byte 102,15,56,221,209 1049 pxor %xmm0,%xmm0 1050 pxor %xmm1,%xmm1 1051 xorps %xmm3,%xmm2 1052 pxor %xmm3,%xmm3 1053 movups %xmm2,(%rsi) 1054 xorps %xmm2,%xmm2 1055 jmp .Lctr32_epilogue 1056 1057.align 16 1058.Lctr32_bulk: 1059 leaq (%rsp),%r11 1060.cfi_def_cfa_register %r11 1061 pushq %rbp 1062.cfi_offset %rbp,-16 1063 subq $128,%rsp 1064 andq $-16,%rsp 1065 1066 1067 1068 1069 movdqu (%r8),%xmm2 1070 movdqu (%rcx),%xmm0 1071 movl 12(%r8),%r8d 1072 pxor %xmm0,%xmm2 1073 movl 12(%rcx),%ebp 1074 movdqa %xmm2,0(%rsp) 1075 bswapl %r8d 1076 movdqa %xmm2,%xmm3 1077 movdqa %xmm2,%xmm4 1078 movdqa %xmm2,%xmm5 1079 movdqa %xmm2,64(%rsp) 1080 movdqa %xmm2,80(%rsp) 1081 movdqa %xmm2,96(%rsp) 1082 movq %rdx,%r10 1083 movdqa %xmm2,112(%rsp) 1084 1085 leaq 1(%r8),%rax 1086 leaq 2(%r8),%rdx 1087 bswapl %eax 1088 bswapl %edx 1089 xorl %ebp,%eax 1090 xorl %ebp,%edx 1091.byte 102,15,58,34,216,3 1092 leaq 3(%r8),%rax 1093 movdqa %xmm3,16(%rsp) 1094.byte 102,15,58,34,226,3 1095 bswapl %eax 1096 movq %r10,%rdx 1097 leaq 4(%r8),%r10 1098 movdqa %xmm4,32(%rsp) 1099 xorl %ebp,%eax 1100 bswapl %r10d 1101.byte 102,15,58,34,232,3 1102 xorl %ebp,%r10d 1103 movdqa %xmm5,48(%rsp) 1104 leaq 5(%r8),%r9 1105 movl %r10d,64+12(%rsp) 1106 bswapl %r9d 1107 leaq 6(%r8),%r10 1108 movl 240(%rcx),%eax 1109 xorl %ebp,%r9d 1110 bswapl %r10d 1111 movl %r9d,80+12(%rsp) 1112 xorl %ebp,%r10d 1113 leaq 7(%r8),%r9 1114 movl %r10d,96+12(%rsp) 1115 bswapl %r9d 1116 movl OPENSSL_ia32cap_P+4(%rip),%r10d 1117 xorl %ebp,%r9d 1118 andl $71303168,%r10d 1119 movl %r9d,112+12(%rsp) 1120 1121 movups 16(%rcx),%xmm1 1122 1123 movdqa 64(%rsp),%xmm6 1124 movdqa 80(%rsp),%xmm7 1125 1126 cmpq $8,%rdx 1127 jb .Lctr32_tail 1128 1129 subq $6,%rdx 1130 cmpl $4194304,%r10d 1131 je .Lctr32_6x 1132 1133 leaq 128(%rcx),%rcx 1134 subq $2,%rdx 1135 jmp .Lctr32_loop8 1136 1137.align 16 1138.Lctr32_6x: 
1139 shll $4,%eax 1140 movl $48,%r10d 1141 bswapl %ebp 1142 leaq 32(%rcx,%rax,1),%rcx 1143 subq %rax,%r10 1144 jmp .Lctr32_loop6 1145 1146.align 16 1147.Lctr32_loop6: 1148 addl $6,%r8d 1149 movups -48(%rcx,%r10,1),%xmm0 1150.byte 102,15,56,220,209 1151 movl %r8d,%eax 1152 xorl %ebp,%eax 1153.byte 102,15,56,220,217 1154.byte 0x0f,0x38,0xf1,0x44,0x24,12 1155 leal 1(%r8),%eax 1156.byte 102,15,56,220,225 1157 xorl %ebp,%eax 1158.byte 0x0f,0x38,0xf1,0x44,0x24,28 1159.byte 102,15,56,220,233 1160 leal 2(%r8),%eax 1161 xorl %ebp,%eax 1162.byte 102,15,56,220,241 1163.byte 0x0f,0x38,0xf1,0x44,0x24,44 1164 leal 3(%r8),%eax 1165.byte 102,15,56,220,249 1166 movups -32(%rcx,%r10,1),%xmm1 1167 xorl %ebp,%eax 1168 1169.byte 102,15,56,220,208 1170.byte 0x0f,0x38,0xf1,0x44,0x24,60 1171 leal 4(%r8),%eax 1172.byte 102,15,56,220,216 1173 xorl %ebp,%eax 1174.byte 0x0f,0x38,0xf1,0x44,0x24,76 1175.byte 102,15,56,220,224 1176 leal 5(%r8),%eax 1177 xorl %ebp,%eax 1178.byte 102,15,56,220,232 1179.byte 0x0f,0x38,0xf1,0x44,0x24,92 1180 movq %r10,%rax 1181.byte 102,15,56,220,240 1182.byte 102,15,56,220,248 1183 movups -16(%rcx,%r10,1),%xmm0 1184 1185 call .Lenc_loop6 1186 1187 movdqu (%rdi),%xmm8 1188 movdqu 16(%rdi),%xmm9 1189 movdqu 32(%rdi),%xmm10 1190 movdqu 48(%rdi),%xmm11 1191 movdqu 64(%rdi),%xmm12 1192 movdqu 80(%rdi),%xmm13 1193 leaq 96(%rdi),%rdi 1194 movups -64(%rcx,%r10,1),%xmm1 1195 pxor %xmm2,%xmm8 1196 movaps 0(%rsp),%xmm2 1197 pxor %xmm3,%xmm9 1198 movaps 16(%rsp),%xmm3 1199 pxor %xmm4,%xmm10 1200 movaps 32(%rsp),%xmm4 1201 pxor %xmm5,%xmm11 1202 movaps 48(%rsp),%xmm5 1203 pxor %xmm6,%xmm12 1204 movaps 64(%rsp),%xmm6 1205 pxor %xmm7,%xmm13 1206 movaps 80(%rsp),%xmm7 1207 movdqu %xmm8,(%rsi) 1208 movdqu %xmm9,16(%rsi) 1209 movdqu %xmm10,32(%rsi) 1210 movdqu %xmm11,48(%rsi) 1211 movdqu %xmm12,64(%rsi) 1212 movdqu %xmm13,80(%rsi) 1213 leaq 96(%rsi),%rsi 1214 1215 subq $6,%rdx 1216 jnc .Lctr32_loop6 1217 1218 addq $6,%rdx 1219 jz .Lctr32_done 1220 1221 leal -48(%r10),%eax 1222 leaq 
-80(%rcx,%r10,1),%rcx 1223 negl %eax 1224 shrl $4,%eax 1225 jmp .Lctr32_tail 1226 1227.align 32 1228.Lctr32_loop8: 1229 addl $8,%r8d 1230 movdqa 96(%rsp),%xmm8 1231.byte 102,15,56,220,209 1232 movl %r8d,%r9d 1233 movdqa 112(%rsp),%xmm9 1234.byte 102,15,56,220,217 1235 bswapl %r9d 1236 movups 32-128(%rcx),%xmm0 1237.byte 102,15,56,220,225 1238 xorl %ebp,%r9d 1239 nop 1240.byte 102,15,56,220,233 1241 movl %r9d,0+12(%rsp) 1242 leaq 1(%r8),%r9 1243.byte 102,15,56,220,241 1244.byte 102,15,56,220,249 1245.byte 102,68,15,56,220,193 1246.byte 102,68,15,56,220,201 1247 movups 48-128(%rcx),%xmm1 1248 bswapl %r9d 1249.byte 102,15,56,220,208 1250.byte 102,15,56,220,216 1251 xorl %ebp,%r9d 1252.byte 0x66,0x90 1253.byte 102,15,56,220,224 1254.byte 102,15,56,220,232 1255 movl %r9d,16+12(%rsp) 1256 leaq 2(%r8),%r9 1257.byte 102,15,56,220,240 1258.byte 102,15,56,220,248 1259.byte 102,68,15,56,220,192 1260.byte 102,68,15,56,220,200 1261 movups 64-128(%rcx),%xmm0 1262 bswapl %r9d 1263.byte 102,15,56,220,209 1264.byte 102,15,56,220,217 1265 xorl %ebp,%r9d 1266.byte 0x66,0x90 1267.byte 102,15,56,220,225 1268.byte 102,15,56,220,233 1269 movl %r9d,32+12(%rsp) 1270 leaq 3(%r8),%r9 1271.byte 102,15,56,220,241 1272.byte 102,15,56,220,249 1273.byte 102,68,15,56,220,193 1274.byte 102,68,15,56,220,201 1275 movups 80-128(%rcx),%xmm1 1276 bswapl %r9d 1277.byte 102,15,56,220,208 1278.byte 102,15,56,220,216 1279 xorl %ebp,%r9d 1280.byte 0x66,0x90 1281.byte 102,15,56,220,224 1282.byte 102,15,56,220,232 1283 movl %r9d,48+12(%rsp) 1284 leaq 4(%r8),%r9 1285.byte 102,15,56,220,240 1286.byte 102,15,56,220,248 1287.byte 102,68,15,56,220,192 1288.byte 102,68,15,56,220,200 1289 movups 96-128(%rcx),%xmm0 1290 bswapl %r9d 1291.byte 102,15,56,220,209 1292.byte 102,15,56,220,217 1293 xorl %ebp,%r9d 1294.byte 0x66,0x90 1295.byte 102,15,56,220,225 1296.byte 102,15,56,220,233 1297 movl %r9d,64+12(%rsp) 1298 leaq 5(%r8),%r9 1299.byte 102,15,56,220,241 1300.byte 102,15,56,220,249 1301.byte 102,68,15,56,220,193 
1302.byte 102,68,15,56,220,201 1303 movups 112-128(%rcx),%xmm1 1304 bswapl %r9d 1305.byte 102,15,56,220,208 1306.byte 102,15,56,220,216 1307 xorl %ebp,%r9d 1308.byte 0x66,0x90 1309.byte 102,15,56,220,224 1310.byte 102,15,56,220,232 1311 movl %r9d,80+12(%rsp) 1312 leaq 6(%r8),%r9 1313.byte 102,15,56,220,240 1314.byte 102,15,56,220,248 1315.byte 102,68,15,56,220,192 1316.byte 102,68,15,56,220,200 1317 movups 128-128(%rcx),%xmm0 1318 bswapl %r9d 1319.byte 102,15,56,220,209 1320.byte 102,15,56,220,217 1321 xorl %ebp,%r9d 1322.byte 0x66,0x90 1323.byte 102,15,56,220,225 1324.byte 102,15,56,220,233 1325 movl %r9d,96+12(%rsp) 1326 leaq 7(%r8),%r9 1327.byte 102,15,56,220,241 1328.byte 102,15,56,220,249 1329.byte 102,68,15,56,220,193 1330.byte 102,68,15,56,220,201 1331 movups 144-128(%rcx),%xmm1 1332 bswapl %r9d 1333.byte 102,15,56,220,208 1334.byte 102,15,56,220,216 1335.byte 102,15,56,220,224 1336 xorl %ebp,%r9d 1337 movdqu 0(%rdi),%xmm10 1338.byte 102,15,56,220,232 1339 movl %r9d,112+12(%rsp) 1340 cmpl $11,%eax 1341.byte 102,15,56,220,240 1342.byte 102,15,56,220,248 1343.byte 102,68,15,56,220,192 1344.byte 102,68,15,56,220,200 1345 movups 160-128(%rcx),%xmm0 1346 1347 jb .Lctr32_enc_done 1348 1349.byte 102,15,56,220,209 1350.byte 102,15,56,220,217 1351.byte 102,15,56,220,225 1352.byte 102,15,56,220,233 1353.byte 102,15,56,220,241 1354.byte 102,15,56,220,249 1355.byte 102,68,15,56,220,193 1356.byte 102,68,15,56,220,201 1357 movups 176-128(%rcx),%xmm1 1358 1359.byte 102,15,56,220,208 1360.byte 102,15,56,220,216 1361.byte 102,15,56,220,224 1362.byte 102,15,56,220,232 1363.byte 102,15,56,220,240 1364.byte 102,15,56,220,248 1365.byte 102,68,15,56,220,192 1366.byte 102,68,15,56,220,200 1367 movups 192-128(%rcx),%xmm0 1368 je .Lctr32_enc_done 1369 1370.byte 102,15,56,220,209 1371.byte 102,15,56,220,217 1372.byte 102,15,56,220,225 1373.byte 102,15,56,220,233 1374.byte 102,15,56,220,241 1375.byte 102,15,56,220,249 1376.byte 102,68,15,56,220,193 1377.byte 102,68,15,56,220,201 1378 
movups 208-128(%rcx),%xmm1 1379 1380.byte 102,15,56,220,208 1381.byte 102,15,56,220,216 1382.byte 102,15,56,220,224 1383.byte 102,15,56,220,232 1384.byte 102,15,56,220,240 1385.byte 102,15,56,220,248 1386.byte 102,68,15,56,220,192 1387.byte 102,68,15,56,220,200 1388 movups 224-128(%rcx),%xmm0 1389 jmp .Lctr32_enc_done 1390 1391.align 16 1392.Lctr32_enc_done: 1393 movdqu 16(%rdi),%xmm11 1394 pxor %xmm0,%xmm10 1395 movdqu 32(%rdi),%xmm12 1396 pxor %xmm0,%xmm11 1397 movdqu 48(%rdi),%xmm13 1398 pxor %xmm0,%xmm12 1399 movdqu 64(%rdi),%xmm14 1400 pxor %xmm0,%xmm13 1401 movdqu 80(%rdi),%xmm15 1402 pxor %xmm0,%xmm14 1403 pxor %xmm0,%xmm15 1404.byte 102,15,56,220,209 1405.byte 102,15,56,220,217 1406.byte 102,15,56,220,225 1407.byte 102,15,56,220,233 1408.byte 102,15,56,220,241 1409.byte 102,15,56,220,249 1410.byte 102,68,15,56,220,193 1411.byte 102,68,15,56,220,201 1412 movdqu 96(%rdi),%xmm1 1413 leaq 128(%rdi),%rdi 1414 1415.byte 102,65,15,56,221,210 1416 pxor %xmm0,%xmm1 1417 movdqu 112-128(%rdi),%xmm10 1418.byte 102,65,15,56,221,219 1419 pxor %xmm0,%xmm10 1420 movdqa 0(%rsp),%xmm11 1421.byte 102,65,15,56,221,228 1422.byte 102,65,15,56,221,237 1423 movdqa 16(%rsp),%xmm12 1424 movdqa 32(%rsp),%xmm13 1425.byte 102,65,15,56,221,246 1426.byte 102,65,15,56,221,255 1427 movdqa 48(%rsp),%xmm14 1428 movdqa 64(%rsp),%xmm15 1429.byte 102,68,15,56,221,193 1430 movdqa 80(%rsp),%xmm0 1431 movups 16-128(%rcx),%xmm1 1432.byte 102,69,15,56,221,202 1433 1434 movups %xmm2,(%rsi) 1435 movdqa %xmm11,%xmm2 1436 movups %xmm3,16(%rsi) 1437 movdqa %xmm12,%xmm3 1438 movups %xmm4,32(%rsi) 1439 movdqa %xmm13,%xmm4 1440 movups %xmm5,48(%rsi) 1441 movdqa %xmm14,%xmm5 1442 movups %xmm6,64(%rsi) 1443 movdqa %xmm15,%xmm6 1444 movups %xmm7,80(%rsi) 1445 movdqa %xmm0,%xmm7 1446 movups %xmm8,96(%rsi) 1447 movups %xmm9,112(%rsi) 1448 leaq 128(%rsi),%rsi 1449 1450 subq $8,%rdx 1451 jnc .Lctr32_loop8 1452 1453 addq $8,%rdx 1454 jz .Lctr32_done 1455 leaq -128(%rcx),%rcx 1456 1457.Lctr32_tail: 1458 1459 1460 
leaq 16(%rcx),%rcx 1461 cmpq $4,%rdx 1462 jb .Lctr32_loop3 1463 je .Lctr32_loop4 1464 1465 1466 shll $4,%eax 1467 movdqa 96(%rsp),%xmm8 1468 pxor %xmm9,%xmm9 1469 1470 movups 16(%rcx),%xmm0 1471.byte 102,15,56,220,209 1472.byte 102,15,56,220,217 1473 leaq 32-16(%rcx,%rax,1),%rcx 1474 negq %rax 1475.byte 102,15,56,220,225 1476 addq $16,%rax 1477 movups (%rdi),%xmm10 1478.byte 102,15,56,220,233 1479.byte 102,15,56,220,241 1480 movups 16(%rdi),%xmm11 1481 movups 32(%rdi),%xmm12 1482.byte 102,15,56,220,249 1483.byte 102,68,15,56,220,193 1484 1485 call .Lenc_loop8_enter 1486 1487 movdqu 48(%rdi),%xmm13 1488 pxor %xmm10,%xmm2 1489 movdqu 64(%rdi),%xmm10 1490 pxor %xmm11,%xmm3 1491 movdqu %xmm2,(%rsi) 1492 pxor %xmm12,%xmm4 1493 movdqu %xmm3,16(%rsi) 1494 pxor %xmm13,%xmm5 1495 movdqu %xmm4,32(%rsi) 1496 pxor %xmm10,%xmm6 1497 movdqu %xmm5,48(%rsi) 1498 movdqu %xmm6,64(%rsi) 1499 cmpq $6,%rdx 1500 jb .Lctr32_done 1501 1502 movups 80(%rdi),%xmm11 1503 xorps %xmm11,%xmm7 1504 movups %xmm7,80(%rsi) 1505 je .Lctr32_done 1506 1507 movups 96(%rdi),%xmm12 1508 xorps %xmm12,%xmm8 1509 movups %xmm8,96(%rsi) 1510 jmp .Lctr32_done 1511 1512.align 32 1513.Lctr32_loop4: 1514.byte 102,15,56,220,209 1515 leaq 16(%rcx),%rcx 1516 decl %eax 1517.byte 102,15,56,220,217 1518.byte 102,15,56,220,225 1519.byte 102,15,56,220,233 1520 movups (%rcx),%xmm1 1521 jnz .Lctr32_loop4 1522.byte 102,15,56,221,209 1523.byte 102,15,56,221,217 1524 movups (%rdi),%xmm10 1525 movups 16(%rdi),%xmm11 1526.byte 102,15,56,221,225 1527.byte 102,15,56,221,233 1528 movups 32(%rdi),%xmm12 1529 movups 48(%rdi),%xmm13 1530 1531 xorps %xmm10,%xmm2 1532 movups %xmm2,(%rsi) 1533 xorps %xmm11,%xmm3 1534 movups %xmm3,16(%rsi) 1535 pxor %xmm12,%xmm4 1536 movdqu %xmm4,32(%rsi) 1537 pxor %xmm13,%xmm5 1538 movdqu %xmm5,48(%rsi) 1539 jmp .Lctr32_done 1540 1541.align 32 1542.Lctr32_loop3: 1543.byte 102,15,56,220,209 1544 leaq 16(%rcx),%rcx 1545 decl %eax 1546.byte 102,15,56,220,217 1547.byte 102,15,56,220,225 1548 movups 
(%rcx),%xmm1
/* The lines up to the .size directive below are the end of
 * aesni_ctr32_encrypt_blocks, whose beginning precedes this span; they are
 * reproduced unchanged. */
1549 jnz .Lctr32_loop3
1550.byte 102,15,56,221,209
1551.byte 102,15,56,221,217
1552.byte 102,15,56,221,225
1553
1554 movups (%rdi),%xmm10
1555 xorps %xmm10,%xmm2
1556 movups %xmm2,(%rsi)
1557 cmpq $2,%rdx
1558 jb .Lctr32_done
1559
1560 movups 16(%rdi),%xmm11
1561 xorps %xmm11,%xmm3
1562 movups %xmm3,16(%rsi)
1563 je .Lctr32_done
1564
1565 movups 32(%rdi),%xmm12
1566 xorps %xmm12,%xmm4
1567 movups %xmm4,32(%rsi)
1568
1569.Lctr32_done:
1570 xorps %xmm0,%xmm0
1571 xorl %ebp,%ebp
1572 pxor %xmm1,%xmm1
1573 pxor %xmm2,%xmm2
1574 pxor %xmm3,%xmm3
1575 pxor %xmm4,%xmm4
1576 pxor %xmm5,%xmm5
1577 pxor %xmm6,%xmm6
1578 pxor %xmm7,%xmm7
1579 movaps %xmm0,0(%rsp)
1580 pxor %xmm8,%xmm8
1581 movaps %xmm0,16(%rsp)
1582 pxor %xmm9,%xmm9
1583 movaps %xmm0,32(%rsp)
1584 pxor %xmm10,%xmm10
1585 movaps %xmm0,48(%rsp)
1586 pxor %xmm11,%xmm11
1587 movaps %xmm0,64(%rsp)
1588 pxor %xmm12,%xmm12
1589 movaps %xmm0,80(%rsp)
1590 pxor %xmm13,%xmm13
1591 movaps %xmm0,96(%rsp)
1592 pxor %xmm14,%xmm14
1593 movaps %xmm0,112(%rsp)
1594 pxor %xmm15,%xmm15
1595 movq -8(%r11),%rbp
1596.cfi_restore %rbp
1597 leaq (%r11),%rsp
1598.cfi_def_cfa_register %rsp
1599.Lctr32_epilogue:
1600 .byte 0xf3,0xc3
1601.cfi_endproc
1602.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * aesni_xts_encrypt
 *
 * Register use as visible in the code (the C-level meaning of each argument
 * is not shown here; presumably SysV AMD64 argument order -- confirm against
 * the prototype):
 *   %rdi  read pointer, advanced as 16-byte blocks are consumed
 *   %rsi  write pointer, advanced as blocks are produced
 *   %rdx  byte count; rounded down to a multiple of 16 for the block paths,
 *         the original value is kept in %r9 and its low 4 bits drive the
 *         partial-block tail at .Lxts_enc_done
 *   %rcx  key schedule used for the data blocks (count at offset 240);
 *         kept in %rbp for the whole function
 *   %r8   key schedule used only to encrypt the initial 16-byte value
 *   %r9   pointer to the initial 16-byte value (the XTS tweak, going by the
 *         function name)
 *
 * The .byte sequences are hand-encoded AES-NI instructions:
 *   102,15,56,220,<modrm>  aesenc      (209 = %xmm1 -> %xmm2, 208 = %xmm0 -> %xmm2, ...)
 *   102,15,56,221,<modrm>  aesenclast  (with 84,36,0 etc.: memory operand N(%rsp))
 *   0xf3,0xc3              rep ret
 *
 * Stack: 112 bytes below the saved %rbp, aligned down to 16. %r11 keeps the
 * entry %rsp; %rbp is reloaded from -8(%r11) and %rsp from %r11 on exit.
 */
1603.globl aesni_xts_encrypt
1604.type aesni_xts_encrypt,@function
1605.align 16
1606aesni_xts_encrypt:
1607.cfi_startproc
1608 leaq (%rsp),%r11
1609.cfi_def_cfa_register %r11
1610 pushq %rbp
1611.cfi_offset %rbp,-16
1612 subq $112,%rsp
1613 andq $-16,%rsp
/* Encrypt the 16 bytes at (%r9) with the key schedule at %r8; the result in
 * %xmm2 seeds the per-block mask chain below. */
1614 movups (%r9),%xmm2
1615 movl 240(%r8),%eax
1616 movl 240(%rcx),%r10d
1617 movups (%r8),%xmm0
1618 movups 16(%r8),%xmm1
1619 leaq 32(%r8),%r8
1620 xorps %xmm0,%xmm2
1621.Loop_enc1_8:
1622.byte 102,15,56,220,209
1623 decl %eax
1624 movups (%r8),%xmm1
1625 leaq 16(%r8),%r8
1626 jnz .Loop_enc1_8
1627.byte 102,15,56,221,209
/* %xmm0 = first round key of the data key, %rbp = data key schedule,
 * %eax = count from offset 240, %r10 = 16 * count,
 * %r9 = original byte count, %rdx = byte count rounded down to whole blocks. */
1628 movups (%rcx),%xmm0
1629 movq %rcx,%rbp
1630 movl %r10d,%eax
1631 shll $4,%r10d
1632 movq %rdx,%r9
1633 andq $-16,%rdx
1634
/* %xmm1 = round key at offset 16 + 16*count (the one the single-block loops
 * feed to aesenclast); it is XORed with the first round key below and parked
 * at 96(%rsp). */
1635 movups 16(%rcx,%r10,1),%xmm1
1636
/* Build five consecutive mask values in %xmm10..%xmm14 from the chain value
 * in %xmm15, each pre-XORed with the first round key (%xmm0); %xmm15 is left
 * holding the sixth value without that XOR.
 * One step: paddq doubles both 64-bit halves of %xmm15; %xmm9 holds copies of
 * dwords 3 and 1 of the seed (pshufd $0x5f), psrad $31 turns their top bits
 * into all-ones/all-zero masks, pand selects bits of .Lxts_magic and pxor
 * folds them in; paddd shifts %xmm9 left by one for the next step.
 * Presumably this is multiplication by x in GF(2^128); .Lxts_magic is defined
 * outside this span -- confirm. */
1637 movdqa .Lxts_magic(%rip),%xmm8
1638 movdqa %xmm2,%xmm15
1639 pshufd $0x5f,%xmm2,%xmm9
1640 pxor %xmm0,%xmm1
1641 movdqa %xmm9,%xmm14
1642 paddd %xmm9,%xmm9
1643 movdqa %xmm15,%xmm10
1644 psrad $31,%xmm14
1645 paddq %xmm15,%xmm15
1646 pand %xmm8,%xmm14
1647 pxor %xmm0,%xmm10
1648 pxor %xmm14,%xmm15
1649 movdqa %xmm9,%xmm14
1650 paddd %xmm9,%xmm9
1651 movdqa %xmm15,%xmm11
1652 psrad $31,%xmm14
1653 paddq %xmm15,%xmm15
1654 pand %xmm8,%xmm14
1655 pxor %xmm0,%xmm11
1656 pxor %xmm14,%xmm15
1657 movdqa %xmm9,%xmm14
1658 paddd %xmm9,%xmm9
1659 movdqa %xmm15,%xmm12
1660 psrad $31,%xmm14
1661 paddq %xmm15,%xmm15
1662 pand %xmm8,%xmm14
1663 pxor %xmm0,%xmm12
1664 pxor %xmm14,%xmm15
1665 movdqa %xmm9,%xmm14
1666 paddd %xmm9,%xmm9
1667 movdqa %xmm15,%xmm13
1668 psrad $31,%xmm14
1669 paddq %xmm15,%xmm15
1670 pand %xmm8,%xmm14
1671 pxor %xmm0,%xmm13
1672 pxor %xmm14,%xmm15
1673 movdqa %xmm15,%xmm14
1674 psrad $31,%xmm9
1675 paddq %xmm15,%xmm15
1676 pand %xmm8,%xmm9
1677 pxor %xmm0,%xmm14
1678 pxor %xmm9,%xmm15
1679 movaps %xmm1,96(%rsp)
1680
/* Fewer than 96 bytes (six blocks) of whole-block data: use the short path. */
1681 subq $96,%rdx
1682 jc .Lxts_enc_short
1683
/* %rcx = %rbp + 32 + 16*count; %rax = %r10 = 112 - 16*count, a negative index
 * that .Lxts_enc_loop6 steps by 32 until it reaches zero. %r8 now points at
 * .Lxts_magic. */
1684 movl $16+96,%eax
1685 leaq 32(%rbp,%r10,1),%rcx
1686 subq %r10,%rax
1687 movups 16(%rbp),%xmm1
1688 movq %rax,%r10
1689 leaq .Lxts_magic(%rip),%r8
1690 jmp .Lxts_enc_grandloop
1691
1692.align 32
/* Six blocks per iteration. Each input block is XORed with its mask (which
 * already contains the first round key) and the first aesenc rounds are
 * interleaved with the loads. Each mask is then XORed with %xmm9 (loaded from
 * 96(%rsp)) and stored to 0..80(%rsp) for use as the aesenclast operand. */
1693.Lxts_enc_grandloop:
1694 movdqu 0(%rdi),%xmm2
1695 movdqa %xmm0,%xmm8
1696 movdqu 16(%rdi),%xmm3
1697 pxor %xmm10,%xmm2
1698 movdqu 32(%rdi),%xmm4
1699 pxor %xmm11,%xmm3
1700.byte 102,15,56,220,209
1701 movdqu 48(%rdi),%xmm5
1702 pxor %xmm12,%xmm4
1703.byte 102,15,56,220,217
1704 movdqu 64(%rdi),%xmm6
1705 pxor %xmm13,%xmm5
1706.byte 102,15,56,220,225
1707 movdqu 80(%rdi),%xmm7
1708 pxor %xmm15,%xmm8
1709 movdqa 96(%rsp),%xmm9
1710 pxor %xmm14,%xmm6
1711.byte 102,15,56,220,233
1712 movups 32(%rbp),%xmm0
1713 leaq 96(%rdi),%rdi
1714 pxor %xmm8,%xmm7
1715
1716 pxor %xmm9,%xmm10
1717.byte 102,15,56,220,241
1718 pxor %xmm9,%xmm11
1719 movdqa %xmm10,0(%rsp)
1720.byte 102,15,56,220,249
1721 movups 48(%rbp),%xmm1
1722 pxor %xmm9,%xmm12
1723
1724.byte 102,15,56,220,208
1725 pxor %xmm9,%xmm13
1726 movdqa %xmm11,16(%rsp)
1727.byte 102,15,56,220,216
1728 pxor %xmm9,%xmm14
1729 movdqa %xmm12,32(%rsp)
1730.byte 102,15,56,220,224
1731.byte 102,15,56,220,232
1732 pxor %xmm9,%xmm8
1733 movdqa %xmm14,64(%rsp)
1734.byte 102,15,56,220,240
1735.byte 102,15,56,220,248
1736 movups 64(%rbp),%xmm0
1737 movdqa %xmm8,80(%rsp)
1738 pshufd $0x5f,%xmm15,%xmm9
1739 jmp .Lxts_enc_loop6
1740.align 32
/* Middle rounds: two aesenc rounds on all six blocks per pass. The jnz tests
 * the flags of "addq $32,%rax"; the movups in between does not alter them. */
1741.Lxts_enc_loop6:
1742.byte 102,15,56,220,209
1743.byte 102,15,56,220,217
1744.byte 102,15,56,220,225
1745.byte 102,15,56,220,233
1746.byte 102,15,56,220,241
1747.byte 102,15,56,220,249
1748 movups -64(%rcx,%rax,1),%xmm1
1749 addq $32,%rax
1750
1751.byte 102,15,56,220,208
1752.byte 102,15,56,220,216
1753.byte 102,15,56,220,224
1754.byte 102,15,56,220,232
1755.byte 102,15,56,220,240
1756.byte 102,15,56,220,248
1757 movups -80(%rcx,%rax,1),%xmm0
1758 jnz .Lxts_enc_loop6
1759
/* Remaining rounds, interleaved with computing the next six mask values:
 * %xmm10..%xmm14 are rebuilt as (first round key reloaded from (%rbp)) XOR
 * (chain value), and %xmm15 advances one step per value. */
1760 movdqa (%r8),%xmm8
1761 movdqa %xmm9,%xmm14
1762 paddd %xmm9,%xmm9
1763.byte 102,15,56,220,209
1764 paddq %xmm15,%xmm15
1765 psrad $31,%xmm14
1766.byte 102,15,56,220,217
1767 pand %xmm8,%xmm14
1768 movups (%rbp),%xmm10
1769.byte 102,15,56,220,225
1770.byte 102,15,56,220,233
1771.byte 102,15,56,220,241
1772 pxor %xmm14,%xmm15
1773 movaps %xmm10,%xmm11
1774.byte 102,15,56,220,249
1775 movups -64(%rcx),%xmm1
1776
1777 movdqa %xmm9,%xmm14
1778.byte 102,15,56,220,208
1779 paddd %xmm9,%xmm9
1780 pxor %xmm15,%xmm10
1781.byte 102,15,56,220,216
1782 psrad $31,%xmm14
1783 paddq %xmm15,%xmm15
1784.byte 102,15,56,220,224
1785.byte 102,15,56,220,232
1786 pand %xmm8,%xmm14
1787 movaps %xmm11,%xmm12
1788.byte 102,15,56,220,240
1789 pxor %xmm14,%xmm15
1790 movdqa %xmm9,%xmm14
1791.byte 102,15,56,220,248
1792 movups -48(%rcx),%xmm0
1793
1794 paddd %xmm9,%xmm9
1795.byte 102,15,56,220,209
1796 pxor %xmm15,%xmm11
1797 psrad $31,%xmm14
1798.byte 102,15,56,220,217
1799 paddq %xmm15,%xmm15
1800 pand %xmm8,%xmm14
1801.byte 102,15,56,220,225
1802.byte 102,15,56,220,233
/* The fourth stack slot is written only here, just before %xmm13 is reused. */
1803 movdqa %xmm13,48(%rsp)
1804 pxor %xmm14,%xmm15
1805.byte 102,15,56,220,241
1806 movaps %xmm12,%xmm13
1807 movdqa %xmm9,%xmm14
1808.byte 102,15,56,220,249
1809 movups -32(%rcx),%xmm1
1810
1811 paddd %xmm9,%xmm9
1812.byte 102,15,56,220,208
1813 pxor %xmm15,%xmm12
1814 psrad $31,%xmm14
1815.byte 102,15,56,220,216
1816 paddq %xmm15,%xmm15
1817 pand %xmm8,%xmm14
1818.byte 102,15,56,220,224
1819.byte 102,15,56,220,232
1820.byte 102,15,56,220,240
1821 pxor %xmm14,%xmm15
1822 movaps %xmm13,%xmm14
1823.byte 102,15,56,220,248
1824
/* %xmm0 is free after the round above and serves as the mask temporary; it is
 * then reloaded with the first round key, and %xmm1 with the second, ready
 * for the next iteration. */
1825 movdqa %xmm9,%xmm0
1826 paddd %xmm9,%xmm9
1827.byte 102,15,56,220,209
1828 pxor %xmm15,%xmm13
1829 psrad $31,%xmm0
1830.byte 102,15,56,220,217
1831 paddq %xmm15,%xmm15
1832 pand %xmm8,%xmm0
1833.byte 102,15,56,220,225
1834.byte 102,15,56,220,233
1835 pxor %xmm0,%xmm15
1836 movups (%rbp),%xmm0
1837.byte 102,15,56,220,241
1838.byte 102,15,56,220,249
1839 movups 16(%rbp),%xmm1
1840
/* aesenclast with the stack slots 0,16,32,48,64,80(%rsp) as operands for
 * %xmm2..%xmm7; %rax is reset from %r10 for the next pass of the round loop. */
1841 pxor %xmm15,%xmm14
1842.byte 102,15,56,221,84,36,0
1843 psrad $31,%xmm9
1844 paddq %xmm15,%xmm15
1845.byte 102,15,56,221,92,36,16
1846.byte 102,15,56,221,100,36,32
1847 pand %xmm8,%xmm9
1848 movq %r10,%rax
1849.byte 102,15,56,221,108,36,48
1850.byte 102,15,56,221,116,36,64
1851.byte 102,15,56,221,124,36,80
1852 pxor %xmm9,%xmm15
1853
/* Store the six output blocks and loop while at least 96 bytes remain. */
1854 leaq 96(%rsi),%rsi
1855 movups %xmm2,-96(%rsi)
1856 movups %xmm3,-80(%rsi)
1857 movups %xmm4,-64(%rsi)
1858 movups %xmm5,-48(%rsi)
1859 movups %xmm6,-32(%rsi)
1860 movups %xmm7,-16(%rsi)
1861 subq $96,%rdx
1862 jnc .Lxts_enc_grandloop
1863
/* Undo the index arithmetic: %eax = (112 - %r10d) >> 4, i.e. the count read
 * from offset 240; %rcx = start of the key schedule again. */
1864 movl $16+96,%eax
1865 subl %r10d,%eax
1866 movq %rbp,%rcx
1867 shrl $4,%eax
1868
/* Short path: 0..5 whole blocks remain (%rdx + 96 bytes). %r10d keeps the
 * count for later single-block loops. The first round key (%xmm0) is XORed
 * out of each mask again just before the branch that needs it. */
1869.Lxts_enc_short:
1870
1871 movl %eax,%r10d
1872 pxor %xmm0,%xmm10
1873 addq $96,%rdx
1874 jz .Lxts_enc_done
1875
1876 pxor %xmm0,%xmm11
1877 cmpq $0x20,%rdx
1878 jb .Lxts_enc_one
1879 pxor %xmm0,%xmm12
1880 je .Lxts_enc_two
1881
1882 pxor %xmm0,%xmm13
1883 cmpq $0x40,%rdx
1884 jb .Lxts_enc_three
1885 pxor %xmm0,%xmm14
1886 je .Lxts_enc_four
1887
/* Five blocks: the sixth lane (%xmm7) is zeroed before the call to
 * _aesni_encrypt6 (defined outside this span). Afterwards %xmm10 takes the
 * next chain value (%xmm15) for a possible partial tail. */
1888 movdqu (%rdi),%xmm2
1889 movdqu 16(%rdi),%xmm3
1890 movdqu 32(%rdi),%xmm4
1891 pxor %xmm10,%xmm2
1892 movdqu 48(%rdi),%xmm5
1893 pxor %xmm11,%xmm3
1894 movdqu 64(%rdi),%xmm6
1895 leaq 80(%rdi),%rdi
1896 pxor %xmm12,%xmm4
1897 pxor %xmm13,%xmm5
1898 pxor %xmm14,%xmm6
1899 pxor %xmm7,%xmm7
1900
1901 call _aesni_encrypt6
1902
1903 xorps %xmm10,%xmm2
1904 movdqa %xmm15,%xmm10
1905 xorps %xmm11,%xmm3
1906 xorps %xmm12,%xmm4
1907 movdqu %xmm2,(%rsi)
1908 xorps %xmm13,%xmm5
1909 movdqu %xmm3,16(%rsi)
1910 xorps %xmm14,%xmm6
1911 movdqu %xmm4,32(%rsi)
1912 movdqu %xmm5,48(%rsi)
1913 movdqu %xmm6,64(%rsi)
1914 leaq 80(%rsi),%rsi
1915 jmp .Lxts_enc_done
1916
1917.align 16
/* One block: mask, encrypt inline with the single-block round loop, mask
 * again; %xmm10 then takes the next mask (%xmm11). */
1918.Lxts_enc_one:
1919 movups (%rdi),%xmm2
1920 leaq 16(%rdi),%rdi
1921 xorps %xmm10,%xmm2
1922 movups (%rcx),%xmm0
1923 movups 16(%rcx),%xmm1
1924 leaq 32(%rcx),%rcx
1925 xorps %xmm0,%xmm2
1926.Loop_enc1_9:
1927.byte 102,15,56,220,209
1928 decl %eax
1929 movups (%rcx),%xmm1
1930 leaq 16(%rcx),%rcx
1931 jnz .Loop_enc1_9
1932.byte 102,15,56,221,209
1933 xorps %xmm10,%xmm2
1934 movdqa %xmm11,%xmm10
1935 movups %xmm2,(%rsi)
1936 leaq 16(%rsi),%rsi
1937 jmp .Lxts_enc_done
1938
1939.align 16
/* Two blocks via _aesni_encrypt2 (defined outside this span). */
1940.Lxts_enc_two:
1941 movups (%rdi),%xmm2
1942 movups 16(%rdi),%xmm3
1943 leaq 32(%rdi),%rdi
1944 xorps %xmm10,%xmm2
1945 xorps %xmm11,%xmm3
1946
1947 call _aesni_encrypt2
1948
1949 xorps %xmm10,%xmm2
1950 movdqa %xmm12,%xmm10
1951 xorps %xmm11,%xmm3
1952 movups %xmm2,(%rsi)
1953 movups %xmm3,16(%rsi)
1954 leaq 32(%rsi),%rsi
1955 jmp .Lxts_enc_done
1956
1957.align 16
/* Three blocks via _aesni_encrypt3 (defined outside this span). */
1958.Lxts_enc_three:
1959 movups (%rdi),%xmm2
1960 movups 16(%rdi),%xmm3
1961 movups 32(%rdi),%xmm4
1962 leaq 48(%rdi),%rdi
1963 xorps %xmm10,%xmm2
1964 xorps %xmm11,%xmm3
1965 xorps %xmm12,%xmm4
1966
1967 call _aesni_encrypt3
1968
1969 xorps %xmm10,%xmm2
1970 movdqa %xmm13,%xmm10
1971 xorps %xmm11,%xmm3
1972 xorps %xmm12,%xmm4
1973 movups %xmm2,(%rsi)
1974 movups %xmm3,16(%rsi)
1975 movups %xmm4,32(%rsi)
1976 leaq 48(%rsi),%rsi
1977 jmp .Lxts_enc_done
1978
1979.align 16
/* Four blocks via _aesni_encrypt4 (defined outside this span). */
1980.Lxts_enc_four:
1981 movups (%rdi),%xmm2
1982 movups 16(%rdi),%xmm3
1983 movups 32(%rdi),%xmm4
1984 xorps %xmm10,%xmm2
1985 movups 48(%rdi),%xmm5
1986 leaq 64(%rdi),%rdi
1987 xorps %xmm11,%xmm3
1988 xorps %xmm12,%xmm4
1989 xorps %xmm13,%xmm5
1990
1991 call _aesni_encrypt4
1992
1993 pxor %xmm10,%xmm2
1994 movdqa %xmm14,%xmm10
1995 pxor %xmm11,%xmm3
1996 pxor %xmm12,%xmm4
1997 movdqu %xmm2,(%rsi)
1998 pxor %xmm13,%xmm5
1999 movdqu %xmm3,16(%rsi)
2000 movdqu %xmm4,32(%rsi)
2001 movdqu %xmm5,48(%rsi)
2002 leaq 64(%rsi),%rsi
2003 jmp .Lxts_enc_done
2004
2005.align 16
/* All whole blocks are written. %r9 & 15 = number of trailing bytes; none
 * means we are finished. %xmm10 holds the mask for the extra block. */
2006.Lxts_enc_done:
2007 andq $15,%r9
2008 jz .Lxts_enc_ret
2009 movq %r9,%rdx
2010
/* Byte-wise swap for the partial block: each trailing input byte replaces the
 * corresponding byte of the last full output block at -16(%rsi), and the byte
 * it displaced is written to the partial output position at 0(%rsi). */
2011.Lxts_enc_steal:
2012 movzbl (%rdi),%eax
2013 movzbl -16(%rsi),%ecx
2014 leaq 1(%rdi),%rdi
2015 movb %al,-16(%rsi)
2016 movb %cl,0(%rsi)
2017 leaq 1(%rsi),%rsi
2018 subq $1,%rdx
2019 jnz .Lxts_enc_steal
2020
/* Rewind %rsi by the tail length and re-encrypt the rebuilt block at
 * -16(%rsi) in place, masked with %xmm10 before and after. */
2021 subq %r9,%rsi
2022 movq %rbp,%rcx
2023 movl %r10d,%eax
2024
2025 movups -16(%rsi),%xmm2
2026 xorps %xmm10,%xmm2
2027 movups (%rcx),%xmm0
2028 movups 16(%rcx),%xmm1
2029 leaq 32(%rcx),%rcx
2030 xorps %xmm0,%xmm2
2031.Loop_enc1_10:
2032.byte 102,15,56,220,209
2033 decl %eax
2034 movups (%rcx),%xmm1
2035 leaq 16(%rcx),%rcx
2036 jnz .Loop_enc1_10
2037.byte 102,15,56,221,209
2038 xorps %xmm10,%xmm2
2039 movups %xmm2,-16(%rsi)
2040
/* Exit: zero %xmm0..%xmm15 and the 112 bytes of stack scratch, then restore
 * %rbp and %rsp from the frame anchored at %r11. */
2041.Lxts_enc_ret:
2042 xorps %xmm0,%xmm0
2043 pxor %xmm1,%xmm1
2044 pxor %xmm2,%xmm2
2045 pxor %xmm3,%xmm3
2046 pxor %xmm4,%xmm4
2047 pxor %xmm5,%xmm5
2048 pxor %xmm6,%xmm6
2049 pxor %xmm7,%xmm7
2050 movaps %xmm0,0(%rsp)
2051 pxor %xmm8,%xmm8
2052 movaps %xmm0,16(%rsp)
2053 pxor %xmm9,%xmm9
2054 movaps %xmm0,32(%rsp)
2055 pxor %xmm10,%xmm10
2056 movaps %xmm0,48(%rsp)
2057 pxor %xmm11,%xmm11
2058 movaps %xmm0,64(%rsp)
2059 pxor %xmm12,%xmm12
2060 movaps %xmm0,80(%rsp)
2061 pxor %xmm13,%xmm13
2062 movaps %xmm0,96(%rsp)
2063 pxor %xmm14,%xmm14
2064 pxor %xmm15,%xmm15
2065 movq -8(%r11),%rbp
2066.cfi_restore %rbp
2067 leaq (%r11),%rsp
/* The five lines below close aesni_xts_encrypt (0xf3,0xc3 = rep ret). */
2068.cfi_def_cfa_register %rsp
2069.Lxts_enc_epilogue:
2070 .byte 0xf3,0xc3
2071.cfi_endproc
2072.size aesni_xts_encrypt,.-aesni_xts_encrypt
/*
 * aesni_xts_decrypt
 *
 * Register use as visible in the code (the C-level meaning of each argument
 * is not shown here; presumably SysV AMD64 argument order -- confirm against
 * the prototype):
 *   %rdi  read pointer, advanced as 16-byte blocks are consumed
 *   %rsi  write pointer, advanced as blocks are produced
 *   %rdx  byte count; if it is not a multiple of 16, one extra full block is
 *         held back (16 is subtracted) before rounding down; the adjusted
 *         value is kept in %r9 and its low 4 bits drive the partial tail
 *   %rcx  key schedule used for the data blocks (count at offset 240);
 *         kept in %rbp for the whole function
 *   %r8   key schedule used only to ENCRYPT the initial 16-byte value
 *   %r9   pointer to the initial 16-byte value (the XTS tweak, going by the
 *         function name)
 *
 * The .byte sequences are hand-encoded AES-NI instructions:
 *   102,15,56,220,<modrm>  aesenc       102,15,56,221,<modrm>  aesenclast
 *   102,15,56,222,<modrm>  aesdec       102,15,56,223,<modrm>  aesdeclast
 *   (209 = %xmm1 -> %xmm2, 208 = %xmm0 -> %xmm2, ...; 84,36,0 etc. = N(%rsp))
 *
 * Stack: 112 bytes below the saved %rbp, aligned down to 16; %r11 keeps the
 * entry %rsp.
 */
2073.globl aesni_xts_decrypt
2074.type aesni_xts_decrypt,@function
2075.align 16
2076aesni_xts_decrypt:
2077.cfi_startproc
2078 leaq (%rsp),%r11
2079.cfi_def_cfa_register %r11
2080 pushq %rbp
2081.cfi_offset %rbp,-16
2082 subq $112,%rsp
2083 andq $-16,%rsp
/* Encrypt (aesenc/aesenclast, not aesdec) the 16 bytes at (%r9) with the key
 * schedule at %r8; the result in %xmm2 seeds the per-block mask chain. */
2084 movups (%r9),%xmm2
2085 movl 240(%r8),%eax
2086 movl 240(%rcx),%r10d
2087 movups (%r8),%xmm0
2088 movups 16(%r8),%xmm1
2089 leaq 32(%r8),%r8
2090 xorps %xmm0,%xmm2
2091.Loop_enc1_11:
2092.byte 102,15,56,220,209
2093 decl %eax
2094 movups (%r8),%xmm1
2095 leaq 16(%r8),%r8
2096 jnz .Loop_enc1_11
2097.byte 102,15,56,221,209
/* %rax = 16 if the byte count has a partial block, else 0; subtracting it
 * holds one full block back from the bulk paths. */
2098 xorl %eax,%eax
2099 testq $15,%rdx
2100 setnz %al
2101 shlq $4,%rax
2102 subq %rax,%rdx
2103
/* %xmm0 = first round key, %rbp = key schedule, %eax = count from offset 240,
 * %r10 = 16 * count, %r9 = adjusted byte count, %rdx = whole-block bytes. */
2104 movups (%rcx),%xmm0
2105 movq %rcx,%rbp
2106 movl %r10d,%eax
2107 shll $4,%r10d
2108 movq %rdx,%r9
2109 andq $-16,%rdx
2110
/* %xmm1 = round key at offset 16 + 16*count; XORed with the first round key
 * below and parked at 96(%rsp). */
2111 movups 16(%rcx,%r10,1),%xmm1
2112
/* Build five consecutive mask values in %xmm10..%xmm14 from the chain value
 * in %xmm15, each pre-XORed with the first round key (%xmm0); %xmm15 is left
 * holding the sixth value without that XOR.
 * One step: paddq doubles both 64-bit halves; the top bits captured through
 * pshufd $0x5f / psrad $31 select bits of .Lxts_magic that are XORed in.
 * Presumably multiplication by x in GF(2^128); .Lxts_magic is defined outside
 * this span -- confirm. */
2113 movdqa .Lxts_magic(%rip),%xmm8
2114 movdqa %xmm2,%xmm15
2115 pshufd $0x5f,%xmm2,%xmm9
2116 pxor %xmm0,%xmm1
2117 movdqa %xmm9,%xmm14
2118 paddd %xmm9,%xmm9
2119 movdqa %xmm15,%xmm10
2120 psrad $31,%xmm14
2121 paddq %xmm15,%xmm15
2122 pand %xmm8,%xmm14
2123 pxor %xmm0,%xmm10
2124 pxor %xmm14,%xmm15
2125 movdqa %xmm9,%xmm14
2126 paddd %xmm9,%xmm9
2127 movdqa %xmm15,%xmm11
2128 psrad $31,%xmm14
2129 paddq %xmm15,%xmm15
2130 pand %xmm8,%xmm14
2131 pxor %xmm0,%xmm11
2132 pxor %xmm14,%xmm15
2133 movdqa %xmm9,%xmm14
2134 paddd %xmm9,%xmm9
2135 movdqa %xmm15,%xmm12
2136 psrad $31,%xmm14
2137 paddq %xmm15,%xmm15
2138 pand %xmm8,%xmm14
2139 pxor %xmm0,%xmm12
2140 pxor %xmm14,%xmm15
2141 movdqa %xmm9,%xmm14
2142 paddd %xmm9,%xmm9
2143 movdqa %xmm15,%xmm13
2144 psrad $31,%xmm14
2145 paddq %xmm15,%xmm15
2146 pand %xmm8,%xmm14
2147 pxor %xmm0,%xmm13
2148 pxor %xmm14,%xmm15
2149 movdqa %xmm15,%xmm14
2150 psrad $31,%xmm9
2151 paddq %xmm15,%xmm15
2152 pand %xmm8,%xmm9
2153 pxor %xmm0,%xmm14
2154 pxor %xmm9,%xmm15
2155 movaps %xmm1,96(%rsp)
2156
/* Fewer than 96 bytes (six blocks) of whole-block data: use the short path. */
2157 subq $96,%rdx
2158 jc .Lxts_dec_short
2159
/* %rcx = %rbp + 32 + 16*count; %rax = %r10 = 112 - 16*count, a negative index
 * that .Lxts_dec_loop6 steps by 32 until it reaches zero. %r8 now points at
 * .Lxts_magic. */
2160 movl $16+96,%eax
2161 leaq 32(%rbp,%r10,1),%rcx
2162 subq %r10,%rax
2163 movups 16(%rbp),%xmm1
2164 movq %rax,%r10
2165 leaq .Lxts_magic(%rip),%r8
2166 jmp .Lxts_dec_grandloop
2167
2168.align 32
/* Six blocks per iteration. Each input block is XORed with its mask (which
 * already contains the first round key) and the first aesdec rounds are
 * interleaved with the loads. Each mask is then XORed with %xmm9 (loaded from
 * 96(%rsp)) and stored to 0..80(%rsp) for use as the aesdeclast operand. */
2169.Lxts_dec_grandloop:
2170 movdqu 0(%rdi),%xmm2
2171 movdqa %xmm0,%xmm8
2172 movdqu 16(%rdi),%xmm3
2173 pxor %xmm10,%xmm2
2174 movdqu 32(%rdi),%xmm4
2175 pxor %xmm11,%xmm3
2176.byte 102,15,56,222,209
2177 movdqu 48(%rdi),%xmm5
2178 pxor %xmm12,%xmm4
2179.byte 102,15,56,222,217
2180 movdqu 64(%rdi),%xmm6
2181 pxor %xmm13,%xmm5
2182.byte 102,15,56,222,225
2183 movdqu 80(%rdi),%xmm7
2184 pxor %xmm15,%xmm8
2185 movdqa 96(%rsp),%xmm9
2186 pxor %xmm14,%xmm6
2187.byte 102,15,56,222,233
2188 movups 32(%rbp),%xmm0
2189 leaq 96(%rdi),%rdi
2190 pxor %xmm8,%xmm7
2191
2192 pxor %xmm9,%xmm10
2193.byte 102,15,56,222,241
2194 pxor %xmm9,%xmm11
2195 movdqa %xmm10,0(%rsp)
2196.byte 102,15,56,222,249
2197 movups 48(%rbp),%xmm1
2198 pxor %xmm9,%xmm12
2199
2200.byte 102,15,56,222,208
2201 pxor %xmm9,%xmm13
2202 movdqa %xmm11,16(%rsp)
2203.byte 102,15,56,222,216
2204 pxor %xmm9,%xmm14
2205 movdqa %xmm12,32(%rsp)
2206.byte 102,15,56,222,224
2207.byte 102,15,56,222,232
2208 pxor %xmm9,%xmm8
2209 movdqa %xmm14,64(%rsp)
2210.byte 102,15,56,222,240
2211.byte 102,15,56,222,248
2212 movups 64(%rbp),%xmm0
2213 movdqa %xmm8,80(%rsp)
2214 pshufd $0x5f,%xmm15,%xmm9
2215 jmp .Lxts_dec_loop6
2216.align 32
/* Middle rounds: two aesdec rounds on all six blocks per pass. The jnz tests
 * the flags of "addq $32,%rax"; the movups in between does not alter them. */
2217.Lxts_dec_loop6:
2218.byte 102,15,56,222,209
2219.byte 102,15,56,222,217
2220.byte 102,15,56,222,225
2221.byte 102,15,56,222,233
2222.byte 102,15,56,222,241
2223.byte 102,15,56,222,249
2224 movups -64(%rcx,%rax,1),%xmm1
2225 addq $32,%rax
2226
2227.byte 102,15,56,222,208
2228.byte 102,15,56,222,216
2229.byte 102,15,56,222,224
2230.byte 102,15,56,222,232
2231.byte 102,15,56,222,240
2232.byte 102,15,56,222,248
2233 movups -80(%rcx,%rax,1),%xmm0
2234 jnz .Lxts_dec_loop6
2235
/* Remaining rounds, interleaved with computing the next six mask values:
 * %xmm10..%xmm14 are rebuilt as (first round key reloaded from (%rbp)) XOR
 * (chain value), and %xmm15 advances one step per value. */
2236 movdqa (%r8),%xmm8
2237 movdqa %xmm9,%xmm14
2238 paddd %xmm9,%xmm9
2239.byte 102,15,56,222,209
2240 paddq %xmm15,%xmm15
2241 psrad $31,%xmm14
2242.byte 102,15,56,222,217
2243 pand %xmm8,%xmm14
2244 movups (%rbp),%xmm10
2245.byte 102,15,56,222,225
2246.byte 102,15,56,222,233
2247.byte 102,15,56,222,241
2248 pxor %xmm14,%xmm15
2249 movaps %xmm10,%xmm11
2250.byte 102,15,56,222,249
2251 movups -64(%rcx),%xmm1
2252
2253 movdqa %xmm9,%xmm14
2254.byte 102,15,56,222,208
2255 paddd %xmm9,%xmm9
2256 pxor %xmm15,%xmm10
2257.byte 102,15,56,222,216
2258 psrad $31,%xmm14
2259 paddq %xmm15,%xmm15
2260.byte 102,15,56,222,224
2261.byte 102,15,56,222,232
2262 pand %xmm8,%xmm14
2263 movaps %xmm11,%xmm12
2264.byte 102,15,56,222,240
2265 pxor %xmm14,%xmm15
2266 movdqa %xmm9,%xmm14
2267.byte 102,15,56,222,248
2268 movups -48(%rcx),%xmm0
2269
2270 paddd %xmm9,%xmm9
2271.byte 102,15,56,222,209
2272 pxor %xmm15,%xmm11
2273 psrad $31,%xmm14
2274.byte 102,15,56,222,217
2275 paddq %xmm15,%xmm15
2276 pand %xmm8,%xmm14
2277.byte 102,15,56,222,225
2278.byte 102,15,56,222,233
/* The fourth stack slot is written only here, just before %xmm13 is reused. */
2279 movdqa %xmm13,48(%rsp)
2280 pxor %xmm14,%xmm15
2281.byte 102,15,56,222,241
2282 movaps %xmm12,%xmm13
2283 movdqa %xmm9,%xmm14
2284.byte 102,15,56,222,249
2285 movups -32(%rcx),%xmm1
2286
2287 paddd %xmm9,%xmm9
2288.byte 102,15,56,222,208
2289 pxor %xmm15,%xmm12
2290 psrad $31,%xmm14
2291.byte 102,15,56,222,216
2292 paddq %xmm15,%xmm15
2293 pand %xmm8,%xmm14
2294.byte 102,15,56,222,224
2295.byte 102,15,56,222,232
2296.byte 102,15,56,222,240
2297 pxor %xmm14,%xmm15
2298 movaps %xmm13,%xmm14
2299.byte 102,15,56,222,248
2300
/* %xmm0 is free after the round above and serves as the mask temporary; it is
 * then reloaded with the first round key, and %xmm1 with the second, ready
 * for the next iteration. */
2301 movdqa %xmm9,%xmm0
2302 paddd %xmm9,%xmm9
2303.byte 102,15,56,222,209
2304 pxor %xmm15,%xmm13
2305 psrad $31,%xmm0
2306.byte 102,15,56,222,217
2307 paddq %xmm15,%xmm15
2308 pand %xmm8,%xmm0
2309.byte 102,15,56,222,225
2310.byte 102,15,56,222,233
2311 pxor %xmm0,%xmm15
2312 movups (%rbp),%xmm0
2313.byte 102,15,56,222,241
2314.byte 102,15,56,222,249
2315 movups 16(%rbp),%xmm1
2316
/* aesdeclast with the stack slots 0,16,32,48,64,80(%rsp) as operands for
 * %xmm2..%xmm7; %rax is reset from %r10 for the next pass of the round loop. */
2317 pxor %xmm15,%xmm14
2318.byte 102,15,56,223,84,36,0
2319 psrad $31,%xmm9
2320 paddq %xmm15,%xmm15
2321.byte 102,15,56,223,92,36,16
2322.byte 102,15,56,223,100,36,32
2323 pand %xmm8,%xmm9
2324 movq %r10,%rax
2325.byte 102,15,56,223,108,36,48
2326.byte 102,15,56,223,116,36,64
2327.byte 102,15,56,223,124,36,80
2328 pxor %xmm9,%xmm15
2329
/* Store the six output blocks and loop while at least 96 bytes remain. */
2330 leaq 96(%rsi),%rsi
2331 movups %xmm2,-96(%rsi)
2332 movups %xmm3,-80(%rsi)
2333 movups %xmm4,-64(%rsi)
2334 movups %xmm5,-48(%rsi)
2335 movups %xmm6,-32(%rsi)
2336 movups %xmm7,-16(%rsi)
2337 subq $96,%rdx
2338 jnc .Lxts_dec_grandloop
2339
/* Undo the index arithmetic: %eax = (112 - %r10d) >> 4, i.e. the count read
 * from offset 240; %rcx = start of the key schedule again. */
2340 movl $16+96,%eax
2341 subl %r10d,%eax
2342 movq %rbp,%rcx
2343 shrl $4,%eax
2344
/* Short path: 0..5 whole blocks remain (%rdx + 96 bytes). Unlike the encrypt
 * routine, one mask more than the block count is stripped of the first round
 * key, because every exit hands two masks (%xmm10, %xmm11) to the tail code. */
2345.Lxts_dec_short:
2346
2347 movl %eax,%r10d
2348 pxor %xmm0,%xmm10
2349 pxor %xmm0,%xmm11
2350 addq $96,%rdx
2351 jz .Lxts_dec_done
2352
2353 pxor %xmm0,%xmm12
2354 cmpq $0x20,%rdx
2355 jb .Lxts_dec_one
2356 pxor %xmm0,%xmm13
2357 je .Lxts_dec_two
2358
2359 pxor %xmm0,%xmm14
2360 cmpq $0x40,%rdx
2361 jb .Lxts_dec_three
2362 je .Lxts_dec_four
2363
/* Five blocks via _aesni_decrypt6 (defined outside this span).
 * NOTE(review): %xmm7 is not cleared before this call, unlike the matching
 * encrypt path; its result is not stored. */
2364 movdqu (%rdi),%xmm2
2365 movdqu 16(%rdi),%xmm3
2366 movdqu 32(%rdi),%xmm4
2367 pxor %xmm10,%xmm2
2368 movdqu 48(%rdi),%xmm5
2369 pxor %xmm11,%xmm3
2370 movdqu 64(%rdi),%xmm6
2371 leaq 80(%rdi),%rdi
2372 pxor %xmm12,%xmm4
2373 pxor %xmm13,%xmm5
2374 pxor %xmm14,%xmm6
2375
2376 call _aesni_decrypt6
2377
/* Unmask and store; meanwhile pcmpgtd/pshufd derive a mask from the sign bits
 * of %xmm15 that is used below, only when a partial tail exists, to step the
 * chain once more. */
2378 xorps %xmm10,%xmm2
2379 xorps %xmm11,%xmm3
2380 xorps %xmm12,%xmm4
2381 movdqu %xmm2,(%rsi)
2382 xorps %xmm13,%xmm5
2383 movdqu %xmm3,16(%rsi)
2384 xorps %xmm14,%xmm6
2385 movdqu %xmm4,32(%rsi)
2386 pxor %xmm14,%xmm14
2387 movdqu %xmm5,48(%rsi)
2388 pcmpgtd %xmm15,%xmm14
2389 movdqu %xmm6,64(%rsi)
2390 leaq 80(%rsi),%rsi
2391 pshufd $0x13,%xmm14,%xmm11
2392 andq $15,%r9
2393 jz .Lxts_dec_ret
2394
/* %xmm10 = current chain value, %xmm11 = the value one step later. */
2395 movdqa %xmm15,%xmm10
2396 paddq %xmm15,%xmm15
2397 pand %xmm8,%xmm11
2398 pxor %xmm15,%xmm11
2399 jmp .Lxts_dec_done2
2400
2401.align 16
/* One block: mask, decrypt inline with the single-block round loop, mask
 * again; %xmm10/%xmm11 then take the next two masks. */
2402.Lxts_dec_one:
2403 movups (%rdi),%xmm2
2404 leaq 16(%rdi),%rdi
2405 xorps %xmm10,%xmm2
2406 movups (%rcx),%xmm0
2407 movups 16(%rcx),%xmm1
2408 leaq 32(%rcx),%rcx
2409 xorps %xmm0,%xmm2
2410.Loop_dec1_12:
2411.byte 102,15,56,222,209
2412 decl %eax
2413 movups (%rcx),%xmm1
2414 leaq 16(%rcx),%rcx
2415 jnz .Loop_dec1_12
2416.byte 102,15,56,223,209
2417 xorps %xmm10,%xmm2
2418 movdqa %xmm11,%xmm10
2419 movups %xmm2,(%rsi)
2420 movdqa %xmm12,%xmm11
2421 leaq 16(%rsi),%rsi
2422 jmp .Lxts_dec_done
2423
2424.align 16
/* Two blocks via _aesni_decrypt2 (defined outside this span). */
2425.Lxts_dec_two:
2426 movups (%rdi),%xmm2
2427 movups 16(%rdi),%xmm3
2428 leaq 32(%rdi),%rdi
2429 xorps %xmm10,%xmm2
2430 xorps %xmm11,%xmm3
2431
2432 call _aesni_decrypt2
2433
2434 xorps %xmm10,%xmm2
2435 movdqa %xmm12,%xmm10
2436 xorps %xmm11,%xmm3
2437 movdqa %xmm13,%xmm11
2438 movups %xmm2,(%rsi)
2439 movups %xmm3,16(%rsi)
2440 leaq 32(%rsi),%rsi
2441 jmp .Lxts_dec_done
2442
2443.align 16
/* Three blocks via _aesni_decrypt3 (defined outside this span). */
2444.Lxts_dec_three:
2445 movups (%rdi),%xmm2
2446 movups 16(%rdi),%xmm3
2447 movups 32(%rdi),%xmm4
2448 leaq 48(%rdi),%rdi
2449 xorps %xmm10,%xmm2
2450 xorps %xmm11,%xmm3
2451 xorps %xmm12,%xmm4
2452
2453 call _aesni_decrypt3
2454
2455 xorps %xmm10,%xmm2
2456 movdqa %xmm13,%xmm10
2457 xorps %xmm11,%xmm3
2458 movdqa %xmm14,%xmm11
2459 xorps %xmm12,%xmm4
2460 movups %xmm2,(%rsi)
2461 movups %xmm3,16(%rsi)
2462 movups %xmm4,32(%rsi)
2463 leaq 48(%rsi),%rsi
2464 jmp .Lxts_dec_done
2465
2466.align 16
/* Four blocks via _aesni_decrypt4 (defined outside this span). */
2467.Lxts_dec_four:
2468 movups (%rdi),%xmm2
2469 movups 16(%rdi),%xmm3
2470 movups 32(%rdi),%xmm4
2471 xorps %xmm10,%xmm2
2472 movups 48(%rdi),%xmm5
2473 leaq 64(%rdi),%rdi
2474 xorps %xmm11,%xmm3
2475 xorps %xmm12,%xmm4
2476 xorps %xmm13,%xmm5
2477
2478 call _aesni_decrypt4
2479
2480 pxor %xmm10,%xmm2
2481 movdqa %xmm14,%xmm10
2482 pxor %xmm11,%xmm3
2483 movdqa %xmm15,%xmm11
2484 pxor %xmm12,%xmm4
2485 movdqu %xmm2,(%rsi)
2486 pxor %xmm13,%xmm5
2487 movdqu %xmm3,16(%rsi)
2488 movdqu %xmm4,32(%rsi)
2489 movdqu %xmm5,48(%rsi)
2490 leaq 64(%rsi),%rsi
2491 jmp .Lxts_dec_done
2492
2493.align 16
/* %r9 & 15 = number of trailing bytes; none means we are finished. Otherwise
 * %rdx = tail length, %rcx = key schedule, and the count is reloaded (the
 * instruction that starts on the last line continues past this span). */
2494.Lxts_dec_done:
2495 andq $15,%r9
2496 jz .Lxts_dec_ret
2497.Lxts_dec_done2:
2498 movq %r9,%rdx
2499 movq %rbp,%rcx
2500 movl
%r10d,%eax
/* The lines up to the .size directive for aesni_xts_decrypt are the end of
 * that function (partial-block tail and exit); its beginning precedes this
 * span. 102,15,56,222,209 = aesdec %xmm1,%xmm2; 102,15,56,223,209 =
 * aesdeclast %xmm1,%xmm2; 0xf3,0xc3 = rep ret. */
2501
/* Decrypt the block at (%rdi) with %xmm11 as the XOR mask before and after,
 * and store it at (%rsi). */
2502 movups (%rdi),%xmm2
2503 xorps %xmm11,%xmm2
2504 movups (%rcx),%xmm0
2505 movups 16(%rcx),%xmm1
2506 leaq 32(%rcx),%rcx
2507 xorps %xmm0,%xmm2
2508.Loop_dec1_13:
2509.byte 102,15,56,222,209
2510 decl %eax
2511 movups (%rcx),%xmm1
2512 leaq 16(%rcx),%rcx
2513 jnz .Loop_dec1_13
2514.byte 102,15,56,223,209
2515 xorps %xmm11,%xmm2
2516 movups %xmm2,(%rsi)
2517
/* For each of the %rdx trailing bytes: the input byte at 16(%rdi) replaces
 * the byte at (%rsi), and the byte it displaced is written to 16(%rsi). */
2518.Lxts_dec_steal:
2519 movzbl 16(%rdi),%eax
2520 movzbl (%rsi),%ecx
2521 leaq 1(%rdi),%rdi
2522 movb %al,(%rsi)
2523 movb %cl,16(%rsi)
2524 leaq 1(%rsi),%rsi
2525 subq $1,%rdx
2526 jnz .Lxts_dec_steal
2527
/* Rewind %rsi by the tail length and decrypt the rebuilt block in place,
 * this time masked with %xmm10. */
2528 subq %r9,%rsi
2529 movq %rbp,%rcx
2530 movl %r10d,%eax
2531
2532 movups (%rsi),%xmm2
2533 xorps %xmm10,%xmm2
2534 movups (%rcx),%xmm0
2535 movups 16(%rcx),%xmm1
2536 leaq 32(%rcx),%rcx
2537 xorps %xmm0,%xmm2
2538.Loop_dec1_14:
2539.byte 102,15,56,222,209
2540 decl %eax
2541 movups (%rcx),%xmm1
2542 leaq 16(%rcx),%rcx
2543 jnz .Loop_dec1_14
2544.byte 102,15,56,223,209
2545 xorps %xmm10,%xmm2
2546 movups %xmm2,(%rsi)
2547
/* Exit: zero %xmm0..%xmm15 and the stack scratch at 0..96(%rsp), then restore
 * %rbp and %rsp from the frame anchored at %r11. */
2548.Lxts_dec_ret:
2549 xorps %xmm0,%xmm0
2550 pxor %xmm1,%xmm1
2551 pxor %xmm2,%xmm2
2552 pxor %xmm3,%xmm3
2553 pxor %xmm4,%xmm4
2554 pxor %xmm5,%xmm5
2555 pxor %xmm6,%xmm6
2556 pxor %xmm7,%xmm7
2557 movaps %xmm0,0(%rsp)
2558 pxor %xmm8,%xmm8
2559 movaps %xmm0,16(%rsp)
2560 pxor %xmm9,%xmm9
2561 movaps %xmm0,32(%rsp)
2562 pxor %xmm10,%xmm10
2563 movaps %xmm0,48(%rsp)
2564 pxor %xmm11,%xmm11
2565 movaps %xmm0,64(%rsp)
2566 pxor %xmm12,%xmm12
2567 movaps %xmm0,80(%rsp)
2568 pxor %xmm13,%xmm13
2569 movaps %xmm0,96(%rsp)
2570 pxor %xmm14,%xmm14
2571 pxor %xmm15,%xmm15
2572 movq -8(%r11),%rbp
2573.cfi_restore %rbp
2574 leaq (%r11),%rsp
2575.cfi_def_cfa_register %rsp
2576.Lxts_dec_epilogue:
2577 .byte 0xf3,0xc3
2578.cfi_endproc
2579.size aesni_xts_decrypt,.-aesni_xts_decrypt
/*
 * aesni_ocb_encrypt
 *
 * Register use as visible in the code (the C-level meaning of each argument
 * is not shown here; presumably SysV AMD64 argument order -- confirm against
 * the prototype):
 *   %rdi  read pointer, advanced 16 bytes per block
 *   %rsi  write pointer
 *   %rdx  number of 16-byte blocks (decremented by 1 or 6 per group)
 *   %rcx  key schedule (count at offset 240); base kept in %r11
 *   %r8   running block number; the index of its lowest set bit (bsf),
 *         times 16, selects an entry in the table at %rbx
 *   %r9   pointer to a 16-byte value loaded into %xmm15 and written back on
 *         exit (presumably the OCB offset)
 *   8(entry %rsp)   -> %rbx: base of a table of 16-byte entries
 *   16(entry %rsp)  -> %rbp: pointer to a 16-byte accumulator kept in %xmm8
 *                      and written back on exit (presumably the checksum)
 *
 * Callee-saved %rbx, %rbp, %r12, %r13, %r14 are pushed on entry and reloaded
 * on exit. The per-block work is done by __ocb_encrypt1/4/6.
 */
2580.globl aesni_ocb_encrypt
2581.type aesni_ocb_encrypt,@function
2582.align 32
2583aesni_ocb_encrypt:
2584.cfi_startproc
2585 leaq (%rsp),%rax
2586 pushq %rbx
2587.cfi_adjust_cfa_offset 8
2588.cfi_offset %rbx,-16
2589 pushq %rbp
2590.cfi_adjust_cfa_offset 8
2591.cfi_offset %rbp,-24
2592 pushq %r12
2593.cfi_adjust_cfa_offset 8
2594.cfi_offset %r12,-32
2595 pushq %r13
2596.cfi_adjust_cfa_offset 8
2597.cfi_offset %r13,-40
2598 pushq %r14
2599.cfi_adjust_cfa_offset 8
2600.cfi_offset %r14,-48
/* %rax still holds the entry %rsp, so 8(%rax) and 16(%rax) are the first two
 * quadwords above the return address. */
2601 movq 8(%rax),%rbx
2602 movq 8+8(%rax),%rbp
2603
/* %r10 = 16 * count; %xmm9 = first round key XOR the round key at offset
 * 16 + 16*count; %xmm15 = value at (%r9) XOR that same round key. */
2604 movl 240(%rcx),%r10d
2605 movq %rcx,%r11
2606 shll $4,%r10d
2607 movups (%rcx),%xmm9
2608 movups 16(%rcx,%r10,1),%xmm1
2609
2610 movdqu (%r9),%xmm15
2611 pxor %xmm1,%xmm9
2612 pxor %xmm1,%xmm15
2613
/* %rcx = %r11 + 32 + 16*count; %rax = %r10 = 48 - 16*count, the negative
 * index the helpers' round loops step by 32 up to zero; %xmm1 = second
 * round key, which the helpers expect preloaded. */
2614 movl $16+32,%eax
2615 leaq 32(%r11,%r10,1),%rcx
2616 movups 16(%r11),%xmm1
2617 subq %r10,%rax
2618 movq %rax,%r10
2619
2620 movdqu (%rbx),%xmm10
2621 movdqu (%rbp),%xmm8
2622
/* If the block number is even, process one block on its own first so the
 * grouped code below always starts on an odd block number. */
2623 testq $1,%r8
2624 jnz .Locb_enc_odd
2625
2626 bsfq %r8,%r12
2627 addq $1,%r8
2628 shlq $4,%r12
2629 movdqu (%rbx,%r12,1),%xmm7
2630 movdqu (%rdi),%xmm2
2631 leaq 16(%rdi),%rdi
2632
2633 call __ocb_encrypt1
2634
2635 movdqa %xmm7,%xmm15
2636 movups %xmm2,(%rsi)
2637 leaq 16(%rsi),%rsi
2638 subq $1,%rdx
2639 jz .Locb_enc_done
2640
/* Precompute the table offsets for block numbers %r8+1, %r8+3 and %r8+5 and
 * advance %r8 by 6. */
2641.Locb_enc_odd:
2642 leaq 1(%r8),%r12
2643 leaq 3(%r8),%r13
2644 leaq 5(%r8),%r14
2645 leaq 6(%r8),%r8
2646 bsfq %r12,%r12
2647 bsfq %r13,%r13
2648 bsfq %r14,%r14
2649 shlq $4,%r12
2650 shlq $4,%r13
2651 shlq $4,%r14
2652
2653 subq $6,%rdx
2654 jc .Locb_enc_short
2655 jmp .Locb_enc_grandloop
2656
2657.align 32
/* Six blocks per iteration through __ocb_encrypt6. */
2658.Locb_enc_grandloop:
2659 movdqu 0(%rdi),%xmm2
2660 movdqu 16(%rdi),%xmm3
2661 movdqu 32(%rdi),%xmm4
2662 movdqu 48(%rdi),%xmm5
2663 movdqu 64(%rdi),%xmm6
2664 movdqu 80(%rdi),%xmm7
2665 leaq 96(%rdi),%rdi
2666
2667 call __ocb_encrypt6
2668
2669 movups %xmm2,0(%rsi)
2670 movups %xmm3,16(%rsi)
2671 movups %xmm4,32(%rsi)
2672 movups %xmm5,48(%rsi)
2673 movups %xmm6,64(%rsi)
2674 movups %xmm7,80(%rsi)
2675 leaq 96(%rsi),%rsi
2676 subq $6,%rdx
2677 jnc .Locb_enc_grandloop
2678
/* 0..5 blocks remain. Unused lanes are zeroed before calling the 4- or
 * 6-block helper, and %xmm15 is taken from the lane of the last real block. */
2679.Locb_enc_short:
2680 addq $6,%rdx
2681 jz .Locb_enc_done
2682
2683 movdqu 0(%rdi),%xmm2
2684 cmpq $2,%rdx
2685 jb .Locb_enc_one
2686 movdqu 16(%rdi),%xmm3
2687 je .Locb_enc_two
2688
2689 movdqu 32(%rdi),%xmm4
2690 cmpq $4,%rdx
2691 jb .Locb_enc_three
2692 movdqu 48(%rdi),%xmm5
2693 je .Locb_enc_four
2694
2695 movdqu 64(%rdi),%xmm6
2696 pxor %xmm7,%xmm7
2697
2698 call __ocb_encrypt6
2699
2700 movdqa %xmm14,%xmm15
2701 movups %xmm2,0(%rsi)
2702 movups %xmm3,16(%rsi)
2703 movups %xmm4,32(%rsi)
2704 movups %xmm5,48(%rsi)
2705 movups %xmm6,64(%rsi)
2706
2707 jmp .Locb_enc_done
2708
2709.align 16
2710.Locb_enc_one:
2711 movdqa %xmm10,%xmm7
2712
2713 call __ocb_encrypt1
2714
2715 movdqa %xmm7,%xmm15
2716 movups %xmm2,0(%rsi)
2717 jmp .Locb_enc_done
2718
2719.align 16
2720.Locb_enc_two:
2721 pxor %xmm4,%xmm4
2722 pxor %xmm5,%xmm5
2723
2724 call __ocb_encrypt4
2725
2726 movdqa %xmm11,%xmm15
2727 movups %xmm2,0(%rsi)
2728 movups %xmm3,16(%rsi)
2729
2730 jmp .Locb_enc_done
2731
2732.align 16
2733.Locb_enc_three:
2734 pxor %xmm5,%xmm5
2735
2736 call __ocb_encrypt4
2737
2738 movdqa %xmm12,%xmm15
2739 movups %xmm2,0(%rsi)
2740 movups %xmm3,16(%rsi)
2741 movups %xmm4,32(%rsi)
2742
2743 jmp .Locb_enc_done
2744
2745.align 16
2746.Locb_enc_four:
2747 call __ocb_encrypt4
2748
2749 movdqa %xmm13,%xmm15
2750 movups %xmm2,0(%rsi)
2751 movups %xmm3,16(%rsi)
2752 movups %xmm4,32(%rsi)
2753 movups %xmm5,48(%rsi)
2754
/* Write back the accumulator (%xmm8) and the running value (%xmm15, after
 * XOR with %xmm0), then zero all XMM registers and restore the saved
 * registers.
 * NOTE(review): %xmm0 is only loaded inside the __ocb_encrypt* helpers, and
 * no path above checks for a zero block count before the first helper call or
 * before this XOR -- presumably callers always pass at least one block;
 * confirm. */
2755.Locb_enc_done:
2756 pxor %xmm0,%xmm15
2757 movdqu %xmm8,(%rbp)
2758 movdqu %xmm15,(%r9)
2759
2760 xorps %xmm0,%xmm0
2761 pxor %xmm1,%xmm1
2762 pxor %xmm2,%xmm2
2763 pxor %xmm3,%xmm3
2764 pxor %xmm4,%xmm4
2765 pxor %xmm5,%xmm5
2766 pxor %xmm6,%xmm6
2767 pxor %xmm7,%xmm7
2768 pxor %xmm8,%xmm8
2769 pxor %xmm9,%xmm9
2770 pxor %xmm10,%xmm10
2771 pxor %xmm11,%xmm11
2772 pxor %xmm12,%xmm12
2773 pxor %xmm13,%xmm13
2774 pxor %xmm14,%xmm14
2775 pxor %xmm15,%xmm15
2776 leaq 40(%rsp),%rax
2777.cfi_def_cfa %rax,8
2778 movq -40(%rax),%r14
2779.cfi_restore %r14
2780 movq -32(%rax),%r13
2781.cfi_restore %r13
2782 movq -24(%rax),%r12
2783.cfi_restore %r12
2784 movq -16(%rax),%rbp
2785.cfi_restore
%rbp
/* The lines up to the .size directive below finish the exit sequence of
 * aesni_ocb_encrypt (reload %rbx, restore %rsp, rep ret). */
2786 movq -8(%rax),%rbx
2787.cfi_restore %rbx
2788 leaq (%rax),%rsp
2789.cfi_def_cfa_register %rsp
2790.Locb_enc_epilogue:
2791 .byte 0xf3,0xc3
2792.cfi_endproc
2793.size aesni_ocb_encrypt,.-aesni_ocb_encrypt
2794
/*
 * __ocb_encrypt6 -- local helper (no .globl): processes six blocks held in
 * %xmm2..%xmm7. It uses a register-based contract rather than the C calling
 * convention.
 *
 * Reads:  %xmm2..%xmm7 input blocks; %xmm15 running value; %xmm9 and %xmm10
 *         mask constants and %xmm1 preloaded round key, as set up by
 *         aesni_ocb_encrypt; %xmm8 accumulator; %rbx table base with offsets
 *         in %r12/%r13/%r14; %r11 key schedule; %rcx/%rax round-key pointer
 *         and negative index; %r8 block number; %r10 saved index.
 * Writes: %xmm8 ^= each of the six input blocks (before masking);
 *         %xmm10..%xmm15 = the six per-block masks, chained by XOR from the
 *         incoming %xmm15 and the table entries; %r12/%r13/%r14 recomputed
 *         for block numbers %r8+1, +3, +5; %r8 += 6.
 *
 * .byte 102,15,56,220,<modrm> = aesenc (209 = %xmm1 -> %xmm2, 208 = %xmm0 ->
 * %xmm2, and so on for %xmm3..%xmm7).
 */
2795.type __ocb_encrypt6,@function
2796.align 32
2797__ocb_encrypt6:
2798.cfi_startproc
/* Chain the masks and, interleaved, fold each input block into %xmm8 and
 * then XOR it with its mask. */
2799 pxor %xmm9,%xmm15
2800 movdqu (%rbx,%r12,1),%xmm11
2801 movdqa %xmm10,%xmm12
2802 movdqu (%rbx,%r13,1),%xmm13
2803 movdqa %xmm10,%xmm14
2804 pxor %xmm15,%xmm10
2805 movdqu (%rbx,%r14,1),%xmm15
2806 pxor %xmm10,%xmm11
2807 pxor %xmm2,%xmm8
2808 pxor %xmm10,%xmm2
2809 pxor %xmm11,%xmm12
2810 pxor %xmm3,%xmm8
2811 pxor %xmm11,%xmm3
2812 pxor %xmm12,%xmm13
2813 pxor %xmm4,%xmm8
2814 pxor %xmm12,%xmm4
2815 pxor %xmm13,%xmm14
2816 pxor %xmm5,%xmm8
2817 pxor %xmm13,%xmm5
2818 pxor %xmm14,%xmm15
2819 pxor %xmm6,%xmm8
2820 pxor %xmm14,%xmm6
2821 pxor %xmm7,%xmm8
2822 pxor %xmm15,%xmm7
2823 movups 32(%r11),%xmm0
2824
/* Table offsets for the next group of six are computed here; the shifts by 4
 * are spread across the rounds below. */
2825 leaq 1(%r8),%r12
2826 leaq 3(%r8),%r13
2827 leaq 5(%r8),%r14
2828 addq $6,%r8
2829 pxor %xmm9,%xmm10
2830 bsfq %r12,%r12
2831 bsfq %r13,%r13
2832 bsfq %r14,%r14
2833
/* First two aesenc rounds; each mask is XORed with %xmm9 in between. */
2834.byte 102,15,56,220,209
2835.byte 102,15,56,220,217
2836.byte 102,15,56,220,225
2837.byte 102,15,56,220,233
2838 pxor %xmm9,%xmm11
2839 pxor %xmm9,%xmm12
2840.byte 102,15,56,220,241
2841 pxor %xmm9,%xmm13
2842 pxor %xmm9,%xmm14
2843.byte 102,15,56,220,249
2844 movups 48(%r11),%xmm1
2845 pxor %xmm9,%xmm15
2846
2847.byte 102,15,56,220,208
2848.byte 102,15,56,220,216
2849.byte 102,15,56,220,224
2850.byte 102,15,56,220,232
2851.byte 102,15,56,220,240
2852.byte 102,15,56,220,248
2853 movups 64(%r11),%xmm0
2854 shlq $4,%r12
2855 shlq $4,%r13
2856 jmp .Locb_enc_loop6
2857
2858.align 32
/* Round loop: two aesenc rounds per pass, round keys fetched through
 * (%rcx,%rax) while %rax steps by 32. The loop continues past this span. */
2859.Locb_enc_loop6:
2860.byte 102,15,56,220,209
2861.byte 102,15,56,220,217
2862.byte 102,15,56,220,225
2863.byte 102,15,56,220,233
2864.byte 102,15,56,220,241
2865.byte 102,15,56,220,249
2866 movups (%rcx,%rax,1),%xmm1
2867 addq $32,%rax
2868
2869.byte 102,15,56,220,208
2870.byte 102,15,56,220,216
2871.byte
102,15,56,220,224
/* The lines up to the .size directive for __ocb_encrypt6 are the end of that
 * helper: the rest of its round loop, one more aesenc round, then aesenclast
 * of %xmm2..%xmm7 with %xmm10..%xmm15 as operands (102,65,15,56,221,<modrm>).
 * Before returning it reloads %xmm1 from 16(%r11), %xmm10 from (%rbx) and
 * %rax from %r10, and finishes the shift of %r14. */
2872.byte 102,15,56,220,232
2873.byte 102,15,56,220,240
2874.byte 102,15,56,220,248
2875 movups -16(%rcx,%rax,1),%xmm0
2876 jnz .Locb_enc_loop6
2877
2878.byte 102,15,56,220,209
2879.byte 102,15,56,220,217
2880.byte 102,15,56,220,225
2881.byte 102,15,56,220,233
2882.byte 102,15,56,220,241
2883.byte 102,15,56,220,249
2884 movups 16(%r11),%xmm1
2885 shlq $4,%r14
2886
2887.byte 102,65,15,56,221,210
2888 movdqu (%rbx),%xmm10
2889 movq %r10,%rax
2890.byte 102,65,15,56,221,219
2891.byte 102,65,15,56,221,228
2892.byte 102,65,15,56,221,237
2893.byte 102,65,15,56,221,246
2894.byte 102,65,15,56,221,255
2895 .byte 0xf3,0xc3
2896.cfi_endproc
2897.size __ocb_encrypt6,.-__ocb_encrypt6
2898
/*
 * __ocb_encrypt4 -- local helper (no .globl): processes four blocks held in
 * %xmm2..%xmm5. It uses a register-based contract rather than the C calling
 * convention.
 *
 * Reads:  %xmm2..%xmm5 input blocks; %xmm15 running value; %xmm9 and %xmm10
 *         mask constants and %xmm1 preloaded round key, as set up by
 *         aesni_ocb_encrypt; %xmm8 accumulator; %rbx table base with offsets
 *         in %r12/%r13; %r11 key schedule; %rcx/%rax round-key pointer and
 *         negative index.
 * Writes: %xmm8 ^= each of the four input blocks (before masking);
 *         %xmm10..%xmm13 = the four per-block masks. Unlike the six-block
 *         helper, it updates neither %r8 nor %r12..%r14.
 *
 * .byte 102,15,56,220,<modrm> = aesenc (209 = %xmm1 -> %xmm2, 208 = %xmm0 ->
 * %xmm2, and so on for %xmm3..%xmm5).
 */
2899.type __ocb_encrypt4,@function
2900.align 32
2901__ocb_encrypt4:
2902.cfi_startproc
/* Chain the masks and, interleaved, fold each input block into %xmm8 and
 * then XOR it with its mask. */
2903 pxor %xmm9,%xmm15
2904 movdqu (%rbx,%r12,1),%xmm11
2905 movdqa %xmm10,%xmm12
2906 movdqu (%rbx,%r13,1),%xmm13
2907 pxor %xmm15,%xmm10
2908 pxor %xmm10,%xmm11
2909 pxor %xmm2,%xmm8
2910 pxor %xmm10,%xmm2
2911 pxor %xmm11,%xmm12
2912 pxor %xmm3,%xmm8
2913 pxor %xmm11,%xmm3
2914 pxor %xmm12,%xmm13
2915 pxor %xmm4,%xmm8
2916 pxor %xmm12,%xmm4
2917 pxor %xmm5,%xmm8
2918 pxor %xmm13,%xmm5
2919 movups 32(%r11),%xmm0
2920
/* XOR each mask with %xmm9 so it can serve as the aesenclast operand. */
2921 pxor %xmm9,%xmm10
2922 pxor %xmm9,%xmm11
2923 pxor %xmm9,%xmm12
2924 pxor %xmm9,%xmm13
2925
2926.byte 102,15,56,220,209
2927.byte 102,15,56,220,217
2928.byte 102,15,56,220,225
2929.byte 102,15,56,220,233
2930 movups 48(%r11),%xmm1
2931
2932.byte 102,15,56,220,208
2933.byte 102,15,56,220,216
2934.byte 102,15,56,220,224
2935.byte 102,15,56,220,232
2936 movups 64(%r11),%xmm0
2937 jmp .Locb_enc_loop4
2938
2939.align 32
/* Round loop: two aesenc rounds per pass; jnz tests the flags of
 * "addq $32,%rax" (the movups in between does not alter them). */
2940.Locb_enc_loop4:
2941.byte 102,15,56,220,209
2942.byte 102,15,56,220,217
2943.byte 102,15,56,220,225
2944.byte 102,15,56,220,233
2945 movups (%rcx,%rax,1),%xmm1
2946 addq $32,%rax
2947
2948.byte 102,15,56,220,208
2949.byte 102,15,56,220,216
2950.byte 102,15,56,220,224
2951.byte 102,15,56,220,232
2952 movups -16(%rcx,%rax,1),%xmm0
2953 jnz .Locb_enc_loop4
2954 2955.byte 102,15,56,220,209 2956.byte 102,15,56,220,217 2957.byte 102,15,56,220,225 2958.byte 102,15,56,220,233 2959 movups 16(%r11),%xmm1 2960 movq %r10,%rax 2961 2962.byte 102,65,15,56,221,210 2963.byte 102,65,15,56,221,219 2964.byte 102,65,15,56,221,228 2965.byte 102,65,15,56,221,237 2966 .byte 0xf3,0xc3 2967.cfi_endproc 2968.size __ocb_encrypt4,.-__ocb_encrypt4 2969 /* __ocb_encrypt1: single-block variant. The block is in %xmm2; %xmm7 is turned into the offset (%xmm7 ^= %xmm15 ^ %xmm9), the block is XORed into %xmm8 and then masked with %xmm7. The final `.byte 102,15,56,221,215` is AESENCLAST %xmm7,%xmm2. */2970.type __ocb_encrypt1,@function 2971.align 32 2972__ocb_encrypt1: 2973.cfi_startproc 2974 pxor %xmm15,%xmm7 2975 pxor %xmm9,%xmm7 2976 pxor %xmm2,%xmm8 2977 pxor %xmm7,%xmm2 2978 movups 32(%r11),%xmm0 2979 2980.byte 102,15,56,220,209 2981 movups 48(%r11),%xmm1 2982 pxor %xmm9,%xmm7 2983 2984.byte 102,15,56,220,208 2985 movups 64(%r11),%xmm0 2986 jmp .Locb_enc_loop1 2987 2988.align 32 2989.Locb_enc_loop1: 2990.byte 102,15,56,220,209 2991 movups (%rcx,%rax,1),%xmm1 2992 addq $32,%rax 2993 2994.byte 102,15,56,220,208 2995 movups -16(%rcx,%rax,1),%xmm0 2996 jnz .Locb_enc_loop1 2997 2998.byte 102,15,56,220,209 2999 movups 16(%r11),%xmm1 3000 movq %r10,%rax 3001 3002.byte 102,15,56,221,215 3003 .byte 0xf3,0xc3 3004.cfi_endproc 3005.size __ocb_encrypt1,.-__ocb_encrypt1 3006 /* aesni_ocb_decrypt: exported entry point. Pushes %rbx, %rbp, %r12-%r14 and reads two stack-passed arguments relative to the entry %rsp kept in %rax: 8(%rax) into %rbx (base of the table indexed by %r12-%r14) and 16(%rax) into %rbp (16-byte accumulator, loaded into %xmm8). Register arguments as used below: %rdi = input, %rsi = output, %rdx = number of 16-byte blocks, %rcx = key schedule with the round count at offset 240, %r8 = block counter (presumably the starting block number; confirm against the C prototype), %r9 = 16-byte offset that is read here and written back on exit. */3007.globl aesni_ocb_decrypt 3008.type aesni_ocb_decrypt,@function 3009.align 32 3010aesni_ocb_decrypt: 3011.cfi_startproc 3012 leaq (%rsp),%rax 3013 pushq %rbx 3014.cfi_adjust_cfa_offset 8 3015.cfi_offset %rbx,-16 3016 pushq %rbp 3017.cfi_adjust_cfa_offset 8 3018.cfi_offset %rbp,-24 3019 pushq %r12 3020.cfi_adjust_cfa_offset 8 3021.cfi_offset %r12,-32 3022 pushq %r13 3023.cfi_adjust_cfa_offset 8 3024.cfi_offset %r13,-40 3025 pushq %r14 3026.cfi_adjust_cfa_offset 8 3027.cfi_offset %r14,-48 3028 movq 8(%rax),%rbx 3029 movq 8+8(%rax),%rbp 3030 3031 movl 240(%rcx),%r10d 3032 movq %rcx,%r11 3033 shll $4,%r10d 3034 movups (%rcx),%xmm9 3035 movups 16(%rcx,%r10,1),%xmm1 3036 3037 movdqu (%r9),%xmm15 3038 pxor %xmm1,%xmm9 3039 pxor %xmm1,%xmm15 3040 3041 movl $16+32,%eax 3042 leaq 32(%r11,%r10,1),%rcx 3043 movups 16(%r11),%xmm1 3044 
subq %r10,%rax 3045 movq %rax,%r10 3046 3047 movdqu (%rbx),%xmm10 3048 movdqu (%rbp),%xmm8 3049 /* If %r8 is even, one block is processed on its own through __ocb_decrypt1 and %r8 is incremented, so %r8 is odd when .Locb_dec_odd is reached. */3050 testq $1,%r8 3051 jnz .Locb_dec_odd 3052 3053 bsfq %r8,%r12 3054 addq $1,%r8 3055 shlq $4,%r12 3056 movdqu (%rbx,%r12,1),%xmm7 3057 movdqu (%rdi),%xmm2 3058 leaq 16(%rdi),%rdi 3059 3060 call __ocb_decrypt1 3061 3062 movdqa %xmm7,%xmm15 3063 movups %xmm2,(%rsi) 3064 xorps %xmm2,%xmm8 3065 leaq 16(%rsi),%rsi 3066 subq $1,%rdx 3067 jz .Locb_dec_done 3068 3069.Locb_dec_odd: 3070 leaq 1(%r8),%r12 3071 leaq 3(%r8),%r13 3072 leaq 5(%r8),%r14 3073 leaq 6(%r8),%r8 3074 bsfq %r12,%r12 3075 bsfq %r13,%r13 3076 bsfq %r14,%r14 3077 shlq $4,%r12 3078 shlq $4,%r13 3079 shlq $4,%r14 3080 3081 subq $6,%rdx 3082 jc .Locb_dec_short 3083 jmp .Locb_dec_grandloop 3084 3085.align 32 /* Main loop: six blocks per iteration; each output block is stored and then XORed into the %xmm8 accumulator. */3086.Locb_dec_grandloop: 3087 movdqu 0(%rdi),%xmm2 3088 movdqu 16(%rdi),%xmm3 3089 movdqu 32(%rdi),%xmm4 3090 movdqu 48(%rdi),%xmm5 3091 movdqu 64(%rdi),%xmm6 3092 movdqu 80(%rdi),%xmm7 3093 leaq 96(%rdi),%rdi 3094 3095 call __ocb_decrypt6 3096 3097 movups %xmm2,0(%rsi) 3098 pxor %xmm2,%xmm8 3099 movups %xmm3,16(%rsi) 3100 pxor %xmm3,%xmm8 3101 movups %xmm4,32(%rsi) 3102 pxor %xmm4,%xmm8 3103 movups %xmm5,48(%rsi) 3104 pxor %xmm5,%xmm8 3105 movups %xmm6,64(%rsi) 3106 pxor %xmm6,%xmm8 3107 movups %xmm7,80(%rsi) 3108 pxor %xmm7,%xmm8 3109 leaq 96(%rsi),%rsi 3110 subq $6,%rdx 3111 jnc .Locb_dec_grandloop 3112 /* Remainder: after adding the 6 back, %rdx is 0 (done) or 1-5; the compare ladder dispatches to the one/two/three/four handlers and falls through for five blocks, which reuse __ocb_decrypt6 with %xmm7 zeroed. */3113.Locb_dec_short: 3114 addq $6,%rdx 3115 jz .Locb_dec_done 3116 3117 movdqu 0(%rdi),%xmm2 3118 cmpq $2,%rdx 3119 jb .Locb_dec_one 3120 movdqu 16(%rdi),%xmm3 3121 je .Locb_dec_two 3122 3123 movdqu 32(%rdi),%xmm4 3124 cmpq $4,%rdx 3125 jb .Locb_dec_three 3126 movdqu 48(%rdi),%xmm5 3127 je .Locb_dec_four 3128 3129 movdqu 64(%rdi),%xmm6 3130 pxor %xmm7,%xmm7 3131 3132 call __ocb_decrypt6 3133 3134 movdqa %xmm14,%xmm15 3135 movups %xmm2,0(%rsi) 3136 pxor %xmm2,%xmm8 3137 movups %xmm3,16(%rsi) 3138 pxor %xmm3,%xmm8 3139 movups %xmm4,32(%rsi) 3140 pxor %xmm4,%xmm8 3141 movups %xmm5,48(%rsi) 3142 pxor %xmm5,%xmm8 
3143 movups %xmm6,64(%rsi) 3144 pxor %xmm6,%xmm8 3145 3146 jmp .Locb_dec_done 3147 3148.align 16 /* Tail handlers: each one copies the offset register of its last block (%xmm7, %xmm11, %xmm12 or %xmm13) into %xmm15 before joining .Locb_dec_done. */3149.Locb_dec_one: 3150 movdqa %xmm10,%xmm7 3151 3152 call __ocb_decrypt1 3153 3154 movdqa %xmm7,%xmm15 3155 movups %xmm2,0(%rsi) 3156 xorps %xmm2,%xmm8 3157 jmp .Locb_dec_done 3158 3159.align 16 3160.Locb_dec_two: 3161 pxor %xmm4,%xmm4 3162 pxor %xmm5,%xmm5 3163 3164 call __ocb_decrypt4 3165 3166 movdqa %xmm11,%xmm15 3167 movups %xmm2,0(%rsi) 3168 xorps %xmm2,%xmm8 3169 movups %xmm3,16(%rsi) 3170 xorps %xmm3,%xmm8 3171 3172 jmp .Locb_dec_done 3173 3174.align 16 3175.Locb_dec_three: 3176 pxor %xmm5,%xmm5 3177 3178 call __ocb_decrypt4 3179 3180 movdqa %xmm12,%xmm15 3181 movups %xmm2,0(%rsi) 3182 xorps %xmm2,%xmm8 3183 movups %xmm3,16(%rsi) 3184 xorps %xmm3,%xmm8 3185 movups %xmm4,32(%rsi) 3186 xorps %xmm4,%xmm8 3187 3188 jmp .Locb_dec_done 3189 3190.align 16 3191.Locb_dec_four: 3192 call __ocb_decrypt4 3193 3194 movdqa %xmm13,%xmm15 3195 movups %xmm2,0(%rsi) 3196 pxor %xmm2,%xmm8 3197 movups %xmm3,16(%rsi) 3198 pxor %xmm3,%xmm8 3199 movups %xmm4,32(%rsi) 3200 pxor %xmm4,%xmm8 3201 movups %xmm5,48(%rsi) 3202 pxor %xmm5,%xmm8 3203 /* Common exit: XOR %xmm0 into %xmm15, store the accumulator %xmm8 to (%rbp) and the offset %xmm15 to (%r9), zero %xmm0-%xmm15, then reload %r14, %r13, %r12, %rbp, %rbx from the stack and restore %rsp. */3204.Locb_dec_done: 3205 pxor %xmm0,%xmm15 3206 movdqu %xmm8,(%rbp) 3207 movdqu %xmm15,(%r9) 3208 3209 xorps %xmm0,%xmm0 3210 pxor %xmm1,%xmm1 3211 pxor %xmm2,%xmm2 3212 pxor %xmm3,%xmm3 3213 pxor %xmm4,%xmm4 3214 pxor %xmm5,%xmm5 3215 pxor %xmm6,%xmm6 3216 pxor %xmm7,%xmm7 3217 pxor %xmm8,%xmm8 3218 pxor %xmm9,%xmm9 3219 pxor %xmm10,%xmm10 3220 pxor %xmm11,%xmm11 3221 pxor %xmm12,%xmm12 3222 pxor %xmm13,%xmm13 3223 pxor %xmm14,%xmm14 3224 pxor %xmm15,%xmm15 3225 leaq 40(%rsp),%rax 3226.cfi_def_cfa %rax,8 3227 movq -40(%rax),%r14 3228.cfi_restore %r14 3229 movq -32(%rax),%r13 3230.cfi_restore %r13 3231 movq -24(%rax),%r12 3232.cfi_restore %r12 3233 movq -16(%rax),%rbp 3234.cfi_restore %rbp 3235 movq -8(%rax),%rbx 3236.cfi_restore %rbx 3237 leaq (%rax),%rsp 3238.cfi_def_cfa_register %rsp 3239.Locb_dec_epilogue: 3240 .byte 0xf3,0xc3 
3241.cfi_endproc 3242.size aesni_ocb_decrypt,.-aesni_ocb_decrypt 3243 /* __ocb_decrypt6: local helper working on six blocks in %xmm2-%xmm7. Offsets are chained into %xmm10-%xmm15 from (%rbx,%r12), (%rbx,%r13), (%rbx,%r14) and XORed into the blocks; %xmm8 is not touched here. %r12-%r14 are recomputed as bsf(%r8+1), bsf(%r8+3), bsf(%r8+5) and %r8 is advanced by 6. `.byte 102,15,56,222,modrm` is a hand-encoded AESDEC (66 0F 38 DE /r). */3244.type __ocb_decrypt6,@function 3245.align 32 3246__ocb_decrypt6: 3247.cfi_startproc 3248 pxor %xmm9,%xmm15 3249 movdqu (%rbx,%r12,1),%xmm11 3250 movdqa %xmm10,%xmm12 3251 movdqu (%rbx,%r13,1),%xmm13 3252 movdqa %xmm10,%xmm14 3253 pxor %xmm15,%xmm10 3254 movdqu (%rbx,%r14,1),%xmm15 3255 pxor %xmm10,%xmm11 3256 pxor %xmm10,%xmm2 3257 pxor %xmm11,%xmm12 3258 pxor %xmm11,%xmm3 3259 pxor %xmm12,%xmm13 3260 pxor %xmm12,%xmm4 3261 pxor %xmm13,%xmm14 3262 pxor %xmm13,%xmm5 3263 pxor %xmm14,%xmm15 3264 pxor %xmm14,%xmm6 3265 pxor %xmm15,%xmm7 3266 movups 32(%r11),%xmm0 3267 3268 leaq 1(%r8),%r12 3269 leaq 3(%r8),%r13 3270 leaq 5(%r8),%r14 3271 addq $6,%r8 3272 pxor %xmm9,%xmm10 3273 bsfq %r12,%r12 3274 bsfq %r13,%r13 3275 bsfq %r14,%r14 3276 3277.byte 102,15,56,222,209 3278.byte 102,15,56,222,217 3279.byte 102,15,56,222,225 3280.byte 102,15,56,222,233 3281 pxor %xmm9,%xmm11 3282 pxor %xmm9,%xmm12 3283.byte 102,15,56,222,241 3284 pxor %xmm9,%xmm13 3285 pxor %xmm9,%xmm14 3286.byte 102,15,56,222,249 3287 movups 48(%r11),%xmm1 3288 pxor %xmm9,%xmm15 3289 3290.byte 102,15,56,222,208 3291.byte 102,15,56,222,216 3292.byte 102,15,56,222,224 3293.byte 102,15,56,222,232 3294.byte 102,15,56,222,240 3295.byte 102,15,56,222,248 3296 movups 64(%r11),%xmm0 3297 shlq $4,%r12 3298 shlq $4,%r13 3299 jmp .Locb_dec_loop6 3300 3301.align 32 3302.Locb_dec_loop6: 3303.byte 102,15,56,222,209 3304.byte 102,15,56,222,217 3305.byte 102,15,56,222,225 3306.byte 102,15,56,222,233 3307.byte 102,15,56,222,241 3308.byte 102,15,56,222,249 3309 movups (%rcx,%rax,1),%xmm1 3310 addq $32,%rax 3311 3312.byte 102,15,56,222,208 3313.byte 102,15,56,222,216 3314.byte 102,15,56,222,224 3315.byte 102,15,56,222,232 3316.byte 102,15,56,222,240 3317.byte 102,15,56,222,248 3318 movups -16(%rcx,%rax,1),%xmm0 3319 jnz .Locb_dec_loop6 3320 3321.byte 102,15,56,222,209 3322.byte 102,15,56,222,217 3323.byte 102,15,56,222,225 3324.byte 102,15,56,222,233 
3325.byte 102,15,56,222,241 3326.byte 102,15,56,222,249 3327 movups 16(%r11),%xmm1 3328 shlq $4,%r14 3329 /* Last round: `.byte 102,65,15,56,223,modrm` is AESDECLAST (66 0F 38 DF /r) with REX.B (65), so the round-key operands are %xmm10-%xmm15. %rax is reloaded from %r10 and %xmm10 from (%rbx) before returning. */3330.byte 102,65,15,56,223,210 3331 movdqu (%rbx),%xmm10 3332 movq %r10,%rax 3333.byte 102,65,15,56,223,219 3334.byte 102,65,15,56,223,228 3335.byte 102,65,15,56,223,237 3336.byte 102,65,15,56,223,246 3337.byte 102,65,15,56,223,255 3338 .byte 0xf3,0xc3 3339.cfi_endproc 3340.size __ocb_decrypt6,.-__ocb_decrypt6 3341 /* __ocb_decrypt4: four-block variant. Blocks are in %xmm2-%xmm5; offsets are chained into %xmm10-%xmm13 from (%rbx,%r12) and (%rbx,%r13). It neither advances %r8 nor recomputes %r12-%r14, and it does not touch %xmm8. */3342.type __ocb_decrypt4,@function 3343.align 32 3344__ocb_decrypt4: 3345.cfi_startproc 3346 pxor %xmm9,%xmm15 3347 movdqu (%rbx,%r12,1),%xmm11 3348 movdqa %xmm10,%xmm12 3349 movdqu (%rbx,%r13,1),%xmm13 3350 pxor %xmm15,%xmm10 3351 pxor %xmm10,%xmm11 3352 pxor %xmm10,%xmm2 3353 pxor %xmm11,%xmm12 3354 pxor %xmm11,%xmm3 3355 pxor %xmm12,%xmm13 3356 pxor %xmm12,%xmm4 3357 pxor %xmm13,%xmm5 3358 movups 32(%r11),%xmm0 3359 3360 pxor %xmm9,%xmm10 3361 pxor %xmm9,%xmm11 3362 pxor %xmm9,%xmm12 3363 pxor %xmm9,%xmm13 3364 3365.byte 102,15,56,222,209 3366.byte 102,15,56,222,217 3367.byte 102,15,56,222,225 3368.byte 102,15,56,222,233 3369 movups 48(%r11),%xmm1 3370 3371.byte 102,15,56,222,208 3372.byte 102,15,56,222,216 3373.byte 102,15,56,222,224 3374.byte 102,15,56,222,232 3375 movups 64(%r11),%xmm0 3376 jmp .Locb_dec_loop4 3377 3378.align 32 3379.Locb_dec_loop4: 3380.byte 102,15,56,222,209 3381.byte 102,15,56,222,217 3382.byte 102,15,56,222,225 3383.byte 102,15,56,222,233 3384 movups (%rcx,%rax,1),%xmm1 3385 addq $32,%rax 3386 3387.byte 102,15,56,222,208 3388.byte 102,15,56,222,216 3389.byte 102,15,56,222,224 3390.byte 102,15,56,222,232 3391 movups -16(%rcx,%rax,1),%xmm0 3392 jnz .Locb_dec_loop4 3393 3394.byte 102,15,56,222,209 3395.byte 102,15,56,222,217 3396.byte 102,15,56,222,225 3397.byte 102,15,56,222,233 3398 movups 16(%r11),%xmm1 3399 movq %r10,%rax 3400 3401.byte 102,65,15,56,223,210 3402.byte 102,65,15,56,223,219 3403.byte 102,65,15,56,223,228 3404.byte 102,65,15,56,223,237 3405 .byte 0xf3,0xc3 3406.cfi_endproc 3407.size 
__ocb_decrypt4,.-__ocb_decrypt4 3408 /* __ocb_decrypt1: single-block variant. The block is in %xmm2; %xmm7 becomes the offset (%xmm7 ^= %xmm15 ^ %xmm9) and is XORed into the block. The final `.byte 102,15,56,223,215` is AESDECLAST %xmm7,%xmm2. */3409.type __ocb_decrypt1,@function 3410.align 32 3411__ocb_decrypt1: 3412.cfi_startproc 3413 pxor %xmm15,%xmm7 3414 pxor %xmm9,%xmm7 3415 pxor %xmm7,%xmm2 3416 movups 32(%r11),%xmm0 3417 3418.byte 102,15,56,222,209 3419 movups 48(%r11),%xmm1 3420 pxor %xmm9,%xmm7 3421 3422.byte 102,15,56,222,208 3423 movups 64(%r11),%xmm0 3424 jmp .Locb_dec_loop1 3425 3426.align 32 3427.Locb_dec_loop1: 3428.byte 102,15,56,222,209 3429 movups (%rcx,%rax,1),%xmm1 3430 addq $32,%rax 3431 3432.byte 102,15,56,222,208 3433 movups -16(%rcx,%rax,1),%xmm0 3434 jnz .Locb_dec_loop1 3435 3436.byte 102,15,56,222,209 3437 movups 16(%r11),%xmm1 3438 movq %r10,%rax 3439 3440.byte 102,15,56,223,215 3441 .byte 0xf3,0xc3 3442.cfi_endproc 3443.size __ocb_decrypt1,.-__ocb_decrypt1 /* aesni_cbc_encrypt: exported entry point. As used below: %rdi = input, %rsi = output, %rdx = length in bytes (returns immediately when 0), %rcx = key schedule with the round count at offset 240, %r8 = 16-byte IV (read at entry, updated before return), %r9d = direction flag (zero selects .Lcbc_decrypt, non-zero the encrypt loop). */3444.globl aesni_cbc_encrypt 3445.type aesni_cbc_encrypt,@function 3446.align 16 3447aesni_cbc_encrypt: 3448.cfi_startproc 3449 testq %rdx,%rdx 3450 jz .Lcbc_ret 3451 3452 movl 240(%rcx),%r10d 3453 movq %rcx,%r11 3454 testl %r9d,%r9d 3455 jz .Lcbc_decrypt 3456 3457 movups (%r8),%xmm2 3458 movl %r10d,%eax 3459 cmpq $16,%rdx 3460 jb .Lcbc_enc_tail 3461 subq $16,%rdx 3462 jmp .Lcbc_enc_loop 3463.align 16 /* One block per iteration: %xmm2 carries the previous ciphertext (initially the IV), is XORed with the next input block and run through the AESENC loop; %rcx and %eax are reloaded from %r11 and %r10d afterwards. */3464.Lcbc_enc_loop: 3465 movups (%rdi),%xmm3 3466 leaq 16(%rdi),%rdi 3467 3468 movups (%rcx),%xmm0 3469 movups 16(%rcx),%xmm1 3470 xorps %xmm0,%xmm3 3471 leaq 32(%rcx),%rcx 3472 xorps %xmm3,%xmm2 3473.Loop_enc1_15: 3474.byte 102,15,56,220,209 3475 decl %eax 3476 movups (%rcx),%xmm1 3477 leaq 16(%rcx),%rcx 3478 jnz .Loop_enc1_15 3479.byte 102,15,56,221,209 3480 movl %r10d,%eax 3481 movq %r11,%rcx 3482 movups %xmm2,0(%rsi) 3483 leaq 16(%rsi),%rsi 3484 subq $16,%rdx 3485 jnc .Lcbc_enc_loop 3486 addq $16,%rdx 3487 jnz .Lcbc_enc_tail 3488 pxor %xmm0,%xmm0 3489 pxor %xmm1,%xmm1 3490 movups %xmm2,(%r8) 3491 pxor %xmm2,%xmm2 3492 pxor %xmm3,%xmm3 3493 jmp .Lcbc_ret 3494 /* Partial last block: %rdi and %rsi are swapped and `.long 0x9066A4F3` (bytes F3 A4 66 90 = `rep movsb` followed by a two-byte nop) copies the remaining %rdx bytes from the input to the output buffer. */3495.Lcbc_enc_tail: 3496 movq %rdx,%rcx 3497 xchgq %rdi,%rsi 3498.long 0x9066A4F3 3499 movl $16,%ecx 3500 subq %rdx,%rcx 3501 xorl 
%eax,%eax 3502.long 0x9066AAF3 /* 0x9066AAF3 = bytes F3 AA 66 90 = `rep stosb` + two-byte nop: stores %rcx zero bytes (%al = 0) at %rdi. %rdi is then moved back 16 bytes and also used as the input pointer, and %rdx is cleared, so .Lcbc_enc_loop runs once more on that block in the output buffer. */3503 leaq -16(%rdi),%rdi 3504 movl %r10d,%eax 3505 movq %rdi,%rsi 3506 movq %r11,%rcx 3507 xorq %rdx,%rdx 3508 jmp .Lcbc_enc_loop 3509 3510.align 16 /* Decrypt path: a length of exactly 16 bytes is handled inline; every other length goes to .Lcbc_decrypt_bulk. */3511.Lcbc_decrypt: 3512 cmpq $16,%rdx 3513 jne .Lcbc_decrypt_bulk 3514 3515 3516 3517 movdqu (%rdi),%xmm2 3518 movdqu (%r8),%xmm3 3519 movdqa %xmm2,%xmm4 3520 movups (%rcx),%xmm0 3521 movups 16(%rcx),%xmm1 3522 leaq 32(%rcx),%rcx 3523 xorps %xmm0,%xmm2 3524.Loop_dec1_16: 3525.byte 102,15,56,222,209 3526 decl %r10d 3527 movups (%rcx),%xmm1 3528 leaq 16(%rcx),%rcx 3529 jnz .Loop_dec1_16 3530.byte 102,15,56,223,209 3531 pxor %xmm0,%xmm0 3532 pxor %xmm1,%xmm1 3533 movdqu %xmm4,(%r8) 3534 xorps %xmm3,%xmm2 3535 pxor %xmm3,%xmm3 3536 movups %xmm2,(%rsi) 3537 pxor %xmm2,%xmm2 3538 jmp .Lcbc_ret 3539.align 16 /* Bulk decrypt: %r11 keeps the entry %rsp, %rbp is pushed, and a 16-byte-aligned 16-byte scratch slot is reserved on the stack. %xmm10 holds the IV; %xmm11-%xmm15 keep copies of the input blocks for the chaining XOR. With more than 0x70 bytes, OPENSSL_ia32cap_P+4 is masked with 71303168 (0x4400000) and the six-block loop is chosen when the result equals 4194304 (0x400000); otherwise the eight-block loop is used. */3540.Lcbc_decrypt_bulk: 3541 leaq (%rsp),%r11 3542.cfi_def_cfa_register %r11 3543 pushq %rbp 3544.cfi_offset %rbp,-16 3545 subq $16,%rsp 3546 andq $-16,%rsp 3547 movq %rcx,%rbp 3548 movups (%r8),%xmm10 3549 movl %r10d,%eax 3550 cmpq $0x50,%rdx 3551 jbe .Lcbc_dec_tail 3552 3553 movups (%rcx),%xmm0 3554 movdqu 0(%rdi),%xmm2 3555 movdqu 16(%rdi),%xmm3 3556 movdqa %xmm2,%xmm11 3557 movdqu 32(%rdi),%xmm4 3558 movdqa %xmm3,%xmm12 3559 movdqu 48(%rdi),%xmm5 3560 movdqa %xmm4,%xmm13 3561 movdqu 64(%rdi),%xmm6 3562 movdqa %xmm5,%xmm14 3563 movdqu 80(%rdi),%xmm7 3564 movdqa %xmm6,%xmm15 3565 movl OPENSSL_ia32cap_P+4(%rip),%r9d 3566 cmpq $0x70,%rdx 3567 jbe .Lcbc_dec_six_or_seven 3568 3569 andl $71303168,%r9d 3570 subq $0x50,%rdx 3571 cmpl $4194304,%r9d 3572 je .Lcbc_dec_loop6_enter 3573 subq $0x20,%rdx 3574 leaq 112(%rcx),%rcx 3575 jmp .Lcbc_dec_loop8_enter 3576.align 16 3577.Lcbc_dec_loop8: 3578 movups %xmm9,(%rsi) 3579 leaq 16(%rsi),%rsi 3580.Lcbc_dec_loop8_enter: 3581 movdqu 96(%rdi),%xmm8 3582 pxor %xmm0,%xmm2 3583 movdqu 112(%rdi),%xmm9 3584 pxor %xmm0,%xmm3 3585 movups 16-112(%rcx),%xmm1 3586 pxor %xmm0,%xmm4 3587 movq $-1,%rbp 3588 cmpq $0x70,%rdx 3589 pxor %xmm0,%xmm5 3590 pxor %xmm0,%xmm6 
3591 pxor %xmm0,%xmm7 3592 pxor %xmm0,%xmm8 3593 /* Eight interleaved AESDEC streams on %xmm2-%xmm9, alternating round keys in %xmm1 and %xmm0 loaded at fixed displacements from %rcx. The 68 prefix byte is REX.R, selecting %xmm8/%xmm9 as destination. */3594.byte 102,15,56,222,209 3595 pxor %xmm0,%xmm9 3596 movups 32-112(%rcx),%xmm0 3597.byte 102,15,56,222,217 3598.byte 102,15,56,222,225 3599.byte 102,15,56,222,233 3600.byte 102,15,56,222,241 3601.byte 102,15,56,222,249 3602.byte 102,68,15,56,222,193 /* adcq consumes the carry of the preceding `cmpq $0x70,%rdx` (the instructions in between are SSE/AES instructions, which leave the flags alone): %rbp, preset to -1, becomes %rdi when %rdx < 0x70 and %rdi+128 otherwise. */3603 adcq $0,%rbp 3604 andq $128,%rbp 3605.byte 102,68,15,56,222,201 3606 addq %rdi,%rbp 3607 movups 48-112(%rcx),%xmm1 3608.byte 102,15,56,222,208 3609.byte 102,15,56,222,216 3610.byte 102,15,56,222,224 3611.byte 102,15,56,222,232 3612.byte 102,15,56,222,240 3613.byte 102,15,56,222,248 3614.byte 102,68,15,56,222,192 3615.byte 102,68,15,56,222,200 3616 movups 64-112(%rcx),%xmm0 3617 nop 3618.byte 102,15,56,222,209 3619.byte 102,15,56,222,217 3620.byte 102,15,56,222,225 3621.byte 102,15,56,222,233 3622.byte 102,15,56,222,241 3623.byte 102,15,56,222,249 3624.byte 102,68,15,56,222,193 3625.byte 102,68,15,56,222,201 3626 movups 80-112(%rcx),%xmm1 3627 nop 3628.byte 102,15,56,222,208 3629.byte 102,15,56,222,216 3630.byte 102,15,56,222,224 3631.byte 102,15,56,222,232 3632.byte 102,15,56,222,240 3633.byte 102,15,56,222,248 3634.byte 102,68,15,56,222,192 3635.byte 102,68,15,56,222,200 3636 movups 96-112(%rcx),%xmm0 3637 nop 3638.byte 102,15,56,222,209 3639.byte 102,15,56,222,217 3640.byte 102,15,56,222,225 3641.byte 102,15,56,222,233 3642.byte 102,15,56,222,241 3643.byte 102,15,56,222,249 3644.byte 102,68,15,56,222,193 3645.byte 102,68,15,56,222,201 3646 movups 112-112(%rcx),%xmm1 3647 nop 3648.byte 102,15,56,222,208 3649.byte 102,15,56,222,216 3650.byte 102,15,56,222,224 3651.byte 102,15,56,222,232 3652.byte 102,15,56,222,240 3653.byte 102,15,56,222,248 3654.byte 102,68,15,56,222,192 3655.byte 102,68,15,56,222,200 3656 movups 128-112(%rcx),%xmm0 3657 nop 3658.byte 102,15,56,222,209 3659.byte 102,15,56,222,217 3660.byte 102,15,56,222,225 3661.byte 102,15,56,222,233 3662.byte 102,15,56,222,241 3663.byte 102,15,56,222,249 3664.byte 102,68,15,56,222,193 3665.byte 
102,68,15,56,222,201 3666 movups 144-112(%rcx),%xmm1 /* Round-count dispatch on %eax (the value read from offset 240 of the key schedule): below 11 jumps to .Lcbc_dec_done after this group, equal to 11 after two more groups, anything larger runs two further groups first. The flags set by this cmpl are still intact at the jb and je because only SSE/AES instructions and loads lie in between. */3667 cmpl $11,%eax 3668.byte 102,15,56,222,208 3669.byte 102,15,56,222,216 3670.byte 102,15,56,222,224 3671.byte 102,15,56,222,232 3672.byte 102,15,56,222,240 3673.byte 102,15,56,222,248 3674.byte 102,68,15,56,222,192 3675.byte 102,68,15,56,222,200 3676 movups 160-112(%rcx),%xmm0 3677 jb .Lcbc_dec_done 3678.byte 102,15,56,222,209 3679.byte 102,15,56,222,217 3680.byte 102,15,56,222,225 3681.byte 102,15,56,222,233 3682.byte 102,15,56,222,241 3683.byte 102,15,56,222,249 3684.byte 102,68,15,56,222,193 3685.byte 102,68,15,56,222,201 3686 movups 176-112(%rcx),%xmm1 3687 nop 3688.byte 102,15,56,222,208 3689.byte 102,15,56,222,216 3690.byte 102,15,56,222,224 3691.byte 102,15,56,222,232 3692.byte 102,15,56,222,240 3693.byte 102,15,56,222,248 3694.byte 102,68,15,56,222,192 3695.byte 102,68,15,56,222,200 3696 movups 192-112(%rcx),%xmm0 3697 je .Lcbc_dec_done 3698.byte 102,15,56,222,209 3699.byte 102,15,56,222,217 3700.byte 102,15,56,222,225 3701.byte 102,15,56,222,233 3702.byte 102,15,56,222,241 3703.byte 102,15,56,222,249 3704.byte 102,68,15,56,222,193 3705.byte 102,68,15,56,222,201 3706 movups 208-112(%rcx),%xmm1 3707 nop 3708.byte 102,15,56,222,208 3709.byte 102,15,56,222,216 3710.byte 102,15,56,222,224 3711.byte 102,15,56,222,232 3712.byte 102,15,56,222,240 3713.byte 102,15,56,222,248 3714.byte 102,68,15,56,222,192 3715.byte 102,68,15,56,222,200 3716 movups 224-112(%rcx),%xmm0 3717 jmp .Lcbc_dec_done 3718.align 16 /* Here %xmm0 holds the last round key. It is XORed into the saved chaining blocks (%xmm10-%xmm15 and the two blocks reloaded from 80(%rdi) and 96(%rdi)), which then serve as the AESDECLAST round-key operands, so the last round and the CBC XOR happen in one instruction. */3719.Lcbc_dec_done: 3720.byte 102,15,56,222,209 3721.byte 102,15,56,222,217 3722 pxor %xmm0,%xmm10 3723 pxor %xmm0,%xmm11 3724.byte 102,15,56,222,225 3725.byte 102,15,56,222,233 3726 pxor %xmm0,%xmm12 3727 pxor %xmm0,%xmm13 3728.byte 102,15,56,222,241 3729.byte 102,15,56,222,249 3730 pxor %xmm0,%xmm14 3731 pxor %xmm0,%xmm15 3732.byte 102,68,15,56,222,193 3733.byte 102,68,15,56,222,201 3734 movdqu 80(%rdi),%xmm1 3735 3736.byte 102,65,15,56,223,210 3737 movdqu 96(%rdi),%xmm10 3738 pxor %xmm0,%xmm1 3739.byte 
102,65,15,56,223,219 3740 pxor %xmm0,%xmm10 3741 movdqu 112(%rdi),%xmm0 3742.byte 102,65,15,56,223,228 3743 leaq 128(%rdi),%rdi /* The six blocks for the next iteration are loaded through %rbp (set earlier in the loop to %rdi or %rdi+128) rather than through the advanced %rdi. */3744 movdqu 0(%rbp),%xmm11 3745.byte 102,65,15,56,223,237 3746.byte 102,65,15,56,223,246 3747 movdqu 16(%rbp),%xmm12 3748 movdqu 32(%rbp),%xmm13 3749.byte 102,65,15,56,223,255 3750.byte 102,68,15,56,223,193 3751 movdqu 48(%rbp),%xmm14 3752 movdqu 64(%rbp),%xmm15 3753.byte 102,69,15,56,223,202 3754 movdqa %xmm0,%xmm10 3755 movdqu 80(%rbp),%xmm1 3756 movups -112(%rcx),%xmm0 3757 3758 movups %xmm2,(%rsi) 3759 movdqa %xmm11,%xmm2 3760 movups %xmm3,16(%rsi) 3761 movdqa %xmm12,%xmm3 3762 movups %xmm4,32(%rsi) 3763 movdqa %xmm13,%xmm4 3764 movups %xmm5,48(%rsi) 3765 movdqa %xmm14,%xmm5 3766 movups %xmm6,64(%rsi) 3767 movdqa %xmm15,%xmm6 3768 movups %xmm7,80(%rsi) 3769 movdqa %xmm1,%xmm7 3770 movups %xmm8,96(%rsi) 3771 leaq 112(%rsi),%rsi 3772 3773 subq $0x80,%rdx 3774 ja .Lcbc_dec_loop8 3775 3776 movaps %xmm9,%xmm2 3777 leaq -112(%rcx),%rcx 3778 addq $0x70,%rdx 3779 jle .Lcbc_dec_clear_tail_collected 3780 movups %xmm9,(%rsi) 3781 leaq 16(%rsi),%rsi 3782 cmpq $0x50,%rdx 3783 jbe .Lcbc_dec_tail 3784 3785 movaps %xmm11,%xmm2 /* Six blocks through _aesni_decrypt6 when %rdx <= 0x60, otherwise seven through _aesni_decrypt8 (called with %xmm9 zeroed). */3786.Lcbc_dec_six_or_seven: 3787 cmpq $0x60,%rdx 3788 ja .Lcbc_dec_seven 3789 3790 movaps %xmm7,%xmm8 3791 call _aesni_decrypt6 3792 pxor %xmm10,%xmm2 3793 movaps %xmm8,%xmm10 3794 pxor %xmm11,%xmm3 3795 movdqu %xmm2,(%rsi) 3796 pxor %xmm12,%xmm4 3797 movdqu %xmm3,16(%rsi) 3798 pxor %xmm3,%xmm3 3799 pxor %xmm13,%xmm5 3800 movdqu %xmm4,32(%rsi) 3801 pxor %xmm4,%xmm4 3802 pxor %xmm14,%xmm6 3803 movdqu %xmm5,48(%rsi) 3804 pxor %xmm5,%xmm5 3805 pxor %xmm15,%xmm7 3806 movdqu %xmm6,64(%rsi) 3807 pxor %xmm6,%xmm6 3808 leaq 80(%rsi),%rsi 3809 movdqa %xmm7,%xmm2 3810 pxor %xmm7,%xmm7 3811 jmp .Lcbc_dec_tail_collected 3812 3813.align 16 3814.Lcbc_dec_seven: 3815 movups 96(%rdi),%xmm8 3816 xorps %xmm9,%xmm9 3817 call _aesni_decrypt8 3818 movups 80(%rdi),%xmm9 3819 pxor %xmm10,%xmm2 3820 movups 96(%rdi),%xmm10 3821 pxor %xmm11,%xmm3 3822 
movdqu %xmm2,(%rsi) 3823 pxor %xmm12,%xmm4 3824 movdqu %xmm3,16(%rsi) 3825 pxor %xmm3,%xmm3 3826 pxor %xmm13,%xmm5 3827 movdqu %xmm4,32(%rsi) 3828 pxor %xmm4,%xmm4 3829 pxor %xmm14,%xmm6 3830 movdqu %xmm5,48(%rsi) 3831 pxor %xmm5,%xmm5 3832 pxor %xmm15,%xmm7 3833 movdqu %xmm6,64(%rsi) 3834 pxor %xmm6,%xmm6 3835 pxor %xmm9,%xmm8 3836 movdqu %xmm7,80(%rsi) 3837 pxor %xmm7,%xmm7 3838 leaq 96(%rsi),%rsi 3839 movdqa %xmm8,%xmm2 3840 pxor %xmm8,%xmm8 3841 pxor %xmm9,%xmm9 3842 jmp .Lcbc_dec_tail_collected 3843 3844.align 16 /* Six-block loop: each pass stores the block left over in %xmm7 from the previous pass, loads six new input blocks (copies kept in %xmm11-%xmm15 and %xmm8 for chaining), decrypts them with _aesni_decrypt6 and XORs with the previous ciphertext; %rcx and %eax are reloaded from %rbp and %r10d. */3845.Lcbc_dec_loop6: 3846 movups %xmm7,(%rsi) 3847 leaq 16(%rsi),%rsi 3848 movdqu 0(%rdi),%xmm2 3849 movdqu 16(%rdi),%xmm3 3850 movdqa %xmm2,%xmm11 3851 movdqu 32(%rdi),%xmm4 3852 movdqa %xmm3,%xmm12 3853 movdqu 48(%rdi),%xmm5 3854 movdqa %xmm4,%xmm13 3855 movdqu 64(%rdi),%xmm6 3856 movdqa %xmm5,%xmm14 3857 movdqu 80(%rdi),%xmm7 3858 movdqa %xmm6,%xmm15 3859.Lcbc_dec_loop6_enter: 3860 leaq 96(%rdi),%rdi 3861 movdqa %xmm7,%xmm8 3862 3863 call _aesni_decrypt6 3864 3865 pxor %xmm10,%xmm2 3866 movdqa %xmm8,%xmm10 3867 pxor %xmm11,%xmm3 3868 movdqu %xmm2,(%rsi) 3869 pxor %xmm12,%xmm4 3870 movdqu %xmm3,16(%rsi) 3871 pxor %xmm13,%xmm5 3872 movdqu %xmm4,32(%rsi) 3873 pxor %xmm14,%xmm6 3874 movq %rbp,%rcx 3875 movdqu %xmm5,48(%rsi) 3876 pxor %xmm15,%xmm7 3877 movl %r10d,%eax 3878 movdqu %xmm6,64(%rsi) 3879 leaq 80(%rsi),%rsi 3880 subq $0x60,%rdx 3881 ja .Lcbc_dec_loop6 3882 3883 movdqa %xmm7,%xmm2 3884 addq $0x50,%rdx 3885 jle .Lcbc_dec_clear_tail_collected 3886 movups %xmm7,(%rsi) 3887 leaq 16(%rsi),%rsi 3888 /* Tail: %rdx is reduced by 0x10 per loaded block; the first subtraction that reaches zero or borrows selects the one/two/three/four handler, and the fall-through case handles five blocks with _aesni_decrypt6 (%xmm7 zeroed). */3889.Lcbc_dec_tail: 3890 movups (%rdi),%xmm2 3891 subq $0x10,%rdx 3892 jbe .Lcbc_dec_one 3893 3894 movups 16(%rdi),%xmm3 3895 movaps %xmm2,%xmm11 3896 subq $0x10,%rdx 3897 jbe .Lcbc_dec_two 3898 3899 movups 32(%rdi),%xmm4 3900 movaps %xmm3,%xmm12 3901 subq $0x10,%rdx 3902 jbe .Lcbc_dec_three 3903 3904 movups 48(%rdi),%xmm5 3905 movaps %xmm4,%xmm13 3906 subq $0x10,%rdx 3907 jbe .Lcbc_dec_four 3908 3909 movups 64(%rdi),%xmm6 3910 movaps %xmm5,%xmm14 3911 
movaps %xmm6,%xmm15 3912 xorps %xmm7,%xmm7 3913 call _aesni_decrypt6 3914 pxor %xmm10,%xmm2 3915 movaps %xmm15,%xmm10 3916 pxor %xmm11,%xmm3 3917 movdqu %xmm2,(%rsi) 3918 pxor %xmm12,%xmm4 3919 movdqu %xmm3,16(%rsi) 3920 pxor %xmm3,%xmm3 3921 pxor %xmm13,%xmm5 3922 movdqu %xmm4,32(%rsi) 3923 pxor %xmm4,%xmm4 3924 pxor %xmm14,%xmm6 3925 movdqu %xmm5,48(%rsi) 3926 pxor %xmm5,%xmm5 3927 leaq 64(%rsi),%rsi 3928 movdqa %xmm6,%xmm2 3929 pxor %xmm6,%xmm6 3930 pxor %xmm7,%xmm7 3931 subq $0x10,%rdx 3932 jmp .Lcbc_dec_tail_collected 3933 3934.align 16 /* One to four trailing blocks. Each handler saves the last input block as the next IV in %xmm10, stores every decrypted block except the last and leaves the last one in %xmm2 for .Lcbc_dec_tail_collected. The single-block case runs its AESDEC loop inline instead of calling a helper. */3935.Lcbc_dec_one: 3936 movaps %xmm2,%xmm11 3937 movups (%rcx),%xmm0 3938 movups 16(%rcx),%xmm1 3939 leaq 32(%rcx),%rcx 3940 xorps %xmm0,%xmm2 3941.Loop_dec1_17: 3942.byte 102,15,56,222,209 3943 decl %eax 3944 movups (%rcx),%xmm1 3945 leaq 16(%rcx),%rcx 3946 jnz .Loop_dec1_17 3947.byte 102,15,56,223,209 3948 xorps %xmm10,%xmm2 3949 movaps %xmm11,%xmm10 3950 jmp .Lcbc_dec_tail_collected 3951.align 16 3952.Lcbc_dec_two: 3953 movaps %xmm3,%xmm12 3954 call _aesni_decrypt2 3955 pxor %xmm10,%xmm2 3956 movaps %xmm12,%xmm10 3957 pxor %xmm11,%xmm3 3958 movdqu %xmm2,(%rsi) 3959 movdqa %xmm3,%xmm2 3960 pxor %xmm3,%xmm3 3961 leaq 16(%rsi),%rsi 3962 jmp .Lcbc_dec_tail_collected 3963.align 16 3964.Lcbc_dec_three: 3965 movaps %xmm4,%xmm13 3966 call _aesni_decrypt3 3967 pxor %xmm10,%xmm2 3968 movaps %xmm13,%xmm10 3969 pxor %xmm11,%xmm3 3970 movdqu %xmm2,(%rsi) 3971 pxor %xmm12,%xmm4 3972 movdqu %xmm3,16(%rsi) 3973 pxor %xmm3,%xmm3 3974 movdqa %xmm4,%xmm2 3975 pxor %xmm4,%xmm4 3976 leaq 32(%rsi),%rsi 3977 jmp .Lcbc_dec_tail_collected 3978.align 16 3979.Lcbc_dec_four: 3980 movaps %xmm5,%xmm14 3981 call _aesni_decrypt4 3982 pxor %xmm10,%xmm2 3983 movaps %xmm14,%xmm10 3984 pxor %xmm11,%xmm3 3985 movdqu %xmm2,(%rsi) 3986 pxor %xmm12,%xmm4 3987 movdqu %xmm3,16(%rsi) 3988 pxor %xmm3,%xmm3 3989 pxor %xmm13,%xmm5 3990 movdqu %xmm4,32(%rsi) 3991 pxor %xmm4,%xmm4 3992 movdqa %xmm5,%xmm2 3993 pxor %xmm5,%xmm5 3994 leaq 48(%rsi),%rsi 3995 jmp 
.Lcbc_dec_tail_collected 3996 3997.align 16 3998.Lcbc_dec_clear_tail_collected: 3999 pxor %xmm3,%xmm3 4000 pxor %xmm4,%xmm4 4001 pxor %xmm5,%xmm5 4002 pxor %xmm6,%xmm6 4003 pxor %xmm7,%xmm7 4004 pxor %xmm8,%xmm8 4005 pxor %xmm9,%xmm9 /* Writes the next IV from %xmm10 to (%r8). %xmm2 holds the final output block: if %rdx & 15 is zero it is stored whole; otherwise it is spilled to the aligned stack slot, 16 - (%rdx & 15) bytes are copied out with `rep movsb` (.long 0x9066A4F3), and the slot is overwritten with the zeroed %xmm2. */4006.Lcbc_dec_tail_collected: 4007 movups %xmm10,(%r8) 4008 andq $15,%rdx 4009 jnz .Lcbc_dec_tail_partial 4010 movups %xmm2,(%rsi) 4011 pxor %xmm2,%xmm2 4012 jmp .Lcbc_dec_ret 4013.align 16 4014.Lcbc_dec_tail_partial: 4015 movaps %xmm2,(%rsp) 4016 pxor %xmm2,%xmm2 4017 movq $16,%rcx 4018 movq %rsi,%rdi 4019 subq %rdx,%rcx 4020 leaq (%rsp),%rsi 4021.long 0x9066A4F3 4022 movdqa %xmm2,(%rsp) 4023 4024.Lcbc_dec_ret: 4025 xorps %xmm0,%xmm0 4026 pxor %xmm1,%xmm1 4027 movq -8(%r11),%rbp 4028.cfi_restore %rbp 4029 leaq (%r11),%rsp 4030.cfi_def_cfa_register %rsp 4031.Lcbc_ret: 4032 .byte 0xf3,0xc3 4033.cfi_endproc 4034.size aesni_cbc_encrypt,.-aesni_cbc_encrypt /* aesni_set_decrypt_key: calls __aesni_set_encrypt_key and returns its non-zero %eax unchanged on failure. On success it converts the schedule at %rdx in place: the first and last round keys are swapped as they are, the remaining keys are swapped pairwise from both ends with AESIMC applied (`.byte 102,15,56,219,modrm` = 66 0F 38 DB /r), and the middle key gets AESIMC alone. `%esi << 4` is used as the byte offset to the last round key. `.byte 0x48,0x83,0xEC,0x08` is `sub $8,%rsp`. */4035.globl aesni_set_decrypt_key 4036.type aesni_set_decrypt_key,@function 4037.align 16 4038aesni_set_decrypt_key: 4039.cfi_startproc 4040.byte 0x48,0x83,0xEC,0x08 4041.cfi_adjust_cfa_offset 8 4042 call __aesni_set_encrypt_key 4043 shll $4,%esi 4044 testl %eax,%eax 4045 jnz .Ldec_key_ret 4046 leaq 16(%rdx,%rsi,1),%rdi 4047 4048 movups (%rdx),%xmm0 4049 movups (%rdi),%xmm1 4050 movups %xmm0,(%rdi) 4051 movups %xmm1,(%rdx) 4052 leaq 16(%rdx),%rdx 4053 leaq -16(%rdi),%rdi 4054 4055.Ldec_key_inverse: 4056 movups (%rdx),%xmm0 4057 movups (%rdi),%xmm1 4058.byte 102,15,56,219,192 4059.byte 102,15,56,219,201 4060 leaq 16(%rdx),%rdx 4061 leaq -16(%rdi),%rdi 4062 movups %xmm0,16(%rdi) 4063 movups %xmm1,-16(%rdx) 4064 cmpq %rdx,%rdi 4065 ja .Ldec_key_inverse 4066 4067 movups (%rdx),%xmm0 4068.byte 102,15,56,219,192 4069 pxor %xmm1,%xmm1 4070 movups %xmm0,(%rdi) 4071 pxor %xmm0,%xmm0 4072.Ldec_key_ret: 4073 addq $8,%rsp 4074.cfi_adjust_cfa_offset -8 4075 .byte 0xf3,0xc3 4076.cfi_endproc 4077.LSEH_end_set_decrypt_key: 4078.size aesni_set_decrypt_key,.-aesni_set_decrypt_key 4079.globl 
aesni_set_encrypt_key 4080.type aesni_set_encrypt_key,@function 4081.align 16 /* aesni_set_encrypt_key (exported) and __aesni_set_encrypt_key (same address, used by aesni_set_decrypt_key). %rdi = user key, %esi = key length in bits, %rdx = output key schedule. %rax returns 0 on success, -1 if %rdi or %rdx is NULL, -2 if the bit length is not 128, 192 or 256. On success %esi holds the value stored as the round count (9, 11 or 13). The `_alt` paths are taken when OPENSSL_ia32cap_P+4 masked with 268437504 (0x10000800) equals 268435456 (0x10000000). `.byte 102,15,58,223,modrm,imm` is AESKEYGENASSIST (66 0F 3A DF /r ib), with the round constant as the immediate. */4082aesni_set_encrypt_key: 4083__aesni_set_encrypt_key: 4084.cfi_startproc 4085.byte 0x48,0x83,0xEC,0x08 4086.cfi_adjust_cfa_offset 8 4087 movq $-1,%rax 4088 testq %rdi,%rdi 4089 jz .Lenc_key_ret 4090 testq %rdx,%rdx 4091 jz .Lenc_key_ret 4092 4093 movl $268437504,%r10d 4094 movups (%rdi),%xmm0 4095 xorps %xmm4,%xmm4 4096 andl OPENSSL_ia32cap_P+4(%rip),%r10d 4097 leaq 16(%rdx),%rax 4098 cmpl $256,%esi 4099 je .L14rounds 4100 cmpl $192,%esi 4101 je .L12rounds 4102 cmpl $128,%esi 4103 jne .Lbad_keybits 4104 4105.L10rounds: 4106 movl $9,%esi 4107 cmpl $268435456,%r10d 4108 je .L10rounds_alt 4109 4110 movups %xmm0,(%rdx) 4111.byte 102,15,58,223,200,1 4112 call .Lkey_expansion_128_cold 4113.byte 102,15,58,223,200,2 4114 call .Lkey_expansion_128 4115.byte 102,15,58,223,200,4 4116 call .Lkey_expansion_128 4117.byte 102,15,58,223,200,8 4118 call .Lkey_expansion_128 4119.byte 102,15,58,223,200,16 4120 call .Lkey_expansion_128 4121.byte 102,15,58,223,200,32 4122 call .Lkey_expansion_128 4123.byte 102,15,58,223,200,64 4124 call .Lkey_expansion_128 4125.byte 102,15,58,223,200,128 4126 call .Lkey_expansion_128 4127.byte 102,15,58,223,200,27 4128 call .Lkey_expansion_128 4129.byte 102,15,58,223,200,54 4130 call .Lkey_expansion_128 4131 movups %xmm0,(%rax) 4132 movl %esi,80(%rax) 4133 xorl %eax,%eax 4134 jmp .Lenc_key_ret 4135 4136.align 16 /* Alternative 128-bit expansion: `.byte 102,15,56,0,197` is PSHUFB %xmm5,%xmm0 (mask from .Lkey_rotate) and `.byte 102,15,56,221,196` is AESENCLAST %xmm4,%xmm0 with the round constant in %xmm4, doubled each round by `pslld $1`. */4137.L10rounds_alt: 4138 movdqa .Lkey_rotate(%rip),%xmm5 4139 movl $8,%r10d 4140 movdqa .Lkey_rcon1(%rip),%xmm4 4141 movdqa %xmm0,%xmm2 4142 movdqu %xmm0,(%rdx) 4143 jmp .Loop_key128 4144 4145.align 16 4146.Loop_key128: 4147.byte 102,15,56,0,197 4148.byte 102,15,56,221,196 4149 pslld $1,%xmm4 4150 leaq 16(%rax),%rax 4151 4152 movdqa %xmm2,%xmm3 4153 pslldq $4,%xmm2 4154 pxor %xmm2,%xmm3 4155 pslldq $4,%xmm2 4156 pxor %xmm2,%xmm3 4157 pslldq $4,%xmm2 4158 pxor %xmm3,%xmm2 4159 4160 pxor %xmm2,%xmm0 4161 movdqu %xmm0,-16(%rax) 4162 movdqa %xmm0,%xmm2 4163 4164 decl 
%r10d 4165 jnz .Loop_key128 4166 /* The last two 128-bit round keys are unrolled: the constant is reloaded from .Lkey_rcon1b (0x1b) and doubled once for the final round. */4167 movdqa .Lkey_rcon1b(%rip),%xmm4 4168 4169.byte 102,15,56,0,197 4170.byte 102,15,56,221,196 4171 pslld $1,%xmm4 4172 4173 movdqa %xmm2,%xmm3 4174 pslldq $4,%xmm2 4175 pxor %xmm2,%xmm3 4176 pslldq $4,%xmm2 4177 pxor %xmm2,%xmm3 4178 pslldq $4,%xmm2 4179 pxor %xmm3,%xmm2 4180 4181 pxor %xmm2,%xmm0 4182 movdqu %xmm0,(%rax) 4183 4184 movdqa %xmm0,%xmm2 4185.byte 102,15,56,0,197 4186.byte 102,15,56,221,196 4187 4188 movdqa %xmm2,%xmm3 4189 pslldq $4,%xmm2 4190 pxor %xmm2,%xmm3 4191 pslldq $4,%xmm2 4192 pxor %xmm2,%xmm3 4193 pslldq $4,%xmm2 4194 pxor %xmm3,%xmm2 4195 4196 pxor %xmm2,%xmm0 4197 movdqu %xmm0,16(%rax) 4198 4199 movl %esi,96(%rax) 4200 xorl %eax,%eax 4201 jmp .Lenc_key_ret 4202 4203.align 16 /* 192-bit key: the upper 8 key bytes are loaded into %xmm2 with a 64-bit movq, and 11 is the value stored as the round count. The standard path alternates the 192a/192b helpers because each expansion step yields 24 bytes of schedule. */4204.L12rounds: 4205 movq 16(%rdi),%xmm2 4206 movl $11,%esi 4207 cmpl $268435456,%r10d 4208 je .L12rounds_alt 4209 4210 movups %xmm0,(%rdx) 4211.byte 102,15,58,223,202,1 4212 call .Lkey_expansion_192a_cold 4213.byte 102,15,58,223,202,2 4214 call .Lkey_expansion_192b 4215.byte 102,15,58,223,202,4 4216 call .Lkey_expansion_192a 4217.byte 102,15,58,223,202,8 4218 call .Lkey_expansion_192b 4219.byte 102,15,58,223,202,16 4220 call .Lkey_expansion_192a 4221.byte 102,15,58,223,202,32 4222 call .Lkey_expansion_192b 4223.byte 102,15,58,223,202,64 4224 call .Lkey_expansion_192a 4225.byte 102,15,58,223,202,128 4226 call .Lkey_expansion_192b 4227 movups %xmm0,(%rax) 4228 movl %esi,48(%rax) 4229 xorq %rax,%rax 4230 jmp .Lenc_key_ret 4231 4232.align 16 4233.L12rounds_alt: 4234 movdqa .Lkey_rotate192(%rip),%xmm5 4235 movdqa .Lkey_rcon1(%rip),%xmm4 4236 movl $8,%r10d 4237 movdqu %xmm0,(%rdx) 4238 jmp .Loop_key192 4239 4240.align 16 /* Eight iterations, each writing 24 bytes: an 8-byte movq store at 0(%rax), then a 16-byte store at -16(%rax) after %rax has been advanced by 24. */4241.Loop_key192: 4242 movq %xmm2,0(%rax) 4243 movdqa %xmm2,%xmm1 4244.byte 102,15,56,0,213 4245.byte 102,15,56,221,212 4246 pslld $1,%xmm4 4247 leaq 24(%rax),%rax 4248 4249 movdqa %xmm0,%xmm3 4250 pslldq $4,%xmm0 4251 pxor %xmm0,%xmm3 4252 pslldq $4,%xmm0 4253 pxor %xmm0,%xmm3 4254 pslldq $4,%xmm0 4255 pxor %xmm3,%xmm0 
4256 4257 pshufd $0xff,%xmm0,%xmm3 4258 pxor %xmm1,%xmm3 4259 pslldq $4,%xmm1 4260 pxor %xmm1,%xmm3 4261 4262 pxor %xmm2,%xmm0 4263 pxor %xmm3,%xmm2 4264 movdqu %xmm0,-16(%rax) 4265 4266 decl %r10d 4267 jnz .Loop_key192 4268 4269 movl %esi,32(%rax) 4270 xorl %eax,%eax 4271 jmp .Lenc_key_ret 4272 4273.align 16 /* 256-bit key: the second 16 key bytes go to %xmm2 and 13 is the value stored as the round count. The standard path alternates the 256a helper (updates %xmm0, assist taken from %xmm2) and the 256b helper (updates %xmm2, assist taken from %xmm0), reusing each round-constant immediate for both. */4274.L14rounds: 4275 movups 16(%rdi),%xmm2 4276 movl $13,%esi 4277 leaq 16(%rax),%rax 4278 cmpl $268435456,%r10d 4279 je .L14rounds_alt 4280 4281 movups %xmm0,(%rdx) 4282 movups %xmm2,16(%rdx) 4283.byte 102,15,58,223,202,1 4284 call .Lkey_expansion_256a_cold 4285.byte 102,15,58,223,200,1 4286 call .Lkey_expansion_256b 4287.byte 102,15,58,223,202,2 4288 call .Lkey_expansion_256a 4289.byte 102,15,58,223,200,2 4290 call .Lkey_expansion_256b 4291.byte 102,15,58,223,202,4 4292 call .Lkey_expansion_256a 4293.byte 102,15,58,223,200,4 4294 call .Lkey_expansion_256b 4295.byte 102,15,58,223,202,8 4296 call .Lkey_expansion_256a 4297.byte 102,15,58,223,200,8 4298 call .Lkey_expansion_256b 4299.byte 102,15,58,223,202,16 4300 call .Lkey_expansion_256a 4301.byte 102,15,58,223,200,16 4302 call .Lkey_expansion_256b 4303.byte 102,15,58,223,202,32 4304 call .Lkey_expansion_256a 4305.byte 102,15,58,223,200,32 4306 call .Lkey_expansion_256b 4307.byte 102,15,58,223,202,64 4308 call .Lkey_expansion_256a 4309 movups %xmm0,(%rax) 4310 movl %esi,16(%rax) 4311 xorq %rax,%rax 4312 jmp .Lenc_key_ret 4313 4314.align 16 4315.L14rounds_alt: 4316 movdqa .Lkey_rotate(%rip),%xmm5 4317 movdqa .Lkey_rcon1(%rip),%xmm4 4318 movl $7,%r10d 4319 movdqu %xmm0,0(%rdx) 4320 movdqa %xmm2,%xmm1 4321 movdqu %xmm2,16(%rdx) 4322 jmp .Loop_key256 4323 4324.align 16 /* Each pass derives one key with PSHUFB + AESENCLAST against the round constant in %xmm4; unless the counter in %r10d has reached zero, it derives a second key with AESENCLAST against an all-zero %xmm3 (`.byte 102,15,56,221,211`). */4325.Loop_key256: 4326.byte 102,15,56,0,213 4327.byte 102,15,56,221,212 4328 4329 movdqa %xmm0,%xmm3 4330 pslldq $4,%xmm0 4331 pxor %xmm0,%xmm3 4332 pslldq $4,%xmm0 4333 pxor %xmm0,%xmm3 4334 pslldq $4,%xmm0 4335 pxor %xmm3,%xmm0 4336 pslld $1,%xmm4 4337 4338 pxor %xmm2,%xmm0 4339 movdqu %xmm0,(%rax) 4340 4341 decl %r10d 4342 jz .Ldone_key256 4343 
4344 pshufd $0xff,%xmm0,%xmm2 4345 pxor %xmm3,%xmm3 4346.byte 102,15,56,221,211 4347 4348 movdqa %xmm1,%xmm3 4349 pslldq $4,%xmm1 4350 pxor %xmm1,%xmm3 4351 pslldq $4,%xmm1 4352 pxor %xmm1,%xmm3 4353 pslldq $4,%xmm1 4354 pxor %xmm3,%xmm1 4355 4356 pxor %xmm1,%xmm2 4357 movdqu %xmm2,16(%rax) 4358 leaq 32(%rax),%rax 4359 movdqa %xmm2,%xmm1 4360 4361 jmp .Loop_key256 4362 4363.Ldone_key256: 4364 movl %esi,16(%rax) 4365 xorl %eax,%eax 4366 jmp .Lenc_key_ret 4367 4368.align 16 4369.Lbad_keybits: 4370 movq $-2,%rax /* Shared exit for all paths: clears %xmm0-%xmm5, undoes the 8-byte stack adjustment made at entry and returns with the status in %rax. */4371.Lenc_key_ret: 4372 pxor %xmm0,%xmm0 4373 pxor %xmm1,%xmm1 4374 pxor %xmm2,%xmm2 4375 pxor %xmm3,%xmm3 4376 pxor %xmm4,%xmm4 4377 pxor %xmm5,%xmm5 4378 addq $8,%rsp 4379.cfi_adjust_cfa_offset -8 4380 .byte 0xf3,0xc3 4381.LSEH_end_set_encrypt_key: 4382 4383.align 16 /* Key-expansion helpers reached with `call` from the AESKEYGENASSIST paths, with the assist result in %xmm1. The plain entry points first store a round key at (%rax) and advance %rax; the `_cold` entry points skip that store and are used for the first step. %xmm4 is scratch for the shufps-based prefix XOR. */4384.Lkey_expansion_128: 4385 movups %xmm0,(%rax) 4386 leaq 16(%rax),%rax 4387.Lkey_expansion_128_cold: 4388 shufps $16,%xmm0,%xmm4 4389 xorps %xmm4,%xmm0 4390 shufps $140,%xmm0,%xmm4 4391 xorps %xmm4,%xmm0 4392 shufps $255,%xmm1,%xmm1 4393 xorps %xmm1,%xmm0 4394 .byte 0xf3,0xc3 4395 4396.align 16 4397.Lkey_expansion_192a: 4398 movups %xmm0,(%rax) 4399 leaq 16(%rax),%rax 4400.Lkey_expansion_192a_cold: 4401 movaps %xmm2,%xmm5 4402.Lkey_expansion_192b_warm: 4403 shufps $16,%xmm0,%xmm4 4404 movdqa %xmm2,%xmm3 4405 xorps %xmm4,%xmm0 4406 shufps $140,%xmm0,%xmm4 4407 pslldq $4,%xmm3 4408 xorps %xmm4,%xmm0 4409 pshufd $85,%xmm1,%xmm1 4410 pxor %xmm3,%xmm2 4411 pxor %xmm1,%xmm0 4412 pshufd $255,%xmm0,%xmm3 4413 pxor %xmm3,%xmm2 4414 .byte 0xf3,0xc3 4415 4416.align 16 /* The 192b helper assembles two 16-byte round keys from %xmm5, %xmm0 and %xmm2, stores them at (%rax) and 16(%rax), advances %rax by 32 and then joins the shared 192-bit expansion body. */4417.Lkey_expansion_192b: 4418 movaps %xmm0,%xmm3 4419 shufps $68,%xmm0,%xmm5 4420 movups %xmm5,(%rax) 4421 shufps $78,%xmm2,%xmm3 4422 movups %xmm3,16(%rax) 4423 leaq 32(%rax),%rax 4424 jmp .Lkey_expansion_192b_warm 4425 4426.align 16 4427.Lkey_expansion_256a: 4428 movups %xmm2,(%rax) 4429 leaq 16(%rax),%rax 4430.Lkey_expansion_256a_cold: 4431 shufps $16,%xmm0,%xmm4 4432 xorps %xmm4,%xmm0 4433 shufps $140,%xmm0,%xmm4 4434 xorps 
%xmm4,%xmm0 4435 shufps $255,%xmm1,%xmm1 4436 xorps %xmm1,%xmm0 4437 .byte 0xf3,0xc3 4438 4439.align 16 /* The 256b helper stores %xmm0 at (%rax), advances %rax by 16 and updates %xmm2 instead of %xmm0; it broadcasts dword 2 of the assist result (shufps $170) where the other helpers use dword 3 (shufps $255). */4440.Lkey_expansion_256b: 4441 movups %xmm0,(%rax) 4442 leaq 16(%rax),%rax 4443 4444 shufps $16,%xmm2,%xmm4 4445 xorps %xmm4,%xmm2 4446 shufps $140,%xmm2,%xmm4 4447 xorps %xmm4,%xmm2 4448 shufps $170,%xmm1,%xmm1 4449 xorps %xmm1,%xmm2 4450 .byte 0xf3,0xc3 4451.cfi_endproc 4452.size aesni_set_encrypt_key,.-aesni_set_encrypt_key 4453.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key /* Constant pool, 64-byte aligned and emitted in the same section as the code. Within this part of the file only .Lkey_rotate, .Lkey_rotate192, .Lkey_rcon1 and .Lkey_rcon1b are referenced (by the `_alt` key-expansion paths, via movdqa). */4454.align 64 4455.Lbswap_mask: 4456.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 4457.Lincrement32: 4458.long 6,6,6,0 4459.Lincrement64: 4460.long 1,0,0,0 4461.Lxts_magic: 4462.long 0x87,0,1,0 4463.Lincrement1: 4464.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 4465.Lkey_rotate: 4466.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 4467.Lkey_rotate192: 4468.long 0x04070605,0x04070605,0x04070605,0x04070605 4469.Lkey_rcon1: 4470.long 1,1,1,1 4471.Lkey_rcon1b: 4472.long 0x1b,0x1b,0x1b,0x1b 4473 /* NUL-terminated ASCII string: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */4474.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 4475.align 64 4476