/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
/*
 * AES-NI instructions are emitted as raw bytes.  Decoding (AT&T order,
 * source first):
 *   102,15,56,220,M  aesenc         102,15,56,221,M  aesenclast
 *   102,15,56,222,M  aesdec         102,15,56,223,M  aesdeclast
 *   M = 209/217: %xmm1 into %xmm2/%xmm3;  M = 208/216: %xmm0 into %xmm2/%xmm3
 *   0xf3,0xc3        rep ret
 */
.text

/*
 * aesni_encrypt: encrypt a single 16-byte block.
 *   In:  %rdi = source block, %rsi = destination block,
 *        %rdx = key schedule (16-byte round keys from offset 0, loop count
 *               read as a 32-bit value at offset 240).
 *   Clobbers %eax, %rdx and flags; %xmm0-%xmm2 are zeroed before returning.
 */
.globl aesni_encrypt
.type aesni_encrypt,@function
.align 16
aesni_encrypt:
.cfi_startproc
        movups (%rdi),%xmm2
        movl 240(%rdx),%eax             /* eax = number of aesenc rounds */
        movups (%rdx),%xmm0
        movups 16(%rdx),%xmm1
        leaq 32(%rdx),%rdx
        xorps %xmm0,%xmm2               /* whitening with round key 0 */
.Loop_enc1_1:
        .byte 102,15,56,220,209         /* aesenc %xmm1,%xmm2 */
        decl %eax
        movups (%rdx),%xmm1             /* next round key; flags untouched */
        leaq 16(%rdx),%rdx
        jnz .Loop_enc1_1
        .byte 102,15,56,221,209         /* aesenclast %xmm1,%xmm2 */
        pxor %xmm0,%xmm0                /* scrub key material */
        pxor %xmm1,%xmm1
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        .byte 0xf3,0xc3
.cfi_endproc
.size aesni_encrypt,.-aesni_encrypt

/*
 * aesni_decrypt: decrypt a single 16-byte block.
 *   Same register contract as aesni_encrypt, using aesdec/aesdeclast.
 */
.globl aesni_decrypt
.type aesni_decrypt,@function
.align 16
aesni_decrypt:
.cfi_startproc
        movups (%rdi),%xmm2
        movl 240(%rdx),%eax             /* eax = number of aesdec rounds */
        movups (%rdx),%xmm0
        movups 16(%rdx),%xmm1
        leaq 32(%rdx),%rdx
        xorps %xmm0,%xmm2
.Loop_dec1_2:
        .byte 102,15,56,222,209         /* aesdec %xmm1,%xmm2 */
        decl %eax
        movups (%rdx),%xmm1
        leaq 16(%rdx),%rdx
        jnz .Loop_dec1_2
        .byte 102,15,56,223,209         /* aesdeclast %xmm1,%xmm2 */
        pxor %xmm0,%xmm0
        pxor %xmm1,%xmm1
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        .byte 0xf3,0xc3
.cfi_endproc
.size aesni_decrypt, .-aesni_decrypt

/*
 * _aesni_encrypt2: file-local helper, encrypts %xmm2 and %xmm3 in parallel.
 *   In:  %rcx = key schedule, %eax = round-loop count, %xmm2/%xmm3 = blocks.
 *   Out: %xmm2/%xmm3 encrypted; %rcx advanced to key+32+16*count; %rax = 0.
 *   The loop steps %rax by 32 from 16-16*count and exits only when it hits
 *   exactly zero, so the count has to be odd.
 *   Clobbers %xmm0, %xmm1, flags.
 */
.type _aesni_encrypt2,@function
.align 16
_aesni_encrypt2:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax                    /* count -> byte length of key tail */
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        movups 32(%rcx),%xmm0
        leaq 32(%rcx,%rax,1),%rcx       /* rcx = end of schedule; index is negative */
        negq %rax
        addq $16,%rax

.Lenc_loop2:
        .byte 102,15,56,220,209         /* aesenc %xmm1,%xmm2 */
        .byte 102,15,56,220,217         /* aesenc %xmm1,%xmm3 */
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,220,208         /* aesenc %xmm0,%xmm2 */
        .byte 102,15,56,220,216         /* aesenc %xmm0,%xmm3 */
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Lenc_loop2

        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,221,208         /* aesenclast %xmm0,%xmm2 */
        .byte 102,15,56,221,216         /* aesenclast %xmm0,%xmm3 */
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_encrypt2,.-_aesni_encrypt2

/*
 * _aesni_decrypt2: file-local helper, decrypts %xmm2 and %xmm3 in parallel.
 *   In:  %rcx = key schedule, %eax = round-loop count, %xmm2/%xmm3 = blocks.
 */
.type _aesni_decrypt2,@function
.align 16
_aesni_decrypt2:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
/*
 * Body of _aesni_decrypt2 from the whitening step onward, followed by the
 * 3- and 4-block helpers.  Raw-byte decoding (AT&T order, source first):
 *   102,15,56,220,M aesenc   102,15,56,221,M aesenclast
 *   102,15,56,222,M aesdec   102,15,56,223,M aesdeclast
 *   M = 209/217/225/233: %xmm1 into %xmm2/%xmm3/%xmm4/%xmm5
 *   M = 208/216/224/232: %xmm0 into %xmm2/%xmm3/%xmm4/%xmm5
 *   0x0f,0x1f,0x00  3-byte nop      0xf3,0xc3  rep ret
 */
        xorps %xmm0,%xmm2               /* whitening with round key 0 */
        xorps %xmm0,%xmm3
        movups 32(%rcx),%xmm0
        leaq 32(%rcx,%rax,1),%rcx       /* rcx = end of schedule; index is negative */
        negq %rax
        addq $16,%rax

.Ldec_loop2:
        .byte 102,15,56,222,209         /* aesdec %xmm1,%xmm2 */
        .byte 102,15,56,222,217         /* aesdec %xmm1,%xmm3 */
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,222,208         /* aesdec %xmm0,%xmm2 */
        .byte 102,15,56,222,216         /* aesdec %xmm0,%xmm3 */
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Ldec_loop2

        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
        .byte 102,15,56,223,208         /* aesdeclast %xmm0,%xmm2 */
        .byte 102,15,56,223,216         /* aesdeclast %xmm0,%xmm3 */
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_decrypt2,.-_aesni_decrypt2

/*
 * _aesni_encrypt3: file-local helper, encrypts %xmm2-%xmm4 in parallel.
 *   In:  %rcx = key schedule, %eax = round-loop count (must be odd: %rax is
 *        stepped by 32 from 16-16*count until it is exactly zero).
 *   Out: %xmm2-%xmm4 encrypted; %rcx = key+32+16*count; %rax = 0.
 *   Clobbers %xmm0, %xmm1, flags.
 */
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        movups 32(%rcx),%xmm0
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        addq $16,%rax

.Lenc_loop3:
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        .byte 102,15,56,220,224
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Lenc_loop3

        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        .byte 102,15,56,221,208
        .byte 102,15,56,221,216
        .byte 102,15,56,221,224
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_encrypt3,.-_aesni_encrypt3

/*
 * _aesni_decrypt3: decrypting counterpart of _aesni_encrypt3
 * (same register contract, aesdec/aesdeclast).
 */
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        movups 32(%rcx),%xmm0
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        addq $16,%rax

.Ldec_loop3:
        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
        .byte 102,15,56,222,225
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,222,208
        .byte 102,15,56,222,216
        .byte 102,15,56,222,224
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Ldec_loop3

        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
        .byte 102,15,56,222,225
        .byte 102,15,56,223,208
        .byte 102,15,56,223,216
        .byte 102,15,56,223,224
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_decrypt3,.-_aesni_decrypt3

/*
 * _aesni_encrypt4: file-local helper, encrypts %xmm2-%xmm5 in parallel.
 *   In:  %rcx = key schedule, %eax = round-loop count (odd, see above).
 *   Out: %xmm2-%xmm5 encrypted; %rcx = key+32+16*count; %rax = 0.
 *   Clobbers %xmm0, %xmm1, flags.
 */
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        xorps %xmm0,%xmm5
        movups 32(%rcx),%xmm0
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        .byte 0x0f,0x1f,0x00            /* 3-byte nop (padding) */
        addq $16,%rax

.Lenc_loop4:
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        .byte 102,15,56,220,224
        .byte 102,15,56,220,232
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Lenc_loop4

        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        .byte 102,15,56,221,208
        .byte 102,15,56,221,216
        .byte 102,15,56,221,224
        .byte 102,15,56,221,232
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_encrypt4,.-_aesni_encrypt4

/*
 * _aesni_decrypt4: decrypting counterpart of _aesni_encrypt4
 * (same register contract, aesdec/aesdeclast).
 */
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        xorps %xmm0,%xmm5
        movups 32(%rcx),%xmm0
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        .byte 0x0f,0x1f,0x00            /* 3-byte nop (padding) */
        addq $16,%rax

.Ldec_loop4:
        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
        .byte 102,15,56,222,225
        .byte 102,15,56,222,233
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,222,208
        .byte 102,15,56,222,216
        .byte 102,15,56,222,224
        .byte 102,15,56,222,232
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Ldec_loop4

        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
        .byte 102,15,56,222,225
/*
 * Final rounds of _aesni_decrypt4, followed by the 6- and 8-block helpers.
 * Raw-byte decoding (AT&T order, source first):
 *   102,15,56,220,M aesenc   102,15,56,221,M aesenclast
 *   102,15,56,222,M aesdec   102,15,56,223,M aesdeclast
 *   M = 209/217/225/233/241/249: %xmm1 into %xmm2..%xmm7
 *   M = 208/216/224/232/240/248: %xmm0 into %xmm2..%xmm7
 *   With the extra 68 (REX.R) byte: M = 193/201: %xmm1 into %xmm8/%xmm9,
 *                                   M = 192/200: %xmm0 into %xmm8/%xmm9
 *   0xf3,0xc3  rep ret
 */
        .byte 102,15,56,222,233         /* aesdec %xmm1,%xmm5 */
        .byte 102,15,56,223,208         /* aesdeclast %xmm0,%xmm2 */
        .byte 102,15,56,223,216
        .byte 102,15,56,223,224
        .byte 102,15,56,223,232
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_decrypt4,.-_aesni_decrypt4

/*
 * _aesni_encrypt6: file-local helper, encrypts %xmm2-%xmm7 in parallel.
 *   In:  %rcx = key schedule, %eax = round-loop count (must be odd: %rax is
 *        stepped by 32 until it is exactly zero).
 *   Out: %xmm2-%xmm7 encrypted; %rcx = key+32+16*count; %rax = 0.
 *   Clobbers %xmm0, %xmm1, flags.
 *   The first round for %xmm2-%xmm4 is interleaved with the whitening of
 *   %xmm5-%xmm7, which is why the loop is entered at .Lenc_loop6_enter.
 *   .Lenc_loop6 is also used as a call target from elsewhere in this file.
 */
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        pxor %xmm0,%xmm3
        pxor %xmm0,%xmm4
        .byte 102,15,56,220,209         /* aesenc %xmm1,%xmm2 */
        leaq 32(%rcx,%rax,1),%rcx       /* rcx = end of schedule; index is negative */
        negq %rax
        .byte 102,15,56,220,217         /* aesenc %xmm1,%xmm3 */
        pxor %xmm0,%xmm5
        pxor %xmm0,%xmm6
        .byte 102,15,56,220,225         /* aesenc %xmm1,%xmm4 */
        pxor %xmm0,%xmm7
        movups (%rcx,%rax,1),%xmm0
        addq $16,%rax
        jmp .Lenc_loop6_enter
.align 16
.Lenc_loop6:
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
.Lenc_loop6_enter:
        .byte 102,15,56,220,233
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        .byte 102,15,56,220,224
        .byte 102,15,56,220,232
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Lenc_loop6

        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,15,56,221,208
        .byte 102,15,56,221,216
        .byte 102,15,56,221,224
        .byte 102,15,56,221,232
        .byte 102,15,56,221,240
        .byte 102,15,56,221,248
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_encrypt6,.-_aesni_encrypt6

/*
 * _aesni_decrypt6: decrypting counterpart of _aesni_encrypt6
 * (same register contract, aesdec/aesdeclast).
 */
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        pxor %xmm0,%xmm3
        pxor %xmm0,%xmm4
        .byte 102,15,56,222,209         /* aesdec %xmm1,%xmm2 */
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        .byte 102,15,56,222,217
        pxor %xmm0,%xmm5
        pxor %xmm0,%xmm6
        .byte 102,15,56,222,225
        pxor %xmm0,%xmm7
        movups (%rcx,%rax,1),%xmm0
        addq $16,%rax
        jmp .Ldec_loop6_enter
.align 16
.Ldec_loop6:
        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
        .byte 102,15,56,222,225
.Ldec_loop6_enter:
        .byte 102,15,56,222,233
        .byte 102,15,56,222,241
        .byte 102,15,56,222,249
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,222,208
        .byte 102,15,56,222,216
        .byte 102,15,56,222,224
        .byte 102,15,56,222,232
        .byte 102,15,56,222,240
        .byte 102,15,56,222,248
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Ldec_loop6

        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
        .byte 102,15,56,222,225
        .byte 102,15,56,222,233
        .byte 102,15,56,222,241
        .byte 102,15,56,222,249
        .byte 102,15,56,223,208
        .byte 102,15,56,223,216
        .byte 102,15,56,223,224
        .byte 102,15,56,223,232
        .byte 102,15,56,223,240
        .byte 102,15,56,223,248
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_decrypt6,.-_aesni_decrypt6

/*
 * _aesni_encrypt8: file-local helper, encrypts %xmm2-%xmm9 in parallel.
 *   In:  %rcx = key schedule, %eax = round-loop count (odd, see above).
 *   Out: %xmm2-%xmm9 encrypted; %rcx = key+32+16*count; %rax = 0.
 *   Clobbers %xmm0, %xmm1, flags.
 *   .Lenc_loop8_enter is also used as a call target from elsewhere in
 *   this file.
 */
.type _aesni_encrypt8,@function
.align 16
_aesni_encrypt8:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        pxor %xmm0,%xmm4
        pxor %xmm0,%xmm5
        pxor %xmm0,%xmm6
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        .byte 102,15,56,220,209         /* aesenc %xmm1,%xmm2 */
        pxor %xmm0,%xmm7
        pxor %xmm0,%xmm8
        .byte 102,15,56,220,217         /* aesenc %xmm1,%xmm3 */
        pxor %xmm0,%xmm9
        movups (%rcx,%rax,1),%xmm0
        addq $16,%rax
        jmp .Lenc_loop8_inner
.align 16
.Lenc_loop8:
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
.Lenc_loop8_inner:
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193      /* aesenc %xmm1,%xmm8 */
        .byte 102,68,15,56,220,201      /* aesenc %xmm1,%xmm9 */
.Lenc_loop8_enter:
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        .byte 102,15,56,220,224
        .byte 102,15,56,220,232
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        .byte 102,68,15,56,220,192      /* aesenc %xmm0,%xmm8 */
        .byte 102,68,15,56,220,200      /* aesenc %xmm0,%xmm9 */
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Lenc_loop8

        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193
        .byte 102,68,15,56,220,201
        .byte 102,15,56,221,208
        .byte 102,15,56,221,216
        .byte 102,15,56,221,224
        .byte 102,15,56,221,232
        .byte 102,15,56,221,240
        .byte 102,15,56,221,248
        .byte 102,68,15,56,221,192      /* aesenclast %xmm0,%xmm8 */
        .byte 102,68,15,56,221,200      /* aesenclast %xmm0,%xmm9 */
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_encrypt8,.-_aesni_encrypt8

/*
 * _aesni_decrypt8: decrypting counterpart of _aesni_encrypt8
 * (same register contract, aesdec/aesdeclast).
 */
.type _aesni_decrypt8,@function
.align 16
_aesni_decrypt8:
.cfi_startproc
        movups (%rcx),%xmm0
        shll $4,%eax
        movups 16(%rcx),%xmm1
        xorps %xmm0,%xmm2
        xorps %xmm0,%xmm3
        pxor %xmm0,%xmm4
        pxor %xmm0,%xmm5
        pxor %xmm0,%xmm6
        leaq 32(%rcx,%rax,1),%rcx
        negq %rax
        .byte 102,15,56,222,209         /* aesdec %xmm1,%xmm2 */
        pxor %xmm0,%xmm7
        pxor %xmm0,%xmm8
        .byte 102,15,56,222,217         /* aesdec %xmm1,%xmm3 */
        pxor %xmm0,%xmm9
        movups (%rcx,%rax,1),%xmm0
        addq $16,%rax
        jmp .Ldec_loop8_inner
.align 16
.Ldec_loop8:
        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
.Ldec_loop8_inner:
        .byte 102,15,56,222,225
        .byte 102,15,56,222,233
        .byte 102,15,56,222,241
        .byte 102,15,56,222,249
        .byte 102,68,15,56,222,193
        .byte 102,68,15,56,222,201
.Ldec_loop8_enter:
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,222,208
        .byte 102,15,56,222,216
        .byte 102,15,56,222,224
        .byte 102,15,56,222,232
        .byte 102,15,56,222,240
        .byte 102,15,56,222,248
        .byte 102,68,15,56,222,192
        .byte 102,68,15,56,222,200
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Ldec_loop8

        .byte 102,15,56,222,209
        .byte 102,15,56,222,217
        .byte 102,15,56,222,225
        .byte 102,15,56,222,233
        .byte 102,15,56,222,241
        .byte 102,15,56,222,249
        .byte 102,68,15,56,222,193
        .byte 102,68,15,56,222,201
        .byte 102,15,56,223,208         /* aesdeclast %xmm0,%xmm2 */
        .byte 102,15,56,223,216         /* aesdeclast %xmm0,%xmm3 */
/*
 * Last aesdeclast instructions of _aesni_decrypt8, then aesni_ecb_encrypt.
 * Raw-byte decoding (AT&T order, source first):
 *   102,15,56,220,209  aesenc %xmm1,%xmm2   102,15,56,221,209  aesenclast %xmm1,%xmm2
 *   102,15,56,223,M    aesdeclast %xmm0 into %xmm4..%xmm7 (M = 224/232/240/248)
 *   102,68,15,56,223,M aesdeclast %xmm0 into %xmm8/%xmm9  (M = 192/200)
 *   0xf3,0xc3          rep ret
 */
        .byte 102,15,56,223,224
        .byte 102,15,56,223,232
        .byte 102,15,56,223,240
        .byte 102,15,56,223,248
        .byte 102,68,15,56,223,192
        .byte 102,68,15,56,223,200
        .byte 0xf3,0xc3
.cfi_endproc
.size _aesni_decrypt8,.-_aesni_decrypt8

/*
 * aesni_ecb_encrypt: process a buffer of independent 16-byte blocks.
 *   In:  %rdi = input, %rsi = output, %rdx = length in bytes (rounded down
 *        to a multiple of 16; zero returns immediately),
 *        %rcx = key schedule, %r8d = non-zero to encrypt, zero to decrypt.
 *   %r11 / %r10d keep the key pointer and round-loop count, because the
 *   helpers called here modify %rcx and %eax.
 *   Runs 8 blocks per iteration while at least 128 bytes remain, then
 *   dispatches the 1..7 remaining blocks to the matching helper.
 */
.globl aesni_ecb_encrypt
.type aesni_ecb_encrypt,@function
.align 16
aesni_ecb_encrypt:
.cfi_startproc
        andq $-16,%rdx
        jz .Lecb_ret

        movl 240(%rcx),%eax
        movups (%rcx),%xmm0
        movq %rcx,%r11
        movl %eax,%r10d
        testl %r8d,%r8d
        jz .Lecb_decrypt

        cmpq $0x80,%rdx
        jb .Lecb_enc_tail

        movdqu (%rdi),%xmm2
        movdqu 16(%rdi),%xmm3
        movdqu 32(%rdi),%xmm4
        movdqu 48(%rdi),%xmm5
        movdqu 64(%rdi),%xmm6
        movdqu 80(%rdi),%xmm7
        movdqu 96(%rdi),%xmm8
        movdqu 112(%rdi),%xmm9
        leaq 128(%rdi),%rdi
        subq $0x80,%rdx
        jmp .Lecb_enc_loop8_enter
.align 16
.Lecb_enc_loop8:
        /* store the previous 8 results while loading the next 8 inputs */
        movups %xmm2,(%rsi)
        movq %r11,%rcx
        movdqu (%rdi),%xmm2
        movl %r10d,%eax
        movups %xmm3,16(%rsi)
        movdqu 16(%rdi),%xmm3
        movups %xmm4,32(%rsi)
        movdqu 32(%rdi),%xmm4
        movups %xmm5,48(%rsi)
        movdqu 48(%rdi),%xmm5
        movups %xmm6,64(%rsi)
        movdqu 64(%rdi),%xmm6
        movups %xmm7,80(%rsi)
        movdqu 80(%rdi),%xmm7
        movups %xmm8,96(%rsi)
        movdqu 96(%rdi),%xmm8
        movups %xmm9,112(%rsi)
        leaq 128(%rsi),%rsi
        movdqu 112(%rdi),%xmm9
        leaq 128(%rdi),%rdi
.Lecb_enc_loop8_enter:

        call _aesni_encrypt8

        subq $0x80,%rdx
        jnc .Lecb_enc_loop8             /* no borrow: another full 8 blocks remain */

        movups %xmm2,(%rsi)
        movq %r11,%rcx
        movups %xmm3,16(%rsi)
        movl %r10d,%eax
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        movups %xmm6,64(%rsi)
        movups %xmm7,80(%rsi)
        movups %xmm8,96(%rsi)
        movups %xmm9,112(%rsi)
        leaq 128(%rsi),%rsi
        addq $0x80,%rdx                 /* undo the last subtraction */
        jz .Lecb_ret

.Lecb_enc_tail:
        /* 1..7 blocks left; the movups between cmpq and je leave flags intact */
        movups (%rdi),%xmm2
        cmpq $0x20,%rdx
        jb .Lecb_enc_one
        movups 16(%rdi),%xmm3
        je .Lecb_enc_two
        movups 32(%rdi),%xmm4
        cmpq $0x40,%rdx
        jb .Lecb_enc_three
        movups 48(%rdi),%xmm5
        je .Lecb_enc_four
        movups 64(%rdi),%xmm6
        cmpq $0x60,%rdx
        jb .Lecb_enc_five
        movups 80(%rdi),%xmm7
        je .Lecb_enc_six
        movdqu 96(%rdi),%xmm8
        xorps %xmm9,%xmm9               /* 7 blocks: eighth lane is a zero dummy */
        call _aesni_encrypt8
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        movups %xmm6,64(%rsi)
        movups %xmm7,80(%rsi)
        movups %xmm8,96(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_one:
        movups (%rcx),%xmm0
        movups 16(%rcx),%xmm1
        leaq 32(%rcx),%rcx
        xorps %xmm0,%xmm2
.Loop_enc1_3:
        .byte 102,15,56,220,209         /* aesenc %xmm1,%xmm2 */
        decl %eax
        movups (%rcx),%xmm1
        leaq 16(%rcx),%rcx
        jnz .Loop_enc1_3
        .byte 102,15,56,221,209         /* aesenclast %xmm1,%xmm2 */
        movups %xmm2,(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_two:
        call _aesni_encrypt2
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_three:
        call _aesni_encrypt3
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_four:
        call _aesni_encrypt4
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_five:
        xorps %xmm7,%xmm7               /* 5 blocks: sixth lane is a zero dummy */
        call _aesni_encrypt6
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        movups %xmm6,64(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_six:
        call _aesni_encrypt6
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        movups %xmm6,64(%rsi)
        movups %xmm7,80(%rsi)
        jmp .Lecb_ret

.align 16
.Lecb_decrypt:
        /* decrypt side: same structure; results are wiped from registers after storing */
        cmpq $0x80,%rdx
        jb .Lecb_dec_tail

        movdqu (%rdi),%xmm2
        movdqu 16(%rdi),%xmm3
        movdqu 32(%rdi),%xmm4
        movdqu 48(%rdi),%xmm5
        movdqu 64(%rdi),%xmm6
        movdqu 80(%rdi),%xmm7
        movdqu 96(%rdi),%xmm8
        movdqu 112(%rdi),%xmm9
        leaq 128(%rdi),%rdi
        subq $0x80,%rdx
        jmp .Lecb_dec_loop8_enter
.align 16
.Lecb_dec_loop8:
        movups %xmm2,(%rsi)
        movq %r11,%rcx
        movdqu (%rdi),%xmm2
        movl %r10d,%eax
        movups %xmm3,16(%rsi)
        movdqu 16(%rdi),%xmm3
        movups %xmm4,32(%rsi)
        movdqu 32(%rdi),%xmm4
        movups %xmm5,48(%rsi)
        movdqu 48(%rdi),%xmm5
        movups %xmm6,64(%rsi)
        movdqu 64(%rdi),%xmm6
        movups %xmm7,80(%rsi)
        movdqu 80(%rdi),%xmm7
        movups %xmm8,96(%rsi)
        movdqu 96(%rdi),%xmm8
        movups %xmm9,112(%rsi)
        leaq 128(%rsi),%rsi
        movdqu 112(%rdi),%xmm9
        leaq 128(%rdi),%rdi
.Lecb_dec_loop8_enter:

        call _aesni_decrypt8

        movups (%r11),%xmm0             /* reload round key 0 into %xmm0 */
        subq $0x80,%rdx
        jnc .Lecb_dec_loop8

        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movq %r11,%rcx
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movl %r10d,%eax
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        movups %xmm6,64(%rsi)
        pxor %xmm6,%xmm6
        movups %xmm7,80(%rsi)
        pxor %xmm7,%xmm7
        movups %xmm8,96(%rsi)
        pxor %xmm8,%xmm8
        movups %xmm9,112(%rsi)
        pxor %xmm9,%xmm9
        leaq 128(%rsi),%rsi
        addq $0x80,%rdx
        jz .Lecb_ret

.Lecb_dec_tail:
        movups (%rdi),%xmm2
        cmpq $0x20,%rdx
        jb .Lecb_dec_one
        movups 16(%rdi),%xmm3
        je .Lecb_dec_two
        movups 32(%rdi),%xmm4
        cmpq $0x40,%rdx
        jb .Lecb_dec_three
        movups 48(%rdi),%xmm5
        je .Lecb_dec_four
        movups 64(%rdi),%xmm6
        cmpq $0x60,%rdx
        jb .Lecb_dec_five
        movups 80(%rdi),%xmm7
        je .Lecb_dec_six
        movups 96(%rdi),%xmm8
        movups (%rcx),%xmm0
        xorps %xmm9,%xmm9               /* 7 blocks: eighth lane is a zero dummy */
        call _aesni_decrypt8
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        movups %xmm6,64(%rsi)
        pxor %xmm6,%xmm6
        movups %xmm7,80(%rsi)
        pxor %xmm7,%xmm7
        movups %xmm8,96(%rsi)
        pxor %xmm8,%xmm8
        pxor %xmm9,%xmm9
/*
 * Remaining decrypt tails of aesni_ecb_encrypt (1..6 blocks) and its common
 * exit, then the entry of aesni_ccm64_encrypt_blocks.  Each tail stores its
 * results to (%rsi) and wipes the result registers.
 * Raw-byte decoding: 102,15,56,222,209 = aesdec %xmm1,%xmm2;
 *                    102,15,56,223,209 = aesdeclast %xmm1,%xmm2;
 *                    0xf3,0xc3 = rep ret.
 */
        jmp .Lecb_ret
.align 16
.Lecb_dec_one:
        movups (%rcx),%xmm0
        movups 16(%rcx),%xmm1
        leaq 32(%rcx),%rcx
        xorps %xmm0,%xmm2
.Loop_dec1_4:
        .byte 102,15,56,222,209         /* aesdec %xmm1,%xmm2 */
        decl %eax
        movups (%rcx),%xmm1
        leaq 16(%rcx),%rcx
        jnz .Loop_dec1_4
        .byte 102,15,56,223,209         /* aesdeclast %xmm1,%xmm2 */
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        jmp .Lecb_ret
.align 16
.Lecb_dec_two:
        call _aesni_decrypt2
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        jmp .Lecb_ret
.align 16
.Lecb_dec_three:
        call _aesni_decrypt3
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        jmp .Lecb_ret
.align 16
.Lecb_dec_four:
        call _aesni_decrypt4
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        jmp .Lecb_ret
.align 16
.Lecb_dec_five:
        xorps %xmm7,%xmm7               /* 5 blocks: sixth lane is a zero dummy */
        call _aesni_decrypt6
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        movups %xmm6,64(%rsi)
        pxor %xmm6,%xmm6
        pxor %xmm7,%xmm7
        jmp .Lecb_ret
.align 16
.Lecb_dec_six:
        call _aesni_decrypt6
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        movups %xmm6,64(%rsi)
        pxor %xmm6,%xmm6
        movups %xmm7,80(%rsi)
        pxor %xmm7,%xmm7

.Lecb_ret:
        /* common exit: clear the round-key registers */
        xorps %xmm0,%xmm0
        pxor %xmm1,%xmm1
        .byte 0xf3,0xc3
.cfi_endproc
.size aesni_ecb_encrypt,.-aesni_ecb_encrypt

/*
 * aesni_ccm64_encrypt_blocks
 *   In:  %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
 *        %rcx = key schedule, %r8 = 16-byte counter block,
 *        %r9 = 16-byte accumulator that is read on entry and written back
 *              on exit (presumably the CCM CBC-MAC state; confirm with caller).
 */
.globl aesni_ccm64_encrypt_blocks
.type aesni_ccm64_encrypt_blocks,@function
.align 16
aesni_ccm64_encrypt_blocks:
.cfi_startproc
        movl 240(%rcx),%eax             /* eax = round-loop count */
/*
 * Body of aesni_ccm64_encrypt_blocks (after the load of the round-loop
 * count into %eax), aesni_ccm64_decrypt_blocks, and the entry and setup of
 * aesni_ctr32_encrypt_blocks.
 * Raw-byte decoding (AT&T order, source first):
 *   102,15,56,220,M  aesenc       102,15,56,221,M  aesenclast
 *     M = 209/217: %xmm1 into %xmm2/%xmm3;  M = 208/216: %xmm0 into %xmm2/%xmm3
 *   102,15,56,0,247  pshufb %xmm7,%xmm6    102,15,56,0,215  pshufb %xmm7,%xmm2
 *   102,15,58,34,M,3 pinsrd $3: M = 216 %eax->%xmm3, 226 %edx->%xmm4,
 *                                   232 %eax->%xmm5
 *   0xf3,0xc3        rep ret
 *
 * ccm64 encrypt register roles:
 *   %xmm6 = counter kept in shuffled form so paddq can increment it,
 *   %xmm2 = counter block being encrypted, %xmm3 = accumulator from (%r9),
 *   %xmm7 = .Lbswap_mask, %xmm9 = .Lincrement64 (both defined elsewhere in
 *   the file), %r11 = key schedule base, %r10 = 16-16*count (negative index).
 * NOTE(review): the outer loop runs its body before testing %rdx, so a block
 * count of zero is not handled; callers must pass at least one block.
 */
        movdqu (%r8),%xmm6
        movdqa .Lincrement64(%rip),%xmm9
        movdqa .Lbswap_mask(%rip),%xmm7

        shll $4,%eax
        movl $16,%r10d
        leaq 0(%rcx),%r11
        movdqu (%r9),%xmm3
        movdqa %xmm6,%xmm2
        leaq 32(%rcx,%rax,1),%rcx
        .byte 102,15,56,0,247           /* pshufb %xmm7,%xmm6 */
        subq %rax,%r10
        jmp .Lccm64_enc_outer
.align 16
.Lccm64_enc_outer:
        movups (%r11),%xmm0
        movq %r10,%rax
        movups (%rdi),%xmm8             /* xmm8 = input block */

        xorps %xmm0,%xmm2
        movups 16(%r11),%xmm1
        xorps %xmm8,%xmm0
        xorps %xmm0,%xmm3               /* accumulator ^= input ^ round key 0 */
        movups 32(%r11),%xmm0

.Lccm64_enc2_loop:
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Lccm64_enc2_loop
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        paddq %xmm9,%xmm6               /* advance the counter */
        decq %rdx                       /* flags consumed by jnz below */
        .byte 102,15,56,221,208
        .byte 102,15,56,221,216

        leaq 16(%rdi),%rdi
        xorps %xmm2,%xmm8               /* output = input ^ encrypted counter */
        movdqa %xmm6,%xmm2
        movups %xmm8,(%rsi)
        .byte 102,15,56,0,215           /* pshufb %xmm7,%xmm2 */
        leaq 16(%rsi),%rsi
        jnz .Lccm64_enc_outer

        pxor %xmm0,%xmm0
        pxor %xmm1,%xmm1
        pxor %xmm2,%xmm2
        movups %xmm3,(%r9)              /* write the accumulator back */
        pxor %xmm3,%xmm3
        pxor %xmm8,%xmm8
        pxor %xmm6,%xmm6
        .byte 0xf3,0xc3
.cfi_endproc
.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks

/*
 * aesni_ccm64_decrypt_blocks
 *   Same argument registers as aesni_ccm64_encrypt_blocks.  The first
 *   counter block is encrypted alone; afterwards each iteration encrypts
 *   the next counter block (%xmm2) together with the accumulator (%xmm3),
 *   into which the just-recovered output block has been XORed.  The final
 *   accumulator update is done alone at .Lccm64_dec_break.
 * NOTE(review): the first block is processed before %rdx is tested, so a
 * block count of zero is not handled here either.
 */
.globl aesni_ccm64_decrypt_blocks
.type aesni_ccm64_decrypt_blocks,@function
.align 16
aesni_ccm64_decrypt_blocks:
.cfi_startproc
        movl 240(%rcx),%eax
        movups (%r8),%xmm6
        movdqu (%r9),%xmm3
        movdqa .Lincrement64(%rip),%xmm9
        movdqa .Lbswap_mask(%rip),%xmm7

        movaps %xmm6,%xmm2
        movl %eax,%r10d
        movq %rcx,%r11
        .byte 102,15,56,0,247           /* pshufb %xmm7,%xmm6 */
        movups (%rcx),%xmm0
        movups 16(%rcx),%xmm1
        leaq 32(%rcx),%rcx
        xorps %xmm0,%xmm2
.Loop_enc1_5:
        .byte 102,15,56,220,209
        decl %eax
        movups (%rcx),%xmm1
        leaq 16(%rcx),%rcx
        jnz .Loop_enc1_5
        .byte 102,15,56,221,209
        shll $4,%r10d
        movl $16,%eax
        movups (%rdi),%xmm8
        paddq %xmm9,%xmm6
        leaq 16(%rdi),%rdi
        subq %r10,%rax
        leaq 32(%r11,%r10,1),%rcx
        movq %rax,%r10                  /* r10 = 16-16*count, reused every iteration */
        jmp .Lccm64_dec_outer
.align 16
.Lccm64_dec_outer:
        xorps %xmm2,%xmm8               /* output = input ^ encrypted counter */
        movdqa %xmm6,%xmm2
        movups %xmm8,(%rsi)
        leaq 16(%rsi),%rsi
        .byte 102,15,56,0,215           /* pshufb %xmm7,%xmm2 */

        subq $1,%rdx
        jz .Lccm64_dec_break

        movups (%r11),%xmm0
        movq %r10,%rax
        movups 16(%r11),%xmm1
        xorps %xmm0,%xmm8
        xorps %xmm0,%xmm2
        xorps %xmm8,%xmm3
        movups 32(%r11),%xmm0
        jmp .Lccm64_dec2_loop
.align 16
.Lccm64_dec2_loop:
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        movups -16(%rcx,%rax,1),%xmm0
        jnz .Lccm64_dec2_loop
        movups (%rdi),%xmm8
        paddq %xmm9,%xmm6
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,221,208
        .byte 102,15,56,221,216
        leaq 16(%rdi),%rdi
        jmp .Lccm64_dec_outer

.align 16
.Lccm64_dec_break:
        /* last block: fold it into the accumulator and encrypt %xmm3 alone */
        movl 240(%r11),%eax
        movups (%r11),%xmm0
        movups 16(%r11),%xmm1
        xorps %xmm0,%xmm8
        leaq 32(%r11),%r11
        xorps %xmm8,%xmm3
.Loop_enc1_6:
        .byte 102,15,56,220,217         /* aesenc %xmm1,%xmm3 */
        decl %eax
        movups (%r11),%xmm1
        leaq 16(%r11),%r11
        jnz .Loop_enc1_6
        .byte 102,15,56,221,217         /* aesenclast %xmm1,%xmm3 */
        pxor %xmm0,%xmm0
        pxor %xmm1,%xmm1
        pxor %xmm2,%xmm2
        movups %xmm3,(%r9)
        pxor %xmm3,%xmm3
        pxor %xmm8,%xmm8
        pxor %xmm6,%xmm6
        .byte 0xf3,0xc3
.cfi_endproc
.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks

/*
 * aesni_ctr32_encrypt_blocks
 *   In:  %rdi = input, %rsi = output, %rdx = number of 16-byte blocks,
 *        %rcx = key schedule, %r8 = 16-byte counter block whose last 32-bit
 *        word is a byte-swapped (big-endian) counter.
 *   A block count of exactly 1 takes the short path below; everything else
 *   goes to .Lctr32_bulk, which builds a 128-byte, 16-byte-aligned stack
 *   area holding eight counter blocks already XORed with round key 0.
 *   %r11 holds the entry %rsp; %rbp is saved and reused to hold the last
 *   dword of round key 0.
 */
.globl aesni_ctr32_encrypt_blocks
.type aesni_ctr32_encrypt_blocks,@function
.align 16
aesni_ctr32_encrypt_blocks:
.cfi_startproc
        cmpq $1,%rdx
        jne .Lctr32_bulk

        movups (%r8),%xmm2
        movups (%rdi),%xmm3
        movl 240(%rcx),%edx             /* %rdx is free here: reuse as round counter */
        movups (%rcx),%xmm0
        movups 16(%rcx),%xmm1
        leaq 32(%rcx),%rcx
        xorps %xmm0,%xmm2
.Loop_enc1_7:
        .byte 102,15,56,220,209
        decl %edx
        movups (%rcx),%xmm1
        leaq 16(%rcx),%rcx
        jnz .Loop_enc1_7
        .byte 102,15,56,221,209
        pxor %xmm0,%xmm0
        pxor %xmm1,%xmm1
        xorps %xmm3,%xmm2               /* output = input ^ encrypted counter */
        pxor %xmm3,%xmm3
        movups %xmm2,(%rsi)
        xorps %xmm2,%xmm2
        jmp .Lctr32_epilogue

.align 16
.Lctr32_bulk:
        leaq (%rsp),%r11
.cfi_def_cfa_register %r11
        pushq %rbp
.cfi_offset %rbp,-16
        subq $128,%rsp
        andq $-16,%rsp                  /* movdqa/movaps below need 16-byte alignment */

        movdqu (%r8),%xmm2
        movdqu (%rcx),%xmm0
        movl 12(%r8),%r8d               /* r8d = counter word */
        pxor %xmm0,%xmm2                /* counter block ^ round key 0 */
        movl 12(%rcx),%ebp              /* ebp = last dword of round key 0 */
        movdqa %xmm2,0(%rsp)
        bswapl %r8d                     /* counter to host byte order */
        movdqa %xmm2,%xmm3
        movdqa %xmm2,%xmm4
        movdqa %xmm2,%xmm5
        movdqa %xmm2,64(%rsp)
        movdqa %xmm2,80(%rsp)
        movdqa %xmm2,96(%rsp)
        movq %rdx,%r10                  /* park the block count; %rdx is scratch below */
        movdqa %xmm2,112(%rsp)

        /* lanes 1..7: dword 3 = bswap(counter+i) ^ key dword 3 */
        leaq 1(%r8),%rax
        leaq 2(%r8),%rdx
        bswapl %eax
        bswapl %edx
        xorl %ebp,%eax
        xorl %ebp,%edx
        .byte 102,15,58,34,216,3        /* pinsrd $3,%eax,%xmm3 */
        leaq 3(%r8),%rax
        movdqa %xmm3,16(%rsp)
        .byte 102,15,58,34,226,3        /* pinsrd $3,%edx,%xmm4 */
        bswapl %eax
        movq %r10,%rdx                  /* restore the block count */
        leaq 4(%r8),%r10
        movdqa %xmm4,32(%rsp)
        xorl %ebp,%eax
        bswapl %r10d
        .byte 102,15,58,34,232,3        /* pinsrd $3,%eax,%xmm5 */
        xorl %ebp,%r10d
        movdqa %xmm5,48(%rsp)
        leaq 5(%r8),%r9
        movl %r10d,64+12(%rsp)
        bswapl %r9d
        leaq 6(%r8),%r10
        movl 240(%rcx),%eax             /* eax = round-loop count */
        xorl %ebp,%r9d
        bswapl %r10d
        movl %r9d,80+12(%rsp)
        xorl %ebp,%r10d
        leaq 7(%r8),%r9
        movl %r10d,96+12(%rsp)
        bswapl %r9d
        movl OPENSSL_ia32cap_P+4(%rip),%r10d
        xorl %ebp,%r9d
        /* keep bits 22 and 26 (0x4400000); presumably CPUID MOVBE and XSAVE - confirm */
        andl $71303168,%r10d
        movl %r9d,112+12(%rsp)

        movups 16(%rcx),%xmm1

        movdqa 64(%rsp),%xmm6
        movdqa 80(%rsp),%xmm7

        cmpq $8,%rdx
        jb .Lctr32_tail

        subq $6,%rdx
        cmpl $4194304,%r10d             /* bit 22 set and bit 26 clear: 6-wide path */
        je .Lctr32_6x

        leaq 128(%rcx),%rcx             /* bias key pointer for short displacements */
        subq $2,%rdx
        jmp .Lctr32_loop8

.align 16
/*
 * aesni_ctr32_encrypt_blocks: 6-wide and 8-wide main loops.
 * State on entry to both loops (set up earlier in the function):
 *   %r8d = counter in host byte order, %ebp = last dword of round key 0,
 *   %rsp -> eight 16-byte counter blocks already XORed with round key 0,
 *   %xmm2-%xmm7 = lanes 0-5, %xmm1 = round key 1, %rdx = blocks remaining
 *   minus the width of one iteration.
 * Raw-byte decoding (AT&T order, source first):
 *   102,15,56,220,M     aesenc, M = 209..249 step 8: %xmm1 into %xmm2..%xmm7
 *                               M = 208..248 step 8: %xmm0 into %xmm2..%xmm7
 *   102,68,15,56,220,M  aesenc, M = 193/201: %xmm1 into %xmm8/%xmm9
 *                               M = 192/200: %xmm0 into %xmm8/%xmm9
 *   102,65,15,56,221,M  aesenclast %xmm10..%xmm15 into %xmm2..%xmm7
 *                               (M = 210,219,228,237,246,255)
 *   102,68,15,56,221,193 aesenclast %xmm1,%xmm8
 *   102,69,15,56,221,202 aesenclast %xmm10,%xmm9
 *   0x0f,0x38,0xf1,0x44,0x24,D  movbe %eax,D(%rsp)
 *   0x66,0x90           2-byte nop
 */
.Lctr32_6x:
        shll $4,%eax
        movl $48,%r10d
        bswapl %ebp                     /* movbe swaps on store, so pre-swap the key word */
        leaq 32(%rcx,%rax,1),%rcx       /* rcx = key+32+16*count */
        subq %rax,%r10                  /* r10 = 48-16*count */
        jmp .Lctr32_loop6

.align 16
.Lctr32_loop6:
        /* first two rounds, interleaved with writing the next six counters */
        addl $6,%r8d
        movups -48(%rcx,%r10,1),%xmm0   /* round key 2 */
        .byte 102,15,56,220,209
        movl %r8d,%eax
        xorl %ebp,%eax
        .byte 102,15,56,220,217
        .byte 0x0f,0x38,0xf1,0x44,0x24,12       /* movbe %eax,12(%rsp) */
        leal 1(%r8),%eax
        .byte 102,15,56,220,225
        xorl %ebp,%eax
        .byte 0x0f,0x38,0xf1,0x44,0x24,28       /* movbe %eax,28(%rsp) */
        .byte 102,15,56,220,233
        leal 2(%r8),%eax
        xorl %ebp,%eax
        .byte 102,15,56,220,241
        .byte 0x0f,0x38,0xf1,0x44,0x24,44       /* movbe %eax,44(%rsp) */
        leal 3(%r8),%eax
        .byte 102,15,56,220,249
        movups -32(%rcx,%r10,1),%xmm1   /* round key 3 */
        xorl %ebp,%eax

        .byte 102,15,56,220,208
        .byte 0x0f,0x38,0xf1,0x44,0x24,60       /* movbe %eax,60(%rsp) */
        leal 4(%r8),%eax
        .byte 102,15,56,220,216
        xorl %ebp,%eax
        .byte 0x0f,0x38,0xf1,0x44,0x24,76       /* movbe %eax,76(%rsp) */
        .byte 102,15,56,220,224
        leal 5(%r8),%eax
        xorl %ebp,%eax
        .byte 102,15,56,220,232
        .byte 0x0f,0x38,0xf1,0x44,0x24,92       /* movbe %eax,92(%rsp) */
        movq %r10,%rax
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        movups -16(%rcx,%r10,1),%xmm0   /* round key 4 */

        call .Lenc_loop6                /* remaining rounds: loop inside _aesni_encrypt6 */

        movdqu (%rdi),%xmm8
        movdqu 16(%rdi),%xmm9
        movdqu 32(%rdi),%xmm10
        movdqu 48(%rdi),%xmm11
        movdqu 64(%rdi),%xmm12
        movdqu 80(%rdi),%xmm13
        leaq 96(%rdi),%rdi
        movups -64(%rcx,%r10,1),%xmm1   /* round key 1 for the next pass */
        pxor %xmm2,%xmm8                /* input ^ keystream; reload next counters */
        movaps 0(%rsp),%xmm2
        pxor %xmm3,%xmm9
        movaps 16(%rsp),%xmm3
        pxor %xmm4,%xmm10
        movaps 32(%rsp),%xmm4
        pxor %xmm5,%xmm11
        movaps 48(%rsp),%xmm5
        pxor %xmm6,%xmm12
        movaps 64(%rsp),%xmm6
        pxor %xmm7,%xmm13
        movaps 80(%rsp),%xmm7
        movdqu %xmm8,(%rsi)
        movdqu %xmm9,16(%rsi)
        movdqu %xmm10,32(%rsi)
        movdqu %xmm11,48(%rsi)
        movdqu %xmm12,64(%rsi)
        movdqu %xmm13,80(%rsi)
        leaq 96(%rsi),%rsi

        subq $6,%rdx
        jnc .Lctr32_loop6

        addq $6,%rdx
        jz .Lctr32_done

        /* rebuild %eax = round-loop count and %rcx = key base for the tail */
        leal -48(%r10),%eax
        leaq -80(%rcx,%r10,1),%rcx
        negl %eax
        shrl $4,%eax
        jmp .Lctr32_tail

.align 32
.Lctr32_loop8:
        /*
         * %rcx = key+128 here, hence the N-128 displacements.  Each round is
         * interleaved with storing bswap(counter+i) ^ key word for the next
         * pass into dword 3 of stack lane i.
         */
        addl $8,%r8d
        movdqa 96(%rsp),%xmm8
        .byte 102,15,56,220,209
        movl %r8d,%r9d
        movdqa 112(%rsp),%xmm9
        .byte 102,15,56,220,217
        bswapl %r9d
        movups 32-128(%rcx),%xmm0
        .byte 102,15,56,220,225
        xorl %ebp,%r9d
        nop
        .byte 102,15,56,220,233
        movl %r9d,0+12(%rsp)
        leaq 1(%r8),%r9
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193
        .byte 102,68,15,56,220,201
        movups 48-128(%rcx),%xmm1
        bswapl %r9d
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        xorl %ebp,%r9d
        .byte 0x66,0x90
        .byte 102,15,56,220,224
        .byte 102,15,56,220,232
        movl %r9d,16+12(%rsp)
        leaq 2(%r8),%r9
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        .byte 102,68,15,56,220,192
        .byte 102,68,15,56,220,200
        movups 64-128(%rcx),%xmm0
        bswapl %r9d
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        xorl %ebp,%r9d
        .byte 0x66,0x90
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        movl %r9d,32+12(%rsp)
        leaq 3(%r8),%r9
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193
        .byte 102,68,15,56,220,201
        movups 80-128(%rcx),%xmm1
        bswapl %r9d
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        xorl %ebp,%r9d
        .byte 0x66,0x90
        .byte 102,15,56,220,224
        .byte 102,15,56,220,232
        movl %r9d,48+12(%rsp)
        leaq 4(%r8),%r9
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        .byte 102,68,15,56,220,192
        .byte 102,68,15,56,220,200
        movups 96-128(%rcx),%xmm0
        bswapl %r9d
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        xorl %ebp,%r9d
        .byte 0x66,0x90
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        movl %r9d,64+12(%rsp)
        leaq 5(%r8),%r9
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193
        .byte 102,68,15,56,220,201
        movups 112-128(%rcx),%xmm1
        bswapl %r9d
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        xorl %ebp,%r9d
        .byte 0x66,0x90
        .byte 102,15,56,220,224
        .byte 102,15,56,220,232
        movl %r9d,80+12(%rsp)
        leaq 6(%r8),%r9
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        .byte 102,68,15,56,220,192
        .byte 102,68,15,56,220,200
        movups 128-128(%rcx),%xmm0
        bswapl %r9d
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        xorl %ebp,%r9d
        .byte 0x66,0x90
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        movl %r9d,96+12(%rsp)
        leaq 7(%r8),%r9
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193
        .byte 102,68,15,56,220,201
        movups 144-128(%rcx),%xmm1
        bswapl %r9d
        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        .byte 102,15,56,220,224
        xorl %ebp,%r9d
        movdqu 0(%rdi),%xmm10           /* start fetching input early */
        .byte 102,15,56,220,232
        movl %r9d,112+12(%rsp)
        cmpl $11,%eax                   /* flags survive until the jb/je below */
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        .byte 102,68,15,56,220,192
        .byte 102,68,15,56,220,200
        movups 160-128(%rcx),%xmm0

        jb .Lctr32_enc_done             /* count < 11: key schedule exhausted */

        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193
        .byte 102,68,15,56,220,201
        movups 176-128(%rcx),%xmm1

        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        .byte 102,15,56,220,224
        .byte 102,15,56,220,232
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        .byte 102,68,15,56,220,192
        .byte 102,68,15,56,220,200
        movups 192-128(%rcx),%xmm0
        je .Lctr32_enc_done             /* count == 11 */

        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193
        .byte 102,68,15,56,220,201
        movups 208-128(%rcx),%xmm1

        .byte 102,15,56,220,208
        .byte 102,15,56,220,216
        .byte 102,15,56,220,224
        .byte 102,15,56,220,232
        .byte 102,15,56,220,240
        .byte 102,15,56,220,248
        .byte 102,68,15,56,220,192
        .byte 102,68,15,56,220,200
        movups 224-128(%rcx),%xmm0
        jmp .Lctr32_enc_done

.align 16
.Lctr32_enc_done:
        /*
         * %xmm0 = last round key.  It is XORed into the input blocks first,
         * so a single aesenclast both finishes the cipher and applies the
         * keystream to the data.
         */
        movdqu 16(%rdi),%xmm11
        pxor %xmm0,%xmm10
        movdqu 32(%rdi),%xmm12
        pxor %xmm0,%xmm11
        movdqu 48(%rdi),%xmm13
        pxor %xmm0,%xmm12
        movdqu 64(%rdi),%xmm14
        pxor %xmm0,%xmm13
        movdqu 80(%rdi),%xmm15
        pxor %xmm0,%xmm14
        pxor %xmm0,%xmm15
        .byte 102,15,56,220,209
        .byte 102,15,56,220,217
        .byte 102,15,56,220,225
        .byte 102,15,56,220,233
        .byte 102,15,56,220,241
        .byte 102,15,56,220,249
        .byte 102,68,15,56,220,193
        .byte 102,68,15,56,220,201
        movdqu 96(%rdi),%xmm1
        leaq 128(%rdi),%rdi

        .byte 102,65,15,56,221,210      /* aesenclast %xmm10,%xmm2 */
        pxor %xmm0,%xmm1
        movdqu 112-128(%rdi),%xmm10
        .byte 102,65,15,56,221,219      /* aesenclast %xmm11,%xmm3 */
        pxor %xmm0,%xmm10
        movdqa 0(%rsp),%xmm11           /* preload next pass's counters */
        .byte 102,65,15,56,221,228      /* aesenclast %xmm12,%xmm4 */
        .byte 102,65,15,56,221,237      /* aesenclast %xmm13,%xmm5 */
        movdqa 16(%rsp),%xmm12
        movdqa 32(%rsp),%xmm13
        .byte 102,65,15,56,221,246      /* aesenclast %xmm14,%xmm6 */
        .byte 102,65,15,56,221,255      /* aesenclast %xmm15,%xmm7 */
        movdqa 48(%rsp),%xmm14
        movdqa 64(%rsp),%xmm15
        .byte 102,68,15,56,221,193      /* aesenclast %xmm1,%xmm8 */
        movdqa 80(%rsp),%xmm0
        movups 16-128(%rcx),%xmm1       /* round key 1 for the next pass */
        .byte 102,69,15,56,221,202      /* aesenclast %xmm10,%xmm9 */

        movups %xmm2,(%rsi)
        movdqa %xmm11,%xmm2
        movups %xmm3,16(%rsi)
        movdqa %xmm12,%xmm3
        movups %xmm4,32(%rsi)
        movdqa %xmm13,%xmm4
        movups %xmm5,48(%rsi)
        movdqa %xmm14,%xmm5
        movups %xmm6,64(%rsi)
        movdqa %xmm15,%xmm6
        movups %xmm7,80(%rsi)
        movdqa %xmm0,%xmm7
        movups %xmm8,96(%rsi)
        movups %xmm9,112(%rsi)
        leaq 128(%rsi),%rsi

        subq $8,%rdx
        jnc .Lctr32_loop8

        addq $8,%rdx
        jz .Lctr32_done
        leaq -128(%rcx),%rcx            /* undo the +128 bias: rcx = key base */

1458.Lctr32_tail: 1459 1460 1461 leaq 16(%rcx),%rcx 1462 cmpq $4,%rdx 1463 jb .Lctr32_loop3 1464 je .Lctr32_loop4 1465 1466 1467 shll $4,%eax 1468 movdqa 96(%rsp),%xmm8 1469 pxor %xmm9,%xmm9 1470 1471 movups 16(%rcx),%xmm0 1472.byte 102,15,56,220,209 1473.byte 102,15,56,220,217 1474 leaq 32-16(%rcx,%rax,1),%rcx 1475 negq %rax 1476.byte 102,15,56,220,225 1477 addq $16,%rax 1478 movups (%rdi),%xmm10 1479.byte 102,15,56,220,233 1480.byte 102,15,56,220,241 1481 movups 16(%rdi),%xmm11 1482 movups 32(%rdi),%xmm12 1483.byte 102,15,56,220,249 1484.byte 102,68,15,56,220,193 1485 1486 call .Lenc_loop8_enter 1487 1488 movdqu 48(%rdi),%xmm13 1489 pxor %xmm10,%xmm2 1490 movdqu 64(%rdi),%xmm10 1491 pxor %xmm11,%xmm3 1492 movdqu %xmm2,(%rsi) 1493 pxor %xmm12,%xmm4 1494 movdqu %xmm3,16(%rsi) 1495 pxor %xmm13,%xmm5 1496 movdqu %xmm4,32(%rsi) 1497 pxor %xmm10,%xmm6 1498 movdqu %xmm5,48(%rsi) 1499 movdqu %xmm6,64(%rsi) 1500 cmpq $6,%rdx 1501 jb .Lctr32_done 1502 1503 movups 80(%rdi),%xmm11 1504 xorps %xmm11,%xmm7 1505 movups %xmm7,80(%rsi) 1506 je .Lctr32_done 1507 1508 movups 96(%rdi),%xmm12 1509 xorps %xmm12,%xmm8 1510 movups %xmm8,96(%rsi) 1511 jmp .Lctr32_done 1512 1513.align 32 1514.Lctr32_loop4: 1515.byte 102,15,56,220,209 1516 leaq 16(%rcx),%rcx 1517 decl %eax 1518.byte 102,15,56,220,217 1519.byte 102,15,56,220,225 1520.byte 102,15,56,220,233 1521 movups (%rcx),%xmm1 1522 jnz .Lctr32_loop4 1523.byte 102,15,56,221,209 1524.byte 102,15,56,221,217 1525 movups (%rdi),%xmm10 1526 movups 16(%rdi),%xmm11 1527.byte 102,15,56,221,225 1528.byte 102,15,56,221,233 1529 movups 32(%rdi),%xmm12 1530 movups 48(%rdi),%xmm13 1531 1532 xorps %xmm10,%xmm2 1533 movups %xmm2,(%rsi) 1534 xorps %xmm11,%xmm3 1535 movups %xmm3,16(%rsi) 1536 pxor %xmm12,%xmm4 1537 movdqu %xmm4,32(%rsi) 1538 pxor %xmm13,%xmm5 1539 movdqu %xmm5,48(%rsi) 1540 jmp .Lctr32_done 1541 1542.align 32 1543.Lctr32_loop3: 1544.byte 102,15,56,220,209 1545 leaq 16(%rcx),%rcx 1546 decl %eax 1547.byte 102,15,56,220,217 1548.byte 
102,15,56,220,225 1549 movups (%rcx),%xmm1 1550 jnz .Lctr32_loop3 1551.byte 102,15,56,221,209 1552.byte 102,15,56,221,217 1553.byte 102,15,56,221,225 1554 1555 movups (%rdi),%xmm10 1556 xorps %xmm10,%xmm2 1557 movups %xmm2,(%rsi) 1558 cmpq $2,%rdx 1559 jb .Lctr32_done 1560 1561 movups 16(%rdi),%xmm11 1562 xorps %xmm11,%xmm3 1563 movups %xmm3,16(%rsi) 1564 je .Lctr32_done 1565 1566 movups 32(%rdi),%xmm12 1567 xorps %xmm12,%xmm4 1568 movups %xmm4,32(%rsi) 1569 /* Tail of aesni_ctr32_encrypt_blocks, whose beginning lies before this range. .Lctr32_done zeroes xmm0 to xmm15 and the 128 bytes at 0(%rsp) to 112(%rsp), then reloads %rbp from -8(%r11) and %rsp from %r11. */ 1570.Lctr32_done: 1571 xorps %xmm0,%xmm0 1572 xorl %ebp,%ebp 1573 pxor %xmm1,%xmm1 1574 pxor %xmm2,%xmm2 1575 pxor %xmm3,%xmm3 1576 pxor %xmm4,%xmm4 1577 pxor %xmm5,%xmm5 1578 pxor %xmm6,%xmm6 1579 pxor %xmm7,%xmm7 1580 movaps %xmm0,0(%rsp) 1581 pxor %xmm8,%xmm8 1582 movaps %xmm0,16(%rsp) 1583 pxor %xmm9,%xmm9 1584 movaps %xmm0,32(%rsp) 1585 pxor %xmm10,%xmm10 1586 movaps %xmm0,48(%rsp) 1587 pxor %xmm11,%xmm11 1588 movaps %xmm0,64(%rsp) 1589 pxor %xmm12,%xmm12 1590 movaps %xmm0,80(%rsp) 1591 pxor %xmm13,%xmm13 1592 movaps %xmm0,96(%rsp) 1593 pxor %xmm14,%xmm14 1594 movaps %xmm0,112(%rsp) 1595 pxor %xmm15,%xmm15 1596 movq -8(%r11),%rbp 1597.cfi_restore %rbp 1598 leaq (%r11),%rsp 1599.cfi_def_cfa_register %rsp 1600.Lctr32_epilogue: 1601 .byte 0xf3,0xc3 1602.cfi_endproc 1603.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks /* aesni_xts_encrypt. Register roles as read from the code below: %rdi = input pointer, %rsi = output pointer, %rdx = length in bytes, %rcx = key schedule for the data blocks with its round count at 240(%rcx), %r8 = second key schedule, used only to encrypt the 16 bytes at (%r9) into the first tweak. NOTE(review): presumably the OpenSSL AES-XTS entry point, confirm the C prototype against the generator aesni-x86_64.pl. */ 1604.globl aesni_xts_encrypt 1605.type aesni_xts_encrypt,@function 1606.align 16 1607aesni_xts_encrypt: 1608.cfi_startproc 1609 leaq (%rsp),%r11 1610.cfi_def_cfa_register %r11 1611 pushq %rbp 1612.cfi_offset %rbp,-16 1613 subq $112,%rsp 1614 andq $-16,%rsp /* %r11 keeps the entry %rsp for the epilogue. At least 112 bytes of scratch are reserved and %rsp is rounded down to a 16-byte boundary for the movdqa and movaps stack accesses below. Next, the block at (%r9) is encrypted with the %r8 schedule: the bytes 102,15,56,220,209 encode aesenc %xmm1,%xmm2 and 102,15,56,221,209 encode aesenclast %xmm1,%xmm2, with %eax counting the aesenc rounds. */ 1615 movups (%r9),%xmm2 1616 movl 240(%r8),%eax 1617 movl 240(%rcx),%r10d 1618 movups (%r8),%xmm0 1619 movups 16(%r8),%xmm1 1620 leaq 32(%r8),%r8 1621 xorps %xmm0,%xmm2 1622.Loop_enc1_8: 1623.byte 102,15,56,220,209 1624 decl %eax 1625 movups (%r8),%xmm1 1626 leaq 16(%r8),%r8 1627 jnz .Loop_enc1_8 1628.byte 102,15,56,221,209 1629 movups (%rcx),%xmm0 1630 movq %rcx,%rbp 1631 movl %r10d,%eax 1632 shll $4,%r10d 1633 movq %rdx,%r9 1634
andq $-16,%rdx 1635 1636 movups 16(%rcx,%r10,1),%xmm1 1637 /* Here %rbp = data key schedule, %eax = round count, %r10 = 16 times the round count, %r9 = original byte length, %rdx = length rounded down to a multiple of 16, %xmm0 = round key 0 and %xmm1 = the key at 16(%rcx,%r10,1), the one that the single-block loops in this function feed to aesenclast. Tweak setup follows: each step copies %xmm15 out as a tweak (xmm10 to xmm14, each xored with round key 0), then doubles %xmm15 with paddq and xors in .Lxts_magic masked by psrad $31 of the pshufd $0x5f copy kept in %xmm9. NOTE(review): .Lxts_magic is defined outside this range, presumably the GF(2^128) reduction constant of XTS. */ 1638 movdqa .Lxts_magic(%rip),%xmm8 1639 movdqa %xmm2,%xmm15 1640 pshufd $0x5f,%xmm2,%xmm9 1641 pxor %xmm0,%xmm1 1642 movdqa %xmm9,%xmm14 1643 paddd %xmm9,%xmm9 1644 movdqa %xmm15,%xmm10 1645 psrad $31,%xmm14 1646 paddq %xmm15,%xmm15 1647 pand %xmm8,%xmm14 1648 pxor %xmm0,%xmm10 1649 pxor %xmm14,%xmm15 1650 movdqa %xmm9,%xmm14 1651 paddd %xmm9,%xmm9 1652 movdqa %xmm15,%xmm11 1653 psrad $31,%xmm14 1654 paddq %xmm15,%xmm15 1655 pand %xmm8,%xmm14 1656 pxor %xmm0,%xmm11 1657 pxor %xmm14,%xmm15 1658 movdqa %xmm9,%xmm14 1659 paddd %xmm9,%xmm9 1660 movdqa %xmm15,%xmm12 1661 psrad $31,%xmm14 1662 paddq %xmm15,%xmm15 1663 pand %xmm8,%xmm14 1664 pxor %xmm0,%xmm12 1665 pxor %xmm14,%xmm15 1666 movdqa %xmm9,%xmm14 1667 paddd %xmm9,%xmm9 1668 movdqa %xmm15,%xmm13 1669 psrad $31,%xmm14 1670 paddq %xmm15,%xmm15 1671 pand %xmm8,%xmm14 1672 pxor %xmm0,%xmm13 1673 pxor %xmm14,%xmm15 1674 movdqa %xmm15,%xmm14 1675 psrad $31,%xmm9 1676 paddq %xmm15,%xmm15 1677 pand %xmm8,%xmm9 1678 pxor %xmm0,%xmm14 1679 pxor %xmm9,%xmm15 1680 movaps %xmm1,96(%rsp) /* 96(%rsp) = round key 0 xor the last round key, from the pxor %xmm0,%xmm1 above. */ 1681 1682 subq $96,%rdx 1683 jc .Lxts_enc_short 1684 /* At least six blocks remain. %rcx = %rbp + 32 + 16 times the round count, and %rax = %r10 = 112 minus 16 times the round count, the index that .Lxts_enc_loop6 advances by 32 until it reaches zero. %r8 now points at .Lxts_magic. */ 1685 movl $16+96,%eax 1686 leaq 32(%rbp,%r10,1),%rcx 1687 subq %r10,%rax 1688 movups 16(%rbp),%xmm1 1689 movq %rax,%r10 1690 leaq .Lxts_magic(%rip),%r8 1691 jmp .Lxts_enc_grandloop 1692 1693.align 32 /* Six blocks per pass. Inputs are xored with the tweaks (which already contain round key 0) while the first rounds run. Each tweak is also xored with 96(%rsp) and saved at 0(%rsp) to 80(%rsp) as the memory operand of the final aesenclast. */ 1694.Lxts_enc_grandloop: 1695 movdqu 0(%rdi),%xmm2 1696 movdqa %xmm0,%xmm8 1697 movdqu 16(%rdi),%xmm3 1698 pxor %xmm10,%xmm2 1699 movdqu 32(%rdi),%xmm4 1700 pxor %xmm11,%xmm3 1701.byte 102,15,56,220,209 1702 movdqu 48(%rdi),%xmm5 1703 pxor %xmm12,%xmm4 1704.byte 102,15,56,220,217 1705 movdqu 64(%rdi),%xmm6 1706 pxor %xmm13,%xmm5 1707.byte 102,15,56,220,225 1708 movdqu 80(%rdi),%xmm7 1709 pxor %xmm15,%xmm8 1710 movdqa 96(%rsp),%xmm9 1711 pxor %xmm14,%xmm6 1712.byte 102,15,56,220,233 1713 movups 32(%rbp),%xmm0 1714 leaq 96(%rdi),%rdi 1715 pxor %xmm8,%xmm7 1716 1717 pxor %xmm9,%xmm10 1718.byte 102,15,56,220,241 1719 pxor %xmm9,%xmm11 1720
movdqa %xmm10,0(%rsp) 1721.byte 102,15,56,220,249 1722 movups 48(%rbp),%xmm1 1723 pxor %xmm9,%xmm12 1724 1725.byte 102,15,56,220,208 1726 pxor %xmm9,%xmm13 1727 movdqa %xmm11,16(%rsp) 1728.byte 102,15,56,220,216 1729 pxor %xmm9,%xmm14 1730 movdqa %xmm12,32(%rsp) 1731.byte 102,15,56,220,224 1732.byte 102,15,56,220,232 1733 pxor %xmm9,%xmm8 1734 movdqa %xmm14,64(%rsp) 1735.byte 102,15,56,220,240 1736.byte 102,15,56,220,248 1737 movups 64(%rbp),%xmm0 1738 movdqa %xmm8,80(%rsp) 1739 pshufd $0x5f,%xmm15,%xmm9 1740 jmp .Lxts_enc_loop6 1741.align 32 /* Middle rounds for all six blocks, two round keys per iteration, indexed by %rax which counts up to zero. */ 1742.Lxts_enc_loop6: 1743.byte 102,15,56,220,209 1744.byte 102,15,56,220,217 1745.byte 102,15,56,220,225 1746.byte 102,15,56,220,233 1747.byte 102,15,56,220,241 1748.byte 102,15,56,220,249 1749 movups -64(%rcx,%rax,1),%xmm1 1750 addq $32,%rax 1751 1752.byte 102,15,56,220,208 1753.byte 102,15,56,220,216 1754.byte 102,15,56,220,224 1755.byte 102,15,56,220,232 1756.byte 102,15,56,220,240 1757.byte 102,15,56,220,248 1758 movups -80(%rcx,%rax,1),%xmm0 1759 jnz .Lxts_enc_loop6 1760 /* Remaining rounds, interleaved with deriving the next six tweaks in xmm10 to xmm15 from %xmm15 and the .Lxts_magic value reloaded through (%r8). */ 1761 movdqa (%r8),%xmm8 1762 movdqa %xmm9,%xmm14 1763 paddd %xmm9,%xmm9 1764.byte 102,15,56,220,209 1765 paddq %xmm15,%xmm15 1766 psrad $31,%xmm14 1767.byte 102,15,56,220,217 1768 pand %xmm8,%xmm14 1769 movups (%rbp),%xmm10 1770.byte 102,15,56,220,225 1771.byte 102,15,56,220,233 1772.byte 102,15,56,220,241 1773 pxor %xmm14,%xmm15 1774 movaps %xmm10,%xmm11 1775.byte 102,15,56,220,249 1776 movups -64(%rcx),%xmm1 1777 1778 movdqa %xmm9,%xmm14 1779.byte 102,15,56,220,208 1780 paddd %xmm9,%xmm9 1781 pxor %xmm15,%xmm10 1782.byte 102,15,56,220,216 1783 psrad $31,%xmm14 1784 paddq %xmm15,%xmm15 1785.byte 102,15,56,220,224 1786.byte 102,15,56,220,232 1787 pand %xmm8,%xmm14 1788 movaps %xmm11,%xmm12 1789.byte 102,15,56,220,240 1790 pxor %xmm14,%xmm15 1791 movdqa %xmm9,%xmm14 1792.byte 102,15,56,220,248 1793 movups -48(%rcx),%xmm0 1794 1795 paddd %xmm9,%xmm9 1796.byte 102,15,56,220,209 1797 pxor %xmm15,%xmm11 1798 psrad $31,%xmm14 1799.byte 102,15,56,220,217 1800
paddq %xmm15,%xmm15 1801 pand %xmm8,%xmm14 1802.byte 102,15,56,220,225 1803.byte 102,15,56,220,233 1804 movdqa %xmm13,48(%rsp) 1805 pxor %xmm14,%xmm15 1806.byte 102,15,56,220,241 1807 movaps %xmm12,%xmm13 1808 movdqa %xmm9,%xmm14 1809.byte 102,15,56,220,249 1810 movups -32(%rcx),%xmm1 1811 1812 paddd %xmm9,%xmm9 1813.byte 102,15,56,220,208 1814 pxor %xmm15,%xmm12 1815 psrad $31,%xmm14 1816.byte 102,15,56,220,216 1817 paddq %xmm15,%xmm15 1818 pand %xmm8,%xmm14 1819.byte 102,15,56,220,224 1820.byte 102,15,56,220,232 1821.byte 102,15,56,220,240 1822 pxor %xmm14,%xmm15 1823 movaps %xmm13,%xmm14 1824.byte 102,15,56,220,248 1825 1826 movdqa %xmm9,%xmm0 1827 paddd %xmm9,%xmm9 1828.byte 102,15,56,220,209 1829 pxor %xmm15,%xmm13 1830 psrad $31,%xmm0 1831.byte 102,15,56,220,217 1832 paddq %xmm15,%xmm15 1833 pand %xmm8,%xmm0 1834.byte 102,15,56,220,225 1835.byte 102,15,56,220,233 1836 pxor %xmm0,%xmm15 1837 movups (%rbp),%xmm0 1838.byte 102,15,56,220,241 1839.byte 102,15,56,220,249 1840 movups 16(%rbp),%xmm1 1841 1842 pxor %xmm15,%xmm14 /* The bytes 102,15,56,221 followed by 84,36,0 then 92,36,16 then 100,36,32 then 108,36,48 then 116,36,64 then 124,36,80 encode aesenclast N(%rsp),%xmmK for K = 2 to 7 with N = 0, 16, 32, 48, 64, 80. */ 1843.byte 102,15,56,221,84,36,0 1844 psrad $31,%xmm9 1845 paddq %xmm15,%xmm15 1846.byte 102,15,56,221,92,36,16 1847.byte 102,15,56,221,100,36,32 1848 pand %xmm8,%xmm9 1849 movq %r10,%rax 1850.byte 102,15,56,221,108,36,48 1851.byte 102,15,56,221,116,36,64 1852.byte 102,15,56,221,124,36,80 1853 pxor %xmm9,%xmm15 1854 1855 leaq 96(%rsi),%rsi 1856 movups %xmm2,-96(%rsi) 1857 movups %xmm3,-80(%rsi) 1858 movups %xmm4,-64(%rsi) 1859 movups %xmm5,-48(%rsi) 1860 movups %xmm6,-32(%rsi) 1861 movups %xmm7,-16(%rsi) 1862 subq $96,%rdx 1863 jnc .Lxts_enc_grandloop 1864 /* Fewer than six blocks left. Recover the round count into %eax (112 minus %r10d, shifted right by 4) and point %rcx back at the key schedule. */ 1865 movl $16+96,%eax 1866 subl %r10d,%eax 1867 movq %rbp,%rcx 1868 shrl $4,%eax 1869 /* Tail dispatch. %r10d keeps the round count. %rdx + 96 is the remaining whole-block byte count, 0 to 80. pxor %xmm0 removes round key 0 from each tweak before it is used here. */ 1870.Lxts_enc_short: 1871 1872 movl %eax,%r10d 1873 pxor %xmm0,%xmm10 1874 addq $96,%rdx 1875 jz .Lxts_enc_done 1876 1877 pxor %xmm0,%xmm11 1878 cmpq $0x20,%rdx 1879 jb .Lxts_enc_one 1880 pxor %xmm0,%xmm12 1881 je .Lxts_enc_two 1882 1883 pxor %xmm0,%xmm13 1884 cmpq $0x40,%rdx 1885 jb .Lxts_enc_three 1886
pxor %xmm0,%xmm14 1887 je .Lxts_enc_four 1888 /* Five blocks. %xmm7 is zeroed as the unused sixth input of _aesni_encrypt6, which is defined outside this range. Afterwards %xmm10 receives the next tweak from %xmm15. */ 1889 movdqu (%rdi),%xmm2 1890 movdqu 16(%rdi),%xmm3 1891 movdqu 32(%rdi),%xmm4 1892 pxor %xmm10,%xmm2 1893 movdqu 48(%rdi),%xmm5 1894 pxor %xmm11,%xmm3 1895 movdqu 64(%rdi),%xmm6 1896 leaq 80(%rdi),%rdi 1897 pxor %xmm12,%xmm4 1898 pxor %xmm13,%xmm5 1899 pxor %xmm14,%xmm6 1900 pxor %xmm7,%xmm7 1901 1902 call _aesni_encrypt6 1903 1904 xorps %xmm10,%xmm2 1905 movdqa %xmm15,%xmm10 1906 xorps %xmm11,%xmm3 1907 xorps %xmm12,%xmm4 1908 movdqu %xmm2,(%rsi) 1909 xorps %xmm13,%xmm5 1910 movdqu %xmm3,16(%rsi) 1911 xorps %xmm14,%xmm6 1912 movdqu %xmm4,32(%rsi) 1913 movdqu %xmm5,48(%rsi) 1914 movdqu %xmm6,64(%rsi) 1915 leaq 80(%rsi),%rsi 1916 jmp .Lxts_enc_done 1917 1918.align 16 /* One to four blocks. Each path xors the tweak in before and after encryption and leaves the next unused tweak in %xmm10 for .Lxts_enc_done. */ 1919.Lxts_enc_one: 1920 movups (%rdi),%xmm2 1921 leaq 16(%rdi),%rdi 1922 xorps %xmm10,%xmm2 1923 movups (%rcx),%xmm0 1924 movups 16(%rcx),%xmm1 1925 leaq 32(%rcx),%rcx 1926 xorps %xmm0,%xmm2 1927.Loop_enc1_9: 1928.byte 102,15,56,220,209 1929 decl %eax 1930 movups (%rcx),%xmm1 1931 leaq 16(%rcx),%rcx 1932 jnz .Loop_enc1_9 1933.byte 102,15,56,221,209 1934 xorps %xmm10,%xmm2 1935 movdqa %xmm11,%xmm10 1936 movups %xmm2,(%rsi) 1937 leaq 16(%rsi),%rsi 1938 jmp .Lxts_enc_done 1939 1940.align 16 1941.Lxts_enc_two: 1942 movups (%rdi),%xmm2 1943 movups 16(%rdi),%xmm3 1944 leaq 32(%rdi),%rdi 1945 xorps %xmm10,%xmm2 1946 xorps %xmm11,%xmm3 1947 1948 call _aesni_encrypt2 1949 1950 xorps %xmm10,%xmm2 1951 movdqa %xmm12,%xmm10 1952 xorps %xmm11,%xmm3 1953 movups %xmm2,(%rsi) 1954 movups %xmm3,16(%rsi) 1955 leaq 32(%rsi),%rsi 1956 jmp .Lxts_enc_done 1957 1958.align 16 1959.Lxts_enc_three: 1960 movups (%rdi),%xmm2 1961 movups 16(%rdi),%xmm3 1962 movups 32(%rdi),%xmm4 1963 leaq 48(%rdi),%rdi 1964 xorps %xmm10,%xmm2 1965 xorps %xmm11,%xmm3 1966 xorps %xmm12,%xmm4 1967 1968 call _aesni_encrypt3 1969 1970 xorps %xmm10,%xmm2 1971 movdqa %xmm13,%xmm10 1972 xorps %xmm11,%xmm3 1973 xorps %xmm12,%xmm4 1974 movups %xmm2,(%rsi) 1975 movups %xmm3,16(%rsi) 1976 movups %xmm4,32(%rsi)
1977 leaq 48(%rsi),%rsi 1978 jmp .Lxts_enc_done 1979 1980.align 16 1981.Lxts_enc_four: 1982 movups (%rdi),%xmm2 1983 movups 16(%rdi),%xmm3 1984 movups 32(%rdi),%xmm4 1985 xorps %xmm10,%xmm2 1986 movups 48(%rdi),%xmm5 1987 leaq 64(%rdi),%rdi 1988 xorps %xmm11,%xmm3 1989 xorps %xmm12,%xmm4 1990 xorps %xmm13,%xmm5 1991 1992 call _aesni_encrypt4 1993 1994 pxor %xmm10,%xmm2 1995 movdqa %xmm14,%xmm10 1996 pxor %xmm11,%xmm3 1997 pxor %xmm12,%xmm4 1998 movdqu %xmm2,(%rsi) 1999 pxor %xmm13,%xmm5 2000 movdqu %xmm3,16(%rsi) 2001 movdqu %xmm4,32(%rsi) 2002 movdqu %xmm5,48(%rsi) 2003 leaq 64(%rsi),%rsi 2004 jmp .Lxts_enc_done 2005 2006.align 16 /* If the original length in %r9 is not a multiple of 16, steal: the first %r9 bytes of the last output block at -16(%rsi) are copied to the partial output at (%rsi) and replaced by the trailing input bytes, then that block is re-encrypted in place with the tweak in %xmm10. */ 2007.Lxts_enc_done: 2008 andq $15,%r9 2009 jz .Lxts_enc_ret 2010 movq %r9,%rdx 2011 2012.Lxts_enc_steal: 2013 movzbl (%rdi),%eax 2014 movzbl -16(%rsi),%ecx 2015 leaq 1(%rdi),%rdi 2016 movb %al,-16(%rsi) 2017 movb %cl,0(%rsi) 2018 leaq 1(%rsi),%rsi 2019 subq $1,%rdx 2020 jnz .Lxts_enc_steal 2021 2022 subq %r9,%rsi 2023 movq %rbp,%rcx 2024 movl %r10d,%eax 2025 2026 movups -16(%rsi),%xmm2 2027 xorps %xmm10,%xmm2 2028 movups (%rcx),%xmm0 2029 movups 16(%rcx),%xmm1 2030 leaq 32(%rcx),%rcx 2031 xorps %xmm0,%xmm2 2032.Loop_enc1_10: 2033.byte 102,15,56,220,209 2034 decl %eax 2035 movups (%rcx),%xmm1 2036 leaq 16(%rcx),%rcx 2037 jnz .Loop_enc1_10 2038.byte 102,15,56,221,209 2039 xorps %xmm10,%xmm2 2040 movups %xmm2,-16(%rsi) 2041 /* Zero xmm0 to xmm15 and the 112 bytes at 0(%rsp) to 96(%rsp), then restore %rbp and %rsp through %r11. */ 2042.Lxts_enc_ret: 2043 xorps %xmm0,%xmm0 2044 pxor %xmm1,%xmm1 2045 pxor %xmm2,%xmm2 2046 pxor %xmm3,%xmm3 2047 pxor %xmm4,%xmm4 2048 pxor %xmm5,%xmm5 2049 pxor %xmm6,%xmm6 2050 pxor %xmm7,%xmm7 2051 movaps %xmm0,0(%rsp) 2052 pxor %xmm8,%xmm8 2053 movaps %xmm0,16(%rsp) 2054 pxor %xmm9,%xmm9 2055 movaps %xmm0,32(%rsp) 2056 pxor %xmm10,%xmm10 2057 movaps %xmm0,48(%rsp) 2058 pxor %xmm11,%xmm11 2059 movaps %xmm0,64(%rsp) 2060 pxor %xmm12,%xmm12 2061 movaps %xmm0,80(%rsp) 2062 pxor %xmm13,%xmm13 2063 movaps %xmm0,96(%rsp) 2064 pxor %xmm14,%xmm14 2065 pxor %xmm15,%xmm15 2066 movq -8(%r11),%rbp 2067.cfi_restore %rbp 2068 leaq
(%r11),%rsp 2069.cfi_def_cfa_register %rsp 2070.Lxts_enc_epilogue: 2071 .byte 0xf3,0xc3 2072.cfi_endproc 2073.size aesni_xts_encrypt,.-aesni_xts_encrypt /* aesni_xts_decrypt. Register roles as read from the code below: %rdi = input pointer, %rsi = output pointer, %rdx = length in bytes, %rcx = key schedule for the data blocks with its round count at 240(%rcx), %r8 = second key schedule, %r9 = pointer to the 16 bytes that become the first tweak. The tweak is produced with aesenc and aesenclast (bytes 102,15,56,220 and 102,15,56,221) while the data blocks use aesdec and aesdeclast (222 and 223 in the same position). NOTE(review): presumably the OpenSSL AES-XTS decrypt entry point, confirm the C prototype against the generator. */ 2074.globl aesni_xts_decrypt 2075.type aesni_xts_decrypt,@function 2076.align 16 2077aesni_xts_decrypt: 2078.cfi_startproc 2079 leaq (%rsp),%r11 2080.cfi_def_cfa_register %r11 2081 pushq %rbp 2082.cfi_offset %rbp,-16 2083 subq $112,%rsp 2084 andq $-16,%rsp 2085 movups (%r9),%xmm2 2086 movl 240(%r8),%eax 2087 movl 240(%rcx),%r10d 2088 movups (%r8),%xmm0 2089 movups 16(%r8),%xmm1 2090 leaq 32(%r8),%r8 2091 xorps %xmm0,%xmm2 2092.Loop_enc1_11: 2093.byte 102,15,56,220,209 2094 decl %eax 2095 movups (%r8),%xmm1 2096 leaq 16(%r8),%r8 2097 jnz .Loop_enc1_11 2098.byte 102,15,56,221,209 /* If the byte length is not a multiple of 16, subtract 16 from %rdx so that the last full block is left for the .Lxts_dec_done2 path. %r9 then keeps this adjusted length, whose low four bits are unchanged. */ 2099 xorl %eax,%eax 2100 testq $15,%rdx 2101 setnz %al 2102 shlq $4,%rax 2103 subq %rax,%rdx 2104 2105 movups (%rcx),%xmm0 2106 movq %rcx,%rbp 2107 movl %r10d,%eax 2108 shll $4,%r10d 2109 movq %rdx,%r9 2110 andq $-16,%rdx 2111 2112 movups 16(%rcx,%r10,1),%xmm1 2113 /* Tweak setup: xmm10 to xmm14 receive successive tweaks, each xored with round key 0 in %xmm0, and %xmm15 keeps the running tweak. Each step doubles %xmm15 with paddq and xors in .Lxts_magic masked by psrad $31. 96(%rsp) receives round key 0 xor the key at 16(%rcx,%r10,1). NOTE(review): .Lxts_magic is defined outside this range. */ 2114 movdqa .Lxts_magic(%rip),%xmm8 2115 movdqa %xmm2,%xmm15 2116 pshufd $0x5f,%xmm2,%xmm9 2117 pxor %xmm0,%xmm1 2118 movdqa %xmm9,%xmm14 2119 paddd %xmm9,%xmm9 2120 movdqa %xmm15,%xmm10 2121 psrad $31,%xmm14 2122 paddq %xmm15,%xmm15 2123 pand %xmm8,%xmm14 2124 pxor %xmm0,%xmm10 2125 pxor %xmm14,%xmm15 2126 movdqa %xmm9,%xmm14 2127 paddd %xmm9,%xmm9 2128 movdqa %xmm15,%xmm11 2129 psrad $31,%xmm14 2130 paddq %xmm15,%xmm15 2131 pand %xmm8,%xmm14 2132 pxor %xmm0,%xmm11 2133 pxor %xmm14,%xmm15 2134 movdqa %xmm9,%xmm14 2135 paddd %xmm9,%xmm9 2136 movdqa %xmm15,%xmm12 2137 psrad $31,%xmm14 2138 paddq %xmm15,%xmm15 2139 pand %xmm8,%xmm14 2140 pxor %xmm0,%xmm12 2141 pxor %xmm14,%xmm15 2142 movdqa %xmm9,%xmm14 2143 paddd %xmm9,%xmm9 2144 movdqa %xmm15,%xmm13 2145 psrad $31,%xmm14 2146 paddq %xmm15,%xmm15 2147 pand %xmm8,%xmm14 2148 pxor %xmm0,%xmm13 2149 pxor %xmm14,%xmm15 2150 movdqa %xmm15,%xmm14 2151 psrad $31,%xmm9 2152 paddq %xmm15,%xmm15 2153 pand %xmm8,%xmm9 2154 pxor %xmm0,%xmm14
2155 pxor %xmm9,%xmm15 2156 movaps %xmm1,96(%rsp) 2157 2158 subq $96,%rdx 2159 jc .Lxts_dec_short 2160 /* At least six blocks remain. %rcx = %rbp + 32 + 16 times the round count, %rax = %r10 = 112 minus 16 times the round count (the index .Lxts_dec_loop6 advances by 32 until zero), %r8 = address of .Lxts_magic. */ 2161 movl $16+96,%eax 2162 leaq 32(%rbp,%r10,1),%rcx 2163 subq %r10,%rax 2164 movups 16(%rbp),%xmm1 2165 movq %rax,%r10 2166 leaq .Lxts_magic(%rip),%r8 2167 jmp .Lxts_dec_grandloop 2168 2169.align 32 /* Six blocks per pass. Inputs are xored with the tweaks while the first aesdec rounds (102,15,56,222,...) run. Each tweak is also xored with 96(%rsp) and saved at 0(%rsp) to 80(%rsp) as the memory operand of the final aesdeclast. */ 2170.Lxts_dec_grandloop: 2171 movdqu 0(%rdi),%xmm2 2172 movdqa %xmm0,%xmm8 2173 movdqu 16(%rdi),%xmm3 2174 pxor %xmm10,%xmm2 2175 movdqu 32(%rdi),%xmm4 2176 pxor %xmm11,%xmm3 2177.byte 102,15,56,222,209 2178 movdqu 48(%rdi),%xmm5 2179 pxor %xmm12,%xmm4 2180.byte 102,15,56,222,217 2181 movdqu 64(%rdi),%xmm6 2182 pxor %xmm13,%xmm5 2183.byte 102,15,56,222,225 2184 movdqu 80(%rdi),%xmm7 2185 pxor %xmm15,%xmm8 2186 movdqa 96(%rsp),%xmm9 2187 pxor %xmm14,%xmm6 2188.byte 102,15,56,222,233 2189 movups 32(%rbp),%xmm0 2190 leaq 96(%rdi),%rdi 2191 pxor %xmm8,%xmm7 2192 2193 pxor %xmm9,%xmm10 2194.byte 102,15,56,222,241 2195 pxor %xmm9,%xmm11 2196 movdqa %xmm10,0(%rsp) 2197.byte 102,15,56,222,249 2198 movups 48(%rbp),%xmm1 2199 pxor %xmm9,%xmm12 2200 2201.byte 102,15,56,222,208 2202 pxor %xmm9,%xmm13 2203 movdqa %xmm11,16(%rsp) 2204.byte 102,15,56,222,216 2205 pxor %xmm9,%xmm14 2206 movdqa %xmm12,32(%rsp) 2207.byte 102,15,56,222,224 2208.byte 102,15,56,222,232 2209 pxor %xmm9,%xmm8 2210 movdqa %xmm14,64(%rsp) 2211.byte 102,15,56,222,240 2212.byte 102,15,56,222,248 2213 movups 64(%rbp),%xmm0 2214 movdqa %xmm8,80(%rsp) 2215 pshufd $0x5f,%xmm15,%xmm9 2216 jmp .Lxts_dec_loop6 2217.align 32 /* Middle rounds for all six blocks, two round keys per iteration. */ 2218.Lxts_dec_loop6: 2219.byte 102,15,56,222,209 2220.byte 102,15,56,222,217 2221.byte 102,15,56,222,225 2222.byte 102,15,56,222,233 2223.byte 102,15,56,222,241 2224.byte 102,15,56,222,249 2225 movups -64(%rcx,%rax,1),%xmm1 2226 addq $32,%rax 2227 2228.byte 102,15,56,222,208 2229.byte 102,15,56,222,216 2230.byte 102,15,56,222,224 2231.byte 102,15,56,222,232 2232.byte 102,15,56,222,240 2233.byte 102,15,56,222,248 2234 movups -80(%rcx,%rax,1),%xmm0 2235 jnz .Lxts_dec_loop6 2236 /* Remaining rounds, interleaved with deriving the next six tweaks in xmm10 to xmm15. */ 2237 movdqa
(%r8),%xmm8 2238 movdqa %xmm9,%xmm14 2239 paddd %xmm9,%xmm9 2240.byte 102,15,56,222,209 2241 paddq %xmm15,%xmm15 2242 psrad $31,%xmm14 2243.byte 102,15,56,222,217 2244 pand %xmm8,%xmm14 2245 movups (%rbp),%xmm10 2246.byte 102,15,56,222,225 2247.byte 102,15,56,222,233 2248.byte 102,15,56,222,241 2249 pxor %xmm14,%xmm15 2250 movaps %xmm10,%xmm11 2251.byte 102,15,56,222,249 2252 movups -64(%rcx),%xmm1 2253 2254 movdqa %xmm9,%xmm14 2255.byte 102,15,56,222,208 2256 paddd %xmm9,%xmm9 2257 pxor %xmm15,%xmm10 2258.byte 102,15,56,222,216 2259 psrad $31,%xmm14 2260 paddq %xmm15,%xmm15 2261.byte 102,15,56,222,224 2262.byte 102,15,56,222,232 2263 pand %xmm8,%xmm14 2264 movaps %xmm11,%xmm12 2265.byte 102,15,56,222,240 2266 pxor %xmm14,%xmm15 2267 movdqa %xmm9,%xmm14 2268.byte 102,15,56,222,248 2269 movups -48(%rcx),%xmm0 2270 2271 paddd %xmm9,%xmm9 2272.byte 102,15,56,222,209 2273 pxor %xmm15,%xmm11 2274 psrad $31,%xmm14 2275.byte 102,15,56,222,217 2276 paddq %xmm15,%xmm15 2277 pand %xmm8,%xmm14 2278.byte 102,15,56,222,225 2279.byte 102,15,56,222,233 2280 movdqa %xmm13,48(%rsp) 2281 pxor %xmm14,%xmm15 2282.byte 102,15,56,222,241 2283 movaps %xmm12,%xmm13 2284 movdqa %xmm9,%xmm14 2285.byte 102,15,56,222,249 2286 movups -32(%rcx),%xmm1 2287 2288 paddd %xmm9,%xmm9 2289.byte 102,15,56,222,208 2290 pxor %xmm15,%xmm12 2291 psrad $31,%xmm14 2292.byte 102,15,56,222,216 2293 paddq %xmm15,%xmm15 2294 pand %xmm8,%xmm14 2295.byte 102,15,56,222,224 2296.byte 102,15,56,222,232 2297.byte 102,15,56,222,240 2298 pxor %xmm14,%xmm15 2299 movaps %xmm13,%xmm14 2300.byte 102,15,56,222,248 2301 2302 movdqa %xmm9,%xmm0 2303 paddd %xmm9,%xmm9 2304.byte 102,15,56,222,209 2305 pxor %xmm15,%xmm13 2306 psrad $31,%xmm0 2307.byte 102,15,56,222,217 2308 paddq %xmm15,%xmm15 2309 pand %xmm8,%xmm0 2310.byte 102,15,56,222,225 2311.byte 102,15,56,222,233 2312 pxor %xmm0,%xmm15 2313 movups (%rbp),%xmm0 2314.byte 102,15,56,222,241 2315.byte 102,15,56,222,249 2316 movups 16(%rbp),%xmm1 2317 2318 pxor %xmm15,%xmm14
/* The bytes 102,15,56,223 followed by 84,36,0 then 92,36,16 then 100,36,32 then 108,36,48 then 116,36,64 then 124,36,80 encode aesdeclast N(%rsp),%xmmK for K = 2 to 7 with N = 0, 16, 32, 48, 64, 80. */ 2319.byte 102,15,56,223,84,36,0 2320 psrad $31,%xmm9 2321 paddq %xmm15,%xmm15 2322.byte 102,15,56,223,92,36,16 2323.byte 102,15,56,223,100,36,32 2324 pand %xmm8,%xmm9 2325 movq %r10,%rax 2326.byte 102,15,56,223,108,36,48 2327.byte 102,15,56,223,116,36,64 2328.byte 102,15,56,223,124,36,80 2329 pxor %xmm9,%xmm15 2330 2331 leaq 96(%rsi),%rsi 2332 movups %xmm2,-96(%rsi) 2333 movups %xmm3,-80(%rsi) 2334 movups %xmm4,-64(%rsi) 2335 movups %xmm5,-48(%rsi) 2336 movups %xmm6,-32(%rsi) 2337 movups %xmm7,-16(%rsi) 2338 subq $96,%rdx 2339 jnc .Lxts_dec_grandloop 2340 /* Fewer than six blocks left. Recover the round count into %eax and point %rcx back at the key schedule. */ 2341 movl $16+96,%eax 2342 subl %r10d,%eax 2343 movq %rbp,%rcx 2344 shrl $4,%eax 2345 /* Tail dispatch on the %rdx + 96 remaining whole-block bytes. %r10d keeps the round count. Both %xmm10 and %xmm11 have round key 0 removed up front because .Lxts_dec_done2 uses both. */ 2346.Lxts_dec_short: 2347 2348 movl %eax,%r10d 2349 pxor %xmm0,%xmm10 2350 pxor %xmm0,%xmm11 2351 addq $96,%rdx 2352 jz .Lxts_dec_done 2353 2354 pxor %xmm0,%xmm12 2355 cmpq $0x20,%rdx 2356 jb .Lxts_dec_one 2357 pxor %xmm0,%xmm13 2358 je .Lxts_dec_two 2359 2360 pxor %xmm0,%xmm14 2361 cmpq $0x40,%rdx 2362 jb .Lxts_dec_three 2363 je .Lxts_dec_four 2364 /* Five blocks through _aesni_decrypt6, which is defined outside this range. Afterwards, when a partial block follows (%r9 and 15 nonzero), %xmm10 = %xmm15 and %xmm11 = the next tweak derived from it, then control goes to .Lxts_dec_done2. */ 2365 movdqu (%rdi),%xmm2 2366 movdqu 16(%rdi),%xmm3 2367 movdqu 32(%rdi),%xmm4 2368 pxor %xmm10,%xmm2 2369 movdqu 48(%rdi),%xmm5 2370 pxor %xmm11,%xmm3 2371 movdqu 64(%rdi),%xmm6 2372 leaq 80(%rdi),%rdi 2373 pxor %xmm12,%xmm4 2374 pxor %xmm13,%xmm5 2375 pxor %xmm14,%xmm6 2376 2377 call _aesni_decrypt6 2378 2379 xorps %xmm10,%xmm2 2380 xorps %xmm11,%xmm3 2381 xorps %xmm12,%xmm4 2382 movdqu %xmm2,(%rsi) 2383 xorps %xmm13,%xmm5 2384 movdqu %xmm3,16(%rsi) 2385 xorps %xmm14,%xmm6 2386 movdqu %xmm4,32(%rsi) 2387 pxor %xmm14,%xmm14 2388 movdqu %xmm5,48(%rsi) 2389 pcmpgtd %xmm15,%xmm14 2390 movdqu %xmm6,64(%rsi) 2391 leaq 80(%rsi),%rsi 2392 pshufd $0x13,%xmm14,%xmm11 2393 andq $15,%r9 2394 jz .Lxts_dec_ret 2395 2396 movdqa %xmm15,%xmm10 2397 paddq %xmm15,%xmm15 2398 pand %xmm8,%xmm11 2399 pxor %xmm15,%xmm11 2400 jmp .Lxts_dec_done2 2401 2402.align 16 /* One to four blocks. Each path leaves the next two unused tweaks in %xmm10 and %xmm11 for .Lxts_dec_done. */ 2403.Lxts_dec_one: 2404 movups (%rdi),%xmm2 2405 leaq 16(%rdi),%rdi 2406 xorps %xmm10,%xmm2 2407 movups (%rcx),%xmm0 2408 movups 16(%rcx),%xmm1
2409 leaq 32(%rcx),%rcx 2410 xorps %xmm0,%xmm2 2411.Loop_dec1_12: 2412.byte 102,15,56,222,209 2413 decl %eax 2414 movups (%rcx),%xmm1 2415 leaq 16(%rcx),%rcx 2416 jnz .Loop_dec1_12 2417.byte 102,15,56,223,209 2418 xorps %xmm10,%xmm2 2419 movdqa %xmm11,%xmm10 2420 movups %xmm2,(%rsi) 2421 movdqa %xmm12,%xmm11 2422 leaq 16(%rsi),%rsi 2423 jmp .Lxts_dec_done 2424 2425.align 16 2426.Lxts_dec_two: 2427 movups (%rdi),%xmm2 2428 movups 16(%rdi),%xmm3 2429 leaq 32(%rdi),%rdi 2430 xorps %xmm10,%xmm2 2431 xorps %xmm11,%xmm3 2432 2433 call _aesni_decrypt2 2434 2435 xorps %xmm10,%xmm2 2436 movdqa %xmm12,%xmm10 2437 xorps %xmm11,%xmm3 2438 movdqa %xmm13,%xmm11 2439 movups %xmm2,(%rsi) 2440 movups %xmm3,16(%rsi) 2441 leaq 32(%rsi),%rsi 2442 jmp .Lxts_dec_done 2443 2444.align 16 2445.Lxts_dec_three: 2446 movups (%rdi),%xmm2 2447 movups 16(%rdi),%xmm3 2448 movups 32(%rdi),%xmm4 2449 leaq 48(%rdi),%rdi 2450 xorps %xmm10,%xmm2 2451 xorps %xmm11,%xmm3 2452 xorps %xmm12,%xmm4 2453 2454 call _aesni_decrypt3 2455 2456 xorps %xmm10,%xmm2 2457 movdqa %xmm13,%xmm10 2458 xorps %xmm11,%xmm3 2459 movdqa %xmm14,%xmm11 2460 xorps %xmm12,%xmm4 2461 movups %xmm2,(%rsi) 2462 movups %xmm3,16(%rsi) 2463 movups %xmm4,32(%rsi) 2464 leaq 48(%rsi),%rsi 2465 jmp .Lxts_dec_done 2466 2467.align 16 2468.Lxts_dec_four: 2469 movups (%rdi),%xmm2 2470 movups 16(%rdi),%xmm3 2471 movups 32(%rdi),%xmm4 2472 xorps %xmm10,%xmm2 2473 movups 48(%rdi),%xmm5 2474 leaq 64(%rdi),%rdi 2475 xorps %xmm11,%xmm3 2476 xorps %xmm12,%xmm4 2477 xorps %xmm13,%xmm5 2478 2479 call _aesni_decrypt4 2480 2481 pxor %xmm10,%xmm2 2482 movdqa %xmm14,%xmm10 2483 pxor %xmm11,%xmm3 2484 movdqa %xmm15,%xmm11 2485 pxor %xmm12,%xmm4 2486 movdqu %xmm2,(%rsi) 2487 pxor %xmm13,%xmm5 2488 movdqu %xmm3,16(%rsi) 2489 movdqu %xmm4,32(%rsi) 2490 movdqu %xmm5,48(%rsi) 2491 leaq 64(%rsi),%rsi 2492 jmp .Lxts_dec_done 2493 2494.align 16 /* Finished unless the low four bits of %r9 are nonzero. In that case the code from .Lxts_dec_done2 onward decrypts the block at (%rdi) with the tweak in %xmm11, swaps %r9 bytes with the trailing input, and decrypts the merged block with the tweak in %xmm10. */ 2495.Lxts_dec_done: 2496 andq $15,%r9 2497 jz .Lxts_dec_ret 2498.Lxts_dec_done2: 2499 movq %r9,%rdx 2500 movq %rbp,%rcx
2501 movl %r10d,%eax 2502 /* Continuation of aesni_xts_decrypt, which starts before this range. Decrypt the block loaded from (%rdi) with the tweak in %xmm11 and store it at (%rsi). The bytes 102,15,56,222,209 encode aesdec %xmm1,%xmm2 and 102,15,56,223,209 encode aesdeclast %xmm1,%xmm2. */ 2503 movups (%rdi),%xmm2 2504 xorps %xmm11,%xmm2 2505 movups (%rcx),%xmm0 2506 movups 16(%rcx),%xmm1 2507 leaq 32(%rcx),%rcx 2508 xorps %xmm0,%xmm2 2509.Loop_dec1_13: 2510.byte 102,15,56,222,209 2511 decl %eax 2512 movups (%rcx),%xmm1 2513 leaq 16(%rcx),%rcx 2514 jnz .Loop_dec1_13 2515.byte 102,15,56,223,209 2516 xorps %xmm11,%xmm2 2517 movups %xmm2,(%rsi) 2518 /* Byte swap loop, %rdx = %r9 iterations: the byte at 16(%rdi) goes to (%rsi) and the old byte from (%rsi) goes to 16(%rsi). Afterwards %rsi is moved back by %r9 and the block at (%rsi) is decrypted with the tweak in %xmm10. */ 2519.Lxts_dec_steal: 2520 movzbl 16(%rdi),%eax 2521 movzbl (%rsi),%ecx 2522 leaq 1(%rdi),%rdi 2523 movb %al,(%rsi) 2524 movb %cl,16(%rsi) 2525 leaq 1(%rsi),%rsi 2526 subq $1,%rdx 2527 jnz .Lxts_dec_steal 2528 2529 subq %r9,%rsi 2530 movq %rbp,%rcx 2531 movl %r10d,%eax 2532 2533 movups (%rsi),%xmm2 2534 xorps %xmm10,%xmm2 2535 movups (%rcx),%xmm0 2536 movups 16(%rcx),%xmm1 2537 leaq 32(%rcx),%rcx 2538 xorps %xmm0,%xmm2 2539.Loop_dec1_14: 2540.byte 102,15,56,222,209 2541 decl %eax 2542 movups (%rcx),%xmm1 2543 leaq 16(%rcx),%rcx 2544 jnz .Loop_dec1_14 2545.byte 102,15,56,223,209 2546 xorps %xmm10,%xmm2 2547 movups %xmm2,(%rsi) 2548 /* Zero xmm0 to xmm15 and the 112 bytes at 0(%rsp) to 96(%rsp), then restore %rbp and %rsp through %r11. */ 2549.Lxts_dec_ret: 2550 xorps %xmm0,%xmm0 2551 pxor %xmm1,%xmm1 2552 pxor %xmm2,%xmm2 2553 pxor %xmm3,%xmm3 2554 pxor %xmm4,%xmm4 2555 pxor %xmm5,%xmm5 2556 pxor %xmm6,%xmm6 2557 pxor %xmm7,%xmm7 2558 movaps %xmm0,0(%rsp) 2559 pxor %xmm8,%xmm8 2560 movaps %xmm0,16(%rsp) 2561 pxor %xmm9,%xmm9 2562 movaps %xmm0,32(%rsp) 2563 pxor %xmm10,%xmm10 2564 movaps %xmm0,48(%rsp) 2565 pxor %xmm11,%xmm11 2566 movaps %xmm0,64(%rsp) 2567 pxor %xmm12,%xmm12 2568 movaps %xmm0,80(%rsp) 2569 pxor %xmm13,%xmm13 2570 movaps %xmm0,96(%rsp) 2571 pxor %xmm14,%xmm14 2572 pxor %xmm15,%xmm15 2573 movq -8(%r11),%rbp 2574.cfi_restore %rbp 2575 leaq (%r11),%rsp 2576.cfi_def_cfa_register %rsp 2577.Lxts_dec_epilogue: 2578 .byte 0xf3,0xc3 2579.cfi_endproc 2580.size aesni_xts_decrypt,.-aesni_xts_decrypt /* aesni_ocb_encrypt. Register roles as read from the code below: %rdi = input pointer, %rsi = output pointer, %rdx = number of 16-byte blocks, %rcx = key schedule with its round count at 240(%rcx), %r8 = number of the first block, %r9 = pointer to a 16-byte offset value that is read on entry and written back at .Locb_enc_done. Two more pointers are taken from the stack at 8(%rax) and 16(%rax), with %rax = entry %rsp: %rbx = table of 16-byte entries indexed by 16 times bsf of a block number, %rbp = 16-byte accumulator that is loaded into %xmm8, xored with every input block and stored back. NOTE(review): presumably the OCB L table and checksum, confirm against the C caller. */ 2581.globl aesni_ocb_encrypt 2582.type aesni_ocb_encrypt,@function 2583.align 32 2584aesni_ocb_encrypt: 2585.cfi_startproc 2586 leaq (%rsp),%rax 2587 pushq %rbx 2588.cfi_adjust_cfa_offset 8
2589.cfi_offset %rbx,-16 2590 pushq %rbp 2591.cfi_adjust_cfa_offset 8 2592.cfi_offset %rbp,-24 2593 pushq %r12 2594.cfi_adjust_cfa_offset 8 2595.cfi_offset %r12,-32 2596 pushq %r13 2597.cfi_adjust_cfa_offset 8 2598.cfi_offset %r13,-40 2599 pushq %r14 2600.cfi_adjust_cfa_offset 8 2601.cfi_offset %r14,-48 2602 movq 8(%rax),%rbx 2603 movq 8+8(%rax),%rbp 2604 /* Setup: %r11 = key schedule base, %xmm9 = round key 0 xor the key at 16(%rcx,%r10,1), %xmm15 = offset xor that same key, %xmm1 = round key 1, %rcx = %r11 + 32 + 16 times the round count, %rax = %r10 = 48 minus 16 times the round count (the index the helper round loops advance by 32 until zero), %xmm10 = table entry at (%rbx), %xmm8 = accumulator. */ 2605 movl 240(%rcx),%r10d 2606 movq %rcx,%r11 2607 shll $4,%r10d 2608 movups (%rcx),%xmm9 2609 movups 16(%rcx,%r10,1),%xmm1 2610 2611 movdqu (%r9),%xmm15 2612 pxor %xmm1,%xmm9 2613 pxor %xmm1,%xmm15 2614 2615 movl $16+32,%eax 2616 leaq 32(%r11,%r10,1),%rcx 2617 movups 16(%r11),%xmm1 2618 subq %r10,%rax 2619 movq %rax,%r10 2620 2621 movdqu (%rbx),%xmm10 2622 movdqu (%rbp),%xmm8 2623 /* If the first block number is even, handle one block alone with the table entry selected by bsf of that number, so that the grouped code below starts on an odd block number. */ 2624 testq $1,%r8 2625 jnz .Locb_enc_odd 2626 2627 bsfq %r8,%r12 2628 addq $1,%r8 2629 shlq $4,%r12 2630 movdqu (%rbx,%r12,1),%xmm7 2631 movdqu (%rdi),%xmm2 2632 leaq 16(%rdi),%rdi 2633 2634 call __ocb_encrypt1 2635 2636 movdqa %xmm7,%xmm15 2637 movups %xmm2,(%rsi) 2638 leaq 16(%rsi),%rsi 2639 subq $1,%rdx 2640 jz .Locb_enc_done 2641 /* %r12, %r13, %r14 = 16 times bsf of %r8+1, %r8+3, %r8+5, the table indexes for the 2nd, 4th and 6th block of the next group. %r8 advances by 6. */ 2642.Locb_enc_odd: 2643 leaq 1(%r8),%r12 2644 leaq 3(%r8),%r13 2645 leaq 5(%r8),%r14 2646 leaq 6(%r8),%r8 2647 bsfq %r12,%r12 2648 bsfq %r13,%r13 2649 bsfq %r14,%r14 2650 shlq $4,%r12 2651 shlq $4,%r13 2652 shlq $4,%r14 2653 2654 subq $6,%rdx 2655 jc .Locb_enc_short 2656 jmp .Locb_enc_grandloop 2657 2658.align 32 /* Six blocks per pass through __ocb_encrypt6. */ 2659.Locb_enc_grandloop: 2660 movdqu 0(%rdi),%xmm2 2661 movdqu 16(%rdi),%xmm3 2662 movdqu 32(%rdi),%xmm4 2663 movdqu 48(%rdi),%xmm5 2664 movdqu 64(%rdi),%xmm6 2665 movdqu 80(%rdi),%xmm7 2666 leaq 96(%rdi),%rdi 2667 2668 call __ocb_encrypt6 2669 2670 movups %xmm2,0(%rsi) 2671 movups %xmm3,16(%rsi) 2672 movups %xmm4,32(%rsi) 2673 movups %xmm5,48(%rsi) 2674 movups %xmm6,64(%rsi) 2675 movups %xmm7,80(%rsi) 2676 leaq 96(%rsi),%rsi 2677 subq $6,%rdx 2678 jnc .Locb_enc_grandloop 2679 /* One to five blocks left (or none). Inputs beyond the remaining count are zeroed before calling a wider helper, and afterwards %xmm15 is copied from the offset register of the last real block. */ 2680.Locb_enc_short: 2681 addq $6,%rdx 2682 jz .Locb_enc_done 2683 2684 movdqu 0(%rdi),%xmm2 2685 cmpq $2,%rdx 2686 jb
.Locb_enc_one 2687 movdqu 16(%rdi),%xmm3 2688 je .Locb_enc_two 2689 2690 movdqu 32(%rdi),%xmm4 2691 cmpq $4,%rdx 2692 jb .Locb_enc_three 2693 movdqu 48(%rdi),%xmm5 2694 je .Locb_enc_four 2695 2696 movdqu 64(%rdi),%xmm6 2697 pxor %xmm7,%xmm7 2698 2699 call __ocb_encrypt6 2700 2701 movdqa %xmm14,%xmm15 2702 movups %xmm2,0(%rsi) 2703 movups %xmm3,16(%rsi) 2704 movups %xmm4,32(%rsi) 2705 movups %xmm5,48(%rsi) 2706 movups %xmm6,64(%rsi) 2707 2708 jmp .Locb_enc_done 2709 2710.align 16 2711.Locb_enc_one: 2712 movdqa %xmm10,%xmm7 2713 2714 call __ocb_encrypt1 2715 2716 movdqa %xmm7,%xmm15 2717 movups %xmm2,0(%rsi) 2718 jmp .Locb_enc_done 2719 2720.align 16 2721.Locb_enc_two: 2722 pxor %xmm4,%xmm4 2723 pxor %xmm5,%xmm5 2724 2725 call __ocb_encrypt4 2726 2727 movdqa %xmm11,%xmm15 2728 movups %xmm2,0(%rsi) 2729 movups %xmm3,16(%rsi) 2730 2731 jmp .Locb_enc_done 2732 2733.align 16 2734.Locb_enc_three: 2735 pxor %xmm5,%xmm5 2736 2737 call __ocb_encrypt4 2738 2739 movdqa %xmm12,%xmm15 2740 movups %xmm2,0(%rsi) 2741 movups %xmm3,16(%rsi) 2742 movups %xmm4,32(%rsi) 2743 2744 jmp .Locb_enc_done 2745 2746.align 16 2747.Locb_enc_four: 2748 call __ocb_encrypt4 2749 2750 movdqa %xmm13,%xmm15 2751 movups %xmm2,0(%rsi) 2752 movups %xmm3,16(%rsi) 2753 movups %xmm4,32(%rsi) 2754 movups %xmm5,48(%rsi) 2755 /* After a helper call %xmm0 holds the key last loaded from -16(%rcx) by its round loop, which is the same address as the key xored into %xmm15 during setup. That key is removed from %xmm15, the accumulator is stored to (%rbp) and the offset to (%r9), xmm0 to xmm15 are zeroed, and the saved %r14, %r13, %r12, %rbp, %rbx are reloaded relative to %rax = %rsp + 40. */ 2756.Locb_enc_done: 2757 pxor %xmm0,%xmm15 2758 movdqu %xmm8,(%rbp) 2759 movdqu %xmm15,(%r9) 2760 2761 xorps %xmm0,%xmm0 2762 pxor %xmm1,%xmm1 2763 pxor %xmm2,%xmm2 2764 pxor %xmm3,%xmm3 2765 pxor %xmm4,%xmm4 2766 pxor %xmm5,%xmm5 2767 pxor %xmm6,%xmm6 2768 pxor %xmm7,%xmm7 2769 pxor %xmm8,%xmm8 2770 pxor %xmm9,%xmm9 2771 pxor %xmm10,%xmm10 2772 pxor %xmm11,%xmm11 2773 pxor %xmm12,%xmm12 2774 pxor %xmm13,%xmm13 2775 pxor %xmm14,%xmm14 2776 pxor %xmm15,%xmm15 2777 leaq 40(%rsp),%rax 2778.cfi_def_cfa %rax,8 2779 movq -40(%rax),%r14 2780.cfi_restore %r14 2781 movq -32(%rax),%r13 2782.cfi_restore %r13 2783 movq -24(%rax),%r12 2784.cfi_restore %r12 2785 movq -16(%rax),%rbp 2786.cfi_restore
%rbp 2787 movq -8(%rax),%rbx 2788.cfi_restore %rbx 2789 leaq (%rax),%rsp 2790.cfi_def_cfa_register %rsp 2791.Locb_enc_epilogue: 2792 .byte 0xf3,0xc3 2793.cfi_endproc 2794.size aesni_ocb_encrypt,.-aesni_ocb_encrypt 2795 /* __ocb_encrypt6: local helper that processes six blocks. In: xmm2 to xmm7 = input blocks, %xmm15 = current offset xor last round key, %xmm10 = table entry at (%rbx), %r12 %r13 %r14 = byte indexes into the %rbx table, %xmm8 = accumulator, %xmm9 = round key 0 xor last round key, %xmm1 = round key 1, %r11 = key schedule, %rcx and %rax = round loop base and index, %r10 = index reset value. Out: xmm2 to xmm7 = encrypted blocks, xmm11 to xmm15 = offsets of blocks 2 to 6 xor last round key, %xmm10 reloaded from (%rbx), %r12 to %r14 recomputed as 16 times bsf of %r8+1, %r8+3, %r8+5, %r8 increased by 6. The offsets are chained by xoring table entries: positions 1, 3, 5 use the (%rbx) entry and positions 2, 4, 6 use the entries at %r12, %r13, %r14. Every input is folded into %xmm8 before it is xored with its offset. */ 2796.type __ocb_encrypt6,@function 2797.align 32 2798__ocb_encrypt6: 2799.cfi_startproc 2800 pxor %xmm9,%xmm15 2801 movdqu (%rbx,%r12,1),%xmm11 2802 movdqa %xmm10,%xmm12 2803 movdqu (%rbx,%r13,1),%xmm13 2804 movdqa %xmm10,%xmm14 2805 pxor %xmm15,%xmm10 2806 movdqu (%rbx,%r14,1),%xmm15 2807 pxor %xmm10,%xmm11 2808 pxor %xmm2,%xmm8 2809 pxor %xmm10,%xmm2 2810 pxor %xmm11,%xmm12 2811 pxor %xmm3,%xmm8 2812 pxor %xmm11,%xmm3 2813 pxor %xmm12,%xmm13 2814 pxor %xmm4,%xmm8 2815 pxor %xmm12,%xmm4 2816 pxor %xmm13,%xmm14 2817 pxor %xmm5,%xmm8 2818 pxor %xmm13,%xmm5 2819 pxor %xmm14,%xmm15 2820 pxor %xmm6,%xmm8 2821 pxor %xmm14,%xmm6 2822 pxor %xmm7,%xmm8 2823 pxor %xmm15,%xmm7 2824 movups 32(%r11),%xmm0 2825 2826 leaq 1(%r8),%r12 2827 leaq 3(%r8),%r13 2828 leaq 5(%r8),%r14 2829 addq $6,%r8 2830 pxor %xmm9,%xmm10 2831 bsfq %r12,%r12 2832 bsfq %r13,%r13 2833 bsfq %r14,%r14 2834 /* The bytes 102,15,56,220,N encode aesenc of %xmm1 (N = 209, 217, 225, 233, 241, 249) or %xmm0 (N = 208, 216, 224, 232, 240, 248) into xmm2 to xmm7. The interleaved pxor %xmm9 turns each offset xor round key 0 into offset xor last round key, ready for the closing aesenclast. */ 2835.byte 102,15,56,220,209 2836.byte 102,15,56,220,217 2837.byte 102,15,56,220,225 2838.byte 102,15,56,220,233 2839 pxor %xmm9,%xmm11 2840 pxor %xmm9,%xmm12 2841.byte 102,15,56,220,241 2842 pxor %xmm9,%xmm13 2843 pxor %xmm9,%xmm14 2844.byte 102,15,56,220,249 2845 movups 48(%r11),%xmm1 2846 pxor %xmm9,%xmm15 2847 2848.byte 102,15,56,220,208 2849.byte 102,15,56,220,216 2850.byte 102,15,56,220,224 2851.byte 102,15,56,220,232 2852.byte 102,15,56,220,240 2853.byte 102,15,56,220,248 2854 movups 64(%r11),%xmm0 2855 shlq $4,%r12 2856 shlq $4,%r13 2857 jmp .Locb_enc_loop6 2858 2859.align 32 /* Round loop: two round keys per iteration, %rax advances by 32 until it reaches zero. */ 2860.Locb_enc_loop6: 2861.byte 102,15,56,220,209 2862.byte 102,15,56,220,217 2863.byte 102,15,56,220,225 2864.byte 102,15,56,220,233 2865.byte 102,15,56,220,241 2866.byte 102,15,56,220,249 2867 movups (%rcx,%rax,1),%xmm1 2868 addq $32,%rax 2869 2870.byte 102,15,56,220,208 2871.byte 102,15,56,220,216 2872.byte
102,15,56,220,224 2873.byte 102,15,56,220,232 2874.byte 102,15,56,220,240 2875.byte 102,15,56,220,248 2876 movups -16(%rcx,%rax,1),%xmm0 2877 jnz .Locb_enc_loop6 2878 /* End of __ocb_encrypt6, which starts before this range: one more aesenc round, reload round key 1 into %xmm1, finish the shift of %r14, then the bytes 102,65,15,56,221 followed by 210, 219, 228, 237, 246, 255 encode aesenclast of xmm10 to xmm15 into xmm2 to xmm7. %xmm10 is reloaded from (%rbx) after its use, %rax is reset from %r10, and .byte 0xf3,0xc3 is the return (rep ret encoding). */ 2879.byte 102,15,56,220,209 2880.byte 102,15,56,220,217 2881.byte 102,15,56,220,225 2882.byte 102,15,56,220,233 2883.byte 102,15,56,220,241 2884.byte 102,15,56,220,249 2885 movups 16(%r11),%xmm1 2886 shlq $4,%r14 2887 2888.byte 102,65,15,56,221,210 2889 movdqu (%rbx),%xmm10 2890 movq %r10,%rax 2891.byte 102,65,15,56,221,219 2892.byte 102,65,15,56,221,228 2893.byte 102,65,15,56,221,237 2894.byte 102,65,15,56,221,246 2895.byte 102,65,15,56,221,255 2896 .byte 0xf3,0xc3 2897.cfi_endproc 2898.size __ocb_encrypt6,.-__ocb_encrypt6 2899 /* __ocb_encrypt4: local helper that processes four blocks. In: xmm2 to xmm5 = input blocks, %xmm15 = current offset xor last round key, %xmm10 = table entry at (%rbx), %r12 and %r13 = byte indexes into the %rbx table, %xmm8 = accumulator, %xmm9 = round key 0 xor last round key, %xmm1 = round key 1, %r11 = key schedule, %rcx and %rax = round loop base and index, %r10 = index reset value. Out: xmm2 to xmm5 = encrypted blocks, xmm10 to xmm13 = the four offsets xor last round key. Offsets for positions 1 and 3 use the (%rbx) entry, positions 2 and 4 use the entries at %r12 and %r13. Unlike the six-block helper it does not touch %r8, %r12, %r13 or reload %xmm10. */ 2900.type __ocb_encrypt4,@function 2901.align 32 2902__ocb_encrypt4: 2903.cfi_startproc 2904 pxor %xmm9,%xmm15 2905 movdqu (%rbx,%r12,1),%xmm11 2906 movdqa %xmm10,%xmm12 2907 movdqu (%rbx,%r13,1),%xmm13 2908 pxor %xmm15,%xmm10 2909 pxor %xmm10,%xmm11 2910 pxor %xmm2,%xmm8 2911 pxor %xmm10,%xmm2 2912 pxor %xmm11,%xmm12 2913 pxor %xmm3,%xmm8 2914 pxor %xmm11,%xmm3 2915 pxor %xmm12,%xmm13 2916 pxor %xmm4,%xmm8 2917 pxor %xmm12,%xmm4 2918 pxor %xmm5,%xmm8 2919 pxor %xmm13,%xmm5 2920 movups 32(%r11),%xmm0 2921 /* Convert each offset xor round key 0 into offset xor last round key for the closing aesenclast. */ 2922 pxor %xmm9,%xmm10 2923 pxor %xmm9,%xmm11 2924 pxor %xmm9,%xmm12 2925 pxor %xmm9,%xmm13 2926 2927.byte 102,15,56,220,209 2928.byte 102,15,56,220,217 2929.byte 102,15,56,220,225 2930.byte 102,15,56,220,233 2931 movups 48(%r11),%xmm1 2932 2933.byte 102,15,56,220,208 2934.byte 102,15,56,220,216 2935.byte 102,15,56,220,224 2936.byte 102,15,56,220,232 2937 movups 64(%r11),%xmm0 2938 jmp .Locb_enc_loop4 2939 2940.align 32 /* Round loop: two round keys per iteration, %rax advances by 32 until it reaches zero. */ 2941.Locb_enc_loop4: 2942.byte 102,15,56,220,209 2943.byte 102,15,56,220,217 2944.byte 102,15,56,220,225 2945.byte 102,15,56,220,233 2946 movups (%rcx,%rax,1),%xmm1 2947 addq $32,%rax 2948 2949.byte 102,15,56,220,208 2950.byte 102,15,56,220,216 2951.byte 102,15,56,220,224 2952.byte 102,15,56,220,232 2953 movups -16(%rcx,%rax,1),%xmm0 2954 jnz .Locb_enc_loop4
2955 2956.byte 102,15,56,220,209 2957.byte 102,15,56,220,217 2958.byte 102,15,56,220,225 2959.byte 102,15,56,220,233 2960 movups 16(%r11),%xmm1 2961 movq %r10,%rax 2962 2963.byte 102,65,15,56,221,210 2964.byte 102,65,15,56,221,219 2965.byte 102,65,15,56,221,228 2966.byte 102,65,15,56,221,237 2967 .byte 0xf3,0xc3 2968.cfi_endproc 2969.size __ocb_encrypt4,.-__ocb_encrypt4 2970 2971.type __ocb_encrypt1,@function 2972.align 32 2973__ocb_encrypt1: 2974.cfi_startproc 2975 pxor %xmm15,%xmm7 2976 pxor %xmm9,%xmm7 2977 pxor %xmm2,%xmm8 2978 pxor %xmm7,%xmm2 2979 movups 32(%r11),%xmm0 2980 2981.byte 102,15,56,220,209 2982 movups 48(%r11),%xmm1 2983 pxor %xmm9,%xmm7 2984 2985.byte 102,15,56,220,208 2986 movups 64(%r11),%xmm0 2987 jmp .Locb_enc_loop1 2988 2989.align 32 2990.Locb_enc_loop1: 2991.byte 102,15,56,220,209 2992 movups (%rcx,%rax,1),%xmm1 2993 addq $32,%rax 2994 2995.byte 102,15,56,220,208 2996 movups -16(%rcx,%rax,1),%xmm0 2997 jnz .Locb_enc_loop1 2998 2999.byte 102,15,56,220,209 3000 movups 16(%r11),%xmm1 3001 movq %r10,%rax 3002 3003.byte 102,15,56,221,215 3004 .byte 0xf3,0xc3 3005.cfi_endproc 3006.size __ocb_encrypt1,.-__ocb_encrypt1 3007 3008.globl aesni_ocb_decrypt 3009.type aesni_ocb_decrypt,@function 3010.align 32 3011aesni_ocb_decrypt: 3012.cfi_startproc 3013 leaq (%rsp),%rax 3014 pushq %rbx 3015.cfi_adjust_cfa_offset 8 3016.cfi_offset %rbx,-16 3017 pushq %rbp 3018.cfi_adjust_cfa_offset 8 3019.cfi_offset %rbp,-24 3020 pushq %r12 3021.cfi_adjust_cfa_offset 8 3022.cfi_offset %r12,-32 3023 pushq %r13 3024.cfi_adjust_cfa_offset 8 3025.cfi_offset %r13,-40 3026 pushq %r14 3027.cfi_adjust_cfa_offset 8 3028.cfi_offset %r14,-48 3029 movq 8(%rax),%rbx 3030 movq 8+8(%rax),%rbp 3031 3032 movl 240(%rcx),%r10d 3033 movq %rcx,%r11 3034 shll $4,%r10d 3035 movups (%rcx),%xmm9 3036 movups 16(%rcx,%r10,1),%xmm1 3037 3038 movdqu (%r9),%xmm15 3039 pxor %xmm1,%xmm9 3040 pxor %xmm1,%xmm15 3041 3042 movl $16+32,%eax 3043 leaq 32(%r11,%r10,1),%rcx 3044 movups 16(%r11),%xmm1 3045 
subq %r10,%rax 3046 movq %rax,%r10 3047 3048 movdqu (%rbx),%xmm10 3049 movdqu (%rbp),%xmm8 3050 3051 testq $1,%r8 3052 jnz .Locb_dec_odd 3053 3054 bsfq %r8,%r12 3055 addq $1,%r8 3056 shlq $4,%r12 3057 movdqu (%rbx,%r12,1),%xmm7 3058 movdqu (%rdi),%xmm2 3059 leaq 16(%rdi),%rdi 3060 3061 call __ocb_decrypt1 3062 3063 movdqa %xmm7,%xmm15 3064 movups %xmm2,(%rsi) 3065 xorps %xmm2,%xmm8 3066 leaq 16(%rsi),%rsi 3067 subq $1,%rdx 3068 jz .Locb_dec_done 3069 3070.Locb_dec_odd: 3071 leaq 1(%r8),%r12 3072 leaq 3(%r8),%r13 3073 leaq 5(%r8),%r14 3074 leaq 6(%r8),%r8 3075 bsfq %r12,%r12 3076 bsfq %r13,%r13 3077 bsfq %r14,%r14 3078 shlq $4,%r12 3079 shlq $4,%r13 3080 shlq $4,%r14 3081 3082 subq $6,%rdx 3083 jc .Locb_dec_short 3084 jmp .Locb_dec_grandloop 3085 3086.align 32 3087.Locb_dec_grandloop: 3088 movdqu 0(%rdi),%xmm2 3089 movdqu 16(%rdi),%xmm3 3090 movdqu 32(%rdi),%xmm4 3091 movdqu 48(%rdi),%xmm5 3092 movdqu 64(%rdi),%xmm6 3093 movdqu 80(%rdi),%xmm7 3094 leaq 96(%rdi),%rdi 3095 3096 call __ocb_decrypt6 3097 3098 movups %xmm2,0(%rsi) 3099 pxor %xmm2,%xmm8 3100 movups %xmm3,16(%rsi) 3101 pxor %xmm3,%xmm8 3102 movups %xmm4,32(%rsi) 3103 pxor %xmm4,%xmm8 3104 movups %xmm5,48(%rsi) 3105 pxor %xmm5,%xmm8 3106 movups %xmm6,64(%rsi) 3107 pxor %xmm6,%xmm8 3108 movups %xmm7,80(%rsi) 3109 pxor %xmm7,%xmm8 3110 leaq 96(%rsi),%rsi 3111 subq $6,%rdx 3112 jnc .Locb_dec_grandloop 3113 3114.Locb_dec_short: 3115 addq $6,%rdx 3116 jz .Locb_dec_done 3117 3118 movdqu 0(%rdi),%xmm2 3119 cmpq $2,%rdx 3120 jb .Locb_dec_one 3121 movdqu 16(%rdi),%xmm3 3122 je .Locb_dec_two 3123 3124 movdqu 32(%rdi),%xmm4 3125 cmpq $4,%rdx 3126 jb .Locb_dec_three 3127 movdqu 48(%rdi),%xmm5 3128 je .Locb_dec_four 3129 3130 movdqu 64(%rdi),%xmm6 3131 pxor %xmm7,%xmm7 3132 3133 call __ocb_decrypt6 3134 3135 movdqa %xmm14,%xmm15 3136 movups %xmm2,0(%rsi) 3137 pxor %xmm2,%xmm8 3138 movups %xmm3,16(%rsi) 3139 pxor %xmm3,%xmm8 3140 movups %xmm4,32(%rsi) 3141 pxor %xmm4,%xmm8 3142 movups %xmm5,48(%rsi) 3143 pxor %xmm5,%xmm8 
3144 movups %xmm6,64(%rsi) 3145 pxor %xmm6,%xmm8 3146 3147 jmp .Locb_dec_done 3148 3149.align 16 3150.Locb_dec_one: 3151 movdqa %xmm10,%xmm7 3152 3153 call __ocb_decrypt1 3154 3155 movdqa %xmm7,%xmm15 3156 movups %xmm2,0(%rsi) 3157 xorps %xmm2,%xmm8 3158 jmp .Locb_dec_done 3159 3160.align 16 3161.Locb_dec_two: 3162 pxor %xmm4,%xmm4 3163 pxor %xmm5,%xmm5 3164 3165 call __ocb_decrypt4 3166 3167 movdqa %xmm11,%xmm15 3168 movups %xmm2,0(%rsi) 3169 xorps %xmm2,%xmm8 3170 movups %xmm3,16(%rsi) 3171 xorps %xmm3,%xmm8 3172 3173 jmp .Locb_dec_done 3174 3175.align 16 3176.Locb_dec_three: 3177 pxor %xmm5,%xmm5 3178 3179 call __ocb_decrypt4 3180 3181 movdqa %xmm12,%xmm15 3182 movups %xmm2,0(%rsi) 3183 xorps %xmm2,%xmm8 3184 movups %xmm3,16(%rsi) 3185 xorps %xmm3,%xmm8 3186 movups %xmm4,32(%rsi) 3187 xorps %xmm4,%xmm8 3188 3189 jmp .Locb_dec_done 3190 3191.align 16 3192.Locb_dec_four: 3193 call __ocb_decrypt4 3194 3195 movdqa %xmm13,%xmm15 3196 movups %xmm2,0(%rsi) 3197 pxor %xmm2,%xmm8 3198 movups %xmm3,16(%rsi) 3199 pxor %xmm3,%xmm8 3200 movups %xmm4,32(%rsi) 3201 pxor %xmm4,%xmm8 3202 movups %xmm5,48(%rsi) 3203 pxor %xmm5,%xmm8 3204 3205.Locb_dec_done: 3206 pxor %xmm0,%xmm15 3207 movdqu %xmm8,(%rbp) 3208 movdqu %xmm15,(%r9) 3209 3210 xorps %xmm0,%xmm0 3211 pxor %xmm1,%xmm1 3212 pxor %xmm2,%xmm2 3213 pxor %xmm3,%xmm3 3214 pxor %xmm4,%xmm4 3215 pxor %xmm5,%xmm5 3216 pxor %xmm6,%xmm6 3217 pxor %xmm7,%xmm7 3218 pxor %xmm8,%xmm8 3219 pxor %xmm9,%xmm9 3220 pxor %xmm10,%xmm10 3221 pxor %xmm11,%xmm11 3222 pxor %xmm12,%xmm12 3223 pxor %xmm13,%xmm13 3224 pxor %xmm14,%xmm14 3225 pxor %xmm15,%xmm15 3226 leaq 40(%rsp),%rax 3227.cfi_def_cfa %rax,8 3228 movq -40(%rax),%r14 3229.cfi_restore %r14 3230 movq -32(%rax),%r13 3231.cfi_restore %r13 3232 movq -24(%rax),%r12 3233.cfi_restore %r12 3234 movq -16(%rax),%rbp 3235.cfi_restore %rbp 3236 movq -8(%rax),%rbx 3237.cfi_restore %rbx 3238 leaq (%rax),%rsp 3239.cfi_def_cfa_register %rsp 3240.Locb_dec_epilogue: 3241 .byte 0xf3,0xc3 
3242.cfi_endproc 3243.size aesni_ocb_decrypt,.-aesni_ocb_decrypt 3244 3245.type __ocb_decrypt6,@function 3246.align 32 3247__ocb_decrypt6: 3248.cfi_startproc 3249 pxor %xmm9,%xmm15 3250 movdqu (%rbx,%r12,1),%xmm11 3251 movdqa %xmm10,%xmm12 3252 movdqu (%rbx,%r13,1),%xmm13 3253 movdqa %xmm10,%xmm14 3254 pxor %xmm15,%xmm10 3255 movdqu (%rbx,%r14,1),%xmm15 3256 pxor %xmm10,%xmm11 3257 pxor %xmm10,%xmm2 3258 pxor %xmm11,%xmm12 3259 pxor %xmm11,%xmm3 3260 pxor %xmm12,%xmm13 3261 pxor %xmm12,%xmm4 3262 pxor %xmm13,%xmm14 3263 pxor %xmm13,%xmm5 3264 pxor %xmm14,%xmm15 3265 pxor %xmm14,%xmm6 3266 pxor %xmm15,%xmm7 3267 movups 32(%r11),%xmm0 3268 3269 leaq 1(%r8),%r12 3270 leaq 3(%r8),%r13 3271 leaq 5(%r8),%r14 3272 addq $6,%r8 3273 pxor %xmm9,%xmm10 3274 bsfq %r12,%r12 3275 bsfq %r13,%r13 3276 bsfq %r14,%r14 3277 3278.byte 102,15,56,222,209 3279.byte 102,15,56,222,217 3280.byte 102,15,56,222,225 3281.byte 102,15,56,222,233 3282 pxor %xmm9,%xmm11 3283 pxor %xmm9,%xmm12 3284.byte 102,15,56,222,241 3285 pxor %xmm9,%xmm13 3286 pxor %xmm9,%xmm14 3287.byte 102,15,56,222,249 3288 movups 48(%r11),%xmm1 3289 pxor %xmm9,%xmm15 3290 3291.byte 102,15,56,222,208 3292.byte 102,15,56,222,216 3293.byte 102,15,56,222,224 3294.byte 102,15,56,222,232 3295.byte 102,15,56,222,240 3296.byte 102,15,56,222,248 3297 movups 64(%r11),%xmm0 3298 shlq $4,%r12 3299 shlq $4,%r13 3300 jmp .Locb_dec_loop6 3301 3302.align 32 3303.Locb_dec_loop6: 3304.byte 102,15,56,222,209 3305.byte 102,15,56,222,217 3306.byte 102,15,56,222,225 3307.byte 102,15,56,222,233 3308.byte 102,15,56,222,241 3309.byte 102,15,56,222,249 3310 movups (%rcx,%rax,1),%xmm1 3311 addq $32,%rax 3312 3313.byte 102,15,56,222,208 3314.byte 102,15,56,222,216 3315.byte 102,15,56,222,224 3316.byte 102,15,56,222,232 3317.byte 102,15,56,222,240 3318.byte 102,15,56,222,248 3319 movups -16(%rcx,%rax,1),%xmm0 3320 jnz .Locb_dec_loop6 3321 3322.byte 102,15,56,222,209 3323.byte 102,15,56,222,217 3324.byte 102,15,56,222,225 3325.byte 102,15,56,222,233 
3326.byte 102,15,56,222,241 3327.byte 102,15,56,222,249 3328 movups 16(%r11),%xmm1 3329 shlq $4,%r14 3330 3331.byte 102,65,15,56,223,210 3332 movdqu (%rbx),%xmm10 3333 movq %r10,%rax 3334.byte 102,65,15,56,223,219 3335.byte 102,65,15,56,223,228 3336.byte 102,65,15,56,223,237 3337.byte 102,65,15,56,223,246 3338.byte 102,65,15,56,223,255 3339 .byte 0xf3,0xc3 3340.cfi_endproc 3341.size __ocb_decrypt6,.-__ocb_decrypt6 3342 3343.type __ocb_decrypt4,@function 3344.align 32 3345__ocb_decrypt4: 3346.cfi_startproc 3347 pxor %xmm9,%xmm15 3348 movdqu (%rbx,%r12,1),%xmm11 3349 movdqa %xmm10,%xmm12 3350 movdqu (%rbx,%r13,1),%xmm13 3351 pxor %xmm15,%xmm10 3352 pxor %xmm10,%xmm11 3353 pxor %xmm10,%xmm2 3354 pxor %xmm11,%xmm12 3355 pxor %xmm11,%xmm3 3356 pxor %xmm12,%xmm13 3357 pxor %xmm12,%xmm4 3358 pxor %xmm13,%xmm5 3359 movups 32(%r11),%xmm0 3360 3361 pxor %xmm9,%xmm10 3362 pxor %xmm9,%xmm11 3363 pxor %xmm9,%xmm12 3364 pxor %xmm9,%xmm13 3365 3366.byte 102,15,56,222,209 3367.byte 102,15,56,222,217 3368.byte 102,15,56,222,225 3369.byte 102,15,56,222,233 3370 movups 48(%r11),%xmm1 3371 3372.byte 102,15,56,222,208 3373.byte 102,15,56,222,216 3374.byte 102,15,56,222,224 3375.byte 102,15,56,222,232 3376 movups 64(%r11),%xmm0 3377 jmp .Locb_dec_loop4 3378 3379.align 32 3380.Locb_dec_loop4: 3381.byte 102,15,56,222,209 3382.byte 102,15,56,222,217 3383.byte 102,15,56,222,225 3384.byte 102,15,56,222,233 3385 movups (%rcx,%rax,1),%xmm1 3386 addq $32,%rax 3387 3388.byte 102,15,56,222,208 3389.byte 102,15,56,222,216 3390.byte 102,15,56,222,224 3391.byte 102,15,56,222,232 3392 movups -16(%rcx,%rax,1),%xmm0 3393 jnz .Locb_dec_loop4 3394 3395.byte 102,15,56,222,209 3396.byte 102,15,56,222,217 3397.byte 102,15,56,222,225 3398.byte 102,15,56,222,233 3399 movups 16(%r11),%xmm1 3400 movq %r10,%rax 3401 3402.byte 102,65,15,56,223,210 3403.byte 102,65,15,56,223,219 3404.byte 102,65,15,56,223,228 3405.byte 102,65,15,56,223,237 3406 .byte 0xf3,0xc3 3407.cfi_endproc 3408.size 
__ocb_decrypt4,.-__ocb_decrypt4 3409 3410.type __ocb_decrypt1,@function 3411.align 32 3412__ocb_decrypt1: 3413.cfi_startproc 3414 pxor %xmm15,%xmm7 3415 pxor %xmm9,%xmm7 3416 pxor %xmm7,%xmm2 3417 movups 32(%r11),%xmm0 3418 3419.byte 102,15,56,222,209 3420 movups 48(%r11),%xmm1 3421 pxor %xmm9,%xmm7 3422 3423.byte 102,15,56,222,208 3424 movups 64(%r11),%xmm0 3425 jmp .Locb_dec_loop1 3426 3427.align 32 3428.Locb_dec_loop1: 3429.byte 102,15,56,222,209 3430 movups (%rcx,%rax,1),%xmm1 3431 addq $32,%rax 3432 3433.byte 102,15,56,222,208 3434 movups -16(%rcx,%rax,1),%xmm0 3435 jnz .Locb_dec_loop1 3436 3437.byte 102,15,56,222,209 3438 movups 16(%r11),%xmm1 3439 movq %r10,%rax 3440 3441.byte 102,15,56,223,215 3442 .byte 0xf3,0xc3 3443.cfi_endproc 3444.size __ocb_decrypt1,.-__ocb_decrypt1 3445.globl aesni_cbc_encrypt 3446.type aesni_cbc_encrypt,@function 3447.align 16 3448aesni_cbc_encrypt: 3449.cfi_startproc 3450 testq %rdx,%rdx 3451 jz .Lcbc_ret 3452 3453 movl 240(%rcx),%r10d 3454 movq %rcx,%r11 3455 testl %r9d,%r9d 3456 jz .Lcbc_decrypt 3457 3458 movups (%r8),%xmm2 3459 movl %r10d,%eax 3460 cmpq $16,%rdx 3461 jb .Lcbc_enc_tail 3462 subq $16,%rdx 3463 jmp .Lcbc_enc_loop 3464.align 16 3465.Lcbc_enc_loop: 3466 movups (%rdi),%xmm3 3467 leaq 16(%rdi),%rdi 3468 3469 movups (%rcx),%xmm0 3470 movups 16(%rcx),%xmm1 3471 xorps %xmm0,%xmm3 3472 leaq 32(%rcx),%rcx 3473 xorps %xmm3,%xmm2 3474.Loop_enc1_15: 3475.byte 102,15,56,220,209 3476 decl %eax 3477 movups (%rcx),%xmm1 3478 leaq 16(%rcx),%rcx 3479 jnz .Loop_enc1_15 3480.byte 102,15,56,221,209 3481 movl %r10d,%eax 3482 movq %r11,%rcx 3483 movups %xmm2,0(%rsi) 3484 leaq 16(%rsi),%rsi 3485 subq $16,%rdx 3486 jnc .Lcbc_enc_loop 3487 addq $16,%rdx 3488 jnz .Lcbc_enc_tail 3489 pxor %xmm0,%xmm0 3490 pxor %xmm1,%xmm1 3491 movups %xmm2,(%r8) 3492 pxor %xmm2,%xmm2 3493 pxor %xmm3,%xmm3 3494 jmp .Lcbc_ret 3495 3496.Lcbc_enc_tail: 3497 movq %rdx,%rcx 3498 xchgq %rdi,%rsi 3499.long 0x9066A4F3 3500 movl $16,%ecx 3501 subq %rdx,%rcx 3502 xorl 
%eax,%eax 3503.long 0x9066AAF3 3504 leaq -16(%rdi),%rdi 3505 movl %r10d,%eax 3506 movq %rdi,%rsi 3507 movq %r11,%rcx 3508 xorq %rdx,%rdx 3509 jmp .Lcbc_enc_loop 3510 3511.align 16 3512.Lcbc_decrypt: 3513 cmpq $16,%rdx 3514 jne .Lcbc_decrypt_bulk 3515 3516 3517 3518 movdqu (%rdi),%xmm2 3519 movdqu (%r8),%xmm3 3520 movdqa %xmm2,%xmm4 3521 movups (%rcx),%xmm0 3522 movups 16(%rcx),%xmm1 3523 leaq 32(%rcx),%rcx 3524 xorps %xmm0,%xmm2 3525.Loop_dec1_16: 3526.byte 102,15,56,222,209 3527 decl %r10d 3528 movups (%rcx),%xmm1 3529 leaq 16(%rcx),%rcx 3530 jnz .Loop_dec1_16 3531.byte 102,15,56,223,209 3532 pxor %xmm0,%xmm0 3533 pxor %xmm1,%xmm1 3534 movdqu %xmm4,(%r8) 3535 xorps %xmm3,%xmm2 3536 pxor %xmm3,%xmm3 3537 movups %xmm2,(%rsi) 3538 pxor %xmm2,%xmm2 3539 jmp .Lcbc_ret 3540.align 16 3541.Lcbc_decrypt_bulk: 3542 leaq (%rsp),%r11 3543.cfi_def_cfa_register %r11 3544 pushq %rbp 3545.cfi_offset %rbp,-16 3546 subq $16,%rsp 3547 andq $-16,%rsp 3548 movq %rcx,%rbp 3549 movups (%r8),%xmm10 3550 movl %r10d,%eax 3551 cmpq $0x50,%rdx 3552 jbe .Lcbc_dec_tail 3553 3554 movups (%rcx),%xmm0 3555 movdqu 0(%rdi),%xmm2 3556 movdqu 16(%rdi),%xmm3 3557 movdqa %xmm2,%xmm11 3558 movdqu 32(%rdi),%xmm4 3559 movdqa %xmm3,%xmm12 3560 movdqu 48(%rdi),%xmm5 3561 movdqa %xmm4,%xmm13 3562 movdqu 64(%rdi),%xmm6 3563 movdqa %xmm5,%xmm14 3564 movdqu 80(%rdi),%xmm7 3565 movdqa %xmm6,%xmm15 3566 movl OPENSSL_ia32cap_P+4(%rip),%r9d 3567 cmpq $0x70,%rdx 3568 jbe .Lcbc_dec_six_or_seven 3569 3570 andl $71303168,%r9d 3571 subq $0x50,%rdx 3572 cmpl $4194304,%r9d 3573 je .Lcbc_dec_loop6_enter 3574 subq $0x20,%rdx 3575 leaq 112(%rcx),%rcx 3576 jmp .Lcbc_dec_loop8_enter 3577.align 16 3578.Lcbc_dec_loop8: 3579 movups %xmm9,(%rsi) 3580 leaq 16(%rsi),%rsi 3581.Lcbc_dec_loop8_enter: 3582 movdqu 96(%rdi),%xmm8 3583 pxor %xmm0,%xmm2 3584 movdqu 112(%rdi),%xmm9 3585 pxor %xmm0,%xmm3 3586 movups 16-112(%rcx),%xmm1 3587 pxor %xmm0,%xmm4 3588 movq $-1,%rbp 3589 cmpq $0x70,%rdx 3590 pxor %xmm0,%xmm5 3591 pxor %xmm0,%xmm6 
3592 pxor %xmm0,%xmm7 3593 pxor %xmm0,%xmm8 3594 3595.byte 102,15,56,222,209 3596 pxor %xmm0,%xmm9 3597 movups 32-112(%rcx),%xmm0 3598.byte 102,15,56,222,217 3599.byte 102,15,56,222,225 3600.byte 102,15,56,222,233 3601.byte 102,15,56,222,241 3602.byte 102,15,56,222,249 3603.byte 102,68,15,56,222,193 3604 adcq $0,%rbp 3605 andq $128,%rbp 3606.byte 102,68,15,56,222,201 3607 addq %rdi,%rbp 3608 movups 48-112(%rcx),%xmm1 3609.byte 102,15,56,222,208 3610.byte 102,15,56,222,216 3611.byte 102,15,56,222,224 3612.byte 102,15,56,222,232 3613.byte 102,15,56,222,240 3614.byte 102,15,56,222,248 3615.byte 102,68,15,56,222,192 3616.byte 102,68,15,56,222,200 3617 movups 64-112(%rcx),%xmm0 3618 nop 3619.byte 102,15,56,222,209 3620.byte 102,15,56,222,217 3621.byte 102,15,56,222,225 3622.byte 102,15,56,222,233 3623.byte 102,15,56,222,241 3624.byte 102,15,56,222,249 3625.byte 102,68,15,56,222,193 3626.byte 102,68,15,56,222,201 3627 movups 80-112(%rcx),%xmm1 3628 nop 3629.byte 102,15,56,222,208 3630.byte 102,15,56,222,216 3631.byte 102,15,56,222,224 3632.byte 102,15,56,222,232 3633.byte 102,15,56,222,240 3634.byte 102,15,56,222,248 3635.byte 102,68,15,56,222,192 3636.byte 102,68,15,56,222,200 3637 movups 96-112(%rcx),%xmm0 3638 nop 3639.byte 102,15,56,222,209 3640.byte 102,15,56,222,217 3641.byte 102,15,56,222,225 3642.byte 102,15,56,222,233 3643.byte 102,15,56,222,241 3644.byte 102,15,56,222,249 3645.byte 102,68,15,56,222,193 3646.byte 102,68,15,56,222,201 3647 movups 112-112(%rcx),%xmm1 3648 nop 3649.byte 102,15,56,222,208 3650.byte 102,15,56,222,216 3651.byte 102,15,56,222,224 3652.byte 102,15,56,222,232 3653.byte 102,15,56,222,240 3654.byte 102,15,56,222,248 3655.byte 102,68,15,56,222,192 3656.byte 102,68,15,56,222,200 3657 movups 128-112(%rcx),%xmm0 3658 nop 3659.byte 102,15,56,222,209 3660.byte 102,15,56,222,217 3661.byte 102,15,56,222,225 3662.byte 102,15,56,222,233 3663.byte 102,15,56,222,241 3664.byte 102,15,56,222,249 3665.byte 102,68,15,56,222,193 3666.byte 
102,68,15,56,222,201 3667 movups 144-112(%rcx),%xmm1 3668 cmpl $11,%eax 3669.byte 102,15,56,222,208 3670.byte 102,15,56,222,216 3671.byte 102,15,56,222,224 3672.byte 102,15,56,222,232 3673.byte 102,15,56,222,240 3674.byte 102,15,56,222,248 3675.byte 102,68,15,56,222,192 3676.byte 102,68,15,56,222,200 3677 movups 160-112(%rcx),%xmm0 3678 jb .Lcbc_dec_done 3679.byte 102,15,56,222,209 3680.byte 102,15,56,222,217 3681.byte 102,15,56,222,225 3682.byte 102,15,56,222,233 3683.byte 102,15,56,222,241 3684.byte 102,15,56,222,249 3685.byte 102,68,15,56,222,193 3686.byte 102,68,15,56,222,201 3687 movups 176-112(%rcx),%xmm1 3688 nop 3689.byte 102,15,56,222,208 3690.byte 102,15,56,222,216 3691.byte 102,15,56,222,224 3692.byte 102,15,56,222,232 3693.byte 102,15,56,222,240 3694.byte 102,15,56,222,248 3695.byte 102,68,15,56,222,192 3696.byte 102,68,15,56,222,200 3697 movups 192-112(%rcx),%xmm0 3698 je .Lcbc_dec_done 3699.byte 102,15,56,222,209 3700.byte 102,15,56,222,217 3701.byte 102,15,56,222,225 3702.byte 102,15,56,222,233 3703.byte 102,15,56,222,241 3704.byte 102,15,56,222,249 3705.byte 102,68,15,56,222,193 3706.byte 102,68,15,56,222,201 3707 movups 208-112(%rcx),%xmm1 3708 nop 3709.byte 102,15,56,222,208 3710.byte 102,15,56,222,216 3711.byte 102,15,56,222,224 3712.byte 102,15,56,222,232 3713.byte 102,15,56,222,240 3714.byte 102,15,56,222,248 3715.byte 102,68,15,56,222,192 3716.byte 102,68,15,56,222,200 3717 movups 224-112(%rcx),%xmm0 3718 jmp .Lcbc_dec_done 3719.align 16 3720.Lcbc_dec_done: 3721.byte 102,15,56,222,209 3722.byte 102,15,56,222,217 3723 pxor %xmm0,%xmm10 3724 pxor %xmm0,%xmm11 3725.byte 102,15,56,222,225 3726.byte 102,15,56,222,233 3727 pxor %xmm0,%xmm12 3728 pxor %xmm0,%xmm13 3729.byte 102,15,56,222,241 3730.byte 102,15,56,222,249 3731 pxor %xmm0,%xmm14 3732 pxor %xmm0,%xmm15 3733.byte 102,68,15,56,222,193 3734.byte 102,68,15,56,222,201 3735 movdqu 80(%rdi),%xmm1 3736 3737.byte 102,65,15,56,223,210 3738 movdqu 96(%rdi),%xmm10 3739 pxor %xmm0,%xmm1 3740.byte 
102,65,15,56,223,219 3741 pxor %xmm0,%xmm10 3742 movdqu 112(%rdi),%xmm0 3743.byte 102,65,15,56,223,228 3744 leaq 128(%rdi),%rdi 3745 movdqu 0(%rbp),%xmm11 3746.byte 102,65,15,56,223,237 3747.byte 102,65,15,56,223,246 3748 movdqu 16(%rbp),%xmm12 3749 movdqu 32(%rbp),%xmm13 3750.byte 102,65,15,56,223,255 3751.byte 102,68,15,56,223,193 3752 movdqu 48(%rbp),%xmm14 3753 movdqu 64(%rbp),%xmm15 3754.byte 102,69,15,56,223,202 3755 movdqa %xmm0,%xmm10 3756 movdqu 80(%rbp),%xmm1 3757 movups -112(%rcx),%xmm0 3758 3759 movups %xmm2,(%rsi) 3760 movdqa %xmm11,%xmm2 3761 movups %xmm3,16(%rsi) 3762 movdqa %xmm12,%xmm3 3763 movups %xmm4,32(%rsi) 3764 movdqa %xmm13,%xmm4 3765 movups %xmm5,48(%rsi) 3766 movdqa %xmm14,%xmm5 3767 movups %xmm6,64(%rsi) 3768 movdqa %xmm15,%xmm6 3769 movups %xmm7,80(%rsi) 3770 movdqa %xmm1,%xmm7 3771 movups %xmm8,96(%rsi) 3772 leaq 112(%rsi),%rsi 3773 3774 subq $0x80,%rdx 3775 ja .Lcbc_dec_loop8 3776 3777 movaps %xmm9,%xmm2 3778 leaq -112(%rcx),%rcx 3779 addq $0x70,%rdx 3780 jle .Lcbc_dec_clear_tail_collected 3781 movups %xmm9,(%rsi) 3782 leaq 16(%rsi),%rsi 3783 cmpq $0x50,%rdx 3784 jbe .Lcbc_dec_tail 3785 3786 movaps %xmm11,%xmm2 3787.Lcbc_dec_six_or_seven: 3788 cmpq $0x60,%rdx 3789 ja .Lcbc_dec_seven 3790 3791 movaps %xmm7,%xmm8 3792 call _aesni_decrypt6 3793 pxor %xmm10,%xmm2 3794 movaps %xmm8,%xmm10 3795 pxor %xmm11,%xmm3 3796 movdqu %xmm2,(%rsi) 3797 pxor %xmm12,%xmm4 3798 movdqu %xmm3,16(%rsi) 3799 pxor %xmm3,%xmm3 3800 pxor %xmm13,%xmm5 3801 movdqu %xmm4,32(%rsi) 3802 pxor %xmm4,%xmm4 3803 pxor %xmm14,%xmm6 3804 movdqu %xmm5,48(%rsi) 3805 pxor %xmm5,%xmm5 3806 pxor %xmm15,%xmm7 3807 movdqu %xmm6,64(%rsi) 3808 pxor %xmm6,%xmm6 3809 leaq 80(%rsi),%rsi 3810 movdqa %xmm7,%xmm2 3811 pxor %xmm7,%xmm7 3812 jmp .Lcbc_dec_tail_collected 3813 3814.align 16 3815.Lcbc_dec_seven: 3816 movups 96(%rdi),%xmm8 3817 xorps %xmm9,%xmm9 3818 call _aesni_decrypt8 3819 movups 80(%rdi),%xmm9 3820 pxor %xmm10,%xmm2 3821 movups 96(%rdi),%xmm10 3822 pxor %xmm11,%xmm3 3823 
movdqu %xmm2,(%rsi) 3824 pxor %xmm12,%xmm4 3825 movdqu %xmm3,16(%rsi) 3826 pxor %xmm3,%xmm3 3827 pxor %xmm13,%xmm5 3828 movdqu %xmm4,32(%rsi) 3829 pxor %xmm4,%xmm4 3830 pxor %xmm14,%xmm6 3831 movdqu %xmm5,48(%rsi) 3832 pxor %xmm5,%xmm5 3833 pxor %xmm15,%xmm7 3834 movdqu %xmm6,64(%rsi) 3835 pxor %xmm6,%xmm6 3836 pxor %xmm9,%xmm8 3837 movdqu %xmm7,80(%rsi) 3838 pxor %xmm7,%xmm7 3839 leaq 96(%rsi),%rsi 3840 movdqa %xmm8,%xmm2 3841 pxor %xmm8,%xmm8 3842 pxor %xmm9,%xmm9 3843 jmp .Lcbc_dec_tail_collected 3844 3845.align 16 3846.Lcbc_dec_loop6: 3847 movups %xmm7,(%rsi) 3848 leaq 16(%rsi),%rsi 3849 movdqu 0(%rdi),%xmm2 3850 movdqu 16(%rdi),%xmm3 3851 movdqa %xmm2,%xmm11 3852 movdqu 32(%rdi),%xmm4 3853 movdqa %xmm3,%xmm12 3854 movdqu 48(%rdi),%xmm5 3855 movdqa %xmm4,%xmm13 3856 movdqu 64(%rdi),%xmm6 3857 movdqa %xmm5,%xmm14 3858 movdqu 80(%rdi),%xmm7 3859 movdqa %xmm6,%xmm15 3860.Lcbc_dec_loop6_enter: 3861 leaq 96(%rdi),%rdi 3862 movdqa %xmm7,%xmm8 3863 3864 call _aesni_decrypt6 3865 3866 pxor %xmm10,%xmm2 3867 movdqa %xmm8,%xmm10 3868 pxor %xmm11,%xmm3 3869 movdqu %xmm2,(%rsi) 3870 pxor %xmm12,%xmm4 3871 movdqu %xmm3,16(%rsi) 3872 pxor %xmm13,%xmm5 3873 movdqu %xmm4,32(%rsi) 3874 pxor %xmm14,%xmm6 3875 movq %rbp,%rcx 3876 movdqu %xmm5,48(%rsi) 3877 pxor %xmm15,%xmm7 3878 movl %r10d,%eax 3879 movdqu %xmm6,64(%rsi) 3880 leaq 80(%rsi),%rsi 3881 subq $0x60,%rdx 3882 ja .Lcbc_dec_loop6 3883 3884 movdqa %xmm7,%xmm2 3885 addq $0x50,%rdx 3886 jle .Lcbc_dec_clear_tail_collected 3887 movups %xmm7,(%rsi) 3888 leaq 16(%rsi),%rsi 3889 3890.Lcbc_dec_tail: 3891 movups (%rdi),%xmm2 3892 subq $0x10,%rdx 3893 jbe .Lcbc_dec_one 3894 3895 movups 16(%rdi),%xmm3 3896 movaps %xmm2,%xmm11 3897 subq $0x10,%rdx 3898 jbe .Lcbc_dec_two 3899 3900 movups 32(%rdi),%xmm4 3901 movaps %xmm3,%xmm12 3902 subq $0x10,%rdx 3903 jbe .Lcbc_dec_three 3904 3905 movups 48(%rdi),%xmm5 3906 movaps %xmm4,%xmm13 3907 subq $0x10,%rdx 3908 jbe .Lcbc_dec_four 3909 3910 movups 64(%rdi),%xmm6 3911 movaps %xmm5,%xmm14 3912 
movaps %xmm6,%xmm15 3913 xorps %xmm7,%xmm7 3914 call _aesni_decrypt6 3915 pxor %xmm10,%xmm2 3916 movaps %xmm15,%xmm10 3917 pxor %xmm11,%xmm3 3918 movdqu %xmm2,(%rsi) 3919 pxor %xmm12,%xmm4 3920 movdqu %xmm3,16(%rsi) 3921 pxor %xmm3,%xmm3 3922 pxor %xmm13,%xmm5 3923 movdqu %xmm4,32(%rsi) 3924 pxor %xmm4,%xmm4 3925 pxor %xmm14,%xmm6 3926 movdqu %xmm5,48(%rsi) 3927 pxor %xmm5,%xmm5 3928 leaq 64(%rsi),%rsi 3929 movdqa %xmm6,%xmm2 3930 pxor %xmm6,%xmm6 3931 pxor %xmm7,%xmm7 3932 subq $0x10,%rdx 3933 jmp .Lcbc_dec_tail_collected 3934 3935.align 16 3936.Lcbc_dec_one: 3937 movaps %xmm2,%xmm11 3938 movups (%rcx),%xmm0 3939 movups 16(%rcx),%xmm1 3940 leaq 32(%rcx),%rcx 3941 xorps %xmm0,%xmm2 3942.Loop_dec1_17: 3943.byte 102,15,56,222,209 3944 decl %eax 3945 movups (%rcx),%xmm1 3946 leaq 16(%rcx),%rcx 3947 jnz .Loop_dec1_17 3948.byte 102,15,56,223,209 3949 xorps %xmm10,%xmm2 3950 movaps %xmm11,%xmm10 3951 jmp .Lcbc_dec_tail_collected 3952.align 16 3953.Lcbc_dec_two: 3954 movaps %xmm3,%xmm12 3955 call _aesni_decrypt2 3956 pxor %xmm10,%xmm2 3957 movaps %xmm12,%xmm10 3958 pxor %xmm11,%xmm3 3959 movdqu %xmm2,(%rsi) 3960 movdqa %xmm3,%xmm2 3961 pxor %xmm3,%xmm3 3962 leaq 16(%rsi),%rsi 3963 jmp .Lcbc_dec_tail_collected 3964.align 16 3965.Lcbc_dec_three: 3966 movaps %xmm4,%xmm13 3967 call _aesni_decrypt3 3968 pxor %xmm10,%xmm2 3969 movaps %xmm13,%xmm10 3970 pxor %xmm11,%xmm3 3971 movdqu %xmm2,(%rsi) 3972 pxor %xmm12,%xmm4 3973 movdqu %xmm3,16(%rsi) 3974 pxor %xmm3,%xmm3 3975 movdqa %xmm4,%xmm2 3976 pxor %xmm4,%xmm4 3977 leaq 32(%rsi),%rsi 3978 jmp .Lcbc_dec_tail_collected 3979.align 16 3980.Lcbc_dec_four: 3981 movaps %xmm5,%xmm14 3982 call _aesni_decrypt4 3983 pxor %xmm10,%xmm2 3984 movaps %xmm14,%xmm10 3985 pxor %xmm11,%xmm3 3986 movdqu %xmm2,(%rsi) 3987 pxor %xmm12,%xmm4 3988 movdqu %xmm3,16(%rsi) 3989 pxor %xmm3,%xmm3 3990 pxor %xmm13,%xmm5 3991 movdqu %xmm4,32(%rsi) 3992 pxor %xmm4,%xmm4 3993 movdqa %xmm5,%xmm2 3994 pxor %xmm5,%xmm5 3995 leaq 48(%rsi),%rsi 3996 jmp 
.Lcbc_dec_tail_collected 3997 3998.align 16 3999.Lcbc_dec_clear_tail_collected: 4000 pxor %xmm3,%xmm3 4001 pxor %xmm4,%xmm4 4002 pxor %xmm5,%xmm5 4003 pxor %xmm6,%xmm6 4004 pxor %xmm7,%xmm7 4005 pxor %xmm8,%xmm8 4006 pxor %xmm9,%xmm9 4007.Lcbc_dec_tail_collected: 4008 movups %xmm10,(%r8) 4009 andq $15,%rdx 4010 jnz .Lcbc_dec_tail_partial 4011 movups %xmm2,(%rsi) 4012 pxor %xmm2,%xmm2 4013 jmp .Lcbc_dec_ret 4014.align 16 4015.Lcbc_dec_tail_partial: 4016 movaps %xmm2,(%rsp) 4017 pxor %xmm2,%xmm2 4018 movq $16,%rcx 4019 movq %rsi,%rdi 4020 subq %rdx,%rcx 4021 leaq (%rsp),%rsi 4022.long 0x9066A4F3 4023 movdqa %xmm2,(%rsp) 4024 4025.Lcbc_dec_ret: 4026 xorps %xmm0,%xmm0 4027 pxor %xmm1,%xmm1 4028 movq -8(%r11),%rbp 4029.cfi_restore %rbp 4030 leaq (%r11),%rsp 4031.cfi_def_cfa_register %rsp 4032.Lcbc_ret: 4033 .byte 0xf3,0xc3 4034.cfi_endproc 4035.size aesni_cbc_encrypt,.-aesni_cbc_encrypt 4036.globl aesni_set_decrypt_key 4037.type aesni_set_decrypt_key,@function 4038.align 16 4039aesni_set_decrypt_key: 4040.cfi_startproc 4041.byte 0x48,0x83,0xEC,0x08 4042.cfi_adjust_cfa_offset 8 4043 call __aesni_set_encrypt_key 4044 shll $4,%esi 4045 testl %eax,%eax 4046 jnz .Ldec_key_ret 4047 leaq 16(%rdx,%rsi,1),%rdi 4048 4049 movups (%rdx),%xmm0 4050 movups (%rdi),%xmm1 4051 movups %xmm0,(%rdi) 4052 movups %xmm1,(%rdx) 4053 leaq 16(%rdx),%rdx 4054 leaq -16(%rdi),%rdi 4055 4056.Ldec_key_inverse: 4057 movups (%rdx),%xmm0 4058 movups (%rdi),%xmm1 4059.byte 102,15,56,219,192 4060.byte 102,15,56,219,201 4061 leaq 16(%rdx),%rdx 4062 leaq -16(%rdi),%rdi 4063 movups %xmm0,16(%rdi) 4064 movups %xmm1,-16(%rdx) 4065 cmpq %rdx,%rdi 4066 ja .Ldec_key_inverse 4067 4068 movups (%rdx),%xmm0 4069.byte 102,15,56,219,192 4070 pxor %xmm1,%xmm1 4071 movups %xmm0,(%rdi) 4072 pxor %xmm0,%xmm0 4073.Ldec_key_ret: 4074 addq $8,%rsp 4075.cfi_adjust_cfa_offset -8 4076 .byte 0xf3,0xc3 4077.cfi_endproc 4078.LSEH_end_set_decrypt_key: 4079.size aesni_set_decrypt_key,.-aesni_set_decrypt_key 4080.globl 
aesni_set_encrypt_key
/*
 * NOTE: the symbol name above is the operand of the .globl directive that
 * ends the preceding listing line.
 *
 * aesni_set_encrypt_key / __aesni_set_encrypt_key
 * In:   rdi  user key bytes
 *       esi  key length in bits (128, 192 or 256)
 *       rdx  output key schedule
 * Out:  rax = 0 on success, -1 if rdi or rdx is NULL, -2 for any other
 *       key length.  On success the round keys are written from (%rdx)
 *       and the value 9, 11 or 13 (left in esi) is stored after them.
 *       xmm0-xmm5 are zeroed on every exit path.
 *
 * Two implementations exist per key size.  The "_alt" one is taken when,
 * of capability bits 0x10000800 in OPENSSL_ia32cap_P+4, only bit
 * 0x10000000 is set (presumably AVX without XOP -- confirm against the
 * OPENSSL_ia32cap documentation).
 *
 * Opcode legend for the .byte-encoded instructions:
 *   102,15,58,223,modrm,imm  aeskeygenassist $imm
 *   102,15,56,0,modrm        pshufb
 *   102,15,56,221,modrm      aesenclast
 *   0x48,0x83,0xEC,0x08      sub $8,%rsp
 *   0xf3,0xc3                rep ret
 */
.type	aesni_set_encrypt_key,@function
.align	16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
.byte	0x48,0x83,0xEC,0x08
.cfi_adjust_cfa_offset	8
	movq	$-1,%rax
	testq	%rdi,%rdi
	jz	.Lenc_key_ret
	testq	%rdx,%rdx
	jz	.Lenc_key_ret

	movl	$268437504,%r10d
	movups	(%rdi),%xmm0
	xorps	%xmm4,%xmm4
	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
	leaq	16(%rdx),%rax
	cmpl	$256,%esi
	je	.L14rounds
	cmpl	$192,%esi
	je	.L12rounds
	cmpl	$128,%esi
	jne	.Lbad_keybits

.L10rounds:
	movl	$9,%esi
	cmpl	$268435456,%r10d
	je	.L10rounds_alt

	/* aeskeygenassist with round constants 1,2,4,...,0x80,0x1b,0x36. */
	movups	%xmm0,(%rdx)
.byte	102,15,58,223,200,1
	call	.Lkey_expansion_128_cold
.byte	102,15,58,223,200,2
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,64
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,128
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,27
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,54
	call	.Lkey_expansion_128
	movups	%xmm0,(%rax)
	movl	%esi,80(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L10rounds_alt:
	/* pshufb + aesenclast variant; xmm4 holds the doubling round constant. */
	movdqa	.Lkey_rotate(%rip),%xmm5
	movl	$8,%r10d
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key128

.align	16
.Loop_key128:
.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	.Loop_key128

	/* Last two round keys use constants 0x1b and 0x36 (0x1b << 1). */
	movdqa	.Lkey_rcon1b(%rip),%xmm4

.byte	102,15,56,0,197
.byte	102,15,56,221,196
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197
.byte	102,15,56,221,196

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	movl	%esi,96(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L12rounds:
	/* 192-bit key: the upper 64 bits are loaded into xmm2. */
	movq	16(%rdi),%xmm2
	movl	$11,%esi
	cmpl	$268435456,%r10d
	je	.L12rounds_alt

	movups	%xmm0,(%rdx)
.byte	102,15,58,223,202,1
	call	.Lkey_expansion_192a_cold
.byte	102,15,58,223,202,2
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,4
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,8
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,16
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,32
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,64
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,128
	call	.Lkey_expansion_192b
	movups	%xmm0,(%rax)
	movl	%esi,48(%rax)
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L12rounds_alt:
	movdqa	.Lkey_rotate192(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$8,%r10d
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key192

.align	16
.Loop_key192:
	/* Each iteration emits 24 bytes of key schedule. */
	movq	%xmm2,0(%rax)
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213
.byte	102,15,56,221,212
	pslld	$1,%xmm4
	leaq	24(%rax),%rax

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0

	pshufd	$0xff,%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3

	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%rax)

	decl	%r10d
	jnz	.Loop_key192

	movl	%esi,32(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L14rounds:
	/* 256-bit key: second half in xmm2. */
	movups	16(%rdi),%xmm2
	movl	$13,%esi
	leaq	16(%rax),%rax
	cmpl	$268435456,%r10d
	je	.L14rounds_alt

	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1
	call	.Lkey_expansion_256a_cold
.byte	102,15,58,223,200,1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,2
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,2
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,4
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,8
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,16
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,32
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,64
	call	.Lkey_expansion_256a
	movups	%xmm0,(%rax)
	movl	%esi,16(%rax)
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L14rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$7,%r10d
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	.Loop_key256

.align	16
.Loop_key256:
.byte	102,15,56,0,213
.byte	102,15,56,221,212

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	.Ldone_key256

	/* Odd round key: aesenclast against an all-zero key in xmm3. */
	pshufd	$0xff,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	.Loop_key256

.Ldone_key256:
	movl	%esi,16(%rax)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.Lbad_keybits:
	movq	$-2,%rax
.Lenc_key_ret:
	/* Scrub key material from the vector registers before returning. */
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp
.cfi_adjust_cfa_offset	-8
	.byte	0xf3,0xc3
.LSEH_end_set_encrypt_key:

/*
 * Local subroutines reached only by `call` from the non-_alt paths above.
 * On entry xmm1 holds the aeskeygenassist result; rax is the output cursor.
 * The "_cold" entry points skip the store of the previous round key.
 */
.align	16
.Lkey_expansion_128:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3

.align	16
.Lkey_expansion_192a:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps	%xmm2,%xmm5
.Lkey_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	.byte	0xf3,0xc3

.align	16
.Lkey_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	.Lkey_expansion_192b_warm

.align	16
.Lkey_expansion_256a:
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3

.align	16
.Lkey_expansion_256b:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1
	xorps	%xmm1,%xmm2
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key

/* Constant pool, 64-byte aligned. */
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long	6,6,6,0
.Lincrement64:
.long	1,0,0,0
.Lxts_magic:
.long	0x87,0,1,0
.Lincrement1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
/* pshufb masks used by the _alt key-schedule paths. */
.Lkey_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605
/* Initial round constants for the _alt key-schedule paths. */
.Lkey_rcon1:
.long	1,1,1,1
.Lkey_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b

/* ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64