/* Do not modify. This file is auto-generated from chacha-x86.pl. */
#ifdef PIC
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	#ifdef __CET__

.byte 243,15,30,251
	#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	xorl %eax,%eax
	cmpl 28(%esp),%eax
	je .L000no_data
	call .Lpic_point
.Lpic_point:
	popl %eax
	leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	testl $16777216,(%ebp)
	jz .L001x86
	testl $512,4(%ebp)
	jz .L001x86
	jmp .Lssse3_shortcut
.L001x86:
	movl 32(%esp),%esi
	movl 36(%esp),%edi
	subl $132,%esp
	movl (%esi),%eax
	movl 4(%esi),%ebx
	movl 8(%esi),%ecx
	movl 12(%esi),%edx
	movl %eax,80(%esp)
	movl %ebx,84(%esp)
	movl %ecx,88(%esp)
	movl %edx,92(%esp)
	movl 16(%esi),%eax
	movl 20(%esi),%ebx
	movl 24(%esi),%ecx
	movl 28(%esi),%edx
	movl %eax,96(%esp)
	movl %ebx,100(%esp)
	movl %ecx,104(%esp)
	movl %edx,108(%esp)
	movl (%edi),%eax
	movl 4(%edi),%ebx
	movl 8(%edi),%ecx
	movl 12(%edi),%edx
	subl $1,%eax
	movl %eax,112(%esp)
	movl %ebx,116(%esp)
	movl %ecx,120(%esp)
	movl %edx,124(%esp)
	jmp .L002entry
.align 16
.L003outer_loop:
	movl %ebx,156(%esp)
	movl %eax,152(%esp)
	movl %ecx,160(%esp)
.L002entry:
	movl $1634760805,%eax
	movl $857760878,4(%esp)
	movl $2036477234,8(%esp)
	movl $1797285236,12(%esp)
	movl 84(%esp),%ebx
	movl 88(%esp),%ebp
	movl 104(%esp),%ecx
	movl 108(%esp),%esi
	movl 116(%esp),%edx
	movl 120(%esp),%edi
	movl %ebx,20(%esp)
	movl %ebp,24(%esp)
	movl %ecx,40(%esp)
	movl %esi,44(%esp)
	movl %edx,52(%esp)
	movl %edi,56(%esp)
	movl 92(%esp),%ebx
	movl 124(%esp),%edi
	movl 112(%esp),%edx
	movl 80(%esp),%ebp
	movl 96(%esp),%ecx
	movl 100(%esp),%esi
	addl $1,%edx
	movl %ebx,28(%esp)
	movl %edi,60(%esp)
	movl %edx,112(%esp)
	movl $10,%ebx
	jmp .L004loop
.align 16
.L004loop:
	addl %ebp,%eax
	movl %ebx,128(%esp)
	movl %ebp,%ebx
	xorl %eax,%edx
	roll $16,%edx
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 52(%esp),%edi
	roll $12,%ebx
	movl 20(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,48(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,32(%esp)
	roll $16,%edi
	movl %ebx,16(%esp)
	addl %edi,%esi
	movl 40(%esp),%ecx
	xorl %esi,%ebp
	movl 56(%esp),%edx
	roll $12,%ebp
	movl 24(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,52(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,36(%esp)
	roll $16,%edx
	movl %ebp,20(%esp)
	addl %edx,%ecx
	movl 44(%esp),%esi
	xorl %ecx,%ebx
	movl 60(%esp),%edi
	roll $12,%ebx
	movl 28(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,56(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,24(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	roll $12,%ebp
	movl 20(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,%edx
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	roll $16,%edx
	movl %ebp,28(%esp)
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 48(%esp),%edi
	roll $12,%ebx
	movl 24(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,60(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,40(%esp)
	roll $16,%edi
	movl %ebx,20(%esp)
	addl %edi,%esi
	movl 32(%esp),%ecx
	xorl %esi,%ebp
	movl 52(%esp),%edx
	roll $12,%ebp
	movl 28(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,48(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,44(%esp)
	roll $16,%edx
	movl %ebp,24(%esp)
	addl %edx,%ecx
	movl 36(%esp),%esi
	xorl %ecx,%ebx
	movl 56(%esp),%edi
	roll $12,%ebx
	movl 16(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,52(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,28(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	movl 48(%esp),%edx
	roll $12,%ebp
	movl 128(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,56(%esp)
	xorl %esi,%ebp
	roll $7,%ebp
	decl %ebx
	jnz .L004loop
	movl 160(%esp),%ebx
	addl $1634760805,%eax
	addl 80(%esp),%ebp
	addl 96(%esp),%ecx
	addl 100(%esp),%esi
	cmpl $64,%ebx
	jb .L005tail
	movl 156(%esp),%ebx
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	xorl (%ebx),%eax
	xorl 16(%ebx),%ebp
	movl %eax,(%esp)
	movl 152(%esp),%eax
	xorl 32(%ebx),%ecx
	xorl 36(%ebx),%esi
	xorl 48(%ebx),%edx
	xorl 56(%ebx),%edi
	movl %ebp,16(%eax)
	movl %ecx,32(%eax)
	movl %esi,36(%eax)
	movl %edx,48(%eax)
	movl %edi,56(%eax)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	xorl 4(%ebx),%ebp
	xorl 8(%ebx),%ecx
	xorl 12(%ebx),%esi
	xorl 20(%ebx),%edx
	xorl 24(%ebx),%edi
	movl %ebp,4(%eax)
	movl %ecx,8(%eax)
	movl %esi,12(%eax)
	movl %edx,20(%eax)
	movl %edi,24(%eax)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	xorl 28(%ebx),%ebp
	xorl 40(%ebx),%ecx
	xorl 44(%ebx),%esi
	xorl 52(%ebx),%edx
	xorl 60(%ebx),%edi
	leal 64(%ebx),%ebx
	movl %ebp,28(%eax)
	movl (%esp),%ebp
	movl %ecx,40(%eax)
	movl 160(%esp),%ecx
	movl %esi,44(%eax)
	movl %edx,52(%eax)
	movl %edi,60(%eax)
	movl %ebp,(%eax)
	leal 64(%eax),%eax
	subl $64,%ecx
	jnz .L003outer_loop
	jmp .L006done
.L005tail:
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	movl %eax,(%esp)
	movl %ebp,16(%esp)
	movl %ecx,32(%esp)
	movl %esi,36(%esp)
	movl %edx,48(%esp)
	movl %edi,56(%esp)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	movl %ebp,4(%esp)
	movl %ecx,8(%esp)
	movl %esi,12(%esp)
	movl %edx,20(%esp)
	movl %edi,24(%esp)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	movl %ebp,28(%esp)
	movl 156(%esp),%ebp
	movl %ecx,40(%esp)
	movl 152(%esp),%ecx
	movl %esi,44(%esp)
	xorl %esi,%esi
	movl %edx,52(%esp)
	movl %edi,60(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
.L007tail_loop:
	movb (%esi,%ebp,1),%al
	movb (%esp,%esi,1),%dl
	leal 1(%esi),%esi
	xorb %dl,%al
	movb %al,-1(%ecx,%esi,1)
	decl %ebx
	jnz .L007tail_loop
.L006done:
	addl $132,%esp
.L000no_data:
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	#ifdef __CET__

.byte 243,15,30,251
	#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
.Lssse3_shortcut:
	testl $2048,4(%ebp)
	jnz .Lxop_shortcut
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal .Lssse3_data-.Lpic_point(%eax),%eax
	movdqu (%ebx),%xmm3
.L0081x:
	movdqa 32(%eax),%xmm0
	movdqu (%edx),%xmm1
	movdqu 16(%edx),%xmm2
	movdqa (%eax),%xmm6
	movdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp .L009loop1x
.align 16
.L010outer1x:
	movdqa 80(%eax),%xmm3
	movdqa (%esp),%xmm0
	movdqa 16(%esp),%xmm1
	movdqa 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	movl $10,%edx
	movdqa %xmm3,48(%esp)
	jmp .L009loop1x
.align 16
.L009loop1x:
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $57,%xmm1,%xmm1
	pshufd $147,%xmm3,%xmm3
	nop
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $147,%xmm1,%xmm1
	pshufd $57,%xmm3,%xmm3
	decl %edx
	jnz .L009loop1x
	paddd (%esp),%xmm0
	paddd 16(%esp),%xmm1
	paddd 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	cmpl $64,%ecx
	jb .L011tail
	movdqu (%esi),%xmm4
	movdqu 16(%esi),%xmm5
	pxor %xmm4,%xmm0
	movdqu 32(%esi),%xmm4
	pxor %xmm5,%xmm1
	movdqu 48(%esi),%xmm5
	pxor %xmm4,%xmm2
	pxor %xmm5,%xmm3
	leal 64(%esi),%esi
	movdqu %xmm0,(%edi)
	movdqu %xmm1,16(%edi)
	movdqu %xmm2,32(%edi)
	movdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz .L010outer1x
	jmp .L012done
.L011tail:
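/* Tail path: a final block shorter than 64 bytes is handled by spilling
   the keystream block to the stack and XORing it into the output one
   byte at a time. */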
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
.L013tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz .L013tail_loop
.L012done:
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	#ifdef __CET__

.byte 243,15,30,251
	#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
.Lxop_shortcut:
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	vzeroupper
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal .Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu (%ebx),%xmm3
	cmpl $256,%ecx
	jb .L0141x
	movl %edx,516(%esp)
	movl %ebx,520(%esp)
	subl $256,%ecx
	leal 384(%esp),%ebp
	vmovdqu (%edx),%xmm7
	vpshufd $0,%xmm3,%xmm0
	vpshufd $85,%xmm3,%xmm1
	vpshufd $170,%xmm3,%xmm2
	vpshufd $255,%xmm3,%xmm3
	vpaddd 48(%eax),%xmm0,%xmm0
	vpshufd $0,%xmm7,%xmm4
	vpshufd $85,%xmm7,%xmm5
	vpsubd 64(%eax),%xmm0,%xmm0
	vpshufd $170,%xmm7,%xmm6
	vpshufd $255,%xmm7,%xmm7
	vmovdqa %xmm0,64(%ebp)
	vmovdqa %xmm1,80(%ebp)
	vmovdqa %xmm2,96(%ebp)
	vmovdqa %xmm3,112(%ebp)
	vmovdqu 16(%edx),%xmm3
	vmovdqa %xmm4,-64(%ebp)
	vmovdqa %xmm5,-48(%ebp)
	vmovdqa %xmm6,-32(%ebp)
	vmovdqa %xmm7,-16(%ebp)
	vmovdqa 32(%eax),%xmm7
	leal 128(%esp),%ebx
	vpshufd $0,%xmm3,%xmm0
	vpshufd $85,%xmm3,%xmm1
	vpshufd $170,%xmm3,%xmm2
	vpshufd $255,%xmm3,%xmm3
	vpshufd $0,%xmm7,%xmm4
	vpshufd $85,%xmm7,%xmm5
	vpshufd $170,%xmm7,%xmm6
	vpshufd $255,%xmm7,%xmm7
	vmovdqa %xmm0,(%ebp)
	vmovdqa %xmm1,16(%ebp)
	vmovdqa %xmm2,32(%ebp)
	vmovdqa %xmm3,48(%ebp)
	vmovdqa %xmm4,-128(%ebp)
	vmovdqa %xmm5,-112(%ebp)
	vmovdqa %xmm6,-96(%ebp)
	vmovdqa %xmm7,-80(%ebp)
	leal 128(%esi),%esi
	leal 128(%edi),%edi
	jmp .L015outer_loop
.align 32
.L015outer_loop:
	vmovdqa -112(%ebp),%xmm1
	vmovdqa -96(%ebp),%xmm2
	vmovdqa -80(%ebp),%xmm3
	vmovdqa -48(%ebp),%xmm5
	vmovdqa -32(%ebp),%xmm6
	vmovdqa -16(%ebp),%xmm7
	vmovdqa %xmm1,-112(%ebx)
	vmovdqa %xmm2,-96(%ebx)
	vmovdqa %xmm3,-80(%ebx)
	vmovdqa %xmm5,-48(%ebx)
	vmovdqa %xmm6,-32(%ebx)
	vmovdqa %xmm7,-16(%ebx)
	vmovdqa 32(%ebp),%xmm2
	vmovdqa 48(%ebp),%xmm3
	vmovdqa 64(%ebp),%xmm4
	vmovdqa 80(%ebp),%xmm5
	vmovdqa 96(%ebp),%xmm6
	vmovdqa 112(%ebp),%xmm7
	vpaddd 64(%eax),%xmm4,%xmm4
	vmovdqa %xmm2,32(%ebx)
	vmovdqa %xmm3,48(%ebx)
	vmovdqa %xmm4,64(%ebx)
	vmovdqa %xmm5,80(%ebx)
	vmovdqa %xmm6,96(%ebx)
	vmovdqa %xmm7,112(%ebx)
	vmovdqa %xmm4,64(%ebp)
	vmovdqa -128(%ebp),%xmm0
	vmovdqa %xmm4,%xmm6
	vmovdqa -64(%ebp),%xmm3
	vmovdqa (%ebp),%xmm4
	vmovdqa 16(%ebp),%xmm5
	movl $10,%edx
	nop
.align 32
.L016loop:
	vpaddd %xmm3,%xmm0,%xmm0
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
	vpaddd %xmm6,%xmm4,%xmm4
	vpxor %xmm4,%xmm3,%xmm2
	vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -48(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 80(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-128(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,64(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
	vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-64(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa 32(%ebx),%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -32(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 96(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-112(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,80(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
	vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-48(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa 48(%ebx),%xmm5
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -16(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 112(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-96(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,96(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-32(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -48(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-80(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-16(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -32(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 64(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-128(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,112(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
	vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-48(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa (%ebx),%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -16(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 80(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-112(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,64(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
	vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-32(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa 16(%ebx),%xmm5
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -64(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 96(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-96(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,80(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-16(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 64(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-80(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,96(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
	decl %edx
	jnz .L016loop
	vmovdqa %xmm3,-64(%ebx)
	vmovdqa %xmm4,(%ebx)
	vmovdqa %xmm5,16(%ebx)
	vmovdqa %xmm6,64(%ebx)
	vmovdqa %xmm7,96(%ebx)
	vmovdqa -112(%ebx),%xmm1
	vmovdqa -96(%ebx),%xmm2
	vmovdqa -80(%ebx),%xmm3
	vpaddd -128(%ebp),%xmm0,%xmm0
	vpaddd -112(%ebp),%xmm1,%xmm1
	vpaddd -96(%ebp),%xmm2,%xmm2
	vpaddd -80(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa -64(%ebx),%xmm0
	vmovdqa -48(%ebx),%xmm1
	vmovdqa -32(%ebx),%xmm2
	vmovdqa -16(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd -64(%ebp),%xmm0,%xmm0
	vpaddd -48(%ebp),%xmm1,%xmm1
	vpaddd -32(%ebp),%xmm2,%xmm2
	vpaddd -16(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa (%ebx),%xmm0
	vmovdqa 16(%ebx),%xmm1
	vmovdqa 32(%ebx),%xmm2
	vmovdqa 48(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd (%ebp),%xmm0,%xmm0
	vpaddd 16(%ebp),%xmm1,%xmm1
	vpaddd 32(%ebp),%xmm2,%xmm2
	vpaddd 48(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa 64(%ebx),%xmm0
	vmovdqa 80(%ebx),%xmm1
	vmovdqa 96(%ebx),%xmm2
	vmovdqa 112(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd 64(%ebp),%xmm0,%xmm0
	vpaddd 80(%ebp),%xmm1,%xmm1
	vpaddd 96(%ebp),%xmm2,%xmm2
	vpaddd 112(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 208(%esi),%esi
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 208(%edi),%edi
	subl $256,%ecx
	jnc .L015outer_loop
	addl $256,%ecx
	jz .L017done
	movl 520(%esp),%ebx
	leal -128(%esi),%esi
	movl 516(%esp),%edx
	leal -128(%edi),%edi
	vmovd 64(%ebp),%xmm2
	vmovdqu (%ebx),%xmm3
	vpaddd 96(%eax),%xmm2,%xmm2
	vpand 112(%eax),%xmm3,%xmm3
	vpor %xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa 32(%eax),%xmm0
	vmovdqu (%edx),%xmm1
	vmovdqu 16(%edx),%xmm2
	vmovdqa (%eax),%xmm6
	vmovdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	vmovdqa %xmm0,(%esp)
	vmovdqa %xmm1,16(%esp)
	vmovdqa %xmm2,32(%esp)
	vmovdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp .L018loop1x
.align 16
.L019outer1x:
	vmovdqa 80(%eax),%xmm3
	vmovdqa (%esp),%xmm0
	vmovdqa 16(%esp),%xmm1
	vmovdqa 32(%esp),%xmm2
	vpaddd 48(%esp),%xmm3,%xmm3
	movl $10,%edx
	vmovdqa %xmm3,48(%esp)
	jmp .L018loop1x
.align 16
.L018loop1x:
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
	vpshufd $78,%xmm2,%xmm2
	vpshufd $57,%xmm1,%xmm1
	vpshufd $147,%xmm3,%xmm3
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
	vpshufd $78,%xmm2,%xmm2
	vpshufd $147,%xmm1,%xmm1
	vpshufd $57,%xmm3,%xmm3
	decl %edx
	jnz .L018loop1x
	vpaddd (%esp),%xmm0,%xmm0
	vpaddd 16(%esp),%xmm1,%xmm1
	vpaddd 32(%esp),%xmm2,%xmm2
	vpaddd 48(%esp),%xmm3,%xmm3
	cmpl $64,%ecx
	jb .L020tail
	vpxor (%esi),%xmm0,%xmm0
	vpxor 16(%esi),%xmm1,%xmm1
	vpxor 32(%esi),%xmm2,%xmm2
	vpxor 48(%esi),%xmm3,%xmm3
	leal 64(%esi),%esi
	vmovdqu %xmm0,(%edi)
	vmovdqu %xmm1,16(%edi)
	vmovdqu %xmm2,32(%edi)
	vmovdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz .L019outer1x
	jmp .L017done
.L020tail:
	vmovdqa %xmm0,(%esp)
	vmovdqa %xmm1,16(%esp)
	vmovdqa %xmm2,32(%esp)
	vmovdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
.L021tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz .L021tail_loop
.L017done:
	vzeroupper
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4

	.section ".note.gnu.property", "a"
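/* GNU property note: type 0xc0000002 is GNU_PROPERTY_X86_FEATURE_1_AND;
   the value 3 (IBT|SHSTK) marks this object as CET-compatible. */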
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4:
#else
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	#ifdef __CET__

.byte 243,15,30,251
	#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	xorl %eax,%eax
	cmpl 28(%esp),%eax
	je .L000no_data
	call .Lpic_point
.Lpic_point:
	popl %eax
	leal OPENSSL_ia32cap_P,%ebp
	testl $16777216,(%ebp)
	jz .L001x86
	testl $512,4(%ebp)
	jz .L001x86
	jmp .Lssse3_shortcut
.L001x86:
	movl 32(%esp),%esi
	movl 36(%esp),%edi
	subl $132,%esp
	movl (%esi),%eax
	movl 4(%esi),%ebx
	movl 8(%esi),%ecx
	movl 12(%esi),%edx
	movl %eax,80(%esp)
	movl %ebx,84(%esp)
	movl %ecx,88(%esp)
	movl %edx,92(%esp)
	movl 16(%esi),%eax
	movl 20(%esi),%ebx
	movl 24(%esi),%ecx
	movl 28(%esi),%edx
	movl %eax,96(%esp)
	movl %ebx,100(%esp)
	movl %ecx,104(%esp)
	movl %edx,108(%esp)
	movl (%edi),%eax
	movl 4(%edi),%ebx
	movl 8(%edi),%ecx
	movl 12(%edi),%edx
	subl $1,%eax
	movl %eax,112(%esp)
	movl %ebx,116(%esp)
	movl %ecx,120(%esp)
	movl %edx,124(%esp)
	jmp .L002entry
.align 16
.L003outer_loop:
	movl %ebx,156(%esp)
	movl %eax,152(%esp)
	movl %ecx,160(%esp)
.L002entry:
	movl $1634760805,%eax
	movl $857760878,4(%esp)
	movl $2036477234,8(%esp)
	movl $1797285236,12(%esp)
	movl 84(%esp),%ebx
	movl 88(%esp),%ebp
	movl 104(%esp),%ecx
	movl 108(%esp),%esi
	movl 116(%esp),%edx
	movl 120(%esp),%edi
	movl %ebx,20(%esp)
	movl %ebp,24(%esp)
	movl %ecx,40(%esp)
	movl %esi,44(%esp)
	movl %edx,52(%esp)
	movl %edi,56(%esp)
	movl 92(%esp),%ebx
	movl 124(%esp),%edi
	movl 112(%esp),%edx
	movl 80(%esp),%ebp
	movl 96(%esp),%ecx
	movl 100(%esp),%esi
	addl $1,%edx
	movl %ebx,28(%esp)
	movl %edi,60(%esp)
	movl %edx,112(%esp)
	movl $10,%ebx
	jmp .L004loop
.align 16
.L004loop:
	addl %ebp,%eax
	movl %ebx,128(%esp)
	movl %ebp,%ebx
	xorl %eax,%edx
	roll $16,%edx
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 52(%esp),%edi
	roll $12,%ebx
	movl 20(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,48(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,32(%esp)
	roll $16,%edi
	movl %ebx,16(%esp)
	addl %edi,%esi
	movl 40(%esp),%ecx
	xorl %esi,%ebp
	movl 56(%esp),%edx
	roll $12,%ebp
	movl 24(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,52(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,36(%esp)
	roll $16,%edx
	movl %ebp,20(%esp)
	addl %edx,%ecx
	movl 44(%esp),%esi
	xorl %ecx,%ebx
	movl 60(%esp),%edi
	roll $12,%ebx
	movl 28(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,56(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,24(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	roll $12,%ebp
	movl 20(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,%edx
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	roll $16,%edx
	movl %ebp,28(%esp)
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 48(%esp),%edi
	roll $12,%ebx
	movl 24(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,60(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,40(%esp)
	roll $16,%edi
	movl %ebx,20(%esp)
	addl %edi,%esi
	movl 32(%esp),%ecx
	xorl %esi,%ebp
	movl 52(%esp),%edx
	roll $12,%ebp
	movl 28(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,48(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,44(%esp)
	roll $16,%edx
	movl %ebp,24(%esp)
	addl %edx,%ecx
	movl 36(%esp),%esi
	xorl %ecx,%ebx
	movl 56(%esp),%edi
	roll $12,%ebx
	movl 16(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,52(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,28(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	movl 48(%esp),%edx
	roll $12,%ebp
	movl 128(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,56(%esp)
	xorl %esi,%ebp
	roll $7,%ebp
	decl %ebx
	jnz .L004loop
	movl 160(%esp),%ebx
	addl $1634760805,%eax
	addl 80(%esp),%ebp
	addl 96(%esp),%ecx
	addl 100(%esp),%esi
	cmpl $64,%ebx
	jb .L005tail
	movl 156(%esp),%ebx
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	xorl (%ebx),%eax
	xorl 16(%ebx),%ebp
	movl %eax,(%esp)
	movl 152(%esp),%eax
	xorl 32(%ebx),%ecx
	xorl 36(%ebx),%esi
	xorl 48(%ebx),%edx
	xorl 56(%ebx),%edi
	movl %ebp,16(%eax)
	movl %ecx,32(%eax)
	movl %esi,36(%eax)
	movl %edx,48(%eax)
	movl %edi,56(%eax)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	xorl 4(%ebx),%ebp
	xorl 8(%ebx),%ecx
	xorl 12(%ebx),%esi
	xorl 20(%ebx),%edx
	xorl 24(%ebx),%edi
	movl %ebp,4(%eax)
	movl %ecx,8(%eax)
	movl %esi,12(%eax)
	movl %edx,20(%eax)
	movl %edi,24(%eax)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	xorl 28(%ebx),%ebp
	xorl 40(%ebx),%ecx
	xorl 44(%ebx),%esi
	xorl 52(%ebx),%edx
	xorl 60(%ebx),%edi
	leal 64(%ebx),%ebx
	movl %ebp,28(%eax)
	movl (%esp),%ebp
	movl %ecx,40(%eax)
	movl 160(%esp),%ecx
	movl %esi,44(%eax)
	movl %edx,52(%eax)
	movl %edi,60(%eax)
	movl %ebp,(%eax)
	leal 64(%eax),%eax
	subl $64,%ecx
	jnz .L003outer_loop
	jmp .L006done
.L005tail:
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	movl %eax,(%esp)
	movl %ebp,16(%esp)
	movl %ecx,32(%esp)
	movl %esi,36(%esp)
	movl %edx,48(%esp)
	movl %edi,56(%esp)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	movl %ebp,4(%esp)
	movl %ecx,8(%esp)
	movl %esi,12(%esp)
	movl %edx,20(%esp)
	movl %edi,24(%esp)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	movl %ebp,28(%esp)
	movl 156(%esp),%ebp
	movl %ecx,40(%esp)
	movl 152(%esp),%ecx
	movl %esi,44(%esp)
	xorl %esi,%esi
	movl %edx,52(%esp)
	movl %edi,60(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
.L007tail_loop:
	movb (%esi,%ebp,1),%al
	movb (%esp,%esi,1),%dl
	leal 1(%esi),%esi
	xorb %dl,%al
	movb %al,-1(%ecx,%esi,1)
	decl %ebx
	jnz .L007tail_loop
.L006done:
	addl $132,%esp
.L000no_data:
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	#ifdef __CET__

.byte 243,15,30,251
	#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
.Lssse3_shortcut:
	testl $2048,4(%ebp)
	jnz .Lxop_shortcut
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal .Lssse3_data-.Lpic_point(%eax),%eax
	movdqu (%ebx),%xmm3
.L0081x:
	movdqa 32(%eax),%xmm0
	movdqu (%edx),%xmm1
	movdqu 16(%edx),%xmm2
	movdqa (%eax),%xmm6
	movdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp .L009loop1x
.align 16
.L010outer1x:
	movdqa 80(%eax),%xmm3
	movdqa (%esp),%xmm0
	movdqa 16(%esp),%xmm1
	movdqa 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	movl $10,%edx
	movdqa %xmm3,48(%esp)
	jmp .L009loop1x
.align 16
.L009loop1x:
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $57,%xmm1,%xmm1
	pshufd $147,%xmm3,%xmm3
	nop
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $147,%xmm1,%xmm1
	pshufd $57,%xmm3,%xmm3
	decl %edx
	jnz .L009loop1x
	paddd (%esp),%xmm0
	paddd 16(%esp),%xmm1
	paddd 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	cmpl $64,%ecx
	jb .L011tail
	movdqu (%esi),%xmm4
	movdqu 16(%esi),%xmm5
	pxor %xmm4,%xmm0
	movdqu 32(%esi),%xmm4
	pxor %xmm5,%xmm1
	movdqu 48(%esi),%xmm5
	pxor %xmm4,%xmm2
	pxor %xmm5,%xmm3
	leal 64(%esi),%esi
	movdqu %xmm0,(%edi)
	movdqu %xmm1,16(%edi)
	movdqu %xmm2,32(%edi)
	movdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz .L010outer1x
	jmp .L012done
.L011tail:
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
.L013tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz .L013tail_loop
.L012done:
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	#ifdef __CET__

.byte 243,15,30,251
	#endif

	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
.Lxop_shortcut:
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	vzeroupper
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal .Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu (%ebx),%xmm3
	cmpl $256,%ecx
	jb .L0141x
	movl %edx,516(%esp)
	movl %ebx,520(%esp)
	subl $256,%ecx
	leal 384(%esp),%ebp
	vmovdqu (%edx),%xmm7
	vpshufd $0,%xmm3,%xmm0
	vpshufd $85,%xmm3,%xmm1
	vpshufd $170,%xmm3,%xmm2
	vpshufd $255,%xmm3,%xmm3
	vpaddd 48(%eax),%xmm0,%xmm0
	vpshufd $0,%xmm7,%xmm4
	vpshufd $85,%xmm7,%xmm5
	vpsubd 64(%eax),%xmm0,%xmm0
	vpshufd $170,%xmm7,%xmm6
	vpshufd $255,%xmm7,%xmm7
	vmovdqa %xmm0,64(%ebp)
	vmovdqa %xmm1,80(%ebp)
	vmovdqa %xmm2,96(%ebp)
	vmovdqa %xmm3,112(%ebp)
	vmovdqu 16(%edx),%xmm3
	vmovdqa %xmm4,-64(%ebp)
	vmovdqa %xmm5,-48(%ebp)
	vmovdqa %xmm6,-32(%ebp)
	vmovdqa %xmm7,-16(%ebp)
	vmovdqa 32(%eax),%xmm7
	leal 128(%esp),%ebx
	vpshufd $0,%xmm3,%xmm0
	vpshufd $85,%xmm3,%xmm1
	vpshufd $170,%xmm3,%xmm2
	vpshufd $255,%xmm3,%xmm3
	vpshufd $0,%xmm7,%xmm4
	vpshufd $85,%xmm7,%xmm5
	vpshufd $170,%xmm7,%xmm6
	vpshufd $255,%xmm7,%xmm7
	vmovdqa %xmm0,(%ebp)
	vmovdqa %xmm1,16(%ebp)
	vmovdqa %xmm2,32(%ebp)
	vmovdqa %xmm3,48(%ebp)
	vmovdqa %xmm4,-128(%ebp)
	vmovdqa %xmm5,-112(%ebp)
	vmovdqa %xmm6,-96(%ebp)
	vmovdqa %xmm7,-80(%ebp)
	leal 128(%esi),%esi
	leal 128(%edi),%edi
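/* Four-block XOP path: the 16 ChaCha state words are kept splatted
   across four lanes (one lane per block), so each .L016loop iteration
   runs one double round on four blocks in parallel; the .byte
   143,232,120,194,... sequences encode XOP vprotd rotates. */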
	jmp .L015outer_loop
.align 32
.L015outer_loop:
	vmovdqa -112(%ebp),%xmm1
	vmovdqa -96(%ebp),%xmm2
	vmovdqa -80(%ebp),%xmm3
	vmovdqa -48(%ebp),%xmm5
	vmovdqa -32(%ebp),%xmm6
	vmovdqa -16(%ebp),%xmm7
	vmovdqa %xmm1,-112(%ebx)
	vmovdqa %xmm2,-96(%ebx)
	vmovdqa %xmm3,-80(%ebx)
	vmovdqa %xmm5,-48(%ebx)
	vmovdqa %xmm6,-32(%ebx)
	vmovdqa %xmm7,-16(%ebx)
	vmovdqa 32(%ebp),%xmm2
	vmovdqa 48(%ebp),%xmm3
	vmovdqa 64(%ebp),%xmm4
	vmovdqa 80(%ebp),%xmm5
	vmovdqa 96(%ebp),%xmm6
	vmovdqa 112(%ebp),%xmm7
	vpaddd 64(%eax),%xmm4,%xmm4
	vmovdqa %xmm2,32(%ebx)
	vmovdqa %xmm3,48(%ebx)
	vmovdqa %xmm4,64(%ebx)
	vmovdqa %xmm5,80(%ebx)
	vmovdqa %xmm6,96(%ebx)
	vmovdqa %xmm7,112(%ebx)
	vmovdqa %xmm4,64(%ebp)
	vmovdqa -128(%ebp),%xmm0
	vmovdqa %xmm4,%xmm6
	vmovdqa -64(%ebp),%xmm3
	vmovdqa (%ebp),%xmm4
	vmovdqa 16(%ebp),%xmm5
	movl $10,%edx
	nop
.align 32
.L016loop:
	vpaddd %xmm3,%xmm0,%xmm0
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
	vpaddd %xmm6,%xmm4,%xmm4
	vpxor %xmm4,%xmm3,%xmm2
	vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -48(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 80(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-128(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,64(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
	vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-64(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa 32(%ebx),%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -32(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 96(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-112(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,80(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
	vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-48(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa 48(%ebx),%xmm5
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -16(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 112(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-96(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,96(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-32(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -48(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-80(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-16(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -32(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 64(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-128(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,112(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
	vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-48(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa (%ebx),%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -16(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 80(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-112(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,64(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
	vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-32(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa 16(%ebx),%xmm5
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -64(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 96(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-96(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,80(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-16(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 64(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-80(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,96(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
	decl %edx
	jnz .L016loop
	vmovdqa %xmm3,-64(%ebx)
	vmovdqa %xmm4,(%ebx)
	vmovdqa %xmm5,16(%ebx)
	vmovdqa %xmm6,64(%ebx)
	vmovdqa %xmm7,96(%ebx)
	vmovdqa -112(%ebx),%xmm1
	vmovdqa -96(%ebx),%xmm2
	vmovdqa -80(%ebx),%xmm3
	vpaddd -128(%ebp),%xmm0,%xmm0
	vpaddd -112(%ebp),%xmm1,%xmm1
	vpaddd -96(%ebp),%xmm2,%xmm2
	vpaddd -80(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa -64(%ebx),%xmm0
	vmovdqa -48(%ebx),%xmm1
	vmovdqa -32(%ebx),%xmm2
	vmovdqa -16(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd -64(%ebp),%xmm0,%xmm0
	vpaddd -48(%ebp),%xmm1,%xmm1
	vpaddd -32(%ebp),%xmm2,%xmm2
	vpaddd -16(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa (%ebx),%xmm0
	vmovdqa 16(%ebx),%xmm1
	vmovdqa 32(%ebx),%xmm2
	vmovdqa 48(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd (%ebp),%xmm0,%xmm0
	vpaddd 16(%ebp),%xmm1,%xmm1
	vpaddd 32(%ebp),%xmm2,%xmm2
	vpaddd 48(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa 64(%ebx),%xmm0
	vmovdqa 80(%ebx),%xmm1
	vmovdqa 96(%ebx),%xmm2
	vmovdqa 112(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd 64(%ebp),%xmm0,%xmm0
	vpaddd 80(%ebp),%xmm1,%xmm1
	vpaddd 96(%ebp),%xmm2,%xmm2
	vpaddd 112(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 208(%esi),%esi
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 208(%edi),%edi
	subl $256,%ecx
	jnc .L015outer_loop
	addl $256,%ecx
	jz .L017done
	movl 520(%esp),%ebx
	leal -128(%esi),%esi
	movl 516(%esp),%edx
	leal -128(%edi),%edi
	vmovd 64(%ebp),%xmm2
	vmovdqu (%ebx),%xmm3
	vpaddd 96(%eax),%xmm2,%xmm2
	vpand 112(%eax),%xmm3,%xmm3
	vpor %xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa 32(%eax),%xmm0
	vmovdqu (%edx),%xmm1
	vmovdqu 16(%edx),%xmm2
	vmovdqa (%eax),%xmm6
	vmovdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	vmovdqa %xmm0,(%esp)
	vmovdqa %xmm1,16(%esp)
	vmovdqa %xmm2,32(%esp)
	vmovdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp .L018loop1x
.align 16
.L019outer1x:
	vmovdqa 80(%eax),%xmm3
	vmovdqa (%esp),%xmm0
	vmovdqa 16(%esp),%xmm1
	vmovdqa 32(%esp),%xmm2
	vpaddd 48(%esp),%xmm3,%xmm3
	movl $10,%edx
	vmovdqa %xmm3,48(%esp)
	jmp .L018loop1x
.align 16
.L018loop1x:
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
	vpshufd $78,%xmm2,%xmm2
	vpshufd $57,%xmm1,%xmm1
	vpshufd $147,%xmm3,%xmm3
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
	vpshufd $78,%xmm2,%xmm2
	vpshufd $147,%xmm1,%xmm1
	vpshufd $57,%xmm3,%xmm3
	decl %edx
	jnz .L018loop1x
	vpaddd (%esp),%xmm0,%xmm0
	vpaddd 16(%esp),%xmm1,%xmm1
	vpaddd 32(%esp),%xmm2,%xmm2
	vpaddd 48(%esp),%xmm3,%xmm3
	cmpl $64,%ecx
	jb .L020tail
	vpxor (%esi),%xmm0,%xmm0
	vpxor 16(%esi),%xmm1,%xmm1
	vpxor 32(%esi),%xmm2,%xmm2
	vpxor 48(%esi),%xmm3,%xmm3
	leal 64(%esi),%esi
	vmovdqu %xmm0,(%edi)
	vmovdqu %xmm1,16(%edi)
	vmovdqu %xmm2,32(%edi)
	vmovdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz .L019outer1x
	jmp .L017done
.L020tail:
	vmovdqa %xmm0,(%esp)
	vmovdqa %xmm1,16(%esp)
	vmovdqa %xmm2,32(%esp)
	vmovdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
.L021tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz .L021tail_loop
.L017done:
	vzeroupper
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4

	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4:
#endif