/* Do not modify. This file is auto-generated from chacha-x86.pl. */
#ifdef PIC
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	xorl %eax,%eax
	cmpl 28(%esp),%eax
	je .L000no_data
	call .Lpic_point
.Lpic_point:
	popl %eax
	leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	testl $16777216,(%ebp)
	jz .L001x86
	testl $512,4(%ebp)
	jz .L001x86
	jmp .Lssse3_shortcut
.L001x86:
	movl 32(%esp),%esi
	movl 36(%esp),%edi
	subl $132,%esp
	movl (%esi),%eax
	movl 4(%esi),%ebx
	movl 8(%esi),%ecx
	movl 12(%esi),%edx
	movl %eax,80(%esp)
	movl %ebx,84(%esp)
	movl %ecx,88(%esp)
	movl %edx,92(%esp)
	movl 16(%esi),%eax
	movl 20(%esi),%ebx
	movl 24(%esi),%ecx
	movl 28(%esi),%edx
	movl %eax,96(%esp)
	movl %ebx,100(%esp)
	movl %ecx,104(%esp)
	movl %edx,108(%esp)
	movl (%edi),%eax
	movl 4(%edi),%ebx
	movl 8(%edi),%ecx
	movl 12(%edi),%edx
	subl $1,%eax
	movl %eax,112(%esp)
	movl %ebx,116(%esp)
	movl %ecx,120(%esp)
	movl %edx,124(%esp)
	jmp .L002entry
.align 16
.L003outer_loop:
	movl %ebx,156(%esp)
	movl %eax,152(%esp)
	movl %ecx,160(%esp)
.L002entry:
	movl $1634760805,%eax
	movl $857760878,4(%esp)
	movl $2036477234,8(%esp)
	movl $1797285236,12(%esp)
	movl 84(%esp),%ebx
	movl 88(%esp),%ebp
	movl 104(%esp),%ecx
	movl 108(%esp),%esi
	movl 116(%esp),%edx
	movl 120(%esp),%edi
	movl %ebx,20(%esp)
	movl %ebp,24(%esp)
	movl %ecx,40(%esp)
	movl %esi,44(%esp)
	movl %edx,52(%esp)
	movl %edi,56(%esp)
	movl 92(%esp),%ebx
	movl 124(%esp),%edi
	movl 112(%esp),%edx
	movl 80(%esp),%ebp
	movl 96(%esp),%ecx
	movl 100(%esp),%esi
	addl $1,%edx
	movl %ebx,28(%esp)
	movl %edi,60(%esp)
	movl %edx,112(%esp)
	movl $10,%ebx
	jmp .L004loop
.align 16
.L004loop:
	addl %ebp,%eax
	movl %ebx,128(%esp)
	movl %ebp,%ebx
	xorl %eax,%edx
	roll $16,%edx
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 52(%esp),%edi
	roll $12,%ebx
	movl 20(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,48(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,32(%esp)
	roll $16,%edi
	movl %ebx,16(%esp)
	addl %edi,%esi
	movl 40(%esp),%ecx
	xorl %esi,%ebp
	movl 56(%esp),%edx
	roll $12,%ebp
	movl 24(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,52(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,36(%esp)
	roll $16,%edx
	movl %ebp,20(%esp)
	addl %edx,%ecx
	movl 44(%esp),%esi
	xorl %ecx,%ebx
	movl 60(%esp),%edi
	roll $12,%ebx
	movl 28(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,56(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,24(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	roll $12,%ebp
	movl 20(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,%edx
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	roll $16,%edx
	movl %ebp,28(%esp)
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 48(%esp),%edi
	roll $12,%ebx
	movl 24(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,60(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,40(%esp)
	roll $16,%edi
	movl %ebx,20(%esp)
	addl %edi,%esi
	movl 32(%esp),%ecx
	xorl %esi,%ebp
	movl 52(%esp),%edx
	roll $12,%ebp
	movl 28(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,48(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,44(%esp)
	roll $16,%edx
	movl %ebp,24(%esp)
	addl %edx,%ecx
	movl 36(%esp),%esi
	xorl %ecx,%ebx
	movl 56(%esp),%edi
	roll $12,%ebx
	movl 16(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,52(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,28(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	movl 48(%esp),%edx
	roll $12,%ebp
	movl 128(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,56(%esp)
	xorl %esi,%ebp
	roll $7,%ebp
	decl %ebx
	jnz .L004loop
	movl 160(%esp),%ebx
	addl $1634760805,%eax
	addl 80(%esp),%ebp
	addl 96(%esp),%ecx
	addl 100(%esp),%esi
	cmpl $64,%ebx
	jb .L005tail
	movl 156(%esp),%ebx
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	xorl (%ebx),%eax
	xorl 16(%ebx),%ebp
	movl %eax,(%esp)
	movl 152(%esp),%eax
	xorl 32(%ebx),%ecx
	xorl 36(%ebx),%esi
	xorl 48(%ebx),%edx
	xorl 56(%ebx),%edi
	movl %ebp,16(%eax)
	movl %ecx,32(%eax)
	movl %esi,36(%eax)
	movl %edx,48(%eax)
	movl %edi,56(%eax)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	xorl 4(%ebx),%ebp
	xorl 8(%ebx),%ecx
	xorl 12(%ebx),%esi
	xorl 20(%ebx),%edx
	xorl 24(%ebx),%edi
	movl %ebp,4(%eax)
	movl %ecx,8(%eax)
	movl %esi,12(%eax)
	movl %edx,20(%eax)
	movl %edi,24(%eax)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	xorl 28(%ebx),%ebp
	xorl 40(%ebx),%ecx
	xorl 44(%ebx),%esi
	xorl 52(%ebx),%edx
	xorl 60(%ebx),%edi
	leal 64(%ebx),%ebx
	movl %ebp,28(%eax)
	movl (%esp),%ebp
	movl %ecx,40(%eax)
	movl 160(%esp),%ecx
	movl %esi,44(%eax)
	movl %edx,52(%eax)
	movl %edi,60(%eax)
	movl %ebp,(%eax)
	leal 64(%eax),%eax
	subl $64,%ecx
	jnz .L003outer_loop
	jmp .L006done
.L005tail:
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	movl %eax,(%esp)
	movl %ebp,16(%esp)
	movl %ecx,32(%esp)
	movl %esi,36(%esp)
	movl %edx,48(%esp)
	movl %edi,56(%esp)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	movl %ebp,4(%esp)
	movl %ecx,8(%esp)
	movl %esi,12(%esp)
	movl %edx,20(%esp)
	movl %edi,24(%esp)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	movl %ebp,28(%esp)
	movl 156(%esp),%ebp
	movl %ecx,40(%esp)
	movl 152(%esp),%ecx
	movl %esi,44(%esp)
	xorl %esi,%esi
	movl %edx,52(%esp)
	movl %edi,60(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
.L007tail_loop:
	movb (%esi,%ebp,1),%al
	movb (%esp,%esi,1),%dl
	leal 1(%esi),%esi
	xorb %dl,%al
	movb %al,-1(%ecx,%esi,1)
	decl %ebx
	jnz .L007tail_loop
.L006done:
	addl $132,%esp
.L000no_data:
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
.Lssse3_shortcut:
	testl $2048,4(%ebp)
	jnz .Lxop_shortcut
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal .Lssse3_data-.Lpic_point(%eax),%eax
	movdqu (%ebx),%xmm3
.L0081x:
	movdqa 32(%eax),%xmm0
	movdqu (%edx),%xmm1
	movdqu 16(%edx),%xmm2
	movdqa (%eax),%xmm6
	movdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp .L009loop1x
.align 16
.L010outer1x:
	movdqa 80(%eax),%xmm3
	movdqa (%esp),%xmm0
	movdqa 16(%esp),%xmm1
	movdqa 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	movl $10,%edx
	movdqa %xmm3,48(%esp)
	jmp .L009loop1x
.align 16
.L009loop1x:
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $57,%xmm1,%xmm1
	pshufd $147,%xmm3,%xmm3
	nop
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $147,%xmm1,%xmm1
	pshufd $57,%xmm3,%xmm3
	decl %edx
	jnz .L009loop1x
	paddd (%esp),%xmm0
	paddd 16(%esp),%xmm1
	paddd 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	cmpl $64,%ecx
	jb .L011tail
	movdqu (%esi),%xmm4
	movdqu 16(%esi),%xmm5
	pxor %xmm4,%xmm0
	movdqu 32(%esi),%xmm4
	pxor %xmm5,%xmm1
	movdqu 48(%esi),%xmm5
	pxor %xmm4,%xmm2
	pxor %xmm5,%xmm3
	leal 64(%esi),%esi
	movdqu %xmm0,(%edi)
	movdqu %xmm1,16(%edi)
	movdqu %xmm2,32(%edi)
	movdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz .L010outer1x
	jmp .L012done
.L011tail:
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
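/* Fewer than 64 bytes remain: the final keystream block was just spilled
   to (%esp)-48(%esp), so it is XORed against the input one byte at a time. */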
	xorl %ebp,%ebp
.L013tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz .L013tail_loop
.L012done:
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
.Lxop_shortcut:
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	vzeroupper
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal .Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu (%ebx),%xmm3
	cmpl $256,%ecx
	jb .L0141x
	movl %edx,516(%esp)
	movl %ebx,520(%esp)
	subl $256,%ecx
	leal 384(%esp),%ebp
	vmovdqu (%edx),%xmm7
	vpshufd $0,%xmm3,%xmm0
	vpshufd $85,%xmm3,%xmm1
	vpshufd $170,%xmm3,%xmm2
	vpshufd $255,%xmm3,%xmm3
	vpaddd 48(%eax),%xmm0,%xmm0
	vpshufd $0,%xmm7,%xmm4
	vpshufd $85,%xmm7,%xmm5
	vpsubd 64(%eax),%xmm0,%xmm0
	vpshufd $170,%xmm7,%xmm6
	vpshufd $255,%xmm7,%xmm7
	vmovdqa %xmm0,64(%ebp)
	vmovdqa %xmm1,80(%ebp)
	vmovdqa %xmm2,96(%ebp)
	vmovdqa %xmm3,112(%ebp)
	vmovdqu 16(%edx),%xmm3
	vmovdqa %xmm4,-64(%ebp)
	vmovdqa %xmm5,-48(%ebp)
	vmovdqa %xmm6,-32(%ebp)
	vmovdqa %xmm7,-16(%ebp)
	vmovdqa 32(%eax),%xmm7
	leal 128(%esp),%ebx
	vpshufd $0,%xmm3,%xmm0
	vpshufd $85,%xmm3,%xmm1
	vpshufd $170,%xmm3,%xmm2
	vpshufd $255,%xmm3,%xmm3
	vpshufd $0,%xmm7,%xmm4
	vpshufd $85,%xmm7,%xmm5
	vpshufd $170,%xmm7,%xmm6
	vpshufd $255,%xmm7,%xmm7
	vmovdqa %xmm0,(%ebp)
	vmovdqa %xmm1,16(%ebp)
	vmovdqa %xmm2,32(%ebp)
	vmovdqa %xmm3,48(%ebp)
	vmovdqa %xmm4,-128(%ebp)
	vmovdqa %xmm5,-112(%ebp)
	vmovdqa %xmm6,-96(%ebp)
	vmovdqa %xmm7,-80(%ebp)
	leal 128(%esi),%esi
	leal 128(%edi),%edi
	jmp .L015outer_loop
.align 32
.L015outer_loop:
	vmovdqa -112(%ebp),%xmm1
	vmovdqa -96(%ebp),%xmm2
	vmovdqa -80(%ebp),%xmm3
	vmovdqa -48(%ebp),%xmm5
	vmovdqa -32(%ebp),%xmm6
	vmovdqa -16(%ebp),%xmm7
	vmovdqa %xmm1,-112(%ebx)
	vmovdqa %xmm2,-96(%ebx)
	vmovdqa %xmm3,-80(%ebx)
	vmovdqa %xmm5,-48(%ebx)
	vmovdqa %xmm6,-32(%ebx)
	vmovdqa %xmm7,-16(%ebx)
	vmovdqa 32(%ebp),%xmm2
	vmovdqa 48(%ebp),%xmm3
	vmovdqa 64(%ebp),%xmm4
	vmovdqa 80(%ebp),%xmm5
	vmovdqa 96(%ebp),%xmm6
	vmovdqa 112(%ebp),%xmm7
	vpaddd 64(%eax),%xmm4,%xmm4
	vmovdqa %xmm2,32(%ebx)
	vmovdqa %xmm3,48(%ebx)
	vmovdqa %xmm4,64(%ebx)
	vmovdqa %xmm5,80(%ebx)
	vmovdqa %xmm6,96(%ebx)
	vmovdqa %xmm7,112(%ebx)
	vmovdqa %xmm4,64(%ebp)
	vmovdqa -128(%ebp),%xmm0
	vmovdqa %xmm4,%xmm6
	vmovdqa -64(%ebp),%xmm3
	vmovdqa (%ebp),%xmm4
	vmovdqa 16(%ebp),%xmm5
	movl $10,%edx
	nop
.align 32
.L016loop:
	vpaddd %xmm3,%xmm0,%xmm0
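/* In this loop the .byte 143,232,120,194,... sequences encode the XOP
   vprotd instruction (packed rotate-left by the trailing immediate:
   16, 12, 8 or 7), emitted as raw bytes for assemblers without XOP support. */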
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
	vpaddd %xmm6,%xmm4,%xmm4
	vpxor %xmm4,%xmm3,%xmm2
	vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -48(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 80(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-128(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,64(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
	vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-64(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa 32(%ebx),%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -32(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 96(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-112(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,80(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
	vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-48(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa 48(%ebx),%xmm5
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -16(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 112(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-96(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,96(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-32(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -48(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-80(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-16(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -32(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 64(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-128(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,112(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
	vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-48(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa (%ebx),%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -16(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 80(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-112(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,64(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
	vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-32(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa 16(%ebx),%xmm5
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -64(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 96(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-96(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,80(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-16(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 64(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-80(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,96(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
	decl %edx
	jnz .L016loop
	vmovdqa %xmm3,-64(%ebx)
	vmovdqa %xmm4,(%ebx)
	vmovdqa %xmm5,16(%ebx)
	vmovdqa %xmm6,64(%ebx)
	vmovdqa %xmm7,96(%ebx)
	vmovdqa -112(%ebx),%xmm1
	vmovdqa -96(%ebx),%xmm2
	vmovdqa -80(%ebx),%xmm3
	vpaddd -128(%ebp),%xmm0,%xmm0
	vpaddd -112(%ebp),%xmm1,%xmm1
	vpaddd -96(%ebp),%xmm2,%xmm2
	vpaddd -80(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa -64(%ebx),%xmm0
	vmovdqa -48(%ebx),%xmm1
	vmovdqa -32(%ebx),%xmm2
	vmovdqa -16(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd -64(%ebp),%xmm0,%xmm0
	vpaddd -48(%ebp),%xmm1,%xmm1
	vpaddd -32(%ebp),%xmm2,%xmm2
	vpaddd -16(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa (%ebx),%xmm0
	vmovdqa 16(%ebx),%xmm1
	vmovdqa 32(%ebx),%xmm2
	vmovdqa 48(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd (%ebp),%xmm0,%xmm0
	vpaddd 16(%ebp),%xmm1,%xmm1
	vpaddd 32(%ebp),%xmm2,%xmm2
	vpaddd 48(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa 64(%ebx),%xmm0
	vmovdqa 80(%ebx),%xmm1
	vmovdqa 96(%ebx),%xmm2
	vmovdqa 112(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd 64(%ebp),%xmm0,%xmm0
	vpaddd 80(%ebp),%xmm1,%xmm1
	vpaddd 96(%ebp),%xmm2,%xmm2
	vpaddd 112(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 208(%esi),%esi
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 208(%edi),%edi
	subl $256,%ecx
	jnc .L015outer_loop
	addl $256,%ecx
	jz .L017done
	movl 520(%esp),%ebx
	leal -128(%esi),%esi
	movl 516(%esp),%edx
	leal -128(%edi),%edi
	vmovd 64(%ebp),%xmm2
	vmovdqu (%ebx),%xmm3
	vpaddd 96(%eax),%xmm2,%xmm2
	vpand 112(%eax),%xmm3,%xmm3
	vpor %xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa 32(%eax),%xmm0
	vmovdqu (%edx),%xmm1
	vmovdqu 16(%edx),%xmm2
	vmovdqa (%eax),%xmm6
	vmovdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	vmovdqa %xmm0,(%esp)
	vmovdqa %xmm1,16(%esp)
	vmovdqa %xmm2,32(%esp)
	vmovdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp .L018loop1x
.align 16
.L019outer1x:
	vmovdqa 80(%eax),%xmm3
	vmovdqa (%esp),%xmm0
	vmovdqa 16(%esp),%xmm1
	vmovdqa 32(%esp),%xmm2
	vpaddd 48(%esp),%xmm3,%xmm3
	movl $10,%edx
	vmovdqa %xmm3,48(%esp)
	jmp .L018loop1x
.align 16
.L018loop1x:
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
	vpshufd $78,%xmm2,%xmm2
	vpshufd $57,%xmm1,%xmm1
	vpshufd $147,%xmm3,%xmm3
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
	vpshufd $78,%xmm2,%xmm2
	vpshufd $147,%xmm1,%xmm1
	vpshufd $57,%xmm3,%xmm3
	decl %edx
	jnz .L018loop1x
	vpaddd (%esp),%xmm0,%xmm0
	vpaddd 16(%esp),%xmm1,%xmm1
	vpaddd 32(%esp),%xmm2,%xmm2
	vpaddd 48(%esp),%xmm3,%xmm3
	cmpl $64,%ecx
	jb .L020tail
	vpxor (%esi),%xmm0,%xmm0
	vpxor 16(%esi),%xmm1,%xmm1
	vpxor 32(%esi),%xmm2,%xmm2
	vpxor 48(%esi),%xmm3,%xmm3
	leal 64(%esi),%esi
	vmovdqu %xmm0,(%edi)
	vmovdqu %xmm1,16(%edi)
	vmovdqu %xmm2,32(%edi)
	vmovdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz .L019outer1x
	jmp .L017done
.L020tail:
	vmovdqa %xmm0,(%esp)
	vmovdqa %xmm1,16(%esp)
	vmovdqa %xmm2,32(%esp)
	vmovdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
.L021tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz .L021tail_loop
.L017done:
	vzeroupper
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
#else
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	xorl %eax,%eax
	cmpl 28(%esp),%eax
	je .L000no_data
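/* Non-PIC build: OPENSSL_ia32cap_P is referenced absolutely, but the
   call/pop below still anchors %eax at .Lpic_point for the
   .Lssse3_data-relative loads in the SIMD paths. */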
	call .Lpic_point
.Lpic_point:
	popl %eax
	leal OPENSSL_ia32cap_P,%ebp
	testl $16777216,(%ebp)
	jz .L001x86
	testl $512,4(%ebp)
	jz .L001x86
	jmp .Lssse3_shortcut
.L001x86:
	movl 32(%esp),%esi
	movl 36(%esp),%edi
	subl $132,%esp
	movl (%esi),%eax
	movl 4(%esi),%ebx
	movl 8(%esi),%ecx
	movl 12(%esi),%edx
	movl %eax,80(%esp)
	movl %ebx,84(%esp)
	movl %ecx,88(%esp)
	movl %edx,92(%esp)
	movl 16(%esi),%eax
	movl 20(%esi),%ebx
	movl 24(%esi),%ecx
	movl 28(%esi),%edx
	movl %eax,96(%esp)
	movl %ebx,100(%esp)
	movl %ecx,104(%esp)
	movl %edx,108(%esp)
	movl (%edi),%eax
	movl 4(%edi),%ebx
	movl 8(%edi),%ecx
	movl 12(%edi),%edx
	subl $1,%eax
	movl %eax,112(%esp)
	movl %ebx,116(%esp)
	movl %ecx,120(%esp)
	movl %edx,124(%esp)
	jmp .L002entry
.align 16
.L003outer_loop:
	movl %ebx,156(%esp)
	movl %eax,152(%esp)
	movl %ecx,160(%esp)
.L002entry:
	movl $1634760805,%eax
	movl $857760878,4(%esp)
	movl $2036477234,8(%esp)
	movl $1797285236,12(%esp)
	movl 84(%esp),%ebx
	movl 88(%esp),%ebp
	movl 104(%esp),%ecx
	movl 108(%esp),%esi
	movl 116(%esp),%edx
	movl 120(%esp),%edi
	movl %ebx,20(%esp)
	movl %ebp,24(%esp)
	movl %ecx,40(%esp)
	movl %esi,44(%esp)
	movl %edx,52(%esp)
	movl %edi,56(%esp)
	movl 92(%esp),%ebx
	movl 124(%esp),%edi
	movl 112(%esp),%edx
	movl 80(%esp),%ebp
	movl 96(%esp),%ecx
	movl 100(%esp),%esi
	addl $1,%edx
	movl %ebx,28(%esp)
	movl %edi,60(%esp)
	movl %edx,112(%esp)
	movl $10,%ebx
	jmp .L004loop
.align 16
.L004loop:
	addl %ebp,%eax
	movl %ebx,128(%esp)
	movl %ebp,%ebx
	xorl %eax,%edx
	roll $16,%edx
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 52(%esp),%edi
	roll $12,%ebx
	movl 20(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,48(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,32(%esp)
	roll $16,%edi
	movl %ebx,16(%esp)
	addl %edi,%esi
	movl 40(%esp),%ecx
	xorl %esi,%ebp
	movl 56(%esp),%edx
	roll $12,%ebp
	movl 24(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,52(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,36(%esp)
	roll $16,%edx
	movl %ebp,20(%esp)
	addl %edx,%ecx
	movl 44(%esp),%esi
	xorl %ecx,%ebx
	movl 60(%esp),%edi
	roll $12,%ebx
	movl 28(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,56(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,24(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	roll $12,%ebp
	movl 20(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,%edx
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	roll $16,%edx
	movl %ebp,28(%esp)
	addl %edx,%ecx
	xorl %ecx,%ebx
	movl 48(%esp),%edi
	roll $12,%ebx
	movl 24(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,(%esp)
	roll $8,%edx
	movl 4(%esp),%eax
	addl %edx,%ecx
	movl %edx,60(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	movl %ecx,40(%esp)
	roll $16,%edi
	movl %ebx,20(%esp)
	addl %edi,%esi
	movl 32(%esp),%ecx
	xorl %esi,%ebp
	movl 52(%esp),%edx
	roll $12,%ebp
	movl 28(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,4(%esp)
	roll $8,%edi
	movl 8(%esp),%eax
	addl %edi,%esi
	movl %edi,48(%esp)
	xorl %esi,%ebp
	addl %ebx,%eax
	roll $7,%ebp
	xorl %eax,%edx
	movl %esi,44(%esp)
	roll $16,%edx
	movl %ebp,24(%esp)
	addl %edx,%ecx
	movl 36(%esp),%esi
	xorl %ecx,%ebx
	movl 56(%esp),%edi
	roll $12,%ebx
	movl 16(%esp),%ebp
	addl %ebx,%eax
	xorl %eax,%edx
	movl %eax,8(%esp)
	roll $8,%edx
	movl 12(%esp),%eax
	addl %edx,%ecx
	movl %edx,52(%esp)
	xorl %ecx,%ebx
	addl %ebp,%eax
	roll $7,%ebx
	xorl %eax,%edi
	roll $16,%edi
	movl %ebx,28(%esp)
	addl %edi,%esi
	xorl %esi,%ebp
	movl 48(%esp),%edx
	roll $12,%ebp
	movl 128(%esp),%ebx
	addl %ebp,%eax
	xorl %eax,%edi
	movl %eax,12(%esp)
	roll $8,%edi
	movl (%esp),%eax
	addl %edi,%esi
	movl %edi,56(%esp)
	xorl %esi,%ebp
	roll $7,%ebp
	decl %ebx
	jnz .L004loop
	movl 160(%esp),%ebx
	addl $1634760805,%eax
	addl 80(%esp),%ebp
	addl 96(%esp),%ecx
	addl 100(%esp),%esi
	cmpl $64,%ebx
	jb .L005tail
	movl 156(%esp),%ebx
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	xorl (%ebx),%eax
	xorl 16(%ebx),%ebp
	movl %eax,(%esp)
	movl 152(%esp),%eax
	xorl 32(%ebx),%ecx
	xorl 36(%ebx),%esi
	xorl 48(%ebx),%edx
	xorl 56(%ebx),%edi
	movl %ebp,16(%eax)
	movl %ecx,32(%eax)
	movl %esi,36(%eax)
	movl %edx,48(%eax)
	movl %edi,56(%eax)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
	xorl 4(%ebx),%ebp
	xorl 8(%ebx),%ecx
	xorl 12(%ebx),%esi
	xorl 20(%ebx),%edx
	xorl 24(%ebx),%edi
	movl %ebp,4(%eax)
	movl %ecx,8(%eax)
	movl %esi,12(%eax)
	movl %edx,20(%eax)
	movl %edi,24(%eax)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	xorl 28(%ebx),%ebp
	xorl 40(%ebx),%ecx
	xorl 44(%ebx),%esi
	xorl 52(%ebx),%edx
	xorl 60(%ebx),%edi
	leal 64(%ebx),%ebx
	movl %ebp,28(%eax)
	movl (%esp),%ebp
	movl %ecx,40(%eax)
	movl 160(%esp),%ecx
	movl %esi,44(%eax)
	movl %edx,52(%eax)
	movl %edi,60(%eax)
	movl %ebp,(%eax)
	leal 64(%eax),%eax
	subl $64,%ecx
	jnz .L003outer_loop
	jmp .L006done
.L005tail:
	addl 112(%esp),%edx
	addl 120(%esp),%edi
	movl %eax,(%esp)
	movl %ebp,16(%esp)
	movl %ecx,32(%esp)
	movl %esi,36(%esp)
	movl %edx,48(%esp)
	movl %edi,56(%esp)
	movl 4(%esp),%ebp
	movl 8(%esp),%ecx
	movl 12(%esp),%esi
	movl 20(%esp),%edx
	movl 24(%esp),%edi
	addl $857760878,%ebp
	addl $2036477234,%ecx
	addl $1797285236,%esi
	addl 84(%esp),%edx
	addl 88(%esp),%edi
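/* Tail path: the remaining state words are combined with the saved input
   state and the whole keystream block is parked on the stack. */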
	movl %ebp,4(%esp)
	movl %ecx,8(%esp)
	movl %esi,12(%esp)
	movl %edx,20(%esp)
	movl %edi,24(%esp)
	movl 28(%esp),%ebp
	movl 40(%esp),%ecx
	movl 44(%esp),%esi
	movl 52(%esp),%edx
	movl 60(%esp),%edi
	addl 92(%esp),%ebp
	addl 104(%esp),%ecx
	addl 108(%esp),%esi
	addl 116(%esp),%edx
	addl 124(%esp),%edi
	movl %ebp,28(%esp)
	movl 156(%esp),%ebp
	movl %ecx,40(%esp)
	movl 152(%esp),%ecx
	movl %esi,44(%esp)
	xorl %esi,%esi
	movl %edx,52(%esp)
	movl %edi,60(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
.L007tail_loop:
	movb (%esi,%ebp,1),%al
	movb (%esp,%esi,1),%dl
	leal 1(%esi),%esi
	xorb %dl,%al
	movb %al,-1(%ecx,%esi,1)
	decl %ebx
	jnz .L007tail_loop
.L006done:
	addl $132,%esp
.L000no_data:
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
.Lssse3_shortcut:
	testl $2048,4(%ebp)
	jnz .Lxop_shortcut
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal .Lssse3_data-.Lpic_point(%eax),%eax
	movdqu (%ebx),%xmm3
.L0081x:
	movdqa 32(%eax),%xmm0
	movdqu (%edx),%xmm1
	movdqu 16(%edx),%xmm2
	movdqa (%eax),%xmm6
	movdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp .L009loop1x
.align 16
.L010outer1x:
	movdqa 80(%eax),%xmm3
	movdqa (%esp),%xmm0
	movdqa 16(%esp),%xmm1
	movdqa 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	movl $10,%edx
	movdqa %xmm3,48(%esp)
	jmp .L009loop1x
.align 16
.L009loop1x:
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $57,%xmm1,%xmm1
	pshufd $147,%xmm3,%xmm3
	nop
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,222
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $20,%xmm1
	pslld $12,%xmm4
	por %xmm4,%xmm1
	paddd %xmm1,%xmm0
	pxor %xmm0,%xmm3
.byte 102,15,56,0,223
	paddd %xmm3,%xmm2
	pxor %xmm2,%xmm1
	movdqa %xmm1,%xmm4
	psrld $25,%xmm1
	pslld $7,%xmm4
	por %xmm4,%xmm1
	pshufd $78,%xmm2,%xmm2
	pshufd $147,%xmm1,%xmm1
	pshufd $57,%xmm3,%xmm3
	decl %edx
	jnz .L009loop1x
	paddd (%esp),%xmm0
	paddd 16(%esp),%xmm1
	paddd 32(%esp),%xmm2
	paddd 48(%esp),%xmm3
	cmpl $64,%ecx
	jb .L011tail
	movdqu (%esi),%xmm4
	movdqu 16(%esi),%xmm5
	pxor %xmm4,%xmm0
	movdqu 32(%esi),%xmm4
	pxor %xmm5,%xmm1
	movdqu 48(%esi),%xmm5
	pxor %xmm4,%xmm2
	pxor %xmm5,%xmm3
	leal 64(%esi),%esi
	movdqu %xmm0,(%edi)
	movdqu %xmm1,16(%edi)
	movdqu %xmm2,32(%edi)
	movdqu %xmm3,48(%edi)
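/* A full 64-byte block has been written; advance the pointers and loop
   while whole blocks remain. */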
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz .L010outer1x
	jmp .L012done
.L011tail:
	movdqa %xmm0,(%esp)
	movdqa %xmm1,16(%esp)
	movdqa %xmm2,32(%esp)
	movdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
.L013tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz .L013tail_loop
.L012done:
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
.Lxop_shortcut:
	movl 20(%esp),%edi
	movl 24(%esp),%esi
	movl 28(%esp),%ecx
	movl 32(%esp),%edx
	movl 36(%esp),%ebx
	vzeroupper
	movl %esp,%ebp
	subl $524,%esp
	andl $-64,%esp
	movl %ebp,512(%esp)
	leal .Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu (%ebx),%xmm3
	cmpl $256,%ecx
	jb .L0141x
	movl %edx,516(%esp)
	movl %ebx,520(%esp)
	subl $256,%ecx
	leal 384(%esp),%ebp
	vmovdqu (%edx),%xmm7
	vpshufd $0,%xmm3,%xmm0
	vpshufd $85,%xmm3,%xmm1
	vpshufd $170,%xmm3,%xmm2
	vpshufd $255,%xmm3,%xmm3
	vpaddd 48(%eax),%xmm0,%xmm0
	vpshufd $0,%xmm7,%xmm4
	vpshufd $85,%xmm7,%xmm5
	vpsubd 64(%eax),%xmm0,%xmm0
	vpshufd $170,%xmm7,%xmm6
	vpshufd $255,%xmm7,%xmm7
	vmovdqa %xmm0,64(%ebp)
	vmovdqa %xmm1,80(%ebp)
	vmovdqa %xmm2,96(%ebp)
	vmovdqa %xmm3,112(%ebp)
	vmovdqu 16(%edx),%xmm3
	vmovdqa %xmm4,-64(%ebp)
	vmovdqa %xmm5,-48(%ebp)
	vmovdqa %xmm6,-32(%ebp)
	vmovdqa %xmm7,-16(%ebp)
	vmovdqa 32(%eax),%xmm7
	leal 128(%esp),%ebx
	vpshufd $0,%xmm3,%xmm0
	vpshufd $85,%xmm3,%xmm1
	vpshufd $170,%xmm3,%xmm2
	vpshufd $255,%xmm3,%xmm3
	vpshufd $0,%xmm7,%xmm4
	vpshufd $85,%xmm7,%xmm5
	vpshufd $170,%xmm7,%xmm6
	vpshufd $255,%xmm7,%xmm7
	vmovdqa %xmm0,(%ebp)
	vmovdqa %xmm1,16(%ebp)
	vmovdqa %xmm2,32(%ebp)
	vmovdqa %xmm3,48(%ebp)
	vmovdqa %xmm4,-128(%ebp)
	vmovdqa %xmm5,-112(%ebp)
	vmovdqa %xmm6,-96(%ebp)
	vmovdqa %xmm7,-80(%ebp)
	leal 128(%esi),%esi
	leal 128(%edi),%edi
	jmp .L015outer_loop
.align 32
.L015outer_loop:
	vmovdqa -112(%ebp),%xmm1
	vmovdqa -96(%ebp),%xmm2
	vmovdqa -80(%ebp),%xmm3
	vmovdqa -48(%ebp),%xmm5
	vmovdqa -32(%ebp),%xmm6
	vmovdqa -16(%ebp),%xmm7
	vmovdqa %xmm1,-112(%ebx)
	vmovdqa %xmm2,-96(%ebx)
	vmovdqa %xmm3,-80(%ebx)
	vmovdqa %xmm5,-48(%ebx)
	vmovdqa %xmm6,-32(%ebx)
	vmovdqa %xmm7,-16(%ebx)
	vmovdqa 32(%ebp),%xmm2
	vmovdqa 48(%ebp),%xmm3
	vmovdqa 64(%ebp),%xmm4
	vmovdqa 80(%ebp),%xmm5
	vmovdqa 96(%ebp),%xmm6
	vmovdqa 112(%ebp),%xmm7
	vpaddd 64(%eax),%xmm4,%xmm4
	vmovdqa %xmm2,32(%ebx)
	vmovdqa %xmm3,48(%ebx)
	vmovdqa %xmm4,64(%ebx)
	vmovdqa %xmm5,80(%ebx)
	vmovdqa %xmm6,96(%ebx)
	vmovdqa %xmm7,112(%ebx)
	vmovdqa %xmm4,64(%ebp)
	vmovdqa -128(%ebp),%xmm0
	vmovdqa %xmm4,%xmm6
	vmovdqa -64(%ebp),%xmm3
	vmovdqa (%ebp),%xmm4
	vmovdqa 16(%ebp),%xmm5
	movl $10,%edx
	nop
.align 32
.L016loop:
	vpaddd %xmm3,%xmm0,%xmm0
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
	vpaddd %xmm6,%xmm4,%xmm4
	vpxor %xmm4,%xmm3,%xmm2
	vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -48(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 80(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-128(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,64(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
	vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-64(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa 32(%ebx),%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -32(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 96(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-112(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,80(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
	vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-48(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa 48(%ebx),%xmm5
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -16(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 112(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-96(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,96(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-32(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -48(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-80(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-16(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -32(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 64(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-128(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,112(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
	vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-48(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa (%ebx),%xmm4
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vmovdqa -16(%ebx),%xmm2
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 80(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
	vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-112(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,64(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
	vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
	vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
	vmovdqa %xmm3,-32(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa 16(%ebx),%xmm5
	vpxor %xmm4,%xmm2,%xmm2
	vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
	vmovdqa -64(%ebx),%xmm3
	vpaddd %xmm2,%xmm0,%xmm0
	vmovdqa 96(%ebx),%xmm7
	vpxor %xmm0,%xmm6,%xmm6
	vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
	vmovdqa %xmm0,-96(%ebx)
	vpaddd %xmm6,%xmm4,%xmm4
	vmovdqa %xmm6,80(%ebx)
	vpxor %xmm4,%xmm2,%xmm2
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
	vmovdqa %xmm2,-16(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vpxor %xmm5,%xmm3,%xmm3
	vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
	vpaddd %xmm3,%xmm1,%xmm1
	vmovdqa 64(%ebx),%xmm6
	vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
	vmovdqa %xmm1,-80(%ebx)
	vpaddd %xmm7,%xmm5,%xmm5
	vmovdqa %xmm7,96(%ebx)
	vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
	decl %edx
	jnz .L016loop
	vmovdqa %xmm3,-64(%ebx)
	vmovdqa %xmm4,(%ebx)
	vmovdqa %xmm5,16(%ebx)
	vmovdqa %xmm6,64(%ebx)
	vmovdqa %xmm7,96(%ebx)
	vmovdqa -112(%ebx),%xmm1
	vmovdqa -96(%ebx),%xmm2
	vmovdqa -80(%ebx),%xmm3
	vpaddd -128(%ebp),%xmm0,%xmm0
	vpaddd -112(%ebp),%xmm1,%xmm1
	vpaddd -96(%ebp),%xmm2,%xmm2
	vpaddd -80(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
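/* XOR the transposed keystream rows with the input; the four interleaved
   blocks lie 64 bytes apart in the stream. */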
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa -64(%ebx),%xmm0
	vmovdqa -48(%ebx),%xmm1
	vmovdqa -32(%ebx),%xmm2
	vmovdqa -16(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd -64(%ebp),%xmm0,%xmm0
	vpaddd -48(%ebp),%xmm1,%xmm1
	vpaddd -32(%ebp),%xmm2,%xmm2
	vpaddd -16(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa (%ebx),%xmm0
	vmovdqa 16(%ebx),%xmm1
	vmovdqa 32(%ebx),%xmm2
	vmovdqa 48(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd (%ebp),%xmm0,%xmm0
	vpaddd 16(%ebp),%xmm1,%xmm1
	vpaddd 32(%ebp),%xmm2,%xmm2
	vpaddd 48(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 16(%esi),%esi
	vmovdqa 64(%ebx),%xmm0
	vmovdqa 80(%ebx),%xmm1
	vmovdqa 96(%ebx),%xmm2
	vmovdqa 112(%ebx),%xmm3
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 16(%edi),%edi
	vpaddd 64(%ebp),%xmm0,%xmm0
	vpaddd 80(%ebp),%xmm1,%xmm1
	vpaddd 96(%ebp),%xmm2,%xmm2
	vpaddd 112(%ebp),%xmm3,%xmm3
	vpunpckldq %xmm1,%xmm0,%xmm6
	vpunpckldq %xmm3,%xmm2,%xmm7
	vpunpckhdq %xmm1,%xmm0,%xmm0
	vpunpckhdq %xmm3,%xmm2,%xmm2
	vpunpcklqdq %xmm7,%xmm6,%xmm1
	vpunpckhqdq %xmm7,%xmm6,%xmm6
	vpunpcklqdq %xmm2,%xmm0,%xmm7
	vpunpckhqdq %xmm2,%xmm0,%xmm3
	vpxor -128(%esi),%xmm1,%xmm4
	vpxor -64(%esi),%xmm6,%xmm5
	vpxor (%esi),%xmm7,%xmm6
	vpxor 64(%esi),%xmm3,%xmm7
	leal 208(%esi),%esi
	vmovdqu %xmm4,-128(%edi)
	vmovdqu %xmm5,-64(%edi)
	vmovdqu %xmm6,(%edi)
	vmovdqu %xmm7,64(%edi)
	leal 208(%edi),%edi
	subl $256,%ecx
	jnc .L015outer_loop
	addl $256,%ecx
	jz .L017done
	movl 520(%esp),%ebx
	leal -128(%esi),%esi
	movl 516(%esp),%edx
	leal -128(%edi),%edi
	vmovd 64(%ebp),%xmm2
	vmovdqu (%ebx),%xmm3
	vpaddd 96(%eax),%xmm2,%xmm2
	vpand 112(%eax),%xmm3,%xmm3
	vpor %xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa 32(%eax),%xmm0
	vmovdqu (%edx),%xmm1
	vmovdqu 16(%edx),%xmm2
	vmovdqa (%eax),%xmm6
	vmovdqa 16(%eax),%xmm7
	movl %ebp,48(%esp)
	vmovdqa %xmm0,(%esp)
	vmovdqa %xmm1,16(%esp)
	vmovdqa %xmm2,32(%esp)
	vmovdqa %xmm3,48(%esp)
	movl $10,%edx
	jmp .L018loop1x
.align 16
.L019outer1x:
	vmovdqa 80(%eax),%xmm3
	vmovdqa (%esp),%xmm0
	vmovdqa 16(%esp),%xmm1
	vmovdqa 32(%esp),%xmm2
	vpaddd 48(%esp),%xmm3,%xmm3
	movl $10,%edx
	vmovdqa %xmm3,48(%esp)
	jmp .L018loop1x
.align 16
.L018loop1x:
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
	vpshufd $78,%xmm2,%xmm2
	vpshufd $57,%xmm1,%xmm1
	vpshufd $147,%xmm3,%xmm3
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
	vpaddd %xmm1,%xmm0,%xmm0
	vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
	vpaddd %xmm3,%xmm2,%xmm2
	vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
	vpshufd $78,%xmm2,%xmm2
	vpshufd $147,%xmm1,%xmm1
	vpshufd $57,%xmm3,%xmm3
	decl %edx
	jnz .L018loop1x
	vpaddd (%esp),%xmm0,%xmm0
	vpaddd 16(%esp),%xmm1,%xmm1
	vpaddd 32(%esp),%xmm2,%xmm2
	vpaddd 48(%esp),%xmm3,%xmm3
	cmpl $64,%ecx
	jb .L020tail
	vpxor (%esi),%xmm0,%xmm0
	vpxor 16(%esi),%xmm1,%xmm1
	vpxor 32(%esi),%xmm2,%xmm2
	vpxor 48(%esi),%xmm3,%xmm3
	leal 64(%esi),%esi
	vmovdqu %xmm0,(%edi)
	vmovdqu %xmm1,16(%edi)
	vmovdqu %xmm2,32(%edi)
	vmovdqu %xmm3,48(%edi)
	leal 64(%edi),%edi
	subl $64,%ecx
	jnz .L019outer1x
	jmp .L017done
.L020tail:
	vmovdqa %xmm0,(%esp)
	vmovdqa %xmm1,16(%esp)
	vmovdqa %xmm2,32(%esp)
	vmovdqa %xmm3,48(%esp)
	xorl %eax,%eax
	xorl %edx,%edx
	xorl %ebp,%ebp
.L021tail_loop:
	movb (%esp,%ebp,1),%al
	movb (%esi,%ebp,1),%dl
	leal 1(%ebp),%ebp
	xorb %dl,%al
	movb %al,-1(%edi,%ebp,1)
	decl %ecx
	jnz .L021tail_loop
.L017done:
	vzeroupper
	movl 512(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
#endif