/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from chacha-x86.pl. */
#ifdef PIC
.text
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx
	jmp	.L004loop
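/*
 * Scalar fallback, reached when OPENSSL_ia32cap_P does not advertise
 * the features needed for the SSSE3/XOP paths below.  The 4x4 ChaCha
 * state lives in the 16 dwords at (%esp)..60(%esp), with the caller's
 * key/counter/nonce words saved at 80(%esp)..124(%esp).  .L004loop
 * runs ten iterations of the column-round/diagonal-round pair (20
 * rounds total); each quarter-round is the usual add/xor/rotate
 * sequence with rotate counts 16, 12, 8 and 7.
 */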
.align	16
.L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
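/*
 * Fewer than 64 bytes remain: the last keystream block is spilled to
 * the stack and XORed into the input one byte at a time.
 */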
.L005tail:
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
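/*
 * SSSE3 path: one 64-byte block at a time, whole state rows in
 * %xmm0-%xmm3.  Entered via .Lssse3_shortcut with %eax still holding
 * the .Lpic_point address and %ebp the OPENSSL_ia32cap_P pointer from
 * ChaCha20_ctr32.  The .byte 102,15,56,0,222/223 sequences are
 * hand-encoded pshufb %xmm6,%xmm3 / pshufb %xmm7,%xmm3, rotating every
 * dword left by 16 or 8 via the masks at .Lssse3_data; the 12- and
 * 7-bit rotates use psrld/pslld/por instead.
 */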
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lssse3_shortcut:
	testl	$2048,4(%ebp)
	jnz	.Lxop_shortcut
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L009loop1x
.align	16
.L010outer1x:
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x
.align	16
.L009loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L010outer1x
	jmp	.L012done
.L011tail:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L013tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
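/*
 * .Lssse3_data, in order: the pshufb masks for rotate-left-16 and
 * rotate-left-8, the "expand 32-byte k" sigma constants, per-lane
 * counter offsets 0..3, a broadcast of 4, the +1 and +4 counter
 * increments, and the mask used to splice the updated counter back
 * into the saved counter/nonce block.  The trailing .byte string is
 * the CRYPTOGAMS attribution, "ChaCha20 for x86, CRYPTOGAMS by
 * <appro@openssl.org>".
 */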
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lxop_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0141x
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	vmovdqu	(%edx),%xmm7
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpaddd	48(%eax),%xmm0,%xmm0
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpsubd	64(%eax),%xmm0,%xmm0
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,64(%ebp)
	vmovdqa	%xmm1,80(%ebp)
	vmovdqa	%xmm2,96(%ebp)
	vmovdqa	%xmm3,112(%ebp)
	vmovdqu	16(%edx),%xmm3
	vmovdqa	%xmm4,-64(%ebp)
	vmovdqa	%xmm5,-48(%ebp)
	vmovdqa	%xmm6,-32(%ebp)
	vmovdqa	%xmm7,-16(%ebp)
	vmovdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,(%ebp)
	vmovdqa	%xmm1,16(%ebp)
	vmovdqa	%xmm2,32(%ebp)
	vmovdqa	%xmm3,48(%ebp)
	vmovdqa	%xmm4,-128(%ebp)
	vmovdqa	%xmm5,-112(%ebp)
	vmovdqa	%xmm6,-96(%ebp)
	vmovdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L015outer_loop
.align	32
.L015outer_loop:
	vmovdqa	-112(%ebp),%xmm1
	vmovdqa	-96(%ebp),%xmm2
	vmovdqa	-80(%ebp),%xmm3
	vmovdqa	-48(%ebp),%xmm5
	vmovdqa	-32(%ebp),%xmm6
	vmovdqa	-16(%ebp),%xmm7
	vmovdqa	%xmm1,-112(%ebx)
	vmovdqa	%xmm2,-96(%ebx)
	vmovdqa	%xmm3,-80(%ebx)
	vmovdqa	%xmm5,-48(%ebx)
	vmovdqa	%xmm6,-32(%ebx)
	vmovdqa	%xmm7,-16(%ebx)
	vmovdqa	32(%ebp),%xmm2
	vmovdqa	48(%ebp),%xmm3
	vmovdqa	64(%ebp),%xmm4
	vmovdqa	80(%ebp),%xmm5
	vmovdqa	96(%ebp),%xmm6
	vmovdqa	112(%ebp),%xmm7
	vpaddd	64(%eax),%xmm4,%xmm4
	vmovdqa	%xmm2,32(%ebx)
	vmovdqa	%xmm3,48(%ebx)
	vmovdqa	%xmm4,64(%ebx)
	vmovdqa	%xmm5,80(%ebx)
	vmovdqa	%xmm6,96(%ebx)
	vmovdqa	%xmm7,112(%ebx)
	vmovdqa	%xmm4,64(%ebp)
	vmovdqa	-128(%ebp),%xmm0
	vmovdqa	%xmm4,%xmm6
	vmovdqa	-64(%ebp),%xmm3
	vmovdqa	(%ebp),%xmm4
	vmovdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
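/*
 * XOP 4-block inner loop: four ChaCha states are processed in
 * parallel, each %xmm register holding one state word for four blocks
 * (one block per dword lane), with spills into the scratch area
 * addressed from %ebx.  The .byte 143,232,120,194,... sequences are
 * hand-assembled XOP vprotd instructions (packed rotate of each dword
 * left by the trailing immediate: 16, 12, 8 or 7), replacing the
 * shift/or pairs of the SSSE3 path.
 */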
.align	32
.L016loop:
	vpaddd	%xmm3,%xmm0,%xmm0
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,246,16
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-48(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	80(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,64(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-64(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	32(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-32(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	96(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,80(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,16(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-48(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	48(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-16(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	112(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,96(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-32(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-48(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm7,%xmm6
.byte	143,232,120,194,219,7
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-16(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-32(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	64(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,112(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,32(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-48(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-16(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	80(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,64(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,48(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-32(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	16(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-64(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	96(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,80(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-16(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,96(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
.byte	143,232,120,194,219,7
	decl	%edx
	jnz	.L016loop
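/*
 * The four-block rounds are done: add back the saved input state, then
 * transpose each group of four rows with vpunpck{l,h}dq and
 * vpunpck{l,h}qdq so the lanes land as four consecutive 64-byte
 * keystream blocks, XORing against the source in 16-byte steps along
 * the way.
 */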
	vmovdqa	%xmm3,-64(%ebx)
	vmovdqa	%xmm4,(%ebx)
	vmovdqa	%xmm5,16(%ebx)
	vmovdqa	%xmm6,64(%ebx)
	vmovdqa	%xmm7,96(%ebx)
	vmovdqa	-112(%ebx),%xmm1
	vmovdqa	-96(%ebx),%xmm2
	vmovdqa	-80(%ebx),%xmm3
	vpaddd	-128(%ebp),%xmm0,%xmm0
	vpaddd	-112(%ebp),%xmm1,%xmm1
	vpaddd	-96(%ebp),%xmm2,%xmm2
	vpaddd	-80(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	-64(%ebx),%xmm0
	vmovdqa	-48(%ebx),%xmm1
	vmovdqa	-32(%ebx),%xmm2
	vmovdqa	-16(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	-64(%ebp),%xmm0,%xmm0
	vpaddd	-48(%ebp),%xmm1,%xmm1
	vpaddd	-32(%ebp),%xmm2,%xmm2
	vpaddd	-16(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	(%ebx),%xmm0
	vmovdqa	16(%ebx),%xmm1
	vmovdqa	32(%ebx),%xmm2
	vmovdqa	48(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	(%ebp),%xmm0,%xmm0
	vpaddd	16(%ebp),%xmm1,%xmm1
	vpaddd	32(%ebp),%xmm2,%xmm2
	vpaddd	48(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	64(%ebx),%xmm0
	vmovdqa	80(%ebx),%xmm1
	vmovdqa	96(%ebx),%xmm2
	vmovdqa	112(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	64(%ebp),%xmm0,%xmm0
	vpaddd	80(%ebp),%xmm1,%xmm1
	vpaddd	96(%ebp),%xmm2,%xmm2
	vpaddd	112(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	208(%esi),%esi
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L015outer_loop
	addl	$256,%ecx
	jz	.L017done
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	vmovd	64(%ebp),%xmm2
	vmovdqu	(%ebx),%xmm3
	vpaddd	96(%eax),%xmm2,%xmm2
	vpand	112(%eax),%xmm3,%xmm3
	vpor	%xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa	32(%eax),%xmm0
	vmovdqu	(%edx),%xmm1
	vmovdqu	16(%edx),%xmm2
	vmovdqa	(%eax),%xmm6
	vmovdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L018loop1x
.align	16
.L019outer1x:
	vmovdqa	80(%eax),%xmm3
	vmovdqa	(%esp),%xmm0
	vmovdqa	16(%esp),%xmm1
	vmovdqa	32(%esp),%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	movl	$10,%edx
	vmovdqa	%xmm3,48(%esp)
	jmp	.L018loop1x
.align	16
.L018loop1x:
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$57,%xmm1,%xmm1
	vpshufd	$147,%xmm3,%xmm3
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$147,%xmm1,%xmm1
	vpshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L018loop1x
	vpaddd	(%esp),%xmm0,%xmm0
	vpaddd	16(%esp),%xmm1,%xmm1
	vpaddd	32(%esp),%xmm2,%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	cmpl	$64,%ecx
	jb	.L020tail
	vpxor	(%esi),%xmm0,%xmm0
	vpxor	16(%esi),%xmm1,%xmm1
	vpxor	32(%esi),%xmm2,%xmm2
	vpxor	48(%esi),%xmm3,%xmm3
	leal	64(%esi),%esi
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L019outer1x
	jmp	.L017done
.L020tail:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L021tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L021tail_loop
.L017done:
	vzeroupper
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm	OPENSSL_ia32cap_P,16,4
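/*
 * Non-PIC build: the same three routines, differing only in that
 * OPENSSL_ia32cap_P is referenced by absolute address rather than
 * through the .Lpic_point PC-relative computation.
 */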
#else
.text
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P,%ebp
	testl	$16777216,(%ebp)
	jz	.L001x86
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx
	jmp	.L004loop
.align	16
.L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lssse3_shortcut:
	testl	$2048,4(%ebp)
	jnz	.Lxop_shortcut
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L009loop1x
.align	16
.L010outer1x:
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x
.align	16
.L009loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L010outer1x
	jmp	.L012done
.L011tail:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L013tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lxop_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	vmovdqu	(%ebx),%xmm3
	cmpl	$256,%ecx
	jb	.L0141x
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	vmovdqu	(%edx),%xmm7
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpaddd	48(%eax),%xmm0,%xmm0
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpsubd	64(%eax),%xmm0,%xmm0
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,64(%ebp)
	vmovdqa	%xmm1,80(%ebp)
	vmovdqa	%xmm2,96(%ebp)
	vmovdqa	%xmm3,112(%ebp)
	vmovdqu	16(%edx),%xmm3
	vmovdqa	%xmm4,-64(%ebp)
	vmovdqa	%xmm5,-48(%ebp)
	vmovdqa	%xmm6,-32(%ebp)
	vmovdqa	%xmm7,-16(%ebp)
	vmovdqa	32(%eax),%xmm7
	leal	128(%esp),%ebx
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,(%ebp)
	vmovdqa	%xmm1,16(%ebp)
	vmovdqa	%xmm2,32(%ebp)
	vmovdqa	%xmm3,48(%ebp)
	vmovdqa	%xmm4,-128(%ebp)
	vmovdqa	%xmm5,-112(%ebp)
	vmovdqa	%xmm6,-96(%ebp)
	vmovdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L015outer_loop
.align	32
.L015outer_loop:
	vmovdqa	-112(%ebp),%xmm1
	vmovdqa	-96(%ebp),%xmm2
	vmovdqa	-80(%ebp),%xmm3
	vmovdqa	-48(%ebp),%xmm5
	vmovdqa	-32(%ebp),%xmm6
	vmovdqa	-16(%ebp),%xmm7
	vmovdqa	%xmm1,-112(%ebx)
	vmovdqa	%xmm2,-96(%ebx)
	vmovdqa	%xmm3,-80(%ebx)
	vmovdqa	%xmm5,-48(%ebx)
	vmovdqa	%xmm6,-32(%ebx)
	vmovdqa	%xmm7,-16(%ebx)
	vmovdqa	32(%ebp),%xmm2
	vmovdqa	48(%ebp),%xmm3
	vmovdqa	64(%ebp),%xmm4
	vmovdqa	80(%ebp),%xmm5
	vmovdqa	96(%ebp),%xmm6
	vmovdqa	112(%ebp),%xmm7
	vpaddd	64(%eax),%xmm4,%xmm4
	vmovdqa	%xmm2,32(%ebx)
	vmovdqa	%xmm3,48(%ebx)
	vmovdqa	%xmm4,64(%ebx)
	vmovdqa	%xmm5,80(%ebx)
	vmovdqa	%xmm6,96(%ebx)
	vmovdqa	%xmm7,112(%ebx)
	vmovdqa	%xmm4,64(%ebp)
	vmovdqa	-128(%ebp),%xmm0
	vmovdqa	%xmm4,%xmm6
	vmovdqa	-64(%ebp),%xmm3
	vmovdqa	(%ebp),%xmm4
	vmovdqa	16(%ebp),%xmm5
	movl	$10,%edx
	nop
.align	32
.L016loop:
	vpaddd	%xmm3,%xmm0,%xmm0
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,246,16
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-48(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	80(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,64(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-64(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	32(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-32(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	96(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,80(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,16(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-48(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	48(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-16(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	112(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,96(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-32(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-48(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm7,%xmm6
.byte	143,232,120,194,219,7
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-16(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-32(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	64(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,112(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
	vmovdqa	%xmm4,32(%ebx)
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-48(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vmovdqa	-16(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	80(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,64(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7
	vmovdqa	%xmm5,48(%ebx)
.byte	143,232,120,194,246,16
	vmovdqa	%xmm3,-32(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	16(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12
	vmovdqa	-64(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	96(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,80(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7
.byte	143,232,120,194,255,16
	vmovdqa	%xmm2,-16(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,255,8
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,96(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
.byte	143,232,120,194,219,7
	decl	%edx
	jnz	.L016loop
	vmovdqa	%xmm3,-64(%ebx)
	vmovdqa	%xmm4,(%ebx)
	vmovdqa	%xmm5,16(%ebx)
	vmovdqa	%xmm6,64(%ebx)
	vmovdqa	%xmm7,96(%ebx)
	vmovdqa	-112(%ebx),%xmm1
	vmovdqa	-96(%ebx),%xmm2
	vmovdqa	-80(%ebx),%xmm3
	vpaddd	-128(%ebp),%xmm0,%xmm0
	vpaddd	-112(%ebp),%xmm1,%xmm1
	vpaddd	-96(%ebp),%xmm2,%xmm2
	vpaddd	-80(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	-64(%ebx),%xmm0
	vmovdqa	-48(%ebx),%xmm1
	vmovdqa	-32(%ebx),%xmm2
	vmovdqa	-16(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	-64(%ebp),%xmm0,%xmm0
	vpaddd	-48(%ebp),%xmm1,%xmm1
	vpaddd	-32(%ebp),%xmm2,%xmm2
	vpaddd	-16(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	(%ebx),%xmm0
	vmovdqa	16(%ebx),%xmm1
	vmovdqa	32(%ebx),%xmm2
	vmovdqa	48(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	(%ebp),%xmm0,%xmm0
	vpaddd	16(%ebp),%xmm1,%xmm1
	vpaddd	32(%ebp),%xmm2,%xmm2
	vpaddd	48(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	64(%ebx),%xmm0
	vmovdqa	80(%ebx),%xmm1
	vmovdqa	96(%ebx),%xmm2
	vmovdqa	112(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	64(%ebp),%xmm0,%xmm0
	vpaddd	80(%ebp),%xmm1,%xmm1
	vpaddd	96(%ebp),%xmm2,%xmm2
	vpaddd	112(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	208(%esi),%esi
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L015outer_loop
	addl	$256,%ecx
	jz	.L017done
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	vmovd	64(%ebp),%xmm2
	vmovdqu	(%ebx),%xmm3
	vpaddd	96(%eax),%xmm2,%xmm2
	vpand	112(%eax),%xmm3,%xmm3
	vpor	%xmm2,%xmm3,%xmm3
.L0141x:
	vmovdqa	32(%eax),%xmm0
	vmovdqu	(%edx),%xmm1
	vmovdqu	16(%edx),%xmm2
	vmovdqa	(%eax),%xmm6
	vmovdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L018loop1x
.align	16
.L019outer1x:
	vmovdqa	80(%eax),%xmm3
	vmovdqa	(%esp),%xmm0
	vmovdqa	16(%esp),%xmm1
	vmovdqa	32(%esp),%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	movl	$10,%edx
	vmovdqa	%xmm3,48(%esp)
	jmp	.L018loop1x
.align	16
.L018loop1x:
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$57,%xmm1,%xmm1
	vpshufd	$147,%xmm3,%xmm3
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$147,%xmm1,%xmm1
	vpshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L018loop1x
	vpaddd	(%esp),%xmm0,%xmm0
	vpaddd	16(%esp),%xmm1,%xmm1
	vpaddd	32(%esp),%xmm2,%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	cmpl	$64,%ecx
	jb	.L020tail
	vpxor	(%esi),%xmm0,%xmm0
	vpxor	16(%esi),%xmm1,%xmm1
	vpxor	32(%esi),%xmm2,%xmm2
	vpxor	48(%esi),%xmm3,%xmm3
	leal	64(%esi),%esi
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L019outer1x
	jmp	.L017done
.L020tail:
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L021tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L021tail_loop
.L017done:
	vzeroupper
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm	OPENSSL_ia32cap_P,16,4
#endif