/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
/*
 * Syntax: GNU as, AT&T operand order (source, destination).
 * Calling convention used by the entry points: integer arguments arrive in
 * %rdi, %rsi, %rdx, %rcx, %r8, %r9; %rbx, %rbp, %r12-%r15 are saved and
 * restored by every global function below.
 *
 * Common frame of the global functions (after 6 pushes + subq $128+24):
 *     0..127(%rsp)  16-limb (1024-bit) product / scratch area
 *     128(%rsp)     64-bit constant k consumed by the reduce helpers
 *     136,144(%rsp) per-function spill slots
 * 8 (return address) + 48 (pushes) + 152 = 208, so %rsp is 16-byte aligned
 * at every call and for the movdqa stores in rsaz_512_mul_by_one.
 *
 * Every global function tests (OPENSSL_ia32cap_P+8) & 0x80100 == 0x80100 and,
 * when both bits are set, takes the variant built on mulx/adcx/adox.
 * NOTE(review): bits 8 and 19 are presumably the BMI2 and ADX feature flags -
 * confirm against the definition of OPENSSL_ia32cap_P.
 */
.text

/*
 * rsaz_512_sqr - square an 8-limb (512-bit) value, reduce it, and repeat.
 *
 * Inputs, as consumed by the code:
 *   %rdi  destination; __rsaz_512_subtract stores the 8 result limbs here
 *   %rsi  source operand, limbs at 0..56(%rsi)
 *   %rdx  pointer parked in %xmm1 and handed to the helpers in %rbp; they
 *         read 8 limbs from it (the modulus, judging by the conditional
 *         subtraction in __rsaz_512_subtract)
 *   %rcx  constant k, saved at 128(%rsp)
 *   %r8d  iteration count, kept at 128+8(%rsp); after each iteration %rsi is
 *         set to %rdi, so the previous result is squared again
 * NOTE(review): presumably declared in C as
 *   void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k, int cnt)
 * - confirm against the C header.
 */
.globl	rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lsqr_body:
.byte	102,72,15,110,202	/* movq %rdx,%xmm1 : park 3rd argument */
	movq	(%rsi),%rdx	/* %rdx = a[0] */
	movq	8(%rsi),%rax	/* %rax = a[1] */
	movq	%rcx,128(%rsp)	/* save k for the reduce helpers */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Loop_sqrx	/* both capability bits set: mulx/adcx/adox path */
	jmp	.Loop_sqr

/*
 * Scalar path.  Loop entry state: %rdx = a[0], %rax = a[1], %r8d = count.
 * Eight passes; pass i accumulates the cross products a[i]*a[i+1..7],
 * doubles the two columns that are complete, adds a[i]^2 and stores two
 * finished limbs at 16*i(%rsp) and 16*i+8(%rsp).
 */
.align	32
.Loop_sqr:
	movl	%r8d,128+8(%rsp)	/* spill remaining iteration count */

	/* pass 1: a[0] * a[1..7] */
	movq	%rdx,%rbx	/* %rbx = a[0] */
	movq	%rax,%rbp	/* %rbp = a[1], reused as the pass-2 multiplier */
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	adcq	$0,%rdx

	/* double the lowest cross-product limb; %rcx collects carries */
	xorq	%rcx,%rcx
	addq	%r8,%r8
	movq	%rdx,%r15
	adcq	$0,%rcx

	mulq	%rax	/* a[0]^2 */
	addq	%r8,%rdx
	adcq	$0,%rcx

	movq	%rax,(%rsp)
	movq	%rdx,8(%rsp)

	/* pass 2: a[1] * a[2..7]  (%rbp = a[1]) */
	movq	16(%rsi),%rax
	mulq	%rbp
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r8
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax	/* a[1]^2 */

	addq	%rcx,%rax	/* carry left over from pass 1 */
	movq	16(%rsi),%rbp	/* %rbp = a[2] for pass 3 */
	addq	%rax,%r9
	movq	24(%rsi),%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)

	/* pass 3: a[2] * a[3..7]  (%rbp = a[2]) */
	mulq	%rbp
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r8
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r9
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax	/* a[2]^2 */

	addq	%rbx,%rax	/* carry left over from pass 2 */
	movq	24(%rsi),%r10	/* %r10 = a[3] for pass 4 */
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)

	/* pass 4: a[3] * a[4..7]; a[4], a[5], a[6] are cached in %r11, %r12, %rbp */
	movq	%rax,%r11
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%r12
	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%rbp
	mulq	%r10
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r13,%r13
	movq	%rdx,%r10
	adcq	%r14,%r14
	adcq	$0,%rbx

	mulq	%rax	/* a[3]^2 */

	addq	%rcx,%rax
	addq	%rax,%r13
	movq	%r12,%rax
	adcq	%rdx,%r14
	adcq	$0,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)

	/* pass 5: a[4] * a[5..7]  (%r11 = a[4]); a[7] is cached in %r14 */
	mulq	%r11
	addq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	movq	%rax,%r14
	mulq	%r11
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r15,%r15
	movq	%rdx,%r11
	adcq	%r8,%r8
	adcq	$0,%rcx

	mulq	%rax	/* a[4]^2 */

	addq	%rbx,%rax
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	%rdx,%r8
	adcq	$0,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)

	/* pass 6: a[5] * a[6..7]  (%r12 = a[5]) */
	mulq	%r12
	addq	%rax,%r10
	movq	%r14,%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r12
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax	/* a[5]^2 */

	addq	%rcx,%rax
	addq	%rax,%r9
	movq	%r14,%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)

	/* pass 7: a[6] * a[7]  (%rbp = a[6], %rax = a[7]) */
	mulq	%rbp
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r13
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax	/* a[6]^2 */

	addq	%rbx,%rax
	addq	%rax,%r11
	movq	%r14,%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)

	/* pass 8: a[7]^2 plus the doubled remaining limb */
	xorq	%rbx,%rbx
	addq	%r13,%r13
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%r13,%rax
	adcq	%rbx,%rdx

	/* low half of the square -> %r8..%r15 for the reduction */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15
.byte	102,72,15,126,205	/* movq %xmm1,%rbp : modulus pointer */

	movq	%rax,112(%rsp)
	movq	%rdx,120(%rsp)

	call	__rsaz_512_reduce

	/* add the high half; %rcx becomes 0 or all-ones from the carry out */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	/* re-establish loop entry state from the result just stored */
	movq	%r8,%rdx	/* result limb 0 */
	movq	%r9,%rax	/* result limb 1 */
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi	/* next iteration squares the output */

	decl	%r8d
	jnz	.Loop_sqr
	jmp	.Lsqr_tail

/*
 * mulx/adcx/adox path.  Same pass structure as above; %rdx holds the
 * multiplier limb of the current pass, %rbp is kept at zero, and %rdi is
 * used as scratch (the destination pointer is parked in %xmm0).
 * The raw .byte sequences are hand-encoded instructions, decoded alongside.
 */
.align	32
.Loop_sqrx:
	movl	%r8d,128+8(%rsp)	/* spill remaining iteration count */
.byte	102,72,15,110,199	/* movq %rdi,%xmm0 */

	/* pass 1: %rdx = a[0] */
	mulxq	%rax,%r8,%r9
	movq	%rax,%rbx	/* keep a[1] for pass 2 */

	mulxq	16(%rsi),%rcx,%r10
	xorq	%rbp,%rbp	/* %rbp = 0, clears CF and OF */

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rcx,%r9

.byte	0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00	/* mulxq 32(%rsi),%rcx,%r12 */
	adcxq	%rax,%r10

.byte	0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00	/* mulxq 40(%rsi),%rax,%r13 */
	adcxq	%rcx,%r11

	mulxq	48(%rsi),%rcx,%r14
	adcxq	%rax,%r12
	adcxq	%rcx,%r13

	mulxq	56(%rsi),%rax,%r15
	adcxq	%rax,%r14
	adcxq	%rbp,%r15

	mulxq	%rdx,%rax,%rdi	/* a[0]^2 */
	movq	%rbx,%rdx	/* %rdx = a[1] */
	xorq	%rcx,%rcx
	adoxq	%r8,%r8
	adcxq	%rdi,%r8
	adoxq	%rbp,%rcx
	adcxq	%rbp,%rcx

	movq	%rax,(%rsp)
	movq	%r8,8(%rsp)

	/* pass 2: %rdx = a[1] */
.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00	/* mulxq 16(%rsi),%rax,%rbx */
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

	mulxq	24(%rsi),%rdi,%r8
	adoxq	%rdi,%r11
.byte	0x66	/* extra prefix on the next instruction; presumably padding */
	adcxq	%r8,%r12

	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r12
	adcxq	%rbx,%r13

	mulxq	40(%rsi),%rdi,%r8
	adoxq	%rdi,%r13
	adcxq	%r8,%r14

.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rbx */
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

.byte	0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rdi,%r8 */
	adoxq	%rdi,%r15
	adcxq	%rbp,%r8
	mulxq	%rdx,%rax,%rdi	/* a[1]^2 */
	adoxq	%rbp,%r8
.byte	0x48,0x8b,0x96,0x10,0x00,0x00,0x00	/* movq 16(%rsi),%rdx : a[2] */

	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r10
	adcxq	%rbp,%rbx

	movq	%r9,16(%rsp)
.byte	0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00	/* movq %r10,24(%rsp) */

	/* pass 3: %rdx = a[2] */
	mulxq	24(%rsi),%rdi,%r9
	adoxq	%rdi,%r12
	adcxq	%r9,%r13

	mulxq	32(%rsi),%rax,%rcx
	adoxq	%rax,%r13
	adcxq	%rcx,%r14

.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00	/* mulxq 40(%rsi),%rdi,%r9 */
	adoxq	%rdi,%r14
	adcxq	%r9,%r15

.byte	0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rcx */
	adoxq	%rax,%r15
	adcxq	%rcx,%r8

	mulxq	56(%rsi),%rdi,%r9
	adoxq	%rdi,%r8
	adcxq	%rbp,%r9
	mulxq	%rdx,%rax,%rdi	/* a[2]^2 */
	adoxq	%rbp,%r9
	movq	24(%rsi),%rdx	/* %rdx = a[3] */

	xorq	%rcx,%rcx
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)

	/* pass 4: %rdx = a[3] */
	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

	mulxq	40(%rsi),%rdi,%r10
	adoxq	%rdi,%r15
	adcxq	%r10,%r8

	mulxq	48(%rsi),%rax,%rbx
	adoxq	%rax,%r8
	adcxq	%rbx,%r9

	mulxq	56(%rsi),%rdi,%r10
	adoxq	%rdi,%r9
	adcxq	%rbp,%r10
	mulxq	%rdx,%rax,%rdi	/* a[3]^2 */
	adoxq	%rbp,%r10
	movq	32(%rsi),%rdx	/* %rdx = a[4] */

	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%r14,%r14
	adcxq	%rax,%r13
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r14
	adcxq	%rbp,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)

	/* pass 5: %rdx = a[4] */
	mulxq	40(%rsi),%rdi,%r11
	adoxq	%rdi,%r8
	adcxq	%r11,%r9

	mulxq	48(%rsi),%rax,%rcx
	adoxq	%rax,%r9
	adcxq	%rcx,%r10

	mulxq	56(%rsi),%rdi,%r11
	adoxq	%rdi,%r10
	adcxq	%rbp,%r11
	mulxq	%rdx,%rax,%rdi	/* a[4]^2 */
	movq	40(%rsi),%rdx	/* %rdx = a[5] */
	adoxq	%rbp,%r11

	xorq	%rcx,%rcx
	adoxq	%r15,%r15

	adcxq	%rbx,%rax
	adoxq	%r8,%r8
	adcxq	%rax,%r15
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r8
	adcxq	%rbp,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)

	/* pass 6: %rdx = a[5] */
.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%rbx */
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

.byte	0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rdi,%r12 */
	adoxq	%rdi,%r11
	adcxq	%rbp,%r12
	mulxq	%rdx,%rax,%rdi	/* a[5]^2 */
	adoxq	%rbp,%r12
	movq	48(%rsi),%rdx	/* %rdx = a[6] */

	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adcxq	%rdi,%r10
	adoxq	%rbp,%rbx
	adcxq	%rbp,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)

	/* pass 7: %rdx = a[6] */
.byte	0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00	/* mulxq 56(%rsi),%rax,%r13 */
	adoxq	%rax,%r12
	adoxq	%rbp,%r13

	mulxq	%rdx,%rax,%rdi	/* a[6]^2 */
	xorq	%rcx,%rcx
	movq	56(%rsi),%rdx	/* %rdx = a[7] */
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

.byte	0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00	/* movq %r11,96(%rsp) */
.byte	0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00	/* movq %r12,104(%rsp) */

	/* pass 8: a[7]^2 */
	mulxq	%rdx,%rax,%rdx
	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%rbp,%rbx
	adcxq	%r13,%rax
	adcxq	%rdx,%rbx

.byte	102,72,15,126,199	/* movq %xmm0,%rdi : destination pointer */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp : modulus pointer */

	movq	128(%rsp),%rdx	/* k, register argument of __rsaz_512_reducex */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	movq	%rax,112(%rsp)
	movq	%rbx,120(%rsp)

	call	__rsaz_512_reducex

	/* add the high half; %rcx becomes 0 or all-ones from the carry out */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	/* re-establish loop entry state from the result just stored */
	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqrx

.Lsqr_tail:

	/* %rax = address just above the six saved registers */
	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_sqr,.-rsaz_512_sqr

/*
 * rsaz_512_mul - 8-limb by 8-limb multiply followed by reduction.
 *
 * Inputs, as consumed by the code:
 *   %rdi  destination (parked in %xmm0 while the multiply helper runs)
 *   %rsi  first operand, 8 limbs
 *   %rdx  second operand, 8 limbs (passed to the multiply helper in %rbp)
 *   %rcx  pointer parked in %xmm1 and given to reduce/subtract in %rbp
 *   %r8   constant k, saved at 128(%rsp)
 * NOTE(review): presumably
 *   void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n, BN_ULONG k)
 * - confirm against the C header.
 */
.globl	rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_body:
.byte	102,72,15,110,199	/* movq %rdi,%xmm0 */
.byte	102,72,15,110,201	/* movq %rcx,%xmm1 */
	movq	%r8,128(%rsp)	/* save k */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx
	movq	(%rdx),%rbx	/* %rbx = b[0] */
	movq	%rdx,%rbp	/* %rbp = b */
	call	__rsaz_512_mul

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	/* low half of the product -> %r8..%r15 */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_tail

.align	32
.Lmulx:
	movq	%rdx,%rbp	/* %rbp = b */
	movq	(%rdx),%rdx	/* %rdx = b[0], implicit mulx operand */
	call	__rsaz_512_mulx

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	128(%rsp),%rdx	/* k */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex
.Lmul_tail:
	/* add the high half; %rcx becomes 0 or all-ones from the carry out */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul,.-rsaz_512_mul

/*
 * rsaz_512_mul_gather4 - multiply by a table entry selected without
 * index-dependent addressing, then reduce.
 *
 * Inputs, as consumed by the code:
 *   %rdi  destination, spilled to 128+8(%rsp)
 *   %rsi  first operand, 8 limbs
 *   %rdx  table base; each of the 8 limbs of the second operand is picked
 *         from a 128-byte row (16 candidate qwords), rows 128 bytes apart
 *   %rcx  pointer spilled to 128+16(%rsp), later given to reduce/subtract
 *   %r8   constant k, saved at 128(%rsp)
 *   %r9d  index of the wanted entry
 * Every row is read in full; the entry is isolated with the compare masks
 * in %xmm0-%xmm7 (pand/por), so the addresses touched do not depend on %r9d.
 * NOTE(review): .Linc is defined outside this view; the mask construction
 * assumes it supplies the starting lane indices and the per-step increment.
 */
.globl	rsaz_512_mul_gather4
.type	rsaz_512_mul_gather4,@function
.align	32
rsaz_512_mul_gather4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$152,%rsp
.cfi_adjust_cfa_offset	152
.Lmul_gather4_body:
	/* build eight selection masks: lanes equal to the index become all-ones */
	movd	%r9d,%xmm8
	movdqa	.Linc+16(%rip),%xmm1
	movdqa	.Linc(%rip),%xmm0

	pshufd	$0,%xmm8,%xmm8	/* broadcast index to all four dwords */
	movdqa	%xmm1,%xmm7
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm8,%xmm0
	movdqa	%xmm7,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm8,%xmm1
	movdqa	%xmm7,%xmm4
	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm8,%xmm2
	movdqa	%xmm7,%xmm5
	paddd	%xmm3,%xmm4
	pcmpeqd	%xmm8,%xmm3
	movdqa	%xmm7,%xmm6
	paddd	%xmm4,%xmm5
	pcmpeqd	%xmm8,%xmm4
	paddd	%xmm5,%xmm6
	pcmpeqd	%xmm8,%xmm5
	paddd	%xmm6,%xmm7
	pcmpeqd	%xmm8,%xmm6
	pcmpeqd	%xmm8,%xmm7

	/* gather limb 0 of the selected entry into the low qword of %xmm8 */
	movdqa	0(%rdx),%xmm8
	movdqa	16(%rdx),%xmm9
	movdqa	32(%rdx),%xmm10
	movdqa	48(%rdx),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rdx),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rdx),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rdx),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rdx),%xmm15
	leaq	128(%rdx),%rbp	/* %rbp = next table row */
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9	/* swap 64-bit halves */
	por	%xmm9,%xmm8
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx_gather
.byte	102,76,15,126,195	/* movq %xmm8,%rbx : b[0] */

	movq	%r8,128(%rsp)
	movq	%rdi,128+8(%rsp)
	movq	%rcx,128+16(%rsp)

	/* first row: a[0..7] * b[0] */
	movq	(%rsi),%rax
	movq	8(%rsi),%rcx
	mulq	%rbx
	movq	%rax,(%rsp)
	movq	%rcx,%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rsp),%rdi	/* %rdi = next product limb to store */
	movl	$7,%ecx	/* seven more limbs of b */
	jmp	.Loop_mul_gather

.align	32
.Loop_mul_gather:
	/* gather the next limb of the selected entry */
	movdqa	0(%rbp),%xmm8
	movdqa	16(%rbp),%xmm9
	movdqa	32(%rbp),%xmm10
	movdqa	48(%rbp),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rbp),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rbp),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rbp),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rbp),%xmm15
	leaq	128(%rbp),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
.byte	102,76,15,126,195	/* movq %xmm8,%rbx */

	/* accumulate a[0..7] * b[i]; the lowest limb is finished and stored */
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul_gather

	/* store the upper eight limbs of the product */
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	128+8(%rsp),%rdi	/* destination pointer */
	movq	128+16(%rsp),%rbp	/* pointer for reduce/subtract */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_gather_tail

.align	32
.Lmulx_gather:
.byte	102,76,15,126,194	/* movq %xmm8,%rdx : b[0] */

	movq	%r8,128(%rsp)
	movq	%rdi,128+8(%rsp)
	movq	%rcx,128+16(%rsp)

	/* first row: a[0..7] * b[0] */
	mulxq	(%rsi),%rbx,%r8
	movq	%rbx,(%rsp)
	xorl	%edi,%edi	/* %rdi = 0, clears CF and OF */

	mulxq	8(%rsi),%rax,%r9

	mulxq	16(%rsi),%rbx,%r10
	adcxq	%rax,%r8

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rbx,%r9

	mulxq	32(%rsi),%rbx,%r12
	adcxq	%rax,%r10

	mulxq	40(%rsi),%rax,%r13
	adcxq	%rbx,%r11

	mulxq	48(%rsi),%rbx,%r14
	adcxq	%rax,%r12

	mulxq	56(%rsi),%rax,%r15
	adcxq	%rbx,%r13
	adcxq	%rax,%r14
.byte	0x67	/* extra prefix on the next instruction; presumably padding */
	movq	%r8,%rbx
	adcxq	%rdi,%r15

	movq	$-7,%rcx	/* counts up to zero: seven more limbs of b */
	jmp	.Loop_mulx_gather

.align	32
.Loop_mulx_gather:
	/* gather the next limb of the selected entry */
	movdqa	0(%rbp),%xmm8
	movdqa	16(%rbp),%xmm9
	movdqa	32(%rbp),%xmm10
	movdqa	48(%rbp),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rbp),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rbp),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rbp),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rbp),%xmm15
	leaq	128(%rbp),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
.byte	102,76,15,126,194	/* movq %xmm8,%rdx */

.byte	0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00	/* mulxq 0(%rsi),%rax,%r8 */
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

	mulxq	8(%rsi),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rsi),%rax,%r10
	adcxq	%rax,%r9
	adoxq	%r11,%r10

.byte	0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00	/* mulxq 24(%rsi),%rax,%r11 */
	adcxq	%rax,%r10
	adoxq	%r12,%r11

	mulxq	32(%rsi),%rax,%r12
	adcxq	%rax,%r11
	adoxq	%r13,%r12

	mulxq	40(%rsi),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

.byte	0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%r14 */
	adcxq	%rax,%r13
.byte	0x67	/* extra prefix on the next instruction; presumably padding */
	adoxq	%r15,%r14

	mulxq	56(%rsi),%rax,%r15
	movq	%rbx,64(%rsp,%rcx,8)	/* store finished limb at 8..56(%rsp) */
	adcxq	%rax,%r14
	adoxq	%rdi,%r15
	movq	%r8,%rbx
	adcxq	%rdi,%r15

	incq	%rcx
	jnz	.Loop_mulx_gather

	/* store the upper eight limbs of the product */
	movq	%r8,64(%rsp)
	movq	%r9,64+8(%rsp)
	movq	%r10,64+16(%rsp)
	movq	%r11,64+24(%rsp)
	movq	%r12,64+32(%rsp)
	movq	%r13,64+40(%rsp)
	movq	%r14,64+48(%rsp)
	movq	%r15,64+56(%rsp)

	movq	128(%rsp),%rdx	/* k */
	movq	128+8(%rsp),%rdi	/* destination pointer */
	movq	128+16(%rsp),%rbp	/* pointer for reduce/subtract */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex

.Lmul_gather_tail:
	/* add the high half; %rcx becomes 0 or all-ones from the carry out */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_gather4_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4

/*
 * rsaz_512_mul_scatter4 - multiply the destination by a second operand,
 * reduce, write the result back and also scatter it into a table.
 *
 * Inputs, as consumed by the code:
 *   %rdi  destination AND one multiplicand (its limbs are read via %rbp)
 *   %rsi  other multiplicand, 8 limbs
 *   %rdx  pointer parked in %xmm1, later given to reduce/subtract in %rbp
 *   %rcx  constant k, saved at 128(%rsp)
 *   %r8   table base
 *   %r9d  table index (zero-extended); limb i of the result is stored at
 *         table + 8*index + 128*i
 */
.globl	rsaz_512_mul_scatter4
.type	rsaz_512_mul_scatter4,@function
.align	32
rsaz_512_mul_scatter4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	movl	%r9d,%r9d	/* zero-extend the index */
	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_scatter4_body:
	leaq	(%r8,%r9,8),%r8	/* address of the selected table column */
.byte	102,72,15,110,199	/* movq %rdi,%xmm0 */
.byte	102,72,15,110,202	/* movq %rdx,%xmm1 */
.byte	102,73,15,110,208	/* movq %r8,%xmm2 */
	movq	%rcx,128(%rsp)	/* save k */

	movq	%rdi,%rbp	/* multiplier limbs come from the destination */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx_scatter
	movq	(%rdi),%rbx
	call	__rsaz_512_mul

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_scatter_tail

.align	32
.Lmulx_scatter:
	movq	(%rdi),%rdx
	call	__rsaz_512_mulx

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	128(%rsp),%rdx	/* k */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex

.Lmul_scatter_tail:
	/* add the high half; %rcx becomes 0 or all-ones from the carry out */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
.byte	102,72,15,126,214	/* movq %xmm2,%rsi : table column (flags untouched) */
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	/* scatter: one limb per 128-byte table row */
	movq	%r8,0(%rsi)
	movq	%r9,128(%rsi)
	movq	%r10,256(%rsi)
	movq	%r11,384(%rsi)
	movq	%r12,512(%rsi)
	movq	%r13,640(%rsi)
	movq	%r14,768(%rsi)
	movq	%r15,896(%rsi)

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_scatter4_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4

/*
 * rsaz_512_mul_by_one - run the reduction on an 8-limb value whose upper
 * half is taken as zero, and store the reduced limbs.
 *
 * Inputs, as consumed by the code:
 *   %rdi  destination, 8 limbs written directly (no final subtraction)
 *   %rsi  source, 8 limbs loaded into %r8..%r15
 *   %rdx  pointer given to the reduce helper in %rbp
 *   %rcx  constant k, saved at 128(%rsp)
 * NOTE(review): presumably this converts a value out of Montgomery form
 * (a Montgomery multiplication by 1) - confirm against the callers.
 */
.globl	rsaz_512_mul_by_one
.type	rsaz_512_mul_by_one,@function
.align	32
rsaz_512_mul_by_one:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_by_one_body:
	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	movq	%rdx,%rbp
	movq	%rcx,128(%rsp)	/* save k */

	movq	(%rsi),%r8
	pxor	%xmm0,%xmm0
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	48(%rsi),%r14
	movq	56(%rsi),%r15

	/* zero 0..111(%rsp) of the scratch area */
	movdqa	%xmm0,(%rsp)
	movdqa	%xmm0,16(%rsp)
	movdqa	%xmm0,32(%rsp)
	movdqa	%xmm0,48(%rsp)
	movdqa	%xmm0,64(%rsp)
	movdqa	%xmm0,80(%rsp)
	movdqa	%xmm0,96(%rsp)
	andl	$0x80100,%eax
	cmpl	$0x80100,%eax
	je	.Lby_one_callx
	call	__rsaz_512_reduce
	jmp	.Lby_one_tail
.align	32
.Lby_one_callx:
	movq	128(%rsp),%rdx	/* k */
	call	__rsaz_512_reducex
.Lby_one_tail:
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_by_one_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one

/*
 * __rsaz_512_reduce - internal helper, scalar word-by-word reduction.
 *
 * In:   %r8..%r15 = eight limbs to reduce
 *       %rbp      = pointer to eight limbs n[0..7]
 *       128+8(%rsp) = constant k; this is the caller's 128(%rsp), the extra
 *                   8 bytes being the return address pushed by call
 * Out:  %r8..%r15 = reduced limbs
 * Uses: %rax, %rbx, %rcx, %rdx, %rsi
 *
 * Each of the 8 rounds computes m = %r8 * k, adds m * n[0..7] to the
 * accumulator and drops the lowest limb (a Montgomery-style reduction
 * step: m is chosen from the low limb and k alone).
 */
.type	__rsaz_512_reduce,@function
.align	32
__rsaz_512_reduce:
.cfi_startproc
	movq	%r8,%rbx
	imulq	128+8(%rsp),%rbx	/* m = low limb * k */
	movq	0(%rbp),%rax
	movl	$8,%ecx
	jmp	.Lreduction_loop

.align	32
.Lreduction_loop:
	mulq	%rbx
	movq	8(%rbp),%rax
	negq	%r8	/* only the carry is needed: CF = (%r8 != 0) */
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	128+8(%rsp),%rsi	/* reload k (mov leaves the flags intact) */

	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi	/* next round's m = new low limb * k */
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx	/* install next m */
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jne	.Lreduction_loop

	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	__rsaz_512_reduce,.-__rsaz_512_reduce

/*
 * __rsaz_512_reducex - internal helper, mulx/adcx/adox variant of the
 * reduction above.
 *
 * In:   %r8..%r15 = eight limbs to reduce
 *       %rbp      = pointer to eight limbs n[0..7]
 *       %rdx      = constant k
 *       128+8(%rsp) = the same k (caller's 128(%rsp)), re-read every round
 * Out:  %r8..%r15 = reduced limbs
 * Uses: %rax, %rbx, %rcx, %rdx, %rsi (%rsi is held at zero)
 */
.type	__rsaz_512_reducex,@function
.align	32
__rsaz_512_reducex:
.cfi_startproc

	imulq	%r8,%rdx	/* m = low limb * k */
	xorq	%rsi,%rsi	/* %rsi = 0, clears CF and OF */
	movl	$8,%ecx
	jmp	.Lreduction_loopx

.align	32
.Lreduction_loopx:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rbx,%rax	/* the sum is discarded; only the carry is used */
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rbx,%r10
	adcxq	%rbx,%r9
	adoxq	%r11,%r10

	mulxq	24(%rbp),%rbx,%r11
	adcxq	%rbx,%r10
	adoxq	%r12,%r11

.byte	0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00	/* mulxq 32(%rbp),%rbx,%r12 */
	movq	%rdx,%rax	/* park current m */
	movq	%r8,%rdx
	adcxq	%rbx,%r11
	adoxq	%r13,%r12

	mulxq	128+8(%rsp),%rbx,%rdx	/* %rbx = next m = new low limb * k */
	movq	%rax,%rdx	/* back to current m */

	mulxq	40(%rbp),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

.byte	0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00	/* mulxq 48(%rbp),%rax,%r14 */
	adcxq	%rax,%r13
	adoxq	%r15,%r14

	mulxq	56(%rbp),%rax,%r15
	movq	%rbx,%rdx	/* install next m */
	adcxq	%rax,%r14
	adoxq	%rsi,%r15
	adcxq	%rsi,%r15

	decl	%ecx
	jne	.Lreduction_loopx

	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	__rsaz_512_reducex,.-__rsaz_512_reducex

/*
 * __rsaz_512_subtract - internal helper: store the result, then subtract
 * n under a mask, using no branch.
 *
 * In:   %r8..%r15 = value
 *       %rcx      = mask, 0 or all-ones (callers produce it with sbbq)
 *       %rdi      = destination, 8 limbs
 *       %rbp      = pointer to n[0..7]
 * Out:  8 limbs at (%rdi) = value - (mask ? n : 0), also left in %r8..%r15
 *
 * -n is formed as neg(n[0]) and not(n[1..7]); that equals the two's
 * complement only when n[0] != 0, which holds for any odd n.
 */
.type	__rsaz_512_subtract,@function
.align	32
__rsaz_512_subtract:
.cfi_startproc
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	/* %r8..%r15 = (-n) & mask */
	movq	0(%rbp),%r8
	movq	8(%rbp),%r9
	negq	%r8
	notq	%r9
	andq	%rcx,%r8
	movq	16(%rbp),%r10
	andq	%rcx,%r9
	notq	%r10
	movq	24(%rbp),%r11
	andq	%rcx,%r10
	notq	%r11
	movq	32(%rbp),%r12
	andq	%rcx,%r11
	notq	%r12
	movq	40(%rbp),%r13
	andq	%rcx,%r12
	notq	%r13
	movq	48(%rbp),%r14
	andq	%rcx,%r13
	notq	%r14
	movq	56(%rbp),%r15
	andq	%rcx,%r14
	notq	%r15
	andq	%rcx,%r15

	/* add the stored value back in */
	addq	(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
1655.size __rsaz_512_subtract,.-__rsaz_512_subtract 1656.type __rsaz_512_mul,@function 1657.align 32 1658__rsaz_512_mul: 1659.cfi_startproc 1660 leaq 8(%rsp),%rdi 1661 1662 movq (%rsi),%rax 1663 mulq %rbx 1664 movq %rax,(%rdi) 1665 movq 8(%rsi),%rax 1666 movq %rdx,%r8 1667 1668 mulq %rbx 1669 addq %rax,%r8 1670 movq 16(%rsi),%rax 1671 movq %rdx,%r9 1672 adcq $0,%r9 1673 1674 mulq %rbx 1675 addq %rax,%r9 1676 movq 24(%rsi),%rax 1677 movq %rdx,%r10 1678 adcq $0,%r10 1679 1680 mulq %rbx 1681 addq %rax,%r10 1682 movq 32(%rsi),%rax 1683 movq %rdx,%r11 1684 adcq $0,%r11 1685 1686 mulq %rbx 1687 addq %rax,%r11 1688 movq 40(%rsi),%rax 1689 movq %rdx,%r12 1690 adcq $0,%r12 1691 1692 mulq %rbx 1693 addq %rax,%r12 1694 movq 48(%rsi),%rax 1695 movq %rdx,%r13 1696 adcq $0,%r13 1697 1698 mulq %rbx 1699 addq %rax,%r13 1700 movq 56(%rsi),%rax 1701 movq %rdx,%r14 1702 adcq $0,%r14 1703 1704 mulq %rbx 1705 addq %rax,%r14 1706 movq (%rsi),%rax 1707 movq %rdx,%r15 1708 adcq $0,%r15 1709 1710 leaq 8(%rbp),%rbp 1711 leaq 8(%rdi),%rdi 1712 1713 movl $7,%ecx 1714 jmp .Loop_mul 1715 1716.align 32 1717.Loop_mul: 1718 movq (%rbp),%rbx 1719 mulq %rbx 1720 addq %rax,%r8 1721 movq 8(%rsi),%rax 1722 movq %r8,(%rdi) 1723 movq %rdx,%r8 1724 adcq $0,%r8 1725 1726 mulq %rbx 1727 addq %rax,%r9 1728 movq 16(%rsi),%rax 1729 adcq $0,%rdx 1730 addq %r9,%r8 1731 movq %rdx,%r9 1732 adcq $0,%r9 1733 1734 mulq %rbx 1735 addq %rax,%r10 1736 movq 24(%rsi),%rax 1737 adcq $0,%rdx 1738 addq %r10,%r9 1739 movq %rdx,%r10 1740 adcq $0,%r10 1741 1742 mulq %rbx 1743 addq %rax,%r11 1744 movq 32(%rsi),%rax 1745 adcq $0,%rdx 1746 addq %r11,%r10 1747 movq %rdx,%r11 1748 adcq $0,%r11 1749 1750 mulq %rbx 1751 addq %rax,%r12 1752 movq 40(%rsi),%rax 1753 adcq $0,%rdx 1754 addq %r12,%r11 1755 movq %rdx,%r12 1756 adcq $0,%r12 1757 1758 mulq %rbx 1759 addq %rax,%r13 1760 movq 48(%rsi),%rax 1761 adcq $0,%rdx 1762 addq %r13,%r12 1763 movq %rdx,%r13 1764 adcq $0,%r13 1765 1766 mulq %rbx 1767 addq %rax,%r14 1768 movq 56(%rsi),%rax 
1769 adcq $0,%rdx
1770 addq %r14,%r13
1771 movq %rdx,%r14
1772 leaq 8(%rbp),%rbp /* next multiplier limb; leaq leaves the flags intact for the adcq below */
1773 adcq $0,%r14
1774
1775 mulq %rbx
1776 addq %rax,%r15
1777 movq (%rsi),%rax /* preload limb 0 for the next row */
1778 adcq $0,%rdx
1779 addq %r15,%r14
1780 movq %rdx,%r15
1781 adcq $0,%r15
1782
1783 leaq 8(%rdi),%rdi /* advance the output cursor */
1784
1785 decl %ecx
1786 jnz .Loop_mul
1787
/* All rows done: flush the eight limbs still held in registers. */
1788 movq %r8,(%rdi)
1789 movq %r9,8(%rdi)
1790 movq %r10,16(%rdi)
1791 movq %r11,24(%rdi)
1792 movq %r12,32(%rdi)
1793 movq %r13,40(%rdi)
1794 movq %r14,48(%rdi)
1795 movq %r15,56(%rdi)
1796
1797 .byte 0xf3,0xc3 /* bytes f3 c3 encode "rep ret" */
1798.cfi_endproc
1799.size __rsaz_512_mul,.-__rsaz_512_mul
/*
 * __rsaz_512_mulx
 *
 * Row-by-row 8x8-limb multiply using mulx, with adcx/adox carry chains in
 * the loop.
 *
 * In:   %rdx   multiplier limb for the first row (implicit mulx operand)
 *       %rsi   eight 64-bit limbs (multiplicand), re-read on every row
 *       %rbp   base of the multiplier limbs; later rows load
 *              8(%rbp)..56(%rbp)
 * Out:  sixteen 64-bit limbs at 8(%rsp)..8+64+56(%rsp), offsets relative to
 *       %rsp as seen inside this routine
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rdi, %r8-%r15, flags.
 *
 * NOTE(review): the first adcq below consumes CF as left by the caller
 * (mulx and movq do not write flags); the callers presumably guarantee
 * CF=0 at the call - confirm.
 */
1800.type __rsaz_512_mulx,@function
1801.align 32
1802__rsaz_512_mulx:
1803.cfi_startproc
/* First row. mulxq src,lo,hi computes %rdx * src without touching flags. */
1804 mulxq (%rsi),%rbx,%r8
1805 movq $-6,%rcx /* loop counter: counts -6..-1, six iterations */
1806
1807 mulxq 8(%rsi),%rax,%r9
1808 movq %rbx,8(%rsp) /* lowest result limb is final already */
1809
1810 mulxq 16(%rsi),%rbx,%r10
1811 adcq %rax,%r8
1812
1813 mulxq 24(%rsi),%rax,%r11
1814 adcq %rbx,%r9
1815
1816 mulxq 32(%rsi),%rbx,%r12
1817 adcq %rax,%r10
1818
1819 mulxq 40(%rsi),%rax,%r13
1820 adcq %rbx,%r11
1821
1822 mulxq 48(%rsi),%rbx,%r14
1823 adcq %rax,%r12
1824
1825 mulxq 56(%rsi),%rax,%r15
1826 movq 8(%rbp),%rdx /* multiplier limb for the next row */
1827 adcq %rbx,%r13
1828 adcq %rax,%r14
1829 adcq $0,%r15
1830
1831 xorq %rdi,%rdi /* %rdi = 0; also clears CF and OF for the adcx/adox chains */
1832 jmp .Loop_mulx /* jump over the alignment padding */
1833
1834.align 32
/*
 * One row per iteration. adcx carries through CF and adox through OF, so
 * the low-half additions and the high-half additions form two independent
 * chains. %rbx collects the limb that becomes final in this row.
 */
1835.Loop_mulx:
1836 movq %r8,%rbx
1837 mulxq (%rsi),%rax,%r8
1838 adcxq %rax,%rbx
1839 adoxq %r9,%r8
1840
1841 mulxq 8(%rsi),%rax,%r9
1842 adcxq %rax,%r8
1843 adoxq %r10,%r9
1844
1845 mulxq 16(%rsi),%rax,%r10
1846 adcxq %rax,%r9
1847 adoxq %r11,%r10
1848
1849 mulxq 24(%rsi),%rax,%r11
1850 adcxq %rax,%r10
1851 adoxq %r12,%r11
1852
1853.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 /* hand-encoded mulxq 32(%rsi),%rax,%r12 with a 0x3e prefix and 32-bit displacement */
1854 adcxq %rax,%r11
1855 adoxq %r13,%r12
1856
1857 mulxq 40(%rsi),%rax,%r13
1858 adcxq %rax,%r12
1859 adoxq %r14,%r13
1860
1861 mulxq 48(%rsi),%rax,%r14
1862 adcxq %rax,%r13
1863 adoxq %r15,%r14
1864
1865 mulxq 56(%rsi),%rax,%r15
1866 movq 64(%rbp,%rcx,8),%rdx /* next multiplier limb: 16(%rbp) when %rcx=-6 up to 56(%rbp) when %rcx=-1 */
1867 movq %rbx,8+64-8(%rsp,%rcx,8) /* store the finished limb: 16(%rsp) when %rcx=-6 up to 56(%rsp) when %rcx=-1 */
1868 adcxq %rax,%r14
1869 adoxq %rdi,%r15 /* %rdi is zero: absorb the OF chain into the top limb */
1870 adcxq %rdi,%r15 /* absorb the CF chain into the top limb */
1871
1872 incq %rcx
1873 jnz .Loop_mulx
1874
/*
 * Last row of __rsaz_512_mulx: same dual-chain pattern as the loop body,
 * but with no further multiplier limb to load. The .byte sequences are
 * hand-encoded mulxq instructions using a 32-bit displacement.
 */
1875 movq %r8,%rbx
1876 mulxq (%rsi),%rax,%r8
1877 adcxq %rax,%rbx
1878 adoxq %r9,%r8
1879
1880.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 /* mulxq 8(%rsi),%rax,%r9 */
1881 adcxq %rax,%r8
1882 adoxq %r10,%r9
1883
1884.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 /* mulxq 16(%rsi),%rax,%r10 */
1885 adcxq %rax,%r9
1886 adoxq %r11,%r10
1887
1888 mulxq 24(%rsi),%rax,%r11
1889 adcxq %rax,%r10
1890 adoxq %r12,%r11
1891
1892 mulxq 32(%rsi),%rax,%r12
1893 adcxq %rax,%r11
1894 adoxq %r13,%r12
1895
1896 mulxq 40(%rsi),%rax,%r13
1897 adcxq %rax,%r12
1898 adoxq %r14,%r13
1899
1900.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 /* mulxq 48(%rsi),%rax,%r14 */
1901 adcxq %rax,%r13
1902 adoxq %r15,%r14
1903
1904.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 /* mulxq 56(%rsi),%rax,%r15 */
1905 adcxq %rax,%r14
1906 adoxq %rdi,%r15
1907 adcxq %rdi,%r15
1908
/* Flush the nine limbs still in registers to 64(%rsp)..128(%rsp). */
1909 movq %rbx,8+64-8(%rsp)
1910 movq %r8,8+64(%rsp)
1911 movq %r9,8+64+8(%rsp)
1912 movq %r10,8+64+16(%rsp)
1913 movq %r11,8+64+24(%rsp)
1914 movq %r12,8+64+32(%rsp)
1915 movq %r13,8+64+40(%rsp)
1916 movq %r14,8+64+48(%rsp)
1917 movq %r15,8+64+56(%rsp)
1918
1919 .byte 0xf3,0xc3 /* bytes f3 c3 encode "rep ret" */
1920.cfi_endproc
1921.size __rsaz_512_mulx,.-__rsaz_512_mulx
/*
 * rsaz_512_scatter4
 *
 * Copies eight consecutive qwords from (%rsi) into a table at %rdi, writing
 * qword i to address %rdi + 8*%rdx + 128*i.
 *
 * In:   %rdi  table base
 *       %rsi  source, eight qwords
 *       %rdx  column index; the full 64-bit register is used in the address
 * Clobbers: %rax, %rsi, %rdi, %r9, flags.
 * NOTE(review): these are the first three integer argument registers of the
 * SysV AMD64 convention; the C prototype is not visible here - confirm.
 */
1922.globl rsaz_512_scatter4
1923.type rsaz_512_scatter4,@function
1924.align 16
1925rsaz_512_scatter4:
1926.cfi_startproc
1927 leaq (%rdi,%rdx,8),%rdi /* point at the selected column of row 0 */
1928 movl $8,%r9d /* eight qwords to copy */
1929 jmp .Loop_scatter /* jump over the alignment padding */
1930.align 16
1931.Loop_scatter:
1932 movq (%rsi),%rax
1933 leaq 8(%rsi),%rsi
1934 movq %rax,(%rdi)
1935 leaq 128(%rdi),%rdi /* same column, next 128-byte row */
1936 decl %r9d
1937 jnz .Loop_scatter
1938 .byte 0xf3,0xc3 /* bytes f3 c3 encode "rep ret" */
1939.cfi_endproc
1940.size rsaz_512_scatter4,.-rsaz_512_scatter4
1941
/*
 * rsaz_512_gather4
 *
 * Reads eight 128-byte rows from (%rsi) and, from each row, selects the
 * qword whose position equals the index in %edx, storing the eight selected
 * qwords to (%rdi). Selection is done by masking: every row is loaded in
 * full, so the addresses read do not depend on the index.
 *
 * In:   %rdi  destination, eight qwords
 *       %rsi  table; read with movdqa, so it must be 16-byte aligned
 *       %edx  index; masks exist only for 0..15, any other value matches
 *             nothing and produces all-zero output
 * Clobbers: %r9, %rsi, %rdi, %xmm0-%xmm15, flags.
 */
1942.globl rsaz_512_gather4
1943.type rsaz_512_gather4,@function
1944.align 16
1945rsaz_512_gather4:
1946.cfi_startproc
1947 movd %edx,%xmm8
1948 movdqa .Linc+16(%rip),%xmm1 /* dwords {2,2,2,2}: the per-register step */
1949 movdqa .Linc(%rip),%xmm0 /* dwords {0,0,1,1}: positions held by the first register */
1950
1951 pshufd $0,%xmm8,%xmm8 /* broadcast the index to all four dwords */
/*
 * Build the masks: each paddd derives the next register's pair of positions
 * from the previous one, and each pcmpeqd then turns a register of
 * positions into all-ones lanes where the position equals the index.
 */
1952 movdqa %xmm1,%xmm7 /* keep a copy of the step */
1953 movdqa %xmm1,%xmm2
1954 paddd %xmm0,%xmm1 /* {2,2,3,3} */
1955 pcmpeqd %xmm8,%xmm0
1956 movdqa %xmm7,%xmm3
1957 paddd %xmm1,%xmm2 /* {4,4,5,5} */
1958 pcmpeqd %xmm8,%xmm1
1959 movdqa %xmm7,%xmm4
1960 paddd
%xmm2,%xmm3
/*
 * Remainder of the mask setup in rsaz_512_gather4. Each paddd derives the
 * next register's pair of positions from the previous one; each pcmpeqd
 * compares a register of positions with the broadcast index in %xmm8 and
 * leaves all-ones in the matching lanes. Afterwards %xmm0..%xmm7 are the
 * masks for positions 0..15, two positions per register.
 */
1961 pcmpeqd %xmm8,%xmm2
1962 movdqa %xmm7,%xmm5
1963 paddd %xmm3,%xmm4
1964 pcmpeqd %xmm8,%xmm3
1965 movdqa %xmm7,%xmm6
1966 paddd %xmm4,%xmm5
1967 pcmpeqd %xmm8,%xmm4
1968 paddd %xmm5,%xmm6
1969 pcmpeqd %xmm8,%xmm5
1970 paddd %xmm6,%xmm7
1971 pcmpeqd %xmm8,%xmm6
1972 pcmpeqd %xmm8,%xmm7
1973 movl $8,%r9d /* eight rows to process */
1974 jmp .Loop_gather /* jump over the alignment padding */
1975.align 16
/*
 * Per row: load all 128 bytes, AND each 16-byte piece with its mask, OR
 * everything together, and store the surviving qword. %xmm8 is reused for
 * table data here; the broadcast index is no longer needed.
 */
1976.Loop_gather:
1977 movdqa 0(%rsi),%xmm8
1978 movdqa 16(%rsi),%xmm9
1979 movdqa 32(%rsi),%xmm10
1980 movdqa 48(%rsi),%xmm11
1981 pand %xmm0,%xmm8
1982 movdqa 64(%rsi),%xmm12
1983 pand %xmm1,%xmm9
1984 movdqa 80(%rsi),%xmm13
1985 pand %xmm2,%xmm10
1986 movdqa 96(%rsi),%xmm14
1987 pand %xmm3,%xmm11
1988 movdqa 112(%rsi),%xmm15
1989 leaq 128(%rsi),%rsi /* advance to the next row */
1990 pand %xmm4,%xmm12
1991 pand %xmm5,%xmm13
1992 pand %xmm6,%xmm14
1993 pand %xmm7,%xmm15
1994 por %xmm10,%xmm8
1995 por %xmm11,%xmm9
1996 por %xmm12,%xmm8
1997 por %xmm13,%xmm9
1998 por %xmm14,%xmm8
1999 por %xmm15,%xmm9
2000
2001 por %xmm9,%xmm8
2002 pshufd $0x4e,%xmm8,%xmm9 /* 0x4e swaps the two qwords */
2003 por %xmm9,%xmm8 /* low qword now holds low|high */
2004 movq %xmm8,(%rdi) /* store the low 64 bits */
2005 leaq 8(%rdi),%rdi
2006 decl %r9d
2007 jnz .Loop_gather
2008 .byte 0xf3,0xc3 /* bytes f3 c3 encode "rep ret" */
/* NOTE(review): this label is not referenced anywhere in this part of the file. */
2009.LSEH_end_rsaz_512_gather4:
2010.cfi_endproc
2011.size rsaz_512_gather4,.-rsaz_512_gather4
2012
/*
 * Constants loaded by rsaz_512_gather4 with movdqa: the first 16 bytes are
 * the dwords {0,0,1,1}, the next 16 bytes (.Linc+16) the dwords {2,2,2,2}.
 * The 64-byte alignment satisfies the 16-byte requirement of movdqa.
 */
2013.align 64
2014.Linc:
2015.long 0,0, 1,1
2016.long 2,2, 2,2
2017