/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
.text

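/*
 * NB: the summary comments below are editorial additions for navigation;
 * the rsaz-x86_64.pl source remains authoritative.
 *
 * rsaz_512_sqr: %rdi = out, %rsi = inp, %rdx = mod (stashed in %xmm1),
 * %rcx = n0, i.e. -mod[0]^-1 mod 2^64 (saved at 128(%rsp)), %r8d =
 * number of squarings.  Bits 8 (BMI2) and 19 (ADX) of the third
 * OPENSSL_ia32cap_P word select the mulx/adcx/adox path (.Loop_sqrx);
 * otherwise the plain mulq path (.Loop_sqr) is taken.
 */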
.globl	rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lsqr_body:
.byte	102,72,15,110,202
	movq	(%rsi),%rdx
	movq	8(%rsi),%rax
	movq	%rcx,128(%rsp)
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Loop_sqrx
	jmp	.Loop_sqr

.align	32
.Loop_sqr:
	movl	%r8d,128+8(%rsp)

	movq	%rdx,%rbx
	movq	%rax,%rbp
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r8,%r8
	movq	%rdx,%r15
	adcq	$0,%rcx

	mulq	%rax
	addq	%r8,%rdx
	adcq	$0,%rcx

	movq	%rax,(%rsp)
	movq	%rdx,8(%rsp)


	movq	16(%rsi),%rax
	mulq	%rbp
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r8
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	movq	16(%rsi),%rbp
	addq	%rax,%r9
	movq	24(%rsi),%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)


	mulq	%rbp
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r8
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r9
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	movq	24(%rsi),%r10
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)


	movq	%rax,%r11
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%r12
	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%rbp
	mulq	%r10
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r13,%r13
	movq	%rdx,%r10
	adcq	%r14,%r14
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%rax,%r13
	movq	%r12,%rax
	adcq	%rdx,%r14
	adcq	$0,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)


	mulq	%r11
	addq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	movq	%rax,%r14
	mulq	%r11
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r15,%r15
	movq	%rdx,%r11
	adcq	%r8,%r8
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	%rdx,%r8
	adcq	$0,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)


	mulq	%r12
	addq	%rax,%r10
	movq	%r14,%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r12
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%rax,%r9
	movq	%r14,%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


	mulq	%rbp
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r13
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax

	addq	%rbx,%rax
	addq	%rax,%r11
	movq	%r14,%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)


	xorq	%rbx,%rbx
	addq	%r13,%r13
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%r13,%rax
	adcq	%rbx,%rdx

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15
.byte	102,72,15,126,205

	movq	%rax,112(%rsp)
	movq	%rdx,120(%rsp)

	call	__rsaz_512_reduce

	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqr
	jmp	.Lsqr_tail

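/*
 * Squaring path for BMI2/ADX processors: mulx takes the multiplicand
 * implicitly in %rdx and does not touch the flags, while adcx/adox
 * advance two independent carry chains (CF and OF), so the cross
 * products and their doubling (adoxq %reg,%reg) can be interleaved.
 */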
.align	32
.Loop_sqrx:
	movl	%r8d,128+8(%rsp)
.byte	102,72,15,110,199

	mulxq	%rax,%r8,%r9
	movq	%rax,%rbx

	mulxq	16(%rsi),%rcx,%r10
	xorq	%rbp,%rbp

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rcx,%r9

.byte	0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00
	adcxq	%rax,%r10

.byte	0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00
	adcxq	%rcx,%r11

	mulxq	48(%rsi),%rcx,%r14
	adcxq	%rax,%r12
	adcxq	%rcx,%r13

	mulxq	56(%rsi),%rax,%r15
	adcxq	%rax,%r14
	adcxq	%rbp,%r15

	mulxq	%rdx,%rax,%rdi
	movq	%rbx,%rdx
	xorq	%rcx,%rcx
	adoxq	%r8,%r8
	adcxq	%rdi,%r8
	adoxq	%rbp,%rcx
	adcxq	%rbp,%rcx

	movq	%rax,(%rsp)
	movq	%r8,8(%rsp)


.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

	mulxq	24(%rsi),%rdi,%r8
	adoxq	%rdi,%r11
.byte	0x66
	adcxq	%r8,%r12

	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r12
	adcxq	%rbx,%r13

	mulxq	40(%rsi),%rdi,%r8
	adoxq	%rdi,%r13
	adcxq	%r8,%r14

.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

.byte	0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
	adoxq	%rdi,%r15
	adcxq	%rbp,%r8
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r8
.byte	0x48,0x8b,0x96,0x10,0x00,0x00,0x00

	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r10
	adcxq	%rbp,%rbx

	movq	%r9,16(%rsp)
.byte	0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00


	mulxq	24(%rsi),%rdi,%r9
	adoxq	%rdi,%r12
	adcxq	%r9,%r13

	mulxq	32(%rsi),%rax,%rcx
	adoxq	%rax,%r13
	adcxq	%rcx,%r14

.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00
	adoxq	%rdi,%r14
	adcxq	%r9,%r15

.byte	0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
	adoxq	%rax,%r15
	adcxq	%rcx,%r8

	mulxq	56(%rsi),%rdi,%r9
	adoxq	%rdi,%r8
	adcxq	%rbp,%r9
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r9
	movq	24(%rsi),%rdx

	xorq	%rcx,%rcx
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)


	mulxq	32(%rsi),%rax,%rbx
	adoxq	%rax,%r14
	adcxq	%rbx,%r15

	mulxq	40(%rsi),%rdi,%r10
	adoxq	%rdi,%r15
	adcxq	%r10,%r8

	mulxq	48(%rsi),%rax,%rbx
	adoxq	%rax,%r8
	adcxq	%rbx,%r9

	mulxq	56(%rsi),%rdi,%r10
	adoxq	%rdi,%r9
	adcxq	%rbp,%r10
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r10
	movq	32(%rsi),%rdx

	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%r14,%r14
	adcxq	%rax,%r13
	adoxq	%rbp,%rbx
	adcxq	%rdi,%r14
	adcxq	%rbp,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)


	mulxq	40(%rsi),%rdi,%r11
	adoxq	%rdi,%r8
	adcxq	%r11,%r9

	mulxq	48(%rsi),%rax,%rcx
	adoxq	%rax,%r9
	adcxq	%rcx,%r10

	mulxq	56(%rsi),%rdi,%r11
	adoxq	%rdi,%r10
	adcxq	%rbp,%r11
	mulxq	%rdx,%rax,%rdi
	movq	40(%rsi),%rdx
	adoxq	%rbp,%r11

	xorq	%rcx,%rcx
	adoxq	%r15,%r15

	adcxq	%rbx,%rax
	adoxq	%r8,%r8
	adcxq	%rax,%r15
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r8
	adcxq	%rbp,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)


.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
	adoxq	%rax,%r10
	adcxq	%rbx,%r11

.byte	0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
	adoxq	%rdi,%r11
	adcxq	%rbp,%r12
	mulxq	%rdx,%rax,%rdi
	adoxq	%rbp,%r12
	movq	48(%rsi),%rdx

	xorq	%rbx,%rbx
	adoxq	%r9,%r9

	adcxq	%rcx,%rax
	adoxq	%r10,%r10
	adcxq	%rax,%r9
	adcxq	%rdi,%r10
	adoxq	%rbp,%rbx
	adcxq	%rbp,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)


.byte	0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
	adoxq	%rax,%r12
	adoxq	%rbp,%r13

	mulxq	%rdx,%rax,%rdi
	xorq	%rcx,%rcx
	movq	56(%rsi),%rdx
	adoxq	%r11,%r11

	adcxq	%rbx,%rax
	adoxq	%r12,%r12
	adcxq	%rax,%r11
	adoxq	%rbp,%rcx
	adcxq	%rdi,%r12
	adcxq	%rbp,%rcx

.byte	0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
.byte	0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00


	mulxq	%rdx,%rax,%rdx
	xorq	%rbx,%rbx
	adoxq	%r13,%r13

	adcxq	%rcx,%rax
	adoxq	%rbp,%rbx
	adcxq	%r13,%rax
	adcxq	%rdx,%rbx

.byte	102,72,15,126,199
.byte	102,72,15,126,205

	movq	128(%rsp),%rdx
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	movq	%rax,112(%rsp)
	movq	%rbx,120(%rsp)

	call	__rsaz_512_reducex

	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	movq	%r8,%rdx
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqrx

.Lsqr_tail:

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	rsaz_512_sqr,.-rsaz_512_sqr
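/*
 * rsaz_512_mul: %rdi = out, %rsi = a, %rdx = b, %rcx = mod (stashed in
 * %xmm1), %r8 = n0 (saved at 128(%rsp)).  The 1024-bit product is built
 * on the stack by __rsaz_512_mul or __rsaz_512_mulx, Montgomery-reduced,
 * then conditionally corrected by __rsaz_512_subtract.
 */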
.globl	rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_body:
.byte	102,72,15,110,199
.byte	102,72,15,110,201
	movq	%r8,128(%rsp)
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx
	movq	(%rdx),%rbx
	movq	%rdx,%rbp
	call	__rsaz_512_mul

.byte	102,72,15,126,199
.byte	102,72,15,126,205

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_tail

.align	32
.Lmulx:
	movq	%rdx,%rbp
	movq	(%rdx),%rdx
	call	__rsaz_512_mulx

.byte	102,72,15,126,199
.byte	102,72,15,126,205

	movq	128(%rsp),%rdx
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex
.Lmul_tail:
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	rsaz_512_mul,.-rsaz_512_mul
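/*
 * rsaz_512_mul_gather4: %rdi = out, %rsi = a, %rdx = table of 16
 * interleaved 512-bit entries, %rcx = mod, %r8 = n0, %r9d = entry
 * index.  The index is broadcast and compared (pcmpeqd) against lane
 * numbers seeded from .Linc; every pass then loads an entire 128-byte
 * table row and selects one limb with pand/por, so the memory access
 * pattern is independent of the index.
 */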
.globl	rsaz_512_mul_gather4
.type	rsaz_512_mul_gather4,@function
.align	32
rsaz_512_mul_gather4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$152,%rsp
.cfi_adjust_cfa_offset	152
.Lmul_gather4_body:
	movd	%r9d,%xmm8
	movdqa	.Linc+16(%rip),%xmm1
	movdqa	.Linc(%rip),%xmm0

	pshufd	$0,%xmm8,%xmm8
	movdqa	%xmm1,%xmm7
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm8,%xmm0
	movdqa	%xmm7,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm8,%xmm1
	movdqa	%xmm7,%xmm4
	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm8,%xmm2
	movdqa	%xmm7,%xmm5
	paddd	%xmm3,%xmm4
	pcmpeqd	%xmm8,%xmm3
	movdqa	%xmm7,%xmm6
	paddd	%xmm4,%xmm5
	pcmpeqd	%xmm8,%xmm4
	paddd	%xmm5,%xmm6
	pcmpeqd	%xmm8,%xmm5
	paddd	%xmm6,%xmm7
	pcmpeqd	%xmm8,%xmm6
	pcmpeqd	%xmm8,%xmm7

	movdqa	0(%rdx),%xmm8
	movdqa	16(%rdx),%xmm9
	movdqa	32(%rdx),%xmm10
	movdqa	48(%rdx),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rdx),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rdx),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rdx),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rdx),%xmm15
	leaq	128(%rdx),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx_gather
.byte	102,76,15,126,195

	movq	%r8,128(%rsp)
	movq	%rdi,128+8(%rsp)
	movq	%rcx,128+16(%rsp)

	movq	(%rsi),%rax
	movq	8(%rsi),%rcx
	mulq	%rbx
	movq	%rax,(%rsp)
	movq	%rcx,%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rsp),%rdi
	movl	$7,%ecx
	jmp	.Loop_mul_gather

.align	32
.Loop_mul_gather:
	movdqa	0(%rbp),%xmm8
	movdqa	16(%rbp),%xmm9
	movdqa	32(%rbp),%xmm10
	movdqa	48(%rbp),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rbp),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rbp),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rbp),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rbp),%xmm15
	leaq	128(%rbp),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
.byte	102,76,15,126,195

	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul_gather

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	128+8(%rsp),%rdi
	movq	128+16(%rsp),%rbp

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_gather_tail

.align	32
.Lmulx_gather:
.byte	102,76,15,126,194

	movq	%r8,128(%rsp)
	movq	%rdi,128+8(%rsp)
	movq	%rcx,128+16(%rsp)

	mulxq	(%rsi),%rbx,%r8
	movq	%rbx,(%rsp)
	xorl	%edi,%edi

	mulxq	8(%rsi),%rax,%r9

	mulxq	16(%rsi),%rbx,%r10
	adcxq	%rax,%r8

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rbx,%r9

	mulxq	32(%rsi),%rbx,%r12
	adcxq	%rax,%r10

	mulxq	40(%rsi),%rax,%r13
	adcxq	%rbx,%r11

	mulxq	48(%rsi),%rbx,%r14
	adcxq	%rax,%r12

	mulxq	56(%rsi),%rax,%r15
	adcxq	%rbx,%r13
	adcxq	%rax,%r14
.byte	0x67
	movq	%r8,%rbx
	adcxq	%rdi,%r15

	movq	$-7,%rcx
	jmp	.Loop_mulx_gather

.align	32
.Loop_mulx_gather:
	movdqa	0(%rbp),%xmm8
	movdqa	16(%rbp),%xmm9
	movdqa	32(%rbp),%xmm10
	movdqa	48(%rbp),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rbp),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rbp),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rbp),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rbp),%xmm15
	leaq	128(%rbp),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
.byte	102,76,15,126,194

.byte	0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

	mulxq	8(%rsi),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rsi),%rax,%r10
	adcxq	%rax,%r9
	adoxq	%r11,%r10

.byte	0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
	adcxq	%rax,%r10
	adoxq	%r12,%r11

	mulxq	32(%rsi),%rax,%r12
	adcxq	%rax,%r11
	adoxq	%r13,%r12

	mulxq	40(%rsi),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

.byte	0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
	adcxq	%rax,%r13
.byte	0x67
	adoxq	%r15,%r14

	mulxq	56(%rsi),%rax,%r15
	movq	%rbx,64(%rsp,%rcx,8)
	adcxq	%rax,%r14
	adoxq	%rdi,%r15
	movq	%r8,%rbx
	adcxq	%rdi,%r15

	incq	%rcx
	jnz	.Loop_mulx_gather

	movq	%r8,64(%rsp)
	movq	%r9,64+8(%rsp)
	movq	%r10,64+16(%rsp)
	movq	%r11,64+24(%rsp)
	movq	%r12,64+32(%rsp)
	movq	%r13,64+40(%rsp)
	movq	%r14,64+48(%rsp)
	movq	%r15,64+56(%rsp)

	movq	128(%rsp),%rdx
	movq	128+8(%rsp),%rdi
	movq	128+16(%rsp),%rbp

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex

.Lmul_gather_tail:
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_gather4_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
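/*
 * rsaz_512_mul_scatter4: %rdi = out (whose current contents also serve
 * as the second multiplicand), %rsi = a, %rdx = mod, %rcx = n0, %r8 =
 * table, %r9d = entry index.  The Montgomery product is written to out
 * and additionally scattered into the table at a 128-byte stride
 * starting at table + index*8.
 */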
.globl	rsaz_512_mul_scatter4
.type	rsaz_512_mul_scatter4,@function
.align	32
rsaz_512_mul_scatter4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	movl	%r9d,%r9d
	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_scatter4_body:
	leaq	(%r8,%r9,8),%r8
.byte	102,72,15,110,199
.byte	102,72,15,110,202
.byte	102,73,15,110,208
	movq	%rcx,128(%rsp)

	movq	%rdi,%rbp
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx_scatter
	movq	(%rdi),%rbx
	call	__rsaz_512_mul

.byte	102,72,15,126,199
.byte	102,72,15,126,205

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_scatter_tail

.align	32
.Lmulx_scatter:
	movq	(%rdi),%rdx
	call	__rsaz_512_mulx

.byte	102,72,15,126,199
.byte	102,72,15,126,205

	movq	128(%rsp),%rdx
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex

.Lmul_scatter_tail:
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
.byte	102,72,15,126,214
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	movq	%r8,0(%rsi)
	movq	%r9,128(%rsi)
	movq	%r10,256(%rsi)
	movq	%r11,384(%rsi)
	movq	%r12,512(%rsi)
	movq	%r13,640(%rsi)
	movq	%r14,768(%rsi)
	movq	%r15,896(%rsi)

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_scatter4_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
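/*
 * rsaz_512_mul_by_one: %rdi = out, %rsi = a, %rdx = mod, %rcx = n0.
 * Loads a into %r8..%r15, zeroes the stack scratch area and runs a
 * single reduction pass, i.e. converts a out of Montgomery form
 * (multiplies by 2^-512 mod `mod`).
 */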
.globl	rsaz_512_mul_by_one
.type	rsaz_512_mul_by_one,@function
.align	32
rsaz_512_mul_by_one:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_by_one_body:
	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	movq	%rdx,%rbp
	movq	%rcx,128(%rsp)

	movq	(%rsi),%r8
	pxor	%xmm0,%xmm0
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	48(%rsi),%r14
	movq	56(%rsi),%r15

	movdqa	%xmm0,(%rsp)
	movdqa	%xmm0,16(%rsp)
	movdqa	%xmm0,32(%rsp)
	movdqa	%xmm0,48(%rsp)
	movdqa	%xmm0,64(%rsp)
	movdqa	%xmm0,80(%rsp)
	movdqa	%xmm0,96(%rsp)
	andl	$0x80100,%eax
	cmpl	$0x80100,%eax
	je	.Lby_one_callx
	call	__rsaz_512_reduce
	jmp	.Lby_one_tail
.align	32
.Lby_one_callx:
	movq	128(%rsp),%rdx
	call	__rsaz_512_reducex
.Lby_one_tail:
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_by_one_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
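/*
 * __rsaz_512_reduce: eight Montgomery reduction steps on the half of
 * the product held in %r8..%r15 (the caller folds in the upper half
 * afterwards).  %rbp = mod; n0 is read from 128+8(%rsp), which is the
 * caller's 128(%rsp) plus the return address pushed by call.  Each
 * step multiplies mod by m = %r8 * n0, whose low limb cancels %r8 by
 * construction (negq %r8 recovers the carry), shifting the window
 * down one limb; the next m is precomputed early via imulq %r8,%rsi.
 */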
.type	__rsaz_512_reduce,@function
.align	32
__rsaz_512_reduce:
.cfi_startproc
	movq	%r8,%rbx
	imulq	128+8(%rsp),%rbx
	movq	0(%rbp),%rax
	movl	$8,%ecx
	jmp	.Lreduction_loop

.align	32
.Lreduction_loop:
	mulq	%rbx
	movq	8(%rbp),%rax
	negq	%r8
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	128+8(%rsp),%rsi


	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jne	.Lreduction_loop

	.byte	0xf3,0xc3
.cfi_endproc
.size	__rsaz_512_reduce,.-__rsaz_512_reduce
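/*
 * __rsaz_512_reducex: BMI2/ADX version of __rsaz_512_reduce.  On entry
 * %rdx holds n0; imulq %r8,%rdx forms the first multiplier m, and each
 * pass runs mulx against the modulus at %rbp under the adcx/adox dual
 * carry chains, fetching n0 again from 128+8(%rsp) to derive the next
 * multiplier from the updated %r8.
 */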
.type	__rsaz_512_reducex,@function
.align	32
__rsaz_512_reducex:
.cfi_startproc

	imulq	%r8,%rdx
	xorq	%rsi,%rsi
	movl	$8,%ecx
	jmp	.Lreduction_loopx

.align	32
.Lreduction_loopx:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rbx,%rax
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rbx,%r10
	adcxq	%rbx,%r9
	adoxq	%r11,%r10

	mulxq	24(%rbp),%rbx,%r11
	adcxq	%rbx,%r10
	adoxq	%r12,%r11

.byte	0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
	movq	%rdx,%rax
	movq	%r8,%rdx
	adcxq	%rbx,%r11
	adoxq	%r13,%r12

	mulxq	128+8(%rsp),%rbx,%rdx
	movq	%rax,%rdx

	mulxq	40(%rbp),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

.byte	0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
	adcxq	%rax,%r13
	adoxq	%r15,%r14

	mulxq	56(%rbp),%rax,%r15
	movq	%rbx,%rdx
	adcxq	%rax,%r14
	adoxq	%rsi,%r15
	adcxq	%rsi,%r15

	decl	%ecx
	jne	.Lreduction_loopx

	.byte	0xf3,0xc3
.cfi_endproc
.size	__rsaz_512_reducex,.-__rsaz_512_reducex
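/*
 * __rsaz_512_subtract: constant-time final correction.  The callers
 * set %rcx to all-ones via sbbq %rcx,%rcx when the preceding 512-bit
 * addition carried out; -mod (the negq/notq chain) is masked with
 * %rcx and added to the result at (%rdi), i.e. mod is subtracted
 * exactly when the intermediate sum overflowed.
 */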
.type	__rsaz_512_subtract,@function
.align	32
__rsaz_512_subtract:
.cfi_startproc
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	0(%rbp),%r8
	movq	8(%rbp),%r9
	negq	%r8
	notq	%r9
	andq	%rcx,%r8
	movq	16(%rbp),%r10
	andq	%rcx,%r9
	notq	%r10
	movq	24(%rbp),%r11
	andq	%rcx,%r10
	notq	%r11
	movq	32(%rbp),%r12
	andq	%rcx,%r11
	notq	%r12
	movq	40(%rbp),%r13
	andq	%rcx,%r12
	notq	%r13
	movq	48(%rbp),%r14
	andq	%rcx,%r13
	notq	%r14
	movq	56(%rbp),%r15
	andq	%rcx,%r14
	notq	%r15
	andq	%rcx,%r15

	addq	(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__rsaz_512_subtract,.-__rsaz_512_subtract
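/*
 * __rsaz_512_mul: schoolbook 512x512->1024-bit multiply.  %rsi points
 * at the eight-limb multiplicand, %rbp at the multiplier (first limb
 * pre-loaded into %rbx by the caller); the 16-limb product is stored
 * at 8(%rsp), which is the caller's (%rsp) once the return address is
 * accounted for.  __rsaz_512_mulx below is the BMI2/ADX equivalent.
 */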
.type	__rsaz_512_mul,@function
.align	32
__rsaz_512_mul:
.cfi_startproc
	leaq	8(%rsp),%rdi

	movq	(%rsi),%rax
	mulq	%rbx
	movq	%rax,(%rdi)
	movq	8(%rsi),%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rbp),%rbp
	leaq	8(%rdi),%rdi

	movl	$7,%ecx
	jmp	.Loop_mul

.align	32
.Loop_mul:
	movq	(%rbp),%rbx
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	leaq	8(%rbp),%rbp
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__rsaz_512_mul,.-__rsaz_512_mul
.type	__rsaz_512_mulx,@function
.align	32
__rsaz_512_mulx:
.cfi_startproc
	mulxq	(%rsi),%rbx,%r8
	movq	$-6,%rcx

	mulxq	8(%rsi),%rax,%r9
	movq	%rbx,8(%rsp)

	mulxq	16(%rsi),%rbx,%r10
	adcq	%rax,%r8

	mulxq	24(%rsi),%rax,%r11
	adcq	%rbx,%r9

	mulxq	32(%rsi),%rbx,%r12
	adcq	%rax,%r10

	mulxq	40(%rsi),%rax,%r13
	adcq	%rbx,%r11

	mulxq	48(%rsi),%rbx,%r14
	adcq	%rax,%r12

	mulxq	56(%rsi),%rax,%r15
	movq	8(%rbp),%rdx
	adcq	%rbx,%r13
	adcq	%rax,%r14
	adcq	$0,%r15

	xorq	%rdi,%rdi
	jmp	.Loop_mulx

.align	32
.Loop_mulx:
	movq	%r8,%rbx
	mulxq	(%rsi),%rax,%r8
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

	mulxq	8(%rsi),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rsi),%rax,%r10
	adcxq	%rax,%r9
	adoxq	%r11,%r10

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11

.byte	0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
	adcxq	%rax,%r11
	adoxq	%r13,%r12

	mulxq	40(%rsi),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

	mulxq	48(%rsi),%rax,%r14
	adcxq	%rax,%r13
	adoxq	%r15,%r14

	mulxq	56(%rsi),%rax,%r15
	movq	64(%rbp,%rcx,8),%rdx
	movq	%rbx,8+64-8(%rsp,%rcx,8)
	adcxq	%rax,%r14
	adoxq	%rdi,%r15
	adcxq	%rdi,%r15

	incq	%rcx
	jnz	.Loop_mulx

	movq	%r8,%rbx
	mulxq	(%rsi),%rax,%r8
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

.byte	0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
	adcxq	%rax,%r8
	adoxq	%r10,%r9

.byte	0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
	adcxq	%rax,%r9
	adoxq	%r11,%r10

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11

	mulxq	32(%rsi),%rax,%r12
	adcxq	%rax,%r11
	adoxq	%r13,%r12

	mulxq	40(%rsi),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

.byte	0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
	adcxq	%rax,%r13
	adoxq	%r15,%r14

.byte	0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
	adcxq	%rax,%r14
	adoxq	%rdi,%r15
	adcxq	%rdi,%r15

	movq	%rbx,8+64-8(%rsp)
	movq	%r8,8+64(%rsp)
	movq	%r9,8+64+8(%rsp)
	movq	%r10,8+64+16(%rsp)
	movq	%r11,8+64+24(%rsp)
	movq	%r12,8+64+32(%rsp)
	movq	%r13,8+64+40(%rsp)
	movq	%r14,8+64+48(%rsp)
	movq	%r15,8+64+56(%rsp)

	.byte	0xf3,0xc3
.cfi_endproc
.size	__rsaz_512_mulx,.-__rsaz_512_mulx
.globl	rsaz_512_scatter4
.type	rsaz_512_scatter4,@function
.align	16
rsaz_512_scatter4:
.cfi_startproc
	leaq	(%rdi,%rdx,8),%rdi
	movl	$8,%r9d
	jmp	.Loop_scatter
.align	16
.Loop_scatter:
	movq	(%rsi),%rax
	leaq	8(%rsi),%rsi
	movq	%rax,(%rdi)
	leaq	128(%rdi),%rdi
	decl	%r9d
	jnz	.Loop_scatter
	.byte	0xf3,0xc3
.cfi_endproc
.size	rsaz_512_scatter4,.-rsaz_512_scatter4

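/*
 * rsaz_512_gather4: %rdi = out, %rsi = table, %edx = entry index
 * (0..15).  Mirrors the gather in rsaz_512_mul_gather4: masks built
 * from .Linc and pcmpeqd select one of 16 interleaved entries while
 * every 128-byte row is read in full, keeping the access pattern
 * independent of the (secret) index as a cache-timing hardening.
 */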
.globl	rsaz_512_gather4
.type	rsaz_512_gather4,@function
.align	16
rsaz_512_gather4:
.cfi_startproc
	movd	%edx,%xmm8
	movdqa	.Linc+16(%rip),%xmm1
	movdqa	.Linc(%rip),%xmm0

	pshufd	$0,%xmm8,%xmm8
	movdqa	%xmm1,%xmm7
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm8,%xmm0
	movdqa	%xmm7,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm8,%xmm1
	movdqa	%xmm7,%xmm4
	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm8,%xmm2
	movdqa	%xmm7,%xmm5
	paddd	%xmm3,%xmm4
	pcmpeqd	%xmm8,%xmm3
	movdqa	%xmm7,%xmm6
	paddd	%xmm4,%xmm5
	pcmpeqd	%xmm8,%xmm4
	paddd	%xmm5,%xmm6
	pcmpeqd	%xmm8,%xmm5
	paddd	%xmm6,%xmm7
	pcmpeqd	%xmm8,%xmm6
	pcmpeqd	%xmm8,%xmm7
	movl	$8,%r9d
	jmp	.Loop_gather
.align	16
.Loop_gather:
	movdqa	0(%rsi),%xmm8
	movdqa	16(%rsi),%xmm9
	movdqa	32(%rsi),%xmm10
	movdqa	48(%rsi),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rsi),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rsi),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rsi),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rsi),%xmm15
	leaq	128(%rsi),%rsi
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
	movq	%xmm8,(%rdi)
	leaq	8(%rdi),%rdi
	decl	%r9d
	jnz	.Loop_gather
	.byte	0xf3,0xc3
.LSEH_end_rsaz_512_gather4:
.cfi_endproc
.size	rsaz_512_gather4,.-rsaz_512_gather4

.align	64
.Linc:
.long	0,0, 1,1
.long	2,2, 2,2