/* Do not modify. This file is auto-generated from x86_64-mont.pl. */
/*
 * Montgomery multiplication for x86_64 (see the ident string near the end
 * of the file).  GNU as, AT&T syntax, one statement per line.
 *
 * Only bn_mul_mont is exported (.globl).  bn_mul4x_mont, bn_sqr8x_mont and
 * bn_mulx4x_mont are file-local symbols that bn_mul_mont reaches by jumping
 * to the .L*_enter labels placed inside them.
 *
 * External symbols referenced: OPENSSL_ia32cap_P, bn_sqr8x_internal,
 * bn_sqrx8x_internal.
 *
 * The comments are explanatory annotations only; any behavioural change
 * belongs in the generator script, not in this output.
 */
.text



/*
 * bn_mul_mont -- exported entry point and generic one-word-at-a-time path.
 *
 * Register use on entry, as read by the code below:
 *   %rdi  destination word array (written by .Lsub / .Lcopy)
 *   %rsi  first operand word array
 *   %rdx  second operand word array
 *   %rcx  word array subtracted from the result in .Lsub
 *         (presumably the modulus -- confirm against the C prototype)
 *   %r8   pointer to one 64-bit constant that is multiplied into the low
 *         result word each round (presumably Montgomery n0 -- confirm)
 *   %r9d  word count "num" (zero-extended to 64 bits first thing)
 * Returns 1 in %rax.  %rbx, %rbp and %r12-%r15 are saved and restored.
 *
 * Dispatch:
 *   num & 3 != 0, or num < 8      -> generic path (.Lmul_enter)
 *   %rdx != %rsi                  -> .Lmul4x_enter
 *   %rdx == %rsi and num & 7 == 0 -> .Lsqr8x_enter
 *   otherwise                     -> .Lmul4x_enter
 *
 * NOTE(review): the .note.gnu.property section at the end of the file sets
 * feature bits 0x3 (presumably IBT|SHSTK), yet this entry point does not
 * begin with endbr64 -- confirm this is intended by the generator.
 */
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.cfi_startproc
	movl	%r9d,%r9d
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	testl	$3,%r9d
	jnz	.Lmul_enter
	cmpl	$8,%r9d
	jb	.Lmul_enter
	/* third 32-bit capability word; tested at .Lmul4x_enter */
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpq	%rsi,%rdx
	jne	.Lmul4x_enter
	testl	$7,%r9d
	jz	.Lsqr8x_enter
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	/* %r10 = (%rsp - 8*num - 16) rounded down to a 1024-byte boundary */
	negq	%r9
	movq	%rsp,%r11
	leaq	-16(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	/*
	 * Lower %rsp to %r10 in steps of at most 4096 bytes, reading one
	 * word at every step so that each page of the new frame is touched
	 * in order.
	 */
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.align	16
.Lmul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	/* frame: tp[0..num] at (%rsp), caller's %rsp at tp[num+1] */
	movq	%rax,8(%rsp,%r9,8)
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
	movq	%rdx,%r12
	movq	(%r8),%r8
	movq	(%r12),%rbx
	movq	(%rsi),%rax

	xorq	%r14,%r14
	xorq	%r15,%r15

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

	/* first pass: %r14 = 0, %r15 walks the words of (%rsi) and (%rcx) */
.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.L1st

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	jmp	.Louter

	/* remaining passes: %r14 indexes the words of the second operand */
.align	16
.Louter:
	movq	(%r12,%r14,8),%rbx
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter

	/* the xor also clears CF, so the first sbbq below starts borrow-free */
	xorq	%r14,%r14
	movq	(%rsp),%rax
	movq	%r9,%r15

	/* (%rdi)[i] = tp[i] - (%rcx)[i] with borrow; leaq/decq keep CF intact */
.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)
	movq	8(%rsp,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	/* fold the final borrow into the top word; %rbx = ~%rax select mask */
	sbbq	$0,%rax
	movq	$-1,%rbx
	xorq	%rax,%rbx
	xorq	%r14,%r14
	movq	%r9,%r15

	/*
	 * Branch-free select per word: (%rdi)[i] = (tp[i] & %rax) |
	 * ((%rdi)[i] & %rbx).  Each tp word is overwritten (with the value
	 * of %r9) as soon as it has been read.
	 */
.Lcopy:
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r9,(%rsp,%r14,8)
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	/* reload the caller's %rsp and restore callee-saved registers */
	movq	8(%rsp,%r9,8),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	bn_mul_mont,.-bn_mul_mont

/*
 * bn_mul4x_mont -- same register contract as bn_mul_mont, handling four
 * words per loop iteration.  bn_mul_mont jumps to .Lmul4x_enter with the
 * capability word already loaded in %r11d; when bits 8 and 19 (mask
 * 0x80100) are both set, control moves on to .Lmulx4x_enter (presumably
 * the BMI2 and ADX feature bits, matching the mulx/adcx/adox instructions
 * used on that path -- confirm).
 *
 * Frame: tp[0..num] at (%rsp), caller's %rsp at tp[num+1], the saved
 * destination pointer at tp[num+2] (%rdi doubles as an accumulator).
 */
.type	bn_mul4x_mont,@function
.align	16
bn_mul4x_mont:
.cfi_startproc
	movl	%r9d,%r9d
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	andl	$0x80100,%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx4x_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	/* %r10 = (%rsp - 8*num - 32) rounded down to a 1024-byte boundary */
	negq	%r9
	movq	%rsp,%r11
	leaq	-32(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	/* page-by-page descent of %rsp, as in the generic path */
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
	movq	%rdi,16(%rsp,%r9,8)
	movq	%rdx,%r12
	movq	(%r8),%r8
	movq	(%r12),%rbx
	movq	(%rsi),%rax

	xorq	%r14,%r14
	xorq	%r15,%r15

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	.L1st4x
.align	16
.L1st4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.L1st4x

	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)

	leaq	1(%r14),%r14
.align	4
.Louter4x:
	movq	(%r12,%r14,8),%rbx
	xorq	%r15,%r15
	movq	(%rsp),%r10
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	.Linner4x
.align	16
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.Linner4x

	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)

	cmpq	%r9,%r14
	jb	.Louter4x
	/* reload the destination pointer saved at .Lmul4x_body */
	movq	16(%rsp,%r9,8),%rdi
	leaq	-4(%r9),%r15
	movq	0(%rsp),%rax
	movq	8(%rsp),%rdx
	shrq	$2,%r15
	leaq	(%rsp),%rsi
	xorq	%r14,%r14

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx

	/* four-word-per-iteration subtract of (%rcx) from tp into (%rdi) */
.Lsub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14
	decq	%r15
	jnz	.Lsub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	sbbq	$0,%rax
	movq	%rbp,24(%rdi,%r14,8)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,224		/* movq %rax,%xmm4 */
	pcmpeqd	%xmm5,%xmm5
	pshufd	$0,%xmm4,%xmm4
	movq	%r9,%r15
	pxor	%xmm4,%xmm5
	shrq	$2,%r15
	xorl	%eax,%eax

	jmp	.Lcopy4x
	/*
	 * Branch-free select, 32 bytes per iteration:
	 * (%rdi) = (tp & %xmm4) | ((%rdi) & %xmm5), with %xmm5 = ~%xmm4.
	 * tp is overwritten with zeros (%xmm0) as it is consumed.
	 */
.align	16
.Lcopy4x:
	movdqa	(%rsp,%rax,1),%xmm1
	movdqu	(%rdi,%rax,1),%xmm2
	pand	%xmm4,%xmm1
	pand	%xmm5,%xmm2
	movdqa	16(%rsp,%rax,1),%xmm3
	movdqa	%xmm0,(%rsp,%rax,1)
	por	%xmm2,%xmm1
	movdqu	16(%rdi,%rax,1),%xmm2
	movdqu	%xmm1,(%rdi,%rax,1)
	pand	%xmm4,%xmm3
	pand	%xmm5,%xmm2
	movdqa	%xmm0,16(%rsp,%rax,1)
	por	%xmm2,%xmm3
	movdqu	%xmm3,16(%rdi,%rax,1)
	leaq	32(%rax),%rax
	decq	%r15
	jnz	.Lcopy4x
	movq	8(%rsp,%r9,8),%rsi
.cfi_def_cfa	%rsi, 8
	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	bn_mul4x_mont,.-bn_mul4x_mont



/*
 * bn_sqr8x_mont -- reached from bn_mul_mont via .Lsqr8x_enter when both
 * operand pointers are equal and num is a multiple of 8.  The heavy
 * lifting is delegated to bn_sqrx8x_internal (capability bits 0x80100
 * both set) or bn_sqr8x_internal; both are defined outside this file, so
 * the register state they hand back is not visible here.
 *
 * Frame slots written below: 32(%rsp) = the constant loaded from (%r8),
 * 40(%rsp) = caller's %rsp.
 */
.type	bn_sqr8x_mont,@function
.align	32
bn_sqr8x_mont:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lsqr8x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lsqr8x_prologue:

	/* %r9 = -(8*num), %r10 = 32*num */
	movl	%r9d,%r10d
	shll	$3,%r9d
	shlq	$3+2,%r10
	negq	%r9

	/*
	 * Choose the frame base in %rbp from the distance, modulo 4096,
	 * between the candidate frame and the input at %rsi.
	 */
	leaq	-64(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	movq	(%r8),%r8
	subq	%rsi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lsqr8x_sp_alt
	subq	%r11,%rbp
	leaq	-64(%rbp,%r9,2),%rbp
	jmp	.Lsqr8x_sp_done

.align	32
.Lsqr8x_sp_alt:
	leaq	4096-64(,%r9,2),%r10
	leaq	-64(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lsqr8x_sp_done:
	/* 64-byte align the frame, then descend to it one page at a time */
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
	jmp	.Lsqr8x_page_walk_done

.align	16
.Lsqr8x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
.Lsqr8x_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:

.byte	102,72,15,110,209		/* movq %rcx,%xmm2 */
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207		/* movq %rdi,%xmm1 */
.byte	102,73,15,110,218		/* movq %r10,%xmm3 */
	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	andl	$0x80100,%eax
	cmpl	$0x80100,%eax
	jne	.Lsqr8x_nox

	call	bn_sqrx8x_internal

	/* NOTE(review): %r8, %rcx, %rbp, %rax are as left by the callee */
	leaq	(%r8,%rcx,1),%rbx
	movq	%rcx,%r9
	movq	%rcx,%rdx
.byte	102,72,15,126,207		/* movq %xmm1,%rdi */
	sarq	$3+2,%rcx
	jmp	.Lsqr8x_sub

.align	32
.Lsqr8x_nox:
	call	bn_sqr8x_internal

	/* NOTE(review): %rdi, %r9, %rbp, %rax are as left by the callee */
	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
	movq	%r9,%rdx
.byte	102,72,15,126,207		/* movq %xmm1,%rdi */
	sarq	$3+2,%rcx
	jmp	.Lsqr8x_sub

	/* (%rdi) = (%rbx) - (%rbp) with borrow, four words per iteration */
.align	32
.Lsqr8x_sub:
	movq	0(%rbx),%r12
	movq	8(%rbx),%r13
	movq	16(%rbx),%r14
	movq	24(%rbx),%r15
	leaq	32(%rbx),%rbx
	sbbq	0(%rbp),%r12
	sbbq	8(%rbp),%r13
	sbbq	16(%rbp),%r14
	sbbq	24(%rbp),%r15
	leaq	32(%rbp),%rbp
	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi
	incq	%rcx
	jnz	.Lsqr8x_sub

	sbbq	$0,%rax
	leaq	(%rbx,%r9,1),%rbx
	leaq	(%rdi,%r9,1),%rdi

.byte	102,72,15,110,200		/* movq %rax,%xmm1 */
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	jmp	.Lsqr8x_cond_copy

	/*
	 * Branch-free select between the scratch copy at (%rbx) and the
	 * subtracted value at (%rdi) under mask %xmm1; the scratch words at
	 * (%rbx) and at (%rbx,%rdx) are zeroed as the loop advances.
	 */
.align	32
.Lsqr8x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	movdqa	%xmm0,-32(%rbx,%rdx,1)
	movdqa	%xmm0,-16(%rbx,%rdx,1)
	pcmpeqd	%xmm1,%xmm0
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	addq	$32,%r9
	jnz	.Lsqr8x_cond_copy

	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr8x_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	bn_sqr8x_mont,.-bn_sqr8x_mont

/*
 * bn_mulx4x_mont -- four-words-per-iteration path built on mulx and the
 * two independent carry chains of adcx (CF) and adox (OF).  Reached from
 * .Lmul4x_enter when capability bits 0x80100 are both set.
 *
 * Frame slots written below:
 *    0(%rsp)  8*num
 *    8(%rsp)  pointer to the next word of the second operand
 *   16(%rsp)  end of the second operand (%rdx + 8*num)
 *   24(%rsp)  the constant loaded from (%r8)
 *   32(%rsp)  destination pointer
 *   40(%rsp)  caller's %rsp
 *   48(%rsp)  inner-loop trip count (num/4 - 1)
 *   64(%rsp)  start of the temporary result vector
 */
.type	bn_mulx4x_mont,@function
.align	32
bn_mulx4x_mont:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmulx4x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmulx4x_prologue:

	/* %r9 = 8*num; frame base = (%rsp - 8*num - 72) aligned down to 128 */
	shll	$3,%r9d
	xorq	%r10,%r10
	subq	%r9,%r10
	movq	(%r8),%r8
	leaq	-72(%rsp,%r10,1),%rbp
	andq	$-128,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
	jmp	.Lmulx4x_page_walk_done

.align	16
.Lmulx4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

	leaq	(%rdx,%r9,1),%r10

	movq	%r9,0(%rsp)
	shrq	$5,%r9
	movq	%r10,16(%rsp)
	subq	$1,%r9
	movq	%r8,24(%rsp)
	movq	%rdi,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
	movq	%r9,48(%rsp)
	jmp	.Lmulx4x_body

.align	32
.Lmulx4x_body:
	leaq	8(%rdx),%rdi
	movq	(%rdx),%rdx
	leaq	64+32(%rsp),%rbx
	movq	%rdx,%r9

	mulxq	0(%rsi),%r8,%rax
	mulxq	8(%rsi),%r11,%r14
	addq	%rax,%r11
	movq	%rdi,8(%rsp)
	mulxq	16(%rsi),%r12,%r13
	adcq	%r14,%r12
	adcq	$0,%r13

	movq	%r8,%rdi
	imulq	24(%rsp),%r8
	xorq	%rbp,%rbp

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx
	leaq	32(%rsi),%rsi
	adcxq	%rax,%r13
	adcxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%rdi
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
.byte	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00	/* mulxq 16(%rcx),%rax,%r12 (disp32 form) */
	movq	48(%rsp),%rdi
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-24(%rbx)
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_1st

.align	32
.Lmulx4x_1st:
	adcxq	%rbp,%r15
	mulxq	0(%rsi),%r10,%rax
	adcxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
.byte	0x67,0x67			/* two address-size prefixes on the next instruction */
	movq	%r8,%rdx
	adcxq	%rax,%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-32(%rbx)
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_1st

	movq	0(%rsp),%rax
	movq	8(%rsp),%rdi
	adcq	%rbp,%r15
	addq	%r15,%r14
	sbbq	%r15,%r15
	movq	%r14,-8(%rbx)
	jmp	.Lmulx4x_outer

.align	32
.Lmulx4x_outer:
	movq	(%rdi),%rdx
	leaq	8(%rdi),%rdi
	/* rewind %rsi and %rcx by 8*num (held in %rax) for the next pass */
	subq	%rax,%rsi
	movq	%r15,(%rbx)
	leaq	64+32(%rsp),%rbx
	subq	%rax,%rcx

	mulxq	0(%rsi),%r8,%r11
	xorl	%ebp,%ebp
	movq	%rdx,%r9
	mulxq	8(%rsi),%r14,%r12
	adoxq	-32(%rbx),%r8
	adcxq	%r14,%r11
	mulxq	16(%rsi),%r15,%r13
	adoxq	-24(%rbx),%r11
	adcxq	%r15,%r12
	adoxq	-16(%rbx),%r12
	adcxq	%rbp,%r13
	adoxq	%rbp,%r13

	movq	%rdi,8(%rsp)
	movq	%r8,%r15
	imulq	24(%rsp),%r8
	xorl	%ebp,%ebp

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx
	adcxq	%rax,%r13
	adoxq	-8(%rbx),%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	adoxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-24(%rbx)
	leaq	32(%rcx),%rcx
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	movq	48(%rsp),%rdi
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_inner

.align	32
.Lmulx4x_inner:
	mulxq	0(%rsi),%r10,%rax
	adcxq	%rbp,%r15
	adoxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	0(%rbx),%r10
	adoxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	8(%rbx),%r11
	adoxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
	movq	%r8,%rdx
	adcxq	16(%rbx),%r12
	adoxq	%rax,%r13
	adcxq	24(%rbx),%r13
	adoxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx
	adcxq	%rbp,%r14

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-32(%rbx)
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_inner

	movq	0(%rsp),%rax
	movq	8(%rsp),%rdi
	adcq	%rbp,%r15
	subq	0(%rbx),%rbp
	adcq	%r15,%r14
	sbbq	%r15,%r15
	movq	%r14,-8(%rbx)

	/* stop once the second-operand pointer reaches the saved end address */
	cmpq	16(%rsp),%rdi
	jne	.Lmulx4x_outer

	leaq	64(%rsp),%rbx
	subq	%rax,%rcx
	negq	%r15
	movq	%rax,%rdx
	shrq	$3+2,%rax
	movq	32(%rsp),%rdi
	jmp	.Lmulx4x_sub

	/* (%rdi) = tp - (%rcx) with borrow, four words per iteration */
.align	32
.Lmulx4x_sub:
	movq	0(%rbx),%r11
	movq	8(%rbx),%r12
	movq	16(%rbx),%r13
	movq	24(%rbx),%r14
	leaq	32(%rbx),%rbx
	sbbq	0(%rcx),%r11
	sbbq	8(%rcx),%r12
	sbbq	16(%rcx),%r13
	sbbq	24(%rcx),%r14
	leaq	32(%rcx),%rcx
	movq	%r11,0(%rdi)
	movq	%r12,8(%rdi)
	movq	%r13,16(%rdi)
	movq	%r14,24(%rdi)
	leaq	32(%rdi),%rdi
	decq	%rax
	jnz	.Lmulx4x_sub

	sbbq	$0,%r15
	leaq	64(%rsp),%rbx
	subq	%rdx,%rdi

.byte	102,73,15,110,207		/* movq %r15,%xmm1 */
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	jmp	.Lmulx4x_cond_copy

	/*
	 * Branch-free select between tp (%rbx) and the subtracted value at
	 * (%rdi) under mask %xmm1; tp is zeroed as it is consumed.
	 */
.align	32
.Lmulx4x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	pcmpeqd	%xmm1,%xmm0
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	subq	$32,%rdx
	jnz	.Lmulx4x_cond_copy

	/* %rdx is zero here: clear the word just past the copied range */
	movq	%rdx,(%rbx)

	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmulx4x_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	bn_mulx4x_mont,.-bn_mulx4x_mont
/* ASCII: "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16
	/*
	 * ELF property note: name size, descriptor size, type 5, name "GNU",
	 * then one property record (type 0xc0000002, 4-byte value 3).
	 * NOTE(review): presumably NT_GNU_PROPERTY_TYPE_0 carrying
	 * GNU_PROPERTY_X86_FEATURE_1_AND = IBT|SHSTK -- confirm against the
	 * x86-64 psABI.
	 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4: