/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from x86_64-mont.pl. */
/*
 * Montgomery multiplication for OpenSSL BN, x86-64 (CRYPTOGAMS).
 * SysV AMD64 ABI.  All entry points share the BN_ULONG-word signature
 * (per x86_64-mont.pl):
 *   %rdi = rp (result), %rsi = ap, %rdx = bp, %rcx = np (modulus),
 *   %r8  = &n0 (Montgomery constant), %r9 = num (word count)
 * Returns 1 in %rax.  NOTE(review): comments below describe intent as far
 * as this file shows it; the carry chains are exact and order-sensitive —
 * regenerate from the .pl source rather than editing by hand.
 */
.text



.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.cfi_startproc
	movl	%r9d,%r9d		# zero-extend num; also clears garbage in high half
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	# Dispatch: num divisible by 4 and >= 8 may take the 4x/sqr8x paths.
	testl	$3,%r9d
	jnz	.Lmul_enter
	cmpl	$8,%r9d
	jb	.Lmul_enter
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d	# CPU feature bits (BMI2/ADX selection below)
	cmpq	%rsi,%rdx
	jne	.Lmul4x_enter
	testl	$7,%r9d
	jz	.Lsqr8x_enter		# ap == bp and num % 8 == 0: squaring path
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	# Carve an (num+2)-word scratch area below %rsp, 1024-byte aligned.
	negq	%r9
	movq	%rsp,%r11
	leaq	-16(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10









	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.align	16
.Lmul_page_walk:
	# Touch the new stack one page at a time so the guard page is never skipped.
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)	# save original %rsp (restored in epilogue)
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
	movq	%rdx,%r12		# %r12 = bp
	movq	(%r8),%r8		# %r8  = n0 value
	movq	(%r12),%rbx		# %rbx = bp[0]
	movq	(%rsi),%rax		# %rax = ap[0]

	xorq	%r14,%r14		# %r14 = outer index i
	xorq	%r15,%r15		# %r15 = inner index j

	# First outer iteration: tp = ap * bp[0] + m * np, one word at a time.
	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp		# m = tp[0] * n0 (mod 2^64)
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.L1st

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)	# top carry word of tp

	leaq	1(%r14),%r14
	jmp	.Louter
.align	16
.Louter:
	# Outer loop: tp = (tp + ap * bp[i] + m * np) / 2^64 for each remaining i.
	movq	(%r12,%r14,8),%rbx	# %rbx = bp[i]
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# m = (tp[0] + ap[0]*bp[i]) * n0
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter

	# Conditional final subtraction: rp = tp - np if tp >= np, else tp,
	# done branchlessly (constant-time) via a mask built from the borrow.
	xorq	%r14,%r14
	movq	(%rsp),%rax
	movq	%r9,%r15

.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)	# rp[i] = tp[i] - np[i] (with borrow)
	movq	8(%rsp,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax			# %rax = 0 if tp >= np, else all-ones borrow
	movq	$-1,%rbx
	xorq	%rax,%rbx		# %rbx = ~%rax (select mask for rp vs tp)
	xorq	%r14,%r14
	movq	%r9,%r15

.Lcopy:
	# rp[i] = (rp[i] & ~mask) | (tp[i] & mask); also wipes tp scratch.
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r9,(%rsp,%r14,8)	# scrub scratch word
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	# recover saved original %rsp
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			# return 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_mul_mont,.-bn_mul_mont
/*
 * 4-way unrolled Montgomery multiplication; entered from bn_mul_mont when
 * num % 4 == 0 and num >= 8.  Same arguments as bn_mul_mont.
 * May tail-dispatch to bn_mulx4x_mont when BMI2+ADX are available
 * (feature bits arrive in %r11d from the bn_mul_mont prologue).
 */
.type	bn_mul4x_mont,@function
.align	16
bn_mul4x_mont:
.cfi_startproc
	movl	%r9d,%r9d
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	andl	$0x80100,%r11d		# test BMI2 | ADX feature bits
	cmpl	$0x80100,%r11d
	je	.Lmulx4x_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	# Scratch area as in bn_mul_mont, plus one extra slot for saved rp.
	negq	%r9
	movq	%rsp,%r11
	leaq	-32(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	# Guard-page-safe stack probe, one page per step.
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)	# save original %rsp
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
	movq	%rdi,16(%rsp,%r9,8)	# save rp (%rdi is reused as a carry word below)
	movq	%rdx,%r12		# %r12 = bp
	movq	(%r8),%r8		# %r8  = n0
	movq	(%r12),%rbx		# %rbx = bp[0]
	movq	(%rsi),%rax		# %rax = ap[0]

	xorq	%r14,%r14		# outer index i
	xorq	%r15,%r15		# inner index j (steps by 4)

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp		# m = tp[0]*n0
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi		# NOTE: %rdi now holds a carry word, not rp

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	.L1st4x
.align	16
.L1st4x:
	# First outer pass, 4 words of ap*bp[0] + m*np per iteration.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.L1st4x

	# Tail of first pass: last two word pairs plus final carry.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)	# top carry word

	leaq	1(%r14),%r14
.align	4
.Louter4x:
	# Remaining outer iterations: accumulate ap*bp[i] + m*np into tp.
	movq	(%r12,%r14,8),%rbx	# %rbx = bp[i]
	xorq	%r15,%r15
	movq	(%rsp),%r10
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp		# m = (tp[0]+ap[0]*bp[i])*n0
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	.Linner4x
.align	16
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	.Linner4x

	# Tail of outer iteration.
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14		# i++
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13	# fold in previous top carry
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)

	cmpq	%r9,%r14
	jb	.Louter4x
	# Conditional subtraction, 4 words per iteration; then masked copy-out.
	movq	16(%rsp,%r9,8),%rdi	# restore rp
	leaq	-4(%r9),%r15
	movq	0(%rsp),%rax
	movq	8(%rsp),%rdx
	shrq	$2,%r15
	leaq	(%rsp),%rsi
	xorq	%r14,%r14

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx

.Lsub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14
	decq	%r15
	jnz	.Lsub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	sbbq	$0,%rax			# %rax = select mask: 0 or all-ones (borrow)
	movq	%rbp,24(%rdi,%r14,8)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,224		# movq %rax,%xmm4 (raw-encoded by generator)
	pcmpeqd	%xmm5,%xmm5
	pshufd	$0,%xmm4,%xmm4
	movq	%r9,%r15
	pxor	%xmm4,%xmm5		# %xmm5 = ~mask
	shrq	$2,%r15
	xorl	%eax,%eax

	jmp	.Lcopy4x
.align	16
.Lcopy4x:
	# Branchless select between rp (subtracted) and tp, 32 bytes/iter;
	# scratch tp is zeroed as it is consumed.
	movdqa	(%rsp,%rax,1),%xmm1
	movdqu	(%rdi,%rax,1),%xmm2
	pand	%xmm4,%xmm1
	pand	%xmm5,%xmm2
	movdqa	16(%rsp,%rax,1),%xmm3
	movdqa	%xmm0,(%rsp,%rax,1)
	por	%xmm2,%xmm1
	movdqu	16(%rdi,%rax,1),%xmm2
	movdqu	%xmm1,(%rdi,%rax,1)
	pand	%xmm4,%xmm3
	pand	%xmm5,%xmm2
	movdqa	%xmm0,16(%rsp,%rax,1)
	por	%xmm2,%xmm3
	movdqu	%xmm3,16(%rdi,%rax,1)
	leaq	32(%rax),%rax
	decq	%r15
	jnz	.Lcopy4x
	movq	8(%rsp,%r9,8),%rsi	# recover saved %rsp
.cfi_def_cfa	%rsi, 8
	movq	$1,%rax			# return 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_mul4x_mont,.-bn_mul4x_mont



/*
 * Squaring path (ap == bp, num % 8 == 0, num >= 8).  Delegates the core
 * work to bn_sqr8x_internal or (with BMI2+ADX) bn_sqrx8x_internal, both
 * defined elsewhere; this wrapper sets up the frame and does the final
 * conditional subtraction and masked copy-out.
 */
.type	bn_sqr8x_mont,@function
.align	32
bn_sqr8x_mont:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lsqr8x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lsqr8x_prologue:

	movl	%r9d,%r10d
	shll	$3,%r9d			# %r9 = num * 8 (bytes)
	shlq	$3+2,%r10		# %r10 = num * 32
	negq	%r9

	# Choose a scratch placement that avoids a 4 KiB aliasing conflict
	# with ap (cache-bank/page considerations from the generator).
	leaq	-64(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	movq	(%r8),%r8
	subq	%rsi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lsqr8x_sp_alt
	subq	%r11,%rbp
	leaq	-64(%rbp,%r9,2),%rbp
	jmp	.Lsqr8x_sp_done

.align	32
.Lsqr8x_sp_alt:
	leaq	4096-64(,%r9,2),%r10
	leaq	-64(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lsqr8x_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
	jmp	.Lsqr8x_page_walk_done

.align	16
.Lsqr8x_page_walk:
	# Guard-page-safe stack probe.
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lsqr8x_page_walk
.Lsqr8x_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)		# save n0
	movq	%rax,40(%rsp)		# save original %rsp
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:

.byte	102,72,15,110,209		# movq %rcx,%xmm2 (np, raw-encoded)
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207		# movq %rdi,%xmm1 (rp)
.byte	102,73,15,110,218		# movq %r10,%xmm3
	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	andl	$0x80100,%eax		# BMI2 | ADX?
	cmpl	$0x80100,%eax
	jne	.Lsqr8x_nox

	call	bn_sqrx8x_internal	# MULX/ADX variant (defined elsewhere)



	# NOTE(review): internal routine's output registers (%r8,%rcx) are a
	# contract with bn_sqrx8x_internal, not visible in this file.
	leaq	(%r8,%rcx,1),%rbx
	movq	%rcx,%r9
	movq	%rcx,%rdx
.byte	102,72,15,126,207		# movq %xmm1,%rdi (recover rp)
	sarq	$3+2,%rcx
	jmp	.Lsqr8x_sub

.align	32
.Lsqr8x_nox:
	call	bn_sqr8x_internal	# classic mul/adc variant (defined elsewhere)



	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
	movq	%r9,%rdx
.byte	102,72,15,126,207		# movq %xmm1,%rdi (recover rp)
	sarq	$3+2,%rcx
	jmp	.Lsqr8x_sub

.align	32
.Lsqr8x_sub:
	# rp = result - np, 4 words/iter, borrow threaded through SBB chain.
	movq	0(%rbx),%r12
	movq	8(%rbx),%r13
	movq	16(%rbx),%r14
	movq	24(%rbx),%r15
	leaq	32(%rbx),%rbx
	sbbq	0(%rbp),%r12
	sbbq	8(%rbp),%r13
	sbbq	16(%rbp),%r14
	sbbq	24(%rbp),%r15
	leaq	32(%rbp),%rbp
	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi
	incq	%rcx
	jnz	.Lsqr8x_sub

	sbbq	$0,%rax			# %rax = 0 / -1 select mask
	leaq	(%rbx,%r9,1),%rbx
	leaq	(%rdi,%r9,1),%rdi

.byte	102,72,15,110,200		# movq %rax,%xmm1 (broadcast mask below)
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	40(%rsp),%rsi		# recover saved %rsp
.cfi_def_cfa	%rsi,8
	jmp	.Lsqr8x_cond_copy

.align	32
.Lsqr8x_cond_copy:
	# Branchless select between subtracted and unsubtracted result;
	# also zeroes the scratch area behind itself.
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	movdqa	%xmm0,-32(%rbx,%rdx,1)
	movdqa	%xmm0,-16(%rbx,%rdx,1)
	pcmpeqd	%xmm1,%xmm0
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	addq	$32,%r9
	jnz	.Lsqr8x_cond_copy

	movq	$1,%rax			# return 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr8x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_sqr8x_mont,.-bn_sqr8x_mont
/*
 * BMI2/ADX variant of the 4-way Montgomery multiply (MULX + ADCX/ADOX
 * dual carry chains).  Reached from .Lmul4x_enter when both feature bits
 * are set.  Same arguments and return value as bn_mul_mont.
 */
.type	bn_mulx4x_mont,@function
.align	32
bn_mulx4x_mont:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmulx4x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmulx4x_prologue:

	shll	$3,%r9d			# %r9 = num * 8 (bytes)
	xorq	%r10,%r10
	subq	%r9,%r10
	movq	(%r8),%r8		# %r8 = n0
	leaq	-72(%rsp,%r10,1),%rbp	# scratch frame, 128-byte aligned
	andq	$-128,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
	jmp	.Lmulx4x_page_walk_done

.align	16
.Lmulx4x_page_walk:
	# Guard-page-safe stack probe.
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

	leaq	(%rdx,%r9,1),%r10	# %r10 = &bp[num] (end sentinel)

	# Frame layout (offsets from %rsp):
	#   0: num*8   8: saved bp cursor  16: &bp[num]  24: n0
	#  32: rp     40: original %rsp    48: inner-loop counter
	movq	%r9,0(%rsp)
	shrq	$5,%r9
	movq	%r10,16(%rsp)
	subq	$1,%r9
	movq	%r8,24(%rsp)
	movq	%rdi,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
	movq	%r9,48(%rsp)
	jmp	.Lmulx4x_body

.align	32
.Lmulx4x_body:
	leaq	8(%rdx),%rdi		# %rdi = &bp[1]
	movq	(%rdx),%rdx		# %rdx = bp[0] (implicit MULX multiplicand)
	leaq	64+32(%rsp),%rbx	# tp cursor
	movq	%rdx,%r9		# stash bp[0]

	# First outer iteration, first 4 words: ap*bp[0] + m*np using the
	# two independent ADCX/ADOX carry chains.
	mulxq	0(%rsi),%r8,%rax
	mulxq	8(%rsi),%r11,%r14
	addq	%rax,%r11
	movq	%rdi,8(%rsp)
	mulxq	16(%rsi),%r12,%r13
	adcq	%r14,%r12
	adcq	$0,%r13

	movq	%r8,%rdi		# %rdi reused as low-word accumulator
	imulq	24(%rsp),%r8		# m = lo * n0
	xorq	%rbp,%rbp		# %rbp = constant 0 + clears CF/OF

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx
	leaq	32(%rsi),%rsi
	adcxq	%rax,%r13
	adcxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%rdi
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
.byte	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00	# mulxq 16(%rcx),%rax,%r12 (raw-encoded)
	movq	48(%rsp),%rdi		# loop counter
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		# back to bp[0]
	movq	%r11,-24(%rbx)
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_1st

.align	32
.Lmulx4x_1st:
	adcxq	%rbp,%r15
	mulxq	0(%rsi),%r10,%rax
	adcxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
.byte	0x67,0x67			# addr-size prefixes used as alignment padding
	movq	%r8,%rdx		# switch multiplicand to m
	adcxq	%rax,%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-32(%rbx)
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		# back to bp[0]
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_1st

	movq	0(%rsp),%rax		# num*8
	movq	8(%rsp),%rdi		# bp cursor
	adcq	%rbp,%r15
	addq	%r15,%r14
	sbbq	%r15,%r15		# top borrow as all-ones/zero
	movq	%r14,-8(%rbx)
	jmp	.Lmulx4x_outer

.align	32
.Lmulx4x_outer:
	movq	(%rdi),%rdx		# %rdx = bp[i]
	leaq	8(%rdi),%rdi
	subq	%rax,%rsi		# rewind ap
	movq	%r15,(%rbx)		# store previous top word
	leaq	64+32(%rsp),%rbx
	subq	%rax,%rcx		# rewind np

	mulxq	0(%rsi),%r8,%r11
	xorl	%ebp,%ebp		# zero + clear CF/OF for ADCX/ADOX chains
	movq	%rdx,%r9
	mulxq	8(%rsi),%r14,%r12
	adoxq	-32(%rbx),%r8		# fold in existing tp words
	adcxq	%r14,%r11
	mulxq	16(%rsi),%r15,%r13
	adoxq	-24(%rbx),%r11
	adcxq	%r15,%r12
	adoxq	-16(%rbx),%r12
	adcxq	%rbp,%r13
	adoxq	%rbp,%r13

	movq	%rdi,8(%rsp)
	movq	%r8,%r15
	imulq	24(%rsp),%r8		# m = lo * n0
	xorl	%ebp,%ebp

	mulxq	24(%rsi),%rax,%r14
	movq	%r8,%rdx
	adcxq	%rax,%r13
	adoxq	-8(%rbx),%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	adoxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-24(%rbx)
	leaq	32(%rcx),%rcx
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	movq	48(%rsp),%rdi		# loop counter
	movq	%r12,-16(%rbx)

	jmp	.Lmulx4x_inner

.align	32
.Lmulx4x_inner:
	mulxq	0(%rsi),%r10,%rax
	adcxq	%rbp,%r15
	adoxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	0(%rbx),%r10
	adoxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	8(%rbx),%r11
	adoxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
	movq	%r8,%rdx		# switch multiplicand to m
	adcxq	16(%rbx),%r12
	adoxq	%rax,%r13
	adcxq	24(%rbx),%r13
	adoxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx
	adcxq	%rbp,%r14

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx		# back to bp[i]
	movq	%r11,-32(%rbx)
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_inner

	movq	0(%rsp),%rax		# num*8
	movq	8(%rsp),%rdi		# bp cursor
	adcq	%rbp,%r15
	subq	0(%rbx),%rbp		# fold previous top word into borrow
	adcq	%r15,%r14
	sbbq	%r15,%r15
	movq	%r14,-8(%rbx)

	cmpq	16(%rsp),%rdi		# reached &bp[num]?
	jne	.Lmulx4x_outer

	# Conditional subtraction of np, then masked copy-out as in mul4x.
	leaq	64(%rsp),%rbx
	subq	%rax,%rcx		# rewind np
	negq	%r15			# top borrow -> CF for SBB chain
	movq	%rax,%rdx
	shrq	$3+2,%rax		# iterations = num/4
	movq	32(%rsp),%rdi		# rp
	jmp	.Lmulx4x_sub

.align	32
.Lmulx4x_sub:
	movq	0(%rbx),%r11
	movq	8(%rbx),%r12
	movq	16(%rbx),%r13
	movq	24(%rbx),%r14
	leaq	32(%rbx),%rbx
	sbbq	0(%rcx),%r11
	sbbq	8(%rcx),%r12
	sbbq	16(%rcx),%r13
	sbbq	24(%rcx),%r14
	leaq	32(%rcx),%rcx
	movq	%r11,0(%rdi)
	movq	%r12,8(%rdi)
	movq	%r13,16(%rdi)
	movq	%r14,24(%rdi)
	leaq	32(%rdi),%rdi
	decq	%rax
	jnz	.Lmulx4x_sub

	sbbq	$0,%r15			# %r15 = 0 / -1 select mask
	leaq	64(%rsp),%rbx
	subq	%rdx,%rdi		# rewind rp

.byte	102,73,15,110,207		# movq %r15,%xmm1 (raw-encoded)
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1
	movq	40(%rsp),%rsi		# recover saved %rsp
.cfi_def_cfa	%rsi,8
	jmp	.Lmulx4x_cond_copy

.align	32
.Lmulx4x_cond_copy:
	# Branchless select between subtracted and unsubtracted result;
	# zeroes the scratch behind itself.
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	pcmpeqd	%xmm1,%xmm0
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	subq	$32,%rdx
	jnz	.Lmulx4x_cond_copy

	movq	%rdx,(%rbx)		# %rdx == 0 here: scrub last scratch word

	movq	$1,%rax			# return 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmulx4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_mulx4x_mont,.-bn_mulx4x_mont
/* "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16