1/* $FreeBSD$ */ 2/* Do not modify. This file is auto-generated from x86_64-mont5.pl. */ 3.text 4 5 6 7.globl bn_mul_mont_gather5 8.type bn_mul_mont_gather5,@function 9.align 64 10bn_mul_mont_gather5: 11.cfi_startproc 12 movl %r9d,%r9d 13 movq %rsp,%rax 14.cfi_def_cfa_register %rax 15 testl $7,%r9d 16 jnz .Lmul_enter 17 movl OPENSSL_ia32cap_P+8(%rip),%r11d 18 jmp .Lmul4x_enter 19 20.align 16 21.Lmul_enter: 22 movd 8(%rsp),%xmm5 23 pushq %rbx 24.cfi_offset %rbx,-16 25 pushq %rbp 26.cfi_offset %rbp,-24 27 pushq %r12 28.cfi_offset %r12,-32 29 pushq %r13 30.cfi_offset %r13,-40 31 pushq %r14 32.cfi_offset %r14,-48 33 pushq %r15 34.cfi_offset %r15,-56 35 36 negq %r9 37 movq %rsp,%r11 38 leaq -280(%rsp,%r9,8),%r10 39 negq %r9 40 andq $-1024,%r10 41 42 43 44 45 46 47 48 49 50 subq %r10,%r11 51 andq $-4096,%r11 52 leaq (%r10,%r11,1),%rsp 53 movq (%rsp),%r11 54 cmpq %r10,%rsp 55 ja .Lmul_page_walk 56 jmp .Lmul_page_walk_done 57 58.Lmul_page_walk: 59 leaq -4096(%rsp),%rsp 60 movq (%rsp),%r11 61 cmpq %r10,%rsp 62 ja .Lmul_page_walk 63.Lmul_page_walk_done: 64 65 leaq .Linc(%rip),%r10 66 movq %rax,8(%rsp,%r9,8) 67.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 68.Lmul_body: 69 70 leaq 128(%rdx),%r12 71 movdqa 0(%r10),%xmm0 72 movdqa 16(%r10),%xmm1 73 leaq 24-112(%rsp,%r9,8),%r10 74 andq $-16,%r10 75 76 pshufd $0,%xmm5,%xmm5 77 movdqa %xmm1,%xmm4 78 movdqa %xmm1,%xmm2 79 paddd %xmm0,%xmm1 80 pcmpeqd %xmm5,%xmm0 81.byte 0x67 82 movdqa %xmm4,%xmm3 83 paddd %xmm1,%xmm2 84 pcmpeqd %xmm5,%xmm1 85 movdqa %xmm0,112(%r10) 86 movdqa %xmm4,%xmm0 87 88 paddd %xmm2,%xmm3 89 pcmpeqd %xmm5,%xmm2 90 movdqa %xmm1,128(%r10) 91 movdqa %xmm4,%xmm1 92 93 paddd %xmm3,%xmm0 94 pcmpeqd %xmm5,%xmm3 95 movdqa %xmm2,144(%r10) 96 movdqa %xmm4,%xmm2 97 98 paddd %xmm0,%xmm1 99 pcmpeqd %xmm5,%xmm0 100 movdqa %xmm3,160(%r10) 101 movdqa %xmm4,%xmm3 102 paddd %xmm1,%xmm2 103 pcmpeqd %xmm5,%xmm1 104 movdqa %xmm0,176(%r10) 105 movdqa %xmm4,%xmm0 106 107 paddd %xmm2,%xmm3 108 pcmpeqd 
%xmm5,%xmm2 109 movdqa %xmm1,192(%r10) 110 movdqa %xmm4,%xmm1 111 112 paddd %xmm3,%xmm0 113 pcmpeqd %xmm5,%xmm3 114 movdqa %xmm2,208(%r10) 115 movdqa %xmm4,%xmm2 116 117 paddd %xmm0,%xmm1 118 pcmpeqd %xmm5,%xmm0 119 movdqa %xmm3,224(%r10) 120 movdqa %xmm4,%xmm3 121 paddd %xmm1,%xmm2 122 pcmpeqd %xmm5,%xmm1 123 movdqa %xmm0,240(%r10) 124 movdqa %xmm4,%xmm0 125 126 paddd %xmm2,%xmm3 127 pcmpeqd %xmm5,%xmm2 128 movdqa %xmm1,256(%r10) 129 movdqa %xmm4,%xmm1 130 131 paddd %xmm3,%xmm0 132 pcmpeqd %xmm5,%xmm3 133 movdqa %xmm2,272(%r10) 134 movdqa %xmm4,%xmm2 135 136 paddd %xmm0,%xmm1 137 pcmpeqd %xmm5,%xmm0 138 movdqa %xmm3,288(%r10) 139 movdqa %xmm4,%xmm3 140 paddd %xmm1,%xmm2 141 pcmpeqd %xmm5,%xmm1 142 movdqa %xmm0,304(%r10) 143 144 paddd %xmm2,%xmm3 145.byte 0x67 146 pcmpeqd %xmm5,%xmm2 147 movdqa %xmm1,320(%r10) 148 149 pcmpeqd %xmm5,%xmm3 150 movdqa %xmm2,336(%r10) 151 pand 64(%r12),%xmm0 152 153 pand 80(%r12),%xmm1 154 pand 96(%r12),%xmm2 155 movdqa %xmm3,352(%r10) 156 pand 112(%r12),%xmm3 157 por %xmm2,%xmm0 158 por %xmm3,%xmm1 159 movdqa -128(%r12),%xmm4 160 movdqa -112(%r12),%xmm5 161 movdqa -96(%r12),%xmm2 162 pand 112(%r10),%xmm4 163 movdqa -80(%r12),%xmm3 164 pand 128(%r10),%xmm5 165 por %xmm4,%xmm0 166 pand 144(%r10),%xmm2 167 por %xmm5,%xmm1 168 pand 160(%r10),%xmm3 169 por %xmm2,%xmm0 170 por %xmm3,%xmm1 171 movdqa -64(%r12),%xmm4 172 movdqa -48(%r12),%xmm5 173 movdqa -32(%r12),%xmm2 174 pand 176(%r10),%xmm4 175 movdqa -16(%r12),%xmm3 176 pand 192(%r10),%xmm5 177 por %xmm4,%xmm0 178 pand 208(%r10),%xmm2 179 por %xmm5,%xmm1 180 pand 224(%r10),%xmm3 181 por %xmm2,%xmm0 182 por %xmm3,%xmm1 183 movdqa 0(%r12),%xmm4 184 movdqa 16(%r12),%xmm5 185 movdqa 32(%r12),%xmm2 186 pand 240(%r10),%xmm4 187 movdqa 48(%r12),%xmm3 188 pand 256(%r10),%xmm5 189 por %xmm4,%xmm0 190 pand 272(%r10),%xmm2 191 por %xmm5,%xmm1 192 pand 288(%r10),%xmm3 193 por %xmm2,%xmm0 194 por %xmm3,%xmm1 195 por %xmm1,%xmm0 196 pshufd $0x4e,%xmm0,%xmm1 197 por %xmm1,%xmm0 198 leaq 256(%r12),%r12 
199.byte 102,72,15,126,195 200 201 movq (%r8),%r8 202 movq (%rsi),%rax 203 204 xorq %r14,%r14 205 xorq %r15,%r15 206 207 movq %r8,%rbp 208 mulq %rbx 209 movq %rax,%r10 210 movq (%rcx),%rax 211 212 imulq %r10,%rbp 213 movq %rdx,%r11 214 215 mulq %rbp 216 addq %rax,%r10 217 movq 8(%rsi),%rax 218 adcq $0,%rdx 219 movq %rdx,%r13 220 221 leaq 1(%r15),%r15 222 jmp .L1st_enter 223 224.align 16 225.L1st: 226 addq %rax,%r13 227 movq (%rsi,%r15,8),%rax 228 adcq $0,%rdx 229 addq %r11,%r13 230 movq %r10,%r11 231 adcq $0,%rdx 232 movq %r13,-16(%rsp,%r15,8) 233 movq %rdx,%r13 234 235.L1st_enter: 236 mulq %rbx 237 addq %rax,%r11 238 movq (%rcx,%r15,8),%rax 239 adcq $0,%rdx 240 leaq 1(%r15),%r15 241 movq %rdx,%r10 242 243 mulq %rbp 244 cmpq %r9,%r15 245 jne .L1st 246 247 248 addq %rax,%r13 249 adcq $0,%rdx 250 addq %r11,%r13 251 adcq $0,%rdx 252 movq %r13,-16(%rsp,%r9,8) 253 movq %rdx,%r13 254 movq %r10,%r11 255 256 xorq %rdx,%rdx 257 addq %r11,%r13 258 adcq $0,%rdx 259 movq %r13,-8(%rsp,%r9,8) 260 movq %rdx,(%rsp,%r9,8) 261 262 leaq 1(%r14),%r14 263 jmp .Louter 264.align 16 265.Louter: 266 leaq 24+128(%rsp,%r9,8),%rdx 267 andq $-16,%rdx 268 pxor %xmm4,%xmm4 269 pxor %xmm5,%xmm5 270 movdqa -128(%r12),%xmm0 271 movdqa -112(%r12),%xmm1 272 movdqa -96(%r12),%xmm2 273 movdqa -80(%r12),%xmm3 274 pand -128(%rdx),%xmm0 275 pand -112(%rdx),%xmm1 276 por %xmm0,%xmm4 277 pand -96(%rdx),%xmm2 278 por %xmm1,%xmm5 279 pand -80(%rdx),%xmm3 280 por %xmm2,%xmm4 281 por %xmm3,%xmm5 282 movdqa -64(%r12),%xmm0 283 movdqa -48(%r12),%xmm1 284 movdqa -32(%r12),%xmm2 285 movdqa -16(%r12),%xmm3 286 pand -64(%rdx),%xmm0 287 pand -48(%rdx),%xmm1 288 por %xmm0,%xmm4 289 pand -32(%rdx),%xmm2 290 por %xmm1,%xmm5 291 pand -16(%rdx),%xmm3 292 por %xmm2,%xmm4 293 por %xmm3,%xmm5 294 movdqa 0(%r12),%xmm0 295 movdqa 16(%r12),%xmm1 296 movdqa 32(%r12),%xmm2 297 movdqa 48(%r12),%xmm3 298 pand 0(%rdx),%xmm0 299 pand 16(%rdx),%xmm1 300 por %xmm0,%xmm4 301 pand 32(%rdx),%xmm2 302 por %xmm1,%xmm5 303 pand 48(%rdx),%xmm3 
304 por %xmm2,%xmm4 305 por %xmm3,%xmm5 306 movdqa 64(%r12),%xmm0 307 movdqa 80(%r12),%xmm1 308 movdqa 96(%r12),%xmm2 309 movdqa 112(%r12),%xmm3 310 pand 64(%rdx),%xmm0 311 pand 80(%rdx),%xmm1 312 por %xmm0,%xmm4 313 pand 96(%rdx),%xmm2 314 por %xmm1,%xmm5 315 pand 112(%rdx),%xmm3 316 por %xmm2,%xmm4 317 por %xmm3,%xmm5 318 por %xmm5,%xmm4 319 pshufd $0x4e,%xmm4,%xmm0 320 por %xmm4,%xmm0 321 leaq 256(%r12),%r12 322 323 movq (%rsi),%rax 324.byte 102,72,15,126,195 325 326 xorq %r15,%r15 327 movq %r8,%rbp 328 movq (%rsp),%r10 329 330 mulq %rbx 331 addq %rax,%r10 332 movq (%rcx),%rax 333 adcq $0,%rdx 334 335 imulq %r10,%rbp 336 movq %rdx,%r11 337 338 mulq %rbp 339 addq %rax,%r10 340 movq 8(%rsi),%rax 341 adcq $0,%rdx 342 movq 8(%rsp),%r10 343 movq %rdx,%r13 344 345 leaq 1(%r15),%r15 346 jmp .Linner_enter 347 348.align 16 349.Linner: 350 addq %rax,%r13 351 movq (%rsi,%r15,8),%rax 352 adcq $0,%rdx 353 addq %r10,%r13 354 movq (%rsp,%r15,8),%r10 355 adcq $0,%rdx 356 movq %r13,-16(%rsp,%r15,8) 357 movq %rdx,%r13 358 359.Linner_enter: 360 mulq %rbx 361 addq %rax,%r11 362 movq (%rcx,%r15,8),%rax 363 adcq $0,%rdx 364 addq %r11,%r10 365 movq %rdx,%r11 366 adcq $0,%r11 367 leaq 1(%r15),%r15 368 369 mulq %rbp 370 cmpq %r9,%r15 371 jne .Linner 372 373 addq %rax,%r13 374 adcq $0,%rdx 375 addq %r10,%r13 376 movq (%rsp,%r9,8),%r10 377 adcq $0,%rdx 378 movq %r13,-16(%rsp,%r9,8) 379 movq %rdx,%r13 380 381 xorq %rdx,%rdx 382 addq %r11,%r13 383 adcq $0,%rdx 384 addq %r10,%r13 385 adcq $0,%rdx 386 movq %r13,-8(%rsp,%r9,8) 387 movq %rdx,(%rsp,%r9,8) 388 389 leaq 1(%r14),%r14 390 cmpq %r9,%r14 391 jb .Louter 392 393 xorq %r14,%r14 394 movq (%rsp),%rax 395 leaq (%rsp),%rsi 396 movq %r9,%r15 397 jmp .Lsub 398.align 16 399.Lsub: sbbq (%rcx,%r14,8),%rax 400 movq %rax,(%rdi,%r14,8) 401 movq 8(%rsi,%r14,8),%rax 402 leaq 1(%r14),%r14 403 decq %r15 404 jnz .Lsub 405 406 sbbq $0,%rax 407 movq $-1,%rbx 408 xorq %rax,%rbx 409 xorq %r14,%r14 410 movq %r9,%r15 411 412.Lcopy: 413 movq (%rdi,%r14,8),%rcx 
# ---- tail of bn_mul_mont_gather5: constant-time result copy -------------
# On entry to .Lcopy: %rax = borrow mask from the conditional subtraction,
# %rbx = ~%rax (built just above with movq $-1,%rbx; xorq %rax,%rbx),
# %r14 = word index, %r15 = word count.  Each iteration selects between
# rp[i] (%rcx) and the stack temporary without a data-dependent branch,
# and overwrites the stack word (scrubs the temporary; the stored index
# value itself is irrelevant).
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r14,(%rsp,%r14,8)	# zap stack temp
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)	# rp[i] = selected word
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi	# saved original %rsp
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			# return 1

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5

# ---- bn_mul4x_mont_gather5 ----------------------------------------------
# 4-way unrolled Montgomery multiply wrapper.  Reached from
# bn_mul_mont_gather5 via .Lmul4x_enter with %r11d preloaded from
# OPENSSL_ia32cap_P+8.  Allocates an aligned scratch frame below %rsp,
# probes it page by page (guard-page safety), then delegates to
# mul4x_internal.  Returns 1 in %rax.
.type	bn_mul4x_mont_gather5,@function
.align	32
bn_mul4x_mont_gather5:
.cfi_startproc
.byte	0x67				# addr-size prefix (alignment padding)
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	andl	$0x80108,%r11d
	cmpl	$0x80108,%r11d		# BMI2/ADX (mulx) path available?
	je	.Lmulx4x_enter		# NOTE(review): label defined outside this chunk
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmul4x_prologue:

.byte	0x67
	shll	$3,%r9d			# %r9 = num*8 bytes
	leaq	(%r9,%r9,2),%r10	# %r10 = num*24
	negq	%r9

	# Choose a frame base so the scratch area does not alias the output
	# buffer modulo 4096 (cache-timing / page-aliasing avoidance).
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lmul4xsp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lmul4xsp_done

.align	32
.Lmul4xsp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lmul4xsp_done:
	andq	$-64,%rbp		# 64-byte align frame base
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10		# touch each page while descending
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	negq	%r9

	movq	%rax,40(%rsp)		# save original %rsp for epilogue
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body:

	call	mul4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			# return 1

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

# ---- mul4x_internal ------------------------------------------------------
# Core of the 4-way multiply.  Builds a 16-entry one-hot SSE2 mask ladder
# (xmm0..xmm3 stored at 112..352(%r10)) from the gather index in %xmm5,
# then uses it to select one bp[] power from the 2x-interleaved table at
# %r12 in constant time.
.type	mul4x_internal,@function
.align	32
mul4x_internal:
.cfi_startproc
	shlq	$5,%r9
	movd	8(%rax),%xmm5		# gather index — assumes %rax points at
					# the arg save area; TODO confirm caller
	leaq	.Linc(%rip),%rax	# NOTE(review): .Linc defined outside this chunk
	leaq	128(%rdx,%r9,1),%r13
	shrq	$5,%r9
	movdqa	0(%rax),%xmm0		# .Linc[0]
	movdqa	16(%rax),%xmm1		# .Linc[1]
	leaq	88-112(%rsp,%r9,1),%r10
	leaq	128(%rdx),%r12

	pshufd	$0,%xmm5,%xmm5		# broadcast index to all dword lanes
	movdqa	%xmm1,%xmm4
.byte	0x67,0x67
	movdqa	%xmm1,%xmm2
	# Counter ladder: xmm0/1/2/3 step through 0..15 two lanes at a time;
	# pcmpeqd against the broadcast index yields all-ones in exactly one
	# dword lane per table entry — the constant-time selection masks.
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd
%xmm1,%xmm2 613 pcmpeqd %xmm5,%xmm1 614 movdqa %xmm0,240(%r10) 615 movdqa %xmm4,%xmm0 616 617 paddd %xmm2,%xmm3 618 pcmpeqd %xmm5,%xmm2 619 movdqa %xmm1,256(%r10) 620 movdqa %xmm4,%xmm1 621 622 paddd %xmm3,%xmm0 623 pcmpeqd %xmm5,%xmm3 624 movdqa %xmm2,272(%r10) 625 movdqa %xmm4,%xmm2 626 627 paddd %xmm0,%xmm1 628 pcmpeqd %xmm5,%xmm0 629 movdqa %xmm3,288(%r10) 630 movdqa %xmm4,%xmm3 631 paddd %xmm1,%xmm2 632 pcmpeqd %xmm5,%xmm1 633 movdqa %xmm0,304(%r10) 634 635 paddd %xmm2,%xmm3 636.byte 0x67 637 pcmpeqd %xmm5,%xmm2 638 movdqa %xmm1,320(%r10) 639 640 pcmpeqd %xmm5,%xmm3 641 movdqa %xmm2,336(%r10) 642 pand 64(%r12),%xmm0 643 644 pand 80(%r12),%xmm1 645 pand 96(%r12),%xmm2 646 movdqa %xmm3,352(%r10) 647 pand 112(%r12),%xmm3 648 por %xmm2,%xmm0 649 por %xmm3,%xmm1 650 movdqa -128(%r12),%xmm4 651 movdqa -112(%r12),%xmm5 652 movdqa -96(%r12),%xmm2 653 pand 112(%r10),%xmm4 654 movdqa -80(%r12),%xmm3 655 pand 128(%r10),%xmm5 656 por %xmm4,%xmm0 657 pand 144(%r10),%xmm2 658 por %xmm5,%xmm1 659 pand 160(%r10),%xmm3 660 por %xmm2,%xmm0 661 por %xmm3,%xmm1 662 movdqa -64(%r12),%xmm4 663 movdqa -48(%r12),%xmm5 664 movdqa -32(%r12),%xmm2 665 pand 176(%r10),%xmm4 666 movdqa -16(%r12),%xmm3 667 pand 192(%r10),%xmm5 668 por %xmm4,%xmm0 669 pand 208(%r10),%xmm2 670 por %xmm5,%xmm1 671 pand 224(%r10),%xmm3 672 por %xmm2,%xmm0 673 por %xmm3,%xmm1 674 movdqa 0(%r12),%xmm4 675 movdqa 16(%r12),%xmm5 676 movdqa 32(%r12),%xmm2 677 pand 240(%r10),%xmm4 678 movdqa 48(%r12),%xmm3 679 pand 256(%r10),%xmm5 680 por %xmm4,%xmm0 681 pand 272(%r10),%xmm2 682 por %xmm5,%xmm1 683 pand 288(%r10),%xmm3 684 por %xmm2,%xmm0 685 por %xmm3,%xmm1 686 por %xmm1,%xmm0 687 pshufd $0x4e,%xmm0,%xmm1 688 por %xmm1,%xmm0 689 leaq 256(%r12),%r12 690.byte 102,72,15,126,195 691 692 movq %r13,16+8(%rsp) 693 movq %rdi,56+8(%rsp) 694 695 movq (%r8),%r8 696 movq (%rsi),%rax 697 leaq (%rsi,%r9,1),%rsi 698 negq %r9 699 700 movq %r8,%rbp 701 mulq %rbx 702 movq %rax,%r10 703 movq (%rcx),%rax 704 705 imulq %r10,%rbp 706 
leaq 64+8(%rsp),%r14 707 movq %rdx,%r11 708 709 mulq %rbp 710 addq %rax,%r10 711 movq 8(%rsi,%r9,1),%rax 712 adcq $0,%rdx 713 movq %rdx,%rdi 714 715 mulq %rbx 716 addq %rax,%r11 717 movq 8(%rcx),%rax 718 adcq $0,%rdx 719 movq %rdx,%r10 720 721 mulq %rbp 722 addq %rax,%rdi 723 movq 16(%rsi,%r9,1),%rax 724 adcq $0,%rdx 725 addq %r11,%rdi 726 leaq 32(%r9),%r15 727 leaq 32(%rcx),%rcx 728 adcq $0,%rdx 729 movq %rdi,(%r14) 730 movq %rdx,%r13 731 jmp .L1st4x 732 733.align 32 734.L1st4x: 735 mulq %rbx 736 addq %rax,%r10 737 movq -16(%rcx),%rax 738 leaq 32(%r14),%r14 739 adcq $0,%rdx 740 movq %rdx,%r11 741 742 mulq %rbp 743 addq %rax,%r13 744 movq -8(%rsi,%r15,1),%rax 745 adcq $0,%rdx 746 addq %r10,%r13 747 adcq $0,%rdx 748 movq %r13,-24(%r14) 749 movq %rdx,%rdi 750 751 mulq %rbx 752 addq %rax,%r11 753 movq -8(%rcx),%rax 754 adcq $0,%rdx 755 movq %rdx,%r10 756 757 mulq %rbp 758 addq %rax,%rdi 759 movq (%rsi,%r15,1),%rax 760 adcq $0,%rdx 761 addq %r11,%rdi 762 adcq $0,%rdx 763 movq %rdi,-16(%r14) 764 movq %rdx,%r13 765 766 mulq %rbx 767 addq %rax,%r10 768 movq 0(%rcx),%rax 769 adcq $0,%rdx 770 movq %rdx,%r11 771 772 mulq %rbp 773 addq %rax,%r13 774 movq 8(%rsi,%r15,1),%rax 775 adcq $0,%rdx 776 addq %r10,%r13 777 adcq $0,%rdx 778 movq %r13,-8(%r14) 779 movq %rdx,%rdi 780 781 mulq %rbx 782 addq %rax,%r11 783 movq 8(%rcx),%rax 784 adcq $0,%rdx 785 movq %rdx,%r10 786 787 mulq %rbp 788 addq %rax,%rdi 789 movq 16(%rsi,%r15,1),%rax 790 adcq $0,%rdx 791 addq %r11,%rdi 792 leaq 32(%rcx),%rcx 793 adcq $0,%rdx 794 movq %rdi,(%r14) 795 movq %rdx,%r13 796 797 addq $32,%r15 798 jnz .L1st4x 799 800 mulq %rbx 801 addq %rax,%r10 802 movq -16(%rcx),%rax 803 leaq 32(%r14),%r14 804 adcq $0,%rdx 805 movq %rdx,%r11 806 807 mulq %rbp 808 addq %rax,%r13 809 movq -8(%rsi),%rax 810 adcq $0,%rdx 811 addq %r10,%r13 812 adcq $0,%rdx 813 movq %r13,-24(%r14) 814 movq %rdx,%rdi 815 816 mulq %rbx 817 addq %rax,%r11 818 movq -8(%rcx),%rax 819 adcq $0,%rdx 820 movq %rdx,%r10 821 822 mulq %rbp 823 addq 
%rax,%rdi 824 movq (%rsi,%r9,1),%rax 825 adcq $0,%rdx 826 addq %r11,%rdi 827 adcq $0,%rdx 828 movq %rdi,-16(%r14) 829 movq %rdx,%r13 830 831 leaq (%rcx,%r9,1),%rcx 832 833 xorq %rdi,%rdi 834 addq %r10,%r13 835 adcq $0,%rdi 836 movq %r13,-8(%r14) 837 838 jmp .Louter4x 839 840.align 32 841.Louter4x: 842 leaq 16+128(%r14),%rdx 843 pxor %xmm4,%xmm4 844 pxor %xmm5,%xmm5 845 movdqa -128(%r12),%xmm0 846 movdqa -112(%r12),%xmm1 847 movdqa -96(%r12),%xmm2 848 movdqa -80(%r12),%xmm3 849 pand -128(%rdx),%xmm0 850 pand -112(%rdx),%xmm1 851 por %xmm0,%xmm4 852 pand -96(%rdx),%xmm2 853 por %xmm1,%xmm5 854 pand -80(%rdx),%xmm3 855 por %xmm2,%xmm4 856 por %xmm3,%xmm5 857 movdqa -64(%r12),%xmm0 858 movdqa -48(%r12),%xmm1 859 movdqa -32(%r12),%xmm2 860 movdqa -16(%r12),%xmm3 861 pand -64(%rdx),%xmm0 862 pand -48(%rdx),%xmm1 863 por %xmm0,%xmm4 864 pand -32(%rdx),%xmm2 865 por %xmm1,%xmm5 866 pand -16(%rdx),%xmm3 867 por %xmm2,%xmm4 868 por %xmm3,%xmm5 869 movdqa 0(%r12),%xmm0 870 movdqa 16(%r12),%xmm1 871 movdqa 32(%r12),%xmm2 872 movdqa 48(%r12),%xmm3 873 pand 0(%rdx),%xmm0 874 pand 16(%rdx),%xmm1 875 por %xmm0,%xmm4 876 pand 32(%rdx),%xmm2 877 por %xmm1,%xmm5 878 pand 48(%rdx),%xmm3 879 por %xmm2,%xmm4 880 por %xmm3,%xmm5 881 movdqa 64(%r12),%xmm0 882 movdqa 80(%r12),%xmm1 883 movdqa 96(%r12),%xmm2 884 movdqa 112(%r12),%xmm3 885 pand 64(%rdx),%xmm0 886 pand 80(%rdx),%xmm1 887 por %xmm0,%xmm4 888 pand 96(%rdx),%xmm2 889 por %xmm1,%xmm5 890 pand 112(%rdx),%xmm3 891 por %xmm2,%xmm4 892 por %xmm3,%xmm5 893 por %xmm5,%xmm4 894 pshufd $0x4e,%xmm4,%xmm0 895 por %xmm4,%xmm0 896 leaq 256(%r12),%r12 897.byte 102,72,15,126,195 898 899 movq (%r14,%r9,1),%r10 900 movq %r8,%rbp 901 mulq %rbx 902 addq %rax,%r10 903 movq (%rcx),%rax 904 adcq $0,%rdx 905 906 imulq %r10,%rbp 907 movq %rdx,%r11 908 movq %rdi,(%r14) 909 910 leaq (%r14,%r9,1),%r14 911 912 mulq %rbp 913 addq %rax,%r10 914 movq 8(%rsi,%r9,1),%rax 915 adcq $0,%rdx 916 movq %rdx,%rdi 917 918 mulq %rbx 919 addq %rax,%r11 920 movq 
8(%rcx),%rax 921 adcq $0,%rdx 922 addq 8(%r14),%r11 923 adcq $0,%rdx 924 movq %rdx,%r10 925 926 mulq %rbp 927 addq %rax,%rdi 928 movq 16(%rsi,%r9,1),%rax 929 adcq $0,%rdx 930 addq %r11,%rdi 931 leaq 32(%r9),%r15 932 leaq 32(%rcx),%rcx 933 adcq $0,%rdx 934 movq %rdx,%r13 935 jmp .Linner4x 936 937.align 32 938.Linner4x: 939 mulq %rbx 940 addq %rax,%r10 941 movq -16(%rcx),%rax 942 adcq $0,%rdx 943 addq 16(%r14),%r10 944 leaq 32(%r14),%r14 945 adcq $0,%rdx 946 movq %rdx,%r11 947 948 mulq %rbp 949 addq %rax,%r13 950 movq -8(%rsi,%r15,1),%rax 951 adcq $0,%rdx 952 addq %r10,%r13 953 adcq $0,%rdx 954 movq %rdi,-32(%r14) 955 movq %rdx,%rdi 956 957 mulq %rbx 958 addq %rax,%r11 959 movq -8(%rcx),%rax 960 adcq $0,%rdx 961 addq -8(%r14),%r11 962 adcq $0,%rdx 963 movq %rdx,%r10 964 965 mulq %rbp 966 addq %rax,%rdi 967 movq (%rsi,%r15,1),%rax 968 adcq $0,%rdx 969 addq %r11,%rdi 970 adcq $0,%rdx 971 movq %r13,-24(%r14) 972 movq %rdx,%r13 973 974 mulq %rbx 975 addq %rax,%r10 976 movq 0(%rcx),%rax 977 adcq $0,%rdx 978 addq (%r14),%r10 979 adcq $0,%rdx 980 movq %rdx,%r11 981 982 mulq %rbp 983 addq %rax,%r13 984 movq 8(%rsi,%r15,1),%rax 985 adcq $0,%rdx 986 addq %r10,%r13 987 adcq $0,%rdx 988 movq %rdi,-16(%r14) 989 movq %rdx,%rdi 990 991 mulq %rbx 992 addq %rax,%r11 993 movq 8(%rcx),%rax 994 adcq $0,%rdx 995 addq 8(%r14),%r11 996 adcq $0,%rdx 997 movq %rdx,%r10 998 999 mulq %rbp 1000 addq %rax,%rdi 1001 movq 16(%rsi,%r15,1),%rax 1002 adcq $0,%rdx 1003 addq %r11,%rdi 1004 leaq 32(%rcx),%rcx 1005 adcq $0,%rdx 1006 movq %r13,-8(%r14) 1007 movq %rdx,%r13 1008 1009 addq $32,%r15 1010 jnz .Linner4x 1011 1012 mulq %rbx 1013 addq %rax,%r10 1014 movq -16(%rcx),%rax 1015 adcq $0,%rdx 1016 addq 16(%r14),%r10 1017 leaq 32(%r14),%r14 1018 adcq $0,%rdx 1019 movq %rdx,%r11 1020 1021 mulq %rbp 1022 addq %rax,%r13 1023 movq -8(%rsi),%rax 1024 adcq $0,%rdx 1025 addq %r10,%r13 1026 adcq $0,%rdx 1027 movq %rdi,-32(%r14) 1028 movq %rdx,%rdi 1029 1030 mulq %rbx 1031 addq %rax,%r11 1032 movq %rbp,%rax 1033 
# ---- tail of mul4x_internal: last limb, carry fold, hand-off -------------
	movq	-8(%rcx),%rbp
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	movq	%rdi,-16(%r14)
	leaq	(%rcx,%r9,1),%rcx	# rewind modulus pointer

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%r14),%r13
	adcq	$0,%rdi
	movq	%r13,-8(%r14)

	cmpq	16+8(%rsp),%r12		# consumed the whole bp[] table?
	jb	.Louter4x
	# Prepare the final conditional subtraction: fold top-word carry
	# (%rdi) and saved flag (%r15) into an all-zeros/all-ones mask in %rax.
	xorq	%rax,%rax
	subq	%r13,%rbp
	adcq	%r15,%r15
	orq	%r15,%rdi
	subq	%rdi,%rax
	leaq	(%r14,%r9,1),%rbx
	movq	(%rcx),%r12
	leaq	(%rcx),%rbp
	movq	%r9,%rcx
	sarq	$3+2,%rcx		# words -> 4-word groups (negative count)
	movq	56+8(%rsp),%rdi		# restore rp saved at function entry
	decq	%r12
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqr4x_sub_entry	# NOTE(review): label defined outside this chunk
.cfi_endproc
.size	mul4x_internal,.-mul4x_internal

# ---- bn_power5 -----------------------------------------------------------
# Computes a fixed 2^5 = 32nd power chain: five squarings
# (__bn_sqr8x_internal + __bn_post4x_internal) followed by one multiply
# (mul4x_internal).  Dispatches to the BMI2/ADX variant when
# OPENSSL_ia32cap_P advertises it.  Same frame carve-out / page-walk
# pattern as bn_mul4x_mont_gather5 above.  Returns 1 in %rax.
.globl	bn_power5
.type	bn_power5,@function
.align	32
bn_power5:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d
	andl	$0x80108,%r11d
	cmpl	$0x80108,%r11d		# BMI2/ADX (mulx/adcx/adox) path?
	je	.Lpowerx5_enter		# NOTE(review): label defined outside this chunk
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lpower5_prologue:

	shll	$3,%r9d			# %r9 = num*8 bytes
	leal	(%r9,%r9,2),%r10d	# %r10 = num*24
	negq	%r9
	movq	(%r8),%r8		# presumably n0 value; matches 32(%rsp) use below

	# Pick frame base avoiding page-aliasing with the output (%rdi).
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lpwr_sp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lpwr_sp_done

.align	32
.Lpwr_sp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lpwr_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwr_page_walk
	jmp	.Lpwr_page_walk_done

.Lpwr_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10		# probe each stack page
	cmpq	%rbp,%rsp
	ja	.Lpwr_page_walk
.Lpwr_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)		# stash n0
	movq	%rax,40(%rsp)		# stash original %rsp
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpower5_body:
	# Park pointers/counters in xmm regs across the internal calls
	# (hand-encoded because some assemblers reject 64-bit movq to xmm):
.byte	102,72,15,110,207		# movq %rdi,%xmm1
.byte	102,72,15,110,209		# movq %rcx,%xmm2
.byte	102,73,15,110,218		# movq %r10,%xmm3
.byte	102,72,15,110,226		# movq %rdx,%xmm4

	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal

.byte	102,72,15,126,209		# movq %xmm2,%rcx
.byte	102,72,15,126,226		# movq %xmm4,%rdx
	movq	%rsi,%rdi
	movq	40(%rsp),%rax
	leaq	32(%rsp),%r8		# &n0

	call	mul4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			# return 1
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpower5_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	bn_power5,.-bn_power5

# ---- bn_sqr8x_internal ---------------------------------------------------
# 8-way squaring kernel (body continues below this chunk's view).
.globl	bn_sqr8x_internal
.hidden	bn_sqr8x_internal
.type	bn_sqr8x_internal,@function
.align	32
bn_sqr8x_internal:
__bn_sqr8x_internal:
.cfi_startproc

1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 leaq 32(%r10),%rbp 1295 leaq (%rsi,%r9,1),%rsi 1296 1297 movq %r9,%rcx 1298 1299 1300 movq -32(%rsi,%rbp,1),%r14 1301 leaq 48+8(%rsp,%r9,2),%rdi 1302 movq -24(%rsi,%rbp,1),%rax 1303 leaq -32(%rdi,%rbp,1),%rdi 1304 movq -16(%rsi,%rbp,1),%rbx 1305 movq %rax,%r15 1306 1307 mulq %r14 1308 movq %rax,%r10 1309 movq %rbx,%rax 1310 movq %rdx,%r11 1311 movq %r10,-24(%rdi,%rbp,1) 1312 1313 mulq %r14 1314 addq %rax,%r11 1315 movq %rbx,%rax 1316 adcq $0,%rdx 1317 movq %r11,-16(%rdi,%rbp,1) 1318 movq %rdx,%r10 1319 1320 1321 movq -8(%rsi,%rbp,1),%rbx 1322 mulq %r15 1323 movq %rax,%r12 1324 movq %rbx,%rax 1325 movq %rdx,%r13 1326 1327 leaq (%rbp),%rcx 1328 mulq %r14 1329 addq %rax,%r10 1330 movq %rbx,%rax 1331 movq %rdx,%r11 1332 adcq $0,%r11 1333 addq %r12,%r10 1334 adcq $0,%r11 1335 movq %r10,-8(%rdi,%rcx,1) 1336 jmp .Lsqr4x_1st 1337 1338.align 32 1339.Lsqr4x_1st: 1340 movq (%rsi,%rcx,1),%rbx 1341 mulq %r15 1342 addq %rax,%r13 1343 movq %rbx,%rax 1344 movq %rdx,%r12 1345 adcq $0,%r12 1346 1347 mulq %r14 1348 addq %rax,%r11 1349 movq %rbx,%rax 1350 movq 8(%rsi,%rcx,1),%rbx 1351 movq %rdx,%r10 1352 adcq $0,%r10 1353 addq %r13,%r11 1354 adcq $0,%r10 1355 1356 1357 mulq %r15 1358 addq %rax,%r12 1359 movq %rbx,%rax 1360 movq %r11,(%rdi,%rcx,1) 1361 movq %rdx,%r13 1362 adcq $0,%r13 1363 1364 mulq %r14 1365 addq %rax,%r10 1366 movq %rbx,%rax 1367 movq 16(%rsi,%rcx,1),%rbx 1368 movq %rdx,%r11 1369 adcq $0,%r11 1370 addq %r12,%r10 1371 adcq $0,%r11 1372 1373 mulq %r15 1374 addq %rax,%r13 1375 movq %rbx,%rax 1376 movq %r10,8(%rdi,%rcx,1) 1377 movq %rdx,%r12 1378 adcq $0,%r12 1379 1380 mulq %r14 1381 addq %rax,%r11 1382 movq %rbx,%rax 1383 movq 24(%rsi,%rcx,1),%rbx 1384 movq %rdx,%r10 1385 adcq $0,%r10 1386 addq %r13,%r11 1387 adcq $0,%r10 1388 1389 1390 mulq %r15 1391 addq %rax,%r12 1392 movq 
%rbx,%rax 1393 movq %r11,16(%rdi,%rcx,1) 1394 movq %rdx,%r13 1395 adcq $0,%r13 1396 leaq 32(%rcx),%rcx 1397 1398 mulq %r14 1399 addq %rax,%r10 1400 movq %rbx,%rax 1401 movq %rdx,%r11 1402 adcq $0,%r11 1403 addq %r12,%r10 1404 adcq $0,%r11 1405 movq %r10,-8(%rdi,%rcx,1) 1406 1407 cmpq $0,%rcx 1408 jne .Lsqr4x_1st 1409 1410 mulq %r15 1411 addq %rax,%r13 1412 leaq 16(%rbp),%rbp 1413 adcq $0,%rdx 1414 addq %r11,%r13 1415 adcq $0,%rdx 1416 1417 movq %r13,(%rdi) 1418 movq %rdx,%r12 1419 movq %rdx,8(%rdi) 1420 jmp .Lsqr4x_outer 1421 1422.align 32 1423.Lsqr4x_outer: 1424 movq -32(%rsi,%rbp,1),%r14 1425 leaq 48+8(%rsp,%r9,2),%rdi 1426 movq -24(%rsi,%rbp,1),%rax 1427 leaq -32(%rdi,%rbp,1),%rdi 1428 movq -16(%rsi,%rbp,1),%rbx 1429 movq %rax,%r15 1430 1431 mulq %r14 1432 movq -24(%rdi,%rbp,1),%r10 1433 addq %rax,%r10 1434 movq %rbx,%rax 1435 adcq $0,%rdx 1436 movq %r10,-24(%rdi,%rbp,1) 1437 movq %rdx,%r11 1438 1439 mulq %r14 1440 addq %rax,%r11 1441 movq %rbx,%rax 1442 adcq $0,%rdx 1443 addq -16(%rdi,%rbp,1),%r11 1444 movq %rdx,%r10 1445 adcq $0,%r10 1446 movq %r11,-16(%rdi,%rbp,1) 1447 1448 xorq %r12,%r12 1449 1450 movq -8(%rsi,%rbp,1),%rbx 1451 mulq %r15 1452 addq %rax,%r12 1453 movq %rbx,%rax 1454 adcq $0,%rdx 1455 addq -8(%rdi,%rbp,1),%r12 1456 movq %rdx,%r13 1457 adcq $0,%r13 1458 1459 mulq %r14 1460 addq %rax,%r10 1461 movq %rbx,%rax 1462 adcq $0,%rdx 1463 addq %r12,%r10 1464 movq %rdx,%r11 1465 adcq $0,%r11 1466 movq %r10,-8(%rdi,%rbp,1) 1467 1468 leaq (%rbp),%rcx 1469 jmp .Lsqr4x_inner 1470 1471.align 32 1472.Lsqr4x_inner: 1473 movq (%rsi,%rcx,1),%rbx 1474 mulq %r15 1475 addq %rax,%r13 1476 movq %rbx,%rax 1477 movq %rdx,%r12 1478 adcq $0,%r12 1479 addq (%rdi,%rcx,1),%r13 1480 adcq $0,%r12 1481 1482.byte 0x67 1483 mulq %r14 1484 addq %rax,%r11 1485 movq %rbx,%rax 1486 movq 8(%rsi,%rcx,1),%rbx 1487 movq %rdx,%r10 1488 adcq $0,%r10 1489 addq %r13,%r11 1490 adcq $0,%r10 1491 1492 mulq %r15 1493 addq %rax,%r12 1494 movq %r11,(%rdi,%rcx,1) 1495 movq %rbx,%rax 1496 movq 
%rdx,%r13 1497 adcq $0,%r13 1498 addq 8(%rdi,%rcx,1),%r12 1499 leaq 16(%rcx),%rcx 1500 adcq $0,%r13 1501 1502 mulq %r14 1503 addq %rax,%r10 1504 movq %rbx,%rax 1505 adcq $0,%rdx 1506 addq %r12,%r10 1507 movq %rdx,%r11 1508 adcq $0,%r11 1509 movq %r10,-8(%rdi,%rcx,1) 1510 1511 cmpq $0,%rcx 1512 jne .Lsqr4x_inner 1513 1514.byte 0x67 1515 mulq %r15 1516 addq %rax,%r13 1517 adcq $0,%rdx 1518 addq %r11,%r13 1519 adcq $0,%rdx 1520 1521 movq %r13,(%rdi) 1522 movq %rdx,%r12 1523 movq %rdx,8(%rdi) 1524 1525 addq $16,%rbp 1526 jnz .Lsqr4x_outer 1527 1528 1529 movq -32(%rsi),%r14 1530 leaq 48+8(%rsp,%r9,2),%rdi 1531 movq -24(%rsi),%rax 1532 leaq -32(%rdi,%rbp,1),%rdi 1533 movq -16(%rsi),%rbx 1534 movq %rax,%r15 1535 1536 mulq %r14 1537 addq %rax,%r10 1538 movq %rbx,%rax 1539 movq %rdx,%r11 1540 adcq $0,%r11 1541 1542 mulq %r14 1543 addq %rax,%r11 1544 movq %rbx,%rax 1545 movq %r10,-24(%rdi) 1546 movq %rdx,%r10 1547 adcq $0,%r10 1548 addq %r13,%r11 1549 movq -8(%rsi),%rbx 1550 adcq $0,%r10 1551 1552 mulq %r15 1553 addq %rax,%r12 1554 movq %rbx,%rax 1555 movq %r11,-16(%rdi) 1556 movq %rdx,%r13 1557 adcq $0,%r13 1558 1559 mulq %r14 1560 addq %rax,%r10 1561 movq %rbx,%rax 1562 movq %rdx,%r11 1563 adcq $0,%r11 1564 addq %r12,%r10 1565 adcq $0,%r11 1566 movq %r10,-8(%rdi) 1567 1568 mulq %r15 1569 addq %rax,%r13 1570 movq -16(%rsi),%rax 1571 adcq $0,%rdx 1572 addq %r11,%r13 1573 adcq $0,%rdx 1574 1575 movq %r13,(%rdi) 1576 movq %rdx,%r12 1577 movq %rdx,8(%rdi) 1578 1579 mulq %rbx 1580 addq $16,%rbp 1581 xorq %r14,%r14 1582 subq %r9,%rbp 1583 xorq %r15,%r15 1584 1585 addq %r12,%rax 1586 adcq $0,%rdx 1587 movq %rax,8(%rdi) 1588 movq %rdx,16(%rdi) 1589 movq %r15,24(%rdi) 1590 1591 movq -16(%rsi,%rbp,1),%rax 1592 leaq 48+8(%rsp),%rdi 1593 xorq %r10,%r10 1594 movq 8(%rdi),%r11 1595 1596 leaq (%r14,%r10,2),%r12 1597 shrq $63,%r10 1598 leaq (%rcx,%r11,2),%r13 1599 shrq $63,%r11 1600 orq %r10,%r13 1601 movq 16(%rdi),%r10 1602 movq %r11,%r14 1603 mulq %rax 1604 negq %r15 1605 movq 
24(%rdi),%r11 1606 adcq %rax,%r12 1607 movq -8(%rsi,%rbp,1),%rax 1608 movq %r12,(%rdi) 1609 adcq %rdx,%r13 1610 1611 leaq (%r14,%r10,2),%rbx 1612 movq %r13,8(%rdi) 1613 sbbq %r15,%r15 1614 shrq $63,%r10 1615 leaq (%rcx,%r11,2),%r8 1616 shrq $63,%r11 1617 orq %r10,%r8 1618 movq 32(%rdi),%r10 1619 movq %r11,%r14 1620 mulq %rax 1621 negq %r15 1622 movq 40(%rdi),%r11 1623 adcq %rax,%rbx 1624 movq 0(%rsi,%rbp,1),%rax 1625 movq %rbx,16(%rdi) 1626 adcq %rdx,%r8 1627 leaq 16(%rbp),%rbp 1628 movq %r8,24(%rdi) 1629 sbbq %r15,%r15 1630 leaq 64(%rdi),%rdi 1631 jmp .Lsqr4x_shift_n_add 1632 1633.align 32 1634.Lsqr4x_shift_n_add: 1635 leaq (%r14,%r10,2),%r12 1636 shrq $63,%r10 1637 leaq (%rcx,%r11,2),%r13 1638 shrq $63,%r11 1639 orq %r10,%r13 1640 movq -16(%rdi),%r10 1641 movq %r11,%r14 1642 mulq %rax 1643 negq %r15 1644 movq -8(%rdi),%r11 1645 adcq %rax,%r12 1646 movq -8(%rsi,%rbp,1),%rax 1647 movq %r12,-32(%rdi) 1648 adcq %rdx,%r13 1649 1650 leaq (%r14,%r10,2),%rbx 1651 movq %r13,-24(%rdi) 1652 sbbq %r15,%r15 1653 shrq $63,%r10 1654 leaq (%rcx,%r11,2),%r8 1655 shrq $63,%r11 1656 orq %r10,%r8 1657 movq 0(%rdi),%r10 1658 movq %r11,%r14 1659 mulq %rax 1660 negq %r15 1661 movq 8(%rdi),%r11 1662 adcq %rax,%rbx 1663 movq 0(%rsi,%rbp,1),%rax 1664 movq %rbx,-16(%rdi) 1665 adcq %rdx,%r8 1666 1667 leaq (%r14,%r10,2),%r12 1668 movq %r8,-8(%rdi) 1669 sbbq %r15,%r15 1670 shrq $63,%r10 1671 leaq (%rcx,%r11,2),%r13 1672 shrq $63,%r11 1673 orq %r10,%r13 1674 movq 16(%rdi),%r10 1675 movq %r11,%r14 1676 mulq %rax 1677 negq %r15 1678 movq 24(%rdi),%r11 1679 adcq %rax,%r12 1680 movq 8(%rsi,%rbp,1),%rax 1681 movq %r12,0(%rdi) 1682 adcq %rdx,%r13 1683 1684 leaq (%r14,%r10,2),%rbx 1685 movq %r13,8(%rdi) 1686 sbbq %r15,%r15 1687 shrq $63,%r10 1688 leaq (%rcx,%r11,2),%r8 1689 shrq $63,%r11 1690 orq %r10,%r8 1691 movq 32(%rdi),%r10 1692 movq %r11,%r14 1693 mulq %rax 1694 negq %r15 1695 movq 40(%rdi),%r11 1696 adcq %rax,%rbx 1697 movq 16(%rsi,%rbp,1),%rax 1698 movq %rbx,16(%rdi) 1699 adcq %rdx,%r8 1700 
movq %r8,24(%rdi) 1701 sbbq %r15,%r15 1702 leaq 64(%rdi),%rdi 1703 addq $32,%rbp 1704 jnz .Lsqr4x_shift_n_add 1705 1706 leaq (%r14,%r10,2),%r12 1707.byte 0x67 1708 shrq $63,%r10 1709 leaq (%rcx,%r11,2),%r13 1710 shrq $63,%r11 1711 orq %r10,%r13 1712 movq -16(%rdi),%r10 1713 movq %r11,%r14 1714 mulq %rax 1715 negq %r15 1716 movq -8(%rdi),%r11 1717 adcq %rax,%r12 1718 movq -8(%rsi),%rax 1719 movq %r12,-32(%rdi) 1720 adcq %rdx,%r13 1721 1722 leaq (%r14,%r10,2),%rbx 1723 movq %r13,-24(%rdi) 1724 sbbq %r15,%r15 1725 shrq $63,%r10 1726 leaq (%rcx,%r11,2),%r8 1727 shrq $63,%r11 1728 orq %r10,%r8 1729 mulq %rax 1730 negq %r15 1731 adcq %rax,%rbx 1732 adcq %rdx,%r8 1733 movq %rbx,-16(%rdi) 1734 movq %r8,-8(%rdi) 1735.byte 102,72,15,126,213 1736__bn_sqr8x_reduction: 1737 xorq %rax,%rax 1738 leaq (%r9,%rbp,1),%rcx 1739 leaq 48+8(%rsp,%r9,2),%rdx 1740 movq %rcx,0+8(%rsp) 1741 leaq 48+8(%rsp,%r9,1),%rdi 1742 movq %rdx,8+8(%rsp) 1743 negq %r9 1744 jmp .L8x_reduction_loop 1745 1746.align 32 1747.L8x_reduction_loop: 1748 leaq (%rdi,%r9,1),%rdi 1749.byte 0x66 1750 movq 0(%rdi),%rbx 1751 movq 8(%rdi),%r9 1752 movq 16(%rdi),%r10 1753 movq 24(%rdi),%r11 1754 movq 32(%rdi),%r12 1755 movq 40(%rdi),%r13 1756 movq 48(%rdi),%r14 1757 movq 56(%rdi),%r15 1758 movq %rax,(%rdx) 1759 leaq 64(%rdi),%rdi 1760 1761.byte 0x67 1762 movq %rbx,%r8 1763 imulq 32+8(%rsp),%rbx 1764 movq 0(%rbp),%rax 1765 movl $8,%ecx 1766 jmp .L8x_reduce 1767 1768.align 32 1769.L8x_reduce: 1770 mulq %rbx 1771 movq 8(%rbp),%rax 1772 negq %r8 1773 movq %rdx,%r8 1774 adcq $0,%r8 1775 1776 mulq %rbx 1777 addq %rax,%r9 1778 movq 16(%rbp),%rax 1779 adcq $0,%rdx 1780 addq %r9,%r8 1781 movq %rbx,48-8+8(%rsp,%rcx,8) 1782 movq %rdx,%r9 1783 adcq $0,%r9 1784 1785 mulq %rbx 1786 addq %rax,%r10 1787 movq 24(%rbp),%rax 1788 adcq $0,%rdx 1789 addq %r10,%r9 1790 movq 32+8(%rsp),%rsi 1791 movq %rdx,%r10 1792 adcq $0,%r10 1793 1794 mulq %rbx 1795 addq %rax,%r11 1796 movq 32(%rbp),%rax 1797 adcq $0,%rdx 1798 imulq %r8,%rsi 1799 addq 
%r11,%r10 1800 movq %rdx,%r11 1801 adcq $0,%r11 1802 1803 mulq %rbx 1804 addq %rax,%r12 1805 movq 40(%rbp),%rax 1806 adcq $0,%rdx 1807 addq %r12,%r11 1808 movq %rdx,%r12 1809 adcq $0,%r12 1810 1811 mulq %rbx 1812 addq %rax,%r13 1813 movq 48(%rbp),%rax 1814 adcq $0,%rdx 1815 addq %r13,%r12 1816 movq %rdx,%r13 1817 adcq $0,%r13 1818 1819 mulq %rbx 1820 addq %rax,%r14 1821 movq 56(%rbp),%rax 1822 adcq $0,%rdx 1823 addq %r14,%r13 1824 movq %rdx,%r14 1825 adcq $0,%r14 1826 1827 mulq %rbx 1828 movq %rsi,%rbx 1829 addq %rax,%r15 1830 movq 0(%rbp),%rax 1831 adcq $0,%rdx 1832 addq %r15,%r14 1833 movq %rdx,%r15 1834 adcq $0,%r15 1835 1836 decl %ecx 1837 jnz .L8x_reduce 1838 1839 leaq 64(%rbp),%rbp 1840 xorq %rax,%rax 1841 movq 8+8(%rsp),%rdx 1842 cmpq 0+8(%rsp),%rbp 1843 jae .L8x_no_tail 1844 1845.byte 0x66 1846 addq 0(%rdi),%r8 1847 adcq 8(%rdi),%r9 1848 adcq 16(%rdi),%r10 1849 adcq 24(%rdi),%r11 1850 adcq 32(%rdi),%r12 1851 adcq 40(%rdi),%r13 1852 adcq 48(%rdi),%r14 1853 adcq 56(%rdi),%r15 1854 sbbq %rsi,%rsi 1855 1856 movq 48+56+8(%rsp),%rbx 1857 movl $8,%ecx 1858 movq 0(%rbp),%rax 1859 jmp .L8x_tail 1860 1861.align 32 1862.L8x_tail: 1863 mulq %rbx 1864 addq %rax,%r8 1865 movq 8(%rbp),%rax 1866 movq %r8,(%rdi) 1867 movq %rdx,%r8 1868 adcq $0,%r8 1869 1870 mulq %rbx 1871 addq %rax,%r9 1872 movq 16(%rbp),%rax 1873 adcq $0,%rdx 1874 addq %r9,%r8 1875 leaq 8(%rdi),%rdi 1876 movq %rdx,%r9 1877 adcq $0,%r9 1878 1879 mulq %rbx 1880 addq %rax,%r10 1881 movq 24(%rbp),%rax 1882 adcq $0,%rdx 1883 addq %r10,%r9 1884 movq %rdx,%r10 1885 adcq $0,%r10 1886 1887 mulq %rbx 1888 addq %rax,%r11 1889 movq 32(%rbp),%rax 1890 adcq $0,%rdx 1891 addq %r11,%r10 1892 movq %rdx,%r11 1893 adcq $0,%r11 1894 1895 mulq %rbx 1896 addq %rax,%r12 1897 movq 40(%rbp),%rax 1898 adcq $0,%rdx 1899 addq %r12,%r11 1900 movq %rdx,%r12 1901 adcq $0,%r12 1902 1903 mulq %rbx 1904 addq %rax,%r13 1905 movq 48(%rbp),%rax 1906 adcq $0,%rdx 1907 addq %r13,%r12 1908 movq %rdx,%r13 1909 adcq $0,%r13 1910 1911 mulq %rbx 
1912 addq %rax,%r14 1913 movq 56(%rbp),%rax 1914 adcq $0,%rdx 1915 addq %r14,%r13 1916 movq %rdx,%r14 1917 adcq $0,%r14 1918 1919 mulq %rbx 1920 movq 48-16+8(%rsp,%rcx,8),%rbx 1921 addq %rax,%r15 1922 adcq $0,%rdx 1923 addq %r15,%r14 1924 movq 0(%rbp),%rax 1925 movq %rdx,%r15 1926 adcq $0,%r15 1927 1928 decl %ecx 1929 jnz .L8x_tail 1930 1931 leaq 64(%rbp),%rbp 1932 movq 8+8(%rsp),%rdx 1933 cmpq 0+8(%rsp),%rbp 1934 jae .L8x_tail_done 1935 1936 movq 48+56+8(%rsp),%rbx 1937 negq %rsi 1938 movq 0(%rbp),%rax 1939 adcq 0(%rdi),%r8 1940 adcq 8(%rdi),%r9 1941 adcq 16(%rdi),%r10 1942 adcq 24(%rdi),%r11 1943 adcq 32(%rdi),%r12 1944 adcq 40(%rdi),%r13 1945 adcq 48(%rdi),%r14 1946 adcq 56(%rdi),%r15 1947 sbbq %rsi,%rsi 1948 1949 movl $8,%ecx 1950 jmp .L8x_tail 1951 1952.align 32 1953.L8x_tail_done: 1954 xorq %rax,%rax 1955 addq (%rdx),%r8 1956 adcq $0,%r9 1957 adcq $0,%r10 1958 adcq $0,%r11 1959 adcq $0,%r12 1960 adcq $0,%r13 1961 adcq $0,%r14 1962 adcq $0,%r15 1963 adcq $0,%rax 1964 1965 negq %rsi 1966.L8x_no_tail: 1967 adcq 0(%rdi),%r8 1968 adcq 8(%rdi),%r9 1969 adcq 16(%rdi),%r10 1970 adcq 24(%rdi),%r11 1971 adcq 32(%rdi),%r12 1972 adcq 40(%rdi),%r13 1973 adcq 48(%rdi),%r14 1974 adcq 56(%rdi),%r15 1975 adcq $0,%rax 1976 movq -8(%rbp),%rcx 1977 xorq %rsi,%rsi 1978 1979.byte 102,72,15,126,213 1980 1981 movq %r8,0(%rdi) 1982 movq %r9,8(%rdi) 1983.byte 102,73,15,126,217 1984 movq %r10,16(%rdi) 1985 movq %r11,24(%rdi) 1986 movq %r12,32(%rdi) 1987 movq %r13,40(%rdi) 1988 movq %r14,48(%rdi) 1989 movq %r15,56(%rdi) 1990 leaq 64(%rdi),%rdi 1991 1992 cmpq %rdx,%rdi 1993 jb .L8x_reduction_loop 1994 .byte 0xf3,0xc3 1995.cfi_endproc 1996.size bn_sqr8x_internal,.-bn_sqr8x_internal 1997.type __bn_post4x_internal,@function 1998.align 32 1999__bn_post4x_internal: 2000.cfi_startproc 2001 movq 0(%rbp),%r12 2002 leaq (%rdi,%r9,1),%rbx 2003 movq %r9,%rcx 2004.byte 102,72,15,126,207 2005 negq %rax 2006.byte 102,72,15,126,206 2007 sarq $3+2,%rcx 2008 decq %r12 2009 xorq %r10,%r10 2010 movq 
8(%rbp),%r13 2011 movq 16(%rbp),%r14 2012 movq 24(%rbp),%r15 2013 jmp .Lsqr4x_sub_entry 2014 2015.align 16 2016.Lsqr4x_sub: 2017 movq 0(%rbp),%r12 2018 movq 8(%rbp),%r13 2019 movq 16(%rbp),%r14 2020 movq 24(%rbp),%r15 2021.Lsqr4x_sub_entry: 2022 leaq 32(%rbp),%rbp 2023 notq %r12 2024 notq %r13 2025 notq %r14 2026 notq %r15 2027 andq %rax,%r12 2028 andq %rax,%r13 2029 andq %rax,%r14 2030 andq %rax,%r15 2031 2032 negq %r10 2033 adcq 0(%rbx),%r12 2034 adcq 8(%rbx),%r13 2035 adcq 16(%rbx),%r14 2036 adcq 24(%rbx),%r15 2037 movq %r12,0(%rdi) 2038 leaq 32(%rbx),%rbx 2039 movq %r13,8(%rdi) 2040 sbbq %r10,%r10 2041 movq %r14,16(%rdi) 2042 movq %r15,24(%rdi) 2043 leaq 32(%rdi),%rdi 2044 2045 incq %rcx 2046 jnz .Lsqr4x_sub 2047 2048 movq %r9,%r10 2049 negq %r9 2050 .byte 0xf3,0xc3 2051.cfi_endproc 2052.size __bn_post4x_internal,.-__bn_post4x_internal 2053.type bn_mulx4x_mont_gather5,@function 2054.align 32 2055bn_mulx4x_mont_gather5: 2056.cfi_startproc 2057 movq %rsp,%rax 2058.cfi_def_cfa_register %rax 2059.Lmulx4x_enter: 2060 pushq %rbx 2061.cfi_offset %rbx,-16 2062 pushq %rbp 2063.cfi_offset %rbp,-24 2064 pushq %r12 2065.cfi_offset %r12,-32 2066 pushq %r13 2067.cfi_offset %r13,-40 2068 pushq %r14 2069.cfi_offset %r14,-48 2070 pushq %r15 2071.cfi_offset %r15,-56 2072.Lmulx4x_prologue: 2073 2074 shll $3,%r9d 2075 leaq (%r9,%r9,2),%r10 2076 negq %r9 2077 movq (%r8),%r8 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 leaq -320(%rsp,%r9,2),%r11 2089 movq %rsp,%rbp 2090 subq %rdi,%r11 2091 andq $4095,%r11 2092 cmpq %r11,%r10 2093 jb .Lmulx4xsp_alt 2094 subq %r11,%rbp 2095 leaq -320(%rbp,%r9,2),%rbp 2096 jmp .Lmulx4xsp_done 2097 2098.Lmulx4xsp_alt: 2099 leaq 4096-320(,%r9,2),%r10 2100 leaq -320(%rbp,%r9,2),%rbp 2101 subq %r10,%r11 2102 movq $0,%r10 2103 cmovcq %r10,%r11 2104 subq %r11,%rbp 2105.Lmulx4xsp_done: 2106 andq $-64,%rbp 2107 movq %rsp,%r11 2108 subq %rbp,%r11 2109 andq $-4096,%r11 2110 leaq (%r11,%rbp,1),%rsp 2111 movq (%rsp),%r10 2112 cmpq %rbp,%rsp 2113 ja 
.Lmulx4x_page_walk 2114 jmp .Lmulx4x_page_walk_done 2115 2116.Lmulx4x_page_walk: 2117 leaq -4096(%rsp),%rsp 2118 movq (%rsp),%r10 2119 cmpq %rbp,%rsp 2120 ja .Lmulx4x_page_walk 2121.Lmulx4x_page_walk_done: 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 movq %r8,32(%rsp) 2136 movq %rax,40(%rsp) 2137.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2138.Lmulx4x_body: 2139 call mulx4x_internal 2140 2141 movq 40(%rsp),%rsi 2142.cfi_def_cfa %rsi,8 2143 movq $1,%rax 2144 2145 movq -48(%rsi),%r15 2146.cfi_restore %r15 2147 movq -40(%rsi),%r14 2148.cfi_restore %r14 2149 movq -32(%rsi),%r13 2150.cfi_restore %r13 2151 movq -24(%rsi),%r12 2152.cfi_restore %r12 2153 movq -16(%rsi),%rbp 2154.cfi_restore %rbp 2155 movq -8(%rsi),%rbx 2156.cfi_restore %rbx 2157 leaq (%rsi),%rsp 2158.cfi_def_cfa_register %rsp 2159.Lmulx4x_epilogue: 2160 .byte 0xf3,0xc3 2161.cfi_endproc 2162.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2163 2164.type mulx4x_internal,@function 2165.align 32 2166mulx4x_internal: 2167.cfi_startproc 2168 movq %r9,8(%rsp) 2169 movq %r9,%r10 2170 negq %r9 2171 shlq $5,%r9 2172 negq %r10 2173 leaq 128(%rdx,%r9,1),%r13 2174 shrq $5+5,%r9 2175 movd 8(%rax),%xmm5 2176 subq $1,%r9 2177 leaq .Linc(%rip),%rax 2178 movq %r13,16+8(%rsp) 2179 movq %r9,24+8(%rsp) 2180 movq %rdi,56+8(%rsp) 2181 movdqa 0(%rax),%xmm0 2182 movdqa 16(%rax),%xmm1 2183 leaq 88-112(%rsp,%r10,1),%r10 2184 leaq 128(%rdx),%rdi 2185 2186 pshufd $0,%xmm5,%xmm5 2187 movdqa %xmm1,%xmm4 2188.byte 0x67 2189 movdqa %xmm1,%xmm2 2190.byte 0x67 2191 paddd %xmm0,%xmm1 2192 pcmpeqd %xmm5,%xmm0 2193 movdqa %xmm4,%xmm3 2194 paddd %xmm1,%xmm2 2195 pcmpeqd %xmm5,%xmm1 2196 movdqa %xmm0,112(%r10) 2197 movdqa %xmm4,%xmm0 2198 2199 paddd %xmm2,%xmm3 2200 pcmpeqd %xmm5,%xmm2 2201 movdqa %xmm1,128(%r10) 2202 movdqa %xmm4,%xmm1 2203 2204 paddd %xmm3,%xmm0 2205 pcmpeqd %xmm5,%xmm3 2206 movdqa %xmm2,144(%r10) 2207 movdqa %xmm4,%xmm2 2208 2209 paddd %xmm0,%xmm1 2210 pcmpeqd %xmm5,%xmm0 2211 movdqa 
%xmm3,160(%r10) 2212 movdqa %xmm4,%xmm3 2213 paddd %xmm1,%xmm2 2214 pcmpeqd %xmm5,%xmm1 2215 movdqa %xmm0,176(%r10) 2216 movdqa %xmm4,%xmm0 2217 2218 paddd %xmm2,%xmm3 2219 pcmpeqd %xmm5,%xmm2 2220 movdqa %xmm1,192(%r10) 2221 movdqa %xmm4,%xmm1 2222 2223 paddd %xmm3,%xmm0 2224 pcmpeqd %xmm5,%xmm3 2225 movdqa %xmm2,208(%r10) 2226 movdqa %xmm4,%xmm2 2227 2228 paddd %xmm0,%xmm1 2229 pcmpeqd %xmm5,%xmm0 2230 movdqa %xmm3,224(%r10) 2231 movdqa %xmm4,%xmm3 2232 paddd %xmm1,%xmm2 2233 pcmpeqd %xmm5,%xmm1 2234 movdqa %xmm0,240(%r10) 2235 movdqa %xmm4,%xmm0 2236 2237 paddd %xmm2,%xmm3 2238 pcmpeqd %xmm5,%xmm2 2239 movdqa %xmm1,256(%r10) 2240 movdqa %xmm4,%xmm1 2241 2242 paddd %xmm3,%xmm0 2243 pcmpeqd %xmm5,%xmm3 2244 movdqa %xmm2,272(%r10) 2245 movdqa %xmm4,%xmm2 2246 2247 paddd %xmm0,%xmm1 2248 pcmpeqd %xmm5,%xmm0 2249 movdqa %xmm3,288(%r10) 2250 movdqa %xmm4,%xmm3 2251.byte 0x67 2252 paddd %xmm1,%xmm2 2253 pcmpeqd %xmm5,%xmm1 2254 movdqa %xmm0,304(%r10) 2255 2256 paddd %xmm2,%xmm3 2257 pcmpeqd %xmm5,%xmm2 2258 movdqa %xmm1,320(%r10) 2259 2260 pcmpeqd %xmm5,%xmm3 2261 movdqa %xmm2,336(%r10) 2262 2263 pand 64(%rdi),%xmm0 2264 pand 80(%rdi),%xmm1 2265 pand 96(%rdi),%xmm2 2266 movdqa %xmm3,352(%r10) 2267 pand 112(%rdi),%xmm3 2268 por %xmm2,%xmm0 2269 por %xmm3,%xmm1 2270 movdqa -128(%rdi),%xmm4 2271 movdqa -112(%rdi),%xmm5 2272 movdqa -96(%rdi),%xmm2 2273 pand 112(%r10),%xmm4 2274 movdqa -80(%rdi),%xmm3 2275 pand 128(%r10),%xmm5 2276 por %xmm4,%xmm0 2277 pand 144(%r10),%xmm2 2278 por %xmm5,%xmm1 2279 pand 160(%r10),%xmm3 2280 por %xmm2,%xmm0 2281 por %xmm3,%xmm1 2282 movdqa -64(%rdi),%xmm4 2283 movdqa -48(%rdi),%xmm5 2284 movdqa -32(%rdi),%xmm2 2285 pand 176(%r10),%xmm4 2286 movdqa -16(%rdi),%xmm3 2287 pand 192(%r10),%xmm5 2288 por %xmm4,%xmm0 2289 pand 208(%r10),%xmm2 2290 por %xmm5,%xmm1 2291 pand 224(%r10),%xmm3 2292 por %xmm2,%xmm0 2293 por %xmm3,%xmm1 2294 movdqa 0(%rdi),%xmm4 2295 movdqa 16(%rdi),%xmm5 2296 movdqa 32(%rdi),%xmm2 2297 pand 240(%r10),%xmm4 2298 movdqa 
48(%rdi),%xmm3 2299 pand 256(%r10),%xmm5 2300 por %xmm4,%xmm0 2301 pand 272(%r10),%xmm2 2302 por %xmm5,%xmm1 2303 pand 288(%r10),%xmm3 2304 por %xmm2,%xmm0 2305 por %xmm3,%xmm1 2306 pxor %xmm1,%xmm0 2307 pshufd $0x4e,%xmm0,%xmm1 2308 por %xmm1,%xmm0 2309 leaq 256(%rdi),%rdi 2310.byte 102,72,15,126,194 2311 leaq 64+32+8(%rsp),%rbx 2312 2313 movq %rdx,%r9 2314 mulxq 0(%rsi),%r8,%rax 2315 mulxq 8(%rsi),%r11,%r12 2316 addq %rax,%r11 2317 mulxq 16(%rsi),%rax,%r13 2318 adcq %rax,%r12 2319 adcq $0,%r13 2320 mulxq 24(%rsi),%rax,%r14 2321 2322 movq %r8,%r15 2323 imulq 32+8(%rsp),%r8 2324 xorq %rbp,%rbp 2325 movq %r8,%rdx 2326 2327 movq %rdi,8+8(%rsp) 2328 2329 leaq 32(%rsi),%rsi 2330 adcxq %rax,%r13 2331 adcxq %rbp,%r14 2332 2333 mulxq 0(%rcx),%rax,%r10 2334 adcxq %rax,%r15 2335 adoxq %r11,%r10 2336 mulxq 8(%rcx),%rax,%r11 2337 adcxq %rax,%r10 2338 adoxq %r12,%r11 2339 mulxq 16(%rcx),%rax,%r12 2340 movq 24+8(%rsp),%rdi 2341 movq %r10,-32(%rbx) 2342 adcxq %rax,%r11 2343 adoxq %r13,%r12 2344 mulxq 24(%rcx),%rax,%r15 2345 movq %r9,%rdx 2346 movq %r11,-24(%rbx) 2347 adcxq %rax,%r12 2348 adoxq %rbp,%r15 2349 leaq 32(%rcx),%rcx 2350 movq %r12,-16(%rbx) 2351 jmp .Lmulx4x_1st 2352 2353.align 32 2354.Lmulx4x_1st: 2355 adcxq %rbp,%r15 2356 mulxq 0(%rsi),%r10,%rax 2357 adcxq %r14,%r10 2358 mulxq 8(%rsi),%r11,%r14 2359 adcxq %rax,%r11 2360 mulxq 16(%rsi),%r12,%rax 2361 adcxq %r14,%r12 2362 mulxq 24(%rsi),%r13,%r14 2363.byte 0x67,0x67 2364 movq %r8,%rdx 2365 adcxq %rax,%r13 2366 adcxq %rbp,%r14 2367 leaq 32(%rsi),%rsi 2368 leaq 32(%rbx),%rbx 2369 2370 adoxq %r15,%r10 2371 mulxq 0(%rcx),%rax,%r15 2372 adcxq %rax,%r10 2373 adoxq %r15,%r11 2374 mulxq 8(%rcx),%rax,%r15 2375 adcxq %rax,%r11 2376 adoxq %r15,%r12 2377 mulxq 16(%rcx),%rax,%r15 2378 movq %r10,-40(%rbx) 2379 adcxq %rax,%r12 2380 movq %r11,-32(%rbx) 2381 adoxq %r15,%r13 2382 mulxq 24(%rcx),%rax,%r15 2383 movq %r9,%rdx 2384 movq %r12,-24(%rbx) 2385 adcxq %rax,%r13 2386 adoxq %rbp,%r15 2387 leaq 32(%rcx),%rcx 2388 movq 
%r13,-16(%rbx) 2389 2390 decq %rdi 2391 jnz .Lmulx4x_1st 2392 2393 movq 8(%rsp),%rax 2394 adcq %rbp,%r15 2395 leaq (%rsi,%rax,1),%rsi 2396 addq %r15,%r14 2397 movq 8+8(%rsp),%rdi 2398 adcq %rbp,%rbp 2399 movq %r14,-8(%rbx) 2400 jmp .Lmulx4x_outer 2401 2402.align 32 2403.Lmulx4x_outer: 2404 leaq 16-256(%rbx),%r10 2405 pxor %xmm4,%xmm4 2406.byte 0x67,0x67 2407 pxor %xmm5,%xmm5 2408 movdqa -128(%rdi),%xmm0 2409 movdqa -112(%rdi),%xmm1 2410 movdqa -96(%rdi),%xmm2 2411 pand 256(%r10),%xmm0 2412 movdqa -80(%rdi),%xmm3 2413 pand 272(%r10),%xmm1 2414 por %xmm0,%xmm4 2415 pand 288(%r10),%xmm2 2416 por %xmm1,%xmm5 2417 pand 304(%r10),%xmm3 2418 por %xmm2,%xmm4 2419 por %xmm3,%xmm5 2420 movdqa -64(%rdi),%xmm0 2421 movdqa -48(%rdi),%xmm1 2422 movdqa -32(%rdi),%xmm2 2423 pand 320(%r10),%xmm0 2424 movdqa -16(%rdi),%xmm3 2425 pand 336(%r10),%xmm1 2426 por %xmm0,%xmm4 2427 pand 352(%r10),%xmm2 2428 por %xmm1,%xmm5 2429 pand 368(%r10),%xmm3 2430 por %xmm2,%xmm4 2431 por %xmm3,%xmm5 2432 movdqa 0(%rdi),%xmm0 2433 movdqa 16(%rdi),%xmm1 2434 movdqa 32(%rdi),%xmm2 2435 pand 384(%r10),%xmm0 2436 movdqa 48(%rdi),%xmm3 2437 pand 400(%r10),%xmm1 2438 por %xmm0,%xmm4 2439 pand 416(%r10),%xmm2 2440 por %xmm1,%xmm5 2441 pand 432(%r10),%xmm3 2442 por %xmm2,%xmm4 2443 por %xmm3,%xmm5 2444 movdqa 64(%rdi),%xmm0 2445 movdqa 80(%rdi),%xmm1 2446 movdqa 96(%rdi),%xmm2 2447 pand 448(%r10),%xmm0 2448 movdqa 112(%rdi),%xmm3 2449 pand 464(%r10),%xmm1 2450 por %xmm0,%xmm4 2451 pand 480(%r10),%xmm2 2452 por %xmm1,%xmm5 2453 pand 496(%r10),%xmm3 2454 por %xmm2,%xmm4 2455 por %xmm3,%xmm5 2456 por %xmm5,%xmm4 2457 pshufd $0x4e,%xmm4,%xmm0 2458 por %xmm4,%xmm0 2459 leaq 256(%rdi),%rdi 2460.byte 102,72,15,126,194 2461 2462 movq %rbp,(%rbx) 2463 leaq 32(%rbx,%rax,1),%rbx 2464 mulxq 0(%rsi),%r8,%r11 2465 xorq %rbp,%rbp 2466 movq %rdx,%r9 2467 mulxq 8(%rsi),%r14,%r12 2468 adoxq -32(%rbx),%r8 2469 adcxq %r14,%r11 2470 mulxq 16(%rsi),%r15,%r13 2471 adoxq -24(%rbx),%r11 2472 adcxq %r15,%r12 2473 mulxq 
24(%rsi),%rdx,%r14 2474 adoxq -16(%rbx),%r12 2475 adcxq %rdx,%r13 2476 leaq (%rcx,%rax,1),%rcx 2477 leaq 32(%rsi),%rsi 2478 adoxq -8(%rbx),%r13 2479 adcxq %rbp,%r14 2480 adoxq %rbp,%r14 2481 2482 movq %r8,%r15 2483 imulq 32+8(%rsp),%r8 2484 2485 movq %r8,%rdx 2486 xorq %rbp,%rbp 2487 movq %rdi,8+8(%rsp) 2488 2489 mulxq 0(%rcx),%rax,%r10 2490 adcxq %rax,%r15 2491 adoxq %r11,%r10 2492 mulxq 8(%rcx),%rax,%r11 2493 adcxq %rax,%r10 2494 adoxq %r12,%r11 2495 mulxq 16(%rcx),%rax,%r12 2496 adcxq %rax,%r11 2497 adoxq %r13,%r12 2498 mulxq 24(%rcx),%rax,%r15 2499 movq %r9,%rdx 2500 movq 24+8(%rsp),%rdi 2501 movq %r10,-32(%rbx) 2502 adcxq %rax,%r12 2503 movq %r11,-24(%rbx) 2504 adoxq %rbp,%r15 2505 movq %r12,-16(%rbx) 2506 leaq 32(%rcx),%rcx 2507 jmp .Lmulx4x_inner 2508 2509.align 32 2510.Lmulx4x_inner: 2511 mulxq 0(%rsi),%r10,%rax 2512 adcxq %rbp,%r15 2513 adoxq %r14,%r10 2514 mulxq 8(%rsi),%r11,%r14 2515 adcxq 0(%rbx),%r10 2516 adoxq %rax,%r11 2517 mulxq 16(%rsi),%r12,%rax 2518 adcxq 8(%rbx),%r11 2519 adoxq %r14,%r12 2520 mulxq 24(%rsi),%r13,%r14 2521 movq %r8,%rdx 2522 adcxq 16(%rbx),%r12 2523 adoxq %rax,%r13 2524 adcxq 24(%rbx),%r13 2525 adoxq %rbp,%r14 2526 leaq 32(%rsi),%rsi 2527 leaq 32(%rbx),%rbx 2528 adcxq %rbp,%r14 2529 2530 adoxq %r15,%r10 2531 mulxq 0(%rcx),%rax,%r15 2532 adcxq %rax,%r10 2533 adoxq %r15,%r11 2534 mulxq 8(%rcx),%rax,%r15 2535 adcxq %rax,%r11 2536 adoxq %r15,%r12 2537 mulxq 16(%rcx),%rax,%r15 2538 movq %r10,-40(%rbx) 2539 adcxq %rax,%r12 2540 adoxq %r15,%r13 2541 movq %r11,-32(%rbx) 2542 mulxq 24(%rcx),%rax,%r15 2543 movq %r9,%rdx 2544 leaq 32(%rcx),%rcx 2545 movq %r12,-24(%rbx) 2546 adcxq %rax,%r13 2547 adoxq %rbp,%r15 2548 movq %r13,-16(%rbx) 2549 2550 decq %rdi 2551 jnz .Lmulx4x_inner 2552 2553 movq 0+8(%rsp),%rax 2554 adcq %rbp,%r15 2555 subq 0(%rbx),%rdi 2556 movq 8+8(%rsp),%rdi 2557 movq 16+8(%rsp),%r10 2558 adcq %r15,%r14 2559 leaq (%rsi,%rax,1),%rsi 2560 adcq %rbp,%rbp 2561 movq %r14,-8(%rbx) 2562 2563 cmpq %r10,%rdi 2564 jb .Lmulx4x_outer 
2565 2566 movq -8(%rcx),%r10 2567 movq %rbp,%r8 2568 movq (%rcx,%rax,1),%r12 2569 leaq (%rcx,%rax,1),%rbp 2570 movq %rax,%rcx 2571 leaq (%rbx,%rax,1),%rdi 2572 xorl %eax,%eax 2573 xorq %r15,%r15 2574 subq %r14,%r10 2575 adcq %r15,%r15 2576 orq %r15,%r8 2577 sarq $3+2,%rcx 2578 subq %r8,%rax 2579 movq 56+8(%rsp),%rdx 2580 decq %r12 2581 movq 8(%rbp),%r13 2582 xorq %r8,%r8 2583 movq 16(%rbp),%r14 2584 movq 24(%rbp),%r15 2585 jmp .Lsqrx4x_sub_entry 2586.cfi_endproc 2587.size mulx4x_internal,.-mulx4x_internal 2588.type bn_powerx5,@function 2589.align 32 2590bn_powerx5: 2591.cfi_startproc 2592 movq %rsp,%rax 2593.cfi_def_cfa_register %rax 2594.Lpowerx5_enter: 2595 pushq %rbx 2596.cfi_offset %rbx,-16 2597 pushq %rbp 2598.cfi_offset %rbp,-24 2599 pushq %r12 2600.cfi_offset %r12,-32 2601 pushq %r13 2602.cfi_offset %r13,-40 2603 pushq %r14 2604.cfi_offset %r14,-48 2605 pushq %r15 2606.cfi_offset %r15,-56 2607.Lpowerx5_prologue: 2608 2609 shll $3,%r9d 2610 leaq (%r9,%r9,2),%r10 2611 negq %r9 2612 movq (%r8),%r8 2613 2614 2615 2616 2617 2618 2619 2620 2621 leaq -320(%rsp,%r9,2),%r11 2622 movq %rsp,%rbp 2623 subq %rdi,%r11 2624 andq $4095,%r11 2625 cmpq %r11,%r10 2626 jb .Lpwrx_sp_alt 2627 subq %r11,%rbp 2628 leaq -320(%rbp,%r9,2),%rbp 2629 jmp .Lpwrx_sp_done 2630 2631.align 32 2632.Lpwrx_sp_alt: 2633 leaq 4096-320(,%r9,2),%r10 2634 leaq -320(%rbp,%r9,2),%rbp 2635 subq %r10,%r11 2636 movq $0,%r10 2637 cmovcq %r10,%r11 2638 subq %r11,%rbp 2639.Lpwrx_sp_done: 2640 andq $-64,%rbp 2641 movq %rsp,%r11 2642 subq %rbp,%r11 2643 andq $-4096,%r11 2644 leaq (%r11,%rbp,1),%rsp 2645 movq (%rsp),%r10 2646 cmpq %rbp,%rsp 2647 ja .Lpwrx_page_walk 2648 jmp .Lpwrx_page_walk_done 2649 2650.Lpwrx_page_walk: 2651 leaq -4096(%rsp),%rsp 2652 movq (%rsp),%r10 2653 cmpq %rbp,%rsp 2654 ja .Lpwrx_page_walk 2655.Lpwrx_page_walk_done: 2656 2657 movq %r9,%r10 2658 negq %r9 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 pxor %xmm0,%xmm0 2672.byte 102,72,15,110,207 2673.byte 
102,72,15,110,209 2674.byte 102,73,15,110,218 2675.byte 102,72,15,110,226 2676 movq %r8,32(%rsp) 2677 movq %rax,40(%rsp) 2678.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2679.Lpowerx5_body: 2680 2681 call __bn_sqrx8x_internal 2682 call __bn_postx4x_internal 2683 call __bn_sqrx8x_internal 2684 call __bn_postx4x_internal 2685 call __bn_sqrx8x_internal 2686 call __bn_postx4x_internal 2687 call __bn_sqrx8x_internal 2688 call __bn_postx4x_internal 2689 call __bn_sqrx8x_internal 2690 call __bn_postx4x_internal 2691 2692 movq %r10,%r9 2693 movq %rsi,%rdi 2694.byte 102,72,15,126,209 2695.byte 102,72,15,126,226 2696 movq 40(%rsp),%rax 2697 2698 call mulx4x_internal 2699 2700 movq 40(%rsp),%rsi 2701.cfi_def_cfa %rsi,8 2702 movq $1,%rax 2703 2704 movq -48(%rsi),%r15 2705.cfi_restore %r15 2706 movq -40(%rsi),%r14 2707.cfi_restore %r14 2708 movq -32(%rsi),%r13 2709.cfi_restore %r13 2710 movq -24(%rsi),%r12 2711.cfi_restore %r12 2712 movq -16(%rsi),%rbp 2713.cfi_restore %rbp 2714 movq -8(%rsi),%rbx 2715.cfi_restore %rbx 2716 leaq (%rsi),%rsp 2717.cfi_def_cfa_register %rsp 2718.Lpowerx5_epilogue: 2719 .byte 0xf3,0xc3 2720.cfi_endproc 2721.size bn_powerx5,.-bn_powerx5 2722 2723.globl bn_sqrx8x_internal 2724.hidden bn_sqrx8x_internal 2725.type bn_sqrx8x_internal,@function 2726.align 32 2727bn_sqrx8x_internal: 2728__bn_sqrx8x_internal: 2729.cfi_startproc 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 leaq 48+8(%rsp),%rdi 2771 leaq (%rsi,%r9,1),%rbp 2772 movq %r9,0+8(%rsp) 2773 movq %rbp,8+8(%rsp) 2774 jmp .Lsqr8x_zero_start 2775 2776.align 32 2777.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2778.Lsqrx8x_zero: 2779.byte 0x3e 2780 movdqa %xmm0,0(%rdi) 2781 movdqa %xmm0,16(%rdi) 2782 movdqa %xmm0,32(%rdi) 2783 movdqa %xmm0,48(%rdi) 2784.Lsqr8x_zero_start: 2785 movdqa %xmm0,64(%rdi) 2786 movdqa %xmm0,80(%rdi) 
2787 movdqa %xmm0,96(%rdi) 2788 movdqa %xmm0,112(%rdi) 2789 leaq 128(%rdi),%rdi 2790 subq $64,%r9 2791 jnz .Lsqrx8x_zero 2792 2793 movq 0(%rsi),%rdx 2794 2795 xorq %r10,%r10 2796 xorq %r11,%r11 2797 xorq %r12,%r12 2798 xorq %r13,%r13 2799 xorq %r14,%r14 2800 xorq %r15,%r15 2801 leaq 48+8(%rsp),%rdi 2802 xorq %rbp,%rbp 2803 jmp .Lsqrx8x_outer_loop 2804 2805.align 32 2806.Lsqrx8x_outer_loop: 2807 mulxq 8(%rsi),%r8,%rax 2808 adcxq %r9,%r8 2809 adoxq %rax,%r10 2810 mulxq 16(%rsi),%r9,%rax 2811 adcxq %r10,%r9 2812 adoxq %rax,%r11 2813.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2814 adcxq %r11,%r10 2815 adoxq %rax,%r12 2816.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2817 adcxq %r12,%r11 2818 adoxq %rax,%r13 2819 mulxq 40(%rsi),%r12,%rax 2820 adcxq %r13,%r12 2821 adoxq %rax,%r14 2822 mulxq 48(%rsi),%r13,%rax 2823 adcxq %r14,%r13 2824 adoxq %r15,%rax 2825 mulxq 56(%rsi),%r14,%r15 2826 movq 8(%rsi),%rdx 2827 adcxq %rax,%r14 2828 adoxq %rbp,%r15 2829 adcq 64(%rdi),%r15 2830 movq %r8,8(%rdi) 2831 movq %r9,16(%rdi) 2832 sbbq %rcx,%rcx 2833 xorq %rbp,%rbp 2834 2835 2836 mulxq 16(%rsi),%r8,%rbx 2837 mulxq 24(%rsi),%r9,%rax 2838 adcxq %r10,%r8 2839 adoxq %rbx,%r9 2840 mulxq 32(%rsi),%r10,%rbx 2841 adcxq %r11,%r9 2842 adoxq %rax,%r10 2843.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 2844 adcxq %r12,%r10 2845 adoxq %rbx,%r11 2846.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 2847 adcxq %r13,%r11 2848 adoxq %r14,%r12 2849.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 2850 movq 16(%rsi),%rdx 2851 adcxq %rax,%r12 2852 adoxq %rbx,%r13 2853 adcxq %r15,%r13 2854 adoxq %rbp,%r14 2855 adcxq %rbp,%r14 2856 2857 movq %r8,24(%rdi) 2858 movq %r9,32(%rdi) 2859 2860 mulxq 24(%rsi),%r8,%rbx 2861 mulxq 32(%rsi),%r9,%rax 2862 adcxq %r10,%r8 2863 adoxq %rbx,%r9 2864 mulxq 40(%rsi),%r10,%rbx 2865 adcxq %r11,%r9 2866 adoxq %rax,%r10 2867.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 2868 adcxq %r12,%r10 2869 adoxq %r13,%r11 2870.byte 
0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 2871.byte 0x3e 2872 movq 24(%rsi),%rdx 2873 adcxq %rbx,%r11 2874 adoxq %rax,%r12 2875 adcxq %r14,%r12 2876 movq %r8,40(%rdi) 2877 movq %r9,48(%rdi) 2878 mulxq 32(%rsi),%r8,%rax 2879 adoxq %rbp,%r13 2880 adcxq %rbp,%r13 2881 2882 mulxq 40(%rsi),%r9,%rbx 2883 adcxq %r10,%r8 2884 adoxq %rax,%r9 2885 mulxq 48(%rsi),%r10,%rax 2886 adcxq %r11,%r9 2887 adoxq %r12,%r10 2888 mulxq 56(%rsi),%r11,%r12 2889 movq 32(%rsi),%rdx 2890 movq 40(%rsi),%r14 2891 adcxq %rbx,%r10 2892 adoxq %rax,%r11 2893 movq 48(%rsi),%r15 2894 adcxq %r13,%r11 2895 adoxq %rbp,%r12 2896 adcxq %rbp,%r12 2897 2898 movq %r8,56(%rdi) 2899 movq %r9,64(%rdi) 2900 2901 mulxq %r14,%r9,%rax 2902 movq 56(%rsi),%r8 2903 adcxq %r10,%r9 2904 mulxq %r15,%r10,%rbx 2905 adoxq %rax,%r10 2906 adcxq %r11,%r10 2907 mulxq %r8,%r11,%rax 2908 movq %r14,%rdx 2909 adoxq %rbx,%r11 2910 adcxq %r12,%r11 2911 2912 adcxq %rbp,%rax 2913 2914 mulxq %r15,%r14,%rbx 2915 mulxq %r8,%r12,%r13 2916 movq %r15,%rdx 2917 leaq 64(%rsi),%rsi 2918 adcxq %r14,%r11 2919 adoxq %rbx,%r12 2920 adcxq %rax,%r12 2921 adoxq %rbp,%r13 2922 2923.byte 0x67,0x67 2924 mulxq %r8,%r8,%r14 2925 adcxq %r8,%r13 2926 adcxq %rbp,%r14 2927 2928 cmpq 8+8(%rsp),%rsi 2929 je .Lsqrx8x_outer_break 2930 2931 negq %rcx 2932 movq $-8,%rcx 2933 movq %rbp,%r15 2934 movq 64(%rdi),%r8 2935 adcxq 72(%rdi),%r9 2936 adcxq 80(%rdi),%r10 2937 adcxq 88(%rdi),%r11 2938 adcq 96(%rdi),%r12 2939 adcq 104(%rdi),%r13 2940 adcq 112(%rdi),%r14 2941 adcq 120(%rdi),%r15 2942 leaq (%rsi),%rbp 2943 leaq 128(%rdi),%rdi 2944 sbbq %rax,%rax 2945 2946 movq -64(%rsi),%rdx 2947 movq %rax,16+8(%rsp) 2948 movq %rdi,24+8(%rsp) 2949 2950 2951 xorl %eax,%eax 2952 jmp .Lsqrx8x_loop 2953 2954.align 32 2955.Lsqrx8x_loop: 2956 movq %r8,%rbx 2957 mulxq 0(%rbp),%rax,%r8 2958 adcxq %rax,%rbx 2959 adoxq %r9,%r8 2960 2961 mulxq 8(%rbp),%rax,%r9 2962 adcxq %rax,%r8 2963 adoxq %r10,%r9 2964 2965 mulxq 16(%rbp),%rax,%r10 2966 adcxq %rax,%r9 2967 adoxq %r11,%r10 2968 2969 
mulxq 24(%rbp),%rax,%r11 2970 adcxq %rax,%r10 2971 adoxq %r12,%r11 2972 2973.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 2974 adcxq %rax,%r11 2975 adoxq %r13,%r12 2976 2977 mulxq 40(%rbp),%rax,%r13 2978 adcxq %rax,%r12 2979 adoxq %r14,%r13 2980 2981 mulxq 48(%rbp),%rax,%r14 2982 movq %rbx,(%rdi,%rcx,8) 2983 movl $0,%ebx 2984 adcxq %rax,%r13 2985 adoxq %r15,%r14 2986 2987.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 2988 movq 8(%rsi,%rcx,8),%rdx 2989 adcxq %rax,%r14 2990 adoxq %rbx,%r15 2991 adcxq %rbx,%r15 2992 2993.byte 0x67 2994 incq %rcx 2995 jnz .Lsqrx8x_loop 2996 2997 leaq 64(%rbp),%rbp 2998 movq $-8,%rcx 2999 cmpq 8+8(%rsp),%rbp 3000 je .Lsqrx8x_break 3001 3002 subq 16+8(%rsp),%rbx 3003.byte 0x66 3004 movq -64(%rsi),%rdx 3005 adcxq 0(%rdi),%r8 3006 adcxq 8(%rdi),%r9 3007 adcq 16(%rdi),%r10 3008 adcq 24(%rdi),%r11 3009 adcq 32(%rdi),%r12 3010 adcq 40(%rdi),%r13 3011 adcq 48(%rdi),%r14 3012 adcq 56(%rdi),%r15 3013 leaq 64(%rdi),%rdi 3014.byte 0x67 3015 sbbq %rax,%rax 3016 xorl %ebx,%ebx 3017 movq %rax,16+8(%rsp) 3018 jmp .Lsqrx8x_loop 3019 3020.align 32 3021.Lsqrx8x_break: 3022 xorq %rbp,%rbp 3023 subq 16+8(%rsp),%rbx 3024 adcxq %rbp,%r8 3025 movq 24+8(%rsp),%rcx 3026 adcxq %rbp,%r9 3027 movq 0(%rsi),%rdx 3028 adcq $0,%r10 3029 movq %r8,0(%rdi) 3030 adcq $0,%r11 3031 adcq $0,%r12 3032 adcq $0,%r13 3033 adcq $0,%r14 3034 adcq $0,%r15 3035 cmpq %rcx,%rdi 3036 je .Lsqrx8x_outer_loop 3037 3038 movq %r9,8(%rdi) 3039 movq 8(%rcx),%r9 3040 movq %r10,16(%rdi) 3041 movq 16(%rcx),%r10 3042 movq %r11,24(%rdi) 3043 movq 24(%rcx),%r11 3044 movq %r12,32(%rdi) 3045 movq 32(%rcx),%r12 3046 movq %r13,40(%rdi) 3047 movq 40(%rcx),%r13 3048 movq %r14,48(%rdi) 3049 movq 48(%rcx),%r14 3050 movq %r15,56(%rdi) 3051 movq 56(%rcx),%r15 3052 movq %rcx,%rdi 3053 jmp .Lsqrx8x_outer_loop 3054 3055.align 32 3056.Lsqrx8x_outer_break: 3057 movq %r9,72(%rdi) 3058.byte 102,72,15,126,217 3059 movq %r10,80(%rdi) 3060 movq %r11,88(%rdi) 3061 movq %r12,96(%rdi) 3062 movq %r13,104(%rdi) 
3063 movq %r14,112(%rdi) 3064 leaq 48+8(%rsp),%rdi 3065 movq (%rsi,%rcx,1),%rdx 3066 3067 movq 8(%rdi),%r11 3068 xorq %r10,%r10 3069 movq 0+8(%rsp),%r9 3070 adoxq %r11,%r11 3071 movq 16(%rdi),%r12 3072 movq 24(%rdi),%r13 3073 3074 3075.align 32 3076.Lsqrx4x_shift_n_add: 3077 mulxq %rdx,%rax,%rbx 3078 adoxq %r12,%r12 3079 adcxq %r10,%rax 3080.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3081.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3082 adoxq %r13,%r13 3083 adcxq %r11,%rbx 3084 movq 40(%rdi),%r11 3085 movq %rax,0(%rdi) 3086 movq %rbx,8(%rdi) 3087 3088 mulxq %rdx,%rax,%rbx 3089 adoxq %r10,%r10 3090 adcxq %r12,%rax 3091 movq 16(%rsi,%rcx,1),%rdx 3092 movq 48(%rdi),%r12 3093 adoxq %r11,%r11 3094 adcxq %r13,%rbx 3095 movq 56(%rdi),%r13 3096 movq %rax,16(%rdi) 3097 movq %rbx,24(%rdi) 3098 3099 mulxq %rdx,%rax,%rbx 3100 adoxq %r12,%r12 3101 adcxq %r10,%rax 3102 movq 24(%rsi,%rcx,1),%rdx 3103 leaq 32(%rcx),%rcx 3104 movq 64(%rdi),%r10 3105 adoxq %r13,%r13 3106 adcxq %r11,%rbx 3107 movq 72(%rdi),%r11 3108 movq %rax,32(%rdi) 3109 movq %rbx,40(%rdi) 3110 3111 mulxq %rdx,%rax,%rbx 3112 adoxq %r10,%r10 3113 adcxq %r12,%rax 3114 jrcxz .Lsqrx4x_shift_n_add_break 3115.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3116 adoxq %r11,%r11 3117 adcxq %r13,%rbx 3118 movq 80(%rdi),%r12 3119 movq 88(%rdi),%r13 3120 movq %rax,48(%rdi) 3121 movq %rbx,56(%rdi) 3122 leaq 64(%rdi),%rdi 3123 nop 3124 jmp .Lsqrx4x_shift_n_add 3125 3126.align 32 3127.Lsqrx4x_shift_n_add_break: 3128 adcxq %r13,%rbx 3129 movq %rax,48(%rdi) 3130 movq %rbx,56(%rdi) 3131 leaq 64(%rdi),%rdi 3132.byte 102,72,15,126,213 3133__bn_sqrx8x_reduction: 3134 xorl %eax,%eax 3135 movq 32+8(%rsp),%rbx 3136 movq 48+8(%rsp),%rdx 3137 leaq -64(%rbp,%r9,1),%rcx 3138 3139 movq %rcx,0+8(%rsp) 3140 movq %rdi,8+8(%rsp) 3141 3142 leaq 48+8(%rsp),%rdi 3143 jmp .Lsqrx8x_reduction_loop 3144 3145.align 32 3146.Lsqrx8x_reduction_loop: 3147 movq 8(%rdi),%r9 3148 movq 16(%rdi),%r10 3149 movq 24(%rdi),%r11 3150 movq 32(%rdi),%r12 3151 movq %rdx,%r8 
3152 imulq %rbx,%rdx 3153 movq 40(%rdi),%r13 3154 movq 48(%rdi),%r14 3155 movq 56(%rdi),%r15 3156 movq %rax,24+8(%rsp) 3157 3158 leaq 64(%rdi),%rdi 3159 xorq %rsi,%rsi 3160 movq $-8,%rcx 3161 jmp .Lsqrx8x_reduce 3162 3163.align 32 3164.Lsqrx8x_reduce: 3165 movq %r8,%rbx 3166 mulxq 0(%rbp),%rax,%r8 3167 adcxq %rbx,%rax 3168 adoxq %r9,%r8 3169 3170 mulxq 8(%rbp),%rbx,%r9 3171 adcxq %rbx,%r8 3172 adoxq %r10,%r9 3173 3174 mulxq 16(%rbp),%rbx,%r10 3175 adcxq %rbx,%r9 3176 adoxq %r11,%r10 3177 3178 mulxq 24(%rbp),%rbx,%r11 3179 adcxq %rbx,%r10 3180 adoxq %r12,%r11 3181 3182.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3183 movq %rdx,%rax 3184 movq %r8,%rdx 3185 adcxq %rbx,%r11 3186 adoxq %r13,%r12 3187 3188 mulxq 32+8(%rsp),%rbx,%rdx 3189 movq %rax,%rdx 3190 movq %rax,64+48+8(%rsp,%rcx,8) 3191 3192 mulxq 40(%rbp),%rax,%r13 3193 adcxq %rax,%r12 3194 adoxq %r14,%r13 3195 3196 mulxq 48(%rbp),%rax,%r14 3197 adcxq %rax,%r13 3198 adoxq %r15,%r14 3199 3200 mulxq 56(%rbp),%rax,%r15 3201 movq %rbx,%rdx 3202 adcxq %rax,%r14 3203 adoxq %rsi,%r15 3204 adcxq %rsi,%r15 3205 3206.byte 0x67,0x67,0x67 3207 incq %rcx 3208 jnz .Lsqrx8x_reduce 3209 3210 movq %rsi,%rax 3211 cmpq 0+8(%rsp),%rbp 3212 jae .Lsqrx8x_no_tail 3213 3214 movq 48+8(%rsp),%rdx 3215 addq 0(%rdi),%r8 3216 leaq 64(%rbp),%rbp 3217 movq $-8,%rcx 3218 adcxq 8(%rdi),%r9 3219 adcxq 16(%rdi),%r10 3220 adcq 24(%rdi),%r11 3221 adcq 32(%rdi),%r12 3222 adcq 40(%rdi),%r13 3223 adcq 48(%rdi),%r14 3224 adcq 56(%rdi),%r15 3225 leaq 64(%rdi),%rdi 3226 sbbq %rax,%rax 3227 3228 xorq %rsi,%rsi 3229 movq %rax,16+8(%rsp) 3230 jmp .Lsqrx8x_tail 3231 3232.align 32 3233.Lsqrx8x_tail: 3234 movq %r8,%rbx 3235 mulxq 0(%rbp),%rax,%r8 3236 adcxq %rax,%rbx 3237 adoxq %r9,%r8 3238 3239 mulxq 8(%rbp),%rax,%r9 3240 adcxq %rax,%r8 3241 adoxq %r10,%r9 3242 3243 mulxq 16(%rbp),%rax,%r10 3244 adcxq %rax,%r9 3245 adoxq %r11,%r10 3246 3247 mulxq 24(%rbp),%rax,%r11 3248 adcxq %rax,%r10 3249 adoxq %r12,%r11 3250 3251.byte 
0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3252 adcxq %rax,%r11 3253 adoxq %r13,%r12 3254 3255 mulxq 40(%rbp),%rax,%r13 3256 adcxq %rax,%r12 3257 adoxq %r14,%r13 3258 3259 mulxq 48(%rbp),%rax,%r14 3260 adcxq %rax,%r13 3261 adoxq %r15,%r14 3262 3263 mulxq 56(%rbp),%rax,%r15 3264 movq 72+48+8(%rsp,%rcx,8),%rdx 3265 adcxq %rax,%r14 3266 adoxq %rsi,%r15 3267 movq %rbx,(%rdi,%rcx,8) 3268 movq %r8,%rbx 3269 adcxq %rsi,%r15 3270 3271 incq %rcx 3272 jnz .Lsqrx8x_tail 3273 3274 cmpq 0+8(%rsp),%rbp 3275 jae .Lsqrx8x_tail_done 3276 3277 subq 16+8(%rsp),%rsi 3278 movq 48+8(%rsp),%rdx 3279 leaq 64(%rbp),%rbp 3280 adcq 0(%rdi),%r8 3281 adcq 8(%rdi),%r9 3282 adcq 16(%rdi),%r10 3283 adcq 24(%rdi),%r11 3284 adcq 32(%rdi),%r12 3285 adcq 40(%rdi),%r13 3286 adcq 48(%rdi),%r14 3287 adcq 56(%rdi),%r15 3288 leaq 64(%rdi),%rdi 3289 sbbq %rax,%rax 3290 subq $8,%rcx 3291 3292 xorq %rsi,%rsi 3293 movq %rax,16+8(%rsp) 3294 jmp .Lsqrx8x_tail 3295 3296.align 32 3297.Lsqrx8x_tail_done: 3298 xorq %rax,%rax 3299 addq 24+8(%rsp),%r8 3300 adcq $0,%r9 3301 adcq $0,%r10 3302 adcq $0,%r11 3303 adcq $0,%r12 3304 adcq $0,%r13 3305 adcq $0,%r14 3306 adcq $0,%r15 3307 adcq $0,%rax 3308 3309 subq 16+8(%rsp),%rsi 3310.Lsqrx8x_no_tail: 3311 adcq 0(%rdi),%r8 3312.byte 102,72,15,126,217 3313 adcq 8(%rdi),%r9 3314 movq 56(%rbp),%rsi 3315.byte 102,72,15,126,213 3316 adcq 16(%rdi),%r10 3317 adcq 24(%rdi),%r11 3318 adcq 32(%rdi),%r12 3319 adcq 40(%rdi),%r13 3320 adcq 48(%rdi),%r14 3321 adcq 56(%rdi),%r15 3322 adcq $0,%rax 3323 3324 movq 32+8(%rsp),%rbx 3325 movq 64(%rdi,%rcx,1),%rdx 3326 3327 movq %r8,0(%rdi) 3328 leaq 64(%rdi),%r8 3329 movq %r9,8(%rdi) 3330 movq %r10,16(%rdi) 3331 movq %r11,24(%rdi) 3332 movq %r12,32(%rdi) 3333 movq %r13,40(%rdi) 3334 movq %r14,48(%rdi) 3335 movq %r15,56(%rdi) 3336 3337 leaq 64(%rdi,%rcx,1),%rdi 3338 cmpq 8+8(%rsp),%r8 3339 jb .Lsqrx8x_reduction_loop 3340 .byte 0xf3,0xc3 3341.cfi_endproc 3342.size bn_sqrx8x_internal,.-bn_sqrx8x_internal 3343.align 32 3344__bn_postx4x_internal: 
# NOTE(review): auto-generated OpenSSL perlasm (x86_64-mont5.pl) output,
# AT&T/GAS syntax, SysV AMD64 ABI.  Instructions below are identical to
# the original; only comments were added.  This span holds the body of
# __bn_postx4x_internal (its label immediately precedes this span), the
# two small helpers bn_get_bits5 and bn_scatter5, and the entry label of
# bn_gather5 (whose body continues past this span).

# ----- __bn_postx4x_internal (body) ------------------------------------
# Branch-free final reduction for the MULX/ADX Montgomery path: adds the
# masked two's complement of the modulus to the result, 4 limbs per
# iteration, so the memory access pattern is identical whether or not the
# subtraction takes effect (constant-time).
# NOTE(review): register contract inferred from the code below — %rbp ->
# modulus n, %rcx = length in bytes, %rax = select mask, %xmm1 = output
# pointer; confirm against the generating perlasm script.
.cfi_startproc
	movq	0(%rbp),%r12		# preload n[0]
	movq	%rcx,%r10
	movq	%rcx,%r9		# keep length; re-negated before return
	negq	%rax
	sarq	$3+2,%rcx		# rcx = -(number of 4-limb groups)

.byte	102,72,15,126,202		# movq %xmm1,%rdx (output pointer)
.byte	102,72,15,126,206		# movq %xmm1,%rsi
	decq	%r12			# n[0]-1: ~(n[0]-1) == -n[0], supplies the
					#  two's-complement +1 for the first group
	movq	8(%rbp),%r13
	xorq	%r8,%r8			# r8 = running borrow (0 or -1)
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqrx4x_sub_entry

.align	16
.Lsqrx4x_sub:
	movq	0(%rbp),%r12		# load next 4 limbs of n
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
.Lsqrx4x_sub_entry:
	andnq	%rax,%r12,%r12		# r12 = ~n[i] & mask (BMI1 ANDN; with the
	leaq	32(%rbp),%rbp		#  adc chain below this is t - n when the
	andnq	%rax,%r13,%r13		#  mask is all-ones, t + 0 when zero)
	andnq	%rax,%r14,%r14
	andnq	%rax,%r15,%r15

	negq	%r8			# restore saved borrow into CF
	adcq	0(%rdi),%r12		# t[i] + masked(~n[i]) + carry
	adcq	8(%rdi),%r13
	adcq	16(%rdi),%r14
	adcq	24(%rdi),%r15
	movq	%r12,0(%rdx)		# store result limbs
	leaq	32(%rdi),%rdi
	movq	%r13,8(%rdx)
	sbbq	%r8,%r8			# capture borrow for the next group
	movq	%r14,16(%rdx)
	movq	%r15,24(%rdx)
	leaq	32(%rdx),%rdx

	incq	%rcx			# counts up from -(groups) toward 0
	jnz	.Lsqrx4x_sub

	negq	%r9			# hand the (negated) length back to caller

	.byte	0xf3,0xc3		# repz ret (branch-predictor-friendly ret)
.cfi_endproc
.size	__bn_postx4x_internal,.-__bn_postx4x_internal

# ----- bn_get_bits5 ----------------------------------------------------
# int bn_get_bits5(const void *a /* %rdi */, int bits /* %esi */)
# (signature inferred from register usage)
# Returns the 5-bit window starting at bit offset 'bits'.  A 16-bit word
# is loaded; when the window would straddle the end of that word (bit
# offset within the word > 11), the load base is advanced one byte and
# the shift count reduced by 8 so the full 5 bits are still captured.
.globl bn_get_bits5
.type	bn_get_bits5,@function
.align	16
bn_get_bits5:
.cfi_startproc
	leaq	0(%rdi),%r10		# base for the in-word case
	leaq	1(%rdi),%r11		# base + 1 byte for the straddling case
	movl	%esi,%ecx
	shrl	$4,%esi			# esi = index of the 16-bit word
	andl	$15,%ecx		# ecx = bit offset within that word
	leal	-8(%rcx),%eax		# shift count adjusted for the +1 base
	cmpl	$11,%ecx
	cmovaq	%r11,%r10		# offset > 11: window crosses the word,
	cmoval	%eax,%ecx		#  pick shifted base and offset-8
	movzwl	(%r10,%rsi,2),%eax	# load 16 bits containing the window
	shrl	%cl,%eax
	andl	$31,%eax		# keep the low 5 bits
	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	bn_get_bits5,.-bn_get_bits5

# ----- bn_scatter5 -----------------------------------------------------
# void bn_scatter5(const u64 *inp /* %rdi */, int num /* %esi */,
#                  void *tbl /* %rdx */, size_t idx /* %rcx */)
# (signature inferred from register usage)
# Writes 'num' 64-bit words into column 'idx' of the power table: word i
# is stored at tbl + idx*8 + i*256, i.e. 32 interleaved 8-byte columns,
# matching the strided layout bn_gather5 reads back.
.globl bn_scatter5
.type	bn_scatter5,@function
.align	16
bn_scatter5:
.cfi_startproc
	cmpl	$0,%esi
	jz	.Lscatter_epilogue	# num == 0: nothing to store
	leaq	(%rdx,%rcx,8),%rdx	# start of column idx
.Lscatter:
	movq	(%rdi),%rax
	leaq	8(%rdi),%rdi
	movq	%rax,(%rdx)
	leaq	256(%rdx),%rdx		# next row: 32 columns * 8 bytes
	subl	$1,%esi
	jnz	.Lscatter
.Lscatter_epilogue:
	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	bn_scatter5,.-bn_scatter5

# ----- bn_gather5 (entry label; body continues beyond this span) -------
.globl bn_gather5
.type	bn_gather5,@function
.align	32
bn_gather5:
.LSEH_begin_bn_gather5:			/* Win64 SEH bracket for this body */
.cfi_startproc
	/*
	 * bn_gather5 -- cache-timing-safe gather: copy %esi qwords of
	 * table entry %ecx (0..31) from the interleaved power table at
	 * %rdx into %rdi.  Every iteration reads an entire 256-byte row
	 * and selects the wanted entry with precomputed equality masks,
	 * so memory access pattern is independent of the secret index.
	 */
.byte	0x4c,0x8d,0x14,0x24		/* leaq (%rsp),%r10 -- save %rsp;
					   hand-encoded (NOTE(review):
					   presumably to pin the Win64 SEH
					   prologue encoding -- confirm) */
.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00,0x00	/* subq $0x108,%rsp */
	leaq	.Linc(%rip),%rax
	andq	$-16,%rsp		/* 16-byte align for movdqa stores */

	movd	%ecx,%xmm5
	movdqa	0(%rax),%xmm0		/* {0,0,1,1} from .Linc */
	movdqa	16(%rax),%xmm1		/* {2,2,2,2} increment */
	leaq	128(%rdx),%r11		/* biased table pointer */
	leaq	128(%rsp),%rax		/* biased mask-buffer pointer */

	/*
	 * Build a 256-byte mask table: each pcmpeqd lane compares the
	 * broadcast index against a running counter 0,0,1,1 / 2,2,3,3 /
	 * ... / 30,30,31,31, so exactly one 64-bit lane out of the 32
	 * becomes all-ones.
	 */
	pshufd	$0,%xmm5,%xmm5		/* broadcast index to all lanes */
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0		/* mask for entries 0,1 */
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1		/* entries 2,3 */
	movdqa	%xmm0,-128(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2		/* entries 4,5 */
	movdqa	%xmm1,-112(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3		/* entries 6,7 */
	movdqa	%xmm2,-96(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0		/* entries 8,9 */
	movdqa	%xmm3,-80(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1		/* entries 10,11 */
	movdqa	%xmm0,-64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2		/* entries 12,13 */
	movdqa	%xmm1,-48(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3		/* entries 14,15 */
	movdqa	%xmm2,-32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0		/* entries 16,17 */
	movdqa	%xmm3,-16(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1		/* entries 18,19 */
	movdqa	%xmm0,0(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2		/* entries 20,21 */
	movdqa	%xmm1,16(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3		/* entries 22,23 */
	movdqa	%xmm2,32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0		/* entries 24,25 */
	movdqa	%xmm3,48(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1		/* entries 26,27 */
	movdqa	%xmm0,64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2		/* entries 28,29 */
	movdqa	%xmm1,80(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3		/* entries 30,31 */
	movdqa	%xmm2,96(%rax)
	movdqa	%xmm4,%xmm2
	movdqa	%xmm3,112(%rax)
	jmp	.Lgather

.align	32
.Lgather:
	/* AND each 16-byte slot of the 256-byte row with its mask and
	   OR everything together; only the selected entry survives. */
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r11),%xmm0
	movdqa	-112(%r11),%xmm1
	movdqa	-96(%r11),%xmm2
	pand	-128(%rax),%xmm0
	movdqa	-80(%r11),%xmm3
	pand	-112(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r11),%xmm0
	movdqa	-48(%r11),%xmm1
	movdqa	-32(%r11),%xmm2
	pand	-64(%rax),%xmm0
	movdqa	-16(%r11),%xmm3
	pand	-48(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r11),%xmm0
	movdqa	16(%r11),%xmm1
	movdqa	32(%r11),%xmm2
	pand	0(%rax),%xmm0
	movdqa	48(%r11),%xmm3
	pand	16(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r11),%xmm0
	movdqa	80(%r11),%xmm1
	movdqa	96(%r11),%xmm2
	pand	64(%rax),%xmm0
	movdqa	112(%r11),%xmm3
	pand	80(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	leaq	256(%r11),%r11		/* next table row */
	pshufd	$0x4e,%xmm4,%xmm0	/* swap qword halves ... */
	por	%xmm4,%xmm0		/* ... and fold: selected qword low */
	movq	%xmm0,(%rdi)
	leaq	8(%rdi),%rdi
	subl	$1,%esi
	jnz	.Lgather

	leaq	(%r10),%rsp		/* restore caller's %rsp */
	.byte	0xf3,0xc3		/* rep ret */
.LSEH_end_bn_gather5:
.cfi_endproc
.size	bn_gather5,.-bn_gather5
.align	64
.Linc:					/* constants for the mask counters */
.long	0,0, 1,1			/* initial lane indices {0,0,1,1} */
.long	2,2, 2,2			/* per-step increment {2,2,2,2} */
/* ASCII attribution string, NUL-terminated: "Montgomery Multiplication
   with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0