/* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
.text

.globl	bn_mul_mont_gather5
.type	bn_mul_mont_gather5,@function
.align	64
bn_mul_mont_gather5:
.cfi_startproc
	movl	%r9d,%r9d
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	testl	$7,%r9d
	jnz	.Lmul_enter
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	movd	8(%rsp),%xmm5
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	negq	%r9
	movq	%rsp,%r11
	leaq	-280(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.Lmul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	leaq	.Linc(%rip),%r10
	movq	%rax,8(%rsp,%r9,8)
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:

	leaq	128(%rdx),%r12
	movdqa	0(%r10),%xmm0
	movdqa	16(%r10),%xmm1
	leaq	24-112(%rsp,%r9,8),%r10
	andq	$-16,%r10

	pshufd	$0,%xmm5,%xmm5
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)
	pand	64(%r12),%xmm0

	pand	80(%r12),%xmm1
	pand	96(%r12),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1
	por	%xmm1,%xmm0
	leaq	256(%r12),%r12
.byte	102,72,15,126,195
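# %rbx now holds the gathered b[0]: the power is selected from the table
# around 128(%r12) by ANDing every entry with the pcmpeqd-generated masks
# and ORing the results, so the secret index never steers a load address.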
	movq	(%r8),%r8
	movq	(%rsi),%rax

	xorq	%r14,%r14
	xorq	%r15,%r15

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.L1st

	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	jmp	.Louter
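# Outer loop of the one-word-at-a-time Montgomery multiply: each pass
# gathers the next b[i] the same constant-time way, then accumulates
# b[i]*a[] plus m*n[] (m = t[0]*n0 mod 2^64) into the stack temporary.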
.align	16
.Louter:
	leaq	24+128(%rsp,%r9,8),%rdx
	andq	$-16,%rdx
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12

	movq	(%rsi),%rax
.byte	102,72,15,126,195

	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r9,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter

	xorq	%r14,%r14
	movq	(%rsp),%rax
	leaq	(%rsp),%rsi
	movq	%r9,%r15
	jmp	.Lsub
.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)
	movq	8(%rsi,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	sbbq	$0,%rax
	movq	$-1,%rbx
	xorq	%rax,%rbx
	xorq	%r14,%r14
	movq	%r9,%r15

.Lcopy:
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r14,(%rsp,%r14,8)
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
.byte	0xf3,0xc3
.cfi_endproc
.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5
.type	bn_mul4x_mont_gather5,@function
.align	32
bn_mul4x_mont_gather5:
.cfi_startproc
.byte	0x67
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	andl	$0x80108,%r11d
	cmpl	$0x80108,%r11d
	je	.Lmulx4x_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmul4x_prologue:

.byte	0x67
	shll	$3,%r9d
	leaq	(%r9,%r9,2),%r10
	negq	%r9

	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lmul4xsp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lmul4xsp_done

.align	32
.Lmul4xsp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lmul4xsp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	negq	%r9

	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body:

	call	mul4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
.byte	0xf3,0xc3
.cfi_endproc
.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
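# mul4x_internal: Montgomery multiplication core unrolled four limbs per
# pass, shared by bn_mul4x_mont_gather5 and bn_power5; %rax is expected
# to carry the caller's original stack pointer, so "movd 8(%rax),%xmm5"
# below picks up the gather index from the caller's stack.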
.type	mul4x_internal,@function
.align	32
mul4x_internal:
.cfi_startproc
	shlq	$5,%r9
	movd	8(%rax),%xmm5
	leaq	.Linc(%rip),%rax
	leaq	128(%rdx,%r9,1),%r13
	shrq	$5,%r9
	movdqa	0(%rax),%xmm0
	movdqa	16(%rax),%xmm1
	leaq	88-112(%rsp,%r9,1),%r10
	leaq	128(%rdx),%r12

	pshufd	$0,%xmm5,%xmm5
	movdqa	%xmm1,%xmm4
.byte	0x67,0x67
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)
	pand	64(%r12),%xmm0

	pand	80(%r12),%xmm1
	pand	96(%r12),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1
	por	%xmm1,%xmm0
	leaq	256(%r12),%r12
.byte	102,72,15,126,195

	movq	%r13,16+8(%rsp)
	movq	%rdi,56+8(%rsp)

	movq	(%r8),%r8
	movq	(%rsi),%rax
	leaq	(%rsi,%r9,1),%rsi
	negq	%r9

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp
	leaq	64+8(%rsp),%r14
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%r9),%r15
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdi,(%r14)
	movq	%rdx,%r13
	jmp	.L1st4x
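# .L1st4x: first iteration (b[0]), walking a[] and n[] four 64-bit limbs
# per round and accumulating into the temporary at (%r14).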
.align	32
.L1st4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	0(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdi,(%r14)
	movq	%rdx,%r13

	addq	$32,%r15
	jnz	.L1st4x

	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%r13

	leaq	(%rcx,%r9,1),%rcx

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%r14)

	jmp	.Louter4x

.align	32
.Louter4x:
	leaq	16+128(%r14),%rdx
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12
.byte	102,72,15,126,195

	movq	(%r14,%r9,1),%r10
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp
	movq	%rdx,%r11
	movq	%rdi,(%r14)

	leaq	(%r14,%r9,1),%r14

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%r9),%r15
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%rdx,%r13
	jmp	.Linner4x
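# .Linner4x: same four-limb pattern for the remaining b[i], additionally
# folding in the previous pass's intermediate result kept at (%r14).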
.align	32
.Linner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	adcq	$0,%rdx
	addq	16(%r14),%r10
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx),%rax
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	0(%rcx),%rax
	adcq	$0,%rdx
	addq	(%r14),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-16(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi,%r15,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	32(%rcx),%rcx
	adcq	$0,%rdx
	movq	%r13,-8(%r14)
	movq	%rdx,%r13

	addq	$32,%r15
	jnz	.Linner4x

	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx),%rax
	adcq	$0,%rdx
	addq	16(%r14),%r10
	leaq	32(%r14),%r14
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%rdi,-32(%r14)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	%rbp,%rax
	movq	-8(%rcx),%rbp
	adcq	$0,%rdx
	addq	-8(%r14),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r9,1),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%r13,-24(%r14)
	movq	%rdx,%r13

	movq	%rdi,-16(%r14)
	leaq	(%rcx,%r9,1),%rcx

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%r14),%r13
	adcq	$0,%rdi
	movq	%r13,-8(%r14)

	cmpq	16+8(%rsp),%r12
	jb	.Louter4x
	xorq	%rax,%rax
	subq	%r13,%rbp
	adcq	%r15,%r15
	orq	%r15,%rdi
	subq	%rdi,%rax
	leaq	(%r14,%r9,1),%rbx
	movq	(%rcx),%r12
	leaq	(%rcx),%rbp
	movq	%r9,%rcx
	sarq	$3+2,%rcx
	movq	56+8(%rsp),%rdi
	decq	%r12
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqr4x_sub_entry
.cfi_endproc
.size	mul4x_internal,.-mul4x_internal
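# bn_power5: one 2^5-window step of modular exponentiation, i.e. five
# Montgomery squarings followed by a gather-based Montgomery multiply;
# dispatches to the MULX/ADX path when all OPENSSL_ia32cap_P bits under
# the $0x80108 mask are present.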
.globl	bn_power5
.type	bn_power5,@function
.align	32
bn_power5:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d
	andl	$0x80108,%r11d
	cmpl	$0x80108,%r11d
	je	.Lpowerx5_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lpower5_prologue:

	shll	$3,%r9d
	leal	(%r9,%r9,2),%r10d
	negq	%r9
	movq	(%r8),%r8

	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lpwr_sp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lpwr_sp_done

.align	32
.Lpwr_sp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lpwr_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwr_page_walk
	jmp	.Lpwr_page_walk_done

.Lpwr_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwr_page_walk
.Lpwr_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	movq	%r8,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpower5_body:
.byte	102,72,15,110,207
.byte	102,72,15,110,209
.byte	102,73,15,110,218
.byte	102,72,15,110,226

	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal
	call	__bn_sqr8x_internal
	call	__bn_post4x_internal

.byte	102,72,15,126,209
.byte	102,72,15,126,226
	movq	%rsi,%rdi
	movq	40(%rsp),%rax
	leaq	32(%rsp),%r8

	call	mul4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpower5_epilogue:
.byte	0xf3,0xc3
.cfi_endproc
.size	bn_power5,.-bn_power5

.globl	bn_sqr8x_internal
.hidden	bn_sqr8x_internal
.type	bn_sqr8x_internal,@function
.align	32
bn_sqr8x_internal:
__bn_sqr8x_internal:
.cfi_startproc

	leaq	32(%r10),%rbp
	leaq	(%rsi,%r9,1),%rsi

	movq	%r9,%rcx

	movq	-32(%rsi,%rbp,1),%r14
	leaq	48+8(%rsp,%r9,2),%rdi
	movq	-24(%rsi,%rbp,1),%rax
	leaq	-32(%rdi,%rbp,1),%rdi
	movq	-16(%rsi,%rbp,1),%rbx
	movq	%rax,%r15

	mulq	%r14
	movq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	movq	%r10,-24(%rdi,%rbp,1)

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	adcq	$0,%rdx
	movq	%r11,-16(%rdi,%rbp,1)
	movq	%rdx,%r10

	movq	-8(%rsi,%rbp,1),%rbx
	mulq	%r15
	movq	%rax,%r12
	movq	%rbx,%rax
	movq	%rdx,%r13

	leaq	(%rbp),%rcx
	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rcx,1)
	jmp	.Lsqr4x_1st
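# .Lsqr4x_1st: first squaring pass, computing the off-diagonal products
# of the two lowest limbs against the rest of a[]; the doubling and the
# a[i]^2 diagonal are added later in the shift-n-add pass.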
.align	32
.Lsqr4x_1st:
	movq	(%rsi,%rcx,1),%rbx
	mulq	%r15
	addq	%rax,%r13
	movq	%rbx,%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	8(%rsi,%rcx,1),%rbx
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	movq	%r11,(%rdi,%rcx,1)
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	16(%rsi,%rcx,1),%rbx
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11

	mulq	%r15
	addq	%rax,%r13
	movq	%rbx,%rax
	movq	%r10,8(%rdi,%rcx,1)
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	24(%rsi,%rcx,1),%rbx
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	movq	%r11,16(%rdi,%rcx,1)
	movq	%rdx,%r13
	adcq	$0,%r13
	leaq	32(%rcx),%rcx

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rcx,1)

	cmpq	$0,%rcx
	jne	.Lsqr4x_1st

	mulq	%r15
	addq	%rax,%r13
	leaq	16(%rbp),%rbp
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx

	movq	%r13,(%rdi)
	movq	%rdx,%r12
	movq	%rdx,8(%rdi)
	jmp	.Lsqr4x_outer

.align	32
.Lsqr4x_outer:
	movq	-32(%rsi,%rbp,1),%r14
	leaq	48+8(%rsp,%r9,2),%rdi
	movq	-24(%rsi,%rbp,1),%rax
	leaq	-32(%rdi,%rbp,1),%rdi
	movq	-16(%rsi,%rbp,1),%rbx
	movq	%rax,%r15

	mulq	%r14
	movq	-24(%rdi,%rbp,1),%r10
	addq	%rax,%r10
	movq	%rbx,%rax
	adcq	$0,%rdx
	movq	%r10,-24(%rdi,%rbp,1)
	movq	%rdx,%r11

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	adcq	$0,%rdx
	addq	-16(%rdi,%rbp,1),%r11
	movq	%rdx,%r10
	adcq	$0,%r10
	movq	%r11,-16(%rdi,%rbp,1)

	xorq	%r12,%r12

	movq	-8(%rsi,%rbp,1),%rbx
	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	adcq	$0,%rdx
	addq	-8(%rdi,%rbp,1),%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	adcq	$0,%rdx
	addq	%r12,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rbp,1)

	leaq	(%rbp),%rcx
	jmp	.Lsqr4x_inner

.align	32
.Lsqr4x_inner:
	movq	(%rsi,%rcx,1),%rbx
	mulq	%r15
	addq	%rax,%r13
	movq	%rbx,%rax
	movq	%rdx,%r12
	adcq	$0,%r12
	addq	(%rdi,%rcx,1),%r13
	adcq	$0,%r12

.byte	0x67
	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	8(%rsi,%rcx,1),%rbx
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%r11,(%rdi,%rcx,1)
	movq	%rbx,%rax
	movq	%rdx,%r13
	adcq	$0,%r13
	addq	8(%rdi,%rcx,1),%r12
	leaq	16(%rcx),%rcx
	adcq	$0,%r13

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	adcq	$0,%rdx
	addq	%r12,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	movq	%r10,-8(%rdi,%rcx,1)

	cmpq	$0,%rcx
	jne	.Lsqr4x_inner

.byte	0x67
	mulq	%r15
	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx

	movq	%r13,(%rdi)
	movq	%rdx,%r12
	movq	%rdx,8(%rdi)

	addq	$16,%rbp
	jnz	.Lsqr4x_outer
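# Outer passes exhausted: finish the remaining top corner of the
# cross-product triangle before the doubling phase below.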
	movq	-32(%rsi),%r14
	leaq	48+8(%rsp,%r9,2),%rdi
	movq	-24(%rsi),%rax
	leaq	-32(%rdi,%rbp,1),%rdi
	movq	-16(%rsi),%rbx
	movq	%rax,%r15

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%r14
	addq	%rax,%r11
	movq	%rbx,%rax
	movq	%r10,-24(%rdi)
	movq	%rdx,%r10
	adcq	$0,%r10
	addq	%r13,%r11
	movq	-8(%rsi),%rbx
	adcq	$0,%r10

	mulq	%r15
	addq	%rax,%r12
	movq	%rbx,%rax
	movq	%r11,-16(%rdi)
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%r14
	addq	%rax,%r10
	movq	%rbx,%rax
	movq	%rdx,%r11
	adcq	$0,%r11
	addq	%r12,%r10
	adcq	$0,%r11
	movq	%r10,-8(%rdi)

	mulq	%r15
	addq	%rax,%r13
	movq	-16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx

	movq	%r13,(%rdi)
	movq	%rdx,%r12
	movq	%rdx,8(%rdi)

	mulq	%rbx
	addq	$16,%rbp
	xorq	%r14,%r14
	subq	%r9,%rbp
	xorq	%r15,%r15

	addq	%r12,%rax
	adcq	$0,%rdx
	movq	%rax,8(%rdi)
	movq	%rdx,16(%rdi)
	movq	%r15,24(%rdi)

	movq	-16(%rsi,%rbp,1),%rax
	leaq	48+8(%rsp),%rdi
	xorq	%r10,%r10
	movq	8(%rdi),%r11

	leaq	(%r14,%r10,2),%r12
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r13
	shrq	$63,%r11
	orq	%r10,%r13
	movq	16(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	24(%rdi),%r11
	adcq	%rax,%r12
	movq	-8(%rsi,%rbp,1),%rax
	movq	%r12,(%rdi)
	adcq	%rdx,%r13

	leaq	(%r14,%r10,2),%rbx
	movq	%r13,8(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r8
	shrq	$63,%r11
	orq	%r10,%r8
	movq	32(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	40(%rdi),%r11
	adcq	%rax,%rbx
	movq	0(%rsi,%rbp,1),%rax
	movq	%rbx,16(%rdi)
	adcq	%rdx,%r8
	leaq	16(%rbp),%rbp
	movq	%r8,24(%rdi)
	sbbq	%r15,%r15
	leaq	64(%rdi),%rdi
	jmp	.Lsqr4x_shift_n_add

.align	32
.Lsqr4x_shift_n_add:
	leaq	(%r14,%r10,2),%r12
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r13
	shrq	$63,%r11
	orq	%r10,%r13
	movq	-16(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	-8(%rdi),%r11
	adcq	%rax,%r12
	movq	-8(%rsi,%rbp,1),%rax
	movq	%r12,-32(%rdi)
	adcq	%rdx,%r13

	leaq	(%r14,%r10,2),%rbx
	movq	%r13,-24(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r8
	shrq	$63,%r11
	orq	%r10,%r8
	movq	0(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	8(%rdi),%r11
	adcq	%rax,%rbx
	movq	0(%rsi,%rbp,1),%rax
	movq	%rbx,-16(%rdi)
	adcq	%rdx,%r8

	leaq	(%r14,%r10,2),%r12
	movq	%r8,-8(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r13
	shrq	$63,%r11
	orq	%r10,%r13
	movq	16(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	24(%rdi),%r11
	adcq	%rax,%r12
	movq	8(%rsi,%rbp,1),%rax
	movq	%r12,0(%rdi)
	adcq	%rdx,%r13

	leaq	(%r14,%r10,2),%rbx
	movq	%r13,8(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r8
	shrq	$63,%r11
	orq	%r10,%r8
	movq	32(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	40(%rdi),%r11
	adcq	%rax,%rbx
	movq	16(%rsi,%rbp,1),%rax
	movq	%rbx,16(%rdi)
	adcq	%rdx,%r8
	movq	%r8,24(%rdi)
	sbbq	%r15,%r15
	leaq	64(%rdi),%rdi
	addq	$32,%rbp
	jnz	.Lsqr4x_shift_n_add
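# Shift-n-add loop done: fold in the last doubled limbs and the final
# square, then fall through into the 8-limb Montgomery reduction.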
	leaq	(%r14,%r10,2),%r12
.byte	0x67
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r13
	shrq	$63,%r11
	orq	%r10,%r13
	movq	-16(%rdi),%r10
	movq	%r11,%r14
	mulq	%rax
	negq	%r15
	movq	-8(%rdi),%r11
	adcq	%rax,%r12
	movq	-8(%rsi),%rax
	movq	%r12,-32(%rdi)
	adcq	%rdx,%r13

	leaq	(%r14,%r10,2),%rbx
	movq	%r13,-24(%rdi)
	sbbq	%r15,%r15
	shrq	$63,%r10
	leaq	(%rcx,%r11,2),%r8
	shrq	$63,%r11
	orq	%r10,%r8
	mulq	%rax
	negq	%r15
	adcq	%rax,%rbx
	adcq	%rdx,%r8
	movq	%rbx,-16(%rdi)
	movq	%r8,-8(%rdi)
.byte	102,72,15,126,213
__bn_sqr8x_reduction:
	xorq	%rax,%rax
	leaq	(%r9,%rbp,1),%rcx
	leaq	48+8(%rsp,%r9,2),%rdx
	movq	%rcx,0+8(%rsp)
	leaq	48+8(%rsp,%r9,1),%rdi
	movq	%rdx,8+8(%rsp)
	negq	%r9
	jmp	.L8x_reduction_loop

.align	32
.L8x_reduction_loop:
	leaq	(%rdi,%r9,1),%rdi
.byte	0x66
	movq	0(%rdi),%rbx
	movq	8(%rdi),%r9
	movq	16(%rdi),%r10
	movq	24(%rdi),%r11
	movq	32(%rdi),%r12
	movq	40(%rdi),%r13
	movq	48(%rdi),%r14
	movq	56(%rdi),%r15
	movq	%rax,(%rdx)
	leaq	64(%rdi),%rdi

.byte	0x67
	movq	%rbx,%r8
	imulq	32+8(%rsp),%rbx
	movq	0(%rbp),%rax
	movl	$8,%ecx
	jmp	.L8x_reduce

.align	32
.L8x_reduce:
	mulq	%rbx
	movq	8(%rbp),%rax
	negq	%r8
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rbx,48-8+8(%rsp,%rcx,8)
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	32+8(%rsp),%rsi
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jnz	.L8x_reduce

	leaq	64(%rbp),%rbp
	xorq	%rax,%rax
	movq	8+8(%rsp),%rdx
	cmpq	0+8(%rsp),%rbp
	jae	.L8x_no_tail

.byte	0x66
	addq	0(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	sbbq	%rsi,%rsi

	movq	48+56+8(%rsp),%rbx
	movl	$8,%ecx
	movq	0(%rbp),%rax
	jmp	.L8x_tail
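# .L8x_tail: continue the reduction over the upper limbs, reloading the
# reduction multipliers saved on the stack as %ecx counts down.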
.align	32
.L8x_tail:
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rbp),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	leaq	8(%rdi),%rdi
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	48-16+8(%rsp,%rcx,8),%rbx
	addq	%rax,%r15
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	0(%rbp),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jnz	.L8x_tail

	leaq	64(%rbp),%rbp
	movq	8+8(%rsp),%rdx
	cmpq	0+8(%rsp),%rbp
	jae	.L8x_tail_done

	movq	48+56+8(%rsp),%rbx
	negq	%rsi
	movq	0(%rbp),%rax
	adcq	0(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	sbbq	%rsi,%rsi

	movl	$8,%ecx
	jmp	.L8x_tail

.align	32
.L8x_tail_done:
	xorq	%rax,%rax
	addq	(%rdx),%r8
	adcq	$0,%r9
	adcq	$0,%r10
	adcq	$0,%r11
	adcq	$0,%r12
	adcq	$0,%r13
	adcq	$0,%r14
	adcq	$0,%r15
	adcq	$0,%rax

	negq	%rsi
.L8x_no_tail:
	adcq	0(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	adcq	$0,%rax
	movq	-8(%rbp),%rcx
	xorq	%rsi,%rsi

.byte	102,72,15,126,213

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
.byte	102,73,15,126,217
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)
	leaq	64(%rdi),%rdi

	cmpq	%rdx,%rdi
	jb	.L8x_reduction_loop
.byte	0xf3,0xc3
.cfi_endproc
.size	bn_sqr8x_internal,.-bn_sqr8x_internal
.type	__bn_post4x_internal,@function
.align	32
__bn_post4x_internal:
.cfi_startproc
	movq	0(%rbp),%r12
	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
.byte	102,72,15,126,207
	negq	%rax
.byte	102,72,15,126,206
	sarq	$3+2,%rcx
	decq	%r12
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqr4x_sub_entry

.align	16
.Lsqr4x_sub:
	movq	0(%rbp),%r12
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
.Lsqr4x_sub_entry:
	leaq	32(%rbp),%rbp
	notq	%r12
	notq	%r13
	notq	%r14
	notq	%r15
	andq	%rax,%r12
	andq	%rax,%r13
	andq	%rax,%r14
	andq	%rax,%r15

	negq	%r10
	adcq	0(%rbx),%r12
	adcq	8(%rbx),%r13
	adcq	16(%rbx),%r14
	adcq	24(%rbx),%r15
	movq	%r12,0(%rdi)
	leaq	32(%rbx),%rbx
	movq	%r13,8(%rdi)
	sbbq	%r10,%r10
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi

	incq	%rcx
	jnz	.Lsqr4x_sub

	movq	%r9,%r10
	negq	%r9
.byte	0xf3,0xc3
.cfi_endproc
.size	__bn_post4x_internal,.-__bn_post4x_internal
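# bn_mulx4x_mont_gather5: same algorithm as bn_mul4x_mont_gather5, but
# built on MULX with the ADCX/ADOX dual carry chains (BMI2 + ADX).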
.type	bn_mulx4x_mont_gather5,@function
.align	32
bn_mulx4x_mont_gather5:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lmulx4x_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmulx4x_prologue:

	shll	$3,%r9d
	leaq	(%r9,%r9,2),%r10
	negq	%r9
	movq	(%r8),%r8

	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lmulx4xsp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lmulx4xsp_done

.Lmulx4xsp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lmulx4xsp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
	jmp	.Lmulx4x_page_walk_done

.Lmulx4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

	movq	%r8,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmulx4x_body:
	call	mulx4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmulx4x_epilogue:
.byte	0xf3,0xc3
.cfi_endproc
.size	bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5

.type	mulx4x_internal,@function
.align	32
mulx4x_internal:
.cfi_startproc
	movq	%r9,8(%rsp)
	movq	%r9,%r10
	negq	%r9
	shlq	$5,%r9
	negq	%r10
	leaq	128(%rdx,%r9,1),%r13
	shrq	$5+5,%r9
	movd	8(%rax),%xmm5
	subq	$1,%r9
	leaq	.Linc(%rip),%rax
	movq	%r13,16+8(%rsp)
	movq	%r9,24+8(%rsp)
	movq	%rdi,56+8(%rsp)
	movdqa	0(%rax),%xmm0
	movdqa	16(%rax),%xmm1
	leaq	88-112(%rsp,%r10,1),%r10
	leaq	128(%rdx),%rdi

	pshufd	$0,%xmm5,%xmm5
	movdqa	%xmm1,%xmm4
.byte	0x67
	movdqa	%xmm1,%xmm2
.byte	0x67
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
.byte	0x67
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)

	pand	64(%rdi),%xmm0
	pand	80(%rdi),%xmm1
	pand	96(%rdi),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%rdi),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%rdi),%xmm4
	movdqa	-112(%rdi),%xmm5
	movdqa	-96(%rdi),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%rdi),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%rdi),%xmm4
	movdqa	-48(%rdi),%xmm5
	movdqa	-32(%rdi),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%rdi),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%rdi),%xmm4
	movdqa	16(%rdi),%xmm5
	movdqa	32(%rdi),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%rdi),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	pxor	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1
	por	%xmm1,%xmm0
	leaq	256(%rdi),%rdi
.byte	102,72,15,126,194
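# b[0] has been gathered into %rdx, the implicit multiplicand of mulx,
# so the first four-limb pass can start immediately.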
	leaq	64+32+8(%rsp),%rbx

	movq	%rdx,%r9
	mulxq	0(%rsi),%r8,%rax
	mulxq	8(%rsi),%r11,%r12
	addq	%rax,%r11
	mulxq	16(%rsi),%rax,%r13
	adcq	%rax,%r12
	adcq	$0,%r13
	mulxq	24(%rsi),%rax,%r14

	movq	%r8,%r15
	imulq	32+8(%rsp),%r8
	xorq	%rbp,%rbp
	movq	%r8,%rdx

	movq	%rdi,8+8(%rsp)

	leaq	32(%rsi),%rsi
	adcxq	%rax,%r13
	adcxq	%rbp,%r14

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	movq	24+8(%rsp),%rdi
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r11,-24(%rbx)
	adcxq	%rax,%r12
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r12,-16(%rbx)
	jmp	.Lmulx4x_1st

.align	32
.Lmulx4x_1st:
	adcxq	%rbp,%r15
	mulxq	0(%rsi),%r10,%rax
	adcxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
.byte	0x67,0x67
	movq	%r8,%rdx
	adcxq	%rax,%r13
	adcxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-32(%rbx)
	adoxq	%r15,%r13
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	leaq	32(%rcx),%rcx
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_1st

	movq	8(%rsp),%rax
	adcq	%rbp,%r15
	leaq	(%rsi,%rax,1),%rsi
	addq	%r15,%r14
	movq	8+8(%rsp),%rdi
	adcq	%rbp,%rbp
	movq	%r14,-8(%rbx)
	jmp	.Lmulx4x_outer
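# .Lmulx4x_outer: gather the next b[i] in constant time (masks addressed
# through %r10, rewound relative to %rbx) and run another four-limb pass
# on top of the previous intermediate result.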
.align	32
.Lmulx4x_outer:
	leaq	16-256(%rbx),%r10
	pxor	%xmm4,%xmm4
.byte	0x67,0x67
	pxor	%xmm5,%xmm5
	movdqa	-128(%rdi),%xmm0
	movdqa	-112(%rdi),%xmm1
	movdqa	-96(%rdi),%xmm2
	pand	256(%r10),%xmm0
	movdqa	-80(%rdi),%xmm3
	pand	272(%r10),%xmm1
	por	%xmm0,%xmm4
	pand	288(%r10),%xmm2
	por	%xmm1,%xmm5
	pand	304(%r10),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%rdi),%xmm0
	movdqa	-48(%rdi),%xmm1
	movdqa	-32(%rdi),%xmm2
	pand	320(%r10),%xmm0
	movdqa	-16(%rdi),%xmm3
	pand	336(%r10),%xmm1
	por	%xmm0,%xmm4
	pand	352(%r10),%xmm2
	por	%xmm1,%xmm5
	pand	368(%r10),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%rdi),%xmm0
	movdqa	16(%rdi),%xmm1
	movdqa	32(%rdi),%xmm2
	pand	384(%r10),%xmm0
	movdqa	48(%rdi),%xmm3
	pand	400(%r10),%xmm1
	por	%xmm0,%xmm4
	pand	416(%r10),%xmm2
	por	%xmm1,%xmm5
	pand	432(%r10),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%rdi),%xmm0
	movdqa	80(%rdi),%xmm1
	movdqa	96(%rdi),%xmm2
	pand	448(%r10),%xmm0
	movdqa	112(%rdi),%xmm3
	pand	464(%r10),%xmm1
	por	%xmm0,%xmm4
	pand	480(%r10),%xmm2
	por	%xmm1,%xmm5
	pand	496(%r10),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%rdi),%rdi
.byte	102,72,15,126,194

	movq	%rbp,(%rbx)
	leaq	32(%rbx,%rax,1),%rbx
	mulxq	0(%rsi),%r8,%r11
	xorq	%rbp,%rbp
	movq	%rdx,%r9
	mulxq	8(%rsi),%r14,%r12
	adoxq	-32(%rbx),%r8
	adcxq	%r14,%r11
	mulxq	16(%rsi),%r15,%r13
	adoxq	-24(%rbx),%r11
	adcxq	%r15,%r12
	mulxq	24(%rsi),%rdx,%r14
	adoxq	-16(%rbx),%r12
	adcxq	%rdx,%r13
	leaq	(%rcx,%rax,1),%rcx
	leaq	32(%rsi),%rsi
	adoxq	-8(%rbx),%r13
	adcxq	%rbp,%r14
	adoxq	%rbp,%r14

	movq	%r8,%r15
	imulq	32+8(%rsp),%r8

	movq	%r8,%rdx
	xorq	%rbp,%rbp
	movq	%rdi,8+8(%rsp)

	mulxq	0(%rcx),%rax,%r10
	adcxq	%rax,%r15
	adoxq	%r11,%r10
	mulxq	8(%rcx),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11
	mulxq	16(%rcx),%rax,%r12
	adcxq	%rax,%r11
	adoxq	%r13,%r12
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	movq	24+8(%rsp),%rdi
	movq	%r10,-32(%rbx)
	adcxq	%rax,%r12
	movq	%r11,-24(%rbx)
	adoxq	%rbp,%r15
	movq	%r12,-16(%rbx)
	leaq	32(%rcx),%rcx
	jmp	.Lmulx4x_inner

.align	32
.Lmulx4x_inner:
	mulxq	0(%rsi),%r10,%rax
	adcxq	%rbp,%r15
	adoxq	%r14,%r10
	mulxq	8(%rsi),%r11,%r14
	adcxq	0(%rbx),%r10
	adoxq	%rax,%r11
	mulxq	16(%rsi),%r12,%rax
	adcxq	8(%rbx),%r11
	adoxq	%r14,%r12
	mulxq	24(%rsi),%r13,%r14
	movq	%r8,%rdx
	adcxq	16(%rbx),%r12
	adoxq	%rax,%r13
	adcxq	24(%rbx),%r13
	adoxq	%rbp,%r14
	leaq	32(%rsi),%rsi
	leaq	32(%rbx),%rbx
	adcxq	%rbp,%r14

	adoxq	%r15,%r10
	mulxq	0(%rcx),%rax,%r15
	adcxq	%rax,%r10
	adoxq	%r15,%r11
	mulxq	8(%rcx),%rax,%r15
	adcxq	%rax,%r11
	adoxq	%r15,%r12
	mulxq	16(%rcx),%rax,%r15
	movq	%r10,-40(%rbx)
	adcxq	%rax,%r12
	adoxq	%r15,%r13
	movq	%r11,-32(%rbx)
	mulxq	24(%rcx),%rax,%r15
	movq	%r9,%rdx
	leaq	32(%rcx),%rcx
	movq	%r12,-24(%rbx)
	adcxq	%rax,%r13
	adoxq	%rbp,%r15
	movq	%r13,-16(%rbx)

	decq	%rdi
	jnz	.Lmulx4x_inner
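# Inner loop finished: settle the final carries and loop back to
# .Lmulx4x_outer while %rdi is below the limit saved at 16+8(%rsp);
# afterwards set up the conditional final subtraction.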
	movq	0+8(%rsp),%rax
	adcq	%rbp,%r15
	subq	0(%rbx),%rdi
	movq	8+8(%rsp),%rdi
	movq	16+8(%rsp),%r10
	adcq	%r15,%r14
	leaq	(%rsi,%rax,1),%rsi
	adcq	%rbp,%rbp
	movq	%r14,-8(%rbx)

	cmpq	%r10,%rdi
	jb	.Lmulx4x_outer

	movq	-8(%rcx),%r10
	movq	%rbp,%r8
	movq	(%rcx,%rax,1),%r12
	leaq	(%rcx,%rax,1),%rbp
	movq	%rax,%rcx
	leaq	(%rbx,%rax,1),%rdi
	xorl	%eax,%eax
	xorq	%r15,%r15
	subq	%r14,%r10
	adcq	%r15,%r15
	orq	%r15,%r8
	sarq	$3+2,%rcx
	subq	%r8,%rax
	movq	56+8(%rsp),%rdx
	decq	%r12
	movq	8(%rbp),%r13
	xorq	%r8,%r8
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqrx4x_sub_entry
.cfi_endproc
.size	mulx4x_internal,.-mulx4x_internal
.type	bn_powerx5,@function
.align	32
bn_powerx5:
.cfi_startproc
	movq	%rsp,%rax
.cfi_def_cfa_register	%rax
.Lpowerx5_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lpowerx5_prologue:

	shll	$3,%r9d
	leaq	(%r9,%r9,2),%r10
	negq	%r9
	movq	(%r8),%r8

	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lpwrx_sp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lpwrx_sp_done

.align	32
.Lpwrx_sp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10
	cmovcq	%r10,%r11
	subq	%r11,%rbp
.Lpwrx_sp_done:
	andq	$-64,%rbp
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwrx_page_walk
	jmp	.Lpwrx_page_walk_done

.Lpwrx_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwrx_page_walk
.Lpwrx_page_walk_done:

	movq	%r9,%r10
	negq	%r9

	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207
.byte	102,72,15,110,209
.byte	102,73,15,110,218
.byte	102,72,15,110,226
	movq	%r8,32(%rsp)
	movq	%rax,40(%rsp)
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpowerx5_body:

	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal

	movq	%r10,%r9
	movq	%rsi,%rdi
.byte	102,72,15,126,209
.byte	102,72,15,126,226
	movq	40(%rsp),%rax

	call	mulx4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lpowerx5_epilogue:
.byte	0xf3,0xc3
.cfi_endproc
.size	bn_powerx5,.-bn_powerx5

.globl	bn_sqrx8x_internal
.hidden	bn_sqrx8x_internal
.type	bn_sqrx8x_internal,@function
.align	32
bn_sqrx8x_internal:
__bn_sqrx8x_internal:
.cfi_startproc
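# bn_sqrx8x_internal: MULX/ADX squaring. The 2*n-limb result area on the
# stack is zeroed first; %xmm0 was cleared by the caller (bn_powerx5).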
	leaq	48+8(%rsp),%rdi
	leaq	(%rsi,%r9,1),%rbp
	movq	%r9,0+8(%rsp)
	movq	%rbp,8+8(%rsp)
	jmp	.Lsqr8x_zero_start

.align	32
.byte	0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
.Lsqrx8x_zero:
.byte	0x3e
	movdqa	%xmm0,0(%rdi)
	movdqa	%xmm0,16(%rdi)
	movdqa	%xmm0,32(%rdi)
	movdqa	%xmm0,48(%rdi)
.Lsqr8x_zero_start:
	movdqa	%xmm0,64(%rdi)
	movdqa	%xmm0,80(%rdi)
	movdqa	%xmm0,96(%rdi)
	movdqa	%xmm0,112(%rdi)
	leaq	128(%rdi),%rdi
	subq	$64,%r9
	jnz	.Lsqrx8x_zero

	movq	0(%rsi),%rdx

	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%r12,%r12
	xorq	%r13,%r13
	xorq	%r14,%r14
	xorq	%r15,%r15
	leaq	48+8(%rsp),%rdi
	xorq	%rbp,%rbp
	jmp	.Lsqrx8x_outer_loop

.align	32
.Lsqrx8x_outer_loop:
	mulxq	8(%rsi),%r8,%rax
	adcxq	%r9,%r8
	adoxq	%rax,%r10
	mulxq	16(%rsi),%r9,%rax
	adcxq	%r10,%r9
	adoxq	%rax,%r11
.byte	0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
	adcxq	%r11,%r10
	adoxq	%rax,%r12
.byte	0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
	adcxq	%r12,%r11
	adoxq	%rax,%r13
	mulxq	40(%rsi),%r12,%rax
	adcxq	%r13,%r12
	adoxq	%rax,%r14
	mulxq	48(%rsi),%r13,%rax
	adcxq	%r14,%r13
	adoxq	%r15,%rax
	mulxq	56(%rsi),%r14,%r15
	movq	8(%rsi),%rdx
	adcxq	%rax,%r14
	adoxq	%rbp,%r15
	adcq	64(%rdi),%r15
	movq	%r8,8(%rdi)
	movq	%r9,16(%rdi)
	sbbq	%rcx,%rcx
	xorq	%rbp,%rbp

	mulxq	16(%rsi),%r8,%rbx
	mulxq	24(%rsi),%r9,%rax
	adcxq	%r10,%r8
	adoxq	%rbx,%r9
	mulxq	32(%rsi),%r10,%rbx
	adcxq	%r11,%r9
	adoxq	%rax,%r10
.byte	0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
	adcxq	%r12,%r10
	adoxq	%rbx,%r11
.byte	0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
	adcxq	%r13,%r11
	adoxq	%r14,%r12
.byte	0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
	movq	16(%rsi),%rdx
	adcxq	%rax,%r12
	adoxq	%rbx,%r13
	adcxq	%r15,%r13
	adoxq	%rbp,%r14
	adcxq	%rbp,%r14

	movq	%r8,24(%rdi)
	movq	%r9,32(%rdi)

	mulxq	24(%rsi),%r8,%rbx
	mulxq	32(%rsi),%r9,%rax
	adcxq	%r10,%r8
	adoxq	%rbx,%r9
	mulxq	40(%rsi),%r10,%rbx
	adcxq	%r11,%r9
	adoxq	%rax,%r10
.byte	0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
	adcxq	%r12,%r10
	adoxq	%r13,%r11
.byte	0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
.byte	0x3e
	movq	24(%rsi),%rdx
	adcxq	%rbx,%r11
	adoxq	%rax,%r12
	adcxq	%r14,%r12
	movq	%r8,40(%rdi)
	movq	%r9,48(%rdi)
	mulxq	32(%rsi),%r8,%rax
	adoxq	%rbp,%r13
	adcxq	%rbp,%r13

	mulxq	40(%rsi),%r9,%rbx
	adcxq	%r10,%r8
	adoxq	%rax,%r9
	mulxq	48(%rsi),%r10,%rax
	adcxq	%r11,%r9
	adoxq	%r12,%r10
	mulxq	56(%rsi),%r11,%r12
	movq	32(%rsi),%rdx
	movq	40(%rsi),%r14
	adcxq	%rbx,%r10
	adoxq	%rax,%r11
	movq	48(%rsi),%r15
	adcxq	%r13,%r11
	adoxq	%rbp,%r12
	adcxq	%rbp,%r12

	movq	%r8,56(%rdi)
	movq	%r9,64(%rdi)

	mulxq	%r14,%r9,%rax
	movq	56(%rsi),%r8
	adcxq	%r10,%r9
	mulxq	%r15,%r10,%rbx
	adoxq	%rax,%r10
	adcxq	%r11,%r10
	mulxq	%r8,%r11,%rax
	movq	%r14,%rdx
	adoxq	%rbx,%r11
	adcxq	%r12,%r11

	adcxq	%rbp,%rax

	mulxq	%r15,%r14,%rbx
	mulxq	%r8,%r12,%r13
	movq	%r15,%rdx
	leaq	64(%rsi),%rsi
	adcxq	%r14,%r11
	adoxq	%rbx,%r12
	adcxq	%rax,%r12
	adoxq	%rbp,%r13

.byte	0x67,0x67
	mulxq	%r8,%r8,%r14
	adcxq	%r8,%r13
	adcxq	%rbp,%r14

	cmpq	8+8(%rsp),%rsi
	je	.Lsqrx8x_outer_break

	negq	%rcx
	movq	$-8,%rcx
	movq	%rbp,%r15
	movq	64(%rdi),%r8
	adcxq	72(%rdi),%r9
	adcxq	80(%rdi),%r10
	adcxq	88(%rdi),%r11
	adcq	96(%rdi),%r12
	adcq	104(%rdi),%r13
	adcq	112(%rdi),%r14
	adcq	120(%rdi),%r15
	leaq	(%rsi),%rbp
	leaq	128(%rdi),%rdi
	sbbq	%rax,%rax

	movq	-64(%rsi),%rdx
	movq	%rax,16+8(%rsp)
	movq	%rdi,24+8(%rsp)

	xorl	%eax,%eax
	jmp	.Lsqrx8x_loop
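# .Lsqrx8x_loop: multiply the current source limb against eight limbs of
# a[] per pass, with adcx/adox advancing two carry chains independently.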
.align	32
.Lsqrx8x_loop:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rax,%r10
	adcxq	%rax,%r9
	adoxq	%r11,%r10

	mulxq	24(%rbp),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11

.byte	0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
	adcxq	%rax,%r11
	adoxq	%r13,%r12

	mulxq	40(%rbp),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

	mulxq	48(%rbp),%rax,%r14
	movq	%rbx,(%rdi,%rcx,8)
	movl	$0,%ebx
	adcxq	%rax,%r13
	adoxq	%r15,%r14

.byte	0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
	movq	8(%rsi,%rcx,8),%rdx
	adcxq	%rax,%r14
	adoxq	%rbx,%r15
	adcxq	%rbx,%r15

.byte	0x67
	incq	%rcx
	jnz	.Lsqrx8x_loop

	leaq	64(%rbp),%rbp
	movq	$-8,%rcx
	cmpq	8+8(%rsp),%rbp
	je	.Lsqrx8x_break

	subq	16+8(%rsp),%rbx
.byte	0x66
	movq	-64(%rsi),%rdx
	adcxq	0(%rdi),%r8
	adcxq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	leaq	64(%rdi),%rdi
.byte	0x67
	sbbq	%rax,%rax
	xorl	%ebx,%ebx
	movq	%rax,16+8(%rsp)
	jmp	.Lsqrx8x_loop

.align	32
.Lsqrx8x_break:
	xorq	%rbp,%rbp
	subq	16+8(%rsp),%rbx
	adcxq	%rbp,%r8
	movq	24+8(%rsp),%rcx
	adcxq	%rbp,%r9
	movq	0(%rsi),%rdx
	adcq	$0,%r10
	movq	%r8,0(%rdi)
	adcq	$0,%r11
	adcq	$0,%r12
	adcq	$0,%r13
	adcq	$0,%r14
	adcq	$0,%r15
	cmpq	%rcx,%rdi
	je	.Lsqrx8x_outer_loop

	movq	%r9,8(%rdi)
	movq	8(%rcx),%r9
	movq	%r10,16(%rdi)
	movq	16(%rcx),%r10
	movq	%r11,24(%rdi)
	movq	24(%rcx),%r11
	movq	%r12,32(%rdi)
	movq	32(%rcx),%r12
	movq	%r13,40(%rdi)
	movq	40(%rcx),%r13
	movq	%r14,48(%rdi)
	movq	48(%rcx),%r14
	movq	%r15,56(%rdi)
	movq	56(%rcx),%r15
	movq	%rcx,%rdi
	jmp	.Lsqrx8x_outer_loop

.align	32
.Lsqrx8x_outer_break:
	movq	%r9,72(%rdi)
.byte	102,72,15,126,217
	movq	%r10,80(%rdi)
	movq	%r11,88(%rdi)
	movq	%r12,96(%rdi)
	movq	%r13,104(%rdi)
	movq	%r14,112(%rdi)
	leaq	48+8(%rsp),%rdi
	movq	(%rsi,%rcx,1),%rdx

	movq	8(%rdi),%r11
	xorq	%r10,%r10
	movq	0+8(%rsp),%r9
	adoxq	%r11,%r11
	movq	16(%rdi),%r12
	movq	24(%rdi),%r13
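# .Lsqrx4x_shift_n_add: double the accumulated cross products with adox
# self-additions while mulx supplies the a[i]^2 diagonal terms.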
.align	32
.Lsqrx4x_shift_n_add:
	mulxq	%rdx,%rax,%rbx
	adoxq	%r12,%r12
	adcxq	%r10,%rax
.byte	0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
.byte	0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
	adoxq	%r13,%r13
	adcxq	%r11,%rbx
	movq	40(%rdi),%r11
	movq	%rax,0(%rdi)
	movq	%rbx,8(%rdi)

	mulxq	%rdx,%rax,%rbx
	adoxq	%r10,%r10
	adcxq	%r12,%rax
	movq	16(%rsi,%rcx,1),%rdx
	movq	48(%rdi),%r12
	adoxq	%r11,%r11
	adcxq	%r13,%rbx
	movq	56(%rdi),%r13
	movq	%rax,16(%rdi)
	movq	%rbx,24(%rdi)

	mulxq	%rdx,%rax,%rbx
	adoxq	%r12,%r12
	adcxq	%r10,%rax
	movq	24(%rsi,%rcx,1),%rdx
	leaq	32(%rcx),%rcx
	movq	64(%rdi),%r10
	adoxq	%r13,%r13
	adcxq	%r11,%rbx
	movq	72(%rdi),%r11
	movq	%rax,32(%rdi)
	movq	%rbx,40(%rdi)

	mulxq	%rdx,%rax,%rbx
	adoxq	%r10,%r10
	adcxq	%r12,%rax
	jrcxz	.Lsqrx4x_shift_n_add_break
.byte	0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
	adoxq	%r11,%r11
	adcxq	%r13,%rbx
	movq	80(%rdi),%r12
	movq	88(%rdi),%r13
	movq	%rax,48(%rdi)
	movq	%rbx,56(%rdi)
	leaq	64(%rdi),%rdi
	nop
	jmp	.Lsqrx4x_shift_n_add

.align	32
.Lsqrx4x_shift_n_add_break:
	adcxq	%r13,%rbx
	movq	%rax,48(%rdi)
	movq	%rbx,56(%rdi)
	leaq	64(%rdi),%rdi
.byte	102,72,15,126,213
__bn_sqrx8x_reduction:
	xorl	%eax,%eax
	movq	32+8(%rsp),%rbx
	movq	48+8(%rsp),%rdx
	leaq	-64(%rbp,%r9,1),%rcx

	movq	%rcx,0+8(%rsp)
	movq	%rdi,8+8(%rsp)

	leaq	48+8(%rsp),%rdi
	jmp	.Lsqrx8x_reduction_loop

.align	32
.Lsqrx8x_reduction_loop:
	movq	8(%rdi),%r9
	movq	16(%rdi),%r10
	movq	24(%rdi),%r11
	movq	32(%rdi),%r12
	movq	%rdx,%r8
	imulq	%rbx,%rdx
	movq	40(%rdi),%r13
	movq	48(%rdi),%r14
	movq	56(%rdi),%r15
	movq	%rax,24+8(%rsp)

	leaq	64(%rdi),%rdi
	xorq	%rsi,%rsi
	movq	$-8,%rcx
	jmp	.Lsqrx8x_reduce

.align	32
.Lsqrx8x_reduce:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rbx,%rax
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rbx,%r9
	adcxq	%rbx,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rbx,%r10
	adcxq	%rbx,%r9
	adoxq	%r11,%r10

	mulxq	24(%rbp),%rbx,%r11
	adcxq	%rbx,%r10
	adoxq	%r12,%r11

.byte	0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
	movq	%rdx,%rax
	movq	%r8,%rdx
	adcxq	%rbx,%r11
	adoxq	%r13,%r12

	mulxq	32+8(%rsp),%rbx,%rdx
	movq	%rax,%rdx
	movq	%rax,64+48+8(%rsp,%rcx,8)

	mulxq	40(%rbp),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

	mulxq	48(%rbp),%rax,%r14
	adcxq	%rax,%r13
	adoxq	%r15,%r14

	mulxq	56(%rbp),%rax,%r15
	movq	%rbx,%rdx
	adcxq	%rax,%r14
	adoxq	%rsi,%r15
	adcxq	%rsi,%r15

.byte	0x67,0x67,0x67
	incq	%rcx
	jnz	.Lsqrx8x_reduce

	movq	%rsi,%rax
	cmpq	0+8(%rsp),%rbp
	jae	.Lsqrx8x_no_tail

	movq	48+8(%rsp),%rdx
	addq	0(%rdi),%r8
	leaq	64(%rbp),%rbp
	movq	$-8,%rcx
	adcxq	8(%rdi),%r9
	adcxq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	leaq	64(%rdi),%rdi
	sbbq	%rax,%rax

	xorq	%rsi,%rsi
	movq	%rax,16+8(%rsp)
	jmp	.Lsqrx8x_tail

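# Tail of the reduction pass: the eight n0*t[i] multipliers saved on
# the stack by .Lsqrx8x_reduce are reloaded one at a time (from
# 72+48+8(%rsp,%rcx,8)) and multiplied across the remaining 64-byte
# blocks of the modulus at (%rbp), with the inter-block borrow kept
# at 16+8(%rsp).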
.align	32
.Lsqrx8x_tail:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rax,%r10
	adcxq	%rax,%r9
	adoxq	%r11,%r10

	mulxq	24(%rbp),%rax,%r11
	adcxq	%rax,%r10
	adoxq	%r12,%r11

.byte	0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
	adcxq	%rax,%r11
	adoxq	%r13,%r12

	mulxq	40(%rbp),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

	mulxq	48(%rbp),%rax,%r14
	adcxq	%rax,%r13
	adoxq	%r15,%r14

	mulxq	56(%rbp),%rax,%r15
	movq	72+48+8(%rsp,%rcx,8),%rdx
	adcxq	%rax,%r14
	adoxq	%rsi,%r15
	movq	%rbx,(%rdi,%rcx,8)
	movq	%r8,%rbx
	adcxq	%rsi,%r15

	incq	%rcx
	jnz	.Lsqrx8x_tail

	cmpq	0+8(%rsp),%rbp
	jae	.Lsqrx8x_tail_done

	subq	16+8(%rsp),%rsi
	movq	48+8(%rsp),%rdx
	leaq	64(%rbp),%rbp
	adcq	0(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	leaq	64(%rdi),%rdi
	sbbq	%rax,%rax
	subq	$8,%rcx

	xorq	%rsi,%rsi
	movq	%rax,16+8(%rsp)
	jmp	.Lsqrx8x_tail

.align	32
.Lsqrx8x_tail_done:
	xorq	%rax,%rax
	addq	24+8(%rsp),%r8
	adcq	$0,%r9
	adcq	$0,%r10
	adcq	$0,%r11
	adcq	$0,%r12
	adcq	$0,%r13
	adcq	$0,%r14
	adcq	$0,%r15
	adcq	$0,%rax

	subq	16+8(%rsp),%rsi
.Lsqrx8x_no_tail:
	adcq	0(%rdi),%r8
.byte	102,72,15,126,217
	adcq	8(%rdi),%r9
	movq	56(%rbp),%rsi
.byte	102,72,15,126,213
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	adcq	$0,%rax

	movq	32+8(%rsp),%rbx
	movq	64(%rdi,%rcx,1),%rdx

	movq	%r8,0(%rdi)
	leaq	64(%rdi),%r8
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	64(%rdi,%rcx,1),%rdi
	cmpq	8+8(%rsp),%r8
	jb	.Lsqrx8x_reduction_loop
	.byte	0xf3,0xc3
.cfi_endproc
.size	bn_sqrx8x_internal,.-bn_sqrx8x_internal
.align	32
__bn_postx4x_internal:
.cfi_startproc
	movq	0(%rbp),%r12
	movq	%rcx,%r10
	movq	%rcx,%r9
	negq	%rax
	sarq	$3+2,%rcx

.byte	102,72,15,126,202
.byte	102,72,15,126,206
	decq	%r12
	movq	8(%rbp),%r13
	xorq	%r8,%r8
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqrx4x_sub_entry

.align	16
.Lsqrx4x_sub:
	movq	0(%rbp),%r12
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
.Lsqrx4x_sub_entry:
	andnq	%rax,%r12,%r12
	leaq	32(%rbp),%rbp
	andnq	%rax,%r13,%r13
	andnq	%rax,%r14,%r14
	andnq	%rax,%r15,%r15

	negq	%r8
	adcq	0(%rdi),%r12
	adcq	8(%rdi),%r13
	adcq	16(%rdi),%r14
	adcq	24(%rdi),%r15
	movq	%r12,0(%rdx)
	leaq	32(%rdi),%rdi
	movq	%r13,8(%rdx)
	sbbq	%r8,%r8
	movq	%r14,16(%rdx)
	movq	%r15,24(%rdx)
	leaq	32(%rdx),%rdx

	incq	%rcx
	jnz	.Lsqrx4x_sub

	negq	%r9

	.byte	0xf3,0xc3
.cfi_endproc
.size	__bn_postx4x_internal,.-__bn_postx4x_internal
.globl	bn_get_bits5
.type	bn_get_bits5,@function
.align	16
bn_get_bits5:
.cfi_startproc
	leaq	0(%rdi),%r10
	leaq	1(%rdi),%r11
	movl	%esi,%ecx
	shrl	$4,%esi
	andl	$15,%ecx
	leal	-8(%rcx),%eax
	cmpl	$11,%ecx
	cmovaq	%r11,%r10
	cmoval	%eax,%ecx
	movzwl	(%r10,%rsi,2),%eax
	shrl	%cl,%eax
	andl	$31,%eax
	.byte	0xf3,0xc3
.cfi_endproc
.size	bn_get_bits5,.-bn_get_bits5

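# bn_scatter5: copy %esi 64-bit words from (%rdi) into the power
# table at (%rdx), one word every 256 bytes, starting at the slot
# selected by %rcx*8 (the power index).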
.globl	bn_scatter5
.type	bn_scatter5,@function
.align	16
bn_scatter5:
.cfi_startproc
	cmpl	$0,%esi
	jz	.Lscatter_epilogue
	leaq	(%rdx,%rcx,8),%rdx
.Lscatter:
	movq	(%rdi),%rax
	leaq	8(%rdi),%rdi
	movq	%rax,(%rdx)
	leaq	256(%rdx),%rdx
	subl	$1,%esi
	jnz	.Lscatter
.Lscatter_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	bn_scatter5,.-bn_scatter5

.globl	bn_gather5
.type	bn_gather5,@function
.align	32
bn_gather5:
.LSEH_begin_bn_gather5:
.cfi_startproc

.byte	0x4c,0x8d,0x14,0x24
.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00
	leaq	.Linc(%rip),%rax
	andq	$-16,%rsp

	movd	%ecx,%xmm5
	movdqa	0(%rax),%xmm0
	movdqa	16(%rax),%xmm1
	leaq	128(%rdx),%r11
	leaq	128(%rsp),%rax

	pshufd	$0,%xmm5,%xmm5
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-128(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-112(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-96(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-80(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-48(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-16(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,0(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,16(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,48(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,80(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,96(%rax)
	movdqa	%xmm4,%xmm2
	movdqa	%xmm3,112(%rax)
	jmp	.Lgather

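# Constant-time gather: each iteration reads all 256 bytes of the
# current table row at (%r11) and ANDs them against the pcmpeqd
# selection masks built above from .Linc and the power index in
# %ecx, so the memory access pattern is independent of the secret
# index; pshufd/por then fold the two qword halves into the one
# surviving word.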
.align	32
.Lgather:
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r11),%xmm0
	movdqa	-112(%r11),%xmm1
	movdqa	-96(%r11),%xmm2
	pand	-128(%rax),%xmm0
	movdqa	-80(%r11),%xmm3
	pand	-112(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r11),%xmm0
	movdqa	-48(%r11),%xmm1
	movdqa	-32(%r11),%xmm2
	pand	-64(%rax),%xmm0
	movdqa	-16(%r11),%xmm3
	pand	-48(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r11),%xmm0
	movdqa	16(%r11),%xmm1
	movdqa	32(%r11),%xmm2
	pand	0(%rax),%xmm0
	movdqa	48(%r11),%xmm3
	pand	16(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r11),%xmm0
	movdqa	80(%r11),%xmm1
	movdqa	96(%r11),%xmm2
	pand	64(%rax),%xmm0
	movdqa	112(%r11),%xmm3
	pand	80(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	leaq	256(%r11),%r11
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	movq	%xmm0,(%rdi)
	leaq	8(%rdi),%rdi
	subl	$1,%esi
	jnz	.Lgather

	leaq	(%r10),%rsp
	.byte	0xf3,0xc3
.LSEH_end_bn_gather5:
.cfi_endproc
.size	bn_gather5,.-bn_gather5
.align	64
.Linc:
.long	0,0, 1,1
.long	2,2, 2,2
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
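# The .byte string above is the NUL-terminated ASCII identification
# string "Montgomery Multiplication with scatter/gather for x86_64,
# CRYPTOGAMS by <appro@openssl.org>".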