1/* $FreeBSD$ */ 2/* Do not modify. This file is auto-generated from x86_64-mont5.pl. */ 3.text 4 5 6 7.globl bn_mul_mont_gather5 8.type bn_mul_mont_gather5,@function 9.align 64 10bn_mul_mont_gather5: 11.cfi_startproc 12 movl %r9d,%r9d 13 movq %rsp,%rax 14.cfi_def_cfa_register %rax 15 testl $7,%r9d 16 jnz .Lmul_enter 17 movl OPENSSL_ia32cap_P+8(%rip),%r11d 18 jmp .Lmul4x_enter 19 20.align 16 21.Lmul_enter: 22 movd 8(%rsp),%xmm5 23 pushq %rbx 24.cfi_offset %rbx,-16 25 pushq %rbp 26.cfi_offset %rbp,-24 27 pushq %r12 28.cfi_offset %r12,-32 29 pushq %r13 30.cfi_offset %r13,-40 31 pushq %r14 32.cfi_offset %r14,-48 33 pushq %r15 34.cfi_offset %r15,-56 35 36 negq %r9 37 movq %rsp,%r11 38 leaq -280(%rsp,%r9,8),%r10 39 negq %r9 40 andq $-1024,%r10 41 42 43 44 45 46 47 48 49 50 subq %r10,%r11 51 andq $-4096,%r11 52 leaq (%r10,%r11,1),%rsp 53 movq (%rsp),%r11 54 cmpq %r10,%rsp 55 ja .Lmul_page_walk 56 jmp .Lmul_page_walk_done 57 58.Lmul_page_walk: 59 leaq -4096(%rsp),%rsp 60 movq (%rsp),%r11 61 cmpq %r10,%rsp 62 ja .Lmul_page_walk 63.Lmul_page_walk_done: 64 65 leaq .Linc(%rip),%r10 66 movq %rax,8(%rsp,%r9,8) 67.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 68.Lmul_body: 69 70 leaq 128(%rdx),%r12 71 movdqa 0(%r10),%xmm0 72 movdqa 16(%r10),%xmm1 73 leaq 24-112(%rsp,%r9,8),%r10 74 andq $-16,%r10 75 76 pshufd $0,%xmm5,%xmm5 77 movdqa %xmm1,%xmm4 78 movdqa %xmm1,%xmm2 79 paddd %xmm0,%xmm1 80 pcmpeqd %xmm5,%xmm0 81.byte 0x67 82 movdqa %xmm4,%xmm3 83 paddd %xmm1,%xmm2 84 pcmpeqd %xmm5,%xmm1 85 movdqa %xmm0,112(%r10) 86 movdqa %xmm4,%xmm0 87 88 paddd %xmm2,%xmm3 89 pcmpeqd %xmm5,%xmm2 90 movdqa %xmm1,128(%r10) 91 movdqa %xmm4,%xmm1 92 93 paddd %xmm3,%xmm0 94 pcmpeqd %xmm5,%xmm3 95 movdqa %xmm2,144(%r10) 96 movdqa %xmm4,%xmm2 97 98 paddd %xmm0,%xmm1 99 pcmpeqd %xmm5,%xmm0 100 movdqa %xmm3,160(%r10) 101 movdqa %xmm4,%xmm3 102 paddd %xmm1,%xmm2 103 pcmpeqd %xmm5,%xmm1 104 movdqa %xmm0,176(%r10) 105 movdqa %xmm4,%xmm0 106 107 paddd %xmm2,%xmm3 108 pcmpeqd %xmm5,%xmm2 109 movdqa %xmm1,192(%r10) 110 movdqa %xmm4,%xmm1 111 112 paddd %xmm3,%xmm0 113 pcmpeqd %xmm5,%xmm3 114 movdqa %xmm2,208(%r10) 115 movdqa %xmm4,%xmm2 116 117 paddd %xmm0,%xmm1 118 pcmpeqd %xmm5,%xmm0 119 movdqa %xmm3,224(%r10) 120 movdqa %xmm4,%xmm3 121 paddd %xmm1,%xmm2 122 pcmpeqd %xmm5,%xmm1 123 movdqa %xmm0,240(%r10) 124 movdqa %xmm4,%xmm0 125 126 paddd %xmm2,%xmm3 127 pcmpeqd %xmm5,%xmm2 128 movdqa %xmm1,256(%r10) 129 movdqa %xmm4,%xmm1 130 131 paddd %xmm3,%xmm0 132 pcmpeqd %xmm5,%xmm3 133 movdqa %xmm2,272(%r10) 134 movdqa %xmm4,%xmm2 135 136 paddd %xmm0,%xmm1 137 pcmpeqd %xmm5,%xmm0 138 movdqa %xmm3,288(%r10) 139 movdqa %xmm4,%xmm3 140 paddd %xmm1,%xmm2 141 pcmpeqd %xmm5,%xmm1 142 movdqa %xmm0,304(%r10) 143 144 paddd %xmm2,%xmm3 145.byte 0x67 146 pcmpeqd %xmm5,%xmm2 147 movdqa %xmm1,320(%r10) 148 149 pcmpeqd %xmm5,%xmm3 150 movdqa %xmm2,336(%r10) 151 pand 64(%r12),%xmm0 152 153 pand 80(%r12),%xmm1 154 pand 96(%r12),%xmm2 155 movdqa %xmm3,352(%r10) 156 pand 112(%r12),%xmm3 157 por %xmm2,%xmm0 158 por %xmm3,%xmm1 159 movdqa -128(%r12),%xmm4 160 movdqa -112(%r12),%xmm5 161 movdqa -96(%r12),%xmm2 162 pand 112(%r10),%xmm4 163 movdqa -80(%r12),%xmm3 164 pand 128(%r10),%xmm5 165 por %xmm4,%xmm0 166 pand 144(%r10),%xmm2 167 por %xmm5,%xmm1 168 pand 160(%r10),%xmm3 169 por %xmm2,%xmm0 170 por %xmm3,%xmm1 171 movdqa -64(%r12),%xmm4 172 movdqa -48(%r12),%xmm5 173 movdqa -32(%r12),%xmm2 174 pand 176(%r10),%xmm4 175 movdqa -16(%r12),%xmm3 176 pand 192(%r10),%xmm5 177 por %xmm4,%xmm0 178 pand 208(%r10),%xmm2 179 por %xmm5,%xmm1 180 pand 224(%r10),%xmm3 181 por %xmm2,%xmm0 182 por %xmm3,%xmm1 183 movdqa 0(%r12),%xmm4 184 movdqa 16(%r12),%xmm5 185 movdqa 32(%r12),%xmm2 186 pand 240(%r10),%xmm4 187 movdqa 48(%r12),%xmm3 188 pand 256(%r10),%xmm5 189 por %xmm4,%xmm0 190 pand 272(%r10),%xmm2 191 por %xmm5,%xmm1 192 pand 288(%r10),%xmm3 193 por %xmm2,%xmm0 194 por %xmm3,%xmm1 195 por %xmm1,%xmm0 196 pshufd $0x4e,%xmm0,%xmm1 197 por %xmm1,%xmm0 198 leaq 256(%r12),%r12 199.byte 102,72,15,126,195 200 201 movq (%r8),%r8 202 movq (%rsi),%rax 203 204 xorq %r14,%r14 205 xorq %r15,%r15 206 207 movq %r8,%rbp 208 mulq %rbx 209 movq %rax,%r10 210 movq (%rcx),%rax 211 212 imulq %r10,%rbp 213 movq %rdx,%r11 214 215 mulq %rbp 216 addq %rax,%r10 217 movq 8(%rsi),%rax 218 adcq $0,%rdx 219 movq %rdx,%r13 220 221 leaq 1(%r15),%r15 222 jmp .L1st_enter 223 224.align 16 225.L1st: 226 addq %rax,%r13 227 movq (%rsi,%r15,8),%rax 228 adcq $0,%rdx 229 addq %r11,%r13 230 movq %r10,%r11 231 adcq $0,%rdx 232 movq %r13,-16(%rsp,%r15,8) 233 movq %rdx,%r13 234 235.L1st_enter: 236 mulq %rbx 237 addq %rax,%r11 238 movq (%rcx,%r15,8),%rax 239 adcq $0,%rdx 240 leaq 1(%r15),%r15 241 movq %rdx,%r10 242 243 mulq %rbp 244 cmpq %r9,%r15 245 jne .L1st 246 247 248 addq %rax,%r13 249 adcq $0,%rdx 250 addq %r11,%r13 251 adcq $0,%rdx 252 movq %r13,-16(%rsp,%r9,8) 253 movq %rdx,%r13 254 movq %r10,%r11 255 256 xorq %rdx,%rdx 257 addq %r11,%r13 258 adcq $0,%rdx 259 movq %r13,-8(%rsp,%r9,8) 260 movq %rdx,(%rsp,%r9,8) 261 262 leaq 1(%r14),%r14 263 jmp .Louter 264.align 16 265.Louter: 266 leaq 24+128(%rsp,%r9,8),%rdx 267 andq $-16,%rdx 268 pxor %xmm4,%xmm4 269 pxor %xmm5,%xmm5 270 movdqa -128(%r12),%xmm0 271 movdqa -112(%r12),%xmm1 272 movdqa -96(%r12),%xmm2 273 movdqa -80(%r12),%xmm3 274 pand -128(%rdx),%xmm0 275 pand -112(%rdx),%xmm1 276 por %xmm0,%xmm4 277 pand -96(%rdx),%xmm2 278 por %xmm1,%xmm5 279 pand -80(%rdx),%xmm3 280 por %xmm2,%xmm4 281 por %xmm3,%xmm5 282 movdqa -64(%r12),%xmm0 283 movdqa -48(%r12),%xmm1 284 movdqa -32(%r12),%xmm2 285 movdqa -16(%r12),%xmm3 286 pand -64(%rdx),%xmm0 287 pand -48(%rdx),%xmm1 288 por %xmm0,%xmm4 289 pand -32(%rdx),%xmm2 290 por %xmm1,%xmm5 291 pand -16(%rdx),%xmm3 292 por %xmm2,%xmm4 293 por %xmm3,%xmm5 294 movdqa 0(%r12),%xmm0 295 movdqa 16(%r12),%xmm1 296 movdqa 32(%r12),%xmm2 297 movdqa 48(%r12),%xmm3 298 pand 0(%rdx),%xmm0 299 pand 16(%rdx),%xmm1 300 por %xmm0,%xmm4 301 pand 32(%rdx),%xmm2 302 por %xmm1,%xmm5 303 pand 48(%rdx),%xmm3 304 por %xmm2,%xmm4 305 por %xmm3,%xmm5 306 movdqa 64(%r12),%xmm0 307 movdqa 80(%r12),%xmm1 308 movdqa 96(%r12),%xmm2 309 movdqa 112(%r12),%xmm3 310 pand 64(%rdx),%xmm0 311 pand 80(%rdx),%xmm1 312 por %xmm0,%xmm4 313 pand 96(%rdx),%xmm2 314 por %xmm1,%xmm5 315 pand 112(%rdx),%xmm3 316 por %xmm2,%xmm4 317 por %xmm3,%xmm5 318 por %xmm5,%xmm4 319 pshufd $0x4e,%xmm4,%xmm0 320 por %xmm4,%xmm0 321 leaq 256(%r12),%r12 322 323 movq (%rsi),%rax 324.byte 102,72,15,126,195 325 326 xorq %r15,%r15 327 movq %r8,%rbp 328 movq (%rsp),%r10 329 330 mulq %rbx 331 addq %rax,%r10 332 movq (%rcx),%rax 333 adcq $0,%rdx 334 335 imulq %r10,%rbp 336 movq %rdx,%r11 337 338 mulq %rbp 339 addq %rax,%r10 340 movq 8(%rsi),%rax 341 adcq $0,%rdx 342 movq 8(%rsp),%r10 343 movq %rdx,%r13 344 345 leaq 1(%r15),%r15 346 jmp .Linner_enter 347 348.align 16 349.Linner: 350 addq %rax,%r13 351 movq (%rsi,%r15,8),%rax 352 adcq $0,%rdx 353 addq %r10,%r13 354 movq (%rsp,%r15,8),%r10 355 adcq $0,%rdx 356 movq %r13,-16(%rsp,%r15,8) 357 movq %rdx,%r13 358 359.Linner_enter: 360 mulq %rbx 361 addq %rax,%r11 362 movq (%rcx,%r15,8),%rax 363 adcq $0,%rdx 364 addq %r11,%r10 365 movq %rdx,%r11 366 adcq $0,%r11 367 leaq 1(%r15),%r15 368 369 mulq %rbp 370 cmpq %r9,%r15 371 jne .Linner 372 373 addq %rax,%r13 374 adcq $0,%rdx 375 addq %r10,%r13 376 movq (%rsp,%r9,8),%r10 377 adcq $0,%rdx 378 movq %r13,-16(%rsp,%r9,8) 379 movq %rdx,%r13 380 381 xorq %rdx,%rdx 382 addq %r11,%r13 383 adcq $0,%rdx 384 addq %r10,%r13 385 adcq $0,%rdx 386 movq %r13,-8(%rsp,%r9,8) 387 movq %rdx,(%rsp,%r9,8) 388 389 leaq 1(%r14),%r14 390 cmpq %r9,%r14 391 jb .Louter 392 393 xorq %r14,%r14 394 movq (%rsp),%rax 395 leaq (%rsp),%rsi 396 movq %r9,%r15 397 jmp .Lsub 398.align 16 399.Lsub: sbbq (%rcx,%r14,8),%rax 400 movq %rax,(%rdi,%r14,8) 401 movq 8(%rsi,%r14,8),%rax 402 leaq 1(%r14),%r14 403 decq %r15 404 jnz .Lsub 405 406 sbbq $0,%rax 407 movq $-1,%rbx 408 xorq %rax,%rbx 409 xorq %r14,%r14 410 movq %r9,%r15 411 412.Lcopy: 413 movq (%rdi,%r14,8),%rcx 414 movq (%rsp,%r14,8),%rdx 415 andq %rbx,%rcx 416 andq %rax,%rdx 417 movq %r14,(%rsp,%r14,8) 418 orq %rcx,%rdx 419 movq %rdx,(%rdi,%r14,8) 420 leaq 1(%r14),%r14 421 subq $1,%r15 422 jnz .Lcopy 423 424 movq 8(%rsp,%r9,8),%rsi 425.cfi_def_cfa %rsi,8 426 movq $1,%rax 427 428 movq -48(%rsi),%r15 429.cfi_restore %r15 430 movq -40(%rsi),%r14 431.cfi_restore %r14 432 movq -32(%rsi),%r13 433.cfi_restore %r13 434 movq -24(%rsi),%r12 435.cfi_restore %r12 436 movq -16(%rsi),%rbp 437.cfi_restore %rbp 438 movq -8(%rsi),%rbx 439.cfi_restore %rbx 440 leaq (%rsi),%rsp 441.cfi_def_cfa_register %rsp 442.Lmul_epilogue: 443 .byte 0xf3,0xc3 444.cfi_endproc 445.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 446.type bn_mul4x_mont_gather5,@function 447.align 32 448bn_mul4x_mont_gather5: 449.cfi_startproc 450.byte 0x67 451 movq %rsp,%rax 452.cfi_def_cfa_register %rax 453.Lmul4x_enter: 454 andl $0x80108,%r11d 455 cmpl $0x80108,%r11d 456 je .Lmulx4x_enter 457 pushq %rbx 458.cfi_offset %rbx,-16 459 pushq %rbp 460.cfi_offset %rbp,-24 461 pushq %r12 462.cfi_offset %r12,-32 463 pushq %r13 464.cfi_offset %r13,-40 465 pushq %r14 466.cfi_offset %r14,-48 467 pushq %r15 468.cfi_offset %r15,-56 469.Lmul4x_prologue: 470 471.byte 0x67 472 shll $3,%r9d 473 leaq (%r9,%r9,2),%r10 474 negq %r9 475 476 477 478 479 480 481 482 483 484 485 leaq -320(%rsp,%r9,2),%r11 486 movq %rsp,%rbp 487 subq %rdi,%r11 488 andq $4095,%r11 489 cmpq %r11,%r10 490 jb .Lmul4xsp_alt 491 subq %r11,%rbp 492 leaq -320(%rbp,%r9,2),%rbp 493 jmp .Lmul4xsp_done 494 495.align 32 496.Lmul4xsp_alt: 497 leaq 4096-320(,%r9,2),%r10 498 leaq -320(%rbp,%r9,2),%rbp 499 subq %r10,%r11 500 movq $0,%r10 501 cmovcq %r10,%r11 502 subq %r11,%rbp 503.Lmul4xsp_done: 504 andq $-64,%rbp 505 movq %rsp,%r11 506 subq %rbp,%r11 507 andq $-4096,%r11 508 leaq (%r11,%rbp,1),%rsp 509 movq (%rsp),%r10 510 cmpq %rbp,%rsp 511 ja .Lmul4x_page_walk 512 jmp .Lmul4x_page_walk_done 513 514.Lmul4x_page_walk: 515 leaq -4096(%rsp),%rsp 516 movq (%rsp),%r10 517 cmpq %rbp,%rsp 518 ja .Lmul4x_page_walk 519.Lmul4x_page_walk_done: 520 521 negq %r9 522 523 movq %rax,40(%rsp) 524.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 525.Lmul4x_body: 526 527 call mul4x_internal 528 529 movq 40(%rsp),%rsi 530.cfi_def_cfa %rsi,8 531 movq $1,%rax 532 533 movq -48(%rsi),%r15 534.cfi_restore %r15 535 movq -40(%rsi),%r14 536.cfi_restore %r14 537 movq -32(%rsi),%r13 538.cfi_restore %r13 539 movq -24(%rsi),%r12 540.cfi_restore %r12 541 movq -16(%rsi),%rbp 542.cfi_restore %rbp 543 movq -8(%rsi),%rbx 544.cfi_restore %rbx 545 leaq (%rsi),%rsp 546.cfi_def_cfa_register %rsp 547.Lmul4x_epilogue: 548 .byte 0xf3,0xc3 549.cfi_endproc 550.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 551 552.type mul4x_internal,@function 553.align 32 554mul4x_internal: 555.cfi_startproc 556 shlq $5,%r9 557 movd 8(%rax),%xmm5 558 leaq .Linc(%rip),%rax 559 leaq 128(%rdx,%r9,1),%r13 560 shrq $5,%r9 561 movdqa 0(%rax),%xmm0 562 movdqa 16(%rax),%xmm1 563 leaq 88-112(%rsp,%r9,1),%r10 564 leaq 128(%rdx),%r12 565 566 pshufd $0,%xmm5,%xmm5 567 movdqa %xmm1,%xmm4 568.byte 0x67,0x67 569 movdqa %xmm1,%xmm2 570 paddd %xmm0,%xmm1 571 pcmpeqd %xmm5,%xmm0 572.byte 0x67 573 movdqa %xmm4,%xmm3 574 paddd %xmm1,%xmm2 575 pcmpeqd %xmm5,%xmm1 576 movdqa %xmm0,112(%r10) 577 movdqa %xmm4,%xmm0 578 579 paddd %xmm2,%xmm3 580 pcmpeqd %xmm5,%xmm2 581 movdqa %xmm1,128(%r10) 582 movdqa %xmm4,%xmm1 583 584 paddd %xmm3,%xmm0 585 pcmpeqd %xmm5,%xmm3 586 movdqa %xmm2,144(%r10) 587 movdqa %xmm4,%xmm2 588 589 paddd %xmm0,%xmm1 590 pcmpeqd %xmm5,%xmm0 591 movdqa %xmm3,160(%r10) 592 movdqa %xmm4,%xmm3 593 paddd %xmm1,%xmm2 594 pcmpeqd %xmm5,%xmm1 595 movdqa %xmm0,176(%r10) 596 movdqa %xmm4,%xmm0 597 598 paddd %xmm2,%xmm3 599 pcmpeqd %xmm5,%xmm2 600 movdqa %xmm1,192(%r10) 601 movdqa %xmm4,%xmm1 602 603 paddd %xmm3,%xmm0 604 pcmpeqd %xmm5,%xmm3 605 movdqa %xmm2,208(%r10) 606 movdqa %xmm4,%xmm2 607 608 paddd %xmm0,%xmm1 609 pcmpeqd %xmm5,%xmm0 610 movdqa %xmm3,224(%r10) 611 movdqa %xmm4,%xmm3 612 paddd %xmm1,%xmm2 613 pcmpeqd %xmm5,%xmm1 614 movdqa %xmm0,240(%r10) 615 movdqa %xmm4,%xmm0 616 617 paddd %xmm2,%xmm3 618 pcmpeqd %xmm5,%xmm2 619 movdqa %xmm1,256(%r10) 620 movdqa %xmm4,%xmm1 621 622 paddd %xmm3,%xmm0 623 pcmpeqd %xmm5,%xmm3 624 movdqa %xmm2,272(%r10) 625 movdqa %xmm4,%xmm2 626 627 paddd %xmm0,%xmm1 628 pcmpeqd %xmm5,%xmm0 629 movdqa %xmm3,288(%r10) 630 movdqa %xmm4,%xmm3 631 paddd %xmm1,%xmm2 632 pcmpeqd %xmm5,%xmm1 633 movdqa %xmm0,304(%r10) 634 635 paddd %xmm2,%xmm3 636.byte 0x67 637 pcmpeqd %xmm5,%xmm2 638 movdqa %xmm1,320(%r10) 639 640 pcmpeqd %xmm5,%xmm3 641 movdqa %xmm2,336(%r10) 642 pand 64(%r12),%xmm0 643 644 pand 80(%r12),%xmm1 645 pand 96(%r12),%xmm2 646 movdqa %xmm3,352(%r10) 647 pand 112(%r12),%xmm3 648 por %xmm2,%xmm0 649 por %xmm3,%xmm1 650 movdqa -128(%r12),%xmm4 651 movdqa -112(%r12),%xmm5 652 movdqa -96(%r12),%xmm2 653 pand 112(%r10),%xmm4 654 movdqa -80(%r12),%xmm3 655 pand 128(%r10),%xmm5 656 por %xmm4,%xmm0 657 pand 144(%r10),%xmm2 658 por %xmm5,%xmm1 659 pand 160(%r10),%xmm3 660 por %xmm2,%xmm0 661 por %xmm3,%xmm1 662 movdqa -64(%r12),%xmm4 663 movdqa -48(%r12),%xmm5 664 movdqa -32(%r12),%xmm2 665 pand 176(%r10),%xmm4 666 movdqa -16(%r12),%xmm3 667 pand 192(%r10),%xmm5 668 por %xmm4,%xmm0 669 pand 208(%r10),%xmm2 670 por %xmm5,%xmm1 671 pand 224(%r10),%xmm3 672 por %xmm2,%xmm0 673 por %xmm3,%xmm1 674 movdqa 0(%r12),%xmm4 675 movdqa 16(%r12),%xmm5 676 movdqa 32(%r12),%xmm2 677 pand 240(%r10),%xmm4 678 movdqa 48(%r12),%xmm3 679 pand 256(%r10),%xmm5 680 por %xmm4,%xmm0 681 pand 272(%r10),%xmm2 682 por %xmm5,%xmm1 683 pand 288(%r10),%xmm3 684 por %xmm2,%xmm0 685 por %xmm3,%xmm1 686 por %xmm1,%xmm0 687 pshufd $0x4e,%xmm0,%xmm1 688 por %xmm1,%xmm0 689 leaq 256(%r12),%r12 690.byte 102,72,15,126,195 691 692 movq %r13,16+8(%rsp) 693 movq %rdi,56+8(%rsp) 694 695 movq (%r8),%r8 696 movq (%rsi),%rax 697 leaq (%rsi,%r9,1),%rsi 698 negq %r9 699 700 movq %r8,%rbp 701 mulq %rbx 702 movq %rax,%r10 703 movq (%rcx),%rax 704 705 imulq %r10,%rbp 706 leaq 64+8(%rsp),%r14 707 movq %rdx,%r11 708 709 mulq %rbp 710 addq %rax,%r10 711 movq 8(%rsi,%r9,1),%rax 712 adcq $0,%rdx 713 movq %rdx,%rdi 714 715 mulq %rbx 716 addq %rax,%r11 717 movq 8(%rcx),%rax 718 adcq $0,%rdx 719 movq %rdx,%r10 720 721 mulq %rbp 722 addq %rax,%rdi 723 movq 16(%rsi,%r9,1),%rax 724 adcq $0,%rdx 725 addq %r11,%rdi 726 leaq 32(%r9),%r15 727 leaq 32(%rcx),%rcx 728 adcq $0,%rdx 729 movq %rdi,(%r14) 730 movq %rdx,%r13 731 jmp .L1st4x 732 733.align 32 734.L1st4x: 735 mulq %rbx 736 addq %rax,%r10 737 movq -16(%rcx),%rax 738 leaq 32(%r14),%r14 739 adcq $0,%rdx 740 movq %rdx,%r11 741 742 mulq %rbp 743 addq %rax,%r13 744 movq -8(%rsi,%r15,1),%rax 745 adcq $0,%rdx 746 addq %r10,%r13 747 adcq $0,%rdx 748 movq %r13,-24(%r14) 749 movq %rdx,%rdi 750 751 mulq %rbx 752 addq %rax,%r11 753 movq -8(%rcx),%rax 754 adcq $0,%rdx 755 movq %rdx,%r10 756 757 mulq %rbp 758 addq %rax,%rdi 759 movq (%rsi,%r15,1),%rax 760 adcq $0,%rdx 761 addq %r11,%rdi 762 adcq $0,%rdx 763 movq %rdi,-16(%r14) 764 movq %rdx,%r13 765 766 mulq %rbx 767 addq %rax,%r10 768 movq 0(%rcx),%rax 769 adcq $0,%rdx 770 movq %rdx,%r11 771 772 mulq %rbp 773 addq %rax,%r13 774 movq 8(%rsi,%r15,1),%rax 775 adcq $0,%rdx 776 addq %r10,%r13 777 adcq $0,%rdx 778 movq %r13,-8(%r14) 779 movq %rdx,%rdi 780 781 mulq %rbx 782 addq %rax,%r11 783 movq 8(%rcx),%rax 784 adcq $0,%rdx 785 movq %rdx,%r10 786 787 mulq %rbp 788 addq %rax,%rdi 789 movq 16(%rsi,%r15,1),%rax 790 adcq $0,%rdx 791 addq %r11,%rdi 792 leaq 32(%rcx),%rcx 793 adcq $0,%rdx 794 movq %rdi,(%r14) 795 movq %rdx,%r13 796 797 addq $32,%r15 798 jnz .L1st4x 799 800 mulq %rbx 801 addq %rax,%r10 802 movq -16(%rcx),%rax 803 leaq 32(%r14),%r14 804 adcq $0,%rdx 805 movq %rdx,%r11 806 807 mulq %rbp 808 addq %rax,%r13 809 movq -8(%rsi),%rax 810 adcq $0,%rdx 811 addq %r10,%r13 812 adcq $0,%rdx 813 movq %r13,-24(%r14) 814 movq %rdx,%rdi 815 816 mulq %rbx 817 addq %rax,%r11 818 movq -8(%rcx),%rax 819 adcq $0,%rdx 820 movq %rdx,%r10 821 822 mulq %rbp 823 addq %rax,%rdi 824 movq (%rsi,%r9,1),%rax 825 adcq $0,%rdx 826 addq %r11,%rdi 827 adcq $0,%rdx 828 movq %rdi,-16(%r14) 829 movq %rdx,%r13 830 831 leaq (%rcx,%r9,1),%rcx 832 833 xorq %rdi,%rdi 834 addq %r10,%r13 835 adcq $0,%rdi 836 movq %r13,-8(%r14) 837 838 jmp .Louter4x 839 840.align 32 841.Louter4x: 842 leaq 16+128(%r14),%rdx 843 pxor %xmm4,%xmm4 844 pxor %xmm5,%xmm5 845 movdqa -128(%r12),%xmm0 846 movdqa -112(%r12),%xmm1 847 movdqa -96(%r12),%xmm2 848 movdqa -80(%r12),%xmm3 849 pand -128(%rdx),%xmm0 850 pand -112(%rdx),%xmm1 851 por %xmm0,%xmm4 852 pand -96(%rdx),%xmm2 853 por %xmm1,%xmm5 854 pand -80(%rdx),%xmm3 855 por %xmm2,%xmm4 856 por %xmm3,%xmm5 857 movdqa -64(%r12),%xmm0 858 movdqa -48(%r12),%xmm1 859 movdqa -32(%r12),%xmm2 860 movdqa -16(%r12),%xmm3 861 pand -64(%rdx),%xmm0 862 pand -48(%rdx),%xmm1 863 por %xmm0,%xmm4 864 pand -32(%rdx),%xmm2 865 por %xmm1,%xmm5 866 pand -16(%rdx),%xmm3 867 por %xmm2,%xmm4 868 por %xmm3,%xmm5 869 movdqa 0(%r12),%xmm0 870 movdqa 16(%r12),%xmm1 871 movdqa 32(%r12),%xmm2 872 movdqa 48(%r12),%xmm3 873 pand 0(%rdx),%xmm0 874 pand 16(%rdx),%xmm1 875 por %xmm0,%xmm4 876 pand 32(%rdx),%xmm2 877 por %xmm1,%xmm5 878 pand 48(%rdx),%xmm3 879 por %xmm2,%xmm4 880 por %xmm3,%xmm5 881 movdqa 64(%r12),%xmm0 882 movdqa 80(%r12),%xmm1 883 movdqa 96(%r12),%xmm2 884 movdqa 112(%r12),%xmm3 885 pand 64(%rdx),%xmm0 886 pand 80(%rdx),%xmm1 887 por %xmm0,%xmm4 888 pand 96(%rdx),%xmm2 889 por %xmm1,%xmm5 890 pand 112(%rdx),%xmm3 891 por %xmm2,%xmm4 892 por %xmm3,%xmm5 893 por %xmm5,%xmm4 894 pshufd $0x4e,%xmm4,%xmm0 895 por %xmm4,%xmm0 896 leaq 256(%r12),%r12 897.byte 102,72,15,126,195 898 899 movq (%r14,%r9,1),%r10 900 movq %r8,%rbp 901 mulq %rbx 902 addq %rax,%r10 903 movq (%rcx),%rax 904 adcq $0,%rdx 905 906 imulq %r10,%rbp 907 movq %rdx,%r11 908 movq %rdi,(%r14) 909 910 leaq (%r14,%r9,1),%r14 911 912 mulq %rbp 913 addq %rax,%r10 914 movq 8(%rsi,%r9,1),%rax 915 adcq $0,%rdx 916 movq %rdx,%rdi 917 918 mulq %rbx 919 addq %rax,%r11 920 movq 8(%rcx),%rax 921 adcq $0,%rdx 922 addq 8(%r14),%r11 923 adcq $0,%rdx 924 movq %rdx,%r10 925 926 mulq %rbp 927 addq %rax,%rdi 928 movq 16(%rsi,%r9,1),%rax 929 adcq $0,%rdx 930 addq %r11,%rdi 931 leaq 32(%r9),%r15 932 leaq 32(%rcx),%rcx 933 adcq $0,%rdx 934 movq %rdx,%r13 935 jmp .Linner4x 936 937.align 32 938.Linner4x: 939 mulq %rbx 940 addq %rax,%r10 941 movq -16(%rcx),%rax 942 adcq $0,%rdx 943 addq 16(%r14),%r10 944 leaq 32(%r14),%r14 945 adcq $0,%rdx 946 movq %rdx,%r11 947 948 mulq %rbp 949 addq %rax,%r13 950 movq -8(%rsi,%r15,1),%rax 951 adcq $0,%rdx 952 addq %r10,%r13 953 adcq $0,%rdx 954 movq %rdi,-32(%r14) 955 movq %rdx,%rdi 956 957 mulq %rbx 958 addq %rax,%r11 959 movq -8(%rcx),%rax 960 adcq $0,%rdx 961 addq -8(%r14),%r11 962 adcq $0,%rdx 963 movq %rdx,%r10 964 965 mulq %rbp 966 addq %rax,%rdi 967 movq (%rsi,%r15,1),%rax 968 adcq $0,%rdx 969 addq %r11,%rdi 970 adcq $0,%rdx 971 movq %r13,-24(%r14) 972 movq %rdx,%r13 973 974 mulq %rbx 975 addq %rax,%r10 976 movq 0(%rcx),%rax 977 adcq $0,%rdx 978 addq (%r14),%r10 979 adcq $0,%rdx 980 movq %rdx,%r11 981 982 mulq %rbp 983 addq %rax,%r13 984 movq 8(%rsi,%r15,1),%rax 985 adcq $0,%rdx 986 addq %r10,%r13 987 adcq $0,%rdx 988 movq %rdi,-16(%r14) 989 movq %rdx,%rdi 990 991 mulq %rbx 992 addq %rax,%r11 993 movq 8(%rcx),%rax 994 adcq $0,%rdx 995 addq 8(%r14),%r11 996 adcq $0,%rdx 997 movq %rdx,%r10 998 999 mulq %rbp 1000 addq %rax,%rdi 1001 movq 16(%rsi,%r15,1),%rax 1002 adcq $0,%rdx 1003 addq %r11,%rdi 1004 leaq 32(%rcx),%rcx 1005 adcq $0,%rdx 1006 movq %r13,-8(%r14) 1007 movq %rdx,%r13 1008 1009 addq $32,%r15 1010 jnz .Linner4x 1011 1012 mulq %rbx 1013 addq %rax,%r10 1014 movq -16(%rcx),%rax 1015 adcq $0,%rdx 1016 addq 16(%r14),%r10 1017 leaq 32(%r14),%r14 1018 adcq $0,%rdx 1019 movq %rdx,%r11 1020 1021 mulq %rbp 1022 addq %rax,%r13 1023 movq -8(%rsi),%rax 1024 adcq $0,%rdx 1025 addq %r10,%r13 1026 adcq $0,%rdx 1027 movq %rdi,-32(%r14) 1028 movq %rdx,%rdi 1029 1030 mulq %rbx 1031 addq %rax,%r11 1032 movq %rbp,%rax 1033 movq -8(%rcx),%rbp 1034 adcq $0,%rdx 1035 addq -8(%r14),%r11 1036 adcq $0,%rdx 1037 movq %rdx,%r10 1038 1039 mulq %rbp 1040 addq %rax,%rdi 1041 movq (%rsi,%r9,1),%rax 1042 adcq $0,%rdx 1043 addq %r11,%rdi 1044 adcq $0,%rdx 1045 movq %r13,-24(%r14) 1046 movq %rdx,%r13 1047 1048 movq %rdi,-16(%r14) 1049 leaq (%rcx,%r9,1),%rcx 1050 1051 xorq %rdi,%rdi 1052 addq %r10,%r13 1053 adcq $0,%rdi 1054 addq (%r14),%r13 1055 adcq $0,%rdi 1056 movq %r13,-8(%r14) 1057 1058 cmpq 16+8(%rsp),%r12 1059 jb .Louter4x 1060 xorq %rax,%rax 1061 subq %r13,%rbp 1062 adcq %r15,%r15 1063 orq %r15,%rdi 1064 subq %rdi,%rax 1065 leaq (%r14,%r9,1),%rbx 1066 movq (%rcx),%r12 1067 leaq (%rcx),%rbp 1068 movq %r9,%rcx 1069 sarq $3+2,%rcx 1070 movq 56+8(%rsp),%rdi 1071 decq %r12 1072 xorq %r10,%r10 1073 movq 8(%rbp),%r13 1074 movq 16(%rbp),%r14 1075 movq 24(%rbp),%r15 1076 jmp .Lsqr4x_sub_entry 1077.cfi_endproc 1078.size mul4x_internal,.-mul4x_internal 1079.globl bn_power5 1080.type bn_power5,@function 1081.align 32 1082bn_power5: 1083.cfi_startproc 1084 movq %rsp,%rax 1085.cfi_def_cfa_register %rax 1086 movl OPENSSL_ia32cap_P+8(%rip),%r11d 1087 andl $0x80108,%r11d 1088 cmpl $0x80108,%r11d 1089 je .Lpowerx5_enter 1090 pushq %rbx 1091.cfi_offset %rbx,-16 1092 pushq %rbp 1093.cfi_offset %rbp,-24 1094 pushq %r12 1095.cfi_offset %r12,-32 1096 pushq %r13 1097.cfi_offset %r13,-40 1098 pushq %r14 1099.cfi_offset %r14,-48 1100 pushq %r15 1101.cfi_offset %r15,-56 1102.Lpower5_prologue: 1103 1104 shll $3,%r9d 1105 leal (%r9,%r9,2),%r10d 1106 negq %r9 1107 movq (%r8),%r8 1108 1109 1110 1111 1112 1113 1114 1115 1116 leaq -320(%rsp,%r9,2),%r11 1117 movq %rsp,%rbp 1118 subq %rdi,%r11 1119 andq $4095,%r11 1120 cmpq %r11,%r10 1121 jb .Lpwr_sp_alt 1122 subq %r11,%rbp 1123 leaq -320(%rbp,%r9,2),%rbp 1124 jmp .Lpwr_sp_done 1125 1126.align 32 1127.Lpwr_sp_alt: 1128 leaq 4096-320(,%r9,2),%r10 1129 leaq -320(%rbp,%r9,2),%rbp 1130 subq %r10,%r11 1131 movq $0,%r10 1132 cmovcq %r10,%r11 1133 subq %r11,%rbp 1134.Lpwr_sp_done: 1135 andq $-64,%rbp 1136 movq %rsp,%r11 1137 subq %rbp,%r11 1138 andq $-4096,%r11 1139 leaq (%r11,%rbp,1),%rsp 1140 movq (%rsp),%r10 1141 cmpq %rbp,%rsp 1142 ja .Lpwr_page_walk 1143 jmp .Lpwr_page_walk_done 1144 1145.Lpwr_page_walk: 1146 leaq -4096(%rsp),%rsp 1147 movq (%rsp),%r10 1148 cmpq %rbp,%rsp 1149 ja .Lpwr_page_walk 1150.Lpwr_page_walk_done: 1151 1152 movq %r9,%r10 1153 negq %r9 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 movq %r8,32(%rsp) 1165 movq %rax,40(%rsp) 1166.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 1167.Lpower5_body: 1168.byte 102,72,15,110,207 1169.byte 102,72,15,110,209 1170.byte 102,73,15,110,218 1171.byte 102,72,15,110,226 1172 1173 call __bn_sqr8x_internal 1174 call __bn_post4x_internal 1175 call __bn_sqr8x_internal 1176 call __bn_post4x_internal 1177 call __bn_sqr8x_internal 1178 call __bn_post4x_internal 1179 call __bn_sqr8x_internal 1180 call __bn_post4x_internal 1181 call __bn_sqr8x_internal 1182 call __bn_post4x_internal 1183 1184.byte 102,72,15,126,209 1185.byte 102,72,15,126,226 1186 movq %rsi,%rdi 1187 movq 40(%rsp),%rax 1188 leaq 32(%rsp),%r8 1189 1190 call mul4x_internal 1191 1192 movq 40(%rsp),%rsi 1193.cfi_def_cfa %rsi,8 1194 movq $1,%rax 1195 movq -48(%rsi),%r15 1196.cfi_restore %r15 1197 movq -40(%rsi),%r14 1198.cfi_restore %r14 1199 movq -32(%rsi),%r13 1200.cfi_restore %r13 1201 movq -24(%rsi),%r12 1202.cfi_restore %r12 1203 movq -16(%rsi),%rbp 1204.cfi_restore %rbp 1205 movq -8(%rsi),%rbx 1206.cfi_restore %rbx 1207 leaq (%rsi),%rsp 1208.cfi_def_cfa_register %rsp 1209.Lpower5_epilogue: 1210 .byte 0xf3,0xc3 1211.cfi_endproc 1212.size bn_power5,.-bn_power5 1213 1214.globl bn_sqr8x_internal 1215.hidden bn_sqr8x_internal 1216.type bn_sqr8x_internal,@function 1217.align 32 1218bn_sqr8x_internal: 1219__bn_sqr8x_internal: 1220.cfi_startproc 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 leaq 32(%r10),%rbp 1295 leaq (%rsi,%r9,1),%rsi 1296 1297 movq %r9,%rcx 1298 1299 1300 movq -32(%rsi,%rbp,1),%r14 1301 leaq 48+8(%rsp,%r9,2),%rdi 1302 movq -24(%rsi,%rbp,1),%rax 1303 leaq -32(%rdi,%rbp,1),%rdi 1304 movq -16(%rsi,%rbp,1),%rbx 1305 movq %rax,%r15 1306 1307 mulq %r14 1308 movq %rax,%r10 1309 movq %rbx,%rax 1310 movq %rdx,%r11 1311 movq %r10,-24(%rdi,%rbp,1) 1312 1313 mulq %r14 1314 addq %rax,%r11 1315 movq %rbx,%rax 1316 adcq $0,%rdx 1317 movq %r11,-16(%rdi,%rbp,1) 1318 movq %rdx,%r10 1319 1320 1321 movq -8(%rsi,%rbp,1),%rbx 1322 mulq %r15 1323 movq %rax,%r12 1324 movq %rbx,%rax 1325 movq %rdx,%r13 1326 1327 leaq (%rbp),%rcx 1328 mulq %r14 1329 addq %rax,%r10 1330 movq %rbx,%rax 1331 movq %rdx,%r11 1332 adcq $0,%r11 1333 addq %r12,%r10 1334 adcq $0,%r11 1335 movq %r10,-8(%rdi,%rcx,1) 1336 jmp .Lsqr4x_1st 1337 1338.align 32 1339.Lsqr4x_1st: 1340 movq (%rsi,%rcx,1),%rbx 1341 mulq %r15 1342 addq %rax,%r13 1343 movq %rbx,%rax 1344 movq %rdx,%r12 1345 adcq $0,%r12 1346 1347 mulq %r14 1348 addq %rax,%r11 1349 movq %rbx,%rax 1350 movq 8(%rsi,%rcx,1),%rbx 1351 movq %rdx,%r10 1352 adcq $0,%r10 1353 addq %r13,%r11 1354 adcq $0,%r10 1355 1356 1357 mulq %r15 1358 addq %rax,%r12 1359 movq %rbx,%rax 1360 movq %r11,(%rdi,%rcx,1) 1361 movq %rdx,%r13 1362 adcq $0,%r13 1363 1364 mulq %r14 1365 addq %rax,%r10 1366 movq %rbx,%rax 1367 movq 16(%rsi,%rcx,1),%rbx 1368 movq %rdx,%r11 1369 adcq $0,%r11 1370 addq %r12,%r10 1371 adcq $0,%r11 1372 1373 mulq %r15 1374 addq %rax,%r13 1375 movq %rbx,%rax 1376 movq %r10,8(%rdi,%rcx,1) 1377 movq %rdx,%r12 1378 adcq $0,%r12 1379 1380 mulq %r14 1381 addq %rax,%r11 1382 movq %rbx,%rax 1383 movq 24(%rsi,%rcx,1),%rbx 1384 movq %rdx,%r10 1385 adcq $0,%r10 1386 addq %r13,%r11 1387 adcq $0,%r10 1388 1389 1390 mulq %r15 1391 addq %rax,%r12 1392 movq %rbx,%rax 1393 movq %r11,16(%rdi,%rcx,1) 1394 movq %rdx,%r13 1395 adcq $0,%r13 1396 leaq 32(%rcx),%rcx 1397 1398 mulq %r14 1399 addq %rax,%r10 1400 movq %rbx,%rax 1401 movq %rdx,%r11 1402 adcq $0,%r11 1403 addq %r12,%r10 1404 adcq $0,%r11 1405 movq %r10,-8(%rdi,%rcx,1) 1406 1407 cmpq $0,%rcx 1408 jne .Lsqr4x_1st 1409 1410 mulq %r15 1411 addq %rax,%r13 1412 leaq 16(%rbp),%rbp 1413 adcq $0,%rdx 1414 addq %r11,%r13 1415 adcq $0,%rdx 1416 1417 movq %r13,(%rdi) 1418 movq %rdx,%r12 1419 movq %rdx,8(%rdi) 1420 jmp .Lsqr4x_outer 1421 1422.align 32 1423.Lsqr4x_outer: 1424 movq -32(%rsi,%rbp,1),%r14 1425 leaq 48+8(%rsp,%r9,2),%rdi 1426 movq -24(%rsi,%rbp,1),%rax 1427 leaq -32(%rdi,%rbp,1),%rdi 1428 movq -16(%rsi,%rbp,1),%rbx 1429 movq %rax,%r15 1430 1431 mulq %r14 1432 movq -24(%rdi,%rbp,1),%r10 1433 addq %rax,%r10 1434 movq %rbx,%rax 1435 adcq $0,%rdx 1436 movq %r10,-24(%rdi,%rbp,1) 1437 movq %rdx,%r11 1438 1439 mulq %r14 1440 addq %rax,%r11 1441 movq %rbx,%rax 1442 adcq $0,%rdx 1443 addq -16(%rdi,%rbp,1),%r11 1444 movq %rdx,%r10 1445 adcq $0,%r10 1446 movq %r11,-16(%rdi,%rbp,1) 1447 1448 xorq %r12,%r12 1449 1450 movq -8(%rsi,%rbp,1),%rbx 1451 mulq %r15 1452 addq %rax,%r12 1453 movq %rbx,%rax 1454 adcq $0,%rdx 1455 addq -8(%rdi,%rbp,1),%r12 1456 movq %rdx,%r13 1457 adcq $0,%r13 1458 1459 mulq %r14 1460 addq %rax,%r10 1461 movq %rbx,%rax 1462 adcq $0,%rdx 1463 addq %r12,%r10 1464 movq %rdx,%r11 1465 adcq $0,%r11 1466 movq %r10,-8(%rdi,%rbp,1) 1467 1468 leaq (%rbp),%rcx 1469 jmp .Lsqr4x_inner 1470 1471.align 32 1472.Lsqr4x_inner: 1473 movq (%rsi,%rcx,1),%rbx 1474 mulq %r15 1475 addq %rax,%r13 1476 movq %rbx,%rax 1477 movq %rdx,%r12 1478 adcq $0,%r12 1479 addq (%rdi,%rcx,1),%r13 1480 adcq $0,%r12 1481 1482.byte 0x67 1483 mulq %r14 1484 addq %rax,%r11 1485 movq %rbx,%rax 1486 movq 8(%rsi,%rcx,1),%rbx 1487 movq %rdx,%r10 1488 adcq $0,%r10 1489 addq %r13,%r11 1490 adcq $0,%r10 1491 1492 mulq %r15 1493 addq %rax,%r12 1494 movq %r11,(%rdi,%rcx,1) 1495 movq %rbx,%rax 1496 movq %rdx,%r13 1497 adcq $0,%r13 1498 addq 8(%rdi,%rcx,1),%r12 1499 leaq 16(%rcx),%rcx 1500 adcq $0,%r13 1501 1502 mulq %r14 1503 addq %rax,%r10 1504 movq %rbx,%rax 1505 adcq $0,%rdx 1506 addq %r12,%r10 1507 movq %rdx,%r11 1508 adcq $0,%r11 1509 movq %r10,-8(%rdi,%rcx,1) 1510 1511 cmpq $0,%rcx 1512 jne .Lsqr4x_inner 1513 1514.byte 0x67 1515 mulq %r15 1516 addq %rax,%r13 1517 adcq $0,%rdx 1518 addq %r11,%r13 1519 adcq $0,%rdx 1520 1521 movq %r13,(%rdi) 1522 movq %rdx,%r12 1523 movq %rdx,8(%rdi) 1524 1525 addq $16,%rbp 1526 jnz .Lsqr4x_outer 1527 1528 1529 movq -32(%rsi),%r14 1530 leaq 48+8(%rsp,%r9,2),%rdi 1531 movq -24(%rsi),%rax 1532 leaq -32(%rdi,%rbp,1),%rdi 1533 movq -16(%rsi),%rbx 1534 movq %rax,%r15 1535 1536 mulq %r14 1537 addq %rax,%r10 1538 movq %rbx,%rax 1539 movq %rdx,%r11 1540 adcq $0,%r11 1541 1542 mulq %r14 1543 addq %rax,%r11 1544 movq %rbx,%rax 1545 movq %r10,-24(%rdi) 1546 movq %rdx,%r10 1547 adcq $0,%r10 1548 addq %r13,%r11 1549 movq -8(%rsi),%rbx 1550 adcq $0,%r10 1551 1552 mulq %r15 1553 addq %rax,%r12 1554 movq %rbx,%rax 1555 movq %r11,-16(%rdi) 1556 movq %rdx,%r13 1557 adcq $0,%r13 1558 1559 mulq %r14 1560 addq %rax,%r10 1561 movq %rbx,%rax 1562 movq %rdx,%r11 1563 adcq $0,%r11 1564 addq %r12,%r10 1565 adcq $0,%r11 1566 movq %r10,-8(%rdi) 1567 1568 mulq %r15 1569 addq %rax,%r13 1570 movq -16(%rsi),%rax 1571 adcq $0,%rdx 1572 addq %r11,%r13 1573 adcq $0,%rdx 1574 1575 movq %r13,(%rdi) 1576 movq %rdx,%r12 1577 movq %rdx,8(%rdi) 1578 1579 mulq %rbx 1580 addq $16,%rbp 1581 xorq %r14,%r14 1582 subq %r9,%rbp 1583 xorq %r15,%r15 1584 1585 addq %r12,%rax 1586 adcq $0,%rdx 1587 movq %rax,8(%rdi) 1588 movq %rdx,16(%rdi) 1589 movq %r15,24(%rdi) 1590 1591 movq -16(%rsi,%rbp,1),%rax 1592 leaq 48+8(%rsp),%rdi 1593 xorq %r10,%r10 1594 movq 8(%rdi),%r11 1595 1596 leaq (%r14,%r10,2),%r12 1597 shrq $63,%r10 1598 leaq (%rcx,%r11,2),%r13 1599 shrq $63,%r11 1600 orq %r10,%r13 1601 movq 16(%rdi),%r10 1602 movq %r11,%r14 1603 mulq %rax 1604 negq %r15 1605 movq 24(%rdi),%r11 1606 adcq %rax,%r12 1607 movq -8(%rsi,%rbp,1),%rax 1608 movq %r12,(%rdi) 1609 adcq %rdx,%r13 1610 1611 leaq (%r14,%r10,2),%rbx 1612 movq %r13,8(%rdi) 1613 sbbq %r15,%r15 1614 shrq $63,%r10 1615 leaq (%rcx,%r11,2),%r8 1616 shrq $63,%r11 1617 orq %r10,%r8 1618 movq 32(%rdi),%r10 1619 movq %r11,%r14 1620 mulq %rax 1621 negq %r15 1622 movq 40(%rdi),%r11 1623 adcq %rax,%rbx 1624 movq 0(%rsi,%rbp,1),%rax 1625 movq %rbx,16(%rdi) 1626 adcq %rdx,%r8 1627 leaq 16(%rbp),%rbp 1628 movq %r8,24(%rdi) 1629 sbbq %r15,%r15 1630 leaq 64(%rdi),%rdi 1631 jmp .Lsqr4x_shift_n_add 1632 1633.align 32 1634.Lsqr4x_shift_n_add: 1635 leaq (%r14,%r10,2),%r12 1636 shrq $63,%r10 1637 leaq (%rcx,%r11,2),%r13 1638 shrq $63,%r11 1639 orq %r10,%r13 1640 movq -16(%rdi),%r10 1641 movq %r11,%r14 1642 mulq %rax 1643 negq %r15 1644 movq -8(%rdi),%r11 1645 adcq %rax,%r12 1646 movq -8(%rsi,%rbp,1),%rax 1647 movq %r12,-32(%rdi) 1648 adcq %rdx,%r13 1649 1650 leaq (%r14,%r10,2),%rbx 1651 movq %r13,-24(%rdi) 1652 sbbq %r15,%r15 1653 shrq $63,%r10 1654 leaq (%rcx,%r11,2),%r8 1655 shrq $63,%r11 1656 orq %r10,%r8 1657 movq 0(%rdi),%r10 1658 movq %r11,%r14 1659 mulq %rax 1660 negq %r15 1661 movq 8(%rdi),%r11 1662 adcq %rax,%rbx 1663 movq 0(%rsi,%rbp,1),%rax 1664 movq %rbx,-16(%rdi) 1665 adcq %rdx,%r8 1666 1667 leaq (%r14,%r10,2),%r12 1668 movq %r8,-8(%rdi) 1669 sbbq %r15,%r15 1670 shrq $63,%r10 1671 leaq (%rcx,%r11,2),%r13 1672 shrq $63,%r11 1673 orq %r10,%r13 1674 movq 16(%rdi),%r10 1675 movq %r11,%r14 1676 mulq %rax 1677 negq %r15 1678 movq 24(%rdi),%r11 1679 adcq %rax,%r12 1680 movq 8(%rsi,%rbp,1),%rax 1681 movq %r12,0(%rdi) 1682 adcq %rdx,%r13 1683 1684 leaq (%r14,%r10,2),%rbx 1685 movq %r13,8(%rdi) 1686 sbbq %r15,%r15 1687 shrq $63,%r10 1688 leaq (%rcx,%r11,2),%r8 1689 shrq $63,%r11 1690 orq %r10,%r8 1691 movq 32(%rdi),%r10 1692 movq %r11,%r14 1693 mulq %rax 1694 negq %r15 1695 movq 40(%rdi),%r11 1696 adcq %rax,%rbx 1697 movq 16(%rsi,%rbp,1),%rax 1698 movq %rbx,16(%rdi) 1699 adcq %rdx,%r8 1700 movq %r8,24(%rdi) 1701 sbbq %r15,%r15 1702 leaq 64(%rdi),%rdi 1703 addq $32,%rbp 1704 jnz .Lsqr4x_shift_n_add 1705 1706 leaq (%r14,%r10,2),%r12 1707.byte 0x67 1708 shrq $63,%r10 1709 leaq (%rcx,%r11,2),%r13 1710 shrq $63,%r11 1711 orq %r10,%r13 1712 movq -16(%rdi),%r10 1713 movq %r11,%r14 1714 mulq %rax 1715 negq %r15 1716 movq -8(%rdi),%r11 1717 adcq %rax,%r12 1718 movq -8(%rsi),%rax 1719 movq %r12,-32(%rdi) 1720 adcq %rdx,%r13 1721 1722 leaq (%r14,%r10,2),%rbx 1723 movq %r13,-24(%rdi) 1724 sbbq %r15,%r15 1725 shrq $63,%r10 1726 leaq (%rcx,%r11,2),%r8 1727 shrq $63,%r11 1728 orq %r10,%r8 1729 mulq %rax 1730 negq %r15 1731 adcq %rax,%rbx 1732 adcq %rdx,%r8 1733 movq %rbx,-16(%rdi) 1734 movq %r8,-8(%rdi) 1735.byte 102,72,15,126,213 1736__bn_sqr8x_reduction: 1737 xorq %rax,%rax 1738 leaq (%r9,%rbp,1),%rcx 1739 leaq 48+8(%rsp,%r9,2),%rdx 1740 movq %rcx,0+8(%rsp) 1741 leaq 48+8(%rsp,%r9,1),%rdi 1742 movq %rdx,8+8(%rsp) 1743 negq %r9 1744 jmp .L8x_reduction_loop 1745 1746.align 32 1747.L8x_reduction_loop: 1748 leaq (%rdi,%r9,1),%rdi 1749.byte 0x66 1750 movq 0(%rdi),%rbx 1751 movq 8(%rdi),%r9 1752 movq 16(%rdi),%r10 1753 movq 24(%rdi),%r11 1754 movq 32(%rdi),%r12 1755 movq 40(%rdi),%r13 1756 movq 48(%rdi),%r14 1757 movq 56(%rdi),%r15 1758 movq %rax,(%rdx) 1759 leaq 64(%rdi),%rdi 1760 1761.byte 0x67 1762 movq %rbx,%r8 1763 imulq 32+8(%rsp),%rbx 1764 movq 0(%rbp),%rax 1765 movl $8,%ecx 1766 jmp .L8x_reduce 1767 1768.align 32 1769.L8x_reduce: 1770 mulq %rbx 1771 movq 8(%rbp),%rax 1772 negq %r8 1773 movq %rdx,%r8 1774 adcq $0,%r8 1775 1776 mulq %rbx 1777 addq %rax,%r9 1778 movq 16(%rbp),%rax 1779 adcq $0,%rdx 1780 addq %r9,%r8 1781 movq %rbx,48-8+8(%rsp,%rcx,8) 1782 movq %rdx,%r9 1783 adcq $0,%r9 1784 1785 mulq %rbx 1786 addq %rax,%r10 1787 movq 24(%rbp),%rax 1788 adcq $0,%rdx 1789 addq %r10,%r9 1790 movq 32+8(%rsp),%rsi 1791 movq %rdx,%r10 1792 adcq $0,%r10 1793 1794 mulq %rbx 1795 addq %rax,%r11 1796 movq 32(%rbp),%rax 1797 adcq $0,%rdx 1798 imulq %r8,%rsi 1799 addq %r11,%r10 1800 movq %rdx,%r11 1801 adcq $0,%r11 1802 1803 mulq %rbx 1804 addq %rax,%r12 1805 movq 40(%rbp),%rax 1806 adcq $0,%rdx 1807 addq %r12,%r11 1808 movq %rdx,%r12 1809 adcq $0,%r12 1810 1811 mulq %rbx 1812 addq %rax,%r13 1813 movq 48(%rbp),%rax 1814 adcq $0,%rdx 1815 addq %r13,%r12 1816 movq %rdx,%r13 1817 adcq $0,%r13 1818 1819 mulq %rbx 1820 addq %rax,%r14 1821 movq 56(%rbp),%rax 1822 adcq $0,%rdx 1823 addq %r14,%r13 1824 movq %rdx,%r14 1825 adcq $0,%r14 1826 1827 mulq %rbx 1828 movq %rsi,%rbx 1829 addq %rax,%r15 1830 movq 0(%rbp),%rax 1831 adcq $0,%rdx 1832 addq %r15,%r14 1833 movq %rdx,%r15 1834 adcq $0,%r15 1835 1836 decl %ecx 1837 jnz .L8x_reduce 1838 1839 leaq 64(%rbp),%rbp 1840 xorq %rax,%rax 1841 movq 8+8(%rsp),%rdx 1842 cmpq 0+8(%rsp),%rbp 1843 jae .L8x_no_tail 1844 1845.byte 0x66 1846 addq 0(%rdi),%r8 1847 adcq 8(%rdi),%r9 1848 adcq 16(%rdi),%r10 1849 adcq 24(%rdi),%r11 1850 adcq 32(%rdi),%r12 1851 adcq 40(%rdi),%r13 1852 adcq 48(%rdi),%r14 1853 adcq 56(%rdi),%r15 1854 sbbq %rsi,%rsi 1855 1856 movq 48+56+8(%rsp),%rbx 1857 movl $8,%ecx 1858 movq 0(%rbp),%rax 1859 jmp .L8x_tail 1860 1861.align 32 1862.L8x_tail: 1863 mulq %rbx 1864 addq %rax,%r8 1865 movq 8(%rbp),%rax 1866 movq %r8,(%rdi) 1867 movq %rdx,%r8 1868 adcq $0,%r8 1869 1870 mulq %rbx 1871 addq %rax,%r9 1872 movq 16(%rbp),%rax 1873 adcq $0,%rdx 1874 addq %r9,%r8 1875 leaq 8(%rdi),%rdi 1876 movq %rdx,%r9 1877 adcq $0,%r9 1878 1879 mulq %rbx 1880 addq %rax,%r10 1881 movq 24(%rbp),%rax 1882 adcq $0,%rdx 1883 addq %r10,%r9 1884 movq %rdx,%r10 1885 adcq $0,%r10 1886 1887 mulq %rbx 1888 addq %rax,%r11 1889 movq 32(%rbp),%rax 1890 adcq $0,%rdx 1891 addq %r11,%r10 1892 movq %rdx,%r11 1893 adcq $0,%r11 1894 1895 mulq %rbx 1896 addq %rax,%r12 1897 movq 40(%rbp),%rax 1898 adcq $0,%rdx 1899 addq %r12,%r11 1900 movq %rdx,%r12 1901 adcq $0,%r12 1902 1903 mulq %rbx 1904 addq %rax,%r13 1905 movq 48(%rbp),%rax 1906 adcq $0,%rdx 1907 addq %r13,%r12 1908 movq %rdx,%r13 1909 adcq $0,%r13 1910 1911 mulq %rbx 1912 addq %rax,%r14 1913 movq 56(%rbp),%rax 1914 adcq $0,%rdx 1915 addq %r14,%r13 1916 movq %rdx,%r14 1917 adcq $0,%r14 1918 1919 mulq %rbx 1920 movq 48-16+8(%rsp,%rcx,8),%rbx 1921 addq %rax,%r15 1922 adcq $0,%rdx 1923 addq %r15,%r14 1924 movq 0(%rbp),%rax 1925 movq %rdx,%r15 1926 adcq $0,%r15 1927 1928 decl %ecx 1929 jnz .L8x_tail 1930 1931 leaq 64(%rbp),%rbp 1932 movq 8+8(%rsp),%rdx 1933 cmpq 0+8(%rsp),%rbp 1934 jae .L8x_tail_done 1935 1936 movq 48+56+8(%rsp),%rbx 1937 negq %rsi 1938 movq 0(%rbp),%rax 1939 adcq 0(%rdi),%r8 1940 adcq 8(%rdi),%r9 1941 adcq 16(%rdi),%r10 1942 adcq 24(%rdi),%r11 1943 adcq 32(%rdi),%r12 1944 adcq 40(%rdi),%r13 1945 adcq 48(%rdi),%r14 1946 adcq 56(%rdi),%r15 1947 sbbq %rsi,%rsi 1948 1949 movl $8,%ecx 1950 jmp .L8x_tail 1951 1952.align 32 1953.L8x_tail_done: 1954 xorq %rax,%rax 1955 addq (%rdx),%r8 1956 adcq $0,%r9 1957 adcq $0,%r10 1958 adcq $0,%r11 1959 adcq $0,%r12 1960 adcq $0,%r13 1961 adcq $0,%r14 1962 adcq $0,%r15 1963 adcq $0,%rax 1964 1965 negq %rsi 1966.L8x_no_tail: 1967 adcq 0(%rdi),%r8 1968 adcq 8(%rdi),%r9 1969 adcq 16(%rdi),%r10 1970 adcq 24(%rdi),%r11 1971 adcq 32(%rdi),%r12 1972 adcq 40(%rdi),%r13 1973 adcq 48(%rdi),%r14 1974 adcq 56(%rdi),%r15 1975 adcq $0,%rax 1976 movq -8(%rbp),%rcx 1977 xorq %rsi,%rsi 1978 1979.byte 102,72,15,126,213 1980 1981 movq %r8,0(%rdi) 1982 movq %r9,8(%rdi) 1983.byte 102,73,15,126,217 1984 movq %r10,16(%rdi) 1985 movq %r11,24(%rdi) 1986 movq %r12,32(%rdi) 1987 movq %r13,40(%rdi) 1988 movq %r14,48(%rdi) 1989 movq %r15,56(%rdi) 1990 leaq 64(%rdi),%rdi 1991 1992 cmpq %rdx,%rdi 1993 jb .L8x_reduction_loop 1994 .byte 0xf3,0xc3 1995.cfi_endproc 1996.size bn_sqr8x_internal,.-bn_sqr8x_internal 1997.type __bn_post4x_internal,@function 1998.align 32 1999__bn_post4x_internal: 2000.cfi_startproc 2001 movq 0(%rbp),%r12 2002 leaq (%rdi,%r9,1),%rbx 2003 movq %r9,%rcx 2004.byte 102,72,15,126,207 2005 negq %rax 2006.byte 102,72,15,126,206 2007 sarq $3+2,%rcx 2008 decq %r12 2009 xorq %r10,%r10 2010 movq 8(%rbp),%r13 2011 movq 16(%rbp),%r14 2012 movq 24(%rbp),%r15 2013 jmp .Lsqr4x_sub_entry 2014 2015.align 16 2016.Lsqr4x_sub: 2017 movq 0(%rbp),%r12 2018 movq 8(%rbp),%r13 2019 movq 16(%rbp),%r14 2020 movq 24(%rbp),%r15 2021.Lsqr4x_sub_entry: 2022 leaq 32(%rbp),%rbp 2023 notq %r12 2024 notq %r13 2025 notq %r14 2026 notq %r15 2027 andq %rax,%r12 2028 andq %rax,%r13 2029 andq %rax,%r14 2030 andq %rax,%r15 2031 2032 negq %r10 2033 adcq 0(%rbx),%r12 2034 adcq 8(%rbx),%r13 2035 adcq 16(%rbx),%r14 2036 adcq 24(%rbx),%r15 2037 movq %r12,0(%rdi) 2038 leaq 32(%rbx),%rbx 2039 movq %r13,8(%rdi) 2040 sbbq %r10,%r10 2041 movq %r14,16(%rdi) 2042 movq %r15,24(%rdi) 2043 leaq 32(%rdi),%rdi 2044 2045 incq %rcx 2046 jnz .Lsqr4x_sub 2047 2048 movq %r9,%r10 2049 negq %r9 2050 .byte 0xf3,0xc3 2051.cfi_endproc 2052.size __bn_post4x_internal,.-__bn_post4x_internal 2053.globl bn_from_montgomery 2054.type bn_from_montgomery,@function 2055.align 32 2056bn_from_montgomery: 2057.cfi_startproc 2058 testl $7,%r9d 2059 jz bn_from_mont8x 2060 xorl %eax,%eax 2061 .byte 0xf3,0xc3 2062.cfi_endproc 2063.size bn_from_montgomery,.-bn_from_montgomery 2064 2065.type bn_from_mont8x,@function 2066.align 32 2067bn_from_mont8x: 2068.cfi_startproc 2069.byte 0x67 2070 movq %rsp,%rax 2071.cfi_def_cfa_register %rax 2072 pushq %rbx 2073.cfi_offset %rbx,-16 2074 pushq %rbp 2075.cfi_offset %rbp,-24 2076 pushq %r12 2077.cfi_offset %r12,-32 2078 pushq %r13 2079.cfi_offset %r13,-40 2080 pushq %r14 2081.cfi_offset %r14,-48 2082 pushq %r15 2083.cfi_offset %r15,-56 2084.Lfrom_prologue: 2085 2086 shll $3,%r9d 2087 leaq (%r9,%r9,2),%r10 2088 negq %r9 2089 movq (%r8),%r8 2090 2091 2092 2093 2094 2095 2096 2097 2098 leaq -320(%rsp,%r9,2),%r11 2099 movq %rsp,%rbp 2100 subq %rdi,%r11 2101 andq $4095,%r11 2102 cmpq %r11,%r10 2103 jb .Lfrom_sp_alt 2104 subq %r11,%rbp 2105 leaq -320(%rbp,%r9,2),%rbp 2106 jmp .Lfrom_sp_done 2107 2108.align 32 2109.Lfrom_sp_alt: 2110 leaq 4096-320(,%r9,2),%r10 2111 leaq -320(%rbp,%r9,2),%rbp 2112 subq %r10,%r11 2113 movq $0,%r10 2114 cmovcq %r10,%r11 2115 subq %r11,%rbp 2116.Lfrom_sp_done: 2117 andq $-64,%rbp 2118 movq %rsp,%r11 2119 subq %rbp,%r11 2120 andq $-4096,%r11 2121 leaq (%r11,%rbp,1),%rsp 2122 movq (%rsp),%r10 2123 cmpq %rbp,%rsp 2124 ja .Lfrom_page_walk 2125 jmp .Lfrom_page_walk_done 2126 2127.Lfrom_page_walk: 2128 leaq -4096(%rsp),%rsp 2129 movq (%rsp),%r10 2130 cmpq %rbp,%rsp 2131 ja .Lfrom_page_walk 2132.Lfrom_page_walk_done: 2133 2134 movq %r9,%r10 2135 negq %r9 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 movq %r8,32(%rsp) 2147 movq %rax,40(%rsp) 2148.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2149.Lfrom_body: 2150 movq %r9,%r11 2151 leaq 48(%rsp),%rax 2152 pxor %xmm0,%xmm0 2153 jmp .Lmul_by_1 2154 2155.align 32 2156.Lmul_by_1: 2157 movdqu (%rsi),%xmm1 2158 movdqu 16(%rsi),%xmm2 2159 movdqu 32(%rsi),%xmm3 2160 movdqa %xmm0,(%rax,%r9,1) 2161 movdqu 48(%rsi),%xmm4 2162 movdqa %xmm0,16(%rax,%r9,1) 2163.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 2164 movdqa %xmm1,(%rax) 2165 movdqa %xmm0,32(%rax,%r9,1) 2166 movdqa %xmm2,16(%rax) 2167 movdqa %xmm0,48(%rax,%r9,1) 2168 movdqa %xmm3,32(%rax) 2169 movdqa %xmm4,48(%rax) 2170 leaq 64(%rax),%rax 2171 subq $64,%r11 2172 jnz .Lmul_by_1 2173 2174.byte 102,72,15,110,207 2175.byte 102,72,15,110,209 2176.byte 0x67 2177 movq %rcx,%rbp 2178.byte 102,73,15,110,218 2179 movl OPENSSL_ia32cap_P+8(%rip),%r11d 2180 andl $0x80108,%r11d 2181 cmpl $0x80108,%r11d 2182 jne .Lfrom_mont_nox 2183 2184 leaq (%rax,%r9,1),%rdi 2185 call __bn_sqrx8x_reduction 2186 call __bn_postx4x_internal 2187 2188 pxor %xmm0,%xmm0 2189 leaq 48(%rsp),%rax 2190 jmp .Lfrom_mont_zero 2191 2192.align 32 2193.Lfrom_mont_nox: 2194 call __bn_sqr8x_reduction 2195 call __bn_post4x_internal 2196 2197 pxor %xmm0,%xmm0 2198 leaq 48(%rsp),%rax 2199 jmp .Lfrom_mont_zero 2200 2201.align 32 2202.Lfrom_mont_zero: 2203 movq 40(%rsp),%rsi 2204.cfi_def_cfa %rsi,8 2205 movdqa %xmm0,0(%rax) 2206 movdqa %xmm0,16(%rax) 2207 movdqa %xmm0,32(%rax) 2208 movdqa %xmm0,48(%rax) 2209 leaq 64(%rax),%rax 2210 subq $32,%r9 2211 jnz .Lfrom_mont_zero 2212 2213 movq $1,%rax 2214 movq -48(%rsi),%r15 2215.cfi_restore %r15 2216 movq -40(%rsi),%r14 2217.cfi_restore %r14 2218 movq -32(%rsi),%r13 2219.cfi_restore %r13 2220 movq -24(%rsi),%r12 2221.cfi_restore %r12 2222 movq -16(%rsi),%rbp 2223.cfi_restore %rbp 2224 movq -8(%rsi),%rbx 2225.cfi_restore %rbx 2226 leaq (%rsi),%rsp 2227.cfi_def_cfa_register %rsp 2228.Lfrom_epilogue: 2229 .byte 0xf3,0xc3 2230.cfi_endproc 2231.size bn_from_mont8x,.-bn_from_mont8x 2232.type bn_mulx4x_mont_gather5,@function 2233.align 32 2234bn_mulx4x_mont_gather5: 2235.cfi_startproc 2236 movq %rsp,%rax 2237.cfi_def_cfa_register %rax 2238.Lmulx4x_enter: 2239 pushq %rbx 2240.cfi_offset %rbx,-16 2241 pushq %rbp 2242.cfi_offset %rbp,-24 2243 pushq %r12 2244.cfi_offset %r12,-32 2245 pushq %r13 2246.cfi_offset %r13,-40 2247 pushq %r14 2248.cfi_offset %r14,-48 2249 pushq %r15 2250.cfi_offset %r15,-56 2251.Lmulx4x_prologue: 2252 2253 shll $3,%r9d 2254 leaq (%r9,%r9,2),%r10 2255 negq %r9 2256 movq (%r8),%r8 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 leaq -320(%rsp,%r9,2),%r11 2268 movq %rsp,%rbp 2269 subq %rdi,%r11 2270 andq $4095,%r11 2271 cmpq %r11,%r10 2272 jb .Lmulx4xsp_alt 2273 subq %r11,%rbp 2274 leaq -320(%rbp,%r9,2),%rbp 2275 jmp .Lmulx4xsp_done 2276 2277.Lmulx4xsp_alt: 2278 leaq 4096-320(,%r9,2),%r10 2279 leaq -320(%rbp,%r9,2),%rbp 2280 subq %r10,%r11 2281 movq $0,%r10 2282 cmovcq %r10,%r11 2283 subq %r11,%rbp 2284.Lmulx4xsp_done: 2285 andq $-64,%rbp 2286 movq %rsp,%r11 2287 subq %rbp,%r11 2288 andq $-4096,%r11 2289 leaq (%r11,%rbp,1),%rsp 2290 movq (%rsp),%r10 2291 cmpq %rbp,%rsp 2292 ja .Lmulx4x_page_walk 2293 jmp .Lmulx4x_page_walk_done 2294 2295.Lmulx4x_page_walk: 2296 leaq -4096(%rsp),%rsp 2297 movq (%rsp),%r10 2298 cmpq %rbp,%rsp 2299 ja .Lmulx4x_page_walk 2300.Lmulx4x_page_walk_done: 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 movq %r8,32(%rsp) 2315 movq %rax,40(%rsp) 2316.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2317.Lmulx4x_body: 2318 call mulx4x_internal 2319 2320 movq 40(%rsp),%rsi 2321.cfi_def_cfa %rsi,8 2322 movq $1,%rax 2323 2324 movq -48(%rsi),%r15 2325.cfi_restore %r15 2326 movq -40(%rsi),%r14 2327.cfi_restore %r14 2328 movq -32(%rsi),%r13 2329.cfi_restore %r13 2330 movq -24(%rsi),%r12 2331.cfi_restore %r12 2332 movq -16(%rsi),%rbp 2333.cfi_restore %rbp 2334 movq -8(%rsi),%rbx 2335.cfi_restore %rbx 2336 leaq (%rsi),%rsp 2337.cfi_def_cfa_register %rsp 2338.Lmulx4x_epilogue: 2339 .byte 0xf3,0xc3 2340.cfi_endproc 2341.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2342 2343.type mulx4x_internal,@function 2344.align 32 2345mulx4x_internal: 2346.cfi_startproc 2347 movq %r9,8(%rsp) 2348 movq %r9,%r10 2349 negq %r9 2350 shlq $5,%r9 2351 negq %r10 2352 leaq 128(%rdx,%r9,1),%r13 2353 shrq $5+5,%r9 2354 movd 8(%rax),%xmm5 2355 subq $1,%r9 2356 leaq .Linc(%rip),%rax 2357 movq %r13,16+8(%rsp) 2358 movq %r9,24+8(%rsp) 2359 movq %rdi,56+8(%rsp) 2360 movdqa 0(%rax),%xmm0 2361 movdqa 16(%rax),%xmm1 2362 leaq 88-112(%rsp,%r10,1),%r10 2363 leaq 128(%rdx),%rdi 2364 2365 pshufd $0,%xmm5,%xmm5 2366 movdqa %xmm1,%xmm4 2367.byte 0x67 2368 movdqa %xmm1,%xmm2 2369.byte 0x67 2370 paddd %xmm0,%xmm1 2371 pcmpeqd %xmm5,%xmm0 2372 movdqa %xmm4,%xmm3 2373 paddd %xmm1,%xmm2 2374 pcmpeqd %xmm5,%xmm1 2375 movdqa %xmm0,112(%r10) 2376 movdqa %xmm4,%xmm0 2377 2378 paddd %xmm2,%xmm3 2379 pcmpeqd %xmm5,%xmm2 2380 movdqa %xmm1,128(%r10) 2381 movdqa %xmm4,%xmm1 2382 2383 paddd %xmm3,%xmm0 2384 pcmpeqd %xmm5,%xmm3 2385 movdqa %xmm2,144(%r10) 2386 movdqa %xmm4,%xmm2 2387 2388 paddd %xmm0,%xmm1 2389 pcmpeqd %xmm5,%xmm0 2390 movdqa %xmm3,160(%r10) 2391 movdqa %xmm4,%xmm3 2392 paddd %xmm1,%xmm2 2393 pcmpeqd %xmm5,%xmm1 2394 movdqa %xmm0,176(%r10) 2395 movdqa %xmm4,%xmm0 2396 2397 paddd %xmm2,%xmm3 2398 pcmpeqd %xmm5,%xmm2 2399 movdqa %xmm1,192(%r10) 2400 movdqa %xmm4,%xmm1 2401 2402 paddd %xmm3,%xmm0 2403 pcmpeqd %xmm5,%xmm3 2404 movdqa %xmm2,208(%r10) 2405 movdqa %xmm4,%xmm2 2406 2407 paddd %xmm0,%xmm1 2408 pcmpeqd %xmm5,%xmm0 2409 movdqa %xmm3,224(%r10) 2410 movdqa %xmm4,%xmm3 2411 paddd %xmm1,%xmm2 2412 pcmpeqd %xmm5,%xmm1 2413 movdqa %xmm0,240(%r10) 2414 movdqa %xmm4,%xmm0 2415 2416 paddd %xmm2,%xmm3 2417 pcmpeqd %xmm5,%xmm2 2418 movdqa %xmm1,256(%r10) 2419 movdqa %xmm4,%xmm1 2420 2421 paddd %xmm3,%xmm0 2422 pcmpeqd %xmm5,%xmm3 2423 movdqa %xmm2,272(%r10) 2424 movdqa %xmm4,%xmm2 2425 2426 paddd %xmm0,%xmm1 2427 pcmpeqd %xmm5,%xmm0 2428 movdqa %xmm3,288(%r10) 2429 movdqa %xmm4,%xmm3 2430.byte 0x67 2431 paddd %xmm1,%xmm2 2432 pcmpeqd %xmm5,%xmm1 2433 movdqa %xmm0,304(%r10) 2434 2435 paddd %xmm2,%xmm3 2436 pcmpeqd %xmm5,%xmm2 2437 movdqa %xmm1,320(%r10) 2438 2439 pcmpeqd %xmm5,%xmm3 2440 movdqa %xmm2,336(%r10) 2441 2442 pand 64(%rdi),%xmm0 2443 pand 80(%rdi),%xmm1 2444 pand 96(%rdi),%xmm2 2445 movdqa %xmm3,352(%r10) 2446 pand 112(%rdi),%xmm3 2447 por %xmm2,%xmm0 2448 por %xmm3,%xmm1 2449 movdqa -128(%rdi),%xmm4 2450 movdqa -112(%rdi),%xmm5 2451 movdqa -96(%rdi),%xmm2 2452 pand 112(%r10),%xmm4 2453 movdqa -80(%rdi),%xmm3 2454 pand 128(%r10),%xmm5 2455 por %xmm4,%xmm0 2456 pand 144(%r10),%xmm2 2457 por %xmm5,%xmm1 2458 pand 160(%r10),%xmm3 2459 por %xmm2,%xmm0 2460 por %xmm3,%xmm1 2461 movdqa -64(%rdi),%xmm4 2462 movdqa -48(%rdi),%xmm5 2463 movdqa -32(%rdi),%xmm2 2464 pand 176(%r10),%xmm4 2465 movdqa -16(%rdi),%xmm3 2466 pand 192(%r10),%xmm5 2467 por %xmm4,%xmm0 2468 pand 208(%r10),%xmm2 2469 por %xmm5,%xmm1 2470 pand 224(%r10),%xmm3 2471 por %xmm2,%xmm0 2472 por %xmm3,%xmm1 2473 movdqa 0(%rdi),%xmm4 2474 movdqa 16(%rdi),%xmm5 2475 movdqa 32(%rdi),%xmm2 2476 pand 240(%r10),%xmm4 2477 movdqa 48(%rdi),%xmm3 2478 pand 256(%r10),%xmm5 2479 por %xmm4,%xmm0 2480 pand 272(%r10),%xmm2 2481 por %xmm5,%xmm1 2482 pand 288(%r10),%xmm3 2483 por %xmm2,%xmm0 2484 por %xmm3,%xmm1 2485 pxor %xmm1,%xmm0 2486 pshufd $0x4e,%xmm0,%xmm1 2487 por %xmm1,%xmm0 2488 leaq 256(%rdi),%rdi 2489.byte 102,72,15,126,194 2490 leaq 64+32+8(%rsp),%rbx 2491 2492 movq %rdx,%r9 2493 mulxq 0(%rsi),%r8,%rax 2494 mulxq 8(%rsi),%r11,%r12 2495 addq %rax,%r11 2496 mulxq 16(%rsi),%rax,%r13 2497 adcq %rax,%r12 2498 adcq $0,%r13 2499 mulxq 24(%rsi),%rax,%r14 2500 2501 movq %r8,%r15 2502 imulq 32+8(%rsp),%r8 2503 xorq %rbp,%rbp 2504 movq %r8,%rdx 2505 2506 movq %rdi,8+8(%rsp) 2507 2508 leaq 32(%rsi),%rsi 2509 adcxq %rax,%r13 2510 adcxq %rbp,%r14 2511 2512 mulxq 0(%rcx),%rax,%r10 2513 adcxq %rax,%r15 2514 adoxq %r11,%r10 2515 mulxq 8(%rcx),%rax,%r11 2516 adcxq %rax,%r10 2517 adoxq %r12,%r11 2518 mulxq 16(%rcx),%rax,%r12 2519 movq 24+8(%rsp),%rdi 2520 movq %r10,-32(%rbx) 2521 adcxq %rax,%r11 2522 adoxq %r13,%r12 2523 mulxq 24(%rcx),%rax,%r15 2524 movq %r9,%rdx 2525 movq %r11,-24(%rbx) 2526 adcxq %rax,%r12 2527 adoxq %rbp,%r15 2528 leaq 32(%rcx),%rcx 2529 movq %r12,-16(%rbx) 2530 jmp .Lmulx4x_1st 2531 2532.align 32 2533.Lmulx4x_1st: 2534 adcxq %rbp,%r15 2535 mulxq 0(%rsi),%r10,%rax 2536 adcxq %r14,%r10 2537 mulxq 8(%rsi),%r11,%r14 2538 adcxq %rax,%r11 2539 mulxq 16(%rsi),%r12,%rax 2540 adcxq %r14,%r12 2541 mulxq 24(%rsi),%r13,%r14 2542.byte 0x67,0x67 2543 movq %r8,%rdx 2544 adcxq %rax,%r13 2545 adcxq %rbp,%r14 2546 leaq 32(%rsi),%rsi 2547 leaq 32(%rbx),%rbx 2548 2549 adoxq %r15,%r10 2550 mulxq 0(%rcx),%rax,%r15 2551 adcxq %rax,%r10 2552 adoxq %r15,%r11 2553 mulxq 8(%rcx),%rax,%r15 2554 adcxq %rax,%r11 2555 adoxq %r15,%r12 2556 mulxq 16(%rcx),%rax,%r15 2557 movq %r10,-40(%rbx) 2558 adcxq %rax,%r12 2559 movq %r11,-32(%rbx) 2560 adoxq %r15,%r13 2561 mulxq 24(%rcx),%rax,%r15 2562 movq %r9,%rdx 2563 movq %r12,-24(%rbx) 2564 adcxq %rax,%r13 2565 adoxq %rbp,%r15 2566 leaq 32(%rcx),%rcx 2567 movq %r13,-16(%rbx) 2568 2569 decq %rdi 2570 jnz .Lmulx4x_1st 2571 2572 movq 8(%rsp),%rax 2573 adcq %rbp,%r15 2574 leaq (%rsi,%rax,1),%rsi 2575 addq %r15,%r14 2576 movq 8+8(%rsp),%rdi 2577 adcq %rbp,%rbp 2578 movq %r14,-8(%rbx) 2579 jmp .Lmulx4x_outer 2580 2581.align 32 2582.Lmulx4x_outer: 2583 leaq 16-256(%rbx),%r10 2584 pxor %xmm4,%xmm4 2585.byte 0x67,0x67 2586 pxor %xmm5,%xmm5 2587 movdqa -128(%rdi),%xmm0 2588 movdqa -112(%rdi),%xmm1 2589 movdqa -96(%rdi),%xmm2 2590 pand 256(%r10),%xmm0 2591 movdqa -80(%rdi),%xmm3 2592 pand 272(%r10),%xmm1 2593 por %xmm0,%xmm4 2594 pand 288(%r10),%xmm2 2595 por %xmm1,%xmm5 2596 pand 304(%r10),%xmm3 2597 por %xmm2,%xmm4 2598 por %xmm3,%xmm5 2599 movdqa -64(%rdi),%xmm0 2600 movdqa -48(%rdi),%xmm1 2601 movdqa -32(%rdi),%xmm2 2602 pand 320(%r10),%xmm0 2603 movdqa -16(%rdi),%xmm3 2604 pand 336(%r10),%xmm1 2605 por %xmm0,%xmm4 2606 pand 352(%r10),%xmm2 2607 por %xmm1,%xmm5 2608 pand 368(%r10),%xmm3 2609 por %xmm2,%xmm4 2610 por %xmm3,%xmm5 2611 movdqa 0(%rdi),%xmm0 2612 movdqa 16(%rdi),%xmm1 2613 movdqa 32(%rdi),%xmm2 2614 pand 384(%r10),%xmm0 2615 movdqa 48(%rdi),%xmm3 2616 pand 400(%r10),%xmm1 2617 por %xmm0,%xmm4 2618 pand 416(%r10),%xmm2 2619 por %xmm1,%xmm5 2620 pand 432(%r10),%xmm3 2621 por %xmm2,%xmm4 2622 por %xmm3,%xmm5 2623 movdqa 64(%rdi),%xmm0 2624 movdqa 80(%rdi),%xmm1 2625 movdqa 96(%rdi),%xmm2 2626 pand 448(%r10),%xmm0 2627 movdqa 112(%rdi),%xmm3 2628 pand 464(%r10),%xmm1 2629 por %xmm0,%xmm4 2630 pand 480(%r10),%xmm2 2631 por %xmm1,%xmm5 2632 pand 496(%r10),%xmm3 2633 por %xmm2,%xmm4 2634 por %xmm3,%xmm5 2635 por %xmm5,%xmm4 2636 pshufd $0x4e,%xmm4,%xmm0 2637 por %xmm4,%xmm0 2638 leaq 256(%rdi),%rdi 2639.byte 102,72,15,126,194 2640 2641 movq %rbp,(%rbx) 2642 leaq 32(%rbx,%rax,1),%rbx 2643 mulxq 0(%rsi),%r8,%r11 2644 xorq %rbp,%rbp 2645 movq %rdx,%r9 2646 mulxq 8(%rsi),%r14,%r12 2647 adoxq -32(%rbx),%r8 2648 adcxq %r14,%r11 2649 mulxq 16(%rsi),%r15,%r13 2650 adoxq -24(%rbx),%r11 2651 adcxq %r15,%r12 2652 mulxq 24(%rsi),%rdx,%r14 2653 adoxq -16(%rbx),%r12 2654 adcxq %rdx,%r13 2655 leaq (%rcx,%rax,1),%rcx 2656 leaq 32(%rsi),%rsi 2657 adoxq -8(%rbx),%r13 2658 adcxq %rbp,%r14 2659 adoxq %rbp,%r14 2660 2661 movq %r8,%r15 2662 imulq 32+8(%rsp),%r8 2663 2664 movq %r8,%rdx 2665 xorq %rbp,%rbp 2666 movq %rdi,8+8(%rsp) 2667 2668 mulxq 0(%rcx),%rax,%r10 2669 adcxq %rax,%r15 2670 adoxq %r11,%r10 2671 mulxq 8(%rcx),%rax,%r11 2672 adcxq %rax,%r10 2673 adoxq %r12,%r11 2674 mulxq 16(%rcx),%rax,%r12 2675 adcxq %rax,%r11 2676 adoxq %r13,%r12 2677 mulxq 24(%rcx),%rax,%r15 2678 movq %r9,%rdx 2679 movq 24+8(%rsp),%rdi 2680 movq %r10,-32(%rbx) 2681 adcxq %rax,%r12 2682 movq %r11,-24(%rbx) 2683 adoxq %rbp,%r15 2684 movq %r12,-16(%rbx) 2685 leaq 32(%rcx),%rcx 2686 jmp .Lmulx4x_inner 2687 2688.align 32 2689.Lmulx4x_inner: 2690 mulxq 0(%rsi),%r10,%rax 2691 adcxq %rbp,%r15 2692 adoxq %r14,%r10 2693 mulxq 8(%rsi),%r11,%r14 2694 adcxq 0(%rbx),%r10 2695 adoxq %rax,%r11 2696 mulxq 16(%rsi),%r12,%rax 2697 adcxq 8(%rbx),%r11 2698 adoxq %r14,%r12 2699 mulxq 24(%rsi),%r13,%r14 2700 movq %r8,%rdx 2701 adcxq 16(%rbx),%r12 2702 adoxq %rax,%r13 2703 adcxq 24(%rbx),%r13 2704 adoxq %rbp,%r14 2705 leaq 32(%rsi),%rsi 2706 leaq 32(%rbx),%rbx 2707 adcxq %rbp,%r14 2708 2709 adoxq %r15,%r10 2710 mulxq 0(%rcx),%rax,%r15 2711 adcxq %rax,%r10 2712 adoxq %r15,%r11 2713 mulxq 8(%rcx),%rax,%r15 2714 adcxq %rax,%r11 2715 adoxq %r15,%r12 2716 mulxq 16(%rcx),%rax,%r15 2717 movq %r10,-40(%rbx) 2718 adcxq %rax,%r12 2719 adoxq %r15,%r13 2720 movq %r11,-32(%rbx) 2721 mulxq 24(%rcx),%rax,%r15 2722 movq %r9,%rdx 2723 leaq 32(%rcx),%rcx 2724 movq %r12,-24(%rbx) 2725 adcxq %rax,%r13 2726 adoxq %rbp,%r15 2727 movq %r13,-16(%rbx) 2728 2729 decq %rdi 2730 jnz .Lmulx4x_inner 2731 2732 movq 0+8(%rsp),%rax 2733 adcq %rbp,%r15 2734 subq 0(%rbx),%rdi 2735 movq 8+8(%rsp),%rdi 2736 movq 16+8(%rsp),%r10 2737 adcq %r15,%r14 2738 leaq (%rsi,%rax,1),%rsi 2739 adcq %rbp,%rbp 2740 movq %r14,-8(%rbx) 2741 2742 cmpq %r10,%rdi 2743 jb .Lmulx4x_outer 2744 2745 movq -8(%rcx),%r10 2746 movq %rbp,%r8 2747 movq (%rcx,%rax,1),%r12 2748 leaq (%rcx,%rax,1),%rbp 2749 movq %rax,%rcx 2750 leaq (%rbx,%rax,1),%rdi 2751 xorl %eax,%eax 2752 xorq %r15,%r15 2753 subq %r14,%r10 2754 adcq %r15,%r15 2755 orq %r15,%r8 2756 sarq $3+2,%rcx 2757 subq %r8,%rax 2758 movq 56+8(%rsp),%rdx 2759 decq %r12 2760 movq 8(%rbp),%r13 2761 xorq %r8,%r8 2762 movq 16(%rbp),%r14 2763 movq 24(%rbp),%r15 2764 jmp .Lsqrx4x_sub_entry 2765.cfi_endproc 2766.size mulx4x_internal,.-mulx4x_internal 2767.type bn_powerx5,@function 2768.align 32 2769bn_powerx5: 2770.cfi_startproc 2771 movq %rsp,%rax 2772.cfi_def_cfa_register %rax 2773.Lpowerx5_enter: 2774 pushq %rbx 2775.cfi_offset %rbx,-16 2776 pushq %rbp 2777.cfi_offset %rbp,-24 2778 pushq %r12 2779.cfi_offset %r12,-32 2780 pushq %r13 2781.cfi_offset %r13,-40 2782 pushq %r14 2783.cfi_offset %r14,-48 2784 pushq %r15 2785.cfi_offset %r15,-56 2786.Lpowerx5_prologue: 2787 2788 shll $3,%r9d 2789 leaq (%r9,%r9,2),%r10 2790 negq %r9 2791 movq (%r8),%r8 2792 2793 2794 2795 2796 2797 2798 2799 2800 leaq -320(%rsp,%r9,2),%r11 2801 movq %rsp,%rbp 2802 subq %rdi,%r11 2803 andq $4095,%r11 2804 cmpq %r11,%r10 2805 jb .Lpwrx_sp_alt 2806 subq %r11,%rbp 2807 leaq -320(%rbp,%r9,2),%rbp 2808 jmp .Lpwrx_sp_done 2809 2810.align 32 2811.Lpwrx_sp_alt: 2812 leaq 4096-320(,%r9,2),%r10 2813 leaq -320(%rbp,%r9,2),%rbp 2814 subq %r10,%r11 2815 movq $0,%r10 2816 cmovcq %r10,%r11 2817 subq %r11,%rbp 2818.Lpwrx_sp_done: 2819 andq $-64,%rbp 2820 movq %rsp,%r11 2821 subq %rbp,%r11 2822 andq $-4096,%r11 2823 leaq (%r11,%rbp,1),%rsp 2824 movq (%rsp),%r10 2825 cmpq %rbp,%rsp 2826 ja .Lpwrx_page_walk 2827 jmp .Lpwrx_page_walk_done 2828 2829.Lpwrx_page_walk: 2830 leaq -4096(%rsp),%rsp 2831 movq (%rsp),%r10 2832 cmpq %rbp,%rsp 2833 ja .Lpwrx_page_walk 2834.Lpwrx_page_walk_done: 2835 2836 movq %r9,%r10 2837 negq %r9 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 pxor %xmm0,%xmm0 2851.byte 102,72,15,110,207 2852.byte 102,72,15,110,209 2853.byte 102,73,15,110,218 2854.byte 102,72,15,110,226 2855 movq %r8,32(%rsp) 2856 movq %rax,40(%rsp) 2857.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2858.Lpowerx5_body: 2859 2860 call __bn_sqrx8x_internal 2861 call __bn_postx4x_internal 2862 call __bn_sqrx8x_internal 2863 call __bn_postx4x_internal 2864 call __bn_sqrx8x_internal 2865 call __bn_postx4x_internal 2866 call __bn_sqrx8x_internal 2867 call __bn_postx4x_internal 2868 call __bn_sqrx8x_internal 2869 call __bn_postx4x_internal 2870 2871 movq %r10,%r9 2872 movq %rsi,%rdi 2873.byte 102,72,15,126,209 2874.byte 102,72,15,126,226 2875 movq 40(%rsp),%rax 2876 2877 call mulx4x_internal 2878 2879 movq 40(%rsp),%rsi 2880.cfi_def_cfa %rsi,8 2881 movq $1,%rax 2882 2883 movq -48(%rsi),%r15 2884.cfi_restore %r15 2885 movq -40(%rsi),%r14 2886.cfi_restore %r14 2887 movq -32(%rsi),%r13 2888.cfi_restore %r13 2889 movq -24(%rsi),%r12 2890.cfi_restore %r12 2891 movq -16(%rsi),%rbp 2892.cfi_restore %rbp 2893 movq -8(%rsi),%rbx 2894.cfi_restore %rbx 2895 leaq (%rsi),%rsp 2896.cfi_def_cfa_register %rsp 2897.Lpowerx5_epilogue: 2898 .byte 0xf3,0xc3 2899.cfi_endproc 2900.size bn_powerx5,.-bn_powerx5 2901 2902.globl bn_sqrx8x_internal 2903.hidden bn_sqrx8x_internal 2904.type bn_sqrx8x_internal,@function 2905.align 32 2906bn_sqrx8x_internal: 2907__bn_sqrx8x_internal: 2908.cfi_startproc 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 leaq 48+8(%rsp),%rdi 2950 leaq (%rsi,%r9,1),%rbp 2951 movq %r9,0+8(%rsp) 2952 movq %rbp,8+8(%rsp) 2953 jmp .Lsqr8x_zero_start 2954 2955.align 32 2956.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2957.Lsqrx8x_zero: 2958.byte 0x3e 2959 movdqa %xmm0,0(%rdi) 2960 movdqa %xmm0,16(%rdi) 2961 movdqa %xmm0,32(%rdi) 2962 movdqa %xmm0,48(%rdi) 2963.Lsqr8x_zero_start: 2964 movdqa %xmm0,64(%rdi) 2965 movdqa %xmm0,80(%rdi) 2966 movdqa %xmm0,96(%rdi) 2967 movdqa %xmm0,112(%rdi) 2968 leaq 128(%rdi),%rdi 2969 subq $64,%r9 2970 jnz .Lsqrx8x_zero 2971 2972 movq 0(%rsi),%rdx 2973 2974 xorq %r10,%r10 2975 xorq %r11,%r11 2976 xorq %r12,%r12 2977 xorq %r13,%r13 2978 xorq %r14,%r14 2979 xorq %r15,%r15 2980 leaq 48+8(%rsp),%rdi 2981 xorq %rbp,%rbp 2982 jmp .Lsqrx8x_outer_loop 2983 2984.align 32 2985.Lsqrx8x_outer_loop: 2986 mulxq 8(%rsi),%r8,%rax 2987 adcxq %r9,%r8 2988 adoxq %rax,%r10 2989 mulxq 16(%rsi),%r9,%rax 2990 adcxq %r10,%r9 2991 adoxq %rax,%r11 2992.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2993 adcxq %r11,%r10 2994 adoxq %rax,%r12 2995.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2996 adcxq %r12,%r11 2997 adoxq %rax,%r13 2998 mulxq 40(%rsi),%r12,%rax 2999 adcxq %r13,%r12 3000 adoxq %rax,%r14 3001 mulxq 48(%rsi),%r13,%rax 3002 adcxq %r14,%r13 3003 adoxq %r15,%rax 3004 mulxq 56(%rsi),%r14,%r15 3005 movq 8(%rsi),%rdx 3006 adcxq %rax,%r14 3007 adoxq %rbp,%r15 3008 adcq 64(%rdi),%r15 3009 movq %r8,8(%rdi) 3010 movq %r9,16(%rdi) 3011 sbbq %rcx,%rcx 3012 xorq %rbp,%rbp 3013 3014 3015 mulxq 16(%rsi),%r8,%rbx 3016 mulxq 24(%rsi),%r9,%rax 3017 adcxq %r10,%r8 3018 adoxq %rbx,%r9 3019 mulxq 32(%rsi),%r10,%rbx 3020 adcxq %r11,%r9 3021 adoxq %rax,%r10 3022.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 3023 adcxq %r12,%r10 3024 adoxq %rbx,%r11 3025.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 3026 adcxq %r13,%r11 3027 adoxq %r14,%r12 3028.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 3029 movq 16(%rsi),%rdx 3030 adcxq %rax,%r12 3031 adoxq %rbx,%r13 3032 adcxq %r15,%r13 3033 adoxq %rbp,%r14 3034 adcxq %rbp,%r14 3035 3036 movq %r8,24(%rdi) 3037 movq %r9,32(%rdi) 3038 3039 mulxq 24(%rsi),%r8,%rbx 3040 mulxq 32(%rsi),%r9,%rax 3041 adcxq %r10,%r8 3042 adoxq %rbx,%r9 3043 mulxq 40(%rsi),%r10,%rbx 3044 adcxq %r11,%r9 3045 adoxq %rax,%r10 3046.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 3047 adcxq %r12,%r10 3048 adoxq %r13,%r11 3049.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 3050.byte 0x3e 3051 movq 24(%rsi),%rdx 3052 adcxq %rbx,%r11 3053 adoxq %rax,%r12 3054 adcxq %r14,%r12 3055 movq %r8,40(%rdi) 3056 movq %r9,48(%rdi) 3057 mulxq 32(%rsi),%r8,%rax 3058 adoxq %rbp,%r13 3059 adcxq %rbp,%r13 3060 3061 mulxq 40(%rsi),%r9,%rbx 3062 adcxq %r10,%r8 3063 adoxq %rax,%r9 3064 mulxq 48(%rsi),%r10,%rax 3065 adcxq %r11,%r9 3066 adoxq %r12,%r10 3067 mulxq 56(%rsi),%r11,%r12 3068 movq 32(%rsi),%rdx 3069 movq 40(%rsi),%r14 3070 adcxq %rbx,%r10 3071 adoxq %rax,%r11 3072 movq 48(%rsi),%r15 3073 adcxq %r13,%r11 3074 adoxq %rbp,%r12 3075 adcxq %rbp,%r12 3076 3077 movq %r8,56(%rdi) 3078 movq %r9,64(%rdi) 3079 3080 mulxq %r14,%r9,%rax 3081 movq 56(%rsi),%r8 3082 adcxq %r10,%r9 3083 mulxq %r15,%r10,%rbx 3084 adoxq %rax,%r10 3085 adcxq %r11,%r10 3086 mulxq %r8,%r11,%rax 3087 movq %r14,%rdx 3088 adoxq %rbx,%r11 3089 adcxq %r12,%r11 3090 3091 adcxq %rbp,%rax 3092 3093 mulxq %r15,%r14,%rbx 3094 mulxq %r8,%r12,%r13 3095 movq %r15,%rdx 3096 leaq 64(%rsi),%rsi 3097 adcxq %r14,%r11 3098 adoxq %rbx,%r12 3099 adcxq %rax,%r12 3100 adoxq %rbp,%r13 3101 3102.byte 0x67,0x67 3103 mulxq %r8,%r8,%r14 3104 adcxq %r8,%r13 3105 adcxq %rbp,%r14 3106 3107 cmpq 8+8(%rsp),%rsi 3108 je .Lsqrx8x_outer_break 3109 3110 negq %rcx 3111 movq $-8,%rcx 3112 movq %rbp,%r15 3113 movq 64(%rdi),%r8 3114 adcxq 72(%rdi),%r9 3115 adcxq 80(%rdi),%r10 3116 adcxq 88(%rdi),%r11 3117 adcq 96(%rdi),%r12 3118 adcq 104(%rdi),%r13 3119 adcq 112(%rdi),%r14 3120 adcq 120(%rdi),%r15 3121 leaq (%rsi),%rbp 3122 leaq 128(%rdi),%rdi 3123 sbbq %rax,%rax 3124 3125 movq -64(%rsi),%rdx 3126 movq %rax,16+8(%rsp) 3127 movq %rdi,24+8(%rsp) 3128 3129 3130 xorl %eax,%eax 3131 jmp .Lsqrx8x_loop 3132 3133.align 32 3134.Lsqrx8x_loop: 3135 movq %r8,%rbx 3136 mulxq 0(%rbp),%rax,%r8 3137 adcxq %rax,%rbx 3138 adoxq %r9,%r8 3139 3140 mulxq 8(%rbp),%rax,%r9 3141 adcxq %rax,%r8 3142 adoxq %r10,%r9 3143 3144 mulxq 16(%rbp),%rax,%r10 3145 adcxq %rax,%r9 3146 adoxq %r11,%r10 3147 3148 mulxq 24(%rbp),%rax,%r11 3149 adcxq %rax,%r10 3150 adoxq %r12,%r11 3151 3152.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3153 adcxq %rax,%r11 3154 adoxq %r13,%r12 3155 3156 mulxq 40(%rbp),%rax,%r13 3157 adcxq %rax,%r12 3158 adoxq %r14,%r13 3159 3160 mulxq 48(%rbp),%rax,%r14 3161 movq %rbx,(%rdi,%rcx,8) 3162 movl $0,%ebx 3163 adcxq %rax,%r13 3164 adoxq %r15,%r14 3165 3166.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3167 movq 8(%rsi,%rcx,8),%rdx 3168 adcxq %rax,%r14 3169 adoxq %rbx,%r15 3170 adcxq %rbx,%r15 3171 3172.byte 0x67 3173 incq %rcx 3174 jnz .Lsqrx8x_loop 3175 3176 leaq 64(%rbp),%rbp 3177 movq $-8,%rcx 3178 cmpq 8+8(%rsp),%rbp 3179 je .Lsqrx8x_break 3180 3181 subq 16+8(%rsp),%rbx 3182.byte 0x66 3183 movq -64(%rsi),%rdx 3184 adcxq 0(%rdi),%r8 3185 adcxq 8(%rdi),%r9 3186 adcq 16(%rdi),%r10 3187 adcq 24(%rdi),%r11 3188 adcq 32(%rdi),%r12 3189 adcq 40(%rdi),%r13 3190 adcq 48(%rdi),%r14 3191 adcq 56(%rdi),%r15 3192 leaq 64(%rdi),%rdi 3193.byte 0x67 3194 sbbq %rax,%rax 3195 xorl %ebx,%ebx 3196 movq %rax,16+8(%rsp) 3197 jmp .Lsqrx8x_loop 3198 3199.align 32 3200.Lsqrx8x_break: 3201 xorq %rbp,%rbp 3202 subq 16+8(%rsp),%rbx 3203 adcxq %rbp,%r8 3204 movq 24+8(%rsp),%rcx 3205 adcxq %rbp,%r9 3206 movq 0(%rsi),%rdx 3207 adcq $0,%r10 3208 movq %r8,0(%rdi) 3209 adcq $0,%r11 3210 adcq $0,%r12 3211 adcq $0,%r13 3212 adcq $0,%r14 3213 adcq $0,%r15 3214 cmpq %rcx,%rdi 3215 je .Lsqrx8x_outer_loop 3216 3217 movq %r9,8(%rdi) 3218 movq 8(%rcx),%r9 3219 movq %r10,16(%rdi) 3220 movq 16(%rcx),%r10 3221 movq %r11,24(%rdi) 3222 movq 24(%rcx),%r11 3223 movq %r12,32(%rdi) 3224 movq 32(%rcx),%r12 3225 movq %r13,40(%rdi) 3226 movq 40(%rcx),%r13 3227 movq %r14,48(%rdi) 3228 movq 48(%rcx),%r14 3229 movq %r15,56(%rdi) 3230 movq 56(%rcx),%r15 3231 movq %rcx,%rdi 3232 jmp .Lsqrx8x_outer_loop 3233 3234.align 32 3235.Lsqrx8x_outer_break: 3236 movq %r9,72(%rdi) 3237.byte 102,72,15,126,217 3238 movq %r10,80(%rdi) 3239 movq %r11,88(%rdi) 3240 movq %r12,96(%rdi) 3241 movq %r13,104(%rdi) 3242 movq %r14,112(%rdi) 3243 leaq 48+8(%rsp),%rdi 3244 movq (%rsi,%rcx,1),%rdx 3245 3246 movq 8(%rdi),%r11 3247 xorq %r10,%r10 3248 movq 0+8(%rsp),%r9 3249 adoxq %r11,%r11 3250 movq 16(%rdi),%r12 3251 movq 24(%rdi),%r13 3252 3253 3254.align 32 3255.Lsqrx4x_shift_n_add: 3256 mulxq %rdx,%rax,%rbx 3257 adoxq %r12,%r12 3258 adcxq %r10,%rax 3259.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3260.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3261 adoxq %r13,%r13 3262 adcxq %r11,%rbx 3263 movq 40(%rdi),%r11 3264 movq %rax,0(%rdi) 3265 movq %rbx,8(%rdi) 3266 3267 mulxq %rdx,%rax,%rbx 3268 adoxq %r10,%r10 3269 adcxq %r12,%rax 3270 movq 16(%rsi,%rcx,1),%rdx 3271 movq 48(%rdi),%r12 3272 adoxq %r11,%r11 3273 adcxq %r13,%rbx 3274 movq 56(%rdi),%r13 3275 movq %rax,16(%rdi) 3276 movq %rbx,24(%rdi) 3277 3278 mulxq %rdx,%rax,%rbx 3279 adoxq %r12,%r12 3280 adcxq %r10,%rax 3281 movq 24(%rsi,%rcx,1),%rdx 3282 leaq 32(%rcx),%rcx 3283 movq 64(%rdi),%r10 3284 adoxq %r13,%r13 3285 adcxq %r11,%rbx 3286 movq 72(%rdi),%r11 3287 movq %rax,32(%rdi) 3288 movq %rbx,40(%rdi) 3289 3290 mulxq %rdx,%rax,%rbx 3291 adoxq %r10,%r10 3292 adcxq %r12,%rax 3293 jrcxz .Lsqrx4x_shift_n_add_break 3294.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3295 adoxq %r11,%r11 3296 adcxq %r13,%rbx 3297 movq 80(%rdi),%r12 3298 movq 88(%rdi),%r13 3299 movq %rax,48(%rdi) 3300 movq %rbx,56(%rdi) 3301 leaq 64(%rdi),%rdi 3302 nop 3303 jmp .Lsqrx4x_shift_n_add 3304 3305.align 32 3306.Lsqrx4x_shift_n_add_break: 3307 adcxq %r13,%rbx 3308 movq %rax,48(%rdi) 3309 movq %rbx,56(%rdi) 3310 leaq 64(%rdi),%rdi 3311.byte 102,72,15,126,213 3312__bn_sqrx8x_reduction: 3313 xorl %eax,%eax 3314 movq 32+8(%rsp),%rbx 3315 movq 48+8(%rsp),%rdx 3316 leaq -64(%rbp,%r9,1),%rcx 3317 3318 movq %rcx,0+8(%rsp) 3319 movq %rdi,8+8(%rsp) 3320 3321 leaq 48+8(%rsp),%rdi 3322 jmp .Lsqrx8x_reduction_loop 3323 3324.align 32 3325.Lsqrx8x_reduction_loop: 3326 movq 8(%rdi),%r9 3327 movq 16(%rdi),%r10 3328 movq 24(%rdi),%r11 3329 movq 32(%rdi),%r12 3330 movq %rdx,%r8 3331 imulq %rbx,%rdx 3332 movq 40(%rdi),%r13 3333 movq 48(%rdi),%r14 3334 movq 56(%rdi),%r15 3335 movq %rax,24+8(%rsp) 3336 3337 leaq 64(%rdi),%rdi 3338 xorq %rsi,%rsi 3339 movq $-8,%rcx 3340 jmp .Lsqrx8x_reduce 3341 3342.align 32 3343.Lsqrx8x_reduce: 3344 movq %r8,%rbx 3345 mulxq 0(%rbp),%rax,%r8 3346 adcxq %rbx,%rax 3347 adoxq %r9,%r8 3348 3349 mulxq 8(%rbp),%rbx,%r9 3350 adcxq %rbx,%r8 3351 adoxq %r10,%r9 3352 3353 mulxq 16(%rbp),%rbx,%r10 3354 adcxq %rbx,%r9 3355 adoxq %r11,%r10 3356 3357 mulxq 24(%rbp),%rbx,%r11 3358 adcxq %rbx,%r10 3359 adoxq %r12,%r11 3360 3361.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3362 movq %rdx,%rax 3363 movq %r8,%rdx 3364 adcxq %rbx,%r11 3365 adoxq %r13,%r12 3366 3367 mulxq 32+8(%rsp),%rbx,%rdx 3368 movq %rax,%rdx 3369 movq %rax,64+48+8(%rsp,%rcx,8) 3370 3371 mulxq 40(%rbp),%rax,%r13 3372 adcxq %rax,%r12 3373 adoxq %r14,%r13 3374 3375 mulxq 48(%rbp),%rax,%r14 3376 adcxq %rax,%r13 3377 adoxq %r15,%r14 3378 3379 mulxq 56(%rbp),%rax,%r15 3380 movq %rbx,%rdx 3381 adcxq %rax,%r14 3382 adoxq %rsi,%r15 3383 adcxq %rsi,%r15 3384 3385.byte 0x67,0x67,0x67 3386 incq %rcx 3387 jnz .Lsqrx8x_reduce 3388 3389 movq %rsi,%rax 3390 cmpq 0+8(%rsp),%rbp 3391 jae .Lsqrx8x_no_tail 3392 3393 movq 48+8(%rsp),%rdx 3394 addq 0(%rdi),%r8 3395 leaq 64(%rbp),%rbp 3396 movq $-8,%rcx 3397 adcxq 8(%rdi),%r9 3398 adcxq 16(%rdi),%r10 3399 adcq 24(%rdi),%r11 3400 adcq 32(%rdi),%r12 3401 adcq 40(%rdi),%r13 3402 adcq 48(%rdi),%r14 3403 adcq 56(%rdi),%r15 3404 leaq 64(%rdi),%rdi 3405 sbbq %rax,%rax 3406 3407 xorq %rsi,%rsi 3408 movq %rax,16+8(%rsp) 3409 jmp .Lsqrx8x_tail 3410 3411.align 32 3412.Lsqrx8x_tail: 3413 movq %r8,%rbx 3414 mulxq 0(%rbp),%rax,%r8 3415 adcxq %rax,%rbx 3416 adoxq %r9,%r8 3417 3418 mulxq 8(%rbp),%rax,%r9 3419 adcxq %rax,%r8 3420 adoxq %r10,%r9 3421 3422 mulxq 16(%rbp),%rax,%r10 3423 adcxq %rax,%r9 3424 adoxq %r11,%r10 3425 3426 mulxq 24(%rbp),%rax,%r11 3427 adcxq %rax,%r10 3428 adoxq %r12,%r11 3429 3430.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3431 adcxq %rax,%r11 3432 adoxq %r13,%r12 3433 3434 mulxq 40(%rbp),%rax,%r13 3435 adcxq %rax,%r12 3436 adoxq %r14,%r13 3437 3438 mulxq 48(%rbp),%rax,%r14 3439 adcxq %rax,%r13 3440 adoxq %r15,%r14 3441 3442 mulxq 56(%rbp),%rax,%r15 3443 movq 72+48+8(%rsp,%rcx,8),%rdx 3444 adcxq %rax,%r14 3445 adoxq %rsi,%r15 3446 movq %rbx,(%rdi,%rcx,8) 3447 movq %r8,%rbx 3448 adcxq %rsi,%r15 3449 3450 incq %rcx 3451 jnz .Lsqrx8x_tail 3452 3453 cmpq 0+8(%rsp),%rbp 3454 jae .Lsqrx8x_tail_done 3455 3456 subq 16+8(%rsp),%rsi 3457 movq 48+8(%rsp),%rdx 3458 leaq 64(%rbp),%rbp 3459 adcq 0(%rdi),%r8 3460 adcq 8(%rdi),%r9 3461 adcq 16(%rdi),%r10 3462 adcq 24(%rdi),%r11 3463 adcq 32(%rdi),%r12 3464 adcq 40(%rdi),%r13 3465 adcq 48(%rdi),%r14 3466 adcq 56(%rdi),%r15 3467 leaq 64(%rdi),%rdi 3468 sbbq %rax,%rax 3469 subq $8,%rcx 3470 3471 xorq %rsi,%rsi 3472 movq %rax,16+8(%rsp) 3473 jmp .Lsqrx8x_tail 3474 3475.align 32 3476.Lsqrx8x_tail_done: 3477 xorq %rax,%rax 3478 addq 24+8(%rsp),%r8 3479 adcq $0,%r9 3480 adcq $0,%r10 3481 adcq $0,%r11 3482 adcq $0,%r12 3483 adcq $0,%r13 3484 adcq $0,%r14 3485 adcq $0,%r15 3486 adcq $0,%rax 3487 3488 subq 16+8(%rsp),%rsi 3489.Lsqrx8x_no_tail: 3490 adcq 0(%rdi),%r8 3491.byte 102,72,15,126,217 3492 adcq 8(%rdi),%r9 3493 movq 56(%rbp),%rsi 3494.byte 102,72,15,126,213 3495 adcq 16(%rdi),%r10 3496 adcq 24(%rdi),%r11 3497 adcq 32(%rdi),%r12 3498 adcq 40(%rdi),%r13 3499 adcq 48(%rdi),%r14 3500 adcq 56(%rdi),%r15 3501 adcq $0,%rax 3502 3503 movq 32+8(%rsp),%rbx 3504 movq 64(%rdi,%rcx,1),%rdx 3505 3506 movq %r8,0(%rdi) 3507 leaq 64(%rdi),%r8 3508 movq %r9,8(%rdi) 3509 movq %r10,16(%rdi) 3510 movq %r11,24(%rdi) 3511 movq %r12,32(%rdi) 3512 movq %r13,40(%rdi) 3513 movq %r14,48(%rdi) 3514 movq %r15,56(%rdi) 3515 3516 leaq 64(%rdi,%rcx,1),%rdi 3517 cmpq 8+8(%rsp),%r8 3518 jb .Lsqrx8x_reduction_loop 3519 .byte 0xf3,0xc3 3520.cfi_endproc 3521.size bn_sqrx8x_internal,.-bn_sqrx8x_internal 3522.align 32 3523__bn_postx4x_internal: 3524.cfi_startproc 3525 movq 0(%rbp),%r12 3526 movq %rcx,%r10 3527 movq %rcx,%r9 3528 negq %rax 3529 sarq $3+2,%rcx 3530 3531.byte 102,72,15,126,202 3532.byte 102,72,15,126,206 3533 decq %r12 3534 movq 8(%rbp),%r13 3535 xorq %r8,%r8 3536 movq 16(%rbp),%r14 3537 movq 24(%rbp),%r15 3538 jmp .Lsqrx4x_sub_entry 3539 3540.align 16 3541.Lsqrx4x_sub: 3542 movq 0(%rbp),%r12 3543 movq 8(%rbp),%r13 3544 movq 16(%rbp),%r14 3545 movq 24(%rbp),%r15 3546.Lsqrx4x_sub_entry: 3547 andnq %rax,%r12,%r12 3548 leaq 32(%rbp),%rbp 3549 andnq %rax,%r13,%r13 3550 andnq %rax,%r14,%r14 3551 andnq %rax,%r15,%r15 3552 3553 negq %r8 3554 adcq 0(%rdi),%r12 3555 adcq 8(%rdi),%r13 3556 adcq 16(%rdi),%r14 3557 adcq 24(%rdi),%r15 3558 movq %r12,0(%rdx) 3559 leaq 32(%rdi),%rdi 3560 movq %r13,8(%rdx) 3561 sbbq %r8,%r8 3562 movq %r14,16(%rdx) 3563 movq %r15,24(%rdx) 3564 leaq 32(%rdx),%rdx 3565 3566 incq %rcx 3567 jnz .Lsqrx4x_sub 3568 3569 negq %r9 3570 3571 .byte 0xf3,0xc3 3572.cfi_endproc 3573.size __bn_postx4x_internal,.-__bn_postx4x_internal 3574.globl bn_get_bits5 3575.type bn_get_bits5,@function 3576.align 16 3577bn_get_bits5: 3578.cfi_startproc 3579 leaq 0(%rdi),%r10 3580 leaq 1(%rdi),%r11 3581 movl %esi,%ecx 3582 shrl $4,%esi 3583 andl $15,%ecx 3584 leal -8(%rcx),%eax 3585 cmpl $11,%ecx 3586 cmovaq %r11,%r10 3587 cmoval %eax,%ecx 3588 movzwl (%r10,%rsi,2),%eax 3589 shrl %cl,%eax 3590 andl $31,%eax 3591 .byte 0xf3,0xc3 3592.cfi_endproc 3593.size bn_get_bits5,.-bn_get_bits5 3594 3595.globl bn_scatter5 3596.type bn_scatter5,@function 3597.align 16 3598bn_scatter5: 3599.cfi_startproc 3600 cmpl $0,%esi 3601 jz .Lscatter_epilogue 3602 leaq (%rdx,%rcx,8),%rdx 3603.Lscatter: 3604 movq (%rdi),%rax 3605 leaq 8(%rdi),%rdi 3606 movq %rax,(%rdx) 3607 leaq 256(%rdx),%rdx 3608 subl $1,%esi 3609 jnz .Lscatter 3610.Lscatter_epilogue: 3611 .byte 0xf3,0xc3 3612.cfi_endproc 3613.size bn_scatter5,.-bn_scatter5 3614 3615.globl bn_gather5 3616.type bn_gather5,@function 3617.align 32 3618bn_gather5: 3619.LSEH_begin_bn_gather5: 3620.cfi_startproc 3621 3622.byte 0x4c,0x8d,0x14,0x24 3623.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 3624 leaq .Linc(%rip),%rax 3625 andq $-16,%rsp 3626 3627 movd %ecx,%xmm5 3628 movdqa 0(%rax),%xmm0 3629 movdqa 16(%rax),%xmm1 3630 leaq 128(%rdx),%r11 3631 leaq 128(%rsp),%rax 3632 3633 pshufd $0,%xmm5,%xmm5 3634 movdqa %xmm1,%xmm4 3635 movdqa %xmm1,%xmm2 3636 paddd %xmm0,%xmm1 3637 pcmpeqd %xmm5,%xmm0 3638 movdqa %xmm4,%xmm3 3639 3640 paddd %xmm1,%xmm2 3641 pcmpeqd %xmm5,%xmm1 3642 movdqa %xmm0,-128(%rax) 3643 movdqa %xmm4,%xmm0 3644 3645 paddd %xmm2,%xmm3 3646 pcmpeqd %xmm5,%xmm2 3647 movdqa %xmm1,-112(%rax) 3648 movdqa %xmm4,%xmm1 3649 3650 paddd %xmm3,%xmm0 3651 pcmpeqd %xmm5,%xmm3 3652 movdqa %xmm2,-96(%rax) 3653 movdqa %xmm4,%xmm2 3654 paddd %xmm0,%xmm1 3655 pcmpeqd %xmm5,%xmm0 3656 movdqa %xmm3,-80(%rax) 3657 movdqa %xmm4,%xmm3 3658 3659 paddd %xmm1,%xmm2 3660 pcmpeqd %xmm5,%xmm1 3661 movdqa %xmm0,-64(%rax) 3662 movdqa %xmm4,%xmm0 3663 3664 paddd %xmm2,%xmm3 3665 pcmpeqd %xmm5,%xmm2 3666 movdqa %xmm1,-48(%rax) 3667 movdqa %xmm4,%xmm1 3668 3669 paddd %xmm3,%xmm0 3670 pcmpeqd %xmm5,%xmm3 3671 movdqa %xmm2,-32(%rax) 3672 movdqa %xmm4,%xmm2 3673 paddd %xmm0,%xmm1 3674 pcmpeqd %xmm5,%xmm0 3675 movdqa %xmm3,-16(%rax) 3676 movdqa %xmm4,%xmm3 3677 3678 paddd %xmm1,%xmm2 3679 pcmpeqd %xmm5,%xmm1 3680 movdqa %xmm0,0(%rax) 3681 movdqa %xmm4,%xmm0 3682 3683 paddd %xmm2,%xmm3 3684 pcmpeqd %xmm5,%xmm2 3685 movdqa %xmm1,16(%rax) 3686 movdqa %xmm4,%xmm1 3687 3688 paddd %xmm3,%xmm0 3689 pcmpeqd %xmm5,%xmm3 3690 movdqa %xmm2,32(%rax) 3691 movdqa %xmm4,%xmm2 3692 paddd %xmm0,%xmm1 3693 pcmpeqd %xmm5,%xmm0 3694 movdqa %xmm3,48(%rax) 3695 movdqa %xmm4,%xmm3 3696 3697 paddd %xmm1,%xmm2 3698 pcmpeqd %xmm5,%xmm1 3699 movdqa %xmm0,64(%rax) 3700 movdqa %xmm4,%xmm0 3701 3702 paddd %xmm2,%xmm3 3703 pcmpeqd %xmm5,%xmm2 3704 movdqa %xmm1,80(%rax) 3705 movdqa %xmm4,%xmm1 3706 3707 paddd %xmm3,%xmm0 3708 pcmpeqd %xmm5,%xmm3 3709 movdqa %xmm2,96(%rax) 3710 movdqa %xmm4,%xmm2 3711 movdqa %xmm3,112(%rax) 3712 jmp .Lgather 3713 3714.align 32 3715.Lgather: 3716 pxor %xmm4,%xmm4 3717 pxor %xmm5,%xmm5 3718 movdqa -128(%r11),%xmm0 3719 movdqa -112(%r11),%xmm1 3720 movdqa -96(%r11),%xmm2 3721 pand -128(%rax),%xmm0 3722 movdqa -80(%r11),%xmm3 3723 pand -112(%rax),%xmm1 3724 por %xmm0,%xmm4 3725 pand -96(%rax),%xmm2 3726 por %xmm1,%xmm5 3727 pand -80(%rax),%xmm3 3728 por %xmm2,%xmm4 3729 por %xmm3,%xmm5 3730 movdqa -64(%r11),%xmm0 3731 movdqa -48(%r11),%xmm1 3732 movdqa -32(%r11),%xmm2 3733 pand -64(%rax),%xmm0 3734 movdqa -16(%r11),%xmm3 3735 pand -48(%rax),%xmm1 3736 por %xmm0,%xmm4 3737 pand -32(%rax),%xmm2 3738 por %xmm1,%xmm5 3739 pand -16(%rax),%xmm3 3740 por %xmm2,%xmm4 3741 por %xmm3,%xmm5 3742 movdqa 0(%r11),%xmm0 3743 movdqa 16(%r11),%xmm1 3744 movdqa 32(%r11),%xmm2 3745 pand 0(%rax),%xmm0 3746 movdqa 48(%r11),%xmm3 3747 pand 16(%rax),%xmm1 3748 por %xmm0,%xmm4 3749 pand 32(%rax),%xmm2 3750 por %xmm1,%xmm5 3751 pand 48(%rax),%xmm3 3752 por %xmm2,%xmm4 3753 por %xmm3,%xmm5 3754 movdqa 64(%r11),%xmm0 3755 movdqa 80(%r11),%xmm1 3756 movdqa 96(%r11),%xmm2 3757 pand 64(%rax),%xmm0 3758 movdqa 112(%r11),%xmm3 3759 pand 80(%rax),%xmm1 3760 por %xmm0,%xmm4 3761 pand 96(%rax),%xmm2 3762 por %xmm1,%xmm5 3763 pand 112(%rax),%xmm3 3764 por %xmm2,%xmm4 3765 por %xmm3,%xmm5 3766 por %xmm5,%xmm4 3767 leaq 256(%r11),%r11 3768 pshufd $0x4e,%xmm4,%xmm0 3769 por %xmm4,%xmm0 3770 movq %xmm0,(%rdi) 3771 leaq 8(%rdi),%rdi 3772 subl $1,%esi 3773 jnz .Lgather 3774 3775 leaq (%r10),%rsp 3776 .byte 0xf3,0xc3 3777.LSEH_end_bn_gather5: 3778.cfi_endproc 3779.size bn_gather5,.-bn_gather5 3780.align 64 3781.Linc: 3782.long 0,0, 1,1 3783.long 2,2, 2,2 3784.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 3785