/* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
.text

/*
 * bn_mul_mont_gather5
 *
 * Inputs as the code below uses them (System V AMD64 argument order; the C
 * prototype is not visible here -- confirm against the caller):
 *   %rdi     result buffer, %r9 64-bit words (written by .Lsub and .Lcopy)
 *   %rsi     first operand, %r9 words
 *   %rdx     table of candidate second operands; 256 bytes are read per
 *            outer iteration, starting at offset 0
 *   %rcx     modulus words (multiplied by %rbp, subtracted in .Lsub)
 *   %r8      pointer whose first word is loaded as the per-word multiplier
 *            seed (presumably n0 = -1/n mod 2^64 -- TODO confirm)
 *   %r9d     word count, zero-extended on entry
 *   8(%rsp)  32-bit selector, compared against lane counters built from .Linc
 * Returns 1 in %rax.
 *
 * Presumably computes the Montgomery product of the first operand and the
 * selected table entry; the word-by-word multiply/reduce structure below is
 * consistent with that, but the contract itself is not stated in this file.
 *
 * The table is never indexed by the selector: all sixteen 16-byte lanes of a
 * 256-byte row are loaded at fixed offsets, ANDed with compare masks and ORed
 * together, so the addresses touched do not depend on the selector.
 *
 * Dispatch: when the word count is a multiple of 8 the routine tail-jumps to
 * .Lmul4x_enter with OPENSSL_ia32cap_P+8 loaded in %r11d; otherwise it runs
 * the one-word-at-a-time path at .Lmul_enter.
 *
 * Stack: allocates a temporary vector tp[0..num] at the new %rsp, a slot for
 * the caller's %rsp at 8(%rsp,%r9,8) and sixteen 16-byte masks above that.
 */
.globl	bn_mul_mont_gather5
.type	bn_mul_mont_gather5,@function
.align	64
bn_mul_mont_gather5:
.cfi_startproc
	movl	%r9d,%r9d		/* zero-extend the word count */
	movq	%rsp,%rax		/* %rax = entry %rsp, used as CFA base */
.cfi_def_cfa_register	%rax
	testl	$7,%r9d
	jnz	.Lmul_enter		/* count not a multiple of 8: generic path */
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	movd	8(%rsp),%xmm5		/* selector, read before the pushes move %rsp */
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	/* %r10 = (%rsp - 280 - 8*num) rounded down to 1024: lowest frame address */
	negq	%r9
	movq	%rsp,%r11
	leaq	-280(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	/*
	 * Move %rsp down to %r10 in 4096-byte steps, loading from each
	 * intermediate address so that every page between the old and the new
	 * stack pointer is touched in order.
	 */
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.Lmul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	leaq	.Linc(%rip),%r10
	movq	%rax,8(%rsp,%r9,8)	/* save entry %rsp just above tp[num] */
/* DWARF expression: CFA = *(%rsp + 8 + %r9*8) + 8 */
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:

	leaq	128(%rdx),%r12		/* bias table pointer so offsets span -128..112 */
	movdqa	0(%r10),%xmm0		/* initial lane counters from .Linc */
	movdqa	16(%r10),%xmm1		/* per-step increment from .Linc+16 */
	leaq	24-112(%rsp,%r9,8),%r10
	andq	$-16,%r10		/* masks live at 112(%r10)..352(%r10), 16-aligned */

	/*
	 * Build the sixteen masks: broadcast the selector, then repeatedly add
	 * the increment (kept in %xmm4) to get the next counter vector and
	 * compare it with the selector. Each pcmpeqd result is all-ones in the
	 * lanes whose counter equals the selector and zero elsewhere.
	 * The .byte 0x67 lines emit a bare prefix byte ahead of the following
	 * instruction; they do not change its operands.
	 */
	pshufd	$0,%xmm5,%xmm5
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
.byte	0x67
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,112(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,128(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,144(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,160(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,176(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,192(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,208(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,224(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,240(%r10)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,256(%r10)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,272(%r10)
	movdqa	%xmm4,%xmm2

	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,288(%r10)
	movdqa	%xmm4,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,304(%r10)

	paddd	%xmm2,%xmm3
.byte	0x67
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,320(%r10)

	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,336(%r10)

	/*
	 * First gather. %xmm0..%xmm3 still hold the last four masks, so they
	 * are applied to table lanes 64..112 directly; the remaining twelve
	 * lanes are masked with the copies just stored at 112..288(%r10).
	 */
	pand	64(%r12),%xmm0

	pand	80(%r12),%xmm1
	pand	96(%r12),%xmm2
	movdqa	%xmm3,352(%r10)
	pand	112(%r12),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-128(%r12),%xmm4
	movdqa	-112(%r12),%xmm5
	movdqa	-96(%r12),%xmm2
	pand	112(%r10),%xmm4
	movdqa	-80(%r12),%xmm3
	pand	128(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	144(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	160(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	-64(%r12),%xmm4
	movdqa	-48(%r12),%xmm5
	movdqa	-32(%r12),%xmm2
	pand	176(%r10),%xmm4
	movdqa	-16(%r12),%xmm3
	pand	192(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	208(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	224(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	movdqa	0(%r12),%xmm4
	movdqa	16(%r12),%xmm5
	movdqa	32(%r12),%xmm2
	pand	240(%r10),%xmm4
	movdqa	48(%r12),%xmm3
	pand	256(%r10),%xmm5
	por	%xmm4,%xmm0
	pand	272(%r10),%xmm2
	por	%xmm5,%xmm1
	pand	288(%r10),%xmm3
	por	%xmm2,%xmm0
	por	%xmm3,%xmm1
	por	%xmm1,%xmm0
	pshufd	$0x4e,%xmm0,%xmm1	/* swap the two 64-bit halves ... */
	por	%xmm1,%xmm0		/* ... and fold them into the low half */
	leaq	256(%r12),%r12		/* advance to the next 256-byte table row */
.byte	102,72,15,126,195		/* encoding of: movq %xmm0,%rbx */

	/*
	 * First pass over the words (tp is still empty).
	 * %rbx = gathered multiplier word, %r8 = word loaded from (%r8),
	 * %rbp = low64(%r8 * low word of the running sum), %r15 = word index,
	 * %r14 = outer index.
	 */
	movq	(%r8),%r8
	movq	(%rsi),%rax

	xorq	%r14,%r14
	xorq	%r15,%r15

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10		/* low word of the sum is discarded */
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)	/* store a finished word of tp */
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15		/* lea keeps the flags intact */
	movq	%rdx,%r10

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.L1st

	/* Tail of the first pass: last two words plus the carry word tp[num]. */
	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	jmp	.Louter
.align	16
.Louter:
	/*
	 * Gather the next multiplier word. %rdx-128 resolves to the same
	 * 16-aligned address as 112(%r10) in the setup above, so the masks
	 * stored there are reused; all sixteen table lanes are read.
	 */
	leaq	24+128(%rsp,%r9,8),%rdx
	andq	$-16,%rdx
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r12),%xmm0
	movdqa	-112(%r12),%xmm1
	movdqa	-96(%r12),%xmm2
	movdqa	-80(%r12),%xmm3
	pand	-128(%rdx),%xmm0
	pand	-112(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r12),%xmm0
	movdqa	-48(%r12),%xmm1
	movdqa	-32(%r12),%xmm2
	movdqa	-16(%r12),%xmm3
	pand	-64(%rdx),%xmm0
	pand	-48(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r12),%xmm0
	movdqa	16(%r12),%xmm1
	movdqa	32(%r12),%xmm2
	movdqa	48(%r12),%xmm3
	pand	0(%rdx),%xmm0
	pand	16(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r12),%xmm0
	movdqa	80(%r12),%xmm1
	movdqa	96(%r12),%xmm2
	movdqa	112(%r12),%xmm3
	pand	64(%rdx),%xmm0
	pand	80(%rdx),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rdx),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rdx),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	pshufd	$0x4e,%xmm4,%xmm0
	por	%xmm4,%xmm0
	leaq	256(%r12),%r12

	movq	(%rsi),%rax
.byte	102,72,15,126,195		/* encoding of: movq %xmm0,%rbx */

	/* Same multiply/reduce pass as above, now accumulating into tp. */
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10		/* tp[0] */

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10		/* low word of the sum is discarded */
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10		/* tp[1] */
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r9,8),%r10	/* previous carry word tp[num] */
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r9,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter

	/*
	 * Final step: write tp - modulus to the result with a borrow chain.
	 * The xorq clears CF for the first sbbq; the mov/lea/dec instructions
	 * in the loop leave CF untouched, so the borrow carries across words.
	 */
	xorq	%r14,%r14
	movq	(%rsp),%rax
	leaq	(%rsp),%rsi		/* %rsi now points at tp */
	movq	%r9,%r15
	jmp	.Lsub
.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)
	movq	8(%rsi,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	/*
	 * %rax = tp[num] - borrow is used as a bit mask and %rbx as its
	 * complement. Each result word becomes (difference & %rbx) | (tp & %rax),
	 * selected without a branch. Every tp word is overwritten with its own
	 * index once it has been read, so the intermediate value does not stay
	 * on the stack.
	 */
	sbbq	$0,%rax
	movq	$-1,%rbx
	xorq	%rax,%rbx
	xorq	%r14,%r14
	movq	%r9,%r15

.Lcopy:
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r14,(%rsp,%r14,8)
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	/* Reload the entry %rsp and restore the callee-saved registers below it. */
	movq	8(%rsp,%r9,8),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		/* encoding of: rep ret */
.cfi_endproc
.size	bn_mul_mont_gather5,.-bn_mul_mont_gather5

/*
 * bn_mul4x_mont_gather5
 *
 * File-local entry (no .globl in view) taking the same register and stack
 * inputs as bn_mul_mont_gather5. It builds a 64-byte-aligned stack frame and
 * calls mul4x_internal for the arithmetic, then returns 1 in %rax.
 *
 * .Lmul4x_enter is also the target of the jump in bn_mul_mont_gather5, which
 * loads %r11d from OPENSSL_ia32cap_P+8 first.
 * NOTE(review): when control falls through from the bn_mul4x_mont_gather5
 * label itself, nothing in view initialises %r11d before it is tested --
 * confirm that this label is never entered directly.
 */
.type	bn_mul4x_mont_gather5,@function
.align	32
bn_mul4x_mont_gather5:
.cfi_startproc
.byte	0x67				/* bare prefix byte ahead of the next instruction */
	movq	%rsp,%rax		/* %rax = entry %rsp, used as CFA base */
.cfi_def_cfa_register	%rax
.Lmul4x_enter:
	/*
	 * 0x80108 selects bits 3, 8 and 19 of the capability word; when all
	 * three are set, control goes to .Lmulx4x_enter (defined outside this
	 * view). Presumably these are the BMI1/BMI2/ADX feature bits -- TODO
	 * confirm against the OPENSSL_ia32cap layout.
	 */
	andl	$0x80108,%r11d
	cmpl	$0x80108,%r11d
	je	.Lmulx4x_enter
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lmul4x_prologue:

.byte	0x67
	shll	$3,%r9d			/* %r9 = num*8 bytes (32-bit op zero-extends) */
	leaq	(%r9,%r9,2),%r10	/* %r10 = 3*num*8 */
	negq	%r9			/* %r9 = -num*8 */

	/*
	 * Choose the frame base in %rbp. The frame is 320 + 2*num*8 bytes, and
	 * its placement is adjusted by the distance between the candidate
	 * frame and the result pointer %rdi taken modulo 4096.
	 * NOTE(review): presumably this keeps the frame from sharing page
	 * offsets with the 3*num words starting at %rdi -- confirm against
	 * the generator script.
	 */
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lmul4xsp_alt
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp
	jmp	.Lmul4xsp_done

.align	32
.Lmul4xsp_alt:
	leaq	4096-320(,%r9,2),%r10
	leaq	-320(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10			/* mov, not xor: CF from the subq must survive */
	cmovcq	%r10,%r11		/* clamp the adjustment to 0 on borrow */
	subq	%r11,%rbp
.Lmul4xsp_done:
	andq	$-64,%rbp		/* 64-byte align the frame base */

	/* Walk %rsp down to %rbp one 4096-byte page at a time, touching each. */
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
	jmp	.Lmul4x_page_walk_done

.Lmul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lmul4x_page_walk
.Lmul4x_page_walk_done:

	negq	%r9			/* back to +num*8 */

	movq	%rax,40(%rsp)		/* save entry %rsp in the frame */
/* DWARF expression: CFA = *(%rsp + 40) + 8 */
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body:

	/* %rax still holds the entry %rsp here; mul4x_internal reads 8(%rax). */
	call	mul4x_internal

	movq	40(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	$1,%rax

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul4x_epilogue:
	.byte	0xf3,0xc3		/* encoding of: rep ret */
.cfi_endproc
.size	bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

551bc3d5698SJohn Baldwin.type mul4x_internal,@function 552bc3d5698SJohn Baldwin.align 32 553bc3d5698SJohn Baldwinmul4x_internal: 554bc3d5698SJohn Baldwin.cfi_startproc 555bc3d5698SJohn Baldwin shlq $5,%r9 556bc3d5698SJohn Baldwin movd 8(%rax),%xmm5 557bc3d5698SJohn Baldwin leaq .Linc(%rip),%rax 558bc3d5698SJohn Baldwin leaq 128(%rdx,%r9,1),%r13 559bc3d5698SJohn Baldwin shrq $5,%r9 560bc3d5698SJohn Baldwin movdqa 0(%rax),%xmm0 561bc3d5698SJohn Baldwin movdqa 16(%rax),%xmm1 562bc3d5698SJohn Baldwin leaq 88-112(%rsp,%r9,1),%r10 563bc3d5698SJohn Baldwin leaq 128(%rdx),%r12 564bc3d5698SJohn Baldwin 565bc3d5698SJohn Baldwin pshufd $0,%xmm5,%xmm5 566bc3d5698SJohn Baldwin movdqa %xmm1,%xmm4 567bc3d5698SJohn Baldwin.byte 0x67,0x67 568bc3d5698SJohn Baldwin movdqa %xmm1,%xmm2 569bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 570bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm0 571bc3d5698SJohn Baldwin.byte 0x67 572bc3d5698SJohn Baldwin movdqa %xmm4,%xmm3 573bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 574bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm1 575bc3d5698SJohn Baldwin movdqa %xmm0,112(%r10) 576bc3d5698SJohn Baldwin movdqa %xmm4,%xmm0 577bc3d5698SJohn Baldwin 578bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 579bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm2 580bc3d5698SJohn Baldwin movdqa %xmm1,128(%r10) 581bc3d5698SJohn Baldwin movdqa %xmm4,%xmm1 582bc3d5698SJohn Baldwin 583bc3d5698SJohn Baldwin paddd %xmm3,%xmm0 584bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm3 585bc3d5698SJohn Baldwin movdqa %xmm2,144(%r10) 586bc3d5698SJohn Baldwin movdqa %xmm4,%xmm2 587bc3d5698SJohn Baldwin 588bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 589bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm0 590bc3d5698SJohn Baldwin movdqa %xmm3,160(%r10) 591bc3d5698SJohn
Baldwin movdqa %xmm4,%xmm3 592bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 593bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm1 594bc3d5698SJohn Baldwin movdqa %xmm0,176(%r10) 595bc3d5698SJohn Baldwin movdqa %xmm4,%xmm0 596bc3d5698SJohn Baldwin 597bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 598bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm2 599bc3d5698SJohn Baldwin movdqa %xmm1,192(%r10) 600bc3d5698SJohn Baldwin movdqa %xmm4,%xmm1 601bc3d5698SJohn Baldwin 602bc3d5698SJohn Baldwin paddd %xmm3,%xmm0 603bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm3 604bc3d5698SJohn Baldwin movdqa %xmm2,208(%r10) 605bc3d5698SJohn Baldwin movdqa %xmm4,%xmm2 606bc3d5698SJohn Baldwin 607bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 608bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm0 609bc3d5698SJohn Baldwin movdqa %xmm3,224(%r10) 610bc3d5698SJohn Baldwin movdqa %xmm4,%xmm3 611bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 612bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm1 613bc3d5698SJohn Baldwin movdqa %xmm0,240(%r10) 614bc3d5698SJohn Baldwin movdqa %xmm4,%xmm0 615bc3d5698SJohn Baldwin 616bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 617bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm2 618bc3d5698SJohn Baldwin movdqa %xmm1,256(%r10) 619bc3d5698SJohn Baldwin movdqa %xmm4,%xmm1 620bc3d5698SJohn Baldwin 621bc3d5698SJohn Baldwin paddd %xmm3,%xmm0 622bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm3 623bc3d5698SJohn Baldwin movdqa %xmm2,272(%r10) 624bc3d5698SJohn Baldwin movdqa %xmm4,%xmm2 625bc3d5698SJohn Baldwin 626bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 627bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm0 628bc3d5698SJohn Baldwin movdqa %xmm3,288(%r10) 629bc3d5698SJohn Baldwin movdqa %xmm4,%xmm3 630bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 631bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm1 632bc3d5698SJohn Baldwin movdqa %xmm0,304(%r10) 633bc3d5698SJohn Baldwin 634bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 635bc3d5698SJohn Baldwin.byte 0x67 636bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm2 637bc3d5698SJohn Baldwin movdqa %xmm1,320(%r10) 638bc3d5698SJohn Baldwin 639bc3d5698SJohn Baldwin 
pcmpeqd %xmm5,%xmm3 640bc3d5698SJohn Baldwin movdqa %xmm2,336(%r10) 641bc3d5698SJohn Baldwin pand 64(%r12),%xmm0 642bc3d5698SJohn Baldwin 643bc3d5698SJohn Baldwin pand 80(%r12),%xmm1 644bc3d5698SJohn Baldwin pand 96(%r12),%xmm2 645bc3d5698SJohn Baldwin movdqa %xmm3,352(%r10) 646bc3d5698SJohn Baldwin pand 112(%r12),%xmm3 647bc3d5698SJohn Baldwin por %xmm2,%xmm0 648bc3d5698SJohn Baldwin por %xmm3,%xmm1 649bc3d5698SJohn Baldwin movdqa -128(%r12),%xmm4 650bc3d5698SJohn Baldwin movdqa -112(%r12),%xmm5 651bc3d5698SJohn Baldwin movdqa -96(%r12),%xmm2 652bc3d5698SJohn Baldwin pand 112(%r10),%xmm4 653bc3d5698SJohn Baldwin movdqa -80(%r12),%xmm3 654bc3d5698SJohn Baldwin pand 128(%r10),%xmm5 655bc3d5698SJohn Baldwin por %xmm4,%xmm0 656bc3d5698SJohn Baldwin pand 144(%r10),%xmm2 657bc3d5698SJohn Baldwin por %xmm5,%xmm1 658bc3d5698SJohn Baldwin pand 160(%r10),%xmm3 659bc3d5698SJohn Baldwin por %xmm2,%xmm0 660bc3d5698SJohn Baldwin por %xmm3,%xmm1 661bc3d5698SJohn Baldwin movdqa -64(%r12),%xmm4 662bc3d5698SJohn Baldwin movdqa -48(%r12),%xmm5 663bc3d5698SJohn Baldwin movdqa -32(%r12),%xmm2 664bc3d5698SJohn Baldwin pand 176(%r10),%xmm4 665bc3d5698SJohn Baldwin movdqa -16(%r12),%xmm3 666bc3d5698SJohn Baldwin pand 192(%r10),%xmm5 667bc3d5698SJohn Baldwin por %xmm4,%xmm0 668bc3d5698SJohn Baldwin pand 208(%r10),%xmm2 669bc3d5698SJohn Baldwin por %xmm5,%xmm1 670bc3d5698SJohn Baldwin pand 224(%r10),%xmm3 671bc3d5698SJohn Baldwin por %xmm2,%xmm0 672bc3d5698SJohn Baldwin por %xmm3,%xmm1 673bc3d5698SJohn Baldwin movdqa 0(%r12),%xmm4 674bc3d5698SJohn Baldwin movdqa 16(%r12),%xmm5 675bc3d5698SJohn Baldwin movdqa 32(%r12),%xmm2 676bc3d5698SJohn Baldwin pand 240(%r10),%xmm4 677bc3d5698SJohn Baldwin movdqa 48(%r12),%xmm3 678bc3d5698SJohn Baldwin pand 256(%r10),%xmm5 679bc3d5698SJohn Baldwin por %xmm4,%xmm0 680bc3d5698SJohn Baldwin pand 272(%r10),%xmm2 681bc3d5698SJohn Baldwin por %xmm5,%xmm1 682bc3d5698SJohn Baldwin pand 288(%r10),%xmm3 683bc3d5698SJohn Baldwin por %xmm2,%xmm0 684bc3d5698SJohn 
Baldwin por %xmm3,%xmm1 685bc3d5698SJohn Baldwin por %xmm1,%xmm0 686bc3d5698SJohn Baldwin pshufd $0x4e,%xmm0,%xmm1 687bc3d5698SJohn Baldwin por %xmm1,%xmm0 688bc3d5698SJohn Baldwin leaq 256(%r12),%r12 689bc3d5698SJohn Baldwin.byte 102,72,15,126,195 690bc3d5698SJohn Baldwin 691bc3d5698SJohn Baldwin movq %r13,16+8(%rsp) 692bc3d5698SJohn Baldwin movq %rdi,56+8(%rsp) 693bc3d5698SJohn Baldwin 694bc3d5698SJohn Baldwin movq (%r8),%r8 695bc3d5698SJohn Baldwin movq (%rsi),%rax 696bc3d5698SJohn Baldwin leaq (%rsi,%r9,1),%rsi 697bc3d5698SJohn Baldwin negq %r9 698bc3d5698SJohn Baldwin 699bc3d5698SJohn Baldwin movq %r8,%rbp 700bc3d5698SJohn Baldwin mulq %rbx 701bc3d5698SJohn Baldwin movq %rax,%r10 702bc3d5698SJohn Baldwin movq (%rcx),%rax 703bc3d5698SJohn Baldwin 704bc3d5698SJohn Baldwin imulq %r10,%rbp 705bc3d5698SJohn Baldwin leaq 64+8(%rsp),%r14 706bc3d5698SJohn Baldwin movq %rdx,%r11 707bc3d5698SJohn Baldwin 708bc3d5698SJohn Baldwin mulq %rbp 709bc3d5698SJohn Baldwin addq %rax,%r10 710bc3d5698SJohn Baldwin movq 8(%rsi,%r9,1),%rax 711bc3d5698SJohn Baldwin adcq $0,%rdx 712bc3d5698SJohn Baldwin movq %rdx,%rdi 713bc3d5698SJohn Baldwin 714bc3d5698SJohn Baldwin mulq %rbx 715bc3d5698SJohn Baldwin addq %rax,%r11 716bc3d5698SJohn Baldwin movq 8(%rcx),%rax 717bc3d5698SJohn Baldwin adcq $0,%rdx 718bc3d5698SJohn Baldwin movq %rdx,%r10 719bc3d5698SJohn Baldwin 720bc3d5698SJohn Baldwin mulq %rbp 721bc3d5698SJohn Baldwin addq %rax,%rdi 722bc3d5698SJohn Baldwin movq 16(%rsi,%r9,1),%rax 723bc3d5698SJohn Baldwin adcq $0,%rdx 724bc3d5698SJohn Baldwin addq %r11,%rdi 725bc3d5698SJohn Baldwin leaq 32(%r9),%r15 726bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 727bc3d5698SJohn Baldwin adcq $0,%rdx 728bc3d5698SJohn Baldwin movq %rdi,(%r14) 729bc3d5698SJohn Baldwin movq %rdx,%r13 730bc3d5698SJohn Baldwin jmp .L1st4x 731bc3d5698SJohn Baldwin 732bc3d5698SJohn Baldwin.align 32 733bc3d5698SJohn Baldwin.L1st4x: 734bc3d5698SJohn Baldwin mulq %rbx 735bc3d5698SJohn Baldwin addq %rax,%r10 736bc3d5698SJohn Baldwin 
movq -16(%rcx),%rax 737bc3d5698SJohn Baldwin leaq 32(%r14),%r14 738bc3d5698SJohn Baldwin adcq $0,%rdx 739bc3d5698SJohn Baldwin movq %rdx,%r11 740bc3d5698SJohn Baldwin 741bc3d5698SJohn Baldwin mulq %rbp 742bc3d5698SJohn Baldwin addq %rax,%r13 743bc3d5698SJohn Baldwin movq -8(%rsi,%r15,1),%rax 744bc3d5698SJohn Baldwin adcq $0,%rdx 745bc3d5698SJohn Baldwin addq %r10,%r13 746bc3d5698SJohn Baldwin adcq $0,%rdx 747bc3d5698SJohn Baldwin movq %r13,-24(%r14) 748bc3d5698SJohn Baldwin movq %rdx,%rdi 749bc3d5698SJohn Baldwin 750bc3d5698SJohn Baldwin mulq %rbx 751bc3d5698SJohn Baldwin addq %rax,%r11 752bc3d5698SJohn Baldwin movq -8(%rcx),%rax 753bc3d5698SJohn Baldwin adcq $0,%rdx 754bc3d5698SJohn Baldwin movq %rdx,%r10 755bc3d5698SJohn Baldwin 756bc3d5698SJohn Baldwin mulq %rbp 757bc3d5698SJohn Baldwin addq %rax,%rdi 758bc3d5698SJohn Baldwin movq (%rsi,%r15,1),%rax 759bc3d5698SJohn Baldwin adcq $0,%rdx 760bc3d5698SJohn Baldwin addq %r11,%rdi 761bc3d5698SJohn Baldwin adcq $0,%rdx 762bc3d5698SJohn Baldwin movq %rdi,-16(%r14) 763bc3d5698SJohn Baldwin movq %rdx,%r13 764bc3d5698SJohn Baldwin 765bc3d5698SJohn Baldwin mulq %rbx 766bc3d5698SJohn Baldwin addq %rax,%r10 767bc3d5698SJohn Baldwin movq 0(%rcx),%rax 768bc3d5698SJohn Baldwin adcq $0,%rdx 769bc3d5698SJohn Baldwin movq %rdx,%r11 770bc3d5698SJohn Baldwin 771bc3d5698SJohn Baldwin mulq %rbp 772bc3d5698SJohn Baldwin addq %rax,%r13 773bc3d5698SJohn Baldwin movq 8(%rsi,%r15,1),%rax 774bc3d5698SJohn Baldwin adcq $0,%rdx 775bc3d5698SJohn Baldwin addq %r10,%r13 776bc3d5698SJohn Baldwin adcq $0,%rdx 777bc3d5698SJohn Baldwin movq %r13,-8(%r14) 778bc3d5698SJohn Baldwin movq %rdx,%rdi 779bc3d5698SJohn Baldwin 780bc3d5698SJohn Baldwin mulq %rbx 781bc3d5698SJohn Baldwin addq %rax,%r11 782bc3d5698SJohn Baldwin movq 8(%rcx),%rax 783bc3d5698SJohn Baldwin adcq $0,%rdx 784bc3d5698SJohn Baldwin movq %rdx,%r10 785bc3d5698SJohn Baldwin 786bc3d5698SJohn Baldwin mulq %rbp 787bc3d5698SJohn Baldwin addq %rax,%rdi 788bc3d5698SJohn Baldwin movq 
16(%rsi,%r15,1),%rax 789bc3d5698SJohn Baldwin adcq $0,%rdx 790bc3d5698SJohn Baldwin addq %r11,%rdi 791bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 792bc3d5698SJohn Baldwin adcq $0,%rdx 793bc3d5698SJohn Baldwin movq %rdi,(%r14) 794bc3d5698SJohn Baldwin movq %rdx,%r13 795bc3d5698SJohn Baldwin 796bc3d5698SJohn Baldwin addq $32,%r15 797bc3d5698SJohn Baldwin jnz .L1st4x 798bc3d5698SJohn Baldwin 799bc3d5698SJohn Baldwin mulq %rbx 800bc3d5698SJohn Baldwin addq %rax,%r10 801bc3d5698SJohn Baldwin movq -16(%rcx),%rax 802bc3d5698SJohn Baldwin leaq 32(%r14),%r14 803bc3d5698SJohn Baldwin adcq $0,%rdx 804bc3d5698SJohn Baldwin movq %rdx,%r11 805bc3d5698SJohn Baldwin 806bc3d5698SJohn Baldwin mulq %rbp 807bc3d5698SJohn Baldwin addq %rax,%r13 808bc3d5698SJohn Baldwin movq -8(%rsi),%rax 809bc3d5698SJohn Baldwin adcq $0,%rdx 810bc3d5698SJohn Baldwin addq %r10,%r13 811bc3d5698SJohn Baldwin adcq $0,%rdx 812bc3d5698SJohn Baldwin movq %r13,-24(%r14) 813bc3d5698SJohn Baldwin movq %rdx,%rdi 814bc3d5698SJohn Baldwin 815bc3d5698SJohn Baldwin mulq %rbx 816bc3d5698SJohn Baldwin addq %rax,%r11 817bc3d5698SJohn Baldwin movq -8(%rcx),%rax 818bc3d5698SJohn Baldwin adcq $0,%rdx 819bc3d5698SJohn Baldwin movq %rdx,%r10 820bc3d5698SJohn Baldwin 821bc3d5698SJohn Baldwin mulq %rbp 822bc3d5698SJohn Baldwin addq %rax,%rdi 823bc3d5698SJohn Baldwin movq (%rsi,%r9,1),%rax 824bc3d5698SJohn Baldwin adcq $0,%rdx 825bc3d5698SJohn Baldwin addq %r11,%rdi 826bc3d5698SJohn Baldwin adcq $0,%rdx 827bc3d5698SJohn Baldwin movq %rdi,-16(%r14) 828bc3d5698SJohn Baldwin movq %rdx,%r13 829bc3d5698SJohn Baldwin 830bc3d5698SJohn Baldwin leaq (%rcx,%r9,1),%rcx 831bc3d5698SJohn Baldwin 832bc3d5698SJohn Baldwin xorq %rdi,%rdi 833bc3d5698SJohn Baldwin addq %r10,%r13 834bc3d5698SJohn Baldwin adcq $0,%rdi 835bc3d5698SJohn Baldwin movq %r13,-8(%r14) 836bc3d5698SJohn Baldwin 837bc3d5698SJohn Baldwin jmp .Louter4x 838bc3d5698SJohn Baldwin 839bc3d5698SJohn Baldwin.align 32 840bc3d5698SJohn Baldwin.Louter4x: 841bc3d5698SJohn Baldwin leaq 
16+128(%r14),%rdx 842bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 843bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 844bc3d5698SJohn Baldwin movdqa -128(%r12),%xmm0 845bc3d5698SJohn Baldwin movdqa -112(%r12),%xmm1 846bc3d5698SJohn Baldwin movdqa -96(%r12),%xmm2 847bc3d5698SJohn Baldwin movdqa -80(%r12),%xmm3 848bc3d5698SJohn Baldwin pand -128(%rdx),%xmm0 849bc3d5698SJohn Baldwin pand -112(%rdx),%xmm1 850bc3d5698SJohn Baldwin por %xmm0,%xmm4 851bc3d5698SJohn Baldwin pand -96(%rdx),%xmm2 852bc3d5698SJohn Baldwin por %xmm1,%xmm5 853bc3d5698SJohn Baldwin pand -80(%rdx),%xmm3 854bc3d5698SJohn Baldwin por %xmm2,%xmm4 855bc3d5698SJohn Baldwin por %xmm3,%xmm5 856bc3d5698SJohn Baldwin movdqa -64(%r12),%xmm0 857bc3d5698SJohn Baldwin movdqa -48(%r12),%xmm1 858bc3d5698SJohn Baldwin movdqa -32(%r12),%xmm2 859bc3d5698SJohn Baldwin movdqa -16(%r12),%xmm3 860bc3d5698SJohn Baldwin pand -64(%rdx),%xmm0 861bc3d5698SJohn Baldwin pand -48(%rdx),%xmm1 862bc3d5698SJohn Baldwin por %xmm0,%xmm4 863bc3d5698SJohn Baldwin pand -32(%rdx),%xmm2 864bc3d5698SJohn Baldwin por %xmm1,%xmm5 865bc3d5698SJohn Baldwin pand -16(%rdx),%xmm3 866bc3d5698SJohn Baldwin por %xmm2,%xmm4 867bc3d5698SJohn Baldwin por %xmm3,%xmm5 868bc3d5698SJohn Baldwin movdqa 0(%r12),%xmm0 869bc3d5698SJohn Baldwin movdqa 16(%r12),%xmm1 870bc3d5698SJohn Baldwin movdqa 32(%r12),%xmm2 871bc3d5698SJohn Baldwin movdqa 48(%r12),%xmm3 872bc3d5698SJohn Baldwin pand 0(%rdx),%xmm0 873bc3d5698SJohn Baldwin pand 16(%rdx),%xmm1 874bc3d5698SJohn Baldwin por %xmm0,%xmm4 875bc3d5698SJohn Baldwin pand 32(%rdx),%xmm2 876bc3d5698SJohn Baldwin por %xmm1,%xmm5 877bc3d5698SJohn Baldwin pand 48(%rdx),%xmm3 878bc3d5698SJohn Baldwin por %xmm2,%xmm4 879bc3d5698SJohn Baldwin por %xmm3,%xmm5 880bc3d5698SJohn Baldwin movdqa 64(%r12),%xmm0 881bc3d5698SJohn Baldwin movdqa 80(%r12),%xmm1 882bc3d5698SJohn Baldwin movdqa 96(%r12),%xmm2 883bc3d5698SJohn Baldwin movdqa 112(%r12),%xmm3 884bc3d5698SJohn Baldwin pand 64(%rdx),%xmm0 885bc3d5698SJohn Baldwin pand 80(%rdx),%xmm1 
886bc3d5698SJohn Baldwin por %xmm0,%xmm4 887bc3d5698SJohn Baldwin pand 96(%rdx),%xmm2 888bc3d5698SJohn Baldwin por %xmm1,%xmm5 889bc3d5698SJohn Baldwin pand 112(%rdx),%xmm3 890bc3d5698SJohn Baldwin por %xmm2,%xmm4 891bc3d5698SJohn Baldwin por %xmm3,%xmm5 892bc3d5698SJohn Baldwin por %xmm5,%xmm4 893bc3d5698SJohn Baldwin pshufd $0x4e,%xmm4,%xmm0 894bc3d5698SJohn Baldwin por %xmm4,%xmm0 895bc3d5698SJohn Baldwin leaq 256(%r12),%r12 896bc3d5698SJohn Baldwin.byte 102,72,15,126,195 897bc3d5698SJohn Baldwin 898bc3d5698SJohn Baldwin movq (%r14,%r9,1),%r10 899bc3d5698SJohn Baldwin movq %r8,%rbp 900bc3d5698SJohn Baldwin mulq %rbx 901bc3d5698SJohn Baldwin addq %rax,%r10 902bc3d5698SJohn Baldwin movq (%rcx),%rax 903bc3d5698SJohn Baldwin adcq $0,%rdx 904bc3d5698SJohn Baldwin 905bc3d5698SJohn Baldwin imulq %r10,%rbp 906bc3d5698SJohn Baldwin movq %rdx,%r11 907bc3d5698SJohn Baldwin movq %rdi,(%r14) 908bc3d5698SJohn Baldwin 909bc3d5698SJohn Baldwin leaq (%r14,%r9,1),%r14 910bc3d5698SJohn Baldwin 911bc3d5698SJohn Baldwin mulq %rbp 912bc3d5698SJohn Baldwin addq %rax,%r10 913bc3d5698SJohn Baldwin movq 8(%rsi,%r9,1),%rax 914bc3d5698SJohn Baldwin adcq $0,%rdx 915bc3d5698SJohn Baldwin movq %rdx,%rdi 916bc3d5698SJohn Baldwin 917bc3d5698SJohn Baldwin mulq %rbx 918bc3d5698SJohn Baldwin addq %rax,%r11 919bc3d5698SJohn Baldwin movq 8(%rcx),%rax 920bc3d5698SJohn Baldwin adcq $0,%rdx 921bc3d5698SJohn Baldwin addq 8(%r14),%r11 922bc3d5698SJohn Baldwin adcq $0,%rdx 923bc3d5698SJohn Baldwin movq %rdx,%r10 924bc3d5698SJohn Baldwin 925bc3d5698SJohn Baldwin mulq %rbp 926bc3d5698SJohn Baldwin addq %rax,%rdi 927bc3d5698SJohn Baldwin movq 16(%rsi,%r9,1),%rax 928bc3d5698SJohn Baldwin adcq $0,%rdx 929bc3d5698SJohn Baldwin addq %r11,%rdi 930bc3d5698SJohn Baldwin leaq 32(%r9),%r15 931bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 932bc3d5698SJohn Baldwin adcq $0,%rdx 933bc3d5698SJohn Baldwin movq %rdx,%r13 934bc3d5698SJohn Baldwin jmp .Linner4x 935bc3d5698SJohn Baldwin 936bc3d5698SJohn Baldwin.align 32 
937bc3d5698SJohn Baldwin.Linner4x: 938bc3d5698SJohn Baldwin mulq %rbx 939bc3d5698SJohn Baldwin addq %rax,%r10 940bc3d5698SJohn Baldwin movq -16(%rcx),%rax 941bc3d5698SJohn Baldwin adcq $0,%rdx 942bc3d5698SJohn Baldwin addq 16(%r14),%r10 943bc3d5698SJohn Baldwin leaq 32(%r14),%r14 944bc3d5698SJohn Baldwin adcq $0,%rdx 945bc3d5698SJohn Baldwin movq %rdx,%r11 946bc3d5698SJohn Baldwin 947bc3d5698SJohn Baldwin mulq %rbp 948bc3d5698SJohn Baldwin addq %rax,%r13 949bc3d5698SJohn Baldwin movq -8(%rsi,%r15,1),%rax 950bc3d5698SJohn Baldwin adcq $0,%rdx 951bc3d5698SJohn Baldwin addq %r10,%r13 952bc3d5698SJohn Baldwin adcq $0,%rdx 953bc3d5698SJohn Baldwin movq %rdi,-32(%r14) 954bc3d5698SJohn Baldwin movq %rdx,%rdi 955bc3d5698SJohn Baldwin 956bc3d5698SJohn Baldwin mulq %rbx 957bc3d5698SJohn Baldwin addq %rax,%r11 958bc3d5698SJohn Baldwin movq -8(%rcx),%rax 959bc3d5698SJohn Baldwin adcq $0,%rdx 960bc3d5698SJohn Baldwin addq -8(%r14),%r11 961bc3d5698SJohn Baldwin adcq $0,%rdx 962bc3d5698SJohn Baldwin movq %rdx,%r10 963bc3d5698SJohn Baldwin 964bc3d5698SJohn Baldwin mulq %rbp 965bc3d5698SJohn Baldwin addq %rax,%rdi 966bc3d5698SJohn Baldwin movq (%rsi,%r15,1),%rax 967bc3d5698SJohn Baldwin adcq $0,%rdx 968bc3d5698SJohn Baldwin addq %r11,%rdi 969bc3d5698SJohn Baldwin adcq $0,%rdx 970bc3d5698SJohn Baldwin movq %r13,-24(%r14) 971bc3d5698SJohn Baldwin movq %rdx,%r13 972bc3d5698SJohn Baldwin 973bc3d5698SJohn Baldwin mulq %rbx 974bc3d5698SJohn Baldwin addq %rax,%r10 975bc3d5698SJohn Baldwin movq 0(%rcx),%rax 976bc3d5698SJohn Baldwin adcq $0,%rdx 977bc3d5698SJohn Baldwin addq (%r14),%r10 978bc3d5698SJohn Baldwin adcq $0,%rdx 979bc3d5698SJohn Baldwin movq %rdx,%r11 980bc3d5698SJohn Baldwin 981bc3d5698SJohn Baldwin mulq %rbp 982bc3d5698SJohn Baldwin addq %rax,%r13 983bc3d5698SJohn Baldwin movq 8(%rsi,%r15,1),%rax 984bc3d5698SJohn Baldwin adcq $0,%rdx 985bc3d5698SJohn Baldwin addq %r10,%r13 986bc3d5698SJohn Baldwin adcq $0,%rdx 987bc3d5698SJohn Baldwin movq %rdi,-16(%r14) 988bc3d5698SJohn 
Baldwin movq %rdx,%rdi 989bc3d5698SJohn Baldwin 990bc3d5698SJohn Baldwin mulq %rbx 991bc3d5698SJohn Baldwin addq %rax,%r11 992bc3d5698SJohn Baldwin movq 8(%rcx),%rax 993bc3d5698SJohn Baldwin adcq $0,%rdx 994bc3d5698SJohn Baldwin addq 8(%r14),%r11 995bc3d5698SJohn Baldwin adcq $0,%rdx 996bc3d5698SJohn Baldwin movq %rdx,%r10 997bc3d5698SJohn Baldwin 998bc3d5698SJohn Baldwin mulq %rbp 999bc3d5698SJohn Baldwin addq %rax,%rdi 1000bc3d5698SJohn Baldwin movq 16(%rsi,%r15,1),%rax 1001bc3d5698SJohn Baldwin adcq $0,%rdx 1002bc3d5698SJohn Baldwin addq %r11,%rdi 1003bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 1004bc3d5698SJohn Baldwin adcq $0,%rdx 1005bc3d5698SJohn Baldwin movq %r13,-8(%r14) 1006bc3d5698SJohn Baldwin movq %rdx,%r13 1007bc3d5698SJohn Baldwin 1008bc3d5698SJohn Baldwin addq $32,%r15 1009bc3d5698SJohn Baldwin jnz .Linner4x 1010bc3d5698SJohn Baldwin 1011bc3d5698SJohn Baldwin mulq %rbx 1012bc3d5698SJohn Baldwin addq %rax,%r10 1013bc3d5698SJohn Baldwin movq -16(%rcx),%rax 1014bc3d5698SJohn Baldwin adcq $0,%rdx 1015bc3d5698SJohn Baldwin addq 16(%r14),%r10 1016bc3d5698SJohn Baldwin leaq 32(%r14),%r14 1017bc3d5698SJohn Baldwin adcq $0,%rdx 1018bc3d5698SJohn Baldwin movq %rdx,%r11 1019bc3d5698SJohn Baldwin 1020bc3d5698SJohn Baldwin mulq %rbp 1021bc3d5698SJohn Baldwin addq %rax,%r13 1022bc3d5698SJohn Baldwin movq -8(%rsi),%rax 1023bc3d5698SJohn Baldwin adcq $0,%rdx 1024bc3d5698SJohn Baldwin addq %r10,%r13 1025bc3d5698SJohn Baldwin adcq $0,%rdx 1026bc3d5698SJohn Baldwin movq %rdi,-32(%r14) 1027bc3d5698SJohn Baldwin movq %rdx,%rdi 1028bc3d5698SJohn Baldwin 1029bc3d5698SJohn Baldwin mulq %rbx 1030bc3d5698SJohn Baldwin addq %rax,%r11 1031bc3d5698SJohn Baldwin movq %rbp,%rax 1032bc3d5698SJohn Baldwin movq -8(%rcx),%rbp 1033bc3d5698SJohn Baldwin adcq $0,%rdx 1034bc3d5698SJohn Baldwin addq -8(%r14),%r11 1035bc3d5698SJohn Baldwin adcq $0,%rdx 1036bc3d5698SJohn Baldwin movq %rdx,%r10 1037bc3d5698SJohn Baldwin 1038bc3d5698SJohn Baldwin mulq %rbp 1039bc3d5698SJohn Baldwin addq 
%rax,%rdi 1040bc3d5698SJohn Baldwin movq (%rsi,%r9,1),%rax 1041bc3d5698SJohn Baldwin adcq $0,%rdx 1042bc3d5698SJohn Baldwin addq %r11,%rdi 1043bc3d5698SJohn Baldwin adcq $0,%rdx 1044bc3d5698SJohn Baldwin movq %r13,-24(%r14) 1045bc3d5698SJohn Baldwin movq %rdx,%r13 1046bc3d5698SJohn Baldwin 1047bc3d5698SJohn Baldwin movq %rdi,-16(%r14) 1048bc3d5698SJohn Baldwin leaq (%rcx,%r9,1),%rcx 1049bc3d5698SJohn Baldwin 1050bc3d5698SJohn Baldwin xorq %rdi,%rdi 1051bc3d5698SJohn Baldwin addq %r10,%r13 1052bc3d5698SJohn Baldwin adcq $0,%rdi 1053bc3d5698SJohn Baldwin addq (%r14),%r13 1054bc3d5698SJohn Baldwin adcq $0,%rdi 1055bc3d5698SJohn Baldwin movq %r13,-8(%r14) 1056bc3d5698SJohn Baldwin 1057bc3d5698SJohn Baldwin cmpq 16+8(%rsp),%r12 1058bc3d5698SJohn Baldwin jb .Louter4x 1059bc3d5698SJohn Baldwin xorq %rax,%rax 1060bc3d5698SJohn Baldwin subq %r13,%rbp 1061bc3d5698SJohn Baldwin adcq %r15,%r15 1062bc3d5698SJohn Baldwin orq %r15,%rdi 1063bc3d5698SJohn Baldwin subq %rdi,%rax 1064bc3d5698SJohn Baldwin leaq (%r14,%r9,1),%rbx 1065bc3d5698SJohn Baldwin movq (%rcx),%r12 1066bc3d5698SJohn Baldwin leaq (%rcx),%rbp 1067bc3d5698SJohn Baldwin movq %r9,%rcx 1068bc3d5698SJohn Baldwin sarq $3+2,%rcx 1069bc3d5698SJohn Baldwin movq 56+8(%rsp),%rdi 1070bc3d5698SJohn Baldwin decq %r12 1071bc3d5698SJohn Baldwin xorq %r10,%r10 1072bc3d5698SJohn Baldwin movq 8(%rbp),%r13 1073bc3d5698SJohn Baldwin movq 16(%rbp),%r14 1074bc3d5698SJohn Baldwin movq 24(%rbp),%r15 1075bc3d5698SJohn Baldwin jmp .Lsqr4x_sub_entry 1076bc3d5698SJohn Baldwin.cfi_endproc 1077bc3d5698SJohn Baldwin.size mul4x_internal,.-mul4x_internal /* bn_power5: global entry point. As written here it saves the callee-saved registers, carves a 64-byte-aligned scratch frame below the caller's stack, runs five __bn_sqr8x_internal + __bn_post4x_internal call pairs followed by one mul4x_internal call, restores the saved registers and returns 1 in %rax. NOTE(review): presumably called as bn_power5(rp, ap, table, np, n0, num, power) with the arguments in %rdi, %rsi, %rdx, %rcx, %r8, %r9 - confirm against the C caller. */ 1078bc3d5698SJohn Baldwin.globl bn_power5 1079bc3d5698SJohn Baldwin.type bn_power5,@function 1080bc3d5698SJohn Baldwin.align 32 1081bc3d5698SJohn Baldwinbn_power5: 1082bc3d5698SJohn Baldwin.cfi_startproc 1083bc3d5698SJohn Baldwin movq %rsp,%rax /* keep the entry %rsp in %rax; the CFA is tracked through %rax from the next directive on */ 1084bc3d5698SJohn Baldwin.cfi_def_cfa_register %rax 1085bc3d5698SJohn Baldwin movl OPENSSL_ia32cap_P+8(%rip),%r11d /* load the 32-bit capability word at byte offset 8 of OPENSSL_ia32cap_P */ 1086bc3d5698SJohn Baldwin andl
$0x80108,%r11d /* keep only bits 3, 8 and 19; NOTE(review): presumably the BMI1, BMI2 and ADX feature flags - confirm against the OPENSSL_ia32cap documentation */ 1087bc3d5698SJohn Baldwin cmpl $0x80108,%r11d 1088bc3d5698SJohn Baldwin je .Lpowerx5_enter /* all three bits set: branch to .Lpowerx5_enter, which is defined outside this part of the file */ 1089bc3d5698SJohn Baldwin pushq %rbx /* push the six callee-saved registers; each .cfi_offset records the slot relative to the CFA */ 1090bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 1091bc3d5698SJohn Baldwin pushq %rbp 1092bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 1093bc3d5698SJohn Baldwin pushq %r12 1094bc3d5698SJohn Baldwin.cfi_offset %r12,-32 1095bc3d5698SJohn Baldwin pushq %r13 1096bc3d5698SJohn Baldwin.cfi_offset %r13,-40 1097bc3d5698SJohn Baldwin pushq %r14 1098bc3d5698SJohn Baldwin.cfi_offset %r14,-48 1099bc3d5698SJohn Baldwin pushq %r15 1100bc3d5698SJohn Baldwin.cfi_offset %r15,-56 1101bc3d5698SJohn Baldwin.Lpower5_prologue: 1102bc3d5698SJohn Baldwin 1103bc3d5698SJohn Baldwin shll $3,%r9d /* %r9d *= 8; the 32-bit write also zeroes the upper half of %r9. NOTE(review): presumably turns a 64-bit word count into a byte count - confirm */ 1104bc3d5698SJohn Baldwin leal (%r9,%r9,2),%r10d /* %r10d = 3 * %r9d */ 1105bc3d5698SJohn Baldwin negq %r9 /* %r9 = -%r9, used as a negative displacement in the address arithmetic below */ 1106bc3d5698SJohn Baldwin movq (%r8),%r8 /* replace the pointer in %r8 with the 64-bit value it points to */ 1107bc3d5698SJohn Baldwin 1108bc3d5698SJohn Baldwin 1109bc3d5698SJohn Baldwin 1110bc3d5698SJohn Baldwin 1111bc3d5698SJohn Baldwin 1112bc3d5698SJohn Baldwin 1113bc3d5698SJohn Baldwin 1114bc3d5698SJohn Baldwin 1115bc3d5698SJohn Baldwin leaq -320(%rsp,%r9,2),%r11 /* %r11 = %rsp - 320 + 2*%r9 (with %r9 negative): candidate frame base */ 1116bc3d5698SJohn Baldwin movq %rsp,%rbp 1117bc3d5698SJohn Baldwin subq %rdi,%r11 1118bc3d5698SJohn Baldwin andq $4095,%r11 /* %r11 = (candidate base - %rdi) modulo 4096; NOTE(review): presumably used to control how the frame aliases with the buffer at %rdi within a 4096-byte window - confirm against x86_64-mont5.pl */ 1119bc3d5698SJohn Baldwin cmpq %r11,%r10 1120bc3d5698SJohn Baldwin jb .Lpwr_sp_alt /* unsigned compare: use the alternate placement when 3*%r9d is below that distance */ 1121bc3d5698SJohn Baldwin subq %r11,%rbp /* otherwise lower %rbp by the distance, then by 320 plus twice the size */ 1122bc3d5698SJohn Baldwin leaq -320(%rbp,%r9,2),%rbp 1123bc3d5698SJohn Baldwin jmp .Lpwr_sp_done 1124bc3d5698SJohn Baldwin 1125bc3d5698SJohn Baldwin.align 32 1126bc3d5698SJohn Baldwin.Lpwr_sp_alt: 1127bc3d5698SJohn Baldwin leaq 4096-320(,%r9,2),%r10 /* %r10 = 4096 - 320 + 2*%r9 (with %r9 negative) */ 1128bc3d5698SJohn Baldwin leaq -320(%rbp,%r9,2),%rbp 1129bc3d5698SJohn Baldwin subq %r10,%r11 1130bc3d5698SJohn Baldwin movq $0,%r10 /* mov rather than xor so the carry flag produced by the subq above is still intact for cmovc */ 1131bc3d5698SJohn Baldwin cmovcq %r10,%r11 /* clamp %r11 to 0 when the subtraction borrowed */ 1132bc3d5698SJohn Baldwin subq %r11,%rbp 1133bc3d5698SJohn Baldwin.Lpwr_sp_done: 1134bc3d5698SJohn Baldwin andq $-64,%rbp /* round the frame base in %rbp down to a 64-byte boundary */ 1135bc3d5698SJohn Baldwin movq %rsp,%r11 /* next four instructions: %rsp = %rbp + ((%rsp - %rbp) rounded down to a multiple of 4096) */ 1136bc3d5698SJohn Baldwin subq %rbp,%r11 1137bc3d5698SJohn
Baldwin andq $-4096,%r11 1138bc3d5698SJohn Baldwin leaq (%r11,%rbp,1),%rsp 1139bc3d5698SJohn Baldwin movq (%rsp),%r10 /* read one qword at the new %rsp (the value is overwritten below without being used); NOTE(review): presumably a stack probe so pages are touched in order - confirm */ 1140bc3d5698SJohn Baldwin cmpq %rbp,%rsp 1141bc3d5698SJohn Baldwin ja .Lpwr_page_walk 1142bc3d5698SJohn Baldwin jmp .Lpwr_page_walk_done 1143bc3d5698SJohn Baldwin 1144bc3d5698SJohn Baldwin.Lpwr_page_walk: 1145bc3d5698SJohn Baldwin leaq -4096(%rsp),%rsp /* loop: step %rsp down by 4096 and read from it, until %rsp is no longer above %rbp (unsigned) */ 1146bc3d5698SJohn Baldwin movq (%rsp),%r10 1147bc3d5698SJohn Baldwin cmpq %rbp,%rsp 1148bc3d5698SJohn Baldwin ja .Lpwr_page_walk 1149bc3d5698SJohn Baldwin.Lpwr_page_walk_done: 1150bc3d5698SJohn Baldwin 1151bc3d5698SJohn Baldwin movq %r9,%r10 /* %r10 = negated size */ 1152bc3d5698SJohn Baldwin negq %r9 /* %r9 = positive size again */ 1153bc3d5698SJohn Baldwin 1154bc3d5698SJohn Baldwin 1155bc3d5698SJohn Baldwin 1156bc3d5698SJohn Baldwin 1157bc3d5698SJohn Baldwin 1158bc3d5698SJohn Baldwin 1159bc3d5698SJohn Baldwin 1160bc3d5698SJohn Baldwin 1161bc3d5698SJohn Baldwin 1162bc3d5698SJohn Baldwin 1163bc3d5698SJohn Baldwin movq %r8,32(%rsp) /* frame slot 32(%rsp) = the value loaded from (%r8) in the prologue */ 1164bc3d5698SJohn Baldwin movq %rax,40(%rsp) /* frame slot 40(%rsp) = entry %rsp; the .cfi_escape that follows is a DW_CFA_def_cfa_expression giving CFA = *(%rsp+40) + 8 */ 1165bc3d5698SJohn Baldwin.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 1166bc3d5698SJohn Baldwin.Lpower5_body: /* the four .byte lines below are hand-encoded movq gpr-to-xmm instructions (66 REX.W 0F 6E /r): %rdi to %xmm1, %rcx to %xmm2, %r10 to %xmm3, %rdx to %xmm4 */ 1167bc3d5698SJohn Baldwin.byte 102,72,15,110,207 1168bc3d5698SJohn Baldwin.byte 102,72,15,110,209 1169bc3d5698SJohn Baldwin.byte 102,73,15,110,218 1170bc3d5698SJohn Baldwin.byte 102,72,15,110,226 1171bc3d5698SJohn Baldwin 1172bc3d5698SJohn Baldwin call __bn_sqr8x_internal /* five identical rounds of __bn_sqr8x_internal followed by __bn_post4x_internal; NOTE(review): presumably five modular squarings, i.e. raising to the 32nd power - confirm */ 1173bc3d5698SJohn Baldwin call __bn_post4x_internal 1174bc3d5698SJohn Baldwin call __bn_sqr8x_internal 1175bc3d5698SJohn Baldwin call __bn_post4x_internal 1176bc3d5698SJohn Baldwin call __bn_sqr8x_internal 1177bc3d5698SJohn Baldwin call __bn_post4x_internal 1178bc3d5698SJohn Baldwin call __bn_sqr8x_internal 1179bc3d5698SJohn Baldwin call __bn_post4x_internal 1180bc3d5698SJohn Baldwin call __bn_sqr8x_internal 1181bc3d5698SJohn Baldwin call __bn_post4x_internal 1182bc3d5698SJohn Baldwin /* the two .byte lines below are hand-encoded movq xmm-to-gpr instructions (66 REX.W 0F 7E /r): %xmm2 to %rcx, %xmm4 to %rdx, reloading the values stashed at .Lpower5_body */ 1183bc3d5698SJohn Baldwin.byte 102,72,15,126,209 1184bc3d5698SJohn Baldwin.byte 102,72,15,126,226 1185bc3d5698SJohn Baldwin
movq %rsi,%rdi /* set up the mul4x_internal call: %rdi = %rsi, %rax = saved entry %rsp, %r8 = address of the frame slot written at 32(%rsp) */ 1186bc3d5698SJohn Baldwin movq 40(%rsp),%rax 1187bc3d5698SJohn Baldwin leaq 32(%rsp),%r8 1188bc3d5698SJohn Baldwin 1189bc3d5698SJohn Baldwin call mul4x_internal 1190bc3d5698SJohn Baldwin 1191bc3d5698SJohn Baldwin movq 40(%rsp),%rsi /* %rsi = entry %rsp reloaded from the frame; the saved registers sit just below it */ 1192bc3d5698SJohn Baldwin.cfi_def_cfa %rsi,8 1193bc3d5698SJohn Baldwin movq $1,%rax /* return value 1 */ 1194bc3d5698SJohn Baldwin movq -48(%rsi),%r15 /* reload the callee-saved registers from the slots filled by the pushes in the prologue */ 1195bc3d5698SJohn Baldwin.cfi_restore %r15 1196bc3d5698SJohn Baldwin movq -40(%rsi),%r14 1197bc3d5698SJohn Baldwin.cfi_restore %r14 1198bc3d5698SJohn Baldwin movq -32(%rsi),%r13 1199bc3d5698SJohn Baldwin.cfi_restore %r13 1200bc3d5698SJohn Baldwin movq -24(%rsi),%r12 1201bc3d5698SJohn Baldwin.cfi_restore %r12 1202bc3d5698SJohn Baldwin movq -16(%rsi),%rbp 1203bc3d5698SJohn Baldwin.cfi_restore %rbp 1204bc3d5698SJohn Baldwin movq -8(%rsi),%rbx 1205bc3d5698SJohn Baldwin.cfi_restore %rbx 1206bc3d5698SJohn Baldwin leaq (%rsi),%rsp /* %rsp back to its value at function entry */ 1207bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 1208bc3d5698SJohn Baldwin.Lpower5_epilogue: 1209bc3d5698SJohn Baldwin .byte 0xf3,0xc3 /* bytes f3 c3: a ret carrying a rep prefix */ 1210bc3d5698SJohn Baldwin.cfi_endproc 1211bc3d5698SJohn Baldwin.size bn_power5,.-bn_power5 1212bc3d5698SJohn Baldwin 1213bc3d5698SJohn Baldwin.globl bn_sqr8x_internal 1214bc3d5698SJohn Baldwin.hidden bn_sqr8x_internal 1215bc3d5698SJohn Baldwin.type bn_sqr8x_internal,@function 1216bc3d5698SJohn Baldwin.align 32 1217bc3d5698SJohn Baldwinbn_sqr8x_internal: 1218bc3d5698SJohn Baldwin__bn_sqr8x_internal: 1219bc3d5698SJohn Baldwin.cfi_startproc 1220bc3d5698SJohn Baldwin 1221bc3d5698SJohn Baldwin 1222bc3d5698SJohn Baldwin 1223bc3d5698SJohn Baldwin 1224bc3d5698SJohn Baldwin 1225bc3d5698SJohn Baldwin 1226bc3d5698SJohn Baldwin 1227bc3d5698SJohn Baldwin 1228bc3d5698SJohn Baldwin 1229bc3d5698SJohn Baldwin 1230bc3d5698SJohn Baldwin 1231bc3d5698SJohn Baldwin 1232bc3d5698SJohn Baldwin 1233bc3d5698SJohn Baldwin 1234bc3d5698SJohn Baldwin 1235bc3d5698SJohn Baldwin 1236bc3d5698SJohn Baldwin 1237bc3d5698SJohn Baldwin 1238bc3d5698SJohn Baldwin 1239bc3d5698SJohn
Baldwin 1240bc3d5698SJohn Baldwin 1241bc3d5698SJohn Baldwin 1242bc3d5698SJohn Baldwin 1243bc3d5698SJohn Baldwin 1244bc3d5698SJohn Baldwin 1245bc3d5698SJohn Baldwin 1246bc3d5698SJohn Baldwin 1247bc3d5698SJohn Baldwin 1248bc3d5698SJohn Baldwin 1249bc3d5698SJohn Baldwin 1250bc3d5698SJohn Baldwin 1251bc3d5698SJohn Baldwin 1252bc3d5698SJohn Baldwin 1253bc3d5698SJohn Baldwin 1254bc3d5698SJohn Baldwin 1255bc3d5698SJohn Baldwin 1256bc3d5698SJohn Baldwin 1257bc3d5698SJohn Baldwin 1258bc3d5698SJohn Baldwin 1259bc3d5698SJohn Baldwin 1260bc3d5698SJohn Baldwin 1261bc3d5698SJohn Baldwin 1262bc3d5698SJohn Baldwin 1263bc3d5698SJohn Baldwin 1264bc3d5698SJohn Baldwin 1265bc3d5698SJohn Baldwin 1266bc3d5698SJohn Baldwin 1267bc3d5698SJohn Baldwin 1268bc3d5698SJohn Baldwin 1269bc3d5698SJohn Baldwin 1270bc3d5698SJohn Baldwin 1271bc3d5698SJohn Baldwin 1272bc3d5698SJohn Baldwin 1273bc3d5698SJohn Baldwin 1274bc3d5698SJohn Baldwin 1275bc3d5698SJohn Baldwin 1276bc3d5698SJohn Baldwin 1277bc3d5698SJohn Baldwin 1278bc3d5698SJohn Baldwin 1279bc3d5698SJohn Baldwin 1280bc3d5698SJohn Baldwin 1281bc3d5698SJohn Baldwin 1282bc3d5698SJohn Baldwin 1283bc3d5698SJohn Baldwin 1284bc3d5698SJohn Baldwin 1285bc3d5698SJohn Baldwin 1286bc3d5698SJohn Baldwin 1287bc3d5698SJohn Baldwin 1288bc3d5698SJohn Baldwin 1289bc3d5698SJohn Baldwin 1290bc3d5698SJohn Baldwin 1291bc3d5698SJohn Baldwin 1292bc3d5698SJohn Baldwin 1293bc3d5698SJohn Baldwin leaq 32(%r10),%rbp 1294bc3d5698SJohn Baldwin leaq (%rsi,%r9,1),%rsi 1295bc3d5698SJohn Baldwin 1296bc3d5698SJohn Baldwin movq %r9,%rcx 1297bc3d5698SJohn Baldwin 1298bc3d5698SJohn Baldwin 1299bc3d5698SJohn Baldwin movq -32(%rsi,%rbp,1),%r14 1300bc3d5698SJohn Baldwin leaq 48+8(%rsp,%r9,2),%rdi 1301bc3d5698SJohn Baldwin movq -24(%rsi,%rbp,1),%rax 1302bc3d5698SJohn Baldwin leaq -32(%rdi,%rbp,1),%rdi 1303bc3d5698SJohn Baldwin movq -16(%rsi,%rbp,1),%rbx 1304bc3d5698SJohn Baldwin movq %rax,%r15 1305bc3d5698SJohn Baldwin 1306bc3d5698SJohn Baldwin mulq %r14 1307bc3d5698SJohn Baldwin movq 
%rax,%r10 1308bc3d5698SJohn Baldwin movq %rbx,%rax 1309bc3d5698SJohn Baldwin movq %rdx,%r11 1310bc3d5698SJohn Baldwin movq %r10,-24(%rdi,%rbp,1) 1311bc3d5698SJohn Baldwin 1312bc3d5698SJohn Baldwin mulq %r14 1313bc3d5698SJohn Baldwin addq %rax,%r11 1314bc3d5698SJohn Baldwin movq %rbx,%rax 1315bc3d5698SJohn Baldwin adcq $0,%rdx 1316bc3d5698SJohn Baldwin movq %r11,-16(%rdi,%rbp,1) 1317bc3d5698SJohn Baldwin movq %rdx,%r10 1318bc3d5698SJohn Baldwin 1319bc3d5698SJohn Baldwin 1320bc3d5698SJohn Baldwin movq -8(%rsi,%rbp,1),%rbx 1321bc3d5698SJohn Baldwin mulq %r15 1322bc3d5698SJohn Baldwin movq %rax,%r12 1323bc3d5698SJohn Baldwin movq %rbx,%rax 1324bc3d5698SJohn Baldwin movq %rdx,%r13 1325bc3d5698SJohn Baldwin 1326bc3d5698SJohn Baldwin leaq (%rbp),%rcx 1327bc3d5698SJohn Baldwin mulq %r14 1328bc3d5698SJohn Baldwin addq %rax,%r10 1329bc3d5698SJohn Baldwin movq %rbx,%rax 1330bc3d5698SJohn Baldwin movq %rdx,%r11 1331bc3d5698SJohn Baldwin adcq $0,%r11 1332bc3d5698SJohn Baldwin addq %r12,%r10 1333bc3d5698SJohn Baldwin adcq $0,%r11 1334bc3d5698SJohn Baldwin movq %r10,-8(%rdi,%rcx,1) 1335bc3d5698SJohn Baldwin jmp .Lsqr4x_1st 1336bc3d5698SJohn Baldwin 1337bc3d5698SJohn Baldwin.align 32 1338bc3d5698SJohn Baldwin.Lsqr4x_1st: 1339bc3d5698SJohn Baldwin movq (%rsi,%rcx,1),%rbx 1340bc3d5698SJohn Baldwin mulq %r15 1341bc3d5698SJohn Baldwin addq %rax,%r13 1342bc3d5698SJohn Baldwin movq %rbx,%rax 1343bc3d5698SJohn Baldwin movq %rdx,%r12 1344bc3d5698SJohn Baldwin adcq $0,%r12 1345bc3d5698SJohn Baldwin 1346bc3d5698SJohn Baldwin mulq %r14 1347bc3d5698SJohn Baldwin addq %rax,%r11 1348bc3d5698SJohn Baldwin movq %rbx,%rax 1349bc3d5698SJohn Baldwin movq 8(%rsi,%rcx,1),%rbx 1350bc3d5698SJohn Baldwin movq %rdx,%r10 1351bc3d5698SJohn Baldwin adcq $0,%r10 1352bc3d5698SJohn Baldwin addq %r13,%r11 1353bc3d5698SJohn Baldwin adcq $0,%r10 1354bc3d5698SJohn Baldwin 1355bc3d5698SJohn Baldwin 1356bc3d5698SJohn Baldwin mulq %r15 1357bc3d5698SJohn Baldwin addq %rax,%r12 1358bc3d5698SJohn Baldwin movq %rbx,%rax 
1359bc3d5698SJohn Baldwin movq %r11,(%rdi,%rcx,1) 1360bc3d5698SJohn Baldwin movq %rdx,%r13 1361bc3d5698SJohn Baldwin adcq $0,%r13 1362bc3d5698SJohn Baldwin 1363bc3d5698SJohn Baldwin mulq %r14 1364bc3d5698SJohn Baldwin addq %rax,%r10 1365bc3d5698SJohn Baldwin movq %rbx,%rax 1366bc3d5698SJohn Baldwin movq 16(%rsi,%rcx,1),%rbx 1367bc3d5698SJohn Baldwin movq %rdx,%r11 1368bc3d5698SJohn Baldwin adcq $0,%r11 1369bc3d5698SJohn Baldwin addq %r12,%r10 1370bc3d5698SJohn Baldwin adcq $0,%r11 1371bc3d5698SJohn Baldwin 1372bc3d5698SJohn Baldwin mulq %r15 1373bc3d5698SJohn Baldwin addq %rax,%r13 1374bc3d5698SJohn Baldwin movq %rbx,%rax 1375bc3d5698SJohn Baldwin movq %r10,8(%rdi,%rcx,1) 1376bc3d5698SJohn Baldwin movq %rdx,%r12 1377bc3d5698SJohn Baldwin adcq $0,%r12 1378bc3d5698SJohn Baldwin 1379bc3d5698SJohn Baldwin mulq %r14 1380bc3d5698SJohn Baldwin addq %rax,%r11 1381bc3d5698SJohn Baldwin movq %rbx,%rax 1382bc3d5698SJohn Baldwin movq 24(%rsi,%rcx,1),%rbx 1383bc3d5698SJohn Baldwin movq %rdx,%r10 1384bc3d5698SJohn Baldwin adcq $0,%r10 1385bc3d5698SJohn Baldwin addq %r13,%r11 1386bc3d5698SJohn Baldwin adcq $0,%r10 1387bc3d5698SJohn Baldwin 1388bc3d5698SJohn Baldwin 1389bc3d5698SJohn Baldwin mulq %r15 1390bc3d5698SJohn Baldwin addq %rax,%r12 1391bc3d5698SJohn Baldwin movq %rbx,%rax 1392bc3d5698SJohn Baldwin movq %r11,16(%rdi,%rcx,1) 1393bc3d5698SJohn Baldwin movq %rdx,%r13 1394bc3d5698SJohn Baldwin adcq $0,%r13 1395bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 1396bc3d5698SJohn Baldwin 1397bc3d5698SJohn Baldwin mulq %r14 1398bc3d5698SJohn Baldwin addq %rax,%r10 1399bc3d5698SJohn Baldwin movq %rbx,%rax 1400bc3d5698SJohn Baldwin movq %rdx,%r11 1401bc3d5698SJohn Baldwin adcq $0,%r11 1402bc3d5698SJohn Baldwin addq %r12,%r10 1403bc3d5698SJohn Baldwin adcq $0,%r11 1404bc3d5698SJohn Baldwin movq %r10,-8(%rdi,%rcx,1) 1405bc3d5698SJohn Baldwin 1406bc3d5698SJohn Baldwin cmpq $0,%rcx 1407bc3d5698SJohn Baldwin jne .Lsqr4x_1st 1408bc3d5698SJohn Baldwin 1409bc3d5698SJohn Baldwin mulq %r15 
1410bc3d5698SJohn Baldwin addq %rax,%r13 1411bc3d5698SJohn Baldwin leaq 16(%rbp),%rbp 1412bc3d5698SJohn Baldwin adcq $0,%rdx 1413bc3d5698SJohn Baldwin addq %r11,%r13 1414bc3d5698SJohn Baldwin adcq $0,%rdx 1415bc3d5698SJohn Baldwin 1416bc3d5698SJohn Baldwin movq %r13,(%rdi) 1417bc3d5698SJohn Baldwin movq %rdx,%r12 1418bc3d5698SJohn Baldwin movq %rdx,8(%rdi) 1419bc3d5698SJohn Baldwin jmp .Lsqr4x_outer 1420bc3d5698SJohn Baldwin 1421bc3d5698SJohn Baldwin.align 32 1422bc3d5698SJohn Baldwin.Lsqr4x_outer: 1423bc3d5698SJohn Baldwin movq -32(%rsi,%rbp,1),%r14 1424bc3d5698SJohn Baldwin leaq 48+8(%rsp,%r9,2),%rdi 1425bc3d5698SJohn Baldwin movq -24(%rsi,%rbp,1),%rax 1426bc3d5698SJohn Baldwin leaq -32(%rdi,%rbp,1),%rdi 1427bc3d5698SJohn Baldwin movq -16(%rsi,%rbp,1),%rbx 1428bc3d5698SJohn Baldwin movq %rax,%r15 1429bc3d5698SJohn Baldwin 1430bc3d5698SJohn Baldwin mulq %r14 1431bc3d5698SJohn Baldwin movq -24(%rdi,%rbp,1),%r10 1432bc3d5698SJohn Baldwin addq %rax,%r10 1433bc3d5698SJohn Baldwin movq %rbx,%rax 1434bc3d5698SJohn Baldwin adcq $0,%rdx 1435bc3d5698SJohn Baldwin movq %r10,-24(%rdi,%rbp,1) 1436bc3d5698SJohn Baldwin movq %rdx,%r11 1437bc3d5698SJohn Baldwin 1438bc3d5698SJohn Baldwin mulq %r14 1439bc3d5698SJohn Baldwin addq %rax,%r11 1440bc3d5698SJohn Baldwin movq %rbx,%rax 1441bc3d5698SJohn Baldwin adcq $0,%rdx 1442bc3d5698SJohn Baldwin addq -16(%rdi,%rbp,1),%r11 1443bc3d5698SJohn Baldwin movq %rdx,%r10 1444bc3d5698SJohn Baldwin adcq $0,%r10 1445bc3d5698SJohn Baldwin movq %r11,-16(%rdi,%rbp,1) 1446bc3d5698SJohn Baldwin 1447bc3d5698SJohn Baldwin xorq %r12,%r12 1448bc3d5698SJohn Baldwin 1449bc3d5698SJohn Baldwin movq -8(%rsi,%rbp,1),%rbx 1450bc3d5698SJohn Baldwin mulq %r15 1451bc3d5698SJohn Baldwin addq %rax,%r12 1452bc3d5698SJohn Baldwin movq %rbx,%rax 1453bc3d5698SJohn Baldwin adcq $0,%rdx 1454bc3d5698SJohn Baldwin addq -8(%rdi,%rbp,1),%r12 1455bc3d5698SJohn Baldwin movq %rdx,%r13 1456bc3d5698SJohn Baldwin adcq $0,%r13 1457bc3d5698SJohn Baldwin 1458bc3d5698SJohn Baldwin mulq 
%r14 1459bc3d5698SJohn Baldwin addq %rax,%r10 1460bc3d5698SJohn Baldwin movq %rbx,%rax 1461bc3d5698SJohn Baldwin adcq $0,%rdx 1462bc3d5698SJohn Baldwin addq %r12,%r10 1463bc3d5698SJohn Baldwin movq %rdx,%r11 1464bc3d5698SJohn Baldwin adcq $0,%r11 1465bc3d5698SJohn Baldwin movq %r10,-8(%rdi,%rbp,1) 1466bc3d5698SJohn Baldwin 1467bc3d5698SJohn Baldwin leaq (%rbp),%rcx 1468bc3d5698SJohn Baldwin jmp .Lsqr4x_inner 1469bc3d5698SJohn Baldwin 1470bc3d5698SJohn Baldwin.align 32 1471bc3d5698SJohn Baldwin.Lsqr4x_inner: 1472bc3d5698SJohn Baldwin movq (%rsi,%rcx,1),%rbx 1473bc3d5698SJohn Baldwin mulq %r15 1474bc3d5698SJohn Baldwin addq %rax,%r13 1475bc3d5698SJohn Baldwin movq %rbx,%rax 1476bc3d5698SJohn Baldwin movq %rdx,%r12 1477bc3d5698SJohn Baldwin adcq $0,%r12 1478bc3d5698SJohn Baldwin addq (%rdi,%rcx,1),%r13 1479bc3d5698SJohn Baldwin adcq $0,%r12 1480bc3d5698SJohn Baldwin 1481bc3d5698SJohn Baldwin.byte 0x67 1482bc3d5698SJohn Baldwin mulq %r14 1483bc3d5698SJohn Baldwin addq %rax,%r11 1484bc3d5698SJohn Baldwin movq %rbx,%rax 1485bc3d5698SJohn Baldwin movq 8(%rsi,%rcx,1),%rbx 1486bc3d5698SJohn Baldwin movq %rdx,%r10 1487bc3d5698SJohn Baldwin adcq $0,%r10 1488bc3d5698SJohn Baldwin addq %r13,%r11 1489bc3d5698SJohn Baldwin adcq $0,%r10 1490bc3d5698SJohn Baldwin 1491bc3d5698SJohn Baldwin mulq %r15 1492bc3d5698SJohn Baldwin addq %rax,%r12 1493bc3d5698SJohn Baldwin movq %r11,(%rdi,%rcx,1) 1494bc3d5698SJohn Baldwin movq %rbx,%rax 1495bc3d5698SJohn Baldwin movq %rdx,%r13 1496bc3d5698SJohn Baldwin adcq $0,%r13 1497bc3d5698SJohn Baldwin addq 8(%rdi,%rcx,1),%r12 1498bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 1499bc3d5698SJohn Baldwin adcq $0,%r13 1500bc3d5698SJohn Baldwin 1501bc3d5698SJohn Baldwin mulq %r14 1502bc3d5698SJohn Baldwin addq %rax,%r10 1503bc3d5698SJohn Baldwin movq %rbx,%rax 1504bc3d5698SJohn Baldwin adcq $0,%rdx 1505bc3d5698SJohn Baldwin addq %r12,%r10 1506bc3d5698SJohn Baldwin movq %rdx,%r11 1507bc3d5698SJohn Baldwin adcq $0,%r11 1508bc3d5698SJohn Baldwin movq 
%r10,-8(%rdi,%rcx,1) 1509bc3d5698SJohn Baldwin 1510bc3d5698SJohn Baldwin cmpq $0,%rcx 1511bc3d5698SJohn Baldwin jne .Lsqr4x_inner 1512bc3d5698SJohn Baldwin 1513bc3d5698SJohn Baldwin.byte 0x67 1514bc3d5698SJohn Baldwin mulq %r15 1515bc3d5698SJohn Baldwin addq %rax,%r13 1516bc3d5698SJohn Baldwin adcq $0,%rdx 1517bc3d5698SJohn Baldwin addq %r11,%r13 1518bc3d5698SJohn Baldwin adcq $0,%rdx 1519bc3d5698SJohn Baldwin 1520bc3d5698SJohn Baldwin movq %r13,(%rdi) 1521bc3d5698SJohn Baldwin movq %rdx,%r12 1522bc3d5698SJohn Baldwin movq %rdx,8(%rdi) 1523bc3d5698SJohn Baldwin 1524bc3d5698SJohn Baldwin addq $16,%rbp 1525bc3d5698SJohn Baldwin jnz .Lsqr4x_outer 1526bc3d5698SJohn Baldwin 1527bc3d5698SJohn Baldwin 1528bc3d5698SJohn Baldwin movq -32(%rsi),%r14 1529bc3d5698SJohn Baldwin leaq 48+8(%rsp,%r9,2),%rdi 1530bc3d5698SJohn Baldwin movq -24(%rsi),%rax 1531bc3d5698SJohn Baldwin leaq -32(%rdi,%rbp,1),%rdi 1532bc3d5698SJohn Baldwin movq -16(%rsi),%rbx 1533bc3d5698SJohn Baldwin movq %rax,%r15 1534bc3d5698SJohn Baldwin 1535bc3d5698SJohn Baldwin mulq %r14 1536bc3d5698SJohn Baldwin addq %rax,%r10 1537bc3d5698SJohn Baldwin movq %rbx,%rax 1538bc3d5698SJohn Baldwin movq %rdx,%r11 1539bc3d5698SJohn Baldwin adcq $0,%r11 1540bc3d5698SJohn Baldwin 1541bc3d5698SJohn Baldwin mulq %r14 1542bc3d5698SJohn Baldwin addq %rax,%r11 1543bc3d5698SJohn Baldwin movq %rbx,%rax 1544bc3d5698SJohn Baldwin movq %r10,-24(%rdi) 1545bc3d5698SJohn Baldwin movq %rdx,%r10 1546bc3d5698SJohn Baldwin adcq $0,%r10 1547bc3d5698SJohn Baldwin addq %r13,%r11 1548bc3d5698SJohn Baldwin movq -8(%rsi),%rbx 1549bc3d5698SJohn Baldwin adcq $0,%r10 1550bc3d5698SJohn Baldwin 1551bc3d5698SJohn Baldwin mulq %r15 1552bc3d5698SJohn Baldwin addq %rax,%r12 1553bc3d5698SJohn Baldwin movq %rbx,%rax 1554bc3d5698SJohn Baldwin movq %r11,-16(%rdi) 1555bc3d5698SJohn Baldwin movq %rdx,%r13 1556bc3d5698SJohn Baldwin adcq $0,%r13 1557bc3d5698SJohn Baldwin 1558bc3d5698SJohn Baldwin mulq %r14 1559bc3d5698SJohn Baldwin addq %rax,%r10 1560bc3d5698SJohn 
Baldwin movq %rbx,%rax 1561bc3d5698SJohn Baldwin movq %rdx,%r11 1562bc3d5698SJohn Baldwin adcq $0,%r11 1563bc3d5698SJohn Baldwin addq %r12,%r10 1564bc3d5698SJohn Baldwin adcq $0,%r11 1565bc3d5698SJohn Baldwin movq %r10,-8(%rdi) 1566bc3d5698SJohn Baldwin 1567bc3d5698SJohn Baldwin mulq %r15 1568bc3d5698SJohn Baldwin addq %rax,%r13 1569bc3d5698SJohn Baldwin movq -16(%rsi),%rax 1570bc3d5698SJohn Baldwin adcq $0,%rdx 1571bc3d5698SJohn Baldwin addq %r11,%r13 1572bc3d5698SJohn Baldwin adcq $0,%rdx 1573bc3d5698SJohn Baldwin 1574bc3d5698SJohn Baldwin movq %r13,(%rdi) 1575bc3d5698SJohn Baldwin movq %rdx,%r12 1576bc3d5698SJohn Baldwin movq %rdx,8(%rdi) 1577bc3d5698SJohn Baldwin 1578bc3d5698SJohn Baldwin mulq %rbx 1579bc3d5698SJohn Baldwin addq $16,%rbp 1580bc3d5698SJohn Baldwin xorq %r14,%r14 1581bc3d5698SJohn Baldwin subq %r9,%rbp 1582bc3d5698SJohn Baldwin xorq %r15,%r15 1583bc3d5698SJohn Baldwin 1584bc3d5698SJohn Baldwin addq %r12,%rax 1585bc3d5698SJohn Baldwin adcq $0,%rdx 1586bc3d5698SJohn Baldwin movq %rax,8(%rdi) 1587bc3d5698SJohn Baldwin movq %rdx,16(%rdi) 1588bc3d5698SJohn Baldwin movq %r15,24(%rdi) 1589bc3d5698SJohn Baldwin 1590bc3d5698SJohn Baldwin movq -16(%rsi,%rbp,1),%rax 1591bc3d5698SJohn Baldwin leaq 48+8(%rsp),%rdi 1592bc3d5698SJohn Baldwin xorq %r10,%r10 1593bc3d5698SJohn Baldwin movq 8(%rdi),%r11 1594bc3d5698SJohn Baldwin 1595bc3d5698SJohn Baldwin leaq (%r14,%r10,2),%r12 1596bc3d5698SJohn Baldwin shrq $63,%r10 1597bc3d5698SJohn Baldwin leaq (%rcx,%r11,2),%r13 1598bc3d5698SJohn Baldwin shrq $63,%r11 1599bc3d5698SJohn Baldwin orq %r10,%r13 1600bc3d5698SJohn Baldwin movq 16(%rdi),%r10 1601bc3d5698SJohn Baldwin movq %r11,%r14 1602bc3d5698SJohn Baldwin mulq %rax 1603bc3d5698SJohn Baldwin negq %r15 1604bc3d5698SJohn Baldwin movq 24(%rdi),%r11 1605bc3d5698SJohn Baldwin adcq %rax,%r12 1606bc3d5698SJohn Baldwin movq -8(%rsi,%rbp,1),%rax 1607bc3d5698SJohn Baldwin movq %r12,(%rdi) 1608bc3d5698SJohn Baldwin adcq %rdx,%r13 1609bc3d5698SJohn Baldwin 1610bc3d5698SJohn 
Baldwin leaq (%r14,%r10,2),%rbx 1611bc3d5698SJohn Baldwin movq %r13,8(%rdi) 1612bc3d5698SJohn Baldwin sbbq %r15,%r15 1613bc3d5698SJohn Baldwin shrq $63,%r10 1614bc3d5698SJohn Baldwin leaq (%rcx,%r11,2),%r8 1615bc3d5698SJohn Baldwin shrq $63,%r11 1616bc3d5698SJohn Baldwin orq %r10,%r8 1617bc3d5698SJohn Baldwin movq 32(%rdi),%r10 1618bc3d5698SJohn Baldwin movq %r11,%r14 1619bc3d5698SJohn Baldwin mulq %rax 1620bc3d5698SJohn Baldwin negq %r15 1621bc3d5698SJohn Baldwin movq 40(%rdi),%r11 1622bc3d5698SJohn Baldwin adcq %rax,%rbx 1623bc3d5698SJohn Baldwin movq 0(%rsi,%rbp,1),%rax 1624bc3d5698SJohn Baldwin movq %rbx,16(%rdi) 1625bc3d5698SJohn Baldwin adcq %rdx,%r8 1626bc3d5698SJohn Baldwin leaq 16(%rbp),%rbp 1627bc3d5698SJohn Baldwin movq %r8,24(%rdi) 1628bc3d5698SJohn Baldwin sbbq %r15,%r15 1629bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 1630bc3d5698SJohn Baldwin jmp .Lsqr4x_shift_n_add 1631bc3d5698SJohn Baldwin 1632bc3d5698SJohn Baldwin.align 32 1633bc3d5698SJohn Baldwin.Lsqr4x_shift_n_add: 1634bc3d5698SJohn Baldwin leaq (%r14,%r10,2),%r12 1635bc3d5698SJohn Baldwin shrq $63,%r10 1636bc3d5698SJohn Baldwin leaq (%rcx,%r11,2),%r13 1637bc3d5698SJohn Baldwin shrq $63,%r11 1638bc3d5698SJohn Baldwin orq %r10,%r13 1639bc3d5698SJohn Baldwin movq -16(%rdi),%r10 1640bc3d5698SJohn Baldwin movq %r11,%r14 1641bc3d5698SJohn Baldwin mulq %rax 1642bc3d5698SJohn Baldwin negq %r15 1643bc3d5698SJohn Baldwin movq -8(%rdi),%r11 1644bc3d5698SJohn Baldwin adcq %rax,%r12 1645bc3d5698SJohn Baldwin movq -8(%rsi,%rbp,1),%rax 1646bc3d5698SJohn Baldwin movq %r12,-32(%rdi) 1647bc3d5698SJohn Baldwin adcq %rdx,%r13 1648bc3d5698SJohn Baldwin 1649bc3d5698SJohn Baldwin leaq (%r14,%r10,2),%rbx 1650bc3d5698SJohn Baldwin movq %r13,-24(%rdi) 1651bc3d5698SJohn Baldwin sbbq %r15,%r15 1652bc3d5698SJohn Baldwin shrq $63,%r10 1653bc3d5698SJohn Baldwin leaq (%rcx,%r11,2),%r8 1654bc3d5698SJohn Baldwin shrq $63,%r11 1655bc3d5698SJohn Baldwin orq %r10,%r8 1656bc3d5698SJohn Baldwin movq 0(%rdi),%r10 1657bc3d5698SJohn 
Baldwin movq %r11,%r14 1658bc3d5698SJohn Baldwin mulq %rax 1659bc3d5698SJohn Baldwin negq %r15 1660bc3d5698SJohn Baldwin movq 8(%rdi),%r11 1661bc3d5698SJohn Baldwin adcq %rax,%rbx 1662bc3d5698SJohn Baldwin movq 0(%rsi,%rbp,1),%rax 1663bc3d5698SJohn Baldwin movq %rbx,-16(%rdi) 1664bc3d5698SJohn Baldwin adcq %rdx,%r8 1665bc3d5698SJohn Baldwin 1666bc3d5698SJohn Baldwin leaq (%r14,%r10,2),%r12 1667bc3d5698SJohn Baldwin movq %r8,-8(%rdi) 1668bc3d5698SJohn Baldwin sbbq %r15,%r15 1669bc3d5698SJohn Baldwin shrq $63,%r10 1670bc3d5698SJohn Baldwin leaq (%rcx,%r11,2),%r13 1671bc3d5698SJohn Baldwin shrq $63,%r11 1672bc3d5698SJohn Baldwin orq %r10,%r13 1673bc3d5698SJohn Baldwin movq 16(%rdi),%r10 1674bc3d5698SJohn Baldwin movq %r11,%r14 1675bc3d5698SJohn Baldwin mulq %rax 1676bc3d5698SJohn Baldwin negq %r15 1677bc3d5698SJohn Baldwin movq 24(%rdi),%r11 1678bc3d5698SJohn Baldwin adcq %rax,%r12 1679bc3d5698SJohn Baldwin movq 8(%rsi,%rbp,1),%rax 1680bc3d5698SJohn Baldwin movq %r12,0(%rdi) 1681bc3d5698SJohn Baldwin adcq %rdx,%r13 1682bc3d5698SJohn Baldwin 1683bc3d5698SJohn Baldwin leaq (%r14,%r10,2),%rbx 1684bc3d5698SJohn Baldwin movq %r13,8(%rdi) 1685bc3d5698SJohn Baldwin sbbq %r15,%r15 1686bc3d5698SJohn Baldwin shrq $63,%r10 1687bc3d5698SJohn Baldwin leaq (%rcx,%r11,2),%r8 1688bc3d5698SJohn Baldwin shrq $63,%r11 1689bc3d5698SJohn Baldwin orq %r10,%r8 1690bc3d5698SJohn Baldwin movq 32(%rdi),%r10 1691bc3d5698SJohn Baldwin movq %r11,%r14 1692bc3d5698SJohn Baldwin mulq %rax 1693bc3d5698SJohn Baldwin negq %r15 1694bc3d5698SJohn Baldwin movq 40(%rdi),%r11 1695bc3d5698SJohn Baldwin adcq %rax,%rbx 1696bc3d5698SJohn Baldwin movq 16(%rsi,%rbp,1),%rax 1697bc3d5698SJohn Baldwin movq %rbx,16(%rdi) 1698bc3d5698SJohn Baldwin adcq %rdx,%r8 1699bc3d5698SJohn Baldwin movq %r8,24(%rdi) 1700bc3d5698SJohn Baldwin sbbq %r15,%r15 1701bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 1702bc3d5698SJohn Baldwin addq $32,%rbp 1703bc3d5698SJohn Baldwin jnz .Lsqr4x_shift_n_add 1704bc3d5698SJohn Baldwin 
1705bc3d5698SJohn Baldwin leaq (%r14,%r10,2),%r12 1706bc3d5698SJohn Baldwin.byte 0x67 1707bc3d5698SJohn Baldwin shrq $63,%r10 1708bc3d5698SJohn Baldwin leaq (%rcx,%r11,2),%r13 1709bc3d5698SJohn Baldwin shrq $63,%r11 1710bc3d5698SJohn Baldwin orq %r10,%r13 1711bc3d5698SJohn Baldwin movq -16(%rdi),%r10 1712bc3d5698SJohn Baldwin movq %r11,%r14 1713bc3d5698SJohn Baldwin mulq %rax 1714bc3d5698SJohn Baldwin negq %r15 1715bc3d5698SJohn Baldwin movq -8(%rdi),%r11 1716bc3d5698SJohn Baldwin adcq %rax,%r12 1717bc3d5698SJohn Baldwin movq -8(%rsi),%rax 1718bc3d5698SJohn Baldwin movq %r12,-32(%rdi) 1719bc3d5698SJohn Baldwin adcq %rdx,%r13 1720bc3d5698SJohn Baldwin 1721bc3d5698SJohn Baldwin leaq (%r14,%r10,2),%rbx 1722bc3d5698SJohn Baldwin movq %r13,-24(%rdi) 1723bc3d5698SJohn Baldwin sbbq %r15,%r15 1724bc3d5698SJohn Baldwin shrq $63,%r10 1725bc3d5698SJohn Baldwin leaq (%rcx,%r11,2),%r8 1726bc3d5698SJohn Baldwin shrq $63,%r11 1727bc3d5698SJohn Baldwin orq %r10,%r8 1728bc3d5698SJohn Baldwin mulq %rax 1729bc3d5698SJohn Baldwin negq %r15 1730bc3d5698SJohn Baldwin adcq %rax,%rbx 1731bc3d5698SJohn Baldwin adcq %rdx,%r8 1732bc3d5698SJohn Baldwin movq %rbx,-16(%rdi) 1733bc3d5698SJohn Baldwin movq %r8,-8(%rdi) 1734bc3d5698SJohn Baldwin.byte 102,72,15,126,213 1735bc3d5698SJohn Baldwin__bn_sqr8x_reduction: 1736bc3d5698SJohn Baldwin xorq %rax,%rax 1737bc3d5698SJohn Baldwin leaq (%r9,%rbp,1),%rcx 1738bc3d5698SJohn Baldwin leaq 48+8(%rsp,%r9,2),%rdx 1739bc3d5698SJohn Baldwin movq %rcx,0+8(%rsp) 1740bc3d5698SJohn Baldwin leaq 48+8(%rsp,%r9,1),%rdi 1741bc3d5698SJohn Baldwin movq %rdx,8+8(%rsp) 1742bc3d5698SJohn Baldwin negq %r9 1743bc3d5698SJohn Baldwin jmp .L8x_reduction_loop 1744bc3d5698SJohn Baldwin 1745bc3d5698SJohn Baldwin.align 32 1746bc3d5698SJohn Baldwin.L8x_reduction_loop: 1747bc3d5698SJohn Baldwin leaq (%rdi,%r9,1),%rdi 1748bc3d5698SJohn Baldwin.byte 0x66 1749bc3d5698SJohn Baldwin movq 0(%rdi),%rbx 1750bc3d5698SJohn Baldwin movq 8(%rdi),%r9 1751bc3d5698SJohn Baldwin movq 16(%rdi),%r10 
1752bc3d5698SJohn Baldwin movq 24(%rdi),%r11 1753bc3d5698SJohn Baldwin movq 32(%rdi),%r12 1754bc3d5698SJohn Baldwin movq 40(%rdi),%r13 1755bc3d5698SJohn Baldwin movq 48(%rdi),%r14 1756bc3d5698SJohn Baldwin movq 56(%rdi),%r15 1757bc3d5698SJohn Baldwin movq %rax,(%rdx) 1758bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 1759bc3d5698SJohn Baldwin 1760bc3d5698SJohn Baldwin.byte 0x67 1761bc3d5698SJohn Baldwin movq %rbx,%r8 1762bc3d5698SJohn Baldwin imulq 32+8(%rsp),%rbx 1763bc3d5698SJohn Baldwin movq 0(%rbp),%rax 1764bc3d5698SJohn Baldwin movl $8,%ecx 1765bc3d5698SJohn Baldwin jmp .L8x_reduce 1766bc3d5698SJohn Baldwin 1767bc3d5698SJohn Baldwin.align 32 1768bc3d5698SJohn Baldwin.L8x_reduce: 1769bc3d5698SJohn Baldwin mulq %rbx 1770bc3d5698SJohn Baldwin movq 8(%rbp),%rax 1771bc3d5698SJohn Baldwin negq %r8 1772bc3d5698SJohn Baldwin movq %rdx,%r8 1773bc3d5698SJohn Baldwin adcq $0,%r8 1774bc3d5698SJohn Baldwin 1775bc3d5698SJohn Baldwin mulq %rbx 1776bc3d5698SJohn Baldwin addq %rax,%r9 1777bc3d5698SJohn Baldwin movq 16(%rbp),%rax 1778bc3d5698SJohn Baldwin adcq $0,%rdx 1779bc3d5698SJohn Baldwin addq %r9,%r8 1780bc3d5698SJohn Baldwin movq %rbx,48-8+8(%rsp,%rcx,8) 1781bc3d5698SJohn Baldwin movq %rdx,%r9 1782bc3d5698SJohn Baldwin adcq $0,%r9 1783bc3d5698SJohn Baldwin 1784bc3d5698SJohn Baldwin mulq %rbx 1785bc3d5698SJohn Baldwin addq %rax,%r10 1786bc3d5698SJohn Baldwin movq 24(%rbp),%rax 1787bc3d5698SJohn Baldwin adcq $0,%rdx 1788bc3d5698SJohn Baldwin addq %r10,%r9 1789bc3d5698SJohn Baldwin movq 32+8(%rsp),%rsi 1790bc3d5698SJohn Baldwin movq %rdx,%r10 1791bc3d5698SJohn Baldwin adcq $0,%r10 1792bc3d5698SJohn Baldwin 1793bc3d5698SJohn Baldwin mulq %rbx 1794bc3d5698SJohn Baldwin addq %rax,%r11 1795bc3d5698SJohn Baldwin movq 32(%rbp),%rax 1796bc3d5698SJohn Baldwin adcq $0,%rdx 1797bc3d5698SJohn Baldwin imulq %r8,%rsi 1798bc3d5698SJohn Baldwin addq %r11,%r10 1799bc3d5698SJohn Baldwin movq %rdx,%r11 1800bc3d5698SJohn Baldwin adcq $0,%r11 1801bc3d5698SJohn Baldwin 1802bc3d5698SJohn Baldwin mulq 
%rbx 1803bc3d5698SJohn Baldwin addq %rax,%r12 1804bc3d5698SJohn Baldwin movq 40(%rbp),%rax 1805bc3d5698SJohn Baldwin adcq $0,%rdx 1806bc3d5698SJohn Baldwin addq %r12,%r11 1807bc3d5698SJohn Baldwin movq %rdx,%r12 1808bc3d5698SJohn Baldwin adcq $0,%r12 1809bc3d5698SJohn Baldwin 1810bc3d5698SJohn Baldwin mulq %rbx 1811bc3d5698SJohn Baldwin addq %rax,%r13 1812bc3d5698SJohn Baldwin movq 48(%rbp),%rax 1813bc3d5698SJohn Baldwin adcq $0,%rdx 1814bc3d5698SJohn Baldwin addq %r13,%r12 1815bc3d5698SJohn Baldwin movq %rdx,%r13 1816bc3d5698SJohn Baldwin adcq $0,%r13 1817bc3d5698SJohn Baldwin 1818bc3d5698SJohn Baldwin mulq %rbx 1819bc3d5698SJohn Baldwin addq %rax,%r14 1820bc3d5698SJohn Baldwin movq 56(%rbp),%rax 1821bc3d5698SJohn Baldwin adcq $0,%rdx 1822bc3d5698SJohn Baldwin addq %r14,%r13 1823bc3d5698SJohn Baldwin movq %rdx,%r14 1824bc3d5698SJohn Baldwin adcq $0,%r14 1825bc3d5698SJohn Baldwin 1826bc3d5698SJohn Baldwin mulq %rbx 1827bc3d5698SJohn Baldwin movq %rsi,%rbx 1828bc3d5698SJohn Baldwin addq %rax,%r15 1829bc3d5698SJohn Baldwin movq 0(%rbp),%rax 1830bc3d5698SJohn Baldwin adcq $0,%rdx 1831bc3d5698SJohn Baldwin addq %r15,%r14 1832bc3d5698SJohn Baldwin movq %rdx,%r15 1833bc3d5698SJohn Baldwin adcq $0,%r15 1834bc3d5698SJohn Baldwin 1835bc3d5698SJohn Baldwin decl %ecx 1836bc3d5698SJohn Baldwin jnz .L8x_reduce 1837bc3d5698SJohn Baldwin 1838bc3d5698SJohn Baldwin leaq 64(%rbp),%rbp 1839bc3d5698SJohn Baldwin xorq %rax,%rax 1840bc3d5698SJohn Baldwin movq 8+8(%rsp),%rdx 1841bc3d5698SJohn Baldwin cmpq 0+8(%rsp),%rbp 1842bc3d5698SJohn Baldwin jae .L8x_no_tail 1843bc3d5698SJohn Baldwin 1844bc3d5698SJohn Baldwin.byte 0x66 1845bc3d5698SJohn Baldwin addq 0(%rdi),%r8 1846bc3d5698SJohn Baldwin adcq 8(%rdi),%r9 1847bc3d5698SJohn Baldwin adcq 16(%rdi),%r10 1848bc3d5698SJohn Baldwin adcq 24(%rdi),%r11 1849bc3d5698SJohn Baldwin adcq 32(%rdi),%r12 1850bc3d5698SJohn Baldwin adcq 40(%rdi),%r13 1851bc3d5698SJohn Baldwin adcq 48(%rdi),%r14 1852bc3d5698SJohn Baldwin adcq 56(%rdi),%r15 
1853bc3d5698SJohn Baldwin sbbq %rsi,%rsi 1854bc3d5698SJohn Baldwin 1855bc3d5698SJohn Baldwin movq 48+56+8(%rsp),%rbx 1856bc3d5698SJohn Baldwin movl $8,%ecx 1857bc3d5698SJohn Baldwin movq 0(%rbp),%rax 1858bc3d5698SJohn Baldwin jmp .L8x_tail 1859bc3d5698SJohn Baldwin 1860bc3d5698SJohn Baldwin.align 32 1861bc3d5698SJohn Baldwin.L8x_tail: 1862bc3d5698SJohn Baldwin mulq %rbx 1863bc3d5698SJohn Baldwin addq %rax,%r8 1864bc3d5698SJohn Baldwin movq 8(%rbp),%rax 1865bc3d5698SJohn Baldwin movq %r8,(%rdi) 1866bc3d5698SJohn Baldwin movq %rdx,%r8 1867bc3d5698SJohn Baldwin adcq $0,%r8 1868bc3d5698SJohn Baldwin 1869bc3d5698SJohn Baldwin mulq %rbx 1870bc3d5698SJohn Baldwin addq %rax,%r9 1871bc3d5698SJohn Baldwin movq 16(%rbp),%rax 1872bc3d5698SJohn Baldwin adcq $0,%rdx 1873bc3d5698SJohn Baldwin addq %r9,%r8 1874bc3d5698SJohn Baldwin leaq 8(%rdi),%rdi 1875bc3d5698SJohn Baldwin movq %rdx,%r9 1876bc3d5698SJohn Baldwin adcq $0,%r9 1877bc3d5698SJohn Baldwin 1878bc3d5698SJohn Baldwin mulq %rbx 1879bc3d5698SJohn Baldwin addq %rax,%r10 1880bc3d5698SJohn Baldwin movq 24(%rbp),%rax 1881bc3d5698SJohn Baldwin adcq $0,%rdx 1882bc3d5698SJohn Baldwin addq %r10,%r9 1883bc3d5698SJohn Baldwin movq %rdx,%r10 1884bc3d5698SJohn Baldwin adcq $0,%r10 1885bc3d5698SJohn Baldwin 1886bc3d5698SJohn Baldwin mulq %rbx 1887bc3d5698SJohn Baldwin addq %rax,%r11 1888bc3d5698SJohn Baldwin movq 32(%rbp),%rax 1889bc3d5698SJohn Baldwin adcq $0,%rdx 1890bc3d5698SJohn Baldwin addq %r11,%r10 1891bc3d5698SJohn Baldwin movq %rdx,%r11 1892bc3d5698SJohn Baldwin adcq $0,%r11 1893bc3d5698SJohn Baldwin 1894bc3d5698SJohn Baldwin mulq %rbx 1895bc3d5698SJohn Baldwin addq %rax,%r12 1896bc3d5698SJohn Baldwin movq 40(%rbp),%rax 1897bc3d5698SJohn Baldwin adcq $0,%rdx 1898bc3d5698SJohn Baldwin addq %r12,%r11 1899bc3d5698SJohn Baldwin movq %rdx,%r12 1900bc3d5698SJohn Baldwin adcq $0,%r12 1901bc3d5698SJohn Baldwin 1902bc3d5698SJohn Baldwin mulq %rbx 1903bc3d5698SJohn Baldwin addq %rax,%r13 1904bc3d5698SJohn Baldwin movq 48(%rbp),%rax 
1905bc3d5698SJohn Baldwin adcq $0,%rdx 1906bc3d5698SJohn Baldwin addq %r13,%r12 1907bc3d5698SJohn Baldwin movq %rdx,%r13 1908bc3d5698SJohn Baldwin adcq $0,%r13 1909bc3d5698SJohn Baldwin 1910bc3d5698SJohn Baldwin mulq %rbx 1911bc3d5698SJohn Baldwin addq %rax,%r14 1912bc3d5698SJohn Baldwin movq 56(%rbp),%rax 1913bc3d5698SJohn Baldwin adcq $0,%rdx 1914bc3d5698SJohn Baldwin addq %r14,%r13 1915bc3d5698SJohn Baldwin movq %rdx,%r14 1916bc3d5698SJohn Baldwin adcq $0,%r14 1917bc3d5698SJohn Baldwin 1918bc3d5698SJohn Baldwin mulq %rbx 1919bc3d5698SJohn Baldwin movq 48-16+8(%rsp,%rcx,8),%rbx 1920bc3d5698SJohn Baldwin addq %rax,%r15 1921bc3d5698SJohn Baldwin adcq $0,%rdx 1922bc3d5698SJohn Baldwin addq %r15,%r14 1923bc3d5698SJohn Baldwin movq 0(%rbp),%rax 1924bc3d5698SJohn Baldwin movq %rdx,%r15 1925bc3d5698SJohn Baldwin adcq $0,%r15 1926bc3d5698SJohn Baldwin 1927bc3d5698SJohn Baldwin decl %ecx 1928bc3d5698SJohn Baldwin jnz .L8x_tail 1929bc3d5698SJohn Baldwin 1930bc3d5698SJohn Baldwin leaq 64(%rbp),%rbp 1931bc3d5698SJohn Baldwin movq 8+8(%rsp),%rdx 1932bc3d5698SJohn Baldwin cmpq 0+8(%rsp),%rbp 1933bc3d5698SJohn Baldwin jae .L8x_tail_done 1934bc3d5698SJohn Baldwin 1935bc3d5698SJohn Baldwin movq 48+56+8(%rsp),%rbx 1936bc3d5698SJohn Baldwin negq %rsi 1937bc3d5698SJohn Baldwin movq 0(%rbp),%rax 1938bc3d5698SJohn Baldwin adcq 0(%rdi),%r8 1939bc3d5698SJohn Baldwin adcq 8(%rdi),%r9 1940bc3d5698SJohn Baldwin adcq 16(%rdi),%r10 1941bc3d5698SJohn Baldwin adcq 24(%rdi),%r11 1942bc3d5698SJohn Baldwin adcq 32(%rdi),%r12 1943bc3d5698SJohn Baldwin adcq 40(%rdi),%r13 1944bc3d5698SJohn Baldwin adcq 48(%rdi),%r14 1945bc3d5698SJohn Baldwin adcq 56(%rdi),%r15 1946bc3d5698SJohn Baldwin sbbq %rsi,%rsi 1947bc3d5698SJohn Baldwin 1948bc3d5698SJohn Baldwin movl $8,%ecx 1949bc3d5698SJohn Baldwin jmp .L8x_tail 1950bc3d5698SJohn Baldwin 1951bc3d5698SJohn Baldwin.align 32 1952bc3d5698SJohn Baldwin.L8x_tail_done: 1953bc3d5698SJohn Baldwin xorq %rax,%rax 1954bc3d5698SJohn Baldwin addq (%rdx),%r8 
1955bc3d5698SJohn Baldwin adcq $0,%r9 1956bc3d5698SJohn Baldwin adcq $0,%r10 1957bc3d5698SJohn Baldwin adcq $0,%r11 1958bc3d5698SJohn Baldwin adcq $0,%r12 1959bc3d5698SJohn Baldwin adcq $0,%r13 1960bc3d5698SJohn Baldwin adcq $0,%r14 1961bc3d5698SJohn Baldwin adcq $0,%r15 1962bc3d5698SJohn Baldwin adcq $0,%rax 1963bc3d5698SJohn Baldwin 1964bc3d5698SJohn Baldwin negq %rsi 1965bc3d5698SJohn Baldwin.L8x_no_tail: 1966bc3d5698SJohn Baldwin adcq 0(%rdi),%r8 1967bc3d5698SJohn Baldwin adcq 8(%rdi),%r9 1968bc3d5698SJohn Baldwin adcq 16(%rdi),%r10 1969bc3d5698SJohn Baldwin adcq 24(%rdi),%r11 1970bc3d5698SJohn Baldwin adcq 32(%rdi),%r12 1971bc3d5698SJohn Baldwin adcq 40(%rdi),%r13 1972bc3d5698SJohn Baldwin adcq 48(%rdi),%r14 1973bc3d5698SJohn Baldwin adcq 56(%rdi),%r15 1974bc3d5698SJohn Baldwin adcq $0,%rax 1975bc3d5698SJohn Baldwin movq -8(%rbp),%rcx 1976bc3d5698SJohn Baldwin xorq %rsi,%rsi 1977bc3d5698SJohn Baldwin 1978bc3d5698SJohn Baldwin.byte 102,72,15,126,213 1979bc3d5698SJohn Baldwin 1980bc3d5698SJohn Baldwin movq %r8,0(%rdi) 1981bc3d5698SJohn Baldwin movq %r9,8(%rdi) 1982bc3d5698SJohn Baldwin.byte 102,73,15,126,217 1983bc3d5698SJohn Baldwin movq %r10,16(%rdi) 1984bc3d5698SJohn Baldwin movq %r11,24(%rdi) 1985bc3d5698SJohn Baldwin movq %r12,32(%rdi) 1986bc3d5698SJohn Baldwin movq %r13,40(%rdi) 1987bc3d5698SJohn Baldwin movq %r14,48(%rdi) 1988bc3d5698SJohn Baldwin movq %r15,56(%rdi) 1989bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 1990bc3d5698SJohn Baldwin 1991bc3d5698SJohn Baldwin cmpq %rdx,%rdi 1992bc3d5698SJohn Baldwin jb .L8x_reduction_loop 1993bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1994bc3d5698SJohn Baldwin.cfi_endproc 1995bc3d5698SJohn Baldwin.size bn_sqr8x_internal,.-bn_sqr8x_internal 1996bc3d5698SJohn Baldwin.type __bn_post4x_internal,@function 1997bc3d5698SJohn Baldwin.align 32 1998bc3d5698SJohn Baldwin__bn_post4x_internal: 1999bc3d5698SJohn Baldwin.cfi_startproc 2000bc3d5698SJohn Baldwin movq 0(%rbp),%r12 2001bc3d5698SJohn Baldwin leaq (%rdi,%r9,1),%rbx 
2002bc3d5698SJohn Baldwin movq %r9,%rcx 2003bc3d5698SJohn Baldwin.byte 102,72,15,126,207 2004bc3d5698SJohn Baldwin negq %rax 2005bc3d5698SJohn Baldwin.byte 102,72,15,126,206 2006bc3d5698SJohn Baldwin sarq $3+2,%rcx 2007bc3d5698SJohn Baldwin decq %r12 2008bc3d5698SJohn Baldwin xorq %r10,%r10 2009bc3d5698SJohn Baldwin movq 8(%rbp),%r13 2010bc3d5698SJohn Baldwin movq 16(%rbp),%r14 2011bc3d5698SJohn Baldwin movq 24(%rbp),%r15 2012bc3d5698SJohn Baldwin jmp .Lsqr4x_sub_entry 2013bc3d5698SJohn Baldwin 2014bc3d5698SJohn Baldwin.align 16 2015bc3d5698SJohn Baldwin.Lsqr4x_sub: 2016bc3d5698SJohn Baldwin movq 0(%rbp),%r12 2017bc3d5698SJohn Baldwin movq 8(%rbp),%r13 2018bc3d5698SJohn Baldwin movq 16(%rbp),%r14 2019bc3d5698SJohn Baldwin movq 24(%rbp),%r15 2020bc3d5698SJohn Baldwin.Lsqr4x_sub_entry: 2021bc3d5698SJohn Baldwin leaq 32(%rbp),%rbp 2022bc3d5698SJohn Baldwin notq %r12 2023bc3d5698SJohn Baldwin notq %r13 2024bc3d5698SJohn Baldwin notq %r14 2025bc3d5698SJohn Baldwin notq %r15 2026bc3d5698SJohn Baldwin andq %rax,%r12 2027bc3d5698SJohn Baldwin andq %rax,%r13 2028bc3d5698SJohn Baldwin andq %rax,%r14 2029bc3d5698SJohn Baldwin andq %rax,%r15 2030bc3d5698SJohn Baldwin 2031bc3d5698SJohn Baldwin negq %r10 2032bc3d5698SJohn Baldwin adcq 0(%rbx),%r12 2033bc3d5698SJohn Baldwin adcq 8(%rbx),%r13 2034bc3d5698SJohn Baldwin adcq 16(%rbx),%r14 2035bc3d5698SJohn Baldwin adcq 24(%rbx),%r15 2036bc3d5698SJohn Baldwin movq %r12,0(%rdi) 2037bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 2038bc3d5698SJohn Baldwin movq %r13,8(%rdi) 2039bc3d5698SJohn Baldwin sbbq %r10,%r10 2040bc3d5698SJohn Baldwin movq %r14,16(%rdi) 2041bc3d5698SJohn Baldwin movq %r15,24(%rdi) 2042bc3d5698SJohn Baldwin leaq 32(%rdi),%rdi 2043bc3d5698SJohn Baldwin 2044bc3d5698SJohn Baldwin incq %rcx 2045bc3d5698SJohn Baldwin jnz .Lsqr4x_sub 2046bc3d5698SJohn Baldwin 2047bc3d5698SJohn Baldwin movq %r9,%r10 2048bc3d5698SJohn Baldwin negq %r9 2049bc3d5698SJohn Baldwin .byte 0xf3,0xc3 2050bc3d5698SJohn Baldwin.cfi_endproc 2051bc3d5698SJohn 
Baldwin.size __bn_post4x_internal,.-__bn_post4x_internal 2052bc3d5698SJohn Baldwin.type bn_mulx4x_mont_gather5,@function 2053bc3d5698SJohn Baldwin.align 32 2054bc3d5698SJohn Baldwinbn_mulx4x_mont_gather5: 2055bc3d5698SJohn Baldwin.cfi_startproc 2056bc3d5698SJohn Baldwin movq %rsp,%rax 2057bc3d5698SJohn Baldwin.cfi_def_cfa_register %rax 2058bc3d5698SJohn Baldwin.Lmulx4x_enter: 2059bc3d5698SJohn Baldwin pushq %rbx 2060bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 2061bc3d5698SJohn Baldwin pushq %rbp 2062bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 2063bc3d5698SJohn Baldwin pushq %r12 2064bc3d5698SJohn Baldwin.cfi_offset %r12,-32 2065bc3d5698SJohn Baldwin pushq %r13 2066bc3d5698SJohn Baldwin.cfi_offset %r13,-40 2067bc3d5698SJohn Baldwin pushq %r14 2068bc3d5698SJohn Baldwin.cfi_offset %r14,-48 2069bc3d5698SJohn Baldwin pushq %r15 2070bc3d5698SJohn Baldwin.cfi_offset %r15,-56 2071bc3d5698SJohn Baldwin.Lmulx4x_prologue: 2072bc3d5698SJohn Baldwin 2073bc3d5698SJohn Baldwin shll $3,%r9d 2074bc3d5698SJohn Baldwin leaq (%r9,%r9,2),%r10 2075bc3d5698SJohn Baldwin negq %r9 2076bc3d5698SJohn Baldwin movq (%r8),%r8 2077bc3d5698SJohn Baldwin 2078bc3d5698SJohn Baldwin 2079bc3d5698SJohn Baldwin 2080bc3d5698SJohn Baldwin 2081bc3d5698SJohn Baldwin 2082bc3d5698SJohn Baldwin 2083bc3d5698SJohn Baldwin 2084bc3d5698SJohn Baldwin 2085bc3d5698SJohn Baldwin 2086bc3d5698SJohn Baldwin 2087bc3d5698SJohn Baldwin leaq -320(%rsp,%r9,2),%r11 2088bc3d5698SJohn Baldwin movq %rsp,%rbp 2089bc3d5698SJohn Baldwin subq %rdi,%r11 2090bc3d5698SJohn Baldwin andq $4095,%r11 2091bc3d5698SJohn Baldwin cmpq %r11,%r10 2092bc3d5698SJohn Baldwin jb .Lmulx4xsp_alt 2093bc3d5698SJohn Baldwin subq %r11,%rbp 2094bc3d5698SJohn Baldwin leaq -320(%rbp,%r9,2),%rbp 2095bc3d5698SJohn Baldwin jmp .Lmulx4xsp_done 2096bc3d5698SJohn Baldwin 2097bc3d5698SJohn Baldwin.Lmulx4xsp_alt: 2098bc3d5698SJohn Baldwin leaq 4096-320(,%r9,2),%r10 2099bc3d5698SJohn Baldwin leaq -320(%rbp,%r9,2),%rbp 2100bc3d5698SJohn Baldwin subq %r10,%r11 
2101bc3d5698SJohn Baldwin movq $0,%r10 2102bc3d5698SJohn Baldwin cmovcq %r10,%r11 2103bc3d5698SJohn Baldwin subq %r11,%rbp 2104bc3d5698SJohn Baldwin.Lmulx4xsp_done: 2105bc3d5698SJohn Baldwin andq $-64,%rbp 2106bc3d5698SJohn Baldwin movq %rsp,%r11 2107bc3d5698SJohn Baldwin subq %rbp,%r11 2108bc3d5698SJohn Baldwin andq $-4096,%r11 2109bc3d5698SJohn Baldwin leaq (%r11,%rbp,1),%rsp 2110bc3d5698SJohn Baldwin movq (%rsp),%r10 2111bc3d5698SJohn Baldwin cmpq %rbp,%rsp 2112bc3d5698SJohn Baldwin ja .Lmulx4x_page_walk 2113bc3d5698SJohn Baldwin jmp .Lmulx4x_page_walk_done 2114bc3d5698SJohn Baldwin 2115bc3d5698SJohn Baldwin.Lmulx4x_page_walk: 2116bc3d5698SJohn Baldwin leaq -4096(%rsp),%rsp 2117bc3d5698SJohn Baldwin movq (%rsp),%r10 2118bc3d5698SJohn Baldwin cmpq %rbp,%rsp 2119bc3d5698SJohn Baldwin ja .Lmulx4x_page_walk 2120bc3d5698SJohn Baldwin.Lmulx4x_page_walk_done: 2121bc3d5698SJohn Baldwin 2122bc3d5698SJohn Baldwin 2123bc3d5698SJohn Baldwin 2124bc3d5698SJohn Baldwin 2125bc3d5698SJohn Baldwin 2126bc3d5698SJohn Baldwin 2127bc3d5698SJohn Baldwin 2128bc3d5698SJohn Baldwin 2129bc3d5698SJohn Baldwin 2130bc3d5698SJohn Baldwin 2131bc3d5698SJohn Baldwin 2132bc3d5698SJohn Baldwin 2133bc3d5698SJohn Baldwin 2134bc3d5698SJohn Baldwin movq %r8,32(%rsp) 2135bc3d5698SJohn Baldwin movq %rax,40(%rsp) 2136bc3d5698SJohn Baldwin.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 2137bc3d5698SJohn Baldwin.Lmulx4x_body: 2138bc3d5698SJohn Baldwin call mulx4x_internal 2139bc3d5698SJohn Baldwin 2140bc3d5698SJohn Baldwin movq 40(%rsp),%rsi 2141bc3d5698SJohn Baldwin.cfi_def_cfa %rsi,8 2142bc3d5698SJohn Baldwin movq $1,%rax 2143bc3d5698SJohn Baldwin 2144bc3d5698SJohn Baldwin movq -48(%rsi),%r15 2145bc3d5698SJohn Baldwin.cfi_restore %r15 2146bc3d5698SJohn Baldwin movq -40(%rsi),%r14 2147bc3d5698SJohn Baldwin.cfi_restore %r14 2148bc3d5698SJohn Baldwin movq -32(%rsi),%r13 2149bc3d5698SJohn Baldwin.cfi_restore %r13 2150bc3d5698SJohn Baldwin movq -24(%rsi),%r12 2151bc3d5698SJohn Baldwin.cfi_restore %r12 
2152bc3d5698SJohn Baldwin movq -16(%rsi),%rbp 2153bc3d5698SJohn Baldwin.cfi_restore %rbp 2154bc3d5698SJohn Baldwin movq -8(%rsi),%rbx 2155bc3d5698SJohn Baldwin.cfi_restore %rbx 2156bc3d5698SJohn Baldwin leaq (%rsi),%rsp 2157bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 2158bc3d5698SJohn Baldwin.Lmulx4x_epilogue: 2159bc3d5698SJohn Baldwin .byte 0xf3,0xc3 2160bc3d5698SJohn Baldwin.cfi_endproc 2161bc3d5698SJohn Baldwin.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 2162bc3d5698SJohn Baldwin 2163bc3d5698SJohn Baldwin.type mulx4x_internal,@function 2164bc3d5698SJohn Baldwin.align 32 2165bc3d5698SJohn Baldwinmulx4x_internal: 2166bc3d5698SJohn Baldwin.cfi_startproc 2167bc3d5698SJohn Baldwin movq %r9,8(%rsp) 2168bc3d5698SJohn Baldwin movq %r9,%r10 2169bc3d5698SJohn Baldwin negq %r9 2170bc3d5698SJohn Baldwin shlq $5,%r9 2171bc3d5698SJohn Baldwin negq %r10 2172bc3d5698SJohn Baldwin leaq 128(%rdx,%r9,1),%r13 2173bc3d5698SJohn Baldwin shrq $5+5,%r9 2174bc3d5698SJohn Baldwin movd 8(%rax),%xmm5 2175bc3d5698SJohn Baldwin subq $1,%r9 2176bc3d5698SJohn Baldwin leaq .Linc(%rip),%rax 2177bc3d5698SJohn Baldwin movq %r13,16+8(%rsp) 2178bc3d5698SJohn Baldwin movq %r9,24+8(%rsp) 2179bc3d5698SJohn Baldwin movq %rdi,56+8(%rsp) 2180bc3d5698SJohn Baldwin movdqa 0(%rax),%xmm0 2181bc3d5698SJohn Baldwin movdqa 16(%rax),%xmm1 2182bc3d5698SJohn Baldwin leaq 88-112(%rsp,%r10,1),%r10 2183bc3d5698SJohn Baldwin leaq 128(%rdx),%rdi 2184bc3d5698SJohn Baldwin 2185bc3d5698SJohn Baldwin pshufd $0,%xmm5,%xmm5 2186bc3d5698SJohn Baldwin movdqa %xmm1,%xmm4 2187bc3d5698SJohn Baldwin.byte 0x67 2188bc3d5698SJohn Baldwin movdqa %xmm1,%xmm2 2189bc3d5698SJohn Baldwin.byte 0x67 2190bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 2191bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm0 2192bc3d5698SJohn Baldwin movdqa %xmm4,%xmm3 2193bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 2194bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm1 2195bc3d5698SJohn Baldwin movdqa %xmm0,112(%r10) 2196bc3d5698SJohn Baldwin movdqa %xmm4,%xmm0 2197bc3d5698SJohn 
Baldwin 2198bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 2199bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm2 2200bc3d5698SJohn Baldwin movdqa %xmm1,128(%r10) 2201bc3d5698SJohn Baldwin movdqa %xmm4,%xmm1 2202bc3d5698SJohn Baldwin 2203bc3d5698SJohn Baldwin paddd %xmm3,%xmm0 2204bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm3 2205bc3d5698SJohn Baldwin movdqa %xmm2,144(%r10) 2206bc3d5698SJohn Baldwin movdqa %xmm4,%xmm2 2207bc3d5698SJohn Baldwin 2208bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 2209bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm0 2210bc3d5698SJohn Baldwin movdqa %xmm3,160(%r10) 2211bc3d5698SJohn Baldwin movdqa %xmm4,%xmm3 2212bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 2213bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm1 2214bc3d5698SJohn Baldwin movdqa %xmm0,176(%r10) 2215bc3d5698SJohn Baldwin movdqa %xmm4,%xmm0 2216bc3d5698SJohn Baldwin 2217bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 2218bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm2 2219bc3d5698SJohn Baldwin movdqa %xmm1,192(%r10) 2220bc3d5698SJohn Baldwin movdqa %xmm4,%xmm1 2221bc3d5698SJohn Baldwin 2222bc3d5698SJohn Baldwin paddd %xmm3,%xmm0 2223bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm3 2224bc3d5698SJohn Baldwin movdqa %xmm2,208(%r10) 2225bc3d5698SJohn Baldwin movdqa %xmm4,%xmm2 2226bc3d5698SJohn Baldwin 2227bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 2228bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm0 2229bc3d5698SJohn Baldwin movdqa %xmm3,224(%r10) 2230bc3d5698SJohn Baldwin movdqa %xmm4,%xmm3 2231bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 2232bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm1 2233bc3d5698SJohn Baldwin movdqa %xmm0,240(%r10) 2234bc3d5698SJohn Baldwin movdqa %xmm4,%xmm0 2235bc3d5698SJohn Baldwin 2236bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 2237bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm2 2238bc3d5698SJohn Baldwin movdqa %xmm1,256(%r10) 2239bc3d5698SJohn Baldwin movdqa %xmm4,%xmm1 2240bc3d5698SJohn Baldwin 2241bc3d5698SJohn Baldwin paddd %xmm3,%xmm0 2242bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm3 2243bc3d5698SJohn Baldwin movdqa %xmm2,272(%r10) 
2244bc3d5698SJohn Baldwin movdqa %xmm4,%xmm2 2245bc3d5698SJohn Baldwin 2246bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 2247bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm0 2248bc3d5698SJohn Baldwin movdqa %xmm3,288(%r10) 2249bc3d5698SJohn Baldwin movdqa %xmm4,%xmm3 2250bc3d5698SJohn Baldwin.byte 0x67 2251bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 2252bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm1 2253bc3d5698SJohn Baldwin movdqa %xmm0,304(%r10) 2254bc3d5698SJohn Baldwin 2255bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 2256bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm2 2257bc3d5698SJohn Baldwin movdqa %xmm1,320(%r10) 2258bc3d5698SJohn Baldwin 2259bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm3 2260bc3d5698SJohn Baldwin movdqa %xmm2,336(%r10) 2261bc3d5698SJohn Baldwin 2262bc3d5698SJohn Baldwin pand 64(%rdi),%xmm0 2263bc3d5698SJohn Baldwin pand 80(%rdi),%xmm1 2264bc3d5698SJohn Baldwin pand 96(%rdi),%xmm2 2265bc3d5698SJohn Baldwin movdqa %xmm3,352(%r10) 2266bc3d5698SJohn Baldwin pand 112(%rdi),%xmm3 2267bc3d5698SJohn Baldwin por %xmm2,%xmm0 2268bc3d5698SJohn Baldwin por %xmm3,%xmm1 2269bc3d5698SJohn Baldwin movdqa -128(%rdi),%xmm4 2270bc3d5698SJohn Baldwin movdqa -112(%rdi),%xmm5 2271bc3d5698SJohn Baldwin movdqa -96(%rdi),%xmm2 2272bc3d5698SJohn Baldwin pand 112(%r10),%xmm4 2273bc3d5698SJohn Baldwin movdqa -80(%rdi),%xmm3 2274bc3d5698SJohn Baldwin pand 128(%r10),%xmm5 2275bc3d5698SJohn Baldwin por %xmm4,%xmm0 2276bc3d5698SJohn Baldwin pand 144(%r10),%xmm2 2277bc3d5698SJohn Baldwin por %xmm5,%xmm1 2278bc3d5698SJohn Baldwin pand 160(%r10),%xmm3 2279bc3d5698SJohn Baldwin por %xmm2,%xmm0 2280bc3d5698SJohn Baldwin por %xmm3,%xmm1 2281bc3d5698SJohn Baldwin movdqa -64(%rdi),%xmm4 2282bc3d5698SJohn Baldwin movdqa -48(%rdi),%xmm5 2283bc3d5698SJohn Baldwin movdqa -32(%rdi),%xmm2 2284bc3d5698SJohn Baldwin pand 176(%r10),%xmm4 2285bc3d5698SJohn Baldwin movdqa -16(%rdi),%xmm3 2286bc3d5698SJohn Baldwin pand 192(%r10),%xmm5 2287bc3d5698SJohn Baldwin por %xmm4,%xmm0 2288bc3d5698SJohn Baldwin pand 208(%r10),%xmm2 
2289bc3d5698SJohn Baldwin por %xmm5,%xmm1 2290bc3d5698SJohn Baldwin pand 224(%r10),%xmm3 2291bc3d5698SJohn Baldwin por %xmm2,%xmm0 2292bc3d5698SJohn Baldwin por %xmm3,%xmm1 2293bc3d5698SJohn Baldwin movdqa 0(%rdi),%xmm4 2294bc3d5698SJohn Baldwin movdqa 16(%rdi),%xmm5 2295bc3d5698SJohn Baldwin movdqa 32(%rdi),%xmm2 2296bc3d5698SJohn Baldwin pand 240(%r10),%xmm4 2297bc3d5698SJohn Baldwin movdqa 48(%rdi),%xmm3 2298bc3d5698SJohn Baldwin pand 256(%r10),%xmm5 2299bc3d5698SJohn Baldwin por %xmm4,%xmm0 2300bc3d5698SJohn Baldwin pand 272(%r10),%xmm2 2301bc3d5698SJohn Baldwin por %xmm5,%xmm1 2302bc3d5698SJohn Baldwin pand 288(%r10),%xmm3 2303bc3d5698SJohn Baldwin por %xmm2,%xmm0 2304bc3d5698SJohn Baldwin por %xmm3,%xmm1 2305bc3d5698SJohn Baldwin pxor %xmm1,%xmm0 2306bc3d5698SJohn Baldwin pshufd $0x4e,%xmm0,%xmm1 2307bc3d5698SJohn Baldwin por %xmm1,%xmm0 2308bc3d5698SJohn Baldwin leaq 256(%rdi),%rdi 2309bc3d5698SJohn Baldwin.byte 102,72,15,126,194 2310bc3d5698SJohn Baldwin leaq 64+32+8(%rsp),%rbx 2311bc3d5698SJohn Baldwin 2312bc3d5698SJohn Baldwin movq %rdx,%r9 2313bc3d5698SJohn Baldwin mulxq 0(%rsi),%r8,%rax 2314bc3d5698SJohn Baldwin mulxq 8(%rsi),%r11,%r12 2315bc3d5698SJohn Baldwin addq %rax,%r11 2316bc3d5698SJohn Baldwin mulxq 16(%rsi),%rax,%r13 2317bc3d5698SJohn Baldwin adcq %rax,%r12 2318bc3d5698SJohn Baldwin adcq $0,%r13 2319bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r14 2320bc3d5698SJohn Baldwin 2321bc3d5698SJohn Baldwin movq %r8,%r15 2322bc3d5698SJohn Baldwin imulq 32+8(%rsp),%r8 2323bc3d5698SJohn Baldwin xorq %rbp,%rbp 2324bc3d5698SJohn Baldwin movq %r8,%rdx 2325bc3d5698SJohn Baldwin 2326bc3d5698SJohn Baldwin movq %rdi,8+8(%rsp) 2327bc3d5698SJohn Baldwin 2328bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 2329bc3d5698SJohn Baldwin adcxq %rax,%r13 2330bc3d5698SJohn Baldwin adcxq %rbp,%r14 2331bc3d5698SJohn Baldwin 2332bc3d5698SJohn Baldwin mulxq 0(%rcx),%rax,%r10 2333bc3d5698SJohn Baldwin adcxq %rax,%r15 2334bc3d5698SJohn Baldwin adoxq %r11,%r10 2335bc3d5698SJohn Baldwin 
mulxq 8(%rcx),%rax,%r11 2336bc3d5698SJohn Baldwin adcxq %rax,%r10 2337bc3d5698SJohn Baldwin adoxq %r12,%r11 2338bc3d5698SJohn Baldwin mulxq 16(%rcx),%rax,%r12 2339bc3d5698SJohn Baldwin movq 24+8(%rsp),%rdi 2340bc3d5698SJohn Baldwin movq %r10,-32(%rbx) 2341bc3d5698SJohn Baldwin adcxq %rax,%r11 2342bc3d5698SJohn Baldwin adoxq %r13,%r12 2343bc3d5698SJohn Baldwin mulxq 24(%rcx),%rax,%r15 2344bc3d5698SJohn Baldwin movq %r9,%rdx 2345bc3d5698SJohn Baldwin movq %r11,-24(%rbx) 2346bc3d5698SJohn Baldwin adcxq %rax,%r12 2347bc3d5698SJohn Baldwin adoxq %rbp,%r15 2348bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 2349bc3d5698SJohn Baldwin movq %r12,-16(%rbx) 2350bc3d5698SJohn Baldwin jmp .Lmulx4x_1st 2351bc3d5698SJohn Baldwin 2352bc3d5698SJohn Baldwin.align 32 2353bc3d5698SJohn Baldwin.Lmulx4x_1st: 2354bc3d5698SJohn Baldwin adcxq %rbp,%r15 2355bc3d5698SJohn Baldwin mulxq 0(%rsi),%r10,%rax 2356bc3d5698SJohn Baldwin adcxq %r14,%r10 2357bc3d5698SJohn Baldwin mulxq 8(%rsi),%r11,%r14 2358bc3d5698SJohn Baldwin adcxq %rax,%r11 2359bc3d5698SJohn Baldwin mulxq 16(%rsi),%r12,%rax 2360bc3d5698SJohn Baldwin adcxq %r14,%r12 2361bc3d5698SJohn Baldwin mulxq 24(%rsi),%r13,%r14 2362bc3d5698SJohn Baldwin.byte 0x67,0x67 2363bc3d5698SJohn Baldwin movq %r8,%rdx 2364bc3d5698SJohn Baldwin adcxq %rax,%r13 2365bc3d5698SJohn Baldwin adcxq %rbp,%r14 2366bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 2367bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 2368bc3d5698SJohn Baldwin 2369bc3d5698SJohn Baldwin adoxq %r15,%r10 2370bc3d5698SJohn Baldwin mulxq 0(%rcx),%rax,%r15 2371bc3d5698SJohn Baldwin adcxq %rax,%r10 2372bc3d5698SJohn Baldwin adoxq %r15,%r11 2373bc3d5698SJohn Baldwin mulxq 8(%rcx),%rax,%r15 2374bc3d5698SJohn Baldwin adcxq %rax,%r11 2375bc3d5698SJohn Baldwin adoxq %r15,%r12 2376bc3d5698SJohn Baldwin mulxq 16(%rcx),%rax,%r15 2377bc3d5698SJohn Baldwin movq %r10,-40(%rbx) 2378bc3d5698SJohn Baldwin adcxq %rax,%r12 2379bc3d5698SJohn Baldwin movq %r11,-32(%rbx) 2380bc3d5698SJohn Baldwin adoxq %r15,%r13 2381bc3d5698SJohn 
Baldwin mulxq 24(%rcx),%rax,%r15 2382bc3d5698SJohn Baldwin movq %r9,%rdx 2383bc3d5698SJohn Baldwin movq %r12,-24(%rbx) 2384bc3d5698SJohn Baldwin adcxq %rax,%r13 2385bc3d5698SJohn Baldwin adoxq %rbp,%r15 2386bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 2387bc3d5698SJohn Baldwin movq %r13,-16(%rbx) 2388bc3d5698SJohn Baldwin 2389bc3d5698SJohn Baldwin decq %rdi 2390bc3d5698SJohn Baldwin jnz .Lmulx4x_1st 2391bc3d5698SJohn Baldwin 2392bc3d5698SJohn Baldwin movq 8(%rsp),%rax 2393bc3d5698SJohn Baldwin adcq %rbp,%r15 2394bc3d5698SJohn Baldwin leaq (%rsi,%rax,1),%rsi 2395bc3d5698SJohn Baldwin addq %r15,%r14 2396bc3d5698SJohn Baldwin movq 8+8(%rsp),%rdi 2397bc3d5698SJohn Baldwin adcq %rbp,%rbp 2398bc3d5698SJohn Baldwin movq %r14,-8(%rbx) 2399bc3d5698SJohn Baldwin jmp .Lmulx4x_outer 2400bc3d5698SJohn Baldwin 2401bc3d5698SJohn Baldwin.align 32 2402bc3d5698SJohn Baldwin.Lmulx4x_outer: 2403bc3d5698SJohn Baldwin leaq 16-256(%rbx),%r10 2404bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 2405bc3d5698SJohn Baldwin.byte 0x67,0x67 2406bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 2407bc3d5698SJohn Baldwin movdqa -128(%rdi),%xmm0 2408bc3d5698SJohn Baldwin movdqa -112(%rdi),%xmm1 2409bc3d5698SJohn Baldwin movdqa -96(%rdi),%xmm2 2410bc3d5698SJohn Baldwin pand 256(%r10),%xmm0 2411bc3d5698SJohn Baldwin movdqa -80(%rdi),%xmm3 2412bc3d5698SJohn Baldwin pand 272(%r10),%xmm1 2413bc3d5698SJohn Baldwin por %xmm0,%xmm4 2414bc3d5698SJohn Baldwin pand 288(%r10),%xmm2 2415bc3d5698SJohn Baldwin por %xmm1,%xmm5 2416bc3d5698SJohn Baldwin pand 304(%r10),%xmm3 2417bc3d5698SJohn Baldwin por %xmm2,%xmm4 2418bc3d5698SJohn Baldwin por %xmm3,%xmm5 2419bc3d5698SJohn Baldwin movdqa -64(%rdi),%xmm0 2420bc3d5698SJohn Baldwin movdqa -48(%rdi),%xmm1 2421bc3d5698SJohn Baldwin movdqa -32(%rdi),%xmm2 2422bc3d5698SJohn Baldwin pand 320(%r10),%xmm0 2423bc3d5698SJohn Baldwin movdqa -16(%rdi),%xmm3 2424bc3d5698SJohn Baldwin pand 336(%r10),%xmm1 2425bc3d5698SJohn Baldwin por %xmm0,%xmm4 2426bc3d5698SJohn Baldwin pand 352(%r10),%xmm2 
2427bc3d5698SJohn Baldwin por %xmm1,%xmm5 2428bc3d5698SJohn Baldwin pand 368(%r10),%xmm3 2429bc3d5698SJohn Baldwin por %xmm2,%xmm4 2430bc3d5698SJohn Baldwin por %xmm3,%xmm5 2431bc3d5698SJohn Baldwin movdqa 0(%rdi),%xmm0 2432bc3d5698SJohn Baldwin movdqa 16(%rdi),%xmm1 2433bc3d5698SJohn Baldwin movdqa 32(%rdi),%xmm2 2434bc3d5698SJohn Baldwin pand 384(%r10),%xmm0 2435bc3d5698SJohn Baldwin movdqa 48(%rdi),%xmm3 2436bc3d5698SJohn Baldwin pand 400(%r10),%xmm1 2437bc3d5698SJohn Baldwin por %xmm0,%xmm4 2438bc3d5698SJohn Baldwin pand 416(%r10),%xmm2 2439bc3d5698SJohn Baldwin por %xmm1,%xmm5 2440bc3d5698SJohn Baldwin pand 432(%r10),%xmm3 2441bc3d5698SJohn Baldwin por %xmm2,%xmm4 2442bc3d5698SJohn Baldwin por %xmm3,%xmm5 2443bc3d5698SJohn Baldwin movdqa 64(%rdi),%xmm0 2444bc3d5698SJohn Baldwin movdqa 80(%rdi),%xmm1 2445bc3d5698SJohn Baldwin movdqa 96(%rdi),%xmm2 2446bc3d5698SJohn Baldwin pand 448(%r10),%xmm0 2447bc3d5698SJohn Baldwin movdqa 112(%rdi),%xmm3 2448bc3d5698SJohn Baldwin pand 464(%r10),%xmm1 2449bc3d5698SJohn Baldwin por %xmm0,%xmm4 2450bc3d5698SJohn Baldwin pand 480(%r10),%xmm2 2451bc3d5698SJohn Baldwin por %xmm1,%xmm5 2452bc3d5698SJohn Baldwin pand 496(%r10),%xmm3 2453bc3d5698SJohn Baldwin por %xmm2,%xmm4 2454bc3d5698SJohn Baldwin por %xmm3,%xmm5 2455bc3d5698SJohn Baldwin por %xmm5,%xmm4 2456bc3d5698SJohn Baldwin pshufd $0x4e,%xmm4,%xmm0 2457bc3d5698SJohn Baldwin por %xmm4,%xmm0 2458bc3d5698SJohn Baldwin leaq 256(%rdi),%rdi 2459bc3d5698SJohn Baldwin.byte 102,72,15,126,194 2460bc3d5698SJohn Baldwin 2461bc3d5698SJohn Baldwin movq %rbp,(%rbx) 2462bc3d5698SJohn Baldwin leaq 32(%rbx,%rax,1),%rbx 2463bc3d5698SJohn Baldwin mulxq 0(%rsi),%r8,%r11 2464bc3d5698SJohn Baldwin xorq %rbp,%rbp 2465bc3d5698SJohn Baldwin movq %rdx,%r9 2466bc3d5698SJohn Baldwin mulxq 8(%rsi),%r14,%r12 2467bc3d5698SJohn Baldwin adoxq -32(%rbx),%r8 2468bc3d5698SJohn Baldwin adcxq %r14,%r11 2469bc3d5698SJohn Baldwin mulxq 16(%rsi),%r15,%r13 2470bc3d5698SJohn Baldwin adoxq -24(%rbx),%r11 
2471bc3d5698SJohn Baldwin adcxq %r15,%r12 2472bc3d5698SJohn Baldwin mulxq 24(%rsi),%rdx,%r14 2473bc3d5698SJohn Baldwin adoxq -16(%rbx),%r12 2474bc3d5698SJohn Baldwin adcxq %rdx,%r13 2475bc3d5698SJohn Baldwin leaq (%rcx,%rax,1),%rcx 2476bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 2477bc3d5698SJohn Baldwin adoxq -8(%rbx),%r13 2478bc3d5698SJohn Baldwin adcxq %rbp,%r14 2479bc3d5698SJohn Baldwin adoxq %rbp,%r14 2480bc3d5698SJohn Baldwin 2481bc3d5698SJohn Baldwin movq %r8,%r15 2482bc3d5698SJohn Baldwin imulq 32+8(%rsp),%r8 2483bc3d5698SJohn Baldwin 2484bc3d5698SJohn Baldwin movq %r8,%rdx 2485bc3d5698SJohn Baldwin xorq %rbp,%rbp 2486bc3d5698SJohn Baldwin movq %rdi,8+8(%rsp) 2487bc3d5698SJohn Baldwin 2488bc3d5698SJohn Baldwin mulxq 0(%rcx),%rax,%r10 2489bc3d5698SJohn Baldwin adcxq %rax,%r15 2490bc3d5698SJohn Baldwin adoxq %r11,%r10 2491bc3d5698SJohn Baldwin mulxq 8(%rcx),%rax,%r11 2492bc3d5698SJohn Baldwin adcxq %rax,%r10 2493bc3d5698SJohn Baldwin adoxq %r12,%r11 2494bc3d5698SJohn Baldwin mulxq 16(%rcx),%rax,%r12 2495bc3d5698SJohn Baldwin adcxq %rax,%r11 2496bc3d5698SJohn Baldwin adoxq %r13,%r12 2497bc3d5698SJohn Baldwin mulxq 24(%rcx),%rax,%r15 2498bc3d5698SJohn Baldwin movq %r9,%rdx 2499bc3d5698SJohn Baldwin movq 24+8(%rsp),%rdi 2500bc3d5698SJohn Baldwin movq %r10,-32(%rbx) 2501bc3d5698SJohn Baldwin adcxq %rax,%r12 2502bc3d5698SJohn Baldwin movq %r11,-24(%rbx) 2503bc3d5698SJohn Baldwin adoxq %rbp,%r15 2504bc3d5698SJohn Baldwin movq %r12,-16(%rbx) 2505bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 2506bc3d5698SJohn Baldwin jmp .Lmulx4x_inner 2507bc3d5698SJohn Baldwin 2508bc3d5698SJohn Baldwin.align 32 2509bc3d5698SJohn Baldwin.Lmulx4x_inner: 2510bc3d5698SJohn Baldwin mulxq 0(%rsi),%r10,%rax 2511bc3d5698SJohn Baldwin adcxq %rbp,%r15 2512bc3d5698SJohn Baldwin adoxq %r14,%r10 2513bc3d5698SJohn Baldwin mulxq 8(%rsi),%r11,%r14 2514bc3d5698SJohn Baldwin adcxq 0(%rbx),%r10 2515bc3d5698SJohn Baldwin adoxq %rax,%r11 2516bc3d5698SJohn Baldwin mulxq 16(%rsi),%r12,%rax 2517bc3d5698SJohn 
Baldwin adcxq 8(%rbx),%r11 2518bc3d5698SJohn Baldwin adoxq %r14,%r12 2519bc3d5698SJohn Baldwin mulxq 24(%rsi),%r13,%r14 2520bc3d5698SJohn Baldwin movq %r8,%rdx 2521bc3d5698SJohn Baldwin adcxq 16(%rbx),%r12 2522bc3d5698SJohn Baldwin adoxq %rax,%r13 2523bc3d5698SJohn Baldwin adcxq 24(%rbx),%r13 2524bc3d5698SJohn Baldwin adoxq %rbp,%r14 2525bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 2526bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 2527bc3d5698SJohn Baldwin adcxq %rbp,%r14 2528bc3d5698SJohn Baldwin 2529bc3d5698SJohn Baldwin adoxq %r15,%r10 2530bc3d5698SJohn Baldwin mulxq 0(%rcx),%rax,%r15 2531bc3d5698SJohn Baldwin adcxq %rax,%r10 2532bc3d5698SJohn Baldwin adoxq %r15,%r11 2533bc3d5698SJohn Baldwin mulxq 8(%rcx),%rax,%r15 2534bc3d5698SJohn Baldwin adcxq %rax,%r11 2535bc3d5698SJohn Baldwin adoxq %r15,%r12 2536bc3d5698SJohn Baldwin mulxq 16(%rcx),%rax,%r15 2537bc3d5698SJohn Baldwin movq %r10,-40(%rbx) 2538bc3d5698SJohn Baldwin adcxq %rax,%r12 2539bc3d5698SJohn Baldwin adoxq %r15,%r13 2540bc3d5698SJohn Baldwin movq %r11,-32(%rbx) 2541bc3d5698SJohn Baldwin mulxq 24(%rcx),%rax,%r15 2542bc3d5698SJohn Baldwin movq %r9,%rdx 2543bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 2544bc3d5698SJohn Baldwin movq %r12,-24(%rbx) 2545bc3d5698SJohn Baldwin adcxq %rax,%r13 2546bc3d5698SJohn Baldwin adoxq %rbp,%r15 2547bc3d5698SJohn Baldwin movq %r13,-16(%rbx) 2548bc3d5698SJohn Baldwin 2549bc3d5698SJohn Baldwin decq %rdi 2550bc3d5698SJohn Baldwin jnz .Lmulx4x_inner 2551bc3d5698SJohn Baldwin 2552bc3d5698SJohn Baldwin movq 0+8(%rsp),%rax 2553bc3d5698SJohn Baldwin adcq %rbp,%r15 2554bc3d5698SJohn Baldwin subq 0(%rbx),%rdi 2555bc3d5698SJohn Baldwin movq 8+8(%rsp),%rdi 2556bc3d5698SJohn Baldwin movq 16+8(%rsp),%r10 2557bc3d5698SJohn Baldwin adcq %r15,%r14 2558bc3d5698SJohn Baldwin leaq (%rsi,%rax,1),%rsi 2559bc3d5698SJohn Baldwin adcq %rbp,%rbp 2560bc3d5698SJohn Baldwin movq %r14,-8(%rbx) 2561bc3d5698SJohn Baldwin 2562bc3d5698SJohn Baldwin cmpq %r10,%rdi 2563bc3d5698SJohn Baldwin jb .Lmulx4x_outer 
/*
 * Tail of mulx4x_internal (the start of that routine lies above this
 * point).  It sets up registers and then jumps to .Lsqrx4x_sub_entry.
 */

	movq	-8(%rcx),%r10
	movq	%rbp,%r8
	movq	(%rcx,%rax,1),%r12
	leaq	(%rcx,%rax,1),%rbp
	movq	%rax,%rcx
	leaq	(%rbx,%rax,1),%rdi
	xorl	%eax,%eax
	xorq	%r15,%r15
	subq	%r14,%r10		/* CF = borrow of r10 - r14 */
	adcq	%r15,%r15		/* r15 = that borrow (r15 was 0) */
	orq	%r15,%r8
	sarq	$3+2,%rcx		/* rcx >>= 5 (arithmetic) */
	subq	%r8,%rax		/* rax = 0 - r8 */
	movq	56+8(%rsp),%rdx
	decq	%r12
	movq	8(%rbp),%r13
	xorq	%r8,%r8
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqrx4x_sub_entry
.cfi_endproc
.size	mulx4x_internal,.-mulx4x_internal

/*
 * bn_powerx5
 *
 * What the code below does: saves the six callee-saved integer
 * registers, carves a 64-byte-aligned scratch frame below the caller's
 * stack, runs five (__bn_sqrx8x_internal, __bn_postx4x_internal) pairs
 * followed by one mulx4x_internal call, restores the saved registers and
 * returns 1 in %rax.
 *
 * Registers as read on entry:
 *   %rdi  - stashed in %xmm1; also biases the frame placement below
 *   %rsi  - left untouched for __bn_sqrx8x_internal, which reads it
 *   %rdx  - stashed in %xmm4, reloaded into %rdx before mulx4x_internal
 *   %rcx  - stashed in %xmm2, reloaded into %rcx before mulx4x_internal
 *   %r8   - pointer; the quadword it addresses is kept at 32(%rsp)
 *   %r9d  - element count; multiplied by 8 to form a byte length
 * NOTE(review): this presumably corresponds to OpenSSL's
 * bn_power5(rp, ap, table, np, n0, num, power) - confirm against the
 * generator script.
 *
 * Frame slots written here (the callees see them 8 bytes higher because
 * of the pushed return address):
 *   32(%rsp) - quadword loaded from (%r8)
 *   40(%rsp) - %rsp value at function entry
 */
.type	bn_powerx5,@function
.align	32
bn_powerx5:
.cfi_startproc
	movq	%rsp,%rax		/* rax = entry %rsp, kept until the frame is built */
.cfi_def_cfa_register	%rax
.Lpowerx5_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56
.Lpowerx5_prologue:

	shll	$3,%r9d			/* r9 = count * 8 (bytes); 32-bit op zero-extends */
	leaq	(%r9,%r9,2),%r10	/* r10 = 3 * bytes */
	negq	%r9			/* r9 = -bytes */
	movq	(%r8),%r8		/* replace the pointer by the value it addresses */

	/*
	 * Choose the frame base in %rbp.  r11 = (rsp - 320 - 2*bytes - rdi)
	 * mod 4096, i.e. the page-offset distance between the candidate
	 * frame and the buffer at %rdi.
	 * NOTE(review): presumably this keeps the frame from aliasing the
	 * caller's buffers modulo 4096 - confirm against the generator.
	 */
	leaq	-320(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	subq	%rdi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	.Lpwrx_sp_alt		/* 3*bytes < r11 (unsigned) */
	subq	%r11,%rbp
	leaq	-320(%rbp,%r9,2),%rbp	/* rbp = rsp - r11 - 320 - 2*bytes */
	jmp	.Lpwrx_sp_done

.align	32
.Lpwrx_sp_alt:
	leaq	4096-320(,%r9,2),%r10	/* r10 = 4096 - 320 - 2*bytes */
	leaq	-320(%rbp,%r9,2),%rbp	/* rbp = rsp - 320 - 2*bytes */
	subq	%r10,%r11
	movq	$0,%r10			/* mov, not xor: CF from the subq must survive */
	cmovcq	%r10,%r11		/* clamp r11 to 0 if the subtraction borrowed */
	subq	%r11,%rbp
.Lpwrx_sp_done:
	andq	$-64,%rbp		/* round the frame base down to 64 bytes */
	/*
	 * Move %rsp down to %rbp in steps of at most 4096 bytes, loading
	 * one quadword at every step.
	 * NOTE(review): presumably a stack probe so each page is touched
	 * in order - confirm.
	 */
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwrx_page_walk
	jmp	.Lpwrx_page_walk_done

.Lpwrx_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	.Lpwrx_page_walk
.Lpwrx_page_walk_done:

	movq	%r9,%r10		/* r10 = -bytes */
	negq	%r9			/* r9  = +bytes */

	pxor	%xmm0,%xmm0		/* xmm0 = 0; __bn_sqrx8x_internal stores it to clear memory */
.byte	102,72,15,110,207		/* movq %rdi,%xmm1 */
.byte	102,72,15,110,209		/* movq %rcx,%xmm2 */
.byte	102,73,15,110,218		/* movq %r10,%xmm3 */
.byte	102,72,15,110,226		/* movq %rdx,%xmm4 */
	movq	%r8,32(%rsp)
	movq	%rax,40(%rsp)		/* remember the entry %rsp */
	/* DWARF expression: CFA = *(rsp + 40) + 8 */
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpowerx5_body:

	/* five sqrx8x + postx4x pairs */
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal
	call	__bn_sqrx8x_internal
	call	__bn_postx4x_internal

	/* set up registers for mulx4x_internal */
	movq	%r10,%r9
	movq	%rsi,%rdi
.byte	102,72,15,126,209		/* movq %xmm2,%rcx */
.byte	102,72,15,126,226		/* movq %xmm4,%rdx */
	movq	40(%rsp),%rax

	call	mulx4x_internal

	movq	40(%rsp),%rsi		/* rsi = entry %rsp */
.cfi_def_cfa	%rsi,8
	movq	$1,%rax			/* return value 1 */

	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp		/* drop the scratch frame */
.cfi_def_cfa_register	%rsp
.Lpowerx5_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	bn_powerx5,.-bn_powerx5

/*
 * bn_sqrx8x_internal / __bn_sqrx8x_internal: header and first two
 * instructions.  The routine continues directly below.
 */
.globl	bn_sqrx8x_internal
.hidden	bn_sqrx8x_internal
.type	bn_sqrx8x_internal,@function
.align	32
bn_sqrx8x_internal:
__bn_sqrx8x_internal:
.cfi_startproc

	leaq	48+8(%rsp),%rdi		/* rdi = scratch vector at 48+8(%rsp) */
	leaq	(%rsi,%r9,1),%rbp	/* rbp = rsi + r9 */
2771bc3d5698SJohn Baldwin movq %r9,0+8(%rsp) 2772bc3d5698SJohn Baldwin movq %rbp,8+8(%rsp) 2773bc3d5698SJohn Baldwin jmp .Lsqr8x_zero_start 2774bc3d5698SJohn Baldwin 2775bc3d5698SJohn Baldwin.align 32 2776bc3d5698SJohn Baldwin.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2777bc3d5698SJohn Baldwin.Lsqrx8x_zero: 2778bc3d5698SJohn Baldwin.byte 0x3e 2779bc3d5698SJohn Baldwin movdqa %xmm0,0(%rdi) 2780bc3d5698SJohn Baldwin movdqa %xmm0,16(%rdi) 2781bc3d5698SJohn Baldwin movdqa %xmm0,32(%rdi) 2782bc3d5698SJohn Baldwin movdqa %xmm0,48(%rdi) 2783bc3d5698SJohn Baldwin.Lsqr8x_zero_start: 2784bc3d5698SJohn Baldwin movdqa %xmm0,64(%rdi) 2785bc3d5698SJohn Baldwin movdqa %xmm0,80(%rdi) 2786bc3d5698SJohn Baldwin movdqa %xmm0,96(%rdi) 2787bc3d5698SJohn Baldwin movdqa %xmm0,112(%rdi) 2788bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 2789bc3d5698SJohn Baldwin subq $64,%r9 2790bc3d5698SJohn Baldwin jnz .Lsqrx8x_zero 2791bc3d5698SJohn Baldwin 2792bc3d5698SJohn Baldwin movq 0(%rsi),%rdx 2793bc3d5698SJohn Baldwin 2794bc3d5698SJohn Baldwin xorq %r10,%r10 2795bc3d5698SJohn Baldwin xorq %r11,%r11 2796bc3d5698SJohn Baldwin xorq %r12,%r12 2797bc3d5698SJohn Baldwin xorq %r13,%r13 2798bc3d5698SJohn Baldwin xorq %r14,%r14 2799bc3d5698SJohn Baldwin xorq %r15,%r15 2800bc3d5698SJohn Baldwin leaq 48+8(%rsp),%rdi 2801bc3d5698SJohn Baldwin xorq %rbp,%rbp 2802bc3d5698SJohn Baldwin jmp .Lsqrx8x_outer_loop 2803bc3d5698SJohn Baldwin 2804bc3d5698SJohn Baldwin.align 32 2805bc3d5698SJohn Baldwin.Lsqrx8x_outer_loop: 2806bc3d5698SJohn Baldwin mulxq 8(%rsi),%r8,%rax 2807bc3d5698SJohn Baldwin adcxq %r9,%r8 2808bc3d5698SJohn Baldwin adoxq %rax,%r10 2809bc3d5698SJohn Baldwin mulxq 16(%rsi),%r9,%rax 2810bc3d5698SJohn Baldwin adcxq %r10,%r9 2811bc3d5698SJohn Baldwin adoxq %rax,%r11 2812bc3d5698SJohn Baldwin.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2813bc3d5698SJohn Baldwin adcxq %r11,%r10 2814bc3d5698SJohn Baldwin adoxq %rax,%r12 2815bc3d5698SJohn Baldwin.byte 
0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2816bc3d5698SJohn Baldwin adcxq %r12,%r11 2817bc3d5698SJohn Baldwin adoxq %rax,%r13 2818bc3d5698SJohn Baldwin mulxq 40(%rsi),%r12,%rax 2819bc3d5698SJohn Baldwin adcxq %r13,%r12 2820bc3d5698SJohn Baldwin adoxq %rax,%r14 2821bc3d5698SJohn Baldwin mulxq 48(%rsi),%r13,%rax 2822bc3d5698SJohn Baldwin adcxq %r14,%r13 2823bc3d5698SJohn Baldwin adoxq %r15,%rax 2824bc3d5698SJohn Baldwin mulxq 56(%rsi),%r14,%r15 2825bc3d5698SJohn Baldwin movq 8(%rsi),%rdx 2826bc3d5698SJohn Baldwin adcxq %rax,%r14 2827bc3d5698SJohn Baldwin adoxq %rbp,%r15 2828bc3d5698SJohn Baldwin adcq 64(%rdi),%r15 2829bc3d5698SJohn Baldwin movq %r8,8(%rdi) 2830bc3d5698SJohn Baldwin movq %r9,16(%rdi) 2831bc3d5698SJohn Baldwin sbbq %rcx,%rcx 2832bc3d5698SJohn Baldwin xorq %rbp,%rbp 2833bc3d5698SJohn Baldwin 2834bc3d5698SJohn Baldwin 2835bc3d5698SJohn Baldwin mulxq 16(%rsi),%r8,%rbx 2836bc3d5698SJohn Baldwin mulxq 24(%rsi),%r9,%rax 2837bc3d5698SJohn Baldwin adcxq %r10,%r8 2838bc3d5698SJohn Baldwin adoxq %rbx,%r9 2839bc3d5698SJohn Baldwin mulxq 32(%rsi),%r10,%rbx 2840bc3d5698SJohn Baldwin adcxq %r11,%r9 2841bc3d5698SJohn Baldwin adoxq %rax,%r10 2842bc3d5698SJohn Baldwin.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 2843bc3d5698SJohn Baldwin adcxq %r12,%r10 2844bc3d5698SJohn Baldwin adoxq %rbx,%r11 2845bc3d5698SJohn Baldwin.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 2846bc3d5698SJohn Baldwin adcxq %r13,%r11 2847bc3d5698SJohn Baldwin adoxq %r14,%r12 2848bc3d5698SJohn Baldwin.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 2849bc3d5698SJohn Baldwin movq 16(%rsi),%rdx 2850bc3d5698SJohn Baldwin adcxq %rax,%r12 2851bc3d5698SJohn Baldwin adoxq %rbx,%r13 2852bc3d5698SJohn Baldwin adcxq %r15,%r13 2853bc3d5698SJohn Baldwin adoxq %rbp,%r14 2854bc3d5698SJohn Baldwin adcxq %rbp,%r14 2855bc3d5698SJohn Baldwin 2856bc3d5698SJohn Baldwin movq %r8,24(%rdi) 2857bc3d5698SJohn Baldwin movq %r9,32(%rdi) 2858bc3d5698SJohn Baldwin 2859bc3d5698SJohn Baldwin mulxq 
24(%rsi),%r8,%rbx 2860bc3d5698SJohn Baldwin mulxq 32(%rsi),%r9,%rax 2861bc3d5698SJohn Baldwin adcxq %r10,%r8 2862bc3d5698SJohn Baldwin adoxq %rbx,%r9 2863bc3d5698SJohn Baldwin mulxq 40(%rsi),%r10,%rbx 2864bc3d5698SJohn Baldwin adcxq %r11,%r9 2865bc3d5698SJohn Baldwin adoxq %rax,%r10 2866bc3d5698SJohn Baldwin.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 2867bc3d5698SJohn Baldwin adcxq %r12,%r10 2868bc3d5698SJohn Baldwin adoxq %r13,%r11 2869bc3d5698SJohn Baldwin.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 2870bc3d5698SJohn Baldwin.byte 0x3e 2871bc3d5698SJohn Baldwin movq 24(%rsi),%rdx 2872bc3d5698SJohn Baldwin adcxq %rbx,%r11 2873bc3d5698SJohn Baldwin adoxq %rax,%r12 2874bc3d5698SJohn Baldwin adcxq %r14,%r12 2875bc3d5698SJohn Baldwin movq %r8,40(%rdi) 2876bc3d5698SJohn Baldwin movq %r9,48(%rdi) 2877bc3d5698SJohn Baldwin mulxq 32(%rsi),%r8,%rax 2878bc3d5698SJohn Baldwin adoxq %rbp,%r13 2879bc3d5698SJohn Baldwin adcxq %rbp,%r13 2880bc3d5698SJohn Baldwin 2881bc3d5698SJohn Baldwin mulxq 40(%rsi),%r9,%rbx 2882bc3d5698SJohn Baldwin adcxq %r10,%r8 2883bc3d5698SJohn Baldwin adoxq %rax,%r9 2884bc3d5698SJohn Baldwin mulxq 48(%rsi),%r10,%rax 2885bc3d5698SJohn Baldwin adcxq %r11,%r9 2886bc3d5698SJohn Baldwin adoxq %r12,%r10 2887bc3d5698SJohn Baldwin mulxq 56(%rsi),%r11,%r12 2888bc3d5698SJohn Baldwin movq 32(%rsi),%rdx 2889bc3d5698SJohn Baldwin movq 40(%rsi),%r14 2890bc3d5698SJohn Baldwin adcxq %rbx,%r10 2891bc3d5698SJohn Baldwin adoxq %rax,%r11 2892bc3d5698SJohn Baldwin movq 48(%rsi),%r15 2893bc3d5698SJohn Baldwin adcxq %r13,%r11 2894bc3d5698SJohn Baldwin adoxq %rbp,%r12 2895bc3d5698SJohn Baldwin adcxq %rbp,%r12 2896bc3d5698SJohn Baldwin 2897bc3d5698SJohn Baldwin movq %r8,56(%rdi) 2898bc3d5698SJohn Baldwin movq %r9,64(%rdi) 2899bc3d5698SJohn Baldwin 2900bc3d5698SJohn Baldwin mulxq %r14,%r9,%rax 2901bc3d5698SJohn Baldwin movq 56(%rsi),%r8 2902bc3d5698SJohn Baldwin adcxq %r10,%r9 2903bc3d5698SJohn Baldwin mulxq %r15,%r10,%rbx 2904bc3d5698SJohn Baldwin adoxq %rax,%r10 
2905bc3d5698SJohn Baldwin adcxq %r11,%r10 2906bc3d5698SJohn Baldwin mulxq %r8,%r11,%rax 2907bc3d5698SJohn Baldwin movq %r14,%rdx 2908bc3d5698SJohn Baldwin adoxq %rbx,%r11 2909bc3d5698SJohn Baldwin adcxq %r12,%r11 2910bc3d5698SJohn Baldwin 2911bc3d5698SJohn Baldwin adcxq %rbp,%rax 2912bc3d5698SJohn Baldwin 2913bc3d5698SJohn Baldwin mulxq %r15,%r14,%rbx 2914bc3d5698SJohn Baldwin mulxq %r8,%r12,%r13 2915bc3d5698SJohn Baldwin movq %r15,%rdx 2916bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 2917bc3d5698SJohn Baldwin adcxq %r14,%r11 2918bc3d5698SJohn Baldwin adoxq %rbx,%r12 2919bc3d5698SJohn Baldwin adcxq %rax,%r12 2920bc3d5698SJohn Baldwin adoxq %rbp,%r13 2921bc3d5698SJohn Baldwin 2922bc3d5698SJohn Baldwin.byte 0x67,0x67 2923bc3d5698SJohn Baldwin mulxq %r8,%r8,%r14 2924bc3d5698SJohn Baldwin adcxq %r8,%r13 2925bc3d5698SJohn Baldwin adcxq %rbp,%r14 2926bc3d5698SJohn Baldwin 2927bc3d5698SJohn Baldwin cmpq 8+8(%rsp),%rsi 2928bc3d5698SJohn Baldwin je .Lsqrx8x_outer_break 2929bc3d5698SJohn Baldwin 2930bc3d5698SJohn Baldwin negq %rcx 2931bc3d5698SJohn Baldwin movq $-8,%rcx 2932bc3d5698SJohn Baldwin movq %rbp,%r15 2933bc3d5698SJohn Baldwin movq 64(%rdi),%r8 2934bc3d5698SJohn Baldwin adcxq 72(%rdi),%r9 2935bc3d5698SJohn Baldwin adcxq 80(%rdi),%r10 2936bc3d5698SJohn Baldwin adcxq 88(%rdi),%r11 2937bc3d5698SJohn Baldwin adcq 96(%rdi),%r12 2938bc3d5698SJohn Baldwin adcq 104(%rdi),%r13 2939bc3d5698SJohn Baldwin adcq 112(%rdi),%r14 2940bc3d5698SJohn Baldwin adcq 120(%rdi),%r15 2941bc3d5698SJohn Baldwin leaq (%rsi),%rbp 2942bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 2943bc3d5698SJohn Baldwin sbbq %rax,%rax 2944bc3d5698SJohn Baldwin 2945bc3d5698SJohn Baldwin movq -64(%rsi),%rdx 2946bc3d5698SJohn Baldwin movq %rax,16+8(%rsp) 2947bc3d5698SJohn Baldwin movq %rdi,24+8(%rsp) 2948bc3d5698SJohn Baldwin 2949bc3d5698SJohn Baldwin 2950bc3d5698SJohn Baldwin xorl %eax,%eax 2951bc3d5698SJohn Baldwin jmp .Lsqrx8x_loop 2952bc3d5698SJohn Baldwin 2953bc3d5698SJohn Baldwin.align 32 2954bc3d5698SJohn 
Baldwin.Lsqrx8x_loop: 2955bc3d5698SJohn Baldwin movq %r8,%rbx 2956bc3d5698SJohn Baldwin mulxq 0(%rbp),%rax,%r8 2957bc3d5698SJohn Baldwin adcxq %rax,%rbx 2958bc3d5698SJohn Baldwin adoxq %r9,%r8 2959bc3d5698SJohn Baldwin 2960bc3d5698SJohn Baldwin mulxq 8(%rbp),%rax,%r9 2961bc3d5698SJohn Baldwin adcxq %rax,%r8 2962bc3d5698SJohn Baldwin adoxq %r10,%r9 2963bc3d5698SJohn Baldwin 2964bc3d5698SJohn Baldwin mulxq 16(%rbp),%rax,%r10 2965bc3d5698SJohn Baldwin adcxq %rax,%r9 2966bc3d5698SJohn Baldwin adoxq %r11,%r10 2967bc3d5698SJohn Baldwin 2968bc3d5698SJohn Baldwin mulxq 24(%rbp),%rax,%r11 2969bc3d5698SJohn Baldwin adcxq %rax,%r10 2970bc3d5698SJohn Baldwin adoxq %r12,%r11 2971bc3d5698SJohn Baldwin 2972bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 2973bc3d5698SJohn Baldwin adcxq %rax,%r11 2974bc3d5698SJohn Baldwin adoxq %r13,%r12 2975bc3d5698SJohn Baldwin 2976bc3d5698SJohn Baldwin mulxq 40(%rbp),%rax,%r13 2977bc3d5698SJohn Baldwin adcxq %rax,%r12 2978bc3d5698SJohn Baldwin adoxq %r14,%r13 2979bc3d5698SJohn Baldwin 2980bc3d5698SJohn Baldwin mulxq 48(%rbp),%rax,%r14 2981bc3d5698SJohn Baldwin movq %rbx,(%rdi,%rcx,8) 2982bc3d5698SJohn Baldwin movl $0,%ebx 2983bc3d5698SJohn Baldwin adcxq %rax,%r13 2984bc3d5698SJohn Baldwin adoxq %r15,%r14 2985bc3d5698SJohn Baldwin 2986bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 2987bc3d5698SJohn Baldwin movq 8(%rsi,%rcx,8),%rdx 2988bc3d5698SJohn Baldwin adcxq %rax,%r14 2989bc3d5698SJohn Baldwin adoxq %rbx,%r15 2990bc3d5698SJohn Baldwin adcxq %rbx,%r15 2991bc3d5698SJohn Baldwin 2992bc3d5698SJohn Baldwin.byte 0x67 2993bc3d5698SJohn Baldwin incq %rcx 2994bc3d5698SJohn Baldwin jnz .Lsqrx8x_loop 2995bc3d5698SJohn Baldwin 2996bc3d5698SJohn Baldwin leaq 64(%rbp),%rbp 2997bc3d5698SJohn Baldwin movq $-8,%rcx 2998bc3d5698SJohn Baldwin cmpq 8+8(%rsp),%rbp 2999bc3d5698SJohn Baldwin je .Lsqrx8x_break 3000bc3d5698SJohn Baldwin 3001bc3d5698SJohn Baldwin subq 16+8(%rsp),%rbx 3002bc3d5698SJohn Baldwin.byte 0x66 
3003bc3d5698SJohn Baldwin movq -64(%rsi),%rdx 3004bc3d5698SJohn Baldwin adcxq 0(%rdi),%r8 3005bc3d5698SJohn Baldwin adcxq 8(%rdi),%r9 3006bc3d5698SJohn Baldwin adcq 16(%rdi),%r10 3007bc3d5698SJohn Baldwin adcq 24(%rdi),%r11 3008bc3d5698SJohn Baldwin adcq 32(%rdi),%r12 3009bc3d5698SJohn Baldwin adcq 40(%rdi),%r13 3010bc3d5698SJohn Baldwin adcq 48(%rdi),%r14 3011bc3d5698SJohn Baldwin adcq 56(%rdi),%r15 3012bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 3013bc3d5698SJohn Baldwin.byte 0x67 3014bc3d5698SJohn Baldwin sbbq %rax,%rax 3015bc3d5698SJohn Baldwin xorl %ebx,%ebx 3016bc3d5698SJohn Baldwin movq %rax,16+8(%rsp) 3017bc3d5698SJohn Baldwin jmp .Lsqrx8x_loop 3018bc3d5698SJohn Baldwin 3019bc3d5698SJohn Baldwin.align 32 3020bc3d5698SJohn Baldwin.Lsqrx8x_break: 3021bc3d5698SJohn Baldwin xorq %rbp,%rbp 3022bc3d5698SJohn Baldwin subq 16+8(%rsp),%rbx 3023bc3d5698SJohn Baldwin adcxq %rbp,%r8 3024bc3d5698SJohn Baldwin movq 24+8(%rsp),%rcx 3025bc3d5698SJohn Baldwin adcxq %rbp,%r9 3026bc3d5698SJohn Baldwin movq 0(%rsi),%rdx 3027bc3d5698SJohn Baldwin adcq $0,%r10 3028bc3d5698SJohn Baldwin movq %r8,0(%rdi) 3029bc3d5698SJohn Baldwin adcq $0,%r11 3030bc3d5698SJohn Baldwin adcq $0,%r12 3031bc3d5698SJohn Baldwin adcq $0,%r13 3032bc3d5698SJohn Baldwin adcq $0,%r14 3033bc3d5698SJohn Baldwin adcq $0,%r15 3034bc3d5698SJohn Baldwin cmpq %rcx,%rdi 3035bc3d5698SJohn Baldwin je .Lsqrx8x_outer_loop 3036bc3d5698SJohn Baldwin 3037bc3d5698SJohn Baldwin movq %r9,8(%rdi) 3038bc3d5698SJohn Baldwin movq 8(%rcx),%r9 3039bc3d5698SJohn Baldwin movq %r10,16(%rdi) 3040bc3d5698SJohn Baldwin movq 16(%rcx),%r10 3041bc3d5698SJohn Baldwin movq %r11,24(%rdi) 3042bc3d5698SJohn Baldwin movq 24(%rcx),%r11 3043bc3d5698SJohn Baldwin movq %r12,32(%rdi) 3044bc3d5698SJohn Baldwin movq 32(%rcx),%r12 3045bc3d5698SJohn Baldwin movq %r13,40(%rdi) 3046bc3d5698SJohn Baldwin movq 40(%rcx),%r13 3047bc3d5698SJohn Baldwin movq %r14,48(%rdi) 3048bc3d5698SJohn Baldwin movq 48(%rcx),%r14 3049bc3d5698SJohn Baldwin movq %r15,56(%rdi) 
3050bc3d5698SJohn Baldwin movq 56(%rcx),%r15 3051bc3d5698SJohn Baldwin movq %rcx,%rdi 3052bc3d5698SJohn Baldwin jmp .Lsqrx8x_outer_loop 3053bc3d5698SJohn Baldwin 3054bc3d5698SJohn Baldwin.align 32 3055bc3d5698SJohn Baldwin.Lsqrx8x_outer_break: 3056bc3d5698SJohn Baldwin movq %r9,72(%rdi) 3057bc3d5698SJohn Baldwin.byte 102,72,15,126,217 3058bc3d5698SJohn Baldwin movq %r10,80(%rdi) 3059bc3d5698SJohn Baldwin movq %r11,88(%rdi) 3060bc3d5698SJohn Baldwin movq %r12,96(%rdi) 3061bc3d5698SJohn Baldwin movq %r13,104(%rdi) 3062bc3d5698SJohn Baldwin movq %r14,112(%rdi) 3063bc3d5698SJohn Baldwin leaq 48+8(%rsp),%rdi 3064bc3d5698SJohn Baldwin movq (%rsi,%rcx,1),%rdx 3065bc3d5698SJohn Baldwin 3066bc3d5698SJohn Baldwin movq 8(%rdi),%r11 3067bc3d5698SJohn Baldwin xorq %r10,%r10 3068bc3d5698SJohn Baldwin movq 0+8(%rsp),%r9 3069bc3d5698SJohn Baldwin adoxq %r11,%r11 3070bc3d5698SJohn Baldwin movq 16(%rdi),%r12 3071bc3d5698SJohn Baldwin movq 24(%rdi),%r13 3072bc3d5698SJohn Baldwin 3073bc3d5698SJohn Baldwin 3074bc3d5698SJohn Baldwin.align 32 3075bc3d5698SJohn Baldwin.Lsqrx4x_shift_n_add: 3076bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rbx 3077bc3d5698SJohn Baldwin adoxq %r12,%r12 3078bc3d5698SJohn Baldwin adcxq %r10,%rax 3079bc3d5698SJohn Baldwin.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3080bc3d5698SJohn Baldwin.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3081bc3d5698SJohn Baldwin adoxq %r13,%r13 3082bc3d5698SJohn Baldwin adcxq %r11,%rbx 3083bc3d5698SJohn Baldwin movq 40(%rdi),%r11 3084bc3d5698SJohn Baldwin movq %rax,0(%rdi) 3085bc3d5698SJohn Baldwin movq %rbx,8(%rdi) 3086bc3d5698SJohn Baldwin 3087bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rbx 3088bc3d5698SJohn Baldwin adoxq %r10,%r10 3089bc3d5698SJohn Baldwin adcxq %r12,%rax 3090bc3d5698SJohn Baldwin movq 16(%rsi,%rcx,1),%rdx 3091bc3d5698SJohn Baldwin movq 48(%rdi),%r12 3092bc3d5698SJohn Baldwin adoxq %r11,%r11 3093bc3d5698SJohn Baldwin adcxq %r13,%rbx 3094bc3d5698SJohn Baldwin movq 56(%rdi),%r13 3095bc3d5698SJohn Baldwin movq %rax,16(%rdi) 
3096bc3d5698SJohn Baldwin movq %rbx,24(%rdi) 3097bc3d5698SJohn Baldwin 3098bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rbx 3099bc3d5698SJohn Baldwin adoxq %r12,%r12 3100bc3d5698SJohn Baldwin adcxq %r10,%rax 3101bc3d5698SJohn Baldwin movq 24(%rsi,%rcx,1),%rdx 3102bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 3103bc3d5698SJohn Baldwin movq 64(%rdi),%r10 3104bc3d5698SJohn Baldwin adoxq %r13,%r13 3105bc3d5698SJohn Baldwin adcxq %r11,%rbx 3106bc3d5698SJohn Baldwin movq 72(%rdi),%r11 3107bc3d5698SJohn Baldwin movq %rax,32(%rdi) 3108bc3d5698SJohn Baldwin movq %rbx,40(%rdi) 3109bc3d5698SJohn Baldwin 3110bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rbx 3111bc3d5698SJohn Baldwin adoxq %r10,%r10 3112bc3d5698SJohn Baldwin adcxq %r12,%rax 3113bc3d5698SJohn Baldwin jrcxz .Lsqrx4x_shift_n_add_break 3114bc3d5698SJohn Baldwin.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3115bc3d5698SJohn Baldwin adoxq %r11,%r11 3116bc3d5698SJohn Baldwin adcxq %r13,%rbx 3117bc3d5698SJohn Baldwin movq 80(%rdi),%r12 3118bc3d5698SJohn Baldwin movq 88(%rdi),%r13 3119bc3d5698SJohn Baldwin movq %rax,48(%rdi) 3120bc3d5698SJohn Baldwin movq %rbx,56(%rdi) 3121bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 3122bc3d5698SJohn Baldwin nop 3123bc3d5698SJohn Baldwin jmp .Lsqrx4x_shift_n_add 3124bc3d5698SJohn Baldwin 3125bc3d5698SJohn Baldwin.align 32 3126bc3d5698SJohn Baldwin.Lsqrx4x_shift_n_add_break: 3127bc3d5698SJohn Baldwin adcxq %r13,%rbx 3128bc3d5698SJohn Baldwin movq %rax,48(%rdi) 3129bc3d5698SJohn Baldwin movq %rbx,56(%rdi) 3130bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 3131bc3d5698SJohn Baldwin.byte 102,72,15,126,213 3132bc3d5698SJohn Baldwin__bn_sqrx8x_reduction: 3133bc3d5698SJohn Baldwin xorl %eax,%eax 3134bc3d5698SJohn Baldwin movq 32+8(%rsp),%rbx 3135bc3d5698SJohn Baldwin movq 48+8(%rsp),%rdx 3136bc3d5698SJohn Baldwin leaq -64(%rbp,%r9,1),%rcx 3137bc3d5698SJohn Baldwin 3138bc3d5698SJohn Baldwin movq %rcx,0+8(%rsp) 3139bc3d5698SJohn Baldwin movq %rdi,8+8(%rsp) 3140bc3d5698SJohn Baldwin 3141bc3d5698SJohn Baldwin leaq 
48+8(%rsp),%rdi 3142bc3d5698SJohn Baldwin jmp .Lsqrx8x_reduction_loop 3143bc3d5698SJohn Baldwin 3144bc3d5698SJohn Baldwin.align 32 3145bc3d5698SJohn Baldwin.Lsqrx8x_reduction_loop: 3146bc3d5698SJohn Baldwin movq 8(%rdi),%r9 3147bc3d5698SJohn Baldwin movq 16(%rdi),%r10 3148bc3d5698SJohn Baldwin movq 24(%rdi),%r11 3149bc3d5698SJohn Baldwin movq 32(%rdi),%r12 3150bc3d5698SJohn Baldwin movq %rdx,%r8 3151bc3d5698SJohn Baldwin imulq %rbx,%rdx 3152bc3d5698SJohn Baldwin movq 40(%rdi),%r13 3153bc3d5698SJohn Baldwin movq 48(%rdi),%r14 3154bc3d5698SJohn Baldwin movq 56(%rdi),%r15 3155bc3d5698SJohn Baldwin movq %rax,24+8(%rsp) 3156bc3d5698SJohn Baldwin 3157bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 3158bc3d5698SJohn Baldwin xorq %rsi,%rsi 3159bc3d5698SJohn Baldwin movq $-8,%rcx 3160bc3d5698SJohn Baldwin jmp .Lsqrx8x_reduce 3161bc3d5698SJohn Baldwin 3162bc3d5698SJohn Baldwin.align 32 3163bc3d5698SJohn Baldwin.Lsqrx8x_reduce: 3164bc3d5698SJohn Baldwin movq %r8,%rbx 3165bc3d5698SJohn Baldwin mulxq 0(%rbp),%rax,%r8 3166bc3d5698SJohn Baldwin adcxq %rbx,%rax 3167bc3d5698SJohn Baldwin adoxq %r9,%r8 3168bc3d5698SJohn Baldwin 3169bc3d5698SJohn Baldwin mulxq 8(%rbp),%rbx,%r9 3170bc3d5698SJohn Baldwin adcxq %rbx,%r8 3171bc3d5698SJohn Baldwin adoxq %r10,%r9 3172bc3d5698SJohn Baldwin 3173bc3d5698SJohn Baldwin mulxq 16(%rbp),%rbx,%r10 3174bc3d5698SJohn Baldwin adcxq %rbx,%r9 3175bc3d5698SJohn Baldwin adoxq %r11,%r10 3176bc3d5698SJohn Baldwin 3177bc3d5698SJohn Baldwin mulxq 24(%rbp),%rbx,%r11 3178bc3d5698SJohn Baldwin adcxq %rbx,%r10 3179bc3d5698SJohn Baldwin adoxq %r12,%r11 3180bc3d5698SJohn Baldwin 3181bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3182bc3d5698SJohn Baldwin movq %rdx,%rax 3183bc3d5698SJohn Baldwin movq %r8,%rdx 3184bc3d5698SJohn Baldwin adcxq %rbx,%r11 3185bc3d5698SJohn Baldwin adoxq %r13,%r12 3186bc3d5698SJohn Baldwin 3187bc3d5698SJohn Baldwin mulxq 32+8(%rsp),%rbx,%rdx 3188bc3d5698SJohn Baldwin movq %rax,%rdx 3189bc3d5698SJohn Baldwin movq 
%rax,64+48+8(%rsp,%rcx,8) 3190bc3d5698SJohn Baldwin 3191bc3d5698SJohn Baldwin mulxq 40(%rbp),%rax,%r13 3192bc3d5698SJohn Baldwin adcxq %rax,%r12 3193bc3d5698SJohn Baldwin adoxq %r14,%r13 3194bc3d5698SJohn Baldwin 3195bc3d5698SJohn Baldwin mulxq 48(%rbp),%rax,%r14 3196bc3d5698SJohn Baldwin adcxq %rax,%r13 3197bc3d5698SJohn Baldwin adoxq %r15,%r14 3198bc3d5698SJohn Baldwin 3199bc3d5698SJohn Baldwin mulxq 56(%rbp),%rax,%r15 3200bc3d5698SJohn Baldwin movq %rbx,%rdx 3201bc3d5698SJohn Baldwin adcxq %rax,%r14 3202bc3d5698SJohn Baldwin adoxq %rsi,%r15 3203bc3d5698SJohn Baldwin adcxq %rsi,%r15 3204bc3d5698SJohn Baldwin 3205bc3d5698SJohn Baldwin.byte 0x67,0x67,0x67 3206bc3d5698SJohn Baldwin incq %rcx 3207bc3d5698SJohn Baldwin jnz .Lsqrx8x_reduce 3208bc3d5698SJohn Baldwin 3209bc3d5698SJohn Baldwin movq %rsi,%rax 3210bc3d5698SJohn Baldwin cmpq 0+8(%rsp),%rbp 3211bc3d5698SJohn Baldwin jae .Lsqrx8x_no_tail 3212bc3d5698SJohn Baldwin 3213bc3d5698SJohn Baldwin movq 48+8(%rsp),%rdx 3214bc3d5698SJohn Baldwin addq 0(%rdi),%r8 3215bc3d5698SJohn Baldwin leaq 64(%rbp),%rbp 3216bc3d5698SJohn Baldwin movq $-8,%rcx 3217bc3d5698SJohn Baldwin adcxq 8(%rdi),%r9 3218bc3d5698SJohn Baldwin adcxq 16(%rdi),%r10 3219bc3d5698SJohn Baldwin adcq 24(%rdi),%r11 3220bc3d5698SJohn Baldwin adcq 32(%rdi),%r12 3221bc3d5698SJohn Baldwin adcq 40(%rdi),%r13 3222bc3d5698SJohn Baldwin adcq 48(%rdi),%r14 3223bc3d5698SJohn Baldwin adcq 56(%rdi),%r15 3224bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 3225bc3d5698SJohn Baldwin sbbq %rax,%rax 3226bc3d5698SJohn Baldwin 3227bc3d5698SJohn Baldwin xorq %rsi,%rsi 3228bc3d5698SJohn Baldwin movq %rax,16+8(%rsp) 3229bc3d5698SJohn Baldwin jmp .Lsqrx8x_tail 3230bc3d5698SJohn Baldwin 3231bc3d5698SJohn Baldwin.align 32 3232bc3d5698SJohn Baldwin.Lsqrx8x_tail: 3233bc3d5698SJohn Baldwin movq %r8,%rbx 3234bc3d5698SJohn Baldwin mulxq 0(%rbp),%rax,%r8 3235bc3d5698SJohn Baldwin adcxq %rax,%rbx 3236bc3d5698SJohn Baldwin adoxq %r9,%r8 3237bc3d5698SJohn Baldwin 3238bc3d5698SJohn Baldwin mulxq 
8(%rbp),%rax,%r9 3239bc3d5698SJohn Baldwin adcxq %rax,%r8 3240bc3d5698SJohn Baldwin adoxq %r10,%r9 3241bc3d5698SJohn Baldwin 3242bc3d5698SJohn Baldwin mulxq 16(%rbp),%rax,%r10 3243bc3d5698SJohn Baldwin adcxq %rax,%r9 3244bc3d5698SJohn Baldwin adoxq %r11,%r10 3245bc3d5698SJohn Baldwin 3246bc3d5698SJohn Baldwin mulxq 24(%rbp),%rax,%r11 3247bc3d5698SJohn Baldwin adcxq %rax,%r10 3248bc3d5698SJohn Baldwin adoxq %r12,%r11 3249bc3d5698SJohn Baldwin 3250bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3251bc3d5698SJohn Baldwin adcxq %rax,%r11 3252bc3d5698SJohn Baldwin adoxq %r13,%r12 3253bc3d5698SJohn Baldwin 3254bc3d5698SJohn Baldwin mulxq 40(%rbp),%rax,%r13 3255bc3d5698SJohn Baldwin adcxq %rax,%r12 3256bc3d5698SJohn Baldwin adoxq %r14,%r13 3257bc3d5698SJohn Baldwin 3258bc3d5698SJohn Baldwin mulxq 48(%rbp),%rax,%r14 3259bc3d5698SJohn Baldwin adcxq %rax,%r13 3260bc3d5698SJohn Baldwin adoxq %r15,%r14 3261bc3d5698SJohn Baldwin 3262bc3d5698SJohn Baldwin mulxq 56(%rbp),%rax,%r15 3263bc3d5698SJohn Baldwin movq 72+48+8(%rsp,%rcx,8),%rdx 3264bc3d5698SJohn Baldwin adcxq %rax,%r14 3265bc3d5698SJohn Baldwin adoxq %rsi,%r15 3266bc3d5698SJohn Baldwin movq %rbx,(%rdi,%rcx,8) 3267bc3d5698SJohn Baldwin movq %r8,%rbx 3268bc3d5698SJohn Baldwin adcxq %rsi,%r15 3269bc3d5698SJohn Baldwin 3270bc3d5698SJohn Baldwin incq %rcx 3271bc3d5698SJohn Baldwin jnz .Lsqrx8x_tail 3272bc3d5698SJohn Baldwin 3273bc3d5698SJohn Baldwin cmpq 0+8(%rsp),%rbp 3274bc3d5698SJohn Baldwin jae .Lsqrx8x_tail_done 3275bc3d5698SJohn Baldwin 3276bc3d5698SJohn Baldwin subq 16+8(%rsp),%rsi 3277bc3d5698SJohn Baldwin movq 48+8(%rsp),%rdx 3278bc3d5698SJohn Baldwin leaq 64(%rbp),%rbp 3279bc3d5698SJohn Baldwin adcq 0(%rdi),%r8 3280bc3d5698SJohn Baldwin adcq 8(%rdi),%r9 3281bc3d5698SJohn Baldwin adcq 16(%rdi),%r10 3282bc3d5698SJohn Baldwin adcq 24(%rdi),%r11 3283bc3d5698SJohn Baldwin adcq 32(%rdi),%r12 3284bc3d5698SJohn Baldwin adcq 40(%rdi),%r13 3285bc3d5698SJohn Baldwin adcq 48(%rdi),%r14 3286bc3d5698SJohn 
Baldwin adcq 56(%rdi),%r15 3287bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 3288bc3d5698SJohn Baldwin sbbq %rax,%rax 3289bc3d5698SJohn Baldwin subq $8,%rcx 3290bc3d5698SJohn Baldwin 3291bc3d5698SJohn Baldwin xorq %rsi,%rsi 3292bc3d5698SJohn Baldwin movq %rax,16+8(%rsp) 3293bc3d5698SJohn Baldwin jmp .Lsqrx8x_tail 3294bc3d5698SJohn Baldwin 3295bc3d5698SJohn Baldwin.align 32 3296bc3d5698SJohn Baldwin.Lsqrx8x_tail_done: 3297bc3d5698SJohn Baldwin xorq %rax,%rax 3298bc3d5698SJohn Baldwin addq 24+8(%rsp),%r8 3299bc3d5698SJohn Baldwin adcq $0,%r9 3300bc3d5698SJohn Baldwin adcq $0,%r10 3301bc3d5698SJohn Baldwin adcq $0,%r11 3302bc3d5698SJohn Baldwin adcq $0,%r12 3303bc3d5698SJohn Baldwin adcq $0,%r13 3304bc3d5698SJohn Baldwin adcq $0,%r14 3305bc3d5698SJohn Baldwin adcq $0,%r15 3306bc3d5698SJohn Baldwin adcq $0,%rax 3307bc3d5698SJohn Baldwin 3308bc3d5698SJohn Baldwin subq 16+8(%rsp),%rsi 3309bc3d5698SJohn Baldwin.Lsqrx8x_no_tail: 3310bc3d5698SJohn Baldwin adcq 0(%rdi),%r8 3311bc3d5698SJohn Baldwin.byte 102,72,15,126,217 3312bc3d5698SJohn Baldwin adcq 8(%rdi),%r9 3313bc3d5698SJohn Baldwin movq 56(%rbp),%rsi 3314bc3d5698SJohn Baldwin.byte 102,72,15,126,213 3315bc3d5698SJohn Baldwin adcq 16(%rdi),%r10 3316bc3d5698SJohn Baldwin adcq 24(%rdi),%r11 3317bc3d5698SJohn Baldwin adcq 32(%rdi),%r12 3318bc3d5698SJohn Baldwin adcq 40(%rdi),%r13 3319bc3d5698SJohn Baldwin adcq 48(%rdi),%r14 3320bc3d5698SJohn Baldwin adcq 56(%rdi),%r15 3321bc3d5698SJohn Baldwin adcq $0,%rax 3322bc3d5698SJohn Baldwin 3323bc3d5698SJohn Baldwin movq 32+8(%rsp),%rbx 3324bc3d5698SJohn Baldwin movq 64(%rdi,%rcx,1),%rdx 3325bc3d5698SJohn Baldwin 3326bc3d5698SJohn Baldwin movq %r8,0(%rdi) 3327bc3d5698SJohn Baldwin leaq 64(%rdi),%r8 3328bc3d5698SJohn Baldwin movq %r9,8(%rdi) 3329bc3d5698SJohn Baldwin movq %r10,16(%rdi) 3330bc3d5698SJohn Baldwin movq %r11,24(%rdi) 3331bc3d5698SJohn Baldwin movq %r12,32(%rdi) 3332bc3d5698SJohn Baldwin movq %r13,40(%rdi) 3333bc3d5698SJohn Baldwin movq %r14,48(%rdi) 3334bc3d5698SJohn Baldwin 
/* Remainder of bn_sqrx8x_internal (begins earlier in the file); unchanged. */
	movq	%r15,56(%rdi)

	leaq	64(%rdi,%rcx,1),%rdi
	cmpq	8+8(%rsp),%r8
	jb	.Lsqrx8x_reduction_loop
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	bn_sqrx8x_internal,.-bn_sqrx8x_internal

/*
 * __bn_postx4x_internal
 *
 * Masked add pass over 4-word (32-byte) groups, using BMI1 ANDN:
 *
 *	dst[i] = src[i] + (~mod[i] & mask) + carry
 *
 * where the very first word of mod is decremented before being inverted,
 * so with mask = all-ones the pass yields src - mod, and with mask = 0 it
 * copies src unchanged.
 *
 * In:	%rbp  = mod   (read, advanced by 32 per iteration)
 *	%rdi  = src   (read, advanced by 32 per iteration)
 *	%xmm1 = dst   (moved into %rdx and %rsi)
 *	%rax  = selector; negated here to form the mask
 *		(presumably 0 or 1 on entry -- confirm against callers)
 *	%rcx  = loop bound; arithmetically shifted right by 5 and counted
 *		UP to zero, so it is presumably a negative byte count that
 *		is a multiple of 32 -- confirm against callers
 * Out:	%r9   = -(%rcx on entry), %r10 = %rcx on entry, %rsi = dst
 * Clobbers: %rax %rcx %rdx %rdi %rbp %r8 %r12-%r15, flags
 */
.align	32
__bn_postx4x_internal:
.cfi_startproc
	movq	0(%rbp),%r12
	movq	%rcx,%r10
	movq	%rcx,%r9
	negq	%rax				/* selector -> mask */
	sarq	$3+2,%rcx			/* bytes -> 4-word groups */

.byte	102,72,15,126,202			/* movq %xmm1,%rdx */
.byte	102,72,15,126,206			/* movq %xmm1,%rsi */
	decq	%r12				/* ~(x-1) == -x: folds the +1 of two's complement into word 0 */
	movq	8(%rbp),%r13
	xorq	%r8,%r8				/* no carry into the first group */
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	.Lsqrx4x_sub_entry

.align	16
.Lsqrx4x_sub:
	movq	0(%rbp),%r12
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
.Lsqrx4x_sub_entry:
	andnq	%rax,%r12,%r12			/* r12 = ~r12 & mask */
	leaq	32(%rbp),%rbp
	andnq	%rax,%r13,%r13
	andnq	%rax,%r14,%r14
	andnq	%rax,%r15,%r15

	negq	%r8				/* CF = saved carry (r8 is 0 or -1) */
	adcq	0(%rdi),%r12
	adcq	8(%rdi),%r13
	adcq	16(%rdi),%r14
	adcq	24(%rdi),%r15
	movq	%r12,0(%rdx)
	leaq	32(%rdi),%rdi			/* lea/mov leave CF intact */
	movq	%r13,8(%rdx)
	sbbq	%r8,%r8				/* save carry as 0 / -1 for the next group */
	movq	%r14,16(%rdx)
	movq	%r15,24(%rdx)
	leaq	32(%rdx),%rdx

	incq	%rcx
	jnz	.Lsqrx4x_sub

	negq	%r9

	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	__bn_postx4x_internal,.-__bn_postx4x_internal

/*
 * bn_get_bits5
 *
 * Returns the 5-bit field that starts at a given bit offset of a
 * little-endian bit string.
 * (Presumably "int bn_get_bits5(const BN_ULONG *ap, int off)" -- confirm
 * against the C prototype.)
 *
 * In:	%rdi = base address, %esi = bit offset
 * Out:	%eax = 5 bits starting at that offset
 * Clobbers: %rcx %rsi %r10 %r11, flags
 *
 * A 16-bit window is loaded at base + 2*(off/16).  When off%16 is above 11
 * the five bits would run past that window, so the load address is moved up
 * by one byte and the shift count reduced by 8.
 */
.globl	bn_get_bits5
.type	bn_get_bits5,@function
.align	16
bn_get_bits5:
.cfi_startproc
	leaq	0(%rdi),%r10			/* window at byte 0 ... */
	leaq	1(%rdi),%r11			/* ... or at byte 1 */
	movl	%esi,%ecx
	shrl	$4,%esi				/* index of the 16-bit unit */
	andl	$15,%ecx			/* bit position inside the unit */
	leal	-8(%rcx),%eax
	cmpl	$11,%ecx
	cmovaq	%r11,%r10			/* position > 11: use the byte-shifted window */
	cmoval	%eax,%ecx			/* ... and shift 8 bits less */
	movzwl	(%r10,%rsi,2),%eax
	shrl	%cl,%eax
	andl	$31,%eax
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	bn_get_bits5,.-bn_get_bits5

/*
 * bn_scatter5
 *
 * Copies %esi 64-bit words from (%rdi) into a table, placing word i at
 * table + 8*index + 256*i.
 *
 * In:	%rdi = source words
 *	%esi = number of words; returns immediately when zero
 *	%rdx = table base
 *	%rcx = index (used as a full 64-bit register in the address)
 * Clobbers: %rax %rdx %rdi %esi, flags
 */
.globl	bn_scatter5
.type	bn_scatter5,@function
.align	16
bn_scatter5:
.cfi_startproc
	cmpl	$0,%esi
	jz	.Lscatter_epilogue
	leaq	(%rdx,%rcx,8),%rdx		/* column for this index */
.Lscatter:
	movq	(%rdi),%rax
	leaq	8(%rdi),%rdi
	movq	%rax,(%rdx)
	leaq	256(%rdx),%rdx			/* next row: 256 bytes further */
	subl	$1,%esi
	jnz	.Lscatter
.Lscatter_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	bn_scatter5,.-bn_scatter5

/*
 * bn_gather5
 *
 * Inverse of the table layout written by bn_scatter5: for each of %esi
 * output words, reads the whole 256-byte row of the table and selects the
 * 64-bit entry whose position equals the index by AND-ing with precomputed
 * masks and OR-ing the results.  Every entry of every row is loaded no
 * matter what the index is.
 *
 * In:	%rdi = output words
 *	%esi = number of words (the loop body runs before the count is
 *	       tested, so zero is not handled here)
 *	%rdx = table base; accessed with movdqa, so it must be 16-byte aligned
 *	%ecx = index; compared against 0..31
 * Clobbers: %rax %r10 %r11 %rdi %esi %xmm0-%xmm5, flags
 *
 * Stack: 0x108 bytes are reserved and %rsp is rounded down to 16; the 16
 * mask vectors occupy 0..255(%rsp).  The entry %rsp is kept in %r10.
 *
 * Unwind info: %rsp stops being a valid CFA base as soon as it is lowered
 * and realigned, so the CFA is re-based on %r10 for the body and switched
 * back to %rsp once it has been restored.  These two directives emit no
 * machine code.  This file is generated from x86_64-mont5.pl; the same
 * directives need to live in the generator to survive regeneration.
 */
.globl	bn_gather5
.type	bn_gather5,@function
.align	32
bn_gather5:
.LSEH_begin_bn_gather5:
.cfi_startproc

.byte	0x4c,0x8d,0x14,0x24			/* lea (%rsp),%r10 -- fixed encoding */
.cfi_def_cfa_register	%r10
.byte	0x48,0x81,0xec,0x08,0x01,0x00,0x00	/* sub $0x108,%rsp -- fixed encoding */
	leaq	.Linc(%rip),%rax
	andq	$-16,%rsp

	movd	%ecx,%xmm5
	movdqa	0(%rax),%xmm0			/* dwords {0,0,1,1} */
	movdqa	16(%rax),%xmm1			/* dwords {2,2,2,2} */
	leaq	128(%rdx),%r11			/* bias by 128 so row offsets fit in disp8 */
	leaq	128(%rsp),%rax			/* same bias for the mask area */

	/*
	 * Build 16 masks.  Vector k holds the candidates {2k,2k,2k+1,2k+1};
	 * after pcmpeqd against the broadcast index, the qword whose
	 * candidate matches is all-ones and every other qword is zero.
	 * %xmm4 keeps the {2,2,2,2} increment.
	 */
	pshufd	$0,%xmm5,%xmm5			/* broadcast index to all four dwords */
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-128(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-112(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-96(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-80(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,-64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,-48(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,-32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,-16(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,0(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,16(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,32(%rax)
	movdqa	%xmm4,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm5,%xmm0
	movdqa	%xmm3,48(%rax)
	movdqa	%xmm4,%xmm3

	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm5,%xmm1
	movdqa	%xmm0,64(%rax)
	movdqa	%xmm4,%xmm0

	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm5,%xmm2
	movdqa	%xmm1,80(%rax)
	movdqa	%xmm4,%xmm1

	paddd	%xmm3,%xmm0
	pcmpeqd	%xmm5,%xmm3
	movdqa	%xmm2,96(%rax)
	movdqa	%xmm4,%xmm2
	movdqa	%xmm3,112(%rax)
	jmp	.Lgather			/* hop over the alignment padding */

.align	32
.Lgather:
	/* One output word per iteration; %xmm4 and %xmm5 accumulate the OR. */
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	movdqa	-128(%r11),%xmm0
	movdqa	-112(%r11),%xmm1
	movdqa	-96(%r11),%xmm2
	pand	-128(%rax),%xmm0
	movdqa	-80(%r11),%xmm3
	pand	-112(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-80(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	-64(%r11),%xmm0
	movdqa	-48(%r11),%xmm1
	movdqa	-32(%r11),%xmm2
	pand	-64(%rax),%xmm0
	movdqa	-16(%r11),%xmm3
	pand	-48(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	-32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	-16(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	0(%r11),%xmm0
	movdqa	16(%r11),%xmm1
	movdqa	32(%r11),%xmm2
	pand	0(%rax),%xmm0
	movdqa	48(%r11),%xmm3
	pand	16(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	32(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	48(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqa	64(%r11),%xmm0
	movdqa	80(%r11),%xmm1
	movdqa	96(%r11),%xmm2
	pand	64(%rax),%xmm0
	movdqa	112(%r11),%xmm3
	pand	80(%rax),%xmm1
	por	%xmm0,%xmm4
	pand	96(%rax),%xmm2
	por	%xmm1,%xmm5
	pand	112(%rax),%xmm3
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	por	%xmm5,%xmm4
	leaq	256(%r11),%r11			/* next table row */
	pshufd	$0x4e,%xmm4,%xmm0		/* swap the two qwords */
	por	%xmm4,%xmm0			/* fold high qword into low */
	movq	%xmm0,(%rdi)
	leaq	8(%rdi),%rdi
	subl	$1,%esi
	jnz	.Lgather

	leaq	(%r10),%rsp			/* restore entry %rsp */
.cfi_def_cfa_register	%rsp
	.byte	0xf3,0xc3			/* rep ret */
.LSEH_end_bn_gather5:
.cfi_endproc
.size	bn_gather5,.-bn_gather5

/*
 * Seed vectors for the index masks: candidates {0,0,1,1} and the per-step
 * increment {2,2,2,2}.  Also loaded through .Linc(%rip) elsewhere in this
 * file.
 */
.align	64
.Linc:
.long	0,0, 1,1
.long	2,2, 2,2
/* ASCII: "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

/*
 * ELF property note: owner "GNU", note type 5, one property record of
 * type 0xc0000002 carrying the 32-bit value 3.
 * NOTE(review): per the x86-64 psABI this looks like
 * GNU_PROPERTY_X86_FEATURE_1_AND with the IBT and SHSTK bits set, yet the
 * entry points visible in this part of the file do not begin with endbr64
 * -- confirm that none of them is reached through an indirect branch, or
 * that the generator adds the markers.
 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f				/* name size */
	.long 4f - 1f				/* descriptor size */
	.long 5					/* note type */
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002			/* property type */
	.long 3f - 2f				/* property data size */
2:
	.long 3					/* property value */
3:
	.p2align 3
4: