/* Do not modify. This file is auto-generated from x86_64-mont.pl. */
.text

/*
 * bn_mul_mont -- global entry point plus the generic word-by-word path.
 *
 * Presumably the Montgomery multiplication primitive
 *     int bn_mul_mont(rp, ap, bp, np, n0, num)
 * The argument names are assumed (confirm against the generator script);
 * the register usage below is what this code actually does.
 *
 * In:   %rdi  result array, written by .Lsub and .Lcopy        (rp, assumed)
 *       %rsi  array multiplied by each word taken from %rdx    (ap, assumed)
 *       %rdx  array consumed one word per outer iteration      (bp, assumed)
 *       %rcx  array multiplied by the per-iteration factor in %rbp and
 *             subtracted from the accumulator in .Lsub         (np, assumed)
 *       %r8   pointer to one word, loaded once                 (n0, assumed)
 *       %r9d  count of 64-bit words, zero-extended on entry    (num)
 * Out:  %rax = 1
 *
 * Dispatch on entry:
 *   count & 3 != 0, or count < 8      -> generic path (.Lmul_enter)
 *   %rdx != %rsi                      -> .Lmul4x_enter
 *   %rdx == %rsi and count & 7 == 0   -> .Lsqr8x_enter
 *   otherwise                         -> .Lmul4x_enter
 *
 * Generic path registers:
 *   %r12 = copy of %rdx (mulq clobbers %rdx), %rbx = current word of (%r12),
 *   %rbp = per-iteration reduction factor, %r14 = outer index,
 *   %r15 = inner index, (%rsp) = scratch accumulator of count+1 words,
 *   8(%rsp,%r9,8) = caller's %rsp.
 */
.globl	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.cfi_startproc
	movl	%r9d,%r9d			/* zero-extend the 32-bit count */
	movq	%rsp,%rax			/* %rax = caller's %rsp, kept until the frame is built */
.cfi_def_cfa_register	%rax
	testl	$3,%r9d
	jnz	.Lmul_enter			/* count not a multiple of 4 */
	cmpl	$8,%r9d
	jb	.Lmul_enter			/* fewer than 8 words */
	movl	OPENSSL_ia32cap_P+8(%rip),%r11d	/* capability word tested at .Lmul4x_enter */
	cmpq	%rsi,%rdx
	jne	.Lmul4x_enter			/* distinct operands */
	testl	$7,%r9d
	jz	.Lsqr8x_enter			/* same operand, count multiple of 8 */
	jmp	.Lmul4x_enter

.align	16
.Lmul_enter:
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	/* %r10 = (%rsp - 16 - 8*count) rounded down to 1024: new stack bottom */
	negq	%r9
	movq	%rsp,%r11
	leaq	-16(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	/*
	 * Move %rsp down to %r10 in steps of at most 4096 bytes, reading one
	 * word at every step so each page in between is touched in order.
	 */
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
	jmp	.Lmul_page_walk_done

.align	16
.Lmul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	.Lmul_page_walk
.Lmul_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)		/* save caller's %rsp above the scratch words */
	/* DW_CFA_def_cfa_expression: CFA = *(%rsp + 8 + %r9*8) + 8 */
.cfi_escape	0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
	movq	%rdx,%r12			/* mulq overwrites %rdx, keep the pointer in %r12 */
	movq	(%r8),%r8			/* %r8 = the single word behind the fifth argument */
	movq	(%r12),%rbx			/* first word of the %rdx array */
	movq	(%rsi),%rax			/* first word of the %rsi array */

	xorq	%r14,%r14			/* outer index = 0 */
	xorq	%r15,%r15			/* inner index = 0 */

	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10			/* low word of the first product */
	movq	(%rcx),%rax

	imulq	%r10,%rbp			/* %rbp = low word * %r8 (mod 2^64): reduction factor */
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10			/* sum itself is never stored; only the carry is used */
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.L1st_enter

	/* First pass: fills the scratch area; nothing is read back from it yet. */
.align	16
.L1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)		/* %r15 is already one ahead, hence -16 */
	movq	%rdx,%r13

.L1st_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15			/* lea keeps the flags intact */
	movq	%rdx,%r10

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.L1st

	addq	%rax,%r13
	movq	(%rsi),%rax			/* reload word 0 of %rsi for the next outer iteration */
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)		/* top scratch word */
	movq	%rdx,(%rsp,%r9,8)		/* carry word above it */

	leaq	1(%r14),%r14
	jmp	.Louter
	/* Remaining passes: same shape, but the previous scratch words are added in. */
.align	16
.Louter:
	movq	(%r12,%r14,8),%rbx		/* next word of the %rdx array */
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp			/* new reduction factor for this pass */
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10			/* only the carry is used */
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	.Linner_enter

.align	16
.Linner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10		/* next scratch word from the previous pass */
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

.Linner_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp
	cmpq	%r9,%r15
	jne	.Linner

	addq	%rax,%r13
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10		/* carry word stored by the previous pass */
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	.Louter

	/*
	 * Subtract the %rcx array from the scratch words into (%rdi).
	 * The xorq clears CF; movq, leaq and decq do not modify CF, so the
	 * borrow is carried through the whole loop.
	 */
	xorq	%r14,%r14
	movq	(%rsp),%rax
	movq	%r9,%r15

.align	16
.Lsub:	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)
	movq	8(%rsp,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	.Lsub

	/*
	 * %rax = carry word minus final borrow, used as a select mask
	 * (expected to be 0 or all-ones -- confirm); %rbx = ~%rax.
	 */
	sbbq	$0,%rax
	movq	$-1,%rbx
	xorq	%rax,%rbx
	xorq	%r14,%r14
	movq	%r9,%r15

	/* Branch-free select: (%rdi) = ((%rdi) & %rbx) | (scratch & %rax). */
.Lcopy:
	movq	(%rdi,%r14,8),%rcx
	movq	(%rsp,%r14,8),%rdx
	andq	%rbx,%rcx
	andq	%rax,%rdx
	movq	%r9,(%rsp,%r14,8)		/* overwrite the scratch word with the count */
	orq	%rcx,%rdx
	movq	%rdx,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	.Lcopy

	movq	8(%rsp,%r9,8),%rsi		/* %rsi = caller's %rsp saved in the prologue */
.cfi_def_cfa	%rsi,8
	movq	$1,%rax				/* return value */
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	bn_mul_mont,.-bn_mul_mont
Baldwin.type bn_mul4x_mont,@function 260bc3d5698SJohn Baldwin.align 16 261bc3d5698SJohn Baldwinbn_mul4x_mont: 262bc3d5698SJohn Baldwin.cfi_startproc 263bc3d5698SJohn Baldwin movl %r9d,%r9d 264bc3d5698SJohn Baldwin movq %rsp,%rax 265bc3d5698SJohn Baldwin.cfi_def_cfa_register %rax 266bc3d5698SJohn Baldwin.Lmul4x_enter: 267bc3d5698SJohn Baldwin andl $0x80100,%r11d 268bc3d5698SJohn Baldwin cmpl $0x80100,%r11d 269bc3d5698SJohn Baldwin je .Lmulx4x_enter 270bc3d5698SJohn Baldwin pushq %rbx 271bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 272bc3d5698SJohn Baldwin pushq %rbp 273bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 274bc3d5698SJohn Baldwin pushq %r12 275bc3d5698SJohn Baldwin.cfi_offset %r12,-32 276bc3d5698SJohn Baldwin pushq %r13 277bc3d5698SJohn Baldwin.cfi_offset %r13,-40 278bc3d5698SJohn Baldwin pushq %r14 279bc3d5698SJohn Baldwin.cfi_offset %r14,-48 280bc3d5698SJohn Baldwin pushq %r15 281bc3d5698SJohn Baldwin.cfi_offset %r15,-56 282bc3d5698SJohn Baldwin 283bc3d5698SJohn Baldwin negq %r9 284bc3d5698SJohn Baldwin movq %rsp,%r11 285bc3d5698SJohn Baldwin leaq -32(%rsp,%r9,8),%r10 286bc3d5698SJohn Baldwin negq %r9 287bc3d5698SJohn Baldwin andq $-1024,%r10 288bc3d5698SJohn Baldwin 289bc3d5698SJohn Baldwin subq %r10,%r11 290bc3d5698SJohn Baldwin andq $-4096,%r11 291bc3d5698SJohn Baldwin leaq (%r10,%r11,1),%rsp 292bc3d5698SJohn Baldwin movq (%rsp),%r11 293bc3d5698SJohn Baldwin cmpq %r10,%rsp 294bc3d5698SJohn Baldwin ja .Lmul4x_page_walk 295bc3d5698SJohn Baldwin jmp .Lmul4x_page_walk_done 296bc3d5698SJohn Baldwin 297bc3d5698SJohn Baldwin.Lmul4x_page_walk: 298bc3d5698SJohn Baldwin leaq -4096(%rsp),%rsp 299bc3d5698SJohn Baldwin movq (%rsp),%r11 300bc3d5698SJohn Baldwin cmpq %r10,%rsp 301bc3d5698SJohn Baldwin ja .Lmul4x_page_walk 302bc3d5698SJohn Baldwin.Lmul4x_page_walk_done: 303bc3d5698SJohn Baldwin 304bc3d5698SJohn Baldwin movq %rax,8(%rsp,%r9,8) 305bc3d5698SJohn Baldwin.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08 306bc3d5698SJohn Baldwin.Lmul4x_body: 
307bc3d5698SJohn Baldwin movq %rdi,16(%rsp,%r9,8) 308bc3d5698SJohn Baldwin movq %rdx,%r12 309bc3d5698SJohn Baldwin movq (%r8),%r8 310bc3d5698SJohn Baldwin movq (%r12),%rbx 311bc3d5698SJohn Baldwin movq (%rsi),%rax 312bc3d5698SJohn Baldwin 313bc3d5698SJohn Baldwin xorq %r14,%r14 314bc3d5698SJohn Baldwin xorq %r15,%r15 315bc3d5698SJohn Baldwin 316bc3d5698SJohn Baldwin movq %r8,%rbp 317bc3d5698SJohn Baldwin mulq %rbx 318bc3d5698SJohn Baldwin movq %rax,%r10 319bc3d5698SJohn Baldwin movq (%rcx),%rax 320bc3d5698SJohn Baldwin 321bc3d5698SJohn Baldwin imulq %r10,%rbp 322bc3d5698SJohn Baldwin movq %rdx,%r11 323bc3d5698SJohn Baldwin 324bc3d5698SJohn Baldwin mulq %rbp 325bc3d5698SJohn Baldwin addq %rax,%r10 326bc3d5698SJohn Baldwin movq 8(%rsi),%rax 327bc3d5698SJohn Baldwin adcq $0,%rdx 328bc3d5698SJohn Baldwin movq %rdx,%rdi 329bc3d5698SJohn Baldwin 330bc3d5698SJohn Baldwin mulq %rbx 331bc3d5698SJohn Baldwin addq %rax,%r11 332bc3d5698SJohn Baldwin movq 8(%rcx),%rax 333bc3d5698SJohn Baldwin adcq $0,%rdx 334bc3d5698SJohn Baldwin movq %rdx,%r10 335bc3d5698SJohn Baldwin 336bc3d5698SJohn Baldwin mulq %rbp 337bc3d5698SJohn Baldwin addq %rax,%rdi 338bc3d5698SJohn Baldwin movq 16(%rsi),%rax 339bc3d5698SJohn Baldwin adcq $0,%rdx 340bc3d5698SJohn Baldwin addq %r11,%rdi 341bc3d5698SJohn Baldwin leaq 4(%r15),%r15 342bc3d5698SJohn Baldwin adcq $0,%rdx 343bc3d5698SJohn Baldwin movq %rdi,(%rsp) 344bc3d5698SJohn Baldwin movq %rdx,%r13 345bc3d5698SJohn Baldwin jmp .L1st4x 346bc3d5698SJohn Baldwin.align 16 347bc3d5698SJohn Baldwin.L1st4x: 348bc3d5698SJohn Baldwin mulq %rbx 349bc3d5698SJohn Baldwin addq %rax,%r10 350bc3d5698SJohn Baldwin movq -16(%rcx,%r15,8),%rax 351bc3d5698SJohn Baldwin adcq $0,%rdx 352bc3d5698SJohn Baldwin movq %rdx,%r11 353bc3d5698SJohn Baldwin 354bc3d5698SJohn Baldwin mulq %rbp 355bc3d5698SJohn Baldwin addq %rax,%r13 356bc3d5698SJohn Baldwin movq -8(%rsi,%r15,8),%rax 357bc3d5698SJohn Baldwin adcq $0,%rdx 358bc3d5698SJohn Baldwin addq %r10,%r13 359bc3d5698SJohn Baldwin 
adcq $0,%rdx 360bc3d5698SJohn Baldwin movq %r13,-24(%rsp,%r15,8) 361bc3d5698SJohn Baldwin movq %rdx,%rdi 362bc3d5698SJohn Baldwin 363bc3d5698SJohn Baldwin mulq %rbx 364bc3d5698SJohn Baldwin addq %rax,%r11 365bc3d5698SJohn Baldwin movq -8(%rcx,%r15,8),%rax 366bc3d5698SJohn Baldwin adcq $0,%rdx 367bc3d5698SJohn Baldwin movq %rdx,%r10 368bc3d5698SJohn Baldwin 369bc3d5698SJohn Baldwin mulq %rbp 370bc3d5698SJohn Baldwin addq %rax,%rdi 371bc3d5698SJohn Baldwin movq (%rsi,%r15,8),%rax 372bc3d5698SJohn Baldwin adcq $0,%rdx 373bc3d5698SJohn Baldwin addq %r11,%rdi 374bc3d5698SJohn Baldwin adcq $0,%rdx 375bc3d5698SJohn Baldwin movq %rdi,-16(%rsp,%r15,8) 376bc3d5698SJohn Baldwin movq %rdx,%r13 377bc3d5698SJohn Baldwin 378bc3d5698SJohn Baldwin mulq %rbx 379bc3d5698SJohn Baldwin addq %rax,%r10 380bc3d5698SJohn Baldwin movq (%rcx,%r15,8),%rax 381bc3d5698SJohn Baldwin adcq $0,%rdx 382bc3d5698SJohn Baldwin movq %rdx,%r11 383bc3d5698SJohn Baldwin 384bc3d5698SJohn Baldwin mulq %rbp 385bc3d5698SJohn Baldwin addq %rax,%r13 386bc3d5698SJohn Baldwin movq 8(%rsi,%r15,8),%rax 387bc3d5698SJohn Baldwin adcq $0,%rdx 388bc3d5698SJohn Baldwin addq %r10,%r13 389bc3d5698SJohn Baldwin adcq $0,%rdx 390bc3d5698SJohn Baldwin movq %r13,-8(%rsp,%r15,8) 391bc3d5698SJohn Baldwin movq %rdx,%rdi 392bc3d5698SJohn Baldwin 393bc3d5698SJohn Baldwin mulq %rbx 394bc3d5698SJohn Baldwin addq %rax,%r11 395bc3d5698SJohn Baldwin movq 8(%rcx,%r15,8),%rax 396bc3d5698SJohn Baldwin adcq $0,%rdx 397bc3d5698SJohn Baldwin leaq 4(%r15),%r15 398bc3d5698SJohn Baldwin movq %rdx,%r10 399bc3d5698SJohn Baldwin 400bc3d5698SJohn Baldwin mulq %rbp 401bc3d5698SJohn Baldwin addq %rax,%rdi 402bc3d5698SJohn Baldwin movq -16(%rsi,%r15,8),%rax 403bc3d5698SJohn Baldwin adcq $0,%rdx 404bc3d5698SJohn Baldwin addq %r11,%rdi 405bc3d5698SJohn Baldwin adcq $0,%rdx 406bc3d5698SJohn Baldwin movq %rdi,-32(%rsp,%r15,8) 407bc3d5698SJohn Baldwin movq %rdx,%r13 408bc3d5698SJohn Baldwin cmpq %r9,%r15 409bc3d5698SJohn Baldwin jb .L1st4x 410bc3d5698SJohn 
Baldwin 411bc3d5698SJohn Baldwin mulq %rbx 412bc3d5698SJohn Baldwin addq %rax,%r10 413bc3d5698SJohn Baldwin movq -16(%rcx,%r15,8),%rax 414bc3d5698SJohn Baldwin adcq $0,%rdx 415bc3d5698SJohn Baldwin movq %rdx,%r11 416bc3d5698SJohn Baldwin 417bc3d5698SJohn Baldwin mulq %rbp 418bc3d5698SJohn Baldwin addq %rax,%r13 419bc3d5698SJohn Baldwin movq -8(%rsi,%r15,8),%rax 420bc3d5698SJohn Baldwin adcq $0,%rdx 421bc3d5698SJohn Baldwin addq %r10,%r13 422bc3d5698SJohn Baldwin adcq $0,%rdx 423bc3d5698SJohn Baldwin movq %r13,-24(%rsp,%r15,8) 424bc3d5698SJohn Baldwin movq %rdx,%rdi 425bc3d5698SJohn Baldwin 426bc3d5698SJohn Baldwin mulq %rbx 427bc3d5698SJohn Baldwin addq %rax,%r11 428bc3d5698SJohn Baldwin movq -8(%rcx,%r15,8),%rax 429bc3d5698SJohn Baldwin adcq $0,%rdx 430bc3d5698SJohn Baldwin movq %rdx,%r10 431bc3d5698SJohn Baldwin 432bc3d5698SJohn Baldwin mulq %rbp 433bc3d5698SJohn Baldwin addq %rax,%rdi 434bc3d5698SJohn Baldwin movq (%rsi),%rax 435bc3d5698SJohn Baldwin adcq $0,%rdx 436bc3d5698SJohn Baldwin addq %r11,%rdi 437bc3d5698SJohn Baldwin adcq $0,%rdx 438bc3d5698SJohn Baldwin movq %rdi,-16(%rsp,%r15,8) 439bc3d5698SJohn Baldwin movq %rdx,%r13 440bc3d5698SJohn Baldwin 441bc3d5698SJohn Baldwin xorq %rdi,%rdi 442bc3d5698SJohn Baldwin addq %r10,%r13 443bc3d5698SJohn Baldwin adcq $0,%rdi 444bc3d5698SJohn Baldwin movq %r13,-8(%rsp,%r15,8) 445bc3d5698SJohn Baldwin movq %rdi,(%rsp,%r15,8) 446bc3d5698SJohn Baldwin 447bc3d5698SJohn Baldwin leaq 1(%r14),%r14 448bc3d5698SJohn Baldwin.align 4 449bc3d5698SJohn Baldwin.Louter4x: 450bc3d5698SJohn Baldwin movq (%r12,%r14,8),%rbx 451bc3d5698SJohn Baldwin xorq %r15,%r15 452bc3d5698SJohn Baldwin movq (%rsp),%r10 453bc3d5698SJohn Baldwin movq %r8,%rbp 454bc3d5698SJohn Baldwin mulq %rbx 455bc3d5698SJohn Baldwin addq %rax,%r10 456bc3d5698SJohn Baldwin movq (%rcx),%rax 457bc3d5698SJohn Baldwin adcq $0,%rdx 458bc3d5698SJohn Baldwin 459bc3d5698SJohn Baldwin imulq %r10,%rbp 460bc3d5698SJohn Baldwin movq %rdx,%r11 461bc3d5698SJohn Baldwin 
462bc3d5698SJohn Baldwin mulq %rbp 463bc3d5698SJohn Baldwin addq %rax,%r10 464bc3d5698SJohn Baldwin movq 8(%rsi),%rax 465bc3d5698SJohn Baldwin adcq $0,%rdx 466bc3d5698SJohn Baldwin movq %rdx,%rdi 467bc3d5698SJohn Baldwin 468bc3d5698SJohn Baldwin mulq %rbx 469bc3d5698SJohn Baldwin addq %rax,%r11 470bc3d5698SJohn Baldwin movq 8(%rcx),%rax 471bc3d5698SJohn Baldwin adcq $0,%rdx 472bc3d5698SJohn Baldwin addq 8(%rsp),%r11 473bc3d5698SJohn Baldwin adcq $0,%rdx 474bc3d5698SJohn Baldwin movq %rdx,%r10 475bc3d5698SJohn Baldwin 476bc3d5698SJohn Baldwin mulq %rbp 477bc3d5698SJohn Baldwin addq %rax,%rdi 478bc3d5698SJohn Baldwin movq 16(%rsi),%rax 479bc3d5698SJohn Baldwin adcq $0,%rdx 480bc3d5698SJohn Baldwin addq %r11,%rdi 481bc3d5698SJohn Baldwin leaq 4(%r15),%r15 482bc3d5698SJohn Baldwin adcq $0,%rdx 483bc3d5698SJohn Baldwin movq %rdi,(%rsp) 484bc3d5698SJohn Baldwin movq %rdx,%r13 485bc3d5698SJohn Baldwin jmp .Linner4x 486bc3d5698SJohn Baldwin.align 16 487bc3d5698SJohn Baldwin.Linner4x: 488bc3d5698SJohn Baldwin mulq %rbx 489bc3d5698SJohn Baldwin addq %rax,%r10 490bc3d5698SJohn Baldwin movq -16(%rcx,%r15,8),%rax 491bc3d5698SJohn Baldwin adcq $0,%rdx 492bc3d5698SJohn Baldwin addq -16(%rsp,%r15,8),%r10 493bc3d5698SJohn Baldwin adcq $0,%rdx 494bc3d5698SJohn Baldwin movq %rdx,%r11 495bc3d5698SJohn Baldwin 496bc3d5698SJohn Baldwin mulq %rbp 497bc3d5698SJohn Baldwin addq %rax,%r13 498bc3d5698SJohn Baldwin movq -8(%rsi,%r15,8),%rax 499bc3d5698SJohn Baldwin adcq $0,%rdx 500bc3d5698SJohn Baldwin addq %r10,%r13 501bc3d5698SJohn Baldwin adcq $0,%rdx 502bc3d5698SJohn Baldwin movq %r13,-24(%rsp,%r15,8) 503bc3d5698SJohn Baldwin movq %rdx,%rdi 504bc3d5698SJohn Baldwin 505bc3d5698SJohn Baldwin mulq %rbx 506bc3d5698SJohn Baldwin addq %rax,%r11 507bc3d5698SJohn Baldwin movq -8(%rcx,%r15,8),%rax 508bc3d5698SJohn Baldwin adcq $0,%rdx 509bc3d5698SJohn Baldwin addq -8(%rsp,%r15,8),%r11 510bc3d5698SJohn Baldwin adcq $0,%rdx 511bc3d5698SJohn Baldwin movq %rdx,%r10 512bc3d5698SJohn Baldwin 
513bc3d5698SJohn Baldwin mulq %rbp 514bc3d5698SJohn Baldwin addq %rax,%rdi 515bc3d5698SJohn Baldwin movq (%rsi,%r15,8),%rax 516bc3d5698SJohn Baldwin adcq $0,%rdx 517bc3d5698SJohn Baldwin addq %r11,%rdi 518bc3d5698SJohn Baldwin adcq $0,%rdx 519bc3d5698SJohn Baldwin movq %rdi,-16(%rsp,%r15,8) 520bc3d5698SJohn Baldwin movq %rdx,%r13 521bc3d5698SJohn Baldwin 522bc3d5698SJohn Baldwin mulq %rbx 523bc3d5698SJohn Baldwin addq %rax,%r10 524bc3d5698SJohn Baldwin movq (%rcx,%r15,8),%rax 525bc3d5698SJohn Baldwin adcq $0,%rdx 526bc3d5698SJohn Baldwin addq (%rsp,%r15,8),%r10 527bc3d5698SJohn Baldwin adcq $0,%rdx 528bc3d5698SJohn Baldwin movq %rdx,%r11 529bc3d5698SJohn Baldwin 530bc3d5698SJohn Baldwin mulq %rbp 531bc3d5698SJohn Baldwin addq %rax,%r13 532bc3d5698SJohn Baldwin movq 8(%rsi,%r15,8),%rax 533bc3d5698SJohn Baldwin adcq $0,%rdx 534bc3d5698SJohn Baldwin addq %r10,%r13 535bc3d5698SJohn Baldwin adcq $0,%rdx 536bc3d5698SJohn Baldwin movq %r13,-8(%rsp,%r15,8) 537bc3d5698SJohn Baldwin movq %rdx,%rdi 538bc3d5698SJohn Baldwin 539bc3d5698SJohn Baldwin mulq %rbx 540bc3d5698SJohn Baldwin addq %rax,%r11 541bc3d5698SJohn Baldwin movq 8(%rcx,%r15,8),%rax 542bc3d5698SJohn Baldwin adcq $0,%rdx 543bc3d5698SJohn Baldwin addq 8(%rsp,%r15,8),%r11 544bc3d5698SJohn Baldwin adcq $0,%rdx 545bc3d5698SJohn Baldwin leaq 4(%r15),%r15 546bc3d5698SJohn Baldwin movq %rdx,%r10 547bc3d5698SJohn Baldwin 548bc3d5698SJohn Baldwin mulq %rbp 549bc3d5698SJohn Baldwin addq %rax,%rdi 550bc3d5698SJohn Baldwin movq -16(%rsi,%r15,8),%rax 551bc3d5698SJohn Baldwin adcq $0,%rdx 552bc3d5698SJohn Baldwin addq %r11,%rdi 553bc3d5698SJohn Baldwin adcq $0,%rdx 554bc3d5698SJohn Baldwin movq %rdi,-32(%rsp,%r15,8) 555bc3d5698SJohn Baldwin movq %rdx,%r13 556bc3d5698SJohn Baldwin cmpq %r9,%r15 557bc3d5698SJohn Baldwin jb .Linner4x 558bc3d5698SJohn Baldwin 559bc3d5698SJohn Baldwin mulq %rbx 560bc3d5698SJohn Baldwin addq %rax,%r10 561bc3d5698SJohn Baldwin movq -16(%rcx,%r15,8),%rax 562bc3d5698SJohn Baldwin adcq $0,%rdx 
563bc3d5698SJohn Baldwin addq -16(%rsp,%r15,8),%r10 564bc3d5698SJohn Baldwin adcq $0,%rdx 565bc3d5698SJohn Baldwin movq %rdx,%r11 566bc3d5698SJohn Baldwin 567bc3d5698SJohn Baldwin mulq %rbp 568bc3d5698SJohn Baldwin addq %rax,%r13 569bc3d5698SJohn Baldwin movq -8(%rsi,%r15,8),%rax 570bc3d5698SJohn Baldwin adcq $0,%rdx 571bc3d5698SJohn Baldwin addq %r10,%r13 572bc3d5698SJohn Baldwin adcq $0,%rdx 573bc3d5698SJohn Baldwin movq %r13,-24(%rsp,%r15,8) 574bc3d5698SJohn Baldwin movq %rdx,%rdi 575bc3d5698SJohn Baldwin 576bc3d5698SJohn Baldwin mulq %rbx 577bc3d5698SJohn Baldwin addq %rax,%r11 578bc3d5698SJohn Baldwin movq -8(%rcx,%r15,8),%rax 579bc3d5698SJohn Baldwin adcq $0,%rdx 580bc3d5698SJohn Baldwin addq -8(%rsp,%r15,8),%r11 581bc3d5698SJohn Baldwin adcq $0,%rdx 582bc3d5698SJohn Baldwin leaq 1(%r14),%r14 583bc3d5698SJohn Baldwin movq %rdx,%r10 584bc3d5698SJohn Baldwin 585bc3d5698SJohn Baldwin mulq %rbp 586bc3d5698SJohn Baldwin addq %rax,%rdi 587bc3d5698SJohn Baldwin movq (%rsi),%rax 588bc3d5698SJohn Baldwin adcq $0,%rdx 589bc3d5698SJohn Baldwin addq %r11,%rdi 590bc3d5698SJohn Baldwin adcq $0,%rdx 591bc3d5698SJohn Baldwin movq %rdi,-16(%rsp,%r15,8) 592bc3d5698SJohn Baldwin movq %rdx,%r13 593bc3d5698SJohn Baldwin 594bc3d5698SJohn Baldwin xorq %rdi,%rdi 595bc3d5698SJohn Baldwin addq %r10,%r13 596bc3d5698SJohn Baldwin adcq $0,%rdi 597bc3d5698SJohn Baldwin addq (%rsp,%r9,8),%r13 598bc3d5698SJohn Baldwin adcq $0,%rdi 599bc3d5698SJohn Baldwin movq %r13,-8(%rsp,%r15,8) 600bc3d5698SJohn Baldwin movq %rdi,(%rsp,%r15,8) 601bc3d5698SJohn Baldwin 602bc3d5698SJohn Baldwin cmpq %r9,%r14 603bc3d5698SJohn Baldwin jb .Louter4x 604bc3d5698SJohn Baldwin movq 16(%rsp,%r9,8),%rdi 605bc3d5698SJohn Baldwin leaq -4(%r9),%r15 606bc3d5698SJohn Baldwin movq 0(%rsp),%rax 607bc3d5698SJohn Baldwin movq 8(%rsp),%rdx 608bc3d5698SJohn Baldwin shrq $2,%r15 609bc3d5698SJohn Baldwin leaq (%rsp),%rsi 610bc3d5698SJohn Baldwin xorq %r14,%r14 611bc3d5698SJohn Baldwin 612bc3d5698SJohn Baldwin subq 0(%rcx),%rax 
613bc3d5698SJohn Baldwin movq 16(%rsi),%rbx 614bc3d5698SJohn Baldwin movq 24(%rsi),%rbp 615bc3d5698SJohn Baldwin sbbq 8(%rcx),%rdx 616bc3d5698SJohn Baldwin 617bc3d5698SJohn Baldwin.Lsub4x: 618bc3d5698SJohn Baldwin movq %rax,0(%rdi,%r14,8) 619bc3d5698SJohn Baldwin movq %rdx,8(%rdi,%r14,8) 620bc3d5698SJohn Baldwin sbbq 16(%rcx,%r14,8),%rbx 621bc3d5698SJohn Baldwin movq 32(%rsi,%r14,8),%rax 622bc3d5698SJohn Baldwin movq 40(%rsi,%r14,8),%rdx 623bc3d5698SJohn Baldwin sbbq 24(%rcx,%r14,8),%rbp 624bc3d5698SJohn Baldwin movq %rbx,16(%rdi,%r14,8) 625bc3d5698SJohn Baldwin movq %rbp,24(%rdi,%r14,8) 626bc3d5698SJohn Baldwin sbbq 32(%rcx,%r14,8),%rax 627bc3d5698SJohn Baldwin movq 48(%rsi,%r14,8),%rbx 628bc3d5698SJohn Baldwin movq 56(%rsi,%r14,8),%rbp 629bc3d5698SJohn Baldwin sbbq 40(%rcx,%r14,8),%rdx 630bc3d5698SJohn Baldwin leaq 4(%r14),%r14 631bc3d5698SJohn Baldwin decq %r15 632bc3d5698SJohn Baldwin jnz .Lsub4x 633bc3d5698SJohn Baldwin 634bc3d5698SJohn Baldwin movq %rax,0(%rdi,%r14,8) 635bc3d5698SJohn Baldwin movq 32(%rsi,%r14,8),%rax 636bc3d5698SJohn Baldwin sbbq 16(%rcx,%r14,8),%rbx 637bc3d5698SJohn Baldwin movq %rdx,8(%rdi,%r14,8) 638bc3d5698SJohn Baldwin sbbq 24(%rcx,%r14,8),%rbp 639bc3d5698SJohn Baldwin movq %rbx,16(%rdi,%r14,8) 640bc3d5698SJohn Baldwin 641bc3d5698SJohn Baldwin sbbq $0,%rax 642bc3d5698SJohn Baldwin movq %rbp,24(%rdi,%r14,8) 643bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 644bc3d5698SJohn Baldwin.byte 102,72,15,110,224 645bc3d5698SJohn Baldwin pcmpeqd %xmm5,%xmm5 646bc3d5698SJohn Baldwin pshufd $0,%xmm4,%xmm4 647bc3d5698SJohn Baldwin movq %r9,%r15 648bc3d5698SJohn Baldwin pxor %xmm4,%xmm5 649bc3d5698SJohn Baldwin shrq $2,%r15 650bc3d5698SJohn Baldwin xorl %eax,%eax 651bc3d5698SJohn Baldwin 652bc3d5698SJohn Baldwin jmp .Lcopy4x 653bc3d5698SJohn Baldwin.align 16 654bc3d5698SJohn Baldwin.Lcopy4x: 655bc3d5698SJohn Baldwin movdqa (%rsp,%rax,1),%xmm1 656bc3d5698SJohn Baldwin movdqu (%rdi,%rax,1),%xmm2 657bc3d5698SJohn Baldwin pand %xmm4,%xmm1 658bc3d5698SJohn Baldwin 
pand %xmm5,%xmm2 659bc3d5698SJohn Baldwin movdqa 16(%rsp,%rax,1),%xmm3 660bc3d5698SJohn Baldwin movdqa %xmm0,(%rsp,%rax,1) 661bc3d5698SJohn Baldwin por %xmm2,%xmm1 662bc3d5698SJohn Baldwin movdqu 16(%rdi,%rax,1),%xmm2 663bc3d5698SJohn Baldwin movdqu %xmm1,(%rdi,%rax,1) 664bc3d5698SJohn Baldwin pand %xmm4,%xmm3 665bc3d5698SJohn Baldwin pand %xmm5,%xmm2 666bc3d5698SJohn Baldwin movdqa %xmm0,16(%rsp,%rax,1) 667bc3d5698SJohn Baldwin por %xmm2,%xmm3 668bc3d5698SJohn Baldwin movdqu %xmm3,16(%rdi,%rax,1) 669bc3d5698SJohn Baldwin leaq 32(%rax),%rax 670bc3d5698SJohn Baldwin decq %r15 671bc3d5698SJohn Baldwin jnz .Lcopy4x 672bc3d5698SJohn Baldwin movq 8(%rsp,%r9,8),%rsi 673bc3d5698SJohn Baldwin.cfi_def_cfa %rsi, 8 674bc3d5698SJohn Baldwin movq $1,%rax 675bc3d5698SJohn Baldwin movq -48(%rsi),%r15 676bc3d5698SJohn Baldwin.cfi_restore %r15 677bc3d5698SJohn Baldwin movq -40(%rsi),%r14 678bc3d5698SJohn Baldwin.cfi_restore %r14 679bc3d5698SJohn Baldwin movq -32(%rsi),%r13 680bc3d5698SJohn Baldwin.cfi_restore %r13 681bc3d5698SJohn Baldwin movq -24(%rsi),%r12 682bc3d5698SJohn Baldwin.cfi_restore %r12 683bc3d5698SJohn Baldwin movq -16(%rsi),%rbp 684bc3d5698SJohn Baldwin.cfi_restore %rbp 685bc3d5698SJohn Baldwin movq -8(%rsi),%rbx 686bc3d5698SJohn Baldwin.cfi_restore %rbx 687bc3d5698SJohn Baldwin leaq (%rsi),%rsp 688bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 689bc3d5698SJohn Baldwin.Lmul4x_epilogue: 690bc3d5698SJohn Baldwin .byte 0xf3,0xc3 691bc3d5698SJohn Baldwin.cfi_endproc 692bc3d5698SJohn Baldwin.size bn_mul4x_mont,.-bn_mul4x_mont 693bc3d5698SJohn Baldwin 694bc3d5698SJohn Baldwin 695bc3d5698SJohn Baldwin 696bc3d5698SJohn Baldwin.type bn_sqr8x_mont,@function 697bc3d5698SJohn Baldwin.align 32 698bc3d5698SJohn Baldwinbn_sqr8x_mont: 699bc3d5698SJohn Baldwin.cfi_startproc 700bc3d5698SJohn Baldwin movq %rsp,%rax 701bc3d5698SJohn Baldwin.cfi_def_cfa_register %rax 702bc3d5698SJohn Baldwin.Lsqr8x_enter: 703bc3d5698SJohn Baldwin pushq %rbx 704bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 
705bc3d5698SJohn Baldwin pushq %rbp 706bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 707bc3d5698SJohn Baldwin pushq %r12 708bc3d5698SJohn Baldwin.cfi_offset %r12,-32 709bc3d5698SJohn Baldwin pushq %r13 710bc3d5698SJohn Baldwin.cfi_offset %r13,-40 711bc3d5698SJohn Baldwin pushq %r14 712bc3d5698SJohn Baldwin.cfi_offset %r14,-48 713bc3d5698SJohn Baldwin pushq %r15 714bc3d5698SJohn Baldwin.cfi_offset %r15,-56 715bc3d5698SJohn Baldwin.Lsqr8x_prologue: 716bc3d5698SJohn Baldwin 717bc3d5698SJohn Baldwin movl %r9d,%r10d 718bc3d5698SJohn Baldwin shll $3,%r9d 719bc3d5698SJohn Baldwin shlq $3+2,%r10 720bc3d5698SJohn Baldwin negq %r9 721bc3d5698SJohn Baldwin 722bc3d5698SJohn Baldwin 723bc3d5698SJohn Baldwin 724bc3d5698SJohn Baldwin 725bc3d5698SJohn Baldwin 726bc3d5698SJohn Baldwin 727bc3d5698SJohn Baldwin leaq -64(%rsp,%r9,2),%r11 728bc3d5698SJohn Baldwin movq %rsp,%rbp 729bc3d5698SJohn Baldwin movq (%r8),%r8 730bc3d5698SJohn Baldwin subq %rsi,%r11 731bc3d5698SJohn Baldwin andq $4095,%r11 732bc3d5698SJohn Baldwin cmpq %r11,%r10 733bc3d5698SJohn Baldwin jb .Lsqr8x_sp_alt 734bc3d5698SJohn Baldwin subq %r11,%rbp 735bc3d5698SJohn Baldwin leaq -64(%rbp,%r9,2),%rbp 736bc3d5698SJohn Baldwin jmp .Lsqr8x_sp_done 737bc3d5698SJohn Baldwin 738bc3d5698SJohn Baldwin.align 32 739bc3d5698SJohn Baldwin.Lsqr8x_sp_alt: 740bc3d5698SJohn Baldwin leaq 4096-64(,%r9,2),%r10 741bc3d5698SJohn Baldwin leaq -64(%rbp,%r9,2),%rbp 742bc3d5698SJohn Baldwin subq %r10,%r11 743bc3d5698SJohn Baldwin movq $0,%r10 744bc3d5698SJohn Baldwin cmovcq %r10,%r11 745bc3d5698SJohn Baldwin subq %r11,%rbp 746bc3d5698SJohn Baldwin.Lsqr8x_sp_done: 747bc3d5698SJohn Baldwin andq $-64,%rbp 748bc3d5698SJohn Baldwin movq %rsp,%r11 749bc3d5698SJohn Baldwin subq %rbp,%r11 750bc3d5698SJohn Baldwin andq $-4096,%r11 751bc3d5698SJohn Baldwin leaq (%r11,%rbp,1),%rsp 752bc3d5698SJohn Baldwin movq (%rsp),%r10 753bc3d5698SJohn Baldwin cmpq %rbp,%rsp 754bc3d5698SJohn Baldwin ja .Lsqr8x_page_walk 755bc3d5698SJohn Baldwin jmp .Lsqr8x_page_walk_done 
756bc3d5698SJohn Baldwin 757bc3d5698SJohn Baldwin.align 16 758bc3d5698SJohn Baldwin.Lsqr8x_page_walk: 759bc3d5698SJohn Baldwin leaq -4096(%rsp),%rsp 760bc3d5698SJohn Baldwin movq (%rsp),%r10 761bc3d5698SJohn Baldwin cmpq %rbp,%rsp 762bc3d5698SJohn Baldwin ja .Lsqr8x_page_walk 763bc3d5698SJohn Baldwin.Lsqr8x_page_walk_done: 764bc3d5698SJohn Baldwin 765bc3d5698SJohn Baldwin movq %r9,%r10 766bc3d5698SJohn Baldwin negq %r9 767bc3d5698SJohn Baldwin 768bc3d5698SJohn Baldwin movq %r8,32(%rsp) 769bc3d5698SJohn Baldwin movq %rax,40(%rsp) 770bc3d5698SJohn Baldwin.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 771bc3d5698SJohn Baldwin.Lsqr8x_body: 772bc3d5698SJohn Baldwin 773bc3d5698SJohn Baldwin.byte 102,72,15,110,209 774bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 775bc3d5698SJohn Baldwin.byte 102,72,15,110,207 776bc3d5698SJohn Baldwin.byte 102,73,15,110,218 777bc3d5698SJohn Baldwin movl OPENSSL_ia32cap_P+8(%rip),%eax 778bc3d5698SJohn Baldwin andl $0x80100,%eax 779bc3d5698SJohn Baldwin cmpl $0x80100,%eax 780bc3d5698SJohn Baldwin jne .Lsqr8x_nox 781bc3d5698SJohn Baldwin 782bc3d5698SJohn Baldwin call bn_sqrx8x_internal 783bc3d5698SJohn Baldwin 784bc3d5698SJohn Baldwin 785bc3d5698SJohn Baldwin 786bc3d5698SJohn Baldwin 787bc3d5698SJohn Baldwin leaq (%r8,%rcx,1),%rbx 788bc3d5698SJohn Baldwin movq %rcx,%r9 789bc3d5698SJohn Baldwin movq %rcx,%rdx 790bc3d5698SJohn Baldwin.byte 102,72,15,126,207 791bc3d5698SJohn Baldwin sarq $3+2,%rcx 792bc3d5698SJohn Baldwin jmp .Lsqr8x_sub 793bc3d5698SJohn Baldwin 794bc3d5698SJohn Baldwin.align 32 795bc3d5698SJohn Baldwin.Lsqr8x_nox: 796bc3d5698SJohn Baldwin call bn_sqr8x_internal 797bc3d5698SJohn Baldwin 798bc3d5698SJohn Baldwin 799bc3d5698SJohn Baldwin 800bc3d5698SJohn Baldwin 801bc3d5698SJohn Baldwin leaq (%rdi,%r9,1),%rbx 802bc3d5698SJohn Baldwin movq %r9,%rcx 803bc3d5698SJohn Baldwin movq %r9,%rdx 804bc3d5698SJohn Baldwin.byte 102,72,15,126,207 805bc3d5698SJohn Baldwin sarq $3+2,%rcx 806bc3d5698SJohn Baldwin jmp .Lsqr8x_sub 807bc3d5698SJohn Baldwin 
808bc3d5698SJohn Baldwin.align 32 809bc3d5698SJohn Baldwin.Lsqr8x_sub: 810bc3d5698SJohn Baldwin movq 0(%rbx),%r12 811bc3d5698SJohn Baldwin movq 8(%rbx),%r13 812bc3d5698SJohn Baldwin movq 16(%rbx),%r14 813bc3d5698SJohn Baldwin movq 24(%rbx),%r15 814bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 815bc3d5698SJohn Baldwin sbbq 0(%rbp),%r12 816bc3d5698SJohn Baldwin sbbq 8(%rbp),%r13 817bc3d5698SJohn Baldwin sbbq 16(%rbp),%r14 818bc3d5698SJohn Baldwin sbbq 24(%rbp),%r15 819bc3d5698SJohn Baldwin leaq 32(%rbp),%rbp 820bc3d5698SJohn Baldwin movq %r12,0(%rdi) 821bc3d5698SJohn Baldwin movq %r13,8(%rdi) 822bc3d5698SJohn Baldwin movq %r14,16(%rdi) 823bc3d5698SJohn Baldwin movq %r15,24(%rdi) 824bc3d5698SJohn Baldwin leaq 32(%rdi),%rdi 825bc3d5698SJohn Baldwin incq %rcx 826bc3d5698SJohn Baldwin jnz .Lsqr8x_sub 827bc3d5698SJohn Baldwin 828bc3d5698SJohn Baldwin sbbq $0,%rax 829bc3d5698SJohn Baldwin leaq (%rbx,%r9,1),%rbx 830bc3d5698SJohn Baldwin leaq (%rdi,%r9,1),%rdi 831bc3d5698SJohn Baldwin 832bc3d5698SJohn Baldwin.byte 102,72,15,110,200 833bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 834bc3d5698SJohn Baldwin pshufd $0,%xmm1,%xmm1 835bc3d5698SJohn Baldwin movq 40(%rsp),%rsi 836bc3d5698SJohn Baldwin.cfi_def_cfa %rsi,8 837bc3d5698SJohn Baldwin jmp .Lsqr8x_cond_copy 838bc3d5698SJohn Baldwin 839bc3d5698SJohn Baldwin.align 32 840bc3d5698SJohn Baldwin.Lsqr8x_cond_copy: 841bc3d5698SJohn Baldwin movdqa 0(%rbx),%xmm2 842bc3d5698SJohn Baldwin movdqa 16(%rbx),%xmm3 843bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 844bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm4 845bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm5 846bc3d5698SJohn Baldwin leaq 32(%rdi),%rdi 847bc3d5698SJohn Baldwin movdqa %xmm0,-32(%rbx) 848bc3d5698SJohn Baldwin movdqa %xmm0,-16(%rbx) 849bc3d5698SJohn Baldwin movdqa %xmm0,-32(%rbx,%rdx,1) 850bc3d5698SJohn Baldwin movdqa %xmm0,-16(%rbx,%rdx,1) 851bc3d5698SJohn Baldwin pcmpeqd %xmm1,%xmm0 852bc3d5698SJohn Baldwin pand %xmm1,%xmm2 853bc3d5698SJohn Baldwin pand %xmm1,%xmm3 854bc3d5698SJohn Baldwin pand 
%xmm0,%xmm4 855bc3d5698SJohn Baldwin pand %xmm0,%xmm5 856bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 857bc3d5698SJohn Baldwin por %xmm2,%xmm4 858bc3d5698SJohn Baldwin por %xmm3,%xmm5 859bc3d5698SJohn Baldwin movdqu %xmm4,-32(%rdi) 860bc3d5698SJohn Baldwin movdqu %xmm5,-16(%rdi) 861bc3d5698SJohn Baldwin addq $32,%r9 862bc3d5698SJohn Baldwin jnz .Lsqr8x_cond_copy 863bc3d5698SJohn Baldwin 864bc3d5698SJohn Baldwin movq $1,%rax 865bc3d5698SJohn Baldwin movq -48(%rsi),%r15 866bc3d5698SJohn Baldwin.cfi_restore %r15 867bc3d5698SJohn Baldwin movq -40(%rsi),%r14 868bc3d5698SJohn Baldwin.cfi_restore %r14 869bc3d5698SJohn Baldwin movq -32(%rsi),%r13 870bc3d5698SJohn Baldwin.cfi_restore %r13 871bc3d5698SJohn Baldwin movq -24(%rsi),%r12 872bc3d5698SJohn Baldwin.cfi_restore %r12 873bc3d5698SJohn Baldwin movq -16(%rsi),%rbp 874bc3d5698SJohn Baldwin.cfi_restore %rbp 875bc3d5698SJohn Baldwin movq -8(%rsi),%rbx 876bc3d5698SJohn Baldwin.cfi_restore %rbx 877bc3d5698SJohn Baldwin leaq (%rsi),%rsp 878bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 879bc3d5698SJohn Baldwin.Lsqr8x_epilogue: 880bc3d5698SJohn Baldwin .byte 0xf3,0xc3 881bc3d5698SJohn Baldwin.cfi_endproc 882bc3d5698SJohn Baldwin.size bn_sqr8x_mont,.-bn_sqr8x_mont /* bn_mulx4x_mont: multiply-and-reduce routine of this Montgomery multiplication file, built on mulx, adcx and adox. Registers as used below: rdi = destination, rsi and rcx = two quadword arrays read 32 bytes per step, rdx = array whose quadwords are loaded one per outer pass as the mulx multiplier, r8 = pointer to one quadword that is loaded and used as an imulq factor, r9d = length, scaled by 8 into a byte count. Presumably these are the rp, ap, np, bp, n0 and num arguments of bn_mul_mont - confirm against the caller. Returns 1 in rax. */ 883bc3d5698SJohn Baldwin.type bn_mulx4x_mont,@function 884bc3d5698SJohn Baldwin.align 32 885bc3d5698SJohn Baldwinbn_mulx4x_mont: 886bc3d5698SJohn Baldwin.cfi_startproc 887bc3d5698SJohn Baldwin movq %rsp,%rax 888bc3d5698SJohn Baldwin.cfi_def_cfa_register %rax 889bc3d5698SJohn Baldwin.Lmulx4x_enter: /* rax holds the entry rsp for the epilogue. Push the six callee-saved registers. */ 890bc3d5698SJohn Baldwin pushq %rbx 891bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 892bc3d5698SJohn Baldwin pushq %rbp 893bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 894bc3d5698SJohn Baldwin pushq %r12 895bc3d5698SJohn Baldwin.cfi_offset %r12,-32 896bc3d5698SJohn Baldwin pushq %r13 897bc3d5698SJohn Baldwin.cfi_offset %r13,-40 898bc3d5698SJohn Baldwin pushq %r14 899bc3d5698SJohn Baldwin.cfi_offset %r14,-48 900bc3d5698SJohn Baldwin pushq %r15 901bc3d5698SJohn Baldwin.cfi_offset 
%r15,-56 902bc3d5698SJohn Baldwin.Lmulx4x_prologue: /* r9 = length in bytes, r10 = -r9, r8 = quadword loaded from (r8). rbp = rsp - 72 - r9 rounded down to a multiple of 128 becomes the new frame base. */ 903bc3d5698SJohn Baldwin 904bc3d5698SJohn Baldwin shll $3,%r9d 905bc3d5698SJohn Baldwin xorq %r10,%r10 906bc3d5698SJohn Baldwin subq %r9,%r10 907bc3d5698SJohn Baldwin movq (%r8),%r8 908bc3d5698SJohn Baldwin leaq -72(%rsp,%r10,1),%rbp 909bc3d5698SJohn Baldwin andq $-128,%rbp 910bc3d5698SJohn Baldwin movq %rsp,%r11 911bc3d5698SJohn Baldwin subq %rbp,%r11 912bc3d5698SJohn Baldwin andq $-4096,%r11 913bc3d5698SJohn Baldwin leaq (%r11,%rbp,1),%rsp 914bc3d5698SJohn Baldwin movq (%rsp),%r10 915bc3d5698SJohn Baldwin cmpq %rbp,%rsp 916bc3d5698SJohn Baldwin ja .Lmulx4x_page_walk 917bc3d5698SJohn Baldwin jmp .Lmulx4x_page_walk_done 918bc3d5698SJohn Baldwin 919bc3d5698SJohn Baldwin.align 16 920bc3d5698SJohn Baldwin.Lmulx4x_page_walk: /* Step rsp down to rbp 4096 bytes at a time, reading one quadword at every step. Presumably so that each stack page is touched in order - confirm. */ 921bc3d5698SJohn Baldwin leaq -4096(%rsp),%rsp 922bc3d5698SJohn Baldwin movq (%rsp),%r10 923bc3d5698SJohn Baldwin cmpq %rbp,%rsp 924bc3d5698SJohn Baldwin ja .Lmulx4x_page_walk 925bc3d5698SJohn Baldwin.Lmulx4x_page_walk_done: /* Frame slots written below: 0 = byte length, 8 = pointer to the next multiplier quadword (stored in the body), 16 = rdx + byte length (end of the multiplier array), 24 = quadword loaded from r8, 32 = saved rdi, 40 = entry rsp, 48 = byte length / 32 - 1 (loop count). */ 926bc3d5698SJohn Baldwin 927bc3d5698SJohn Baldwin leaq (%rdx,%r9,1),%r10 928bc3d5698SJohn Baldwin 929bc3d5698SJohn Baldwin 930bc3d5698SJohn Baldwin 931bc3d5698SJohn Baldwin 932bc3d5698SJohn Baldwin 933bc3d5698SJohn Baldwin 934bc3d5698SJohn Baldwin 935bc3d5698SJohn Baldwin 936bc3d5698SJohn Baldwin 937bc3d5698SJohn Baldwin 938bc3d5698SJohn Baldwin 939bc3d5698SJohn Baldwin 940bc3d5698SJohn Baldwin movq %r9,0(%rsp) 941bc3d5698SJohn Baldwin shrq $5,%r9 942bc3d5698SJohn Baldwin movq %r10,16(%rsp) 943bc3d5698SJohn Baldwin subq $1,%r9 944bc3d5698SJohn Baldwin movq %r8,24(%rsp) 945bc3d5698SJohn Baldwin movq %rdi,32(%rsp) 946bc3d5698SJohn Baldwin movq %rax,40(%rsp) 947bc3d5698SJohn Baldwin.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08 948bc3d5698SJohn Baldwin movq %r9,48(%rsp) 949bc3d5698SJohn Baldwin jmp .Lmulx4x_body 950bc3d5698SJohn Baldwin 951bc3d5698SJohn Baldwin.align 32 952bc3d5698SJohn Baldwin.Lmulx4x_body: 953bc3d5698SJohn Baldwin leaq 8(%rdx),%rdi 954bc3d5698SJohn Baldwin movq 
(%rdx),%rdx 955bc3d5698SJohn Baldwin leaq 64+32(%rsp),%rbx 956bc3d5698SJohn Baldwin movq %rdx,%r9 /* rdx = first multiplier quadword (copy kept in r9), rdi = pointer to the next one, rbx = output cursor inside the frame. */ 957bc3d5698SJohn Baldwin 958bc3d5698SJohn Baldwin mulxq 0(%rsi),%r8,%rax 959bc3d5698SJohn Baldwin mulxq 8(%rsi),%r11,%r14 960bc3d5698SJohn Baldwin addq %rax,%r11 961bc3d5698SJohn Baldwin movq %rdi,8(%rsp) 962bc3d5698SJohn Baldwin mulxq 16(%rsi),%r12,%r13 963bc3d5698SJohn Baldwin adcq %r14,%r12 964bc3d5698SJohn Baldwin adcq $0,%r13 965bc3d5698SJohn Baldwin 966bc3d5698SJohn Baldwin movq %r8,%rdi 967bc3d5698SJohn Baldwin imulq 24(%rsp),%r8 /* r8 = lowest product word times the quadword saved at 24(rsp). It is moved into rdx below as the mulx multiplier for the rcx array. */ 968bc3d5698SJohn Baldwin xorq %rbp,%rbp /* rbp = 0, CF and OF cleared before the adcx and adox chains. */ 969bc3d5698SJohn Baldwin 970bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r14 971bc3d5698SJohn Baldwin movq %r8,%rdx 972bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 973bc3d5698SJohn Baldwin adcxq %rax,%r13 974bc3d5698SJohn Baldwin adcxq %rbp,%r14 975bc3d5698SJohn Baldwin 976bc3d5698SJohn Baldwin mulxq 0(%rcx),%rax,%r10 977bc3d5698SJohn Baldwin adcxq %rax,%rdi 978bc3d5698SJohn Baldwin adoxq %r11,%r10 979bc3d5698SJohn Baldwin mulxq 8(%rcx),%rax,%r11 980bc3d5698SJohn Baldwin adcxq %rax,%r10 981bc3d5698SJohn Baldwin adoxq %r12,%r11 982bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 /* These bytes appear to be a hand-encoded mulxq 16(%rcx),%rax,%r12 with a 32-bit displacement - confirm with a disassembler. */ 983bc3d5698SJohn Baldwin movq 48(%rsp),%rdi 984bc3d5698SJohn Baldwin movq %r10,-32(%rbx) 985bc3d5698SJohn Baldwin adcxq %rax,%r11 986bc3d5698SJohn Baldwin adoxq %r13,%r12 987bc3d5698SJohn Baldwin mulxq 24(%rcx),%rax,%r15 988bc3d5698SJohn Baldwin movq %r9,%rdx 989bc3d5698SJohn Baldwin movq %r11,-24(%rbx) 990bc3d5698SJohn Baldwin adcxq %rax,%r12 991bc3d5698SJohn Baldwin adoxq %rbp,%r15 992bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 993bc3d5698SJohn Baldwin movq %r12,-16(%rbx) 994bc3d5698SJohn Baldwin 995bc3d5698SJohn Baldwin jmp .Lmulx4x_1st 996bc3d5698SJohn Baldwin 997bc3d5698SJohn Baldwin.align 32 998bc3d5698SJohn Baldwin.Lmulx4x_1st: /* First-pass loop: each iteration consumes four more quadwords of rsi and of rcx and stores four result words through rbx. rdi counts the iterations down. */ 999bc3d5698SJohn Baldwin adcxq %rbp,%r15 1000bc3d5698SJohn Baldwin mulxq 0(%rsi),%r10,%rax 1001bc3d5698SJohn Baldwin adcxq %r14,%r10 1002bc3d5698SJohn Baldwin mulxq 
8(%rsi),%r11,%r14 1003bc3d5698SJohn Baldwin adcxq %rax,%r11 1004bc3d5698SJohn Baldwin mulxq 16(%rsi),%r12,%rax 1005bc3d5698SJohn Baldwin adcxq %r14,%r12 1006bc3d5698SJohn Baldwin mulxq 24(%rsi),%r13,%r14 1007bc3d5698SJohn Baldwin.byte 0x67,0x67 /* Two 0x67 prefix bytes emitted in front of the next instruction, presumably as padding - confirm. */ 1008bc3d5698SJohn Baldwin movq %r8,%rdx 1009bc3d5698SJohn Baldwin adcxq %rax,%r13 1010bc3d5698SJohn Baldwin adcxq %rbp,%r14 1011bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 1012bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 1013bc3d5698SJohn Baldwin 1014bc3d5698SJohn Baldwin adoxq %r15,%r10 1015bc3d5698SJohn Baldwin mulxq 0(%rcx),%rax,%r15 1016bc3d5698SJohn Baldwin adcxq %rax,%r10 1017bc3d5698SJohn Baldwin adoxq %r15,%r11 1018bc3d5698SJohn Baldwin mulxq 8(%rcx),%rax,%r15 1019bc3d5698SJohn Baldwin adcxq %rax,%r11 1020bc3d5698SJohn Baldwin adoxq %r15,%r12 1021bc3d5698SJohn Baldwin mulxq 16(%rcx),%rax,%r15 1022bc3d5698SJohn Baldwin movq %r10,-40(%rbx) 1023bc3d5698SJohn Baldwin adcxq %rax,%r12 1024bc3d5698SJohn Baldwin movq %r11,-32(%rbx) 1025bc3d5698SJohn Baldwin adoxq %r15,%r13 1026bc3d5698SJohn Baldwin mulxq 24(%rcx),%rax,%r15 1027bc3d5698SJohn Baldwin movq %r9,%rdx 1028bc3d5698SJohn Baldwin movq %r12,-24(%rbx) 1029bc3d5698SJohn Baldwin adcxq %rax,%r13 1030bc3d5698SJohn Baldwin adoxq %rbp,%r15 1031bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 1032bc3d5698SJohn Baldwin movq %r13,-16(%rbx) 1033bc3d5698SJohn Baldwin 1034bc3d5698SJohn Baldwin decq %rdi 1035bc3d5698SJohn Baldwin jnz .Lmulx4x_1st /* Loop done: reload the byte length into rax and the multiplier pointer into rdi, fold the remaining carry into r14 and store it. sbbq leaves r15 = 0 or -1 to record the final carry. */ 1036bc3d5698SJohn Baldwin 1037bc3d5698SJohn Baldwin movq 0(%rsp),%rax 1038bc3d5698SJohn Baldwin movq 8(%rsp),%rdi 1039bc3d5698SJohn Baldwin adcq %rbp,%r15 1040bc3d5698SJohn Baldwin addq %r15,%r14 1041bc3d5698SJohn Baldwin sbbq %r15,%r15 1042bc3d5698SJohn Baldwin movq %r14,-8(%rbx) 1043bc3d5698SJohn Baldwin jmp .Lmulx4x_outer 1044bc3d5698SJohn Baldwin 1045bc3d5698SJohn Baldwin.align 32 1046bc3d5698SJohn Baldwin.Lmulx4x_outer: /* Outer loop: fetch the next multiplier quadword, rewind rsi and rcx by the byte length, store the carry word at (rbx) and reset rbx to the start of the frame buffer. */ 1047bc3d5698SJohn Baldwin movq (%rdi),%rdx 1048bc3d5698SJohn Baldwin leaq 8(%rdi),%rdi 1049bc3d5698SJohn Baldwin subq %rax,%rsi 
1050bc3d5698SJohn Baldwin movq %r15,(%rbx) 1051bc3d5698SJohn Baldwin leaq 64+32(%rsp),%rbx 1052bc3d5698SJohn Baldwin subq %rax,%rcx 1053bc3d5698SJohn Baldwin 1054bc3d5698SJohn Baldwin mulxq 0(%rsi),%r8,%r11 1055bc3d5698SJohn Baldwin xorl %ebp,%ebp 1056bc3d5698SJohn Baldwin movq %rdx,%r9 1057bc3d5698SJohn Baldwin mulxq 8(%rsi),%r14,%r12 1058bc3d5698SJohn Baldwin adoxq -32(%rbx),%r8 1059bc3d5698SJohn Baldwin adcxq %r14,%r11 1060bc3d5698SJohn Baldwin mulxq 16(%rsi),%r15,%r13 1061bc3d5698SJohn Baldwin adoxq -24(%rbx),%r11 1062bc3d5698SJohn Baldwin adcxq %r15,%r12 1063bc3d5698SJohn Baldwin adoxq -16(%rbx),%r12 1064bc3d5698SJohn Baldwin adcxq %rbp,%r13 1065bc3d5698SJohn Baldwin adoxq %rbp,%r13 /* Above, the adox chain adds the words stored by the previous pass at -32 to -16(rbx) while the adcx chain links the mulx products. */ 1066bc3d5698SJohn Baldwin 1067bc3d5698SJohn Baldwin movq %rdi,8(%rsp) 1068bc3d5698SJohn Baldwin movq %r8,%r15 1069bc3d5698SJohn Baldwin imulq 24(%rsp),%r8 1070bc3d5698SJohn Baldwin xorl %ebp,%ebp 1071bc3d5698SJohn Baldwin 1072bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r14 1073bc3d5698SJohn Baldwin movq %r8,%rdx 1074bc3d5698SJohn Baldwin adcxq %rax,%r13 1075bc3d5698SJohn Baldwin adoxq -8(%rbx),%r13 1076bc3d5698SJohn Baldwin adcxq %rbp,%r14 1077bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 1078bc3d5698SJohn Baldwin adoxq %rbp,%r14 1079bc3d5698SJohn Baldwin 1080bc3d5698SJohn Baldwin mulxq 0(%rcx),%rax,%r10 1081bc3d5698SJohn Baldwin adcxq %rax,%r15 1082bc3d5698SJohn Baldwin adoxq %r11,%r10 1083bc3d5698SJohn Baldwin mulxq 8(%rcx),%rax,%r11 1084bc3d5698SJohn Baldwin adcxq %rax,%r10 1085bc3d5698SJohn Baldwin adoxq %r12,%r11 1086bc3d5698SJohn Baldwin mulxq 16(%rcx),%rax,%r12 1087bc3d5698SJohn Baldwin movq %r10,-32(%rbx) 1088bc3d5698SJohn Baldwin adcxq %rax,%r11 1089bc3d5698SJohn Baldwin adoxq %r13,%r12 1090bc3d5698SJohn Baldwin mulxq 24(%rcx),%rax,%r15 1091bc3d5698SJohn Baldwin movq %r9,%rdx 1092bc3d5698SJohn Baldwin movq %r11,-24(%rbx) 1093bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 1094bc3d5698SJohn Baldwin adcxq %rax,%r12 1095bc3d5698SJohn Baldwin adoxq %rbp,%r15 1096bc3d5698SJohn Baldwin movq 
48(%rsp),%rdi 1097bc3d5698SJohn Baldwin movq %r12,-16(%rbx) 1098bc3d5698SJohn Baldwin 1099bc3d5698SJohn Baldwin jmp .Lmulx4x_inner 1100bc3d5698SJohn Baldwin 1101bc3d5698SJohn Baldwin.align 32 1102bc3d5698SJohn Baldwin.Lmulx4x_inner: /* Inner loop: same shape as the first-pass loop, but each group of four words also adds the values already stored at 0 to 24(rbx). Results are written at -40 to -16 of the advanced rbx, that is 8 bytes below where the addends were read. */ 1103bc3d5698SJohn Baldwin mulxq 0(%rsi),%r10,%rax 1104bc3d5698SJohn Baldwin adcxq %rbp,%r15 1105bc3d5698SJohn Baldwin adoxq %r14,%r10 1106bc3d5698SJohn Baldwin mulxq 8(%rsi),%r11,%r14 1107bc3d5698SJohn Baldwin adcxq 0(%rbx),%r10 1108bc3d5698SJohn Baldwin adoxq %rax,%r11 1109bc3d5698SJohn Baldwin mulxq 16(%rsi),%r12,%rax 1110bc3d5698SJohn Baldwin adcxq 8(%rbx),%r11 1111bc3d5698SJohn Baldwin adoxq %r14,%r12 1112bc3d5698SJohn Baldwin mulxq 24(%rsi),%r13,%r14 1113bc3d5698SJohn Baldwin movq %r8,%rdx 1114bc3d5698SJohn Baldwin adcxq 16(%rbx),%r12 1115bc3d5698SJohn Baldwin adoxq %rax,%r13 1116bc3d5698SJohn Baldwin adcxq 24(%rbx),%r13 1117bc3d5698SJohn Baldwin adoxq %rbp,%r14 1118bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 1119bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 1120bc3d5698SJohn Baldwin adcxq %rbp,%r14 1121bc3d5698SJohn Baldwin 1122bc3d5698SJohn Baldwin adoxq %r15,%r10 1123bc3d5698SJohn Baldwin mulxq 0(%rcx),%rax,%r15 1124bc3d5698SJohn Baldwin adcxq %rax,%r10 1125bc3d5698SJohn Baldwin adoxq %r15,%r11 1126bc3d5698SJohn Baldwin mulxq 8(%rcx),%rax,%r15 1127bc3d5698SJohn Baldwin adcxq %rax,%r11 1128bc3d5698SJohn Baldwin adoxq %r15,%r12 1129bc3d5698SJohn Baldwin mulxq 16(%rcx),%rax,%r15 1130bc3d5698SJohn Baldwin movq %r10,-40(%rbx) 1131bc3d5698SJohn Baldwin adcxq %rax,%r12 1132bc3d5698SJohn Baldwin adoxq %r15,%r13 1133bc3d5698SJohn Baldwin mulxq 24(%rcx),%rax,%r15 1134bc3d5698SJohn Baldwin movq %r9,%rdx 1135bc3d5698SJohn Baldwin movq %r11,-32(%rbx) 1136bc3d5698SJohn Baldwin movq %r12,-24(%rbx) 1137bc3d5698SJohn Baldwin adcxq %rax,%r13 1138bc3d5698SJohn Baldwin adoxq %rbp,%r15 1139bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 1140bc3d5698SJohn Baldwin movq %r13,-16(%rbx) 1141bc3d5698SJohn Baldwin 1142bc3d5698SJohn Baldwin decq %rdi 1143bc3d5698SJohn 
Baldwin jnz .Lmulx4x_inner 1144bc3d5698SJohn Baldwin 1145bc3d5698SJohn Baldwin movq 0(%rsp),%rax 1146bc3d5698SJohn Baldwin movq 8(%rsp),%rdi 1147bc3d5698SJohn Baldwin adcq %rbp,%r15 1148bc3d5698SJohn Baldwin subq 0(%rbx),%rbp 1149bc3d5698SJohn Baldwin adcq %r15,%r14 1150bc3d5698SJohn Baldwin sbbq %r15,%r15 1151bc3d5698SJohn Baldwin movq %r14,-8(%rbx) 1152bc3d5698SJohn Baldwin /* Repeat the outer loop until the multiplier pointer reaches the end address saved at 16(rsp). */ 1153bc3d5698SJohn Baldwin cmpq 16(%rsp),%rdi 1154bc3d5698SJohn Baldwin jne .Lmulx4x_outer /* Tail setup: rbx = start of the frame buffer, rcx rewound by the byte length, rdx = byte length, rax = byte length / 32 (loop count), rdi = destination reloaded from 32(rsp). */ 1155bc3d5698SJohn Baldwin 1156bc3d5698SJohn Baldwin leaq 64(%rsp),%rbx 1157bc3d5698SJohn Baldwin subq %rax,%rcx 1158bc3d5698SJohn Baldwin negq %r15 1159bc3d5698SJohn Baldwin movq %rax,%rdx 1160bc3d5698SJohn Baldwin shrq $3+2,%rax 1161bc3d5698SJohn Baldwin movq 32(%rsp),%rdi 1162bc3d5698SJohn Baldwin jmp .Lmulx4x_sub 1163bc3d5698SJohn Baldwin 1164bc3d5698SJohn Baldwin.align 32 1165bc3d5698SJohn Baldwin.Lmulx4x_sub: /* Subtract the rcx array from the frame buffer 32 bytes per iteration and write the difference to rdi. The borrow is carried in CF, which decq, leaq and movq leave untouched. */ 1166bc3d5698SJohn Baldwin movq 0(%rbx),%r11 1167bc3d5698SJohn Baldwin movq 8(%rbx),%r12 1168bc3d5698SJohn Baldwin movq 16(%rbx),%r13 1169bc3d5698SJohn Baldwin movq 24(%rbx),%r14 1170bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 1171bc3d5698SJohn Baldwin sbbq 0(%rcx),%r11 1172bc3d5698SJohn Baldwin sbbq 8(%rcx),%r12 1173bc3d5698SJohn Baldwin sbbq 16(%rcx),%r13 1174bc3d5698SJohn Baldwin sbbq 24(%rcx),%r14 1175bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 1176bc3d5698SJohn Baldwin movq %r11,0(%rdi) 1177bc3d5698SJohn Baldwin movq %r12,8(%rdi) 1178bc3d5698SJohn Baldwin movq %r13,16(%rdi) 1179bc3d5698SJohn Baldwin movq %r14,24(%rdi) 1180bc3d5698SJohn Baldwin leaq 32(%rdi),%rdi 1181bc3d5698SJohn Baldwin decq %rax 1182bc3d5698SJohn Baldwin jnz .Lmulx4x_sub /* r15 absorbs the final borrow, then rbx and rdi are rewound to the start of the frame buffer and of the destination. */ 1183bc3d5698SJohn Baldwin 1184bc3d5698SJohn Baldwin sbbq $0,%r15 1185bc3d5698SJohn Baldwin leaq 64(%rsp),%rbx 1186bc3d5698SJohn Baldwin subq %rdx,%rdi 1187bc3d5698SJohn Baldwin 1188bc3d5698SJohn Baldwin.byte 102,73,15,110,207 /* Bytes 66 49 0F 6E CF, which appear to encode movq %r15,%xmm1 - confirm with a disassembler. */ 1189bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 1190bc3d5698SJohn Baldwin pshufd $0,%xmm1,%xmm1 /* Replicate the low dword of xmm1 into all four lanes to form the select mask. */ 1191bc3d5698SJohn Baldwin movq 40(%rsp),%rsi /* rsi = entry rsp saved at 40(rsp). */ 
1192bc3d5698SJohn Baldwin.cfi_def_cfa %rsi,8 1193bc3d5698SJohn Baldwin jmp .Lmulx4x_cond_copy 1194bc3d5698SJohn Baldwin 1195bc3d5698SJohn Baldwin.align 32 1196bc3d5698SJohn Baldwin.Lmulx4x_cond_copy: /* Branch-free select, 32 bytes per iteration: destination = (frame buffer AND xmm1) OR (destination AND xmm0), where pcmpeqd makes xmm0 all-ones in every dword lane in which xmm1 is zero. The frame buffer bytes just read are overwritten with zeros. */ 1197bc3d5698SJohn Baldwin movdqa 0(%rbx),%xmm2 1198bc3d5698SJohn Baldwin movdqa 16(%rbx),%xmm3 1199bc3d5698SJohn Baldwin leaq 32(%rbx),%rbx 1200bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm4 1201bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm5 1202bc3d5698SJohn Baldwin leaq 32(%rdi),%rdi 1203bc3d5698SJohn Baldwin movdqa %xmm0,-32(%rbx) 1204bc3d5698SJohn Baldwin movdqa %xmm0,-16(%rbx) 1205bc3d5698SJohn Baldwin pcmpeqd %xmm1,%xmm0 1206bc3d5698SJohn Baldwin pand %xmm1,%xmm2 1207bc3d5698SJohn Baldwin pand %xmm1,%xmm3 1208bc3d5698SJohn Baldwin pand %xmm0,%xmm4 1209bc3d5698SJohn Baldwin pand %xmm0,%xmm5 1210bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 1211bc3d5698SJohn Baldwin por %xmm2,%xmm4 1212bc3d5698SJohn Baldwin por %xmm3,%xmm5 1213bc3d5698SJohn Baldwin movdqu %xmm4,-32(%rdi) 1214bc3d5698SJohn Baldwin movdqu %xmm5,-16(%rdi) 1215bc3d5698SJohn Baldwin subq $32,%rdx 1216bc3d5698SJohn Baldwin jnz .Lmulx4x_cond_copy 1217bc3d5698SJohn Baldwin 1218bc3d5698SJohn Baldwin movq %rdx,(%rbx) /* rdx is zero here (loop exit condition), so this zeroes one more quadword just past the region cleared by the loop. */ 1219bc3d5698SJohn Baldwin 1220bc3d5698SJohn Baldwin movq $1,%rax /* Return value 1. Restore the callee-saved registers from below the entry rsp held in rsi, then restore rsp itself. */ 1221bc3d5698SJohn Baldwin movq -48(%rsi),%r15 1222bc3d5698SJohn Baldwin.cfi_restore %r15 1223bc3d5698SJohn Baldwin movq -40(%rsi),%r14 1224bc3d5698SJohn Baldwin.cfi_restore %r14 1225bc3d5698SJohn Baldwin movq -32(%rsi),%r13 1226bc3d5698SJohn Baldwin.cfi_restore %r13 1227bc3d5698SJohn Baldwin movq -24(%rsi),%r12 1228bc3d5698SJohn Baldwin.cfi_restore %r12 1229bc3d5698SJohn Baldwin movq -16(%rsi),%rbp 1230bc3d5698SJohn Baldwin.cfi_restore %rbp 1231bc3d5698SJohn Baldwin movq -8(%rsi),%rbx 1232bc3d5698SJohn Baldwin.cfi_restore %rbx 1233bc3d5698SJohn Baldwin leaq (%rsi),%rsp 1234bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 1235bc3d5698SJohn Baldwin.Lmulx4x_epilogue: 1236bc3d5698SJohn Baldwin .byte 0xf3,0xc3 /* Bytes F3 C3: a REP-prefixed RET. */ 1237bc3d5698SJohn Baldwin.cfi_endproc 
1238bc3d5698SJohn Baldwin.size bn_mulx4x_mont,.-bn_mulx4x_mont 1239bc3d5698SJohn Baldwin.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 /* NUL-terminated ASCII identification string. It begins with the words Montgomery Multiplication for x86_64, CRYPTOGAMS. */ 1240bc3d5698SJohn Baldwin.align 16 /* ELF note section below: name size, descriptor size and note type 5, then the name GNU, then one property record with type 0xc0000002, a 4-byte payload and value 3. Presumably the x86 feature-1 property advertising IBT and SHSTK - confirm against the x86-64 psABI. */ 1241*c0855eaaSJohn Baldwin .section ".note.gnu.property", "a" 1242*c0855eaaSJohn Baldwin .p2align 3 1243*c0855eaaSJohn Baldwin .long 1f - 0f 1244*c0855eaaSJohn Baldwin .long 4f - 1f 1245*c0855eaaSJohn Baldwin .long 5 1246*c0855eaaSJohn Baldwin0: 1247*c0855eaaSJohn Baldwin # "GNU" encoded with .byte, since .asciz isn't supported 1248*c0855eaaSJohn Baldwin # on Solaris. 1249*c0855eaaSJohn Baldwin .byte 0x47 1250*c0855eaaSJohn Baldwin .byte 0x4e 1251*c0855eaaSJohn Baldwin .byte 0x55 1252*c0855eaaSJohn Baldwin .byte 0 1253*c0855eaaSJohn Baldwin1: 1254*c0855eaaSJohn Baldwin .p2align 3 1255*c0855eaaSJohn Baldwin .long 0xc0000002 1256*c0855eaaSJohn Baldwin .long 3f - 2f 1257*c0855eaaSJohn Baldwin2: 1258*c0855eaaSJohn Baldwin .long 3 1259*c0855eaaSJohn Baldwin3: 1260*c0855eaaSJohn Baldwin .p2align 3 1261*c0855eaaSJohn Baldwin4: 1262