1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */ 2bc3d5698SJohn Baldwin.text 3bc3d5698SJohn Baldwin 4bc3d5698SJohn Baldwin 5bc3d5698SJohn Baldwin 6bc3d5698SJohn Baldwin.globl rsaz_512_sqr 7bc3d5698SJohn Baldwin.type rsaz_512_sqr,@function 8bc3d5698SJohn Baldwin.align 32 9bc3d5698SJohn Baldwinrsaz_512_sqr: 10bc3d5698SJohn Baldwin.cfi_startproc 11bc3d5698SJohn Baldwin pushq %rbx 12bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 13bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 14bc3d5698SJohn Baldwin pushq %rbp 15bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 16bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 17bc3d5698SJohn Baldwin pushq %r12 18bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 19bc3d5698SJohn Baldwin.cfi_offset %r12,-32 20bc3d5698SJohn Baldwin pushq %r13 21bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 22bc3d5698SJohn Baldwin.cfi_offset %r13,-40 23bc3d5698SJohn Baldwin pushq %r14 24bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 25bc3d5698SJohn Baldwin.cfi_offset %r14,-48 26bc3d5698SJohn Baldwin pushq %r15 27bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 28bc3d5698SJohn Baldwin.cfi_offset %r15,-56 29bc3d5698SJohn Baldwin 30bc3d5698SJohn Baldwin subq $128+24,%rsp 31bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 128+24 32bc3d5698SJohn Baldwin.Lsqr_body: 33bc3d5698SJohn Baldwin.byte 102,72,15,110,202 34bc3d5698SJohn Baldwin movq (%rsi),%rdx 35bc3d5698SJohn Baldwin movq 8(%rsi),%rax 36bc3d5698SJohn Baldwin movq %rcx,128(%rsp) 37bc3d5698SJohn Baldwin movl $0x80100,%r11d 38bc3d5698SJohn Baldwin andl OPENSSL_ia32cap_P+8(%rip),%r11d 39bc3d5698SJohn Baldwin cmpl $0x80100,%r11d 40bc3d5698SJohn Baldwin je .Loop_sqrx 41bc3d5698SJohn Baldwin jmp .Loop_sqr 42bc3d5698SJohn Baldwin 43bc3d5698SJohn Baldwin.align 32 44bc3d5698SJohn Baldwin.Loop_sqr: 45bc3d5698SJohn Baldwin movl %r8d,128+8(%rsp) 46bc3d5698SJohn Baldwin 47bc3d5698SJohn Baldwin movq %rdx,%rbx 48bc3d5698SJohn Baldwin movq %rax,%rbp 49bc3d5698SJohn Baldwin mulq %rdx 50bc3d5698SJohn Baldwin 
movq %rax,%r8 51bc3d5698SJohn Baldwin movq 16(%rsi),%rax 52bc3d5698SJohn Baldwin movq %rdx,%r9 53bc3d5698SJohn Baldwin 54bc3d5698SJohn Baldwin mulq %rbx 55bc3d5698SJohn Baldwin addq %rax,%r9 56bc3d5698SJohn Baldwin movq 24(%rsi),%rax 57bc3d5698SJohn Baldwin movq %rdx,%r10 58bc3d5698SJohn Baldwin adcq $0,%r10 59bc3d5698SJohn Baldwin 60bc3d5698SJohn Baldwin mulq %rbx 61bc3d5698SJohn Baldwin addq %rax,%r10 62bc3d5698SJohn Baldwin movq 32(%rsi),%rax 63bc3d5698SJohn Baldwin movq %rdx,%r11 64bc3d5698SJohn Baldwin adcq $0,%r11 65bc3d5698SJohn Baldwin 66bc3d5698SJohn Baldwin mulq %rbx 67bc3d5698SJohn Baldwin addq %rax,%r11 68bc3d5698SJohn Baldwin movq 40(%rsi),%rax 69bc3d5698SJohn Baldwin movq %rdx,%r12 70bc3d5698SJohn Baldwin adcq $0,%r12 71bc3d5698SJohn Baldwin 72bc3d5698SJohn Baldwin mulq %rbx 73bc3d5698SJohn Baldwin addq %rax,%r12 74bc3d5698SJohn Baldwin movq 48(%rsi),%rax 75bc3d5698SJohn Baldwin movq %rdx,%r13 76bc3d5698SJohn Baldwin adcq $0,%r13 77bc3d5698SJohn Baldwin 78bc3d5698SJohn Baldwin mulq %rbx 79bc3d5698SJohn Baldwin addq %rax,%r13 80bc3d5698SJohn Baldwin movq 56(%rsi),%rax 81bc3d5698SJohn Baldwin movq %rdx,%r14 82bc3d5698SJohn Baldwin adcq $0,%r14 83bc3d5698SJohn Baldwin 84bc3d5698SJohn Baldwin mulq %rbx 85bc3d5698SJohn Baldwin addq %rax,%r14 86bc3d5698SJohn Baldwin movq %rbx,%rax 87bc3d5698SJohn Baldwin adcq $0,%rdx 88bc3d5698SJohn Baldwin 89bc3d5698SJohn Baldwin xorq %rcx,%rcx 90bc3d5698SJohn Baldwin addq %r8,%r8 91bc3d5698SJohn Baldwin movq %rdx,%r15 92bc3d5698SJohn Baldwin adcq $0,%rcx 93bc3d5698SJohn Baldwin 94bc3d5698SJohn Baldwin mulq %rax 95bc3d5698SJohn Baldwin addq %r8,%rdx 96bc3d5698SJohn Baldwin adcq $0,%rcx 97bc3d5698SJohn Baldwin 98bc3d5698SJohn Baldwin movq %rax,(%rsp) 99bc3d5698SJohn Baldwin movq %rdx,8(%rsp) 100bc3d5698SJohn Baldwin 101bc3d5698SJohn Baldwin 102bc3d5698SJohn Baldwin movq 16(%rsi),%rax 103bc3d5698SJohn Baldwin mulq %rbp 104bc3d5698SJohn Baldwin addq %rax,%r10 105bc3d5698SJohn Baldwin movq 24(%rsi),%rax 106bc3d5698SJohn 
Baldwin movq %rdx,%rbx 107bc3d5698SJohn Baldwin adcq $0,%rbx 108bc3d5698SJohn Baldwin 109bc3d5698SJohn Baldwin mulq %rbp 110bc3d5698SJohn Baldwin addq %rax,%r11 111bc3d5698SJohn Baldwin movq 32(%rsi),%rax 112bc3d5698SJohn Baldwin adcq $0,%rdx 113bc3d5698SJohn Baldwin addq %rbx,%r11 114bc3d5698SJohn Baldwin movq %rdx,%rbx 115bc3d5698SJohn Baldwin adcq $0,%rbx 116bc3d5698SJohn Baldwin 117bc3d5698SJohn Baldwin mulq %rbp 118bc3d5698SJohn Baldwin addq %rax,%r12 119bc3d5698SJohn Baldwin movq 40(%rsi),%rax 120bc3d5698SJohn Baldwin adcq $0,%rdx 121bc3d5698SJohn Baldwin addq %rbx,%r12 122bc3d5698SJohn Baldwin movq %rdx,%rbx 123bc3d5698SJohn Baldwin adcq $0,%rbx 124bc3d5698SJohn Baldwin 125bc3d5698SJohn Baldwin mulq %rbp 126bc3d5698SJohn Baldwin addq %rax,%r13 127bc3d5698SJohn Baldwin movq 48(%rsi),%rax 128bc3d5698SJohn Baldwin adcq $0,%rdx 129bc3d5698SJohn Baldwin addq %rbx,%r13 130bc3d5698SJohn Baldwin movq %rdx,%rbx 131bc3d5698SJohn Baldwin adcq $0,%rbx 132bc3d5698SJohn Baldwin 133bc3d5698SJohn Baldwin mulq %rbp 134bc3d5698SJohn Baldwin addq %rax,%r14 135bc3d5698SJohn Baldwin movq 56(%rsi),%rax 136bc3d5698SJohn Baldwin adcq $0,%rdx 137bc3d5698SJohn Baldwin addq %rbx,%r14 138bc3d5698SJohn Baldwin movq %rdx,%rbx 139bc3d5698SJohn Baldwin adcq $0,%rbx 140bc3d5698SJohn Baldwin 141bc3d5698SJohn Baldwin mulq %rbp 142bc3d5698SJohn Baldwin addq %rax,%r15 143bc3d5698SJohn Baldwin movq %rbp,%rax 144bc3d5698SJohn Baldwin adcq $0,%rdx 145bc3d5698SJohn Baldwin addq %rbx,%r15 146bc3d5698SJohn Baldwin adcq $0,%rdx 147bc3d5698SJohn Baldwin 148bc3d5698SJohn Baldwin xorq %rbx,%rbx 149bc3d5698SJohn Baldwin addq %r9,%r9 150bc3d5698SJohn Baldwin movq %rdx,%r8 151bc3d5698SJohn Baldwin adcq %r10,%r10 152bc3d5698SJohn Baldwin adcq $0,%rbx 153bc3d5698SJohn Baldwin 154bc3d5698SJohn Baldwin mulq %rax 155bc3d5698SJohn Baldwin 156bc3d5698SJohn Baldwin addq %rcx,%rax 157bc3d5698SJohn Baldwin movq 16(%rsi),%rbp 158bc3d5698SJohn Baldwin addq %rax,%r9 159bc3d5698SJohn Baldwin movq 24(%rsi),%rax 
160bc3d5698SJohn Baldwin adcq %rdx,%r10 161bc3d5698SJohn Baldwin adcq $0,%rbx 162bc3d5698SJohn Baldwin 163bc3d5698SJohn Baldwin movq %r9,16(%rsp) 164bc3d5698SJohn Baldwin movq %r10,24(%rsp) 165bc3d5698SJohn Baldwin 166bc3d5698SJohn Baldwin 167bc3d5698SJohn Baldwin mulq %rbp 168bc3d5698SJohn Baldwin addq %rax,%r12 169bc3d5698SJohn Baldwin movq 32(%rsi),%rax 170bc3d5698SJohn Baldwin movq %rdx,%rcx 171bc3d5698SJohn Baldwin adcq $0,%rcx 172bc3d5698SJohn Baldwin 173bc3d5698SJohn Baldwin mulq %rbp 174bc3d5698SJohn Baldwin addq %rax,%r13 175bc3d5698SJohn Baldwin movq 40(%rsi),%rax 176bc3d5698SJohn Baldwin adcq $0,%rdx 177bc3d5698SJohn Baldwin addq %rcx,%r13 178bc3d5698SJohn Baldwin movq %rdx,%rcx 179bc3d5698SJohn Baldwin adcq $0,%rcx 180bc3d5698SJohn Baldwin 181bc3d5698SJohn Baldwin mulq %rbp 182bc3d5698SJohn Baldwin addq %rax,%r14 183bc3d5698SJohn Baldwin movq 48(%rsi),%rax 184bc3d5698SJohn Baldwin adcq $0,%rdx 185bc3d5698SJohn Baldwin addq %rcx,%r14 186bc3d5698SJohn Baldwin movq %rdx,%rcx 187bc3d5698SJohn Baldwin adcq $0,%rcx 188bc3d5698SJohn Baldwin 189bc3d5698SJohn Baldwin mulq %rbp 190bc3d5698SJohn Baldwin addq %rax,%r15 191bc3d5698SJohn Baldwin movq 56(%rsi),%rax 192bc3d5698SJohn Baldwin adcq $0,%rdx 193bc3d5698SJohn Baldwin addq %rcx,%r15 194bc3d5698SJohn Baldwin movq %rdx,%rcx 195bc3d5698SJohn Baldwin adcq $0,%rcx 196bc3d5698SJohn Baldwin 197bc3d5698SJohn Baldwin mulq %rbp 198bc3d5698SJohn Baldwin addq %rax,%r8 199bc3d5698SJohn Baldwin movq %rbp,%rax 200bc3d5698SJohn Baldwin adcq $0,%rdx 201bc3d5698SJohn Baldwin addq %rcx,%r8 202bc3d5698SJohn Baldwin adcq $0,%rdx 203bc3d5698SJohn Baldwin 204bc3d5698SJohn Baldwin xorq %rcx,%rcx 205bc3d5698SJohn Baldwin addq %r11,%r11 206bc3d5698SJohn Baldwin movq %rdx,%r9 207bc3d5698SJohn Baldwin adcq %r12,%r12 208bc3d5698SJohn Baldwin adcq $0,%rcx 209bc3d5698SJohn Baldwin 210bc3d5698SJohn Baldwin mulq %rax 211bc3d5698SJohn Baldwin 212bc3d5698SJohn Baldwin addq %rbx,%rax 213bc3d5698SJohn Baldwin movq 24(%rsi),%r10 214bc3d5698SJohn 
Baldwin addq %rax,%r11 215bc3d5698SJohn Baldwin movq 32(%rsi),%rax 216bc3d5698SJohn Baldwin adcq %rdx,%r12 217bc3d5698SJohn Baldwin adcq $0,%rcx 218bc3d5698SJohn Baldwin 219bc3d5698SJohn Baldwin movq %r11,32(%rsp) 220bc3d5698SJohn Baldwin movq %r12,40(%rsp) 221bc3d5698SJohn Baldwin 222bc3d5698SJohn Baldwin 223bc3d5698SJohn Baldwin movq %rax,%r11 224bc3d5698SJohn Baldwin mulq %r10 225bc3d5698SJohn Baldwin addq %rax,%r14 226bc3d5698SJohn Baldwin movq 40(%rsi),%rax 227bc3d5698SJohn Baldwin movq %rdx,%rbx 228bc3d5698SJohn Baldwin adcq $0,%rbx 229bc3d5698SJohn Baldwin 230bc3d5698SJohn Baldwin movq %rax,%r12 231bc3d5698SJohn Baldwin mulq %r10 232bc3d5698SJohn Baldwin addq %rax,%r15 233bc3d5698SJohn Baldwin movq 48(%rsi),%rax 234bc3d5698SJohn Baldwin adcq $0,%rdx 235bc3d5698SJohn Baldwin addq %rbx,%r15 236bc3d5698SJohn Baldwin movq %rdx,%rbx 237bc3d5698SJohn Baldwin adcq $0,%rbx 238bc3d5698SJohn Baldwin 239bc3d5698SJohn Baldwin movq %rax,%rbp 240bc3d5698SJohn Baldwin mulq %r10 241bc3d5698SJohn Baldwin addq %rax,%r8 242bc3d5698SJohn Baldwin movq 56(%rsi),%rax 243bc3d5698SJohn Baldwin adcq $0,%rdx 244bc3d5698SJohn Baldwin addq %rbx,%r8 245bc3d5698SJohn Baldwin movq %rdx,%rbx 246bc3d5698SJohn Baldwin adcq $0,%rbx 247bc3d5698SJohn Baldwin 248bc3d5698SJohn Baldwin mulq %r10 249bc3d5698SJohn Baldwin addq %rax,%r9 250bc3d5698SJohn Baldwin movq %r10,%rax 251bc3d5698SJohn Baldwin adcq $0,%rdx 252bc3d5698SJohn Baldwin addq %rbx,%r9 253bc3d5698SJohn Baldwin adcq $0,%rdx 254bc3d5698SJohn Baldwin 255bc3d5698SJohn Baldwin xorq %rbx,%rbx 256bc3d5698SJohn Baldwin addq %r13,%r13 257bc3d5698SJohn Baldwin movq %rdx,%r10 258bc3d5698SJohn Baldwin adcq %r14,%r14 259bc3d5698SJohn Baldwin adcq $0,%rbx 260bc3d5698SJohn Baldwin 261bc3d5698SJohn Baldwin mulq %rax 262bc3d5698SJohn Baldwin 263bc3d5698SJohn Baldwin addq %rcx,%rax 264bc3d5698SJohn Baldwin addq %rax,%r13 265bc3d5698SJohn Baldwin movq %r12,%rax 266bc3d5698SJohn Baldwin adcq %rdx,%r14 267bc3d5698SJohn Baldwin adcq $0,%rbx 268bc3d5698SJohn 
Baldwin 269bc3d5698SJohn Baldwin movq %r13,48(%rsp) 270bc3d5698SJohn Baldwin movq %r14,56(%rsp) 271bc3d5698SJohn Baldwin 272bc3d5698SJohn Baldwin 273bc3d5698SJohn Baldwin mulq %r11 274bc3d5698SJohn Baldwin addq %rax,%r8 275bc3d5698SJohn Baldwin movq %rbp,%rax 276bc3d5698SJohn Baldwin movq %rdx,%rcx 277bc3d5698SJohn Baldwin adcq $0,%rcx 278bc3d5698SJohn Baldwin 279bc3d5698SJohn Baldwin mulq %r11 280bc3d5698SJohn Baldwin addq %rax,%r9 281bc3d5698SJohn Baldwin movq 56(%rsi),%rax 282bc3d5698SJohn Baldwin adcq $0,%rdx 283bc3d5698SJohn Baldwin addq %rcx,%r9 284bc3d5698SJohn Baldwin movq %rdx,%rcx 285bc3d5698SJohn Baldwin adcq $0,%rcx 286bc3d5698SJohn Baldwin 287bc3d5698SJohn Baldwin movq %rax,%r14 288bc3d5698SJohn Baldwin mulq %r11 289bc3d5698SJohn Baldwin addq %rax,%r10 290bc3d5698SJohn Baldwin movq %r11,%rax 291bc3d5698SJohn Baldwin adcq $0,%rdx 292bc3d5698SJohn Baldwin addq %rcx,%r10 293bc3d5698SJohn Baldwin adcq $0,%rdx 294bc3d5698SJohn Baldwin 295bc3d5698SJohn Baldwin xorq %rcx,%rcx 296bc3d5698SJohn Baldwin addq %r15,%r15 297bc3d5698SJohn Baldwin movq %rdx,%r11 298bc3d5698SJohn Baldwin adcq %r8,%r8 299bc3d5698SJohn Baldwin adcq $0,%rcx 300bc3d5698SJohn Baldwin 301bc3d5698SJohn Baldwin mulq %rax 302bc3d5698SJohn Baldwin 303bc3d5698SJohn Baldwin addq %rbx,%rax 304bc3d5698SJohn Baldwin addq %rax,%r15 305bc3d5698SJohn Baldwin movq %rbp,%rax 306bc3d5698SJohn Baldwin adcq %rdx,%r8 307bc3d5698SJohn Baldwin adcq $0,%rcx 308bc3d5698SJohn Baldwin 309bc3d5698SJohn Baldwin movq %r15,64(%rsp) 310bc3d5698SJohn Baldwin movq %r8,72(%rsp) 311bc3d5698SJohn Baldwin 312bc3d5698SJohn Baldwin 313bc3d5698SJohn Baldwin mulq %r12 314bc3d5698SJohn Baldwin addq %rax,%r10 315bc3d5698SJohn Baldwin movq %r14,%rax 316bc3d5698SJohn Baldwin movq %rdx,%rbx 317bc3d5698SJohn Baldwin adcq $0,%rbx 318bc3d5698SJohn Baldwin 319bc3d5698SJohn Baldwin mulq %r12 320bc3d5698SJohn Baldwin addq %rax,%r11 321bc3d5698SJohn Baldwin movq %r12,%rax 322bc3d5698SJohn Baldwin adcq $0,%rdx 323bc3d5698SJohn Baldwin addq 
%rbx,%r11 324bc3d5698SJohn Baldwin adcq $0,%rdx 325bc3d5698SJohn Baldwin 326bc3d5698SJohn Baldwin xorq %rbx,%rbx 327bc3d5698SJohn Baldwin addq %r9,%r9 328bc3d5698SJohn Baldwin movq %rdx,%r12 329bc3d5698SJohn Baldwin adcq %r10,%r10 330bc3d5698SJohn Baldwin adcq $0,%rbx 331bc3d5698SJohn Baldwin 332bc3d5698SJohn Baldwin mulq %rax 333bc3d5698SJohn Baldwin 334bc3d5698SJohn Baldwin addq %rcx,%rax 335bc3d5698SJohn Baldwin addq %rax,%r9 336bc3d5698SJohn Baldwin movq %r14,%rax 337bc3d5698SJohn Baldwin adcq %rdx,%r10 338bc3d5698SJohn Baldwin adcq $0,%rbx 339bc3d5698SJohn Baldwin 340bc3d5698SJohn Baldwin movq %r9,80(%rsp) 341bc3d5698SJohn Baldwin movq %r10,88(%rsp) 342bc3d5698SJohn Baldwin 343bc3d5698SJohn Baldwin 344bc3d5698SJohn Baldwin mulq %rbp 345bc3d5698SJohn Baldwin addq %rax,%r12 346bc3d5698SJohn Baldwin movq %rbp,%rax 347bc3d5698SJohn Baldwin adcq $0,%rdx 348bc3d5698SJohn Baldwin 349bc3d5698SJohn Baldwin xorq %rcx,%rcx 350bc3d5698SJohn Baldwin addq %r11,%r11 351bc3d5698SJohn Baldwin movq %rdx,%r13 352bc3d5698SJohn Baldwin adcq %r12,%r12 353bc3d5698SJohn Baldwin adcq $0,%rcx 354bc3d5698SJohn Baldwin 355bc3d5698SJohn Baldwin mulq %rax 356bc3d5698SJohn Baldwin 357bc3d5698SJohn Baldwin addq %rbx,%rax 358bc3d5698SJohn Baldwin addq %rax,%r11 359bc3d5698SJohn Baldwin movq %r14,%rax 360bc3d5698SJohn Baldwin adcq %rdx,%r12 361bc3d5698SJohn Baldwin adcq $0,%rcx 362bc3d5698SJohn Baldwin 363bc3d5698SJohn Baldwin movq %r11,96(%rsp) 364bc3d5698SJohn Baldwin movq %r12,104(%rsp) 365bc3d5698SJohn Baldwin 366bc3d5698SJohn Baldwin 367bc3d5698SJohn Baldwin xorq %rbx,%rbx 368bc3d5698SJohn Baldwin addq %r13,%r13 369bc3d5698SJohn Baldwin adcq $0,%rbx 370bc3d5698SJohn Baldwin 371bc3d5698SJohn Baldwin mulq %rax 372bc3d5698SJohn Baldwin 373bc3d5698SJohn Baldwin addq %rcx,%rax 374bc3d5698SJohn Baldwin addq %r13,%rax 375bc3d5698SJohn Baldwin adcq %rbx,%rdx 376bc3d5698SJohn Baldwin 377bc3d5698SJohn Baldwin movq (%rsp),%r8 378bc3d5698SJohn Baldwin movq 8(%rsp),%r9 379bc3d5698SJohn Baldwin movq 
16(%rsp),%r10 380bc3d5698SJohn Baldwin movq 24(%rsp),%r11 381bc3d5698SJohn Baldwin movq 32(%rsp),%r12 382bc3d5698SJohn Baldwin movq 40(%rsp),%r13 383bc3d5698SJohn Baldwin movq 48(%rsp),%r14 384bc3d5698SJohn Baldwin movq 56(%rsp),%r15 385bc3d5698SJohn Baldwin.byte 102,72,15,126,205 386bc3d5698SJohn Baldwin 387bc3d5698SJohn Baldwin movq %rax,112(%rsp) 388bc3d5698SJohn Baldwin movq %rdx,120(%rsp) 389bc3d5698SJohn Baldwin 390bc3d5698SJohn Baldwin call __rsaz_512_reduce 391bc3d5698SJohn Baldwin 392bc3d5698SJohn Baldwin addq 64(%rsp),%r8 393bc3d5698SJohn Baldwin adcq 72(%rsp),%r9 394bc3d5698SJohn Baldwin adcq 80(%rsp),%r10 395bc3d5698SJohn Baldwin adcq 88(%rsp),%r11 396bc3d5698SJohn Baldwin adcq 96(%rsp),%r12 397bc3d5698SJohn Baldwin adcq 104(%rsp),%r13 398bc3d5698SJohn Baldwin adcq 112(%rsp),%r14 399bc3d5698SJohn Baldwin adcq 120(%rsp),%r15 400bc3d5698SJohn Baldwin sbbq %rcx,%rcx 401bc3d5698SJohn Baldwin 402bc3d5698SJohn Baldwin call __rsaz_512_subtract 403bc3d5698SJohn Baldwin 404bc3d5698SJohn Baldwin movq %r8,%rdx 405bc3d5698SJohn Baldwin movq %r9,%rax 406bc3d5698SJohn Baldwin movl 128+8(%rsp),%r8d 407bc3d5698SJohn Baldwin movq %rdi,%rsi 408bc3d5698SJohn Baldwin 409bc3d5698SJohn Baldwin decl %r8d 410bc3d5698SJohn Baldwin jnz .Loop_sqr 411bc3d5698SJohn Baldwin jmp .Lsqr_tail 412bc3d5698SJohn Baldwin 413bc3d5698SJohn Baldwin.align 32 414bc3d5698SJohn Baldwin.Loop_sqrx: 415bc3d5698SJohn Baldwin movl %r8d,128+8(%rsp) 416bc3d5698SJohn Baldwin.byte 102,72,15,110,199 417bc3d5698SJohn Baldwin 418bc3d5698SJohn Baldwin mulxq %rax,%r8,%r9 419bc3d5698SJohn Baldwin movq %rax,%rbx 420bc3d5698SJohn Baldwin 421bc3d5698SJohn Baldwin mulxq 16(%rsi),%rcx,%r10 422bc3d5698SJohn Baldwin xorq %rbp,%rbp 423bc3d5698SJohn Baldwin 424bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r11 425bc3d5698SJohn Baldwin adcxq %rcx,%r9 426bc3d5698SJohn Baldwin 427bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00 428bc3d5698SJohn Baldwin adcxq %rax,%r10 429bc3d5698SJohn Baldwin 
430bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00 431bc3d5698SJohn Baldwin adcxq %rcx,%r11 432bc3d5698SJohn Baldwin 433bc3d5698SJohn Baldwin mulxq 48(%rsi),%rcx,%r14 434bc3d5698SJohn Baldwin adcxq %rax,%r12 435bc3d5698SJohn Baldwin adcxq %rcx,%r13 436bc3d5698SJohn Baldwin 437bc3d5698SJohn Baldwin mulxq 56(%rsi),%rax,%r15 438bc3d5698SJohn Baldwin adcxq %rax,%r14 439bc3d5698SJohn Baldwin adcxq %rbp,%r15 440bc3d5698SJohn Baldwin 441bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rdi 442bc3d5698SJohn Baldwin movq %rbx,%rdx 443bc3d5698SJohn Baldwin xorq %rcx,%rcx 444bc3d5698SJohn Baldwin adoxq %r8,%r8 445bc3d5698SJohn Baldwin adcxq %rdi,%r8 446bc3d5698SJohn Baldwin adoxq %rbp,%rcx 447bc3d5698SJohn Baldwin adcxq %rbp,%rcx 448bc3d5698SJohn Baldwin 449bc3d5698SJohn Baldwin movq %rax,(%rsp) 450bc3d5698SJohn Baldwin movq %r8,8(%rsp) 451bc3d5698SJohn Baldwin 452bc3d5698SJohn Baldwin 453bc3d5698SJohn Baldwin.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00 454bc3d5698SJohn Baldwin adoxq %rax,%r10 455bc3d5698SJohn Baldwin adcxq %rbx,%r11 456bc3d5698SJohn Baldwin 457bc3d5698SJohn Baldwin mulxq 24(%rsi),%rdi,%r8 458bc3d5698SJohn Baldwin adoxq %rdi,%r11 459bc3d5698SJohn Baldwin.byte 0x66 460bc3d5698SJohn Baldwin adcxq %r8,%r12 461bc3d5698SJohn Baldwin 462bc3d5698SJohn Baldwin mulxq 32(%rsi),%rax,%rbx 463bc3d5698SJohn Baldwin adoxq %rax,%r12 464bc3d5698SJohn Baldwin adcxq %rbx,%r13 465bc3d5698SJohn Baldwin 466bc3d5698SJohn Baldwin mulxq 40(%rsi),%rdi,%r8 467bc3d5698SJohn Baldwin adoxq %rdi,%r13 468bc3d5698SJohn Baldwin adcxq %r8,%r14 469bc3d5698SJohn Baldwin 470bc3d5698SJohn Baldwin.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 471bc3d5698SJohn Baldwin adoxq %rax,%r14 472bc3d5698SJohn Baldwin adcxq %rbx,%r15 473bc3d5698SJohn Baldwin 474bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 475bc3d5698SJohn Baldwin adoxq %rdi,%r15 476bc3d5698SJohn Baldwin adcxq %rbp,%r8 477bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rdi 478bc3d5698SJohn Baldwin adoxq 
%rbp,%r8 479bc3d5698SJohn Baldwin.byte 0x48,0x8b,0x96,0x10,0x00,0x00,0x00 480bc3d5698SJohn Baldwin 481bc3d5698SJohn Baldwin xorq %rbx,%rbx 482bc3d5698SJohn Baldwin adoxq %r9,%r9 483bc3d5698SJohn Baldwin 484bc3d5698SJohn Baldwin adcxq %rcx,%rax 485bc3d5698SJohn Baldwin adoxq %r10,%r10 486bc3d5698SJohn Baldwin adcxq %rax,%r9 487bc3d5698SJohn Baldwin adoxq %rbp,%rbx 488bc3d5698SJohn Baldwin adcxq %rdi,%r10 489bc3d5698SJohn Baldwin adcxq %rbp,%rbx 490bc3d5698SJohn Baldwin 491bc3d5698SJohn Baldwin movq %r9,16(%rsp) 492bc3d5698SJohn Baldwin.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 493bc3d5698SJohn Baldwin 494bc3d5698SJohn Baldwin 495bc3d5698SJohn Baldwin mulxq 24(%rsi),%rdi,%r9 496bc3d5698SJohn Baldwin adoxq %rdi,%r12 497bc3d5698SJohn Baldwin adcxq %r9,%r13 498bc3d5698SJohn Baldwin 499bc3d5698SJohn Baldwin mulxq 32(%rsi),%rax,%rcx 500bc3d5698SJohn Baldwin adoxq %rax,%r13 501bc3d5698SJohn Baldwin adcxq %rcx,%r14 502bc3d5698SJohn Baldwin 503bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00 504bc3d5698SJohn Baldwin adoxq %rdi,%r14 505bc3d5698SJohn Baldwin adcxq %r9,%r15 506bc3d5698SJohn Baldwin 507bc3d5698SJohn Baldwin.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00 508bc3d5698SJohn Baldwin adoxq %rax,%r15 509bc3d5698SJohn Baldwin adcxq %rcx,%r8 510bc3d5698SJohn Baldwin 511bc3d5698SJohn Baldwin mulxq 56(%rsi),%rdi,%r9 512bc3d5698SJohn Baldwin adoxq %rdi,%r8 513bc3d5698SJohn Baldwin adcxq %rbp,%r9 514bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rdi 515bc3d5698SJohn Baldwin adoxq %rbp,%r9 516bc3d5698SJohn Baldwin movq 24(%rsi),%rdx 517bc3d5698SJohn Baldwin 518bc3d5698SJohn Baldwin xorq %rcx,%rcx 519bc3d5698SJohn Baldwin adoxq %r11,%r11 520bc3d5698SJohn Baldwin 521bc3d5698SJohn Baldwin adcxq %rbx,%rax 522bc3d5698SJohn Baldwin adoxq %r12,%r12 523bc3d5698SJohn Baldwin adcxq %rax,%r11 524bc3d5698SJohn Baldwin adoxq %rbp,%rcx 525bc3d5698SJohn Baldwin adcxq %rdi,%r12 526bc3d5698SJohn Baldwin adcxq %rbp,%rcx 527bc3d5698SJohn Baldwin 528bc3d5698SJohn Baldwin 
movq %r11,32(%rsp) 529bc3d5698SJohn Baldwin movq %r12,40(%rsp) 530bc3d5698SJohn Baldwin 531bc3d5698SJohn Baldwin 532bc3d5698SJohn Baldwin mulxq 32(%rsi),%rax,%rbx 533bc3d5698SJohn Baldwin adoxq %rax,%r14 534bc3d5698SJohn Baldwin adcxq %rbx,%r15 535bc3d5698SJohn Baldwin 536bc3d5698SJohn Baldwin mulxq 40(%rsi),%rdi,%r10 537bc3d5698SJohn Baldwin adoxq %rdi,%r15 538bc3d5698SJohn Baldwin adcxq %r10,%r8 539bc3d5698SJohn Baldwin 540bc3d5698SJohn Baldwin mulxq 48(%rsi),%rax,%rbx 541bc3d5698SJohn Baldwin adoxq %rax,%r8 542bc3d5698SJohn Baldwin adcxq %rbx,%r9 543bc3d5698SJohn Baldwin 544bc3d5698SJohn Baldwin mulxq 56(%rsi),%rdi,%r10 545bc3d5698SJohn Baldwin adoxq %rdi,%r9 546bc3d5698SJohn Baldwin adcxq %rbp,%r10 547bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rdi 548bc3d5698SJohn Baldwin adoxq %rbp,%r10 549bc3d5698SJohn Baldwin movq 32(%rsi),%rdx 550bc3d5698SJohn Baldwin 551bc3d5698SJohn Baldwin xorq %rbx,%rbx 552bc3d5698SJohn Baldwin adoxq %r13,%r13 553bc3d5698SJohn Baldwin 554bc3d5698SJohn Baldwin adcxq %rcx,%rax 555bc3d5698SJohn Baldwin adoxq %r14,%r14 556bc3d5698SJohn Baldwin adcxq %rax,%r13 557bc3d5698SJohn Baldwin adoxq %rbp,%rbx 558bc3d5698SJohn Baldwin adcxq %rdi,%r14 559bc3d5698SJohn Baldwin adcxq %rbp,%rbx 560bc3d5698SJohn Baldwin 561bc3d5698SJohn Baldwin movq %r13,48(%rsp) 562bc3d5698SJohn Baldwin movq %r14,56(%rsp) 563bc3d5698SJohn Baldwin 564bc3d5698SJohn Baldwin 565bc3d5698SJohn Baldwin mulxq 40(%rsi),%rdi,%r11 566bc3d5698SJohn Baldwin adoxq %rdi,%r8 567bc3d5698SJohn Baldwin adcxq %r11,%r9 568bc3d5698SJohn Baldwin 569bc3d5698SJohn Baldwin mulxq 48(%rsi),%rax,%rcx 570bc3d5698SJohn Baldwin adoxq %rax,%r9 571bc3d5698SJohn Baldwin adcxq %rcx,%r10 572bc3d5698SJohn Baldwin 573bc3d5698SJohn Baldwin mulxq 56(%rsi),%rdi,%r11 574bc3d5698SJohn Baldwin adoxq %rdi,%r10 575bc3d5698SJohn Baldwin adcxq %rbp,%r11 576bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rdi 577bc3d5698SJohn Baldwin movq 40(%rsi),%rdx 578bc3d5698SJohn Baldwin adoxq %rbp,%r11 579bc3d5698SJohn Baldwin 580bc3d5698SJohn 
Baldwin xorq %rcx,%rcx 581bc3d5698SJohn Baldwin adoxq %r15,%r15 582bc3d5698SJohn Baldwin 583bc3d5698SJohn Baldwin adcxq %rbx,%rax 584bc3d5698SJohn Baldwin adoxq %r8,%r8 585bc3d5698SJohn Baldwin adcxq %rax,%r15 586bc3d5698SJohn Baldwin adoxq %rbp,%rcx 587bc3d5698SJohn Baldwin adcxq %rdi,%r8 588bc3d5698SJohn Baldwin adcxq %rbp,%rcx 589bc3d5698SJohn Baldwin 590bc3d5698SJohn Baldwin movq %r15,64(%rsp) 591bc3d5698SJohn Baldwin movq %r8,72(%rsp) 592bc3d5698SJohn Baldwin 593bc3d5698SJohn Baldwin 594bc3d5698SJohn Baldwin.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 595bc3d5698SJohn Baldwin adoxq %rax,%r10 596bc3d5698SJohn Baldwin adcxq %rbx,%r11 597bc3d5698SJohn Baldwin 598bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 599bc3d5698SJohn Baldwin adoxq %rdi,%r11 600bc3d5698SJohn Baldwin adcxq %rbp,%r12 601bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rdi 602bc3d5698SJohn Baldwin adoxq %rbp,%r12 603bc3d5698SJohn Baldwin movq 48(%rsi),%rdx 604bc3d5698SJohn Baldwin 605bc3d5698SJohn Baldwin xorq %rbx,%rbx 606bc3d5698SJohn Baldwin adoxq %r9,%r9 607bc3d5698SJohn Baldwin 608bc3d5698SJohn Baldwin adcxq %rcx,%rax 609bc3d5698SJohn Baldwin adoxq %r10,%r10 610bc3d5698SJohn Baldwin adcxq %rax,%r9 611bc3d5698SJohn Baldwin adcxq %rdi,%r10 612bc3d5698SJohn Baldwin adoxq %rbp,%rbx 613bc3d5698SJohn Baldwin adcxq %rbp,%rbx 614bc3d5698SJohn Baldwin 615bc3d5698SJohn Baldwin movq %r9,80(%rsp) 616bc3d5698SJohn Baldwin movq %r10,88(%rsp) 617bc3d5698SJohn Baldwin 618bc3d5698SJohn Baldwin 619bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00 620bc3d5698SJohn Baldwin adoxq %rax,%r12 621bc3d5698SJohn Baldwin adoxq %rbp,%r13 622bc3d5698SJohn Baldwin 623bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rdi 624bc3d5698SJohn Baldwin xorq %rcx,%rcx 625bc3d5698SJohn Baldwin movq 56(%rsi),%rdx 626bc3d5698SJohn Baldwin adoxq %r11,%r11 627bc3d5698SJohn Baldwin 628bc3d5698SJohn Baldwin adcxq %rbx,%rax 629bc3d5698SJohn Baldwin adoxq %r12,%r12 630bc3d5698SJohn Baldwin adcxq 
%rax,%r11 631bc3d5698SJohn Baldwin adoxq %rbp,%rcx 632bc3d5698SJohn Baldwin adcxq %rdi,%r12 633bc3d5698SJohn Baldwin adcxq %rbp,%rcx 634bc3d5698SJohn Baldwin 635bc3d5698SJohn Baldwin.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 636bc3d5698SJohn Baldwin.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 637bc3d5698SJohn Baldwin 638bc3d5698SJohn Baldwin 639bc3d5698SJohn Baldwin mulxq %rdx,%rax,%rdx 640bc3d5698SJohn Baldwin xorq %rbx,%rbx 641bc3d5698SJohn Baldwin adoxq %r13,%r13 642bc3d5698SJohn Baldwin 643bc3d5698SJohn Baldwin adcxq %rcx,%rax 644bc3d5698SJohn Baldwin adoxq %rbp,%rbx 645bc3d5698SJohn Baldwin adcxq %r13,%rax 646bc3d5698SJohn Baldwin adcxq %rdx,%rbx 647bc3d5698SJohn Baldwin 648bc3d5698SJohn Baldwin.byte 102,72,15,126,199 649bc3d5698SJohn Baldwin.byte 102,72,15,126,205 650bc3d5698SJohn Baldwin 651bc3d5698SJohn Baldwin movq 128(%rsp),%rdx 652bc3d5698SJohn Baldwin movq (%rsp),%r8 653bc3d5698SJohn Baldwin movq 8(%rsp),%r9 654bc3d5698SJohn Baldwin movq 16(%rsp),%r10 655bc3d5698SJohn Baldwin movq 24(%rsp),%r11 656bc3d5698SJohn Baldwin movq 32(%rsp),%r12 657bc3d5698SJohn Baldwin movq 40(%rsp),%r13 658bc3d5698SJohn Baldwin movq 48(%rsp),%r14 659bc3d5698SJohn Baldwin movq 56(%rsp),%r15 660bc3d5698SJohn Baldwin 661bc3d5698SJohn Baldwin movq %rax,112(%rsp) 662bc3d5698SJohn Baldwin movq %rbx,120(%rsp) 663bc3d5698SJohn Baldwin 664bc3d5698SJohn Baldwin call __rsaz_512_reducex 665bc3d5698SJohn Baldwin 666bc3d5698SJohn Baldwin addq 64(%rsp),%r8 667bc3d5698SJohn Baldwin adcq 72(%rsp),%r9 668bc3d5698SJohn Baldwin adcq 80(%rsp),%r10 669bc3d5698SJohn Baldwin adcq 88(%rsp),%r11 670bc3d5698SJohn Baldwin adcq 96(%rsp),%r12 671bc3d5698SJohn Baldwin adcq 104(%rsp),%r13 672bc3d5698SJohn Baldwin adcq 112(%rsp),%r14 673bc3d5698SJohn Baldwin adcq 120(%rsp),%r15 674bc3d5698SJohn Baldwin sbbq %rcx,%rcx 675bc3d5698SJohn Baldwin 676bc3d5698SJohn Baldwin call __rsaz_512_subtract 677bc3d5698SJohn Baldwin 678bc3d5698SJohn Baldwin movq %r8,%rdx 679bc3d5698SJohn Baldwin movq %r9,%rax 
680bc3d5698SJohn Baldwin movl 128+8(%rsp),%r8d 681bc3d5698SJohn Baldwin movq %rdi,%rsi 682bc3d5698SJohn Baldwin 683bc3d5698SJohn Baldwin decl %r8d 684bc3d5698SJohn Baldwin jnz .Loop_sqrx 685bc3d5698SJohn Baldwin 686bc3d5698SJohn Baldwin.Lsqr_tail: 687bc3d5698SJohn Baldwin 688bc3d5698SJohn Baldwin leaq 128+24+48(%rsp),%rax 689bc3d5698SJohn Baldwin.cfi_def_cfa %rax,8 690bc3d5698SJohn Baldwin movq -48(%rax),%r15 691bc3d5698SJohn Baldwin.cfi_restore %r15 692bc3d5698SJohn Baldwin movq -40(%rax),%r14 693bc3d5698SJohn Baldwin.cfi_restore %r14 694bc3d5698SJohn Baldwin movq -32(%rax),%r13 695bc3d5698SJohn Baldwin.cfi_restore %r13 696bc3d5698SJohn Baldwin movq -24(%rax),%r12 697bc3d5698SJohn Baldwin.cfi_restore %r12 698bc3d5698SJohn Baldwin movq -16(%rax),%rbp 699bc3d5698SJohn Baldwin.cfi_restore %rbp 700bc3d5698SJohn Baldwin movq -8(%rax),%rbx 701bc3d5698SJohn Baldwin.cfi_restore %rbx 702bc3d5698SJohn Baldwin leaq (%rax),%rsp 703bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 704bc3d5698SJohn Baldwin.Lsqr_epilogue: 705bc3d5698SJohn Baldwin .byte 0xf3,0xc3 706bc3d5698SJohn Baldwin.cfi_endproc 707bc3d5698SJohn Baldwin.size rsaz_512_sqr,.-rsaz_512_sqr 708bc3d5698SJohn Baldwin.globl rsaz_512_mul 709bc3d5698SJohn Baldwin.type rsaz_512_mul,@function 710bc3d5698SJohn Baldwin.align 32 711bc3d5698SJohn Baldwinrsaz_512_mul: 712bc3d5698SJohn Baldwin.cfi_startproc 713bc3d5698SJohn Baldwin pushq %rbx 714bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 715bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 716bc3d5698SJohn Baldwin pushq %rbp 717bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 718bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 719bc3d5698SJohn Baldwin pushq %r12 720bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 721bc3d5698SJohn Baldwin.cfi_offset %r12,-32 722bc3d5698SJohn Baldwin pushq %r13 723bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 724bc3d5698SJohn Baldwin.cfi_offset %r13,-40 725bc3d5698SJohn Baldwin pushq %r14 726bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 727bc3d5698SJohn 
Baldwin.cfi_offset %r14,-48 728bc3d5698SJohn Baldwin pushq %r15 729bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 730bc3d5698SJohn Baldwin.cfi_offset %r15,-56 731bc3d5698SJohn Baldwin 732bc3d5698SJohn Baldwin subq $128+24,%rsp 733bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 128+24 734bc3d5698SJohn Baldwin.Lmul_body: 735bc3d5698SJohn Baldwin.byte 102,72,15,110,199 736bc3d5698SJohn Baldwin.byte 102,72,15,110,201 737bc3d5698SJohn Baldwin movq %r8,128(%rsp) 738bc3d5698SJohn Baldwin movl $0x80100,%r11d 739bc3d5698SJohn Baldwin andl OPENSSL_ia32cap_P+8(%rip),%r11d 740bc3d5698SJohn Baldwin cmpl $0x80100,%r11d 741bc3d5698SJohn Baldwin je .Lmulx 742bc3d5698SJohn Baldwin movq (%rdx),%rbx 743bc3d5698SJohn Baldwin movq %rdx,%rbp 744bc3d5698SJohn Baldwin call __rsaz_512_mul 745bc3d5698SJohn Baldwin 746bc3d5698SJohn Baldwin.byte 102,72,15,126,199 747bc3d5698SJohn Baldwin.byte 102,72,15,126,205 748bc3d5698SJohn Baldwin 749bc3d5698SJohn Baldwin movq (%rsp),%r8 750bc3d5698SJohn Baldwin movq 8(%rsp),%r9 751bc3d5698SJohn Baldwin movq 16(%rsp),%r10 752bc3d5698SJohn Baldwin movq 24(%rsp),%r11 753bc3d5698SJohn Baldwin movq 32(%rsp),%r12 754bc3d5698SJohn Baldwin movq 40(%rsp),%r13 755bc3d5698SJohn Baldwin movq 48(%rsp),%r14 756bc3d5698SJohn Baldwin movq 56(%rsp),%r15 757bc3d5698SJohn Baldwin 758bc3d5698SJohn Baldwin call __rsaz_512_reduce 759bc3d5698SJohn Baldwin jmp .Lmul_tail 760bc3d5698SJohn Baldwin 761bc3d5698SJohn Baldwin.align 32 762bc3d5698SJohn Baldwin.Lmulx: 763bc3d5698SJohn Baldwin movq %rdx,%rbp 764bc3d5698SJohn Baldwin movq (%rdx),%rdx 765bc3d5698SJohn Baldwin call __rsaz_512_mulx 766bc3d5698SJohn Baldwin 767bc3d5698SJohn Baldwin.byte 102,72,15,126,199 768bc3d5698SJohn Baldwin.byte 102,72,15,126,205 769bc3d5698SJohn Baldwin 770bc3d5698SJohn Baldwin movq 128(%rsp),%rdx 771bc3d5698SJohn Baldwin movq (%rsp),%r8 772bc3d5698SJohn Baldwin movq 8(%rsp),%r9 773bc3d5698SJohn Baldwin movq 16(%rsp),%r10 774bc3d5698SJohn Baldwin movq 24(%rsp),%r11 775bc3d5698SJohn Baldwin movq 
32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex
.Lmul_tail:
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	rsaz_512_mul,.-rsaz_512_mul

/*
 * rsaz_512_mul_gather4
 *
 * Multiplies the 8-limb value at (%rsi) by an 8-limb operand gathered from
 * the table at %rdx, reduces the 16-limb product with __rsaz_512_reduce or
 * __rsaz_512_reducex, adds the upper half and finishes with
 * __rsaz_512_subtract.
 *
 * Registers on entry, as they are used below:
 *   %rdi  parked at 128+8(%rsp); reloaded as the pointer through which
 *         __rsaz_512_subtract writes the eight result limbs
 *   %rsi  eight 64-bit limbs of the first operand
 *   %rdx  table base; one 128-byte row is consumed per gathered limb
 *   %rcx  parked at 128+16(%rsp); reloaded into %rbp, where the reduce and
 *         subtract helpers read eight limbs (presumably the modulus)
 *   %r8   parked at 128(%rsp); the word the reduce helpers multiply by
 *         (presumably the Montgomery constant -- confirm with callers)
 *   %r9d  index of the table entry to gather
 *
 * Frame (152 bytes): 0..127(%rsp) holds the 16-limb product,
 * 128..151(%rsp) the three parked arguments.
 *
 * Every gather step loads and masks all 128 bytes of the current row, so
 * the addresses that are read do not depend on %r9d.
 */
.globl	rsaz_512_mul_gather4
.type	rsaz_512_mul_gather4,@function
.align	32
rsaz_512_mul_gather4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$152,%rsp
.cfi_adjust_cfa_offset	152
.Lmul_gather4_body:
	/*
	 * Broadcast the index to all four dwords of %xmm8 and compare it with
	 * counters built from .Linc (defined outside these lines).  After the
	 * pcmpeqd chain %xmm0..%xmm7 are the select masks used by every gather.
	 */
	movd	%r9d,%xmm8
	movdqa	.Linc+16(%rip),%xmm1
	movdqa	.Linc(%rip),%xmm0

	pshufd	$0,%xmm8,%xmm8
	movdqa	%xmm1,%xmm7
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm8,%xmm0
	movdqa	%xmm7,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm8,%xmm1
	movdqa	%xmm7,%xmm4
	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm8,%xmm2
	movdqa	%xmm7,%xmm5
	paddd	%xmm3,%xmm4
	pcmpeqd	%xmm8,%xmm3
	movdqa	%xmm7,%xmm6
	paddd	%xmm4,%xmm5
	pcmpeqd	%xmm8,%xmm4
	paddd	%xmm5,%xmm6
	pcmpeqd	%xmm8,%xmm5
	paddd	%xmm6,%xmm7
	pcmpeqd	%xmm8,%xmm6
	pcmpeqd	%xmm8,%xmm7

	/* Gather limb 0 from the first row; %rbp is left at the second row. */
	movdqa	0(%rdx),%xmm8
	movdqa	16(%rdx),%xmm9
	movdqa	32(%rdx),%xmm10
	movdqa	48(%rdx),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rdx),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rdx),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rdx),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rdx),%xmm15
	leaq	128(%rdx),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	/* pshufd $0x4e swaps the 64-bit halves; OR-ing folds to the low qword. */
	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
	/*
	 * Take the mulx/adcx/adox path only when bits 8 and 19 (0x80100) of the
	 * dword at OPENSSL_ia32cap_P+8 are both set (presumably the BMI2 and ADX
	 * feature bits -- see the OPENSSL_ia32cap documentation).
	 */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx_gather
.byte	102,76,15,126,195	/* movq %xmm8,%rbx */

	movq	%r8,128(%rsp)
	movq	%rdi,128+8(%rsp)
	movq	%rcx,128+16(%rsp)

	/* First row: (%rsi)[0..7] * %rbx -> (%rsp) and %r8..%r15. */
	movq	(%rsi),%rax
	movq	8(%rsi),%rcx
	mulq	%rbx
	movq	%rax,(%rsp)
	movq	%rcx,%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	/* Seven more rows; %rdi walks the product buffer from 8(%rsp). */
	leaq	8(%rsp),%rdi
	movl	$7,%ecx
	jmp	.Loop_mul_gather

.align	32
.Loop_mul_gather:
	/* Gather the next limb from the row at %rbp and advance %rbp. */
	movdqa	0(%rbp),%xmm8
	movdqa	16(%rbp),%xmm9
	movdqa	32(%rbp),%xmm10
	movdqa	48(%rbp),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rbp),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rbp),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rbp),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rbp),%xmm15
	leaq	128(%rbp),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
.byte	102,76,15,126,195	/* movq %xmm8,%rbx */

	/* Multiply-accumulate row: finished low limb goes to (%rdi). */
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul_gather

	/* %rdi is now 64(%rsp): store the upper eight product limbs. */
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	128+8(%rsp),%rdi
	movq	128+16(%rsp),%rbp

	/* Reduce the lower eight limbs. */
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_gather_tail

.align	32
.Lmulx_gather:
.byte	102,76,15,126,194	/* movq %xmm8,%rdx */

	movq	%r8,128(%rsp)
	movq	%rdi,128+8(%rsp)
	movq	%rcx,128+16(%rsp)

	/* First row with mulx; xorl also clears CF for the adcx chain. */
	mulxq	(%rsi),%rbx,%r8
	movq	%rbx,(%rsp)
	xorl	%edi,%edi

	mulxq	8(%rsi),%rax,%r9

	mulxq	16(%rsi),%rbx,%r10
	adcxq	%rax,%r8

	mulxq	24(%rsi),%rax,%r11
	adcxq	%rbx,%r9

	mulxq	32(%rsi),%rbx,%r12
	adcxq	%rax,%r10

	mulxq	40(%rsi),%rax,%r13
	adcxq	%rbx,%r11

	mulxq	48(%rsi),%rbx,%r14
	adcxq	%rax,%r12

	mulxq	56(%rsi),%rax,%r15
	adcxq	%rbx,%r13
	adcxq	%rax,%r14
.byte	0x67			/* prefix byte for the next instruction, kept as generated */
	movq	%r8,%rbx
	adcxq	%rdi,%r15

	/* %rcx counts -7..-1; it also indexes the store at 64(%rsp,%rcx,8). */
	movq	$-7,%rcx
	jmp	.Loop_mulx_gather

.align	32
.Loop_mulx_gather:
	/* Gather the next limb from the row at %rbp and advance %rbp. */
	movdqa	0(%rbp),%xmm8
	movdqa	16(%rbp),%xmm9
	movdqa	32(%rbp),%xmm10
	movdqa	48(%rbp),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rbp),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rbp),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rbp),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rbp),%xmm15
	leaq	128(%rbp),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
.byte	102,76,15,126,194	/* movq %xmm8,%rdx */

.byte	0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00	/* mulxq 0(%rsi),%rax,%r8 (disp32 form) */
	adcxq	%rax,%rbx
	adoxq	%r9,%r8

	mulxq	8(%rsi),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rsi),%rax,%r10
	adcxq	%rax,%r9
	adoxq	%r11,%r10

.byte	0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00	/* mulxq 24(%rsi),%rax,%r11 (disp32 form) */
	adcxq	%rax,%r10
	adoxq	%r12,%r11

	mulxq	32(%rsi),%rax,%r12
	adcxq	%rax,%r11
	adoxq	%r13,%r12

	mulxq	40(%rsi),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

.byte	0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00	/* mulxq 48(%rsi),%rax,%r14 (disp32 form) */
	adcxq	%rax,%r13
.byte	0x67			/* prefix byte for the next instruction, kept as generated */
	adoxq	%r15,%r14

	mulxq	56(%rsi),%rax,%r15
	movq	%rbx,64(%rsp,%rcx,8)
	adcxq	%rax,%r14
	adoxq	%rdi,%r15
	movq	%r8,%rbx
	adcxq	%rdi,%r15

	incq	%rcx
	jnz	.Loop_mulx_gather

	/* Store the upper eight product limbs. */
	movq	%r8,64(%rsp)
	movq	%r9,64+8(%rsp)
	movq	%r10,64+16(%rsp)
	movq	%r11,64+24(%rsp)
	movq	%r12,64+32(%rsp)
	movq	%r13,64+40(%rsp)
	movq	%r14,64+48(%rsp)
	movq	%r15,64+56(%rsp)

	/* __rsaz_512_reducex takes the parked %r8 word in %rdx. */
	movq	128(%rsp),%rdx
	movq	128+8(%rsp),%rdi
	movq	128+16(%rsp),%rbp

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex

.Lmul_gather_tail:
	/* Add the upper half of the product; %rcx = 0 or -1 from the carry. */
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	/* %rax = %rsp + 152 + 6*8: just above the six saved registers. */
	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_gather4_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4

/*
 * rsaz_512_mul_scatter4
 *
 * Multiplies the 8-limb value at (%rdi) by the 8-limb value at (%rsi)
 * using __rsaz_512_mul or __rsaz_512_mulx, reduces the product, writes the
 * result back through %rdi (done by __rsaz_512_subtract) and also stores
 * it into a table with a 128-byte stride.
 *
 * Registers on entry, as they are used below:
 *   %rdi  first operand and result pointer (kept in %xmm0 across the call)
 *   %rsi  second operand
 *   %rdx  kept in %xmm1 and reloaded into %rbp for the reduce and subtract
 *         helpers (presumably the modulus)
 *   %rcx  parked at 128(%rsp); the word the reduce helpers multiply by
 *   %r8   table base
 *   %r9d  table index; result limb i is stored at %r8 + 8*%r9 + 128*i
 */
.globl	rsaz_512_mul_scatter4
.type	rsaz_512_mul_scatter4,@function
.align	32
rsaz_512_mul_scatter4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	movl	%r9d,%r9d	/* zero-extend the index */
	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_scatter4_body:
	leaq	(%r8,%r9,8),%r8
.byte	102,72,15,110,199	/* movq %rdi,%xmm0 */
.byte	102,72,15,110,202	/* movq %rdx,%xmm1 */
.byte	102,73,15,110,208	/* movq %r8,%xmm2 */
	movq	%rcx,128(%rsp)

	/* The multiply helpers read the second factor through %rbp. */
	movq	%rdi,%rbp
	/* Same 0x80100 capability test as in rsaz_512_mul_gather4. */
	movl	$0x80100,%r11d
	andl	OPENSSL_ia32cap_P+8(%rip),%r11d
	cmpl	$0x80100,%r11d
	je	.Lmulx_scatter
	movq	(%rdi),%rbx
	call	__rsaz_512_mul

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	jmp	.Lmul_scatter_tail

.align	32
.Lmulx_scatter:
	movq	(%rdi),%rdx
	call	__rsaz_512_mulx

.byte	102,72,15,126,199	/* movq %xmm0,%rdi */
.byte	102,72,15,126,205	/* movq %xmm1,%rbp */

	movq	128(%rsp),%rdx
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reducex

.Lmul_scatter_tail:
	addq	64(%rsp),%r8
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
.byte	102,72,15,126,214	/* movq %xmm2,%rsi (leaves CF intact for the sbbq) */
	sbbq	%rcx,%rcx

	call	__rsaz_512_subtract

	/* Scatter: one limb per 128-byte row of the table. */
	movq	%r8,0(%rsi)
	movq	%r9,128(%rsi)
	movq	%r10,256(%rsi)
	movq	%r11,384(%rsi)
	movq	%r12,512(%rsi)
	movq	%r13,640(%rsi)
	movq	%r14,768(%rsi)
	movq	%r15,896(%rsi)

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_scatter4_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4

/*
 * rsaz_512_mul_by_one
 *
 * Loads eight limbs from (%rsi) into %r8..%r15, runs them through
 * __rsaz_512_reduce or __rsaz_512_reducex and stores %r8..%r15 to (%rdi).
 * No product is formed and __rsaz_512_subtract is not called.
 *
 *   %rdi  destination (eight limbs)
 *   %rsi  source (eight limbs)
 *   %rdx  copied to %rbp for the reduce helper (presumably the modulus)
 *   %rcx  parked at 128(%rsp); the word the reduce helper multiplies by
 */
.globl	rsaz_512_mul_by_one
.type	rsaz_512_mul_by_one,@function
.align	32
rsaz_512_mul_by_one:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_by_one_body:
	movl	OPENSSL_ia32cap_P+8(%rip),%eax
	movq	%rdx,%rbp
	movq	%rcx,128(%rsp)

	movq	(%rsi),%r8
	pxor	%xmm0,%xmm0
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	48(%rsi),%r14
	movq	56(%rsi),%r15

	/*
	 * Zero 0..111(%rsp).  Neither reduce helper below reads that area; the
	 * reason for clearing it is not evident from this part of the file.
	 */
	movdqa	%xmm0,(%rsp)
	movdqa	%xmm0,16(%rsp)
	movdqa	%xmm0,32(%rsp)
	movdqa	%xmm0,48(%rsp)
	movdqa	%xmm0,64(%rsp)
	movdqa	%xmm0,80(%rsp)
	movdqa	%xmm0,96(%rsp)
	/* Same 0x80100 capability test as the other entry points. */
	andl	$0x80100,%eax
	cmpl	$0x80100,%eax
	je	.Lby_one_callx
	call	__rsaz_512_reduce
	jmp	.Lby_one_tail
.align	32
.Lby_one_callx:
	movq	128(%rsp),%rdx
	call	__rsaz_512_reducex
.Lby_one_tail:
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_by_one_epilogue:
	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one

/*
 * __rsaz_512_reduce (mulq flavour; file-local helper)
 *
 * In:   %r8..%r15     eight limbs to reduce
 *       %rbp          eight limbs that are multiplied in each round
 *                     (presumably the modulus)
 *       128+8(%rsp)   64-bit factor; this is the caller's 128(%rsp) slot,
 *                     shifted by the return address pushed by call
 * Out:  %r8..%r15
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags.
 *
 * Eight rounds.  Each round takes k = low limb * factor, adds k times the
 * limbs at (%rbp) and drops the low limb, i.e. shifts the value down by 64
 * bits.  (This has the shape of word-by-word Montgomery reduction --
 * confirm against rsaz-x86_64.pl.)
 */
.type	__rsaz_512_reduce,@function
.align	32
__rsaz_512_reduce:
.cfi_startproc
	movq	%r8,%rbx
	imulq	128+8(%rsp),%rbx
	movq	0(%rbp),%rax
	movl	$8,%ecx
	jmp	.Lreduction_loop

.align	32
.Lreduction_loop:
	mulq	%rbx
	movq	8(%rbp),%rax
	/*
	 * CF = (%r8 != 0).  The low product word in %rax has already been
	 * overwritten, so this appears to stand in for the carry out of the
	 * discarded low limb.
	 */
	negq	%r8
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	128+8(%rsp),%rsi	/* start forming the next round's k */


	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi		/* %rsi = factor * new low limb */
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx		/* k for the next round */
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jne	.Lreduction_loop

	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	__rsaz_512_reduce,.-__rsaz_512_reduce

/*
 * __rsaz_512_reducex (mulx/adcx/adox flavour; file-local helper)
 *
 * In:   %r8..%r15     eight limbs to reduce
 *       %rdx          64-bit factor (callers load it from their 128(%rsp))
 *       %rbp          eight limbs that are multiplied in each round
 *                     (presumably the modulus)
 *       128+8(%rsp)   the same factor, re-read inside the loop
 * Out:  %r8..%r15
 * Clobbers: %rax, %rbx, %rcx, %rdx, %rsi, flags.
 *
 * Same eight-round structure as __rsaz_512_reduce, with the two carry
 * chains carried in CF (adcx) and OF (adox).
 */
.type	__rsaz_512_reducex,@function
.align	32
__rsaz_512_reducex:
.cfi_startproc

	imulq	%r8,%rdx		/* k = low limb * factor */
	xorq	%rsi,%rsi		/* zero register; also clears CF and OF */
	movl	$8,%ecx
	jmp	.Lreduction_loopx

.align	32
.Lreduction_loopx:
	movq	%r8,%rbx
	mulxq	0(%rbp),%rax,%r8
	adcxq	%rbx,%rax
	adoxq	%r9,%r8

	mulxq	8(%rbp),%rax,%r9
	adcxq	%rax,%r8
	adoxq	%r10,%r9

	mulxq	16(%rbp),%rbx,%r10
	adcxq	%rbx,%r9
	adoxq	%r11,%r10

	mulxq	24(%rbp),%rbx,%r11
	adcxq	%rbx,%r10
	adoxq	%r12,%r11

.byte	0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00	/* mulxq 32(%rbp),%rbx,%r12 (disp32 form) */
	movq	%rdx,%rax		/* park k */
	movq	%r8,%rdx
	adcxq	%rbx,%r11
	adoxq	%r13,%r12

	/* %rbx = low 64 bits of (new low limb * factor): next round's k. */
	mulxq	128+8(%rsp),%rbx,%rdx
	movq	%rax,%rdx		/* restore k for the rest of this round */

	mulxq	40(%rbp),%rax,%r13
	adcxq	%rax,%r12
	adoxq	%r14,%r13

.byte	0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00	/* mulxq 48(%rbp),%rax,%r14 (disp32 form) */
	adcxq	%rax,%r13
	adoxq	%r15,%r14

	mulxq	56(%rbp),%rax,%r15
	movq	%rbx,%rdx
	adcxq	%rax,%r14
	adoxq	%rsi,%r15
	adcxq	%rsi,%r15

	decl	%ecx
	jne	.Lreduction_loopx

	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	__rsaz_512_reducex,.-__rsaz_512_reducex

/*
 * __rsaz_512_subtract (file-local helper)
 *
 * In:   %r8..%r15  eight-limb value
 *       %rdi       destination (eight limbs)
 *       %rbp       eight limbs m[0..7]
 *       %rcx       mask; the callers in this file produce it with
 *                  sbbq %rcx,%rcx, so it is 0 or all-ones
 * Out:  (%rdi) and %r8..%r15 hold value + (mask & -m)
 *
 * The value is stored first, then (-m[0], ~m[1], ..., ~m[7]) ANDed with the
 * mask is added and the sum stored again.  The neg/not pairing equals the
 * two's complement of m as long as m[0] is non-zero, so an all-ones mask
 * subtracts m and a zero mask leaves the value unchanged, without a branch.
 */
.type	__rsaz_512_subtract,@function
.align	32
__rsaz_512_subtract:
.cfi_startproc
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	0(%rbp),%r8
	movq	8(%rbp),%r9
	negq	%r8
	notq	%r9
	andq	%rcx,%r8
	movq	16(%rbp),%r10
	andq	%rcx,%r9
	notq	%r10
	movq	24(%rbp),%r11
	andq	%rcx,%r10
	notq	%r11
	movq	32(%rbp),%r12
	andq	%rcx,%r11
	notq	%r12
	movq	40(%rbp),%r13
	andq	%rcx,%r12
	notq	%r13
	movq	48(%rbp),%r14
	andq	%rcx,%r13
	notq	%r14
	movq	56(%rbp),%r15
	andq	%rcx,%r14
	notq	%r15
	andq	%rcx,%r15

	addq	(%rdi),%r8
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3	/* rep ret */
.cfi_endproc
.size	__rsaz_512_subtract,.-__rsaz_512_subtract

/*
 * __rsaz_512_mul (file-local helper)
 *
 * Multiplies the eight limbs at (%rsi) by %rbx and then by the words at
 * 8(%rbp), 16(%rbp), ..., accumulating in %r8..%r15 and storing finished
 * limbs upward from 8(%rsp), which is the caller's (%rsp) because the
 * return address sits on the stack.
 */
.type	__rsaz_512_mul,@function
.align	32
__rsaz_512_mul:
.cfi_startproc
	leaq	8(%rsp),%rdi

	movq	(%rsi),%rax
	mulq	%rbx
	movq	%rax,(%rdi)
	movq	8(%rsi),%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rbp),%rbp
	leaq	8(%rdi),%rdi

	movl	$7,%ecx
	jmp	.Loop_mul

.align	32
.Loop_mul:
	movq	(%rbp),%rbx
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	leaq	8(%rbp),%rbp
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul

	movq	%r8,(%rdi)
1789bc3d5698SJohn Baldwin movq %r9,8(%rdi) 1790bc3d5698SJohn Baldwin movq %r10,16(%rdi) 1791bc3d5698SJohn Baldwin movq %r11,24(%rdi) 1792bc3d5698SJohn Baldwin movq %r12,32(%rdi) 1793bc3d5698SJohn Baldwin movq %r13,40(%rdi) 1794bc3d5698SJohn Baldwin movq %r14,48(%rdi) 1795bc3d5698SJohn Baldwin movq %r15,56(%rdi) 1796bc3d5698SJohn Baldwin 1797bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1798bc3d5698SJohn Baldwin.cfi_endproc 1799bc3d5698SJohn Baldwin.size __rsaz_512_mul,.-__rsaz_512_mul 1800bc3d5698SJohn Baldwin.type __rsaz_512_mulx,@function 1801bc3d5698SJohn Baldwin.align 32 1802bc3d5698SJohn Baldwin__rsaz_512_mulx: 1803bc3d5698SJohn Baldwin.cfi_startproc 1804bc3d5698SJohn Baldwin mulxq (%rsi),%rbx,%r8 1805bc3d5698SJohn Baldwin movq $-6,%rcx 1806bc3d5698SJohn Baldwin 1807bc3d5698SJohn Baldwin mulxq 8(%rsi),%rax,%r9 1808bc3d5698SJohn Baldwin movq %rbx,8(%rsp) 1809bc3d5698SJohn Baldwin 1810bc3d5698SJohn Baldwin mulxq 16(%rsi),%rbx,%r10 1811bc3d5698SJohn Baldwin adcq %rax,%r8 1812bc3d5698SJohn Baldwin 1813bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r11 1814bc3d5698SJohn Baldwin adcq %rbx,%r9 1815bc3d5698SJohn Baldwin 1816bc3d5698SJohn Baldwin mulxq 32(%rsi),%rbx,%r12 1817bc3d5698SJohn Baldwin adcq %rax,%r10 1818bc3d5698SJohn Baldwin 1819bc3d5698SJohn Baldwin mulxq 40(%rsi),%rax,%r13 1820bc3d5698SJohn Baldwin adcq %rbx,%r11 1821bc3d5698SJohn Baldwin 1822bc3d5698SJohn Baldwin mulxq 48(%rsi),%rbx,%r14 1823bc3d5698SJohn Baldwin adcq %rax,%r12 1824bc3d5698SJohn Baldwin 1825bc3d5698SJohn Baldwin mulxq 56(%rsi),%rax,%r15 1826bc3d5698SJohn Baldwin movq 8(%rbp),%rdx 1827bc3d5698SJohn Baldwin adcq %rbx,%r13 1828bc3d5698SJohn Baldwin adcq %rax,%r14 1829bc3d5698SJohn Baldwin adcq $0,%r15 1830bc3d5698SJohn Baldwin 1831bc3d5698SJohn Baldwin xorq %rdi,%rdi 1832bc3d5698SJohn Baldwin jmp .Loop_mulx 1833bc3d5698SJohn Baldwin 1834bc3d5698SJohn Baldwin.align 32 1835bc3d5698SJohn Baldwin.Loop_mulx: 1836bc3d5698SJohn Baldwin movq %r8,%rbx 1837bc3d5698SJohn Baldwin mulxq (%rsi),%rax,%r8 
1838bc3d5698SJohn Baldwin adcxq %rax,%rbx 1839bc3d5698SJohn Baldwin adoxq %r9,%r8 1840bc3d5698SJohn Baldwin 1841bc3d5698SJohn Baldwin mulxq 8(%rsi),%rax,%r9 1842bc3d5698SJohn Baldwin adcxq %rax,%r8 1843bc3d5698SJohn Baldwin adoxq %r10,%r9 1844bc3d5698SJohn Baldwin 1845bc3d5698SJohn Baldwin mulxq 16(%rsi),%rax,%r10 1846bc3d5698SJohn Baldwin adcxq %rax,%r9 1847bc3d5698SJohn Baldwin adoxq %r11,%r10 1848bc3d5698SJohn Baldwin 1849bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r11 1850bc3d5698SJohn Baldwin adcxq %rax,%r10 1851bc3d5698SJohn Baldwin adoxq %r12,%r11 1852bc3d5698SJohn Baldwin 1853bc3d5698SJohn Baldwin.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 1854bc3d5698SJohn Baldwin adcxq %rax,%r11 1855bc3d5698SJohn Baldwin adoxq %r13,%r12 1856bc3d5698SJohn Baldwin 1857bc3d5698SJohn Baldwin mulxq 40(%rsi),%rax,%r13 1858bc3d5698SJohn Baldwin adcxq %rax,%r12 1859bc3d5698SJohn Baldwin adoxq %r14,%r13 1860bc3d5698SJohn Baldwin 1861bc3d5698SJohn Baldwin mulxq 48(%rsi),%rax,%r14 1862bc3d5698SJohn Baldwin adcxq %rax,%r13 1863bc3d5698SJohn Baldwin adoxq %r15,%r14 1864bc3d5698SJohn Baldwin 1865bc3d5698SJohn Baldwin mulxq 56(%rsi),%rax,%r15 1866bc3d5698SJohn Baldwin movq 64(%rbp,%rcx,8),%rdx 1867bc3d5698SJohn Baldwin movq %rbx,8+64-8(%rsp,%rcx,8) 1868bc3d5698SJohn Baldwin adcxq %rax,%r14 1869bc3d5698SJohn Baldwin adoxq %rdi,%r15 1870bc3d5698SJohn Baldwin adcxq %rdi,%r15 1871bc3d5698SJohn Baldwin 1872bc3d5698SJohn Baldwin incq %rcx 1873bc3d5698SJohn Baldwin jnz .Loop_mulx 1874bc3d5698SJohn Baldwin 1875bc3d5698SJohn Baldwin movq %r8,%rbx 1876bc3d5698SJohn Baldwin mulxq (%rsi),%rax,%r8 1877bc3d5698SJohn Baldwin adcxq %rax,%rbx 1878bc3d5698SJohn Baldwin adoxq %r9,%r8 1879bc3d5698SJohn Baldwin 1880bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 1881bc3d5698SJohn Baldwin adcxq %rax,%r8 1882bc3d5698SJohn Baldwin adoxq %r10,%r9 1883bc3d5698SJohn Baldwin 1884bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 1885bc3d5698SJohn Baldwin 
adcxq %rax,%r9 1886bc3d5698SJohn Baldwin adoxq %r11,%r10 1887bc3d5698SJohn Baldwin 1888bc3d5698SJohn Baldwin mulxq 24(%rsi),%rax,%r11 1889bc3d5698SJohn Baldwin adcxq %rax,%r10 1890bc3d5698SJohn Baldwin adoxq %r12,%r11 1891bc3d5698SJohn Baldwin 1892bc3d5698SJohn Baldwin mulxq 32(%rsi),%rax,%r12 1893bc3d5698SJohn Baldwin adcxq %rax,%r11 1894bc3d5698SJohn Baldwin adoxq %r13,%r12 1895bc3d5698SJohn Baldwin 1896bc3d5698SJohn Baldwin mulxq 40(%rsi),%rax,%r13 1897bc3d5698SJohn Baldwin adcxq %rax,%r12 1898bc3d5698SJohn Baldwin adoxq %r14,%r13 1899bc3d5698SJohn Baldwin 1900bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 1901bc3d5698SJohn Baldwin adcxq %rax,%r13 1902bc3d5698SJohn Baldwin adoxq %r15,%r14 1903bc3d5698SJohn Baldwin 1904bc3d5698SJohn Baldwin.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 1905bc3d5698SJohn Baldwin adcxq %rax,%r14 1906bc3d5698SJohn Baldwin adoxq %rdi,%r15 1907bc3d5698SJohn Baldwin adcxq %rdi,%r15 1908bc3d5698SJohn Baldwin 1909bc3d5698SJohn Baldwin movq %rbx,8+64-8(%rsp) 1910bc3d5698SJohn Baldwin movq %r8,8+64(%rsp) 1911bc3d5698SJohn Baldwin movq %r9,8+64+8(%rsp) 1912bc3d5698SJohn Baldwin movq %r10,8+64+16(%rsp) 1913bc3d5698SJohn Baldwin movq %r11,8+64+24(%rsp) 1914bc3d5698SJohn Baldwin movq %r12,8+64+32(%rsp) 1915bc3d5698SJohn Baldwin movq %r13,8+64+40(%rsp) 1916bc3d5698SJohn Baldwin movq %r14,8+64+48(%rsp) 1917bc3d5698SJohn Baldwin movq %r15,8+64+56(%rsp) 1918bc3d5698SJohn Baldwin 1919bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1920bc3d5698SJohn Baldwin.cfi_endproc 1921bc3d5698SJohn Baldwin.size __rsaz_512_mulx,.-__rsaz_512_mulx 1922bc3d5698SJohn Baldwin.globl rsaz_512_scatter4 1923bc3d5698SJohn Baldwin.type rsaz_512_scatter4,@function 1924bc3d5698SJohn Baldwin.align 16 1925bc3d5698SJohn Baldwinrsaz_512_scatter4: 1926bc3d5698SJohn Baldwin.cfi_startproc 1927bc3d5698SJohn Baldwin leaq (%rdi,%rdx,8),%rdi 1928bc3d5698SJohn Baldwin movl $8,%r9d 1929bc3d5698SJohn Baldwin jmp .Loop_scatter 1930bc3d5698SJohn Baldwin.align 16 
1931bc3d5698SJohn Baldwin.Loop_scatter: 1932bc3d5698SJohn Baldwin movq (%rsi),%rax 1933bc3d5698SJohn Baldwin leaq 8(%rsi),%rsi 1934bc3d5698SJohn Baldwin movq %rax,(%rdi) 1935bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1936bc3d5698SJohn Baldwin decl %r9d 1937bc3d5698SJohn Baldwin jnz .Loop_scatter 1938bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1939bc3d5698SJohn Baldwin.cfi_endproc 1940bc3d5698SJohn Baldwin.size rsaz_512_scatter4,.-rsaz_512_scatter4 1941bc3d5698SJohn Baldwin 1942bc3d5698SJohn Baldwin.globl rsaz_512_gather4 1943bc3d5698SJohn Baldwin.type rsaz_512_gather4,@function 1944bc3d5698SJohn Baldwin.align 16 1945bc3d5698SJohn Baldwinrsaz_512_gather4: 1946bc3d5698SJohn Baldwin.cfi_startproc 1947bc3d5698SJohn Baldwin movd %edx,%xmm8 1948bc3d5698SJohn Baldwin movdqa .Linc+16(%rip),%xmm1 1949bc3d5698SJohn Baldwin movdqa .Linc(%rip),%xmm0 1950bc3d5698SJohn Baldwin 1951bc3d5698SJohn Baldwin pshufd $0,%xmm8,%xmm8 1952bc3d5698SJohn Baldwin movdqa %xmm1,%xmm7 1953bc3d5698SJohn Baldwin movdqa %xmm1,%xmm2 1954bc3d5698SJohn Baldwin paddd %xmm0,%xmm1 1955bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm0 1956bc3d5698SJohn Baldwin movdqa %xmm7,%xmm3 1957bc3d5698SJohn Baldwin paddd %xmm1,%xmm2 1958bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm1 1959bc3d5698SJohn Baldwin movdqa %xmm7,%xmm4 1960bc3d5698SJohn Baldwin paddd %xmm2,%xmm3 1961bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm2 1962bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1963bc3d5698SJohn Baldwin paddd %xmm3,%xmm4 1964bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm3 1965bc3d5698SJohn Baldwin movdqa %xmm7,%xmm6 1966bc3d5698SJohn Baldwin paddd %xmm4,%xmm5 1967bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm4 1968bc3d5698SJohn Baldwin paddd %xmm5,%xmm6 1969bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm5 1970bc3d5698SJohn Baldwin paddd %xmm6,%xmm7 1971bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm6 1972bc3d5698SJohn Baldwin pcmpeqd %xmm8,%xmm7 1973bc3d5698SJohn Baldwin movl $8,%r9d 1974bc3d5698SJohn Baldwin jmp .Loop_gather 1975bc3d5698SJohn Baldwin.align 16 1976bc3d5698SJohn 
Baldwin.Loop_gather: 1977bc3d5698SJohn Baldwin movdqa 0(%rsi),%xmm8 1978bc3d5698SJohn Baldwin movdqa 16(%rsi),%xmm9 1979bc3d5698SJohn Baldwin movdqa 32(%rsi),%xmm10 1980bc3d5698SJohn Baldwin movdqa 48(%rsi),%xmm11 1981bc3d5698SJohn Baldwin pand %xmm0,%xmm8 1982bc3d5698SJohn Baldwin movdqa 64(%rsi),%xmm12 1983bc3d5698SJohn Baldwin pand %xmm1,%xmm9 1984bc3d5698SJohn Baldwin movdqa 80(%rsi),%xmm13 1985bc3d5698SJohn Baldwin pand %xmm2,%xmm10 1986bc3d5698SJohn Baldwin movdqa 96(%rsi),%xmm14 1987bc3d5698SJohn Baldwin pand %xmm3,%xmm11 1988bc3d5698SJohn Baldwin movdqa 112(%rsi),%xmm15 1989bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 1990bc3d5698SJohn Baldwin pand %xmm4,%xmm12 1991bc3d5698SJohn Baldwin pand %xmm5,%xmm13 1992bc3d5698SJohn Baldwin pand %xmm6,%xmm14 1993bc3d5698SJohn Baldwin pand %xmm7,%xmm15 1994bc3d5698SJohn Baldwin por %xmm10,%xmm8 1995bc3d5698SJohn Baldwin por %xmm11,%xmm9 1996bc3d5698SJohn Baldwin por %xmm12,%xmm8 1997bc3d5698SJohn Baldwin por %xmm13,%xmm9 1998bc3d5698SJohn Baldwin por %xmm14,%xmm8 1999bc3d5698SJohn Baldwin por %xmm15,%xmm9 2000bc3d5698SJohn Baldwin 2001bc3d5698SJohn Baldwin por %xmm9,%xmm8 2002bc3d5698SJohn Baldwin pshufd $0x4e,%xmm8,%xmm9 2003bc3d5698SJohn Baldwin por %xmm9,%xmm8 2004bc3d5698SJohn Baldwin movq %xmm8,(%rdi) 2005bc3d5698SJohn Baldwin leaq 8(%rdi),%rdi 2006bc3d5698SJohn Baldwin decl %r9d 2007bc3d5698SJohn Baldwin jnz .Loop_gather 2008bc3d5698SJohn Baldwin .byte 0xf3,0xc3 2009bc3d5698SJohn Baldwin.LSEH_end_rsaz_512_gather4: 2010bc3d5698SJohn Baldwin.cfi_endproc 2011bc3d5698SJohn Baldwin.size rsaz_512_gather4,.-rsaz_512_gather4 2012bc3d5698SJohn Baldwin 2013bc3d5698SJohn Baldwin.align 64 2014bc3d5698SJohn Baldwin.Linc: 2015bc3d5698SJohn Baldwin.long 0,0, 1,1 2016bc3d5698SJohn Baldwin.long 2,2, 2,2 2017*c0855eaaSJohn Baldwin .section ".note.gnu.property", "a" 2018*c0855eaaSJohn Baldwin .p2align 3 2019*c0855eaaSJohn Baldwin .long 1f - 0f 2020*c0855eaaSJohn Baldwin .long 4f - 1f 2021*c0855eaaSJohn Baldwin .long 5 
2022*c0855eaaSJohn Baldwin0: 2023*c0855eaaSJohn Baldwin # "GNU" encoded with .byte, since .asciz isn't supported 2024*c0855eaaSJohn Baldwin # on Solaris. 2025*c0855eaaSJohn Baldwin .byte 0x47 2026*c0855eaaSJohn Baldwin .byte 0x4e 2027*c0855eaaSJohn Baldwin .byte 0x55 2028*c0855eaaSJohn Baldwin .byte 0 2029*c0855eaaSJohn Baldwin1: 2030*c0855eaaSJohn Baldwin .p2align 3 2031*c0855eaaSJohn Baldwin .long 0xc0000002 2032*c0855eaaSJohn Baldwin .long 3f - 2f 2033*c0855eaaSJohn Baldwin2: 2034*c0855eaaSJohn Baldwin .long 3 2035*c0855eaaSJohn Baldwin3: 2036*c0855eaaSJohn Baldwin .p2align 3 2037*c0855eaaSJohn Baldwin4: 2038