/* Do not modify. This file is auto-generated from poly1305-x86_64.pl. */
.text



.globl	poly1305_init
.hidden	poly1305_init
.globl	poly1305_blocks
.hidden	poly1305_blocks
.globl	poly1305_emit
.hidden	poly1305_emit

/*
 * poly1305_init
 *
 * Syntax: AT&T (GNU as), x86-64.  Register usage follows the SysV AMD64
 * argument order.
 *
 * In:    %rdi = state block; the three quadwords at 0/8/16 are zeroed
 *               (the accumulator), the clamped key halves are written
 *               to 24/32.
 *        %rsi = pointer to 16 key bytes, or NULL.
 *        %rdx = two-quadword table that receives the addresses of the
 *               selected "blocks" and "emit" routines.
 * Out:   %eax = 1 when a key was installed.
 *               When %rsi is NULL, %rax is still 0 from the initial
 *               xor, and neither the key slots nor the table at %rdx
 *               are written.
 * Clobb: %rcx, %r9, %r10, %r11, flags.
 *
 * The masks 0x0ffffffc0fffffff / 0x0ffffffc0ffffffc are the Poly1305
 * clamp of the "r" half of the key (RFC 8439, section 2.5).
 */
.type	poly1305_init,@function
.align	32
poly1305_init:
.cfi_startproc
	/* Clear the 3-limb accumulator unconditionally. */
	xorq	%rax,%rax
	movq	%rax,0(%rdi)
	movq	%rax,8(%rdi)
	movq	%rax,16(%rdi)

	/* No key supplied: return with %rax == 0. */
	cmpq	$0,%rsi
	je	.Lno_key

	/*
	 * Pick the implementation.  Start from the scalar routines and
	 * override them from the capability quadword loaded at byte
	 * offset 4 of OPENSSL_ia32cap_P:
	 *   bit 28 set -> poly1305_blocks_avx and poly1305_emit_avx
	 *   bit 37 set -> poly1305_blocks_avx2 (emit pointer unchanged)
	 * Per the OPENSSL_ia32cap documentation these bits are the AVX
	 * and AVX2 feature flags.
	 */
	leaq	poly1305_blocks(%rip),%r10
	leaq	poly1305_emit(%rip),%r11
	movq	OPENSSL_ia32cap_P+4(%rip),%r9
	leaq	poly1305_blocks_avx(%rip),%rax
	leaq	poly1305_emit_avx(%rip),%rcx
	btq	$28,%r9
	cmovcq	%rax,%r10
	cmovcq	%rcx,%r11
	leaq	poly1305_blocks_avx2(%rip),%rax
	btq	$37,%r9
	cmovcq	%rax,%r10

	/* Clamp the two key quadwords and store them at 24/32(%rdi). */
	movq	$0x0ffffffc0fffffff,%rax
	movq	$0x0ffffffc0ffffffc,%rcx
	andq	0(%rsi),%rax
	andq	8(%rsi),%rcx
	movq	%rax,24(%rdi)
	movq	%rcx,32(%rdi)

	/* Publish the selected routines and report success. */
	movq	%r10,0(%rdx)
	movq	%r11,8(%rdx)
	movl	$1,%eax
.Lno_key:
	/* Bytes F3 C3: "rep ret". */
	.byte	0xf3,0xc3
.cfi_endproc
.size	poly1305_init,.-poly1305_init

/*
 * poly1305_blocks
 *
 * Absorbs whole 16-byte blocks into the accumulator.
 *
 * In:    %rdi = state block: accumulator limbs h0/h1/h2 at 0/8/16,
 *               key limbs r0/r1 at 24/32.
 *        %rsi = input pointer.
 *        %rdx = input length in bytes; it is shifted right by 4 to get
 *               the block count, so a trailing partial block is ignored
 *               and a count of zero returns immediately.
 *        %rcx = value added (with carry) into the top limb for every
 *               block.  NOTE(review): presumably the Poly1305 pad bit
 *               supplied by the caller - confirm against the C caller.
 * Out:   updated accumulator stored back to 0/8/16(%rdi).
 * Clobb: %rax, %rdx, %rsi, %r8-%r11, flags.  %rbx, %rbp and %r12-%r15
 *        are pushed on entry and reloaded before returning.
 *
 * Register roles inside the loop:
 *   %r14:%rbx:%rbp = h0:h1:h2      %r11 = r0      %r12 = r1
 *   %r13 = r1 + (r1 shifted right by 2); the low two bits of r1 are
 *          cleared by the clamp in poly1305_init, so this is r1 * 5 / 4
 *   %r15 = blocks remaining        %rax = r1 at the top of each pass
 */
.type	poly1305_blocks,@function
.align	32
poly1305_blocks:
.cfi_startproc
.Lblocks:
	shrq	$4,%rdx
	jz	.Lno_data

	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lblocks_body:

	movq	%rdx,%r15

	movq	24(%rdi),%r11
	movq	32(%rdi),%r13

	movq	0(%rdi),%r14
	movq	8(%rdi),%rbx
	movq	16(%rdi),%rbp

	movq	%r13,%r12
	shrq	$2,%r13
	movq	%r12,%rax
	addq	%r12,%r13
	jmp	.Loop

.align	32
.Loop:
	/* h += next 16 input bytes, plus %rcx into the top limb. */
	addq	0(%rsi),%r14
	adcq	8(%rsi),%rbx
	leaq	16(%rsi),%rsi
	adcq	%rcx,%rbp
	/* %r10:%r9 = h0 * r1 */
	mulq	%r14
	movq	%rax,%r9
	movq	%r11,%rax
	movq	%rdx,%r10

	/* %r8:%r14 = h0 * r0 */
	mulq	%r14
	movq	%rax,%r14
	movq	%r11,%rax
	movq	%rdx,%r8

	/* %r10:%r9 += h1 * r0 */
	mulq	%rbx
	addq	%rax,%r9
	movq	%r13,%rax
	adcq	%rdx,%r10

	/* %r8:%r14 += h1 * s1, where s1 is the value held in %r13 */
	mulq	%rbx
	movq	%rbp,%rbx
	addq	%rax,%r14
	adcq	%rdx,%r8

	/* %r10:%r9 += h2 * s1 */
	imulq	%r13,%rbx
	addq	%rbx,%r9
	movq	%r8,%rbx
	adcq	$0,%r10

	/* New h1 in %rbx; %r10 collects the top limb (adds h2 * r0). */
	imulq	%r11,%rbp
	addq	%r9,%rbx
	movq	$-4,%rax
	adcq	%rbp,%r10

	/*
	 * Partial reduction modulo 2^130 - 5: keep the low two bits of
	 * the top limb in %rbp and fold the rest back into the low limb
	 * as (top with low 2 bits cleared) + (top shifted right by 2),
	 * i.e. five times the bits above position 130.
	 */
	andq	%r10,%rax
	movq	%r10,%rbp
	shrq	$2,%r10
	andq	$3,%rbp
	addq	%r10,%rax
	addq	%rax,%r14
	adcq	$0,%rbx
	adcq	$0,%rbp
	/* mulq clobbered %rax; reload r1 for the next pass. */
	movq	%r12,%rax
	decq	%r15
	jnz	.Loop

	movq	%r14,0(%rdi)
	movq	%rbx,8(%rdi)
	movq	%rbp,16(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbp
.cfi_restore	%rbp
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lno_data:
.Lblocks_epilogue:
	/* Bytes F3 C3: "rep ret". */
	.byte	0xf3,0xc3
.cfi_endproc
.size	poly1305_blocks,.-poly1305_blocks

/*
 * poly1305_emit
 *
 * Final reduction of the accumulator and output of the 16-byte result.
 *
 * In:    %rdi = state block (accumulator limbs at 0/8/16).
 *        %rsi = 16-byte output buffer.
 *        %rdx = pointer to two quadwords that are added to the reduced
 *               accumulator (128-bit add, carry out discarded).
 *               NOTE(review): presumably the nonce / "s" half of the
 *               key - confirm against the C caller.
 * Clobb: %rax, %rcx, %r8, %r9, %r10, flags.  The state is not modified.
 *
 * h + 5 is computed alongside h; if that sum reaches bit 130 (top limb
 * shifted right by 2 is non-zero) then h >= 2^130 - 5 and the low 128
 * bits of h + 5 are the reduced value, otherwise h itself is used.
 */
.type	poly1305_emit,@function
.align	32
poly1305_emit:
.cfi_startproc
.Lemit:
	movq	0(%rdi),%r8
	movq	8(%rdi),%r9
	movq	16(%rdi),%r10

	movq	%r8,%rax
	addq	$5,%r8
	movq	%r9,%rcx
	adcq	$0,%r9
	adcq	$0,%r10
	/* ZF from this shift drives both conditional moves below. */
	shrq	$2,%r10
	cmovnzq	%r8,%rax
	cmovnzq	%r9,%rcx

	addq	0(%rdx),%rax
	adcq	8(%rdx),%rcx
	movq	%rax,0(%rsi)
	movq	%rcx,8(%rsi)

	/* Bytes F3 C3: "rep ret". */
	.byte	0xf3,0xc3
.cfi_endproc
.size	poly1305_emit,.-poly1305_emit
/*
 * __poly1305_block
 *
 * Register-only helper: one multiply-and-partially-reduce step, the same
 * instruction sequence as the body of .Loop in poly1305_blocks without
 * the input addition and loop control.
 *
 * In:    %r14:%rbx:%rbp = h0:h1:h2
 *        %r11 = r0, %rax = r1, %r13 = s1 (r1 + r1 shifted right by 2)
 * Out:   %r14:%rbx:%rbp = h * r, partially reduced modulo 2^130 - 5
 * Clobb: %rax, %rdx, %r8, %r9, %r10, flags.
 *        %r11, %r12 and %r13 are not written.
 */
.type	__poly1305_block,@function
.align	32
__poly1305_block:
.cfi_startproc
	/* %r10:%r9 = h0 * r1 */
	mulq	%r14
	movq	%rax,%r9
	movq	%r11,%rax
	movq	%rdx,%r10

	/* %r8:%r14 = h0 * r0 */
	mulq	%r14
	movq	%rax,%r14
	movq	%r11,%rax
	movq	%rdx,%r8

	/* %r10:%r9 += h1 * r0 */
	mulq	%rbx
	addq	%rax,%r9
	movq	%r13,%rax
	adcq	%rdx,%r10

	/* %r8:%r14 += h1 * s1 */
	mulq	%rbx
	movq	%rbp,%rbx
	addq	%rax,%r14
	adcq	%rdx,%r8

	/* %r10:%r9 += h2 * s1 */
	imulq	%r13,%rbx
	addq	%rbx,%r9
	movq	%r8,%rbx
	adcq	$0,%r10

	/* New h1 in %rbx; top limb accumulates in %r10. */
	imulq	%r11,%rbp
	addq	%r9,%rbx
	movq	$-4,%rax
	adcq	%rbp,%r10

	/* Fold everything above bit 130 back in, multiplied by 5. */
	andq	%r10,%rax
	movq	%r10,%rbp
	shrq	$2,%r10
	andq	$3,%rbp
	addq	%r10,%rax
	addq	%rax,%r14
	adcq	$0,%rbx
	adcq	$0,%rbp
	/* Bytes F3 C3: "rep ret". */
	.byte	0xf3,0xc3
.cfi_endproc
.size	__poly1305_block,.-__poly1305_block

/*
 * __poly1305_init_avx
 *
 * Builds the table of key powers, split into 26-bit limbs, that is
 * stored at state offsets 48 .. 191.
 *
 * In:    %rdi = state block
 *        %r11 = r0, %r12 = r1, %r13 = s1 (as set up by the callers)
 * Out:   table written; %rdi restored to its entry value.
 * Clobb: %rax, %rdx, %rbx, %rbp, %r8, %r9, %r10, %r14, flags
 *        (includes everything __poly1305_block clobbers).
 *
 * The accumulator registers are seeded with r (h2 = 0) and multiplied
 * by r three times via __poly1305_block, giving r^2, r^3 and r^4.
 * %rdi is biased by 48+64 so every store uses a short displacement.
 *
 * Table layout relative to the biased %rdi, one 16-byte row per entry:
 *   row  -64: limb0      row -48: limb1      row -32: 5*limb1
 *   row  -16: limb2      row   0: 5*limb2    row  16: limb3
 *   row   32: 5*limb3    row  48: limb4      row  64: 5*limb4
 * Within a row the four dwords are: +0 r^2, +4 r^1, +8 r^4, +12 r^3.
 *
 * Limb extraction from the 64-bit limbs (lo, mid, top):
 *   limb0 = lo bits 0..25          limb1 = lo bits 26..51
 *   limb2 = lo bits 52..63 joined with mid bits 0..13
 *   limb3 = mid bits 14..39
 *   limb4 = mid bits 40..63 joined with top shifted left by 24
 */
.type	__poly1305_init_avx,@function
.align	32
__poly1305_init_avx:
.cfi_startproc
	/* h = r */
	movq	%r11,%r14
	movq	%r12,%rbx
	xorq	%rbp,%rbp

	leaq	48+64(%rdi),%rdi

	/* h = r^2 */
	movq	%r12,%rax
	call	__poly1305_block

	/* limb0 of r^2 (from %r14) and of r (from %r11) */
	movl	$0x3ffffff,%eax
	movl	$0x3ffffff,%edx
	movq	%r14,%r8
	andl	%r14d,%eax
	movq	%r11,%r9
	andl	%r11d,%edx
	movl	%eax,-64(%rdi)
	shrq	$26,%r8
	movl	%edx,-60(%rdi)
	shrq	$26,%r9

	/* limb1 and 5*limb1 */
	movl	$0x3ffffff,%eax
	movl	$0x3ffffff,%edx
	andl	%r8d,%eax
	andl	%r9d,%edx
	movl	%eax,-48(%rdi)
	leal	(%rax,%rax,4),%eax
	movl	%edx,-44(%rdi)
	leal	(%rdx,%rdx,4),%edx
	movl	%eax,-32(%rdi)
	shrq	$26,%r8
	movl	%edx,-28(%rdi)
	shrq	$26,%r9

	/* limb2 and 5*limb2 (straddles the low and middle 64-bit limbs) */
	movq	%rbx,%rax
	movq	%r12,%rdx
	shlq	$12,%rax
	shlq	$12,%rdx
	orq	%r8,%rax
	orq	%r9,%rdx
	andl	$0x3ffffff,%eax
	andl	$0x3ffffff,%edx
	movl	%eax,-16(%rdi)
	leal	(%rax,%rax,4),%eax
	movl	%edx,-12(%rdi)
	leal	(%rdx,%rdx,4),%edx
	movl	%eax,0(%rdi)
	movq	%rbx,%r8
	movl	%edx,4(%rdi)
	movq	%r12,%r9

	/* limb3 and 5*limb3 */
	movl	$0x3ffffff,%eax
	movl	$0x3ffffff,%edx
	shrq	$14,%r8
	shrq	$14,%r9
	andl	%r8d,%eax
	andl	%r9d,%edx
	movl	%eax,16(%rdi)
	leal	(%rax,%rax,4),%eax
	movl	%edx,20(%rdi)
	leal	(%rdx,%rdx,4),%edx
	movl	%eax,32(%rdi)
	shrq	$26,%r8
	movl	%edx,36(%rdi)
	shrq	$26,%r9

	/* limb4 and 5*limb4; only r^2 has a top limb (%rbp) to merge in */
	movq	%rbp,%rax
	shlq	$24,%rax
	orq	%rax,%r8
	movl	%r8d,48(%rdi)
	leaq	(%r8,%r8,4),%r8
	movl	%r9d,52(%rdi)
	leaq	(%r9,%r9,4),%r9
	movl	%r8d,64(%rdi)
	movl	%r9d,68(%rdi)

	/* h = r^3, stored in dword column +12 of each row */
	movq	%r12,%rax
	call	__poly1305_block

	movl	$0x3ffffff,%eax
	movq	%r14,%r8
	andl	%r14d,%eax
	shrq	$26,%r8
	movl	%eax,-52(%rdi)

	movl	$0x3ffffff,%edx
	andl	%r8d,%edx
	movl	%edx,-36(%rdi)
	leal	(%rdx,%rdx,4),%edx
	shrq	$26,%r8
	movl	%edx,-20(%rdi)

	movq	%rbx,%rax
	shlq	$12,%rax
	orq	%r8,%rax
	andl	$0x3ffffff,%eax
	movl	%eax,-4(%rdi)
	leal	(%rax,%rax,4),%eax
	movq	%rbx,%r8
	movl	%eax,12(%rdi)

	movl	$0x3ffffff,%edx
	shrq	$14,%r8
	andl	%r8d,%edx
	movl	%edx,28(%rdi)
	leal	(%rdx,%rdx,4),%edx
	shrq	$26,%r8
	movl	%edx,44(%rdi)

	movq	%rbp,%rax
	shlq	$24,%rax
	orq	%rax,%r8
	movl	%r8d,60(%rdi)
	leaq	(%r8,%r8,4),%r8
	movl	%r8d,76(%rdi)

	/* h = r^4, stored in dword column +8 of each row */
	movq	%r12,%rax
	call	__poly1305_block

	movl	$0x3ffffff,%eax
	movq	%r14,%r8
	andl	%r14d,%eax
	shrq	$26,%r8
	movl	%eax,-56(%rdi)

	movl	$0x3ffffff,%edx
	andl	%r8d,%edx
	movl	%edx,-40(%rdi)
	leal	(%rdx,%rdx,4),%edx
	shrq	$26,%r8
	movl	%edx,-24(%rdi)

	movq	%rbx,%rax
	shlq	$12,%rax
	orq	%r8,%rax
	andl	$0x3ffffff,%eax
	movl	%eax,-8(%rdi)
	leal	(%rax,%rax,4),%eax
	movq	%rbx,%r8
	movl	%eax,8(%rdi)

	movl	$0x3ffffff,%edx
	shrq	$14,%r8
	andl	%r8d,%edx
	movl	%edx,24(%rdi)
	leal	(%rdx,%rdx,4),%edx
	shrq	$26,%r8
	movl	%edx,40(%rdi)

	movq	%rbp,%rax
	shlq	$24,%rax
	orq	%rax,%r8
	movl	%r8d,56(%rdi)
	leaq	(%r8,%r8,4),%r8
	movl	%r8d,72(%rdi)

	/* Undo the bias applied to %rdi on entry. */
	leaq	-48-64(%rdi),%rdi
	/* Bytes F3 C3: "rep ret". */
	.byte	0xf3,0xc3
.cfi_endproc
.size	__poly1305_init_avx,.-__poly1305_init_avx

/* The operand of the directive below (poly1305_blocks_avx,@function) begins the next source line. */
.type 
poly1305_blocks_avx,@function 402bc3d5698SJohn Baldwin.align 32 403bc3d5698SJohn Baldwinpoly1305_blocks_avx: 404bc3d5698SJohn Baldwin.cfi_startproc 405bc3d5698SJohn Baldwin movl 20(%rdi),%r8d 406bc3d5698SJohn Baldwin cmpq $128,%rdx 407bc3d5698SJohn Baldwin jae .Lblocks_avx 408bc3d5698SJohn Baldwin testl %r8d,%r8d 409bc3d5698SJohn Baldwin jz .Lblocks 410bc3d5698SJohn Baldwin 411bc3d5698SJohn Baldwin.Lblocks_avx: 412bc3d5698SJohn Baldwin andq $-16,%rdx 413bc3d5698SJohn Baldwin jz .Lno_data_avx 414bc3d5698SJohn Baldwin 415bc3d5698SJohn Baldwin vzeroupper 416bc3d5698SJohn Baldwin 417bc3d5698SJohn Baldwin testl %r8d,%r8d 418bc3d5698SJohn Baldwin jz .Lbase2_64_avx 419bc3d5698SJohn Baldwin 420bc3d5698SJohn Baldwin testq $31,%rdx 421bc3d5698SJohn Baldwin jz .Leven_avx 422bc3d5698SJohn Baldwin 423bc3d5698SJohn Baldwin pushq %rbx 424bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 425bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 426bc3d5698SJohn Baldwin pushq %rbp 427bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 428bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 429bc3d5698SJohn Baldwin pushq %r12 430bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 431bc3d5698SJohn Baldwin.cfi_offset %r12,-32 432bc3d5698SJohn Baldwin pushq %r13 433bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 434bc3d5698SJohn Baldwin.cfi_offset %r13,-40 435bc3d5698SJohn Baldwin pushq %r14 436bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 437bc3d5698SJohn Baldwin.cfi_offset %r14,-48 438bc3d5698SJohn Baldwin pushq %r15 439bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 440bc3d5698SJohn Baldwin.cfi_offset %r15,-56 441bc3d5698SJohn Baldwin.Lblocks_avx_body: 442bc3d5698SJohn Baldwin 443bc3d5698SJohn Baldwin movq %rdx,%r15 444bc3d5698SJohn Baldwin 445bc3d5698SJohn Baldwin movq 0(%rdi),%r8 446bc3d5698SJohn Baldwin movq 8(%rdi),%r9 447bc3d5698SJohn Baldwin movl 16(%rdi),%ebp 448bc3d5698SJohn Baldwin 449bc3d5698SJohn Baldwin movq 24(%rdi),%r11 450bc3d5698SJohn Baldwin movq 32(%rdi),%r13 451bc3d5698SJohn Baldwin 452bc3d5698SJohn Baldwin 
453bc3d5698SJohn Baldwin movl %r8d,%r14d 454bc3d5698SJohn Baldwin andq $-2147483648,%r8 455bc3d5698SJohn Baldwin movq %r9,%r12 456bc3d5698SJohn Baldwin movl %r9d,%ebx 457bc3d5698SJohn Baldwin andq $-2147483648,%r9 458bc3d5698SJohn Baldwin 459bc3d5698SJohn Baldwin shrq $6,%r8 460bc3d5698SJohn Baldwin shlq $52,%r12 461bc3d5698SJohn Baldwin addq %r8,%r14 462bc3d5698SJohn Baldwin shrq $12,%rbx 463bc3d5698SJohn Baldwin shrq $18,%r9 464bc3d5698SJohn Baldwin addq %r12,%r14 465bc3d5698SJohn Baldwin adcq %r9,%rbx 466bc3d5698SJohn Baldwin 467bc3d5698SJohn Baldwin movq %rbp,%r8 468bc3d5698SJohn Baldwin shlq $40,%r8 469bc3d5698SJohn Baldwin shrq $24,%rbp 470bc3d5698SJohn Baldwin addq %r8,%rbx 471bc3d5698SJohn Baldwin adcq $0,%rbp 472bc3d5698SJohn Baldwin 473bc3d5698SJohn Baldwin movq $-4,%r9 474bc3d5698SJohn Baldwin movq %rbp,%r8 475bc3d5698SJohn Baldwin andq %rbp,%r9 476bc3d5698SJohn Baldwin shrq $2,%r8 477bc3d5698SJohn Baldwin andq $3,%rbp 478bc3d5698SJohn Baldwin addq %r9,%r8 479bc3d5698SJohn Baldwin addq %r8,%r14 480bc3d5698SJohn Baldwin adcq $0,%rbx 481bc3d5698SJohn Baldwin adcq $0,%rbp 482bc3d5698SJohn Baldwin 483bc3d5698SJohn Baldwin movq %r13,%r12 484bc3d5698SJohn Baldwin movq %r13,%rax 485bc3d5698SJohn Baldwin shrq $2,%r13 486bc3d5698SJohn Baldwin addq %r12,%r13 487bc3d5698SJohn Baldwin 488bc3d5698SJohn Baldwin addq 0(%rsi),%r14 489bc3d5698SJohn Baldwin adcq 8(%rsi),%rbx 490bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 491bc3d5698SJohn Baldwin adcq %rcx,%rbp 492bc3d5698SJohn Baldwin 493bc3d5698SJohn Baldwin call __poly1305_block 494bc3d5698SJohn Baldwin 495bc3d5698SJohn Baldwin testq %rcx,%rcx 496bc3d5698SJohn Baldwin jz .Lstore_base2_64_avx 497bc3d5698SJohn Baldwin 498bc3d5698SJohn Baldwin 499bc3d5698SJohn Baldwin movq %r14,%rax 500bc3d5698SJohn Baldwin movq %r14,%rdx 501bc3d5698SJohn Baldwin shrq $52,%r14 502bc3d5698SJohn Baldwin movq %rbx,%r11 503bc3d5698SJohn Baldwin movq %rbx,%r12 504bc3d5698SJohn Baldwin shrq $26,%rdx 505bc3d5698SJohn Baldwin andq $0x3ffffff,%rax 
506bc3d5698SJohn Baldwin shlq $12,%r11 507bc3d5698SJohn Baldwin andq $0x3ffffff,%rdx 508bc3d5698SJohn Baldwin shrq $14,%rbx 509bc3d5698SJohn Baldwin orq %r11,%r14 510bc3d5698SJohn Baldwin shlq $24,%rbp 511bc3d5698SJohn Baldwin andq $0x3ffffff,%r14 512bc3d5698SJohn Baldwin shrq $40,%r12 513bc3d5698SJohn Baldwin andq $0x3ffffff,%rbx 514bc3d5698SJohn Baldwin orq %r12,%rbp 515bc3d5698SJohn Baldwin 516bc3d5698SJohn Baldwin subq $16,%r15 517bc3d5698SJohn Baldwin jz .Lstore_base2_26_avx 518bc3d5698SJohn Baldwin 519bc3d5698SJohn Baldwin vmovd %eax,%xmm0 520bc3d5698SJohn Baldwin vmovd %edx,%xmm1 521bc3d5698SJohn Baldwin vmovd %r14d,%xmm2 522bc3d5698SJohn Baldwin vmovd %ebx,%xmm3 523bc3d5698SJohn Baldwin vmovd %ebp,%xmm4 524bc3d5698SJohn Baldwin jmp .Lproceed_avx 525bc3d5698SJohn Baldwin 526bc3d5698SJohn Baldwin.align 32 527bc3d5698SJohn Baldwin.Lstore_base2_64_avx: 528bc3d5698SJohn Baldwin movq %r14,0(%rdi) 529bc3d5698SJohn Baldwin movq %rbx,8(%rdi) 530bc3d5698SJohn Baldwin movq %rbp,16(%rdi) 531bc3d5698SJohn Baldwin jmp .Ldone_avx 532bc3d5698SJohn Baldwin 533bc3d5698SJohn Baldwin.align 16 534bc3d5698SJohn Baldwin.Lstore_base2_26_avx: 535bc3d5698SJohn Baldwin movl %eax,0(%rdi) 536bc3d5698SJohn Baldwin movl %edx,4(%rdi) 537bc3d5698SJohn Baldwin movl %r14d,8(%rdi) 538bc3d5698SJohn Baldwin movl %ebx,12(%rdi) 539bc3d5698SJohn Baldwin movl %ebp,16(%rdi) 540bc3d5698SJohn Baldwin.align 16 541bc3d5698SJohn Baldwin.Ldone_avx: 542bc3d5698SJohn Baldwin movq 0(%rsp),%r15 543bc3d5698SJohn Baldwin.cfi_restore %r15 544bc3d5698SJohn Baldwin movq 8(%rsp),%r14 545bc3d5698SJohn Baldwin.cfi_restore %r14 546bc3d5698SJohn Baldwin movq 16(%rsp),%r13 547bc3d5698SJohn Baldwin.cfi_restore %r13 548bc3d5698SJohn Baldwin movq 24(%rsp),%r12 549bc3d5698SJohn Baldwin.cfi_restore %r12 550bc3d5698SJohn Baldwin movq 32(%rsp),%rbp 551bc3d5698SJohn Baldwin.cfi_restore %rbp 552bc3d5698SJohn Baldwin movq 40(%rsp),%rbx 553bc3d5698SJohn Baldwin.cfi_restore %rbx 554bc3d5698SJohn Baldwin leaq 48(%rsp),%rsp 
555bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset -48 556bc3d5698SJohn Baldwin.Lno_data_avx: 557bc3d5698SJohn Baldwin.Lblocks_avx_epilogue: 558bc3d5698SJohn Baldwin .byte 0xf3,0xc3 559bc3d5698SJohn Baldwin.cfi_endproc 560bc3d5698SJohn Baldwin 561bc3d5698SJohn Baldwin.align 32 562bc3d5698SJohn Baldwin.Lbase2_64_avx: 563bc3d5698SJohn Baldwin.cfi_startproc 564bc3d5698SJohn Baldwin pushq %rbx 565bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 566bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 567bc3d5698SJohn Baldwin pushq %rbp 568bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 569bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 570bc3d5698SJohn Baldwin pushq %r12 571bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 572bc3d5698SJohn Baldwin.cfi_offset %r12,-32 573bc3d5698SJohn Baldwin pushq %r13 574bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 575bc3d5698SJohn Baldwin.cfi_offset %r13,-40 576bc3d5698SJohn Baldwin pushq %r14 577bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 578bc3d5698SJohn Baldwin.cfi_offset %r14,-48 579bc3d5698SJohn Baldwin pushq %r15 580bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 581bc3d5698SJohn Baldwin.cfi_offset %r15,-56 582bc3d5698SJohn Baldwin.Lbase2_64_avx_body: 583bc3d5698SJohn Baldwin 584bc3d5698SJohn Baldwin movq %rdx,%r15 585bc3d5698SJohn Baldwin 586bc3d5698SJohn Baldwin movq 24(%rdi),%r11 587bc3d5698SJohn Baldwin movq 32(%rdi),%r13 588bc3d5698SJohn Baldwin 589bc3d5698SJohn Baldwin movq 0(%rdi),%r14 590bc3d5698SJohn Baldwin movq 8(%rdi),%rbx 591bc3d5698SJohn Baldwin movl 16(%rdi),%ebp 592bc3d5698SJohn Baldwin 593bc3d5698SJohn Baldwin movq %r13,%r12 594bc3d5698SJohn Baldwin movq %r13,%rax 595bc3d5698SJohn Baldwin shrq $2,%r13 596bc3d5698SJohn Baldwin addq %r12,%r13 597bc3d5698SJohn Baldwin 598bc3d5698SJohn Baldwin testq $31,%rdx 599bc3d5698SJohn Baldwin jz .Linit_avx 600bc3d5698SJohn Baldwin 601bc3d5698SJohn Baldwin addq 0(%rsi),%r14 602bc3d5698SJohn Baldwin adcq 8(%rsi),%rbx 603bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 604bc3d5698SJohn Baldwin adcq %rcx,%rbp 
605bc3d5698SJohn Baldwin subq $16,%r15 606bc3d5698SJohn Baldwin 607bc3d5698SJohn Baldwin call __poly1305_block 608bc3d5698SJohn Baldwin 609bc3d5698SJohn Baldwin.Linit_avx: 610bc3d5698SJohn Baldwin 611bc3d5698SJohn Baldwin movq %r14,%rax 612bc3d5698SJohn Baldwin movq %r14,%rdx 613bc3d5698SJohn Baldwin shrq $52,%r14 614bc3d5698SJohn Baldwin movq %rbx,%r8 615bc3d5698SJohn Baldwin movq %rbx,%r9 616bc3d5698SJohn Baldwin shrq $26,%rdx 617bc3d5698SJohn Baldwin andq $0x3ffffff,%rax 618bc3d5698SJohn Baldwin shlq $12,%r8 619bc3d5698SJohn Baldwin andq $0x3ffffff,%rdx 620bc3d5698SJohn Baldwin shrq $14,%rbx 621bc3d5698SJohn Baldwin orq %r8,%r14 622bc3d5698SJohn Baldwin shlq $24,%rbp 623bc3d5698SJohn Baldwin andq $0x3ffffff,%r14 624bc3d5698SJohn Baldwin shrq $40,%r9 625bc3d5698SJohn Baldwin andq $0x3ffffff,%rbx 626bc3d5698SJohn Baldwin orq %r9,%rbp 627bc3d5698SJohn Baldwin 628bc3d5698SJohn Baldwin vmovd %eax,%xmm0 629bc3d5698SJohn Baldwin vmovd %edx,%xmm1 630bc3d5698SJohn Baldwin vmovd %r14d,%xmm2 631bc3d5698SJohn Baldwin vmovd %ebx,%xmm3 632bc3d5698SJohn Baldwin vmovd %ebp,%xmm4 633bc3d5698SJohn Baldwin movl $1,20(%rdi) 634bc3d5698SJohn Baldwin 635bc3d5698SJohn Baldwin call __poly1305_init_avx 636bc3d5698SJohn Baldwin 637bc3d5698SJohn Baldwin.Lproceed_avx: 638bc3d5698SJohn Baldwin movq %r15,%rdx 639bc3d5698SJohn Baldwin 640bc3d5698SJohn Baldwin movq 0(%rsp),%r15 641bc3d5698SJohn Baldwin.cfi_restore %r15 642bc3d5698SJohn Baldwin movq 8(%rsp),%r14 643bc3d5698SJohn Baldwin.cfi_restore %r14 644bc3d5698SJohn Baldwin movq 16(%rsp),%r13 645bc3d5698SJohn Baldwin.cfi_restore %r13 646bc3d5698SJohn Baldwin movq 24(%rsp),%r12 647bc3d5698SJohn Baldwin.cfi_restore %r12 648bc3d5698SJohn Baldwin movq 32(%rsp),%rbp 649bc3d5698SJohn Baldwin.cfi_restore %rbp 650bc3d5698SJohn Baldwin movq 40(%rsp),%rbx 651bc3d5698SJohn Baldwin.cfi_restore %rbx 652bc3d5698SJohn Baldwin leaq 48(%rsp),%rax 653bc3d5698SJohn Baldwin leaq 48(%rsp),%rsp 654bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset -48 655bc3d5698SJohn 
Baldwin.Lbase2_64_avx_epilogue: 656bc3d5698SJohn Baldwin jmp .Ldo_avx 657bc3d5698SJohn Baldwin.cfi_endproc 658bc3d5698SJohn Baldwin 659bc3d5698SJohn Baldwin.align 32 660bc3d5698SJohn Baldwin.Leven_avx: 661bc3d5698SJohn Baldwin.cfi_startproc 662bc3d5698SJohn Baldwin vmovd 0(%rdi),%xmm0 663bc3d5698SJohn Baldwin vmovd 4(%rdi),%xmm1 664bc3d5698SJohn Baldwin vmovd 8(%rdi),%xmm2 665bc3d5698SJohn Baldwin vmovd 12(%rdi),%xmm3 666bc3d5698SJohn Baldwin vmovd 16(%rdi),%xmm4 667bc3d5698SJohn Baldwin 668bc3d5698SJohn Baldwin.Ldo_avx: 669bc3d5698SJohn Baldwin leaq -88(%rsp),%r11 670bc3d5698SJohn Baldwin.cfi_def_cfa %r11,0x60 671bc3d5698SJohn Baldwin subq $0x178,%rsp 672bc3d5698SJohn Baldwin subq $64,%rdx 673bc3d5698SJohn Baldwin leaq -32(%rsi),%rax 674bc3d5698SJohn Baldwin cmovcq %rax,%rsi 675bc3d5698SJohn Baldwin 676bc3d5698SJohn Baldwin vmovdqu 48(%rdi),%xmm14 677bc3d5698SJohn Baldwin leaq 112(%rdi),%rdi 678bc3d5698SJohn Baldwin leaq .Lconst(%rip),%rcx 679bc3d5698SJohn Baldwin 680bc3d5698SJohn Baldwin 681bc3d5698SJohn Baldwin 682bc3d5698SJohn Baldwin vmovdqu 32(%rsi),%xmm5 683bc3d5698SJohn Baldwin vmovdqu 48(%rsi),%xmm6 684bc3d5698SJohn Baldwin vmovdqa 64(%rcx),%xmm15 685bc3d5698SJohn Baldwin 686bc3d5698SJohn Baldwin vpsrldq $6,%xmm5,%xmm7 687bc3d5698SJohn Baldwin vpsrldq $6,%xmm6,%xmm8 688bc3d5698SJohn Baldwin vpunpckhqdq %xmm6,%xmm5,%xmm9 689bc3d5698SJohn Baldwin vpunpcklqdq %xmm6,%xmm5,%xmm5 690bc3d5698SJohn Baldwin vpunpcklqdq %xmm8,%xmm7,%xmm8 691bc3d5698SJohn Baldwin 692bc3d5698SJohn Baldwin vpsrlq $40,%xmm9,%xmm9 693bc3d5698SJohn Baldwin vpsrlq $26,%xmm5,%xmm6 694bc3d5698SJohn Baldwin vpand %xmm15,%xmm5,%xmm5 695bc3d5698SJohn Baldwin vpsrlq $4,%xmm8,%xmm7 696bc3d5698SJohn Baldwin vpand %xmm15,%xmm6,%xmm6 697bc3d5698SJohn Baldwin vpsrlq $30,%xmm8,%xmm8 698bc3d5698SJohn Baldwin vpand %xmm15,%xmm7,%xmm7 699bc3d5698SJohn Baldwin vpand %xmm15,%xmm8,%xmm8 700bc3d5698SJohn Baldwin vpor 32(%rcx),%xmm9,%xmm9 701bc3d5698SJohn Baldwin 702bc3d5698SJohn Baldwin jbe .Lskip_loop_avx 
703bc3d5698SJohn Baldwin 704bc3d5698SJohn Baldwin 705bc3d5698SJohn Baldwin vmovdqu -48(%rdi),%xmm11 706bc3d5698SJohn Baldwin vmovdqu -32(%rdi),%xmm12 707bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm14,%xmm13 708bc3d5698SJohn Baldwin vpshufd $0x44,%xmm14,%xmm10 709bc3d5698SJohn Baldwin vmovdqa %xmm13,-144(%r11) 710bc3d5698SJohn Baldwin vmovdqa %xmm10,0(%rsp) 711bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm11,%xmm14 712bc3d5698SJohn Baldwin vmovdqu -16(%rdi),%xmm10 713bc3d5698SJohn Baldwin vpshufd $0x44,%xmm11,%xmm11 714bc3d5698SJohn Baldwin vmovdqa %xmm14,-128(%r11) 715bc3d5698SJohn Baldwin vmovdqa %xmm11,16(%rsp) 716bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm12,%xmm13 717bc3d5698SJohn Baldwin vmovdqu 0(%rdi),%xmm11 718bc3d5698SJohn Baldwin vpshufd $0x44,%xmm12,%xmm12 719bc3d5698SJohn Baldwin vmovdqa %xmm13,-112(%r11) 720bc3d5698SJohn Baldwin vmovdqa %xmm12,32(%rsp) 721bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm10,%xmm14 722bc3d5698SJohn Baldwin vmovdqu 16(%rdi),%xmm12 723bc3d5698SJohn Baldwin vpshufd $0x44,%xmm10,%xmm10 724bc3d5698SJohn Baldwin vmovdqa %xmm14,-96(%r11) 725bc3d5698SJohn Baldwin vmovdqa %xmm10,48(%rsp) 726bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm11,%xmm13 727bc3d5698SJohn Baldwin vmovdqu 32(%rdi),%xmm10 728bc3d5698SJohn Baldwin vpshufd $0x44,%xmm11,%xmm11 729bc3d5698SJohn Baldwin vmovdqa %xmm13,-80(%r11) 730bc3d5698SJohn Baldwin vmovdqa %xmm11,64(%rsp) 731bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm12,%xmm14 732bc3d5698SJohn Baldwin vmovdqu 48(%rdi),%xmm11 733bc3d5698SJohn Baldwin vpshufd $0x44,%xmm12,%xmm12 734bc3d5698SJohn Baldwin vmovdqa %xmm14,-64(%r11) 735bc3d5698SJohn Baldwin vmovdqa %xmm12,80(%rsp) 736bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm10,%xmm13 737bc3d5698SJohn Baldwin vmovdqu 64(%rdi),%xmm12 738bc3d5698SJohn Baldwin vpshufd $0x44,%xmm10,%xmm10 739bc3d5698SJohn Baldwin vmovdqa %xmm13,-48(%r11) 740bc3d5698SJohn Baldwin vmovdqa %xmm10,96(%rsp) 741bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm11,%xmm14 742bc3d5698SJohn Baldwin vpshufd $0x44,%xmm11,%xmm11 
743bc3d5698SJohn Baldwin vmovdqa %xmm14,-32(%r11) 744bc3d5698SJohn Baldwin vmovdqa %xmm11,112(%rsp) 745bc3d5698SJohn Baldwin vpshufd $0xEE,%xmm12,%xmm13 746bc3d5698SJohn Baldwin vmovdqa 0(%rsp),%xmm14 747bc3d5698SJohn Baldwin vpshufd $0x44,%xmm12,%xmm12 748bc3d5698SJohn Baldwin vmovdqa %xmm13,-16(%r11) 749bc3d5698SJohn Baldwin vmovdqa %xmm12,128(%rsp) 750bc3d5698SJohn Baldwin 751bc3d5698SJohn Baldwin jmp .Loop_avx 752bc3d5698SJohn Baldwin 753bc3d5698SJohn Baldwin.align 32 754bc3d5698SJohn Baldwin.Loop_avx: 755bc3d5698SJohn Baldwin 756bc3d5698SJohn Baldwin 757bc3d5698SJohn Baldwin 758bc3d5698SJohn Baldwin 759bc3d5698SJohn Baldwin 760bc3d5698SJohn Baldwin 761bc3d5698SJohn Baldwin 762bc3d5698SJohn Baldwin 763bc3d5698SJohn Baldwin 764bc3d5698SJohn Baldwin 765bc3d5698SJohn Baldwin 766bc3d5698SJohn Baldwin 767bc3d5698SJohn Baldwin 768bc3d5698SJohn Baldwin 769bc3d5698SJohn Baldwin 770bc3d5698SJohn Baldwin 771bc3d5698SJohn Baldwin 772bc3d5698SJohn Baldwin 773bc3d5698SJohn Baldwin 774bc3d5698SJohn Baldwin 775bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm14,%xmm10 776bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm14,%xmm11 777bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%r11) 778bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm14,%xmm12 779bc3d5698SJohn Baldwin vmovdqa 16(%rsp),%xmm2 780bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm14,%xmm13 781bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm14,%xmm14 782bc3d5698SJohn Baldwin 783bc3d5698SJohn Baldwin vmovdqa %xmm0,0(%r11) 784bc3d5698SJohn Baldwin vpmuludq 32(%rsp),%xmm9,%xmm0 785bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%r11) 786bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm2,%xmm1 787bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm10,%xmm10 788bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm14,%xmm14 789bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%r11) 790bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm2,%xmm0 791bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm2,%xmm1 792bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm13,%xmm13 793bc3d5698SJohn Baldwin vmovdqa 48(%rsp),%xmm3 794bc3d5698SJohn Baldwin vpaddq 
%xmm1,%xmm12,%xmm12 795bc3d5698SJohn Baldwin vmovdqa %xmm4,64(%r11) 796bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm2,%xmm2 797bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm3,%xmm0 798bc3d5698SJohn Baldwin vpaddq %xmm2,%xmm11,%xmm11 799bc3d5698SJohn Baldwin 800bc3d5698SJohn Baldwin vmovdqa 64(%rsp),%xmm4 801bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm14,%xmm14 802bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm3,%xmm1 803bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm3,%xmm3 804bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm13,%xmm13 805bc3d5698SJohn Baldwin vmovdqa 80(%rsp),%xmm2 806bc3d5698SJohn Baldwin vpaddq %xmm3,%xmm12,%xmm12 807bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm4,%xmm0 808bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm4,%xmm4 809bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm11,%xmm11 810bc3d5698SJohn Baldwin vmovdqa 96(%rsp),%xmm3 811bc3d5698SJohn Baldwin vpaddq %xmm4,%xmm10,%xmm10 812bc3d5698SJohn Baldwin 813bc3d5698SJohn Baldwin vmovdqa 128(%rsp),%xmm4 814bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm2,%xmm1 815bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm2,%xmm2 816bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm14,%xmm14 817bc3d5698SJohn Baldwin vpaddq %xmm2,%xmm13,%xmm13 818bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm3,%xmm0 819bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm3,%xmm1 820bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm12,%xmm12 821bc3d5698SJohn Baldwin vmovdqu 0(%rsi),%xmm0 822bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm11,%xmm11 823bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm3,%xmm3 824bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm4,%xmm7 825bc3d5698SJohn Baldwin vpaddq %xmm3,%xmm10,%xmm10 826bc3d5698SJohn Baldwin 827bc3d5698SJohn Baldwin vmovdqu 16(%rsi),%xmm1 828bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm11,%xmm11 829bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm4,%xmm8 830bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm4,%xmm9 831bc3d5698SJohn Baldwin vpsrldq $6,%xmm0,%xmm2 832bc3d5698SJohn Baldwin vpaddq %xmm8,%xmm12,%xmm12 833bc3d5698SJohn Baldwin vpaddq %xmm9,%xmm13,%xmm13 834bc3d5698SJohn Baldwin vpsrldq $6,%xmm1,%xmm3 835bc3d5698SJohn 
Baldwin vpmuludq 112(%rsp),%xmm5,%xmm9 836bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm4,%xmm5 837bc3d5698SJohn Baldwin vpunpckhqdq %xmm1,%xmm0,%xmm4 838bc3d5698SJohn Baldwin vpaddq %xmm9,%xmm14,%xmm14 839bc3d5698SJohn Baldwin vmovdqa -144(%r11),%xmm9 840bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm10,%xmm10 841bc3d5698SJohn Baldwin 842bc3d5698SJohn Baldwin vpunpcklqdq %xmm1,%xmm0,%xmm0 843bc3d5698SJohn Baldwin vpunpcklqdq %xmm3,%xmm2,%xmm3 844bc3d5698SJohn Baldwin 845bc3d5698SJohn Baldwin 846bc3d5698SJohn Baldwin vpsrldq $5,%xmm4,%xmm4 847bc3d5698SJohn Baldwin vpsrlq $26,%xmm0,%xmm1 848bc3d5698SJohn Baldwin vpand %xmm15,%xmm0,%xmm0 849bc3d5698SJohn Baldwin vpsrlq $4,%xmm3,%xmm2 850bc3d5698SJohn Baldwin vpand %xmm15,%xmm1,%xmm1 851bc3d5698SJohn Baldwin vpand 0(%rcx),%xmm4,%xmm4 852bc3d5698SJohn Baldwin vpsrlq $30,%xmm3,%xmm3 853bc3d5698SJohn Baldwin vpand %xmm15,%xmm2,%xmm2 854bc3d5698SJohn Baldwin vpand %xmm15,%xmm3,%xmm3 855bc3d5698SJohn Baldwin vpor 32(%rcx),%xmm4,%xmm4 856bc3d5698SJohn Baldwin 857bc3d5698SJohn Baldwin vpaddq 0(%r11),%xmm0,%xmm0 858bc3d5698SJohn Baldwin vpaddq 16(%r11),%xmm1,%xmm1 859bc3d5698SJohn Baldwin vpaddq 32(%r11),%xmm2,%xmm2 860bc3d5698SJohn Baldwin vpaddq 48(%r11),%xmm3,%xmm3 861bc3d5698SJohn Baldwin vpaddq 64(%r11),%xmm4,%xmm4 862bc3d5698SJohn Baldwin 863bc3d5698SJohn Baldwin leaq 32(%rsi),%rax 864bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 865bc3d5698SJohn Baldwin subq $64,%rdx 866bc3d5698SJohn Baldwin cmovcq %rax,%rsi 867bc3d5698SJohn Baldwin 868bc3d5698SJohn Baldwin 869bc3d5698SJohn Baldwin 870bc3d5698SJohn Baldwin 871bc3d5698SJohn Baldwin 872bc3d5698SJohn Baldwin 873bc3d5698SJohn Baldwin 874bc3d5698SJohn Baldwin 875bc3d5698SJohn Baldwin 876bc3d5698SJohn Baldwin 877bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm9,%xmm5 878bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm9,%xmm6 879bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm10,%xmm10 880bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm11,%xmm11 881bc3d5698SJohn Baldwin vmovdqa -128(%r11),%xmm7 882bc3d5698SJohn Baldwin 
vpmuludq %xmm2,%xmm9,%xmm5 883bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm9,%xmm6 884bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm12,%xmm12 885bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm13,%xmm13 886bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm9,%xmm9 887bc3d5698SJohn Baldwin vpmuludq -112(%r11),%xmm4,%xmm5 888bc3d5698SJohn Baldwin vpaddq %xmm9,%xmm14,%xmm14 889bc3d5698SJohn Baldwin 890bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm10,%xmm10 891bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm7,%xmm6 892bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm7,%xmm5 893bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm13,%xmm13 894bc3d5698SJohn Baldwin vmovdqa -96(%r11),%xmm8 895bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm14,%xmm14 896bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm7,%xmm6 897bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm7,%xmm7 898bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm12,%xmm12 899bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm11,%xmm11 900bc3d5698SJohn Baldwin 901bc3d5698SJohn Baldwin vmovdqa -80(%r11),%xmm9 902bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm8,%xmm5 903bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm8,%xmm6 904bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm14,%xmm14 905bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm13,%xmm13 906bc3d5698SJohn Baldwin vmovdqa -64(%r11),%xmm7 907bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm8,%xmm8 908bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm9,%xmm5 909bc3d5698SJohn Baldwin vpaddq %xmm8,%xmm12,%xmm12 910bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm11,%xmm11 911bc3d5698SJohn Baldwin vmovdqa -48(%r11),%xmm8 912bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm9,%xmm9 913bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm7,%xmm6 914bc3d5698SJohn Baldwin vpaddq %xmm9,%xmm10,%xmm10 915bc3d5698SJohn Baldwin 916bc3d5698SJohn Baldwin vmovdqa -16(%r11),%xmm9 917bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm14,%xmm14 918bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm7,%xmm7 919bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm8,%xmm5 920bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm13,%xmm13 921bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm12,%xmm12 922bc3d5698SJohn Baldwin vmovdqu 
32(%rsi),%xmm5 923bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm8,%xmm7 924bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm8,%xmm8 925bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm11,%xmm11 926bc3d5698SJohn Baldwin vmovdqu 48(%rsi),%xmm6 927bc3d5698SJohn Baldwin vpaddq %xmm8,%xmm10,%xmm10 928bc3d5698SJohn Baldwin 929bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm9,%xmm2 930bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm9,%xmm3 931bc3d5698SJohn Baldwin vpsrldq $6,%xmm5,%xmm7 932bc3d5698SJohn Baldwin vpaddq %xmm2,%xmm11,%xmm11 933bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm9,%xmm4 934bc3d5698SJohn Baldwin vpsrldq $6,%xmm6,%xmm8 935bc3d5698SJohn Baldwin vpaddq %xmm3,%xmm12,%xmm2 936bc3d5698SJohn Baldwin vpaddq %xmm4,%xmm13,%xmm3 937bc3d5698SJohn Baldwin vpmuludq -32(%r11),%xmm0,%xmm4 938bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm9,%xmm0 939bc3d5698SJohn Baldwin vpunpckhqdq %xmm6,%xmm5,%xmm9 940bc3d5698SJohn Baldwin vpaddq %xmm4,%xmm14,%xmm4 941bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm10,%xmm0 942bc3d5698SJohn Baldwin 943bc3d5698SJohn Baldwin vpunpcklqdq %xmm6,%xmm5,%xmm5 944bc3d5698SJohn Baldwin vpunpcklqdq %xmm8,%xmm7,%xmm8 945bc3d5698SJohn Baldwin 946bc3d5698SJohn Baldwin 947bc3d5698SJohn Baldwin vpsrldq $5,%xmm9,%xmm9 948bc3d5698SJohn Baldwin vpsrlq $26,%xmm5,%xmm6 949bc3d5698SJohn Baldwin vmovdqa 0(%rsp),%xmm14 950bc3d5698SJohn Baldwin vpand %xmm15,%xmm5,%xmm5 951bc3d5698SJohn Baldwin vpsrlq $4,%xmm8,%xmm7 952bc3d5698SJohn Baldwin vpand %xmm15,%xmm6,%xmm6 953bc3d5698SJohn Baldwin vpand 0(%rcx),%xmm9,%xmm9 954bc3d5698SJohn Baldwin vpsrlq $30,%xmm8,%xmm8 955bc3d5698SJohn Baldwin vpand %xmm15,%xmm7,%xmm7 956bc3d5698SJohn Baldwin vpand %xmm15,%xmm8,%xmm8 957bc3d5698SJohn Baldwin vpor 32(%rcx),%xmm9,%xmm9 958bc3d5698SJohn Baldwin 959bc3d5698SJohn Baldwin 960bc3d5698SJohn Baldwin 961bc3d5698SJohn Baldwin 962bc3d5698SJohn Baldwin 963bc3d5698SJohn Baldwin vpsrlq $26,%xmm3,%xmm13 964bc3d5698SJohn Baldwin vpand %xmm15,%xmm3,%xmm3 965bc3d5698SJohn Baldwin vpaddq %xmm13,%xmm4,%xmm4 966bc3d5698SJohn Baldwin 
967bc3d5698SJohn Baldwin vpsrlq $26,%xmm0,%xmm10 968bc3d5698SJohn Baldwin vpand %xmm15,%xmm0,%xmm0 969bc3d5698SJohn Baldwin vpaddq %xmm10,%xmm11,%xmm1 970bc3d5698SJohn Baldwin 971bc3d5698SJohn Baldwin vpsrlq $26,%xmm4,%xmm10 972bc3d5698SJohn Baldwin vpand %xmm15,%xmm4,%xmm4 973bc3d5698SJohn Baldwin 974bc3d5698SJohn Baldwin vpsrlq $26,%xmm1,%xmm11 975bc3d5698SJohn Baldwin vpand %xmm15,%xmm1,%xmm1 976bc3d5698SJohn Baldwin vpaddq %xmm11,%xmm2,%xmm2 977bc3d5698SJohn Baldwin 978bc3d5698SJohn Baldwin vpaddq %xmm10,%xmm0,%xmm0 979bc3d5698SJohn Baldwin vpsllq $2,%xmm10,%xmm10 980bc3d5698SJohn Baldwin vpaddq %xmm10,%xmm0,%xmm0 981bc3d5698SJohn Baldwin 982bc3d5698SJohn Baldwin vpsrlq $26,%xmm2,%xmm12 983bc3d5698SJohn Baldwin vpand %xmm15,%xmm2,%xmm2 984bc3d5698SJohn Baldwin vpaddq %xmm12,%xmm3,%xmm3 985bc3d5698SJohn Baldwin 986bc3d5698SJohn Baldwin vpsrlq $26,%xmm0,%xmm10 987bc3d5698SJohn Baldwin vpand %xmm15,%xmm0,%xmm0 988bc3d5698SJohn Baldwin vpaddq %xmm10,%xmm1,%xmm1 989bc3d5698SJohn Baldwin 990bc3d5698SJohn Baldwin vpsrlq $26,%xmm3,%xmm13 991bc3d5698SJohn Baldwin vpand %xmm15,%xmm3,%xmm3 992bc3d5698SJohn Baldwin vpaddq %xmm13,%xmm4,%xmm4 993bc3d5698SJohn Baldwin 994bc3d5698SJohn Baldwin ja .Loop_avx 995bc3d5698SJohn Baldwin 996bc3d5698SJohn Baldwin.Lskip_loop_avx: 997bc3d5698SJohn Baldwin 998bc3d5698SJohn Baldwin 999bc3d5698SJohn Baldwin 1000bc3d5698SJohn Baldwin vpshufd $0x10,%xmm14,%xmm14 1001bc3d5698SJohn Baldwin addq $32,%rdx 1002bc3d5698SJohn Baldwin jnz .Long_tail_avx 1003bc3d5698SJohn Baldwin 1004bc3d5698SJohn Baldwin vpaddq %xmm2,%xmm7,%xmm7 1005bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm5,%xmm5 1006bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm6,%xmm6 1007bc3d5698SJohn Baldwin vpaddq %xmm3,%xmm8,%xmm8 1008bc3d5698SJohn Baldwin vpaddq %xmm4,%xmm9,%xmm9 1009bc3d5698SJohn Baldwin 1010bc3d5698SJohn Baldwin.Long_tail_avx: 1011bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%r11) 1012bc3d5698SJohn Baldwin vmovdqa %xmm0,0(%r11) 1013bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%r11) 
1014bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%r11) 1015bc3d5698SJohn Baldwin vmovdqa %xmm4,64(%r11) 1016bc3d5698SJohn Baldwin 1017bc3d5698SJohn Baldwin 1018bc3d5698SJohn Baldwin 1019bc3d5698SJohn Baldwin 1020bc3d5698SJohn Baldwin 1021bc3d5698SJohn Baldwin 1022bc3d5698SJohn Baldwin 1023bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm14,%xmm12 1024bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm14,%xmm10 1025bc3d5698SJohn Baldwin vpshufd $0x10,-48(%rdi),%xmm2 1026bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm14,%xmm11 1027bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm14,%xmm13 1028bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm14,%xmm14 1029bc3d5698SJohn Baldwin 1030bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm2,%xmm0 1031bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm14,%xmm14 1032bc3d5698SJohn Baldwin vpshufd $0x10,-32(%rdi),%xmm3 1033bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm2,%xmm1 1034bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm13,%xmm13 1035bc3d5698SJohn Baldwin vpshufd $0x10,-16(%rdi),%xmm4 1036bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm2,%xmm0 1037bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm12,%xmm12 1038bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm2,%xmm2 1039bc3d5698SJohn Baldwin vpaddq %xmm2,%xmm11,%xmm11 1040bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm3,%xmm3 1041bc3d5698SJohn Baldwin vpaddq %xmm3,%xmm10,%xmm10 1042bc3d5698SJohn Baldwin 1043bc3d5698SJohn Baldwin vpshufd $0x10,0(%rdi),%xmm2 1044bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm4,%xmm1 1045bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm14,%xmm14 1046bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm4,%xmm0 1047bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm13,%xmm13 1048bc3d5698SJohn Baldwin vpshufd $0x10,16(%rdi),%xmm3 1049bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm4,%xmm4 1050bc3d5698SJohn Baldwin vpaddq %xmm4,%xmm12,%xmm12 1051bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm2,%xmm1 1052bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm11,%xmm11 1053bc3d5698SJohn Baldwin vpshufd $0x10,32(%rdi),%xmm4 1054bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm2,%xmm2 1055bc3d5698SJohn Baldwin vpaddq %xmm2,%xmm10,%xmm10 
1056bc3d5698SJohn Baldwin 1057bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm3,%xmm0 1058bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm14,%xmm14 1059bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm3,%xmm3 1060bc3d5698SJohn Baldwin vpaddq %xmm3,%xmm13,%xmm13 1061bc3d5698SJohn Baldwin vpshufd $0x10,48(%rdi),%xmm2 1062bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm4,%xmm1 1063bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm12,%xmm12 1064bc3d5698SJohn Baldwin vpshufd $0x10,64(%rdi),%xmm3 1065bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm4,%xmm0 1066bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm11,%xmm11 1067bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm4,%xmm4 1068bc3d5698SJohn Baldwin vpaddq %xmm4,%xmm10,%xmm10 1069bc3d5698SJohn Baldwin 1070bc3d5698SJohn Baldwin vpmuludq %xmm5,%xmm2,%xmm2 1071bc3d5698SJohn Baldwin vpaddq %xmm2,%xmm14,%xmm14 1072bc3d5698SJohn Baldwin vpmuludq %xmm9,%xmm3,%xmm1 1073bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm13,%xmm13 1074bc3d5698SJohn Baldwin vpmuludq %xmm8,%xmm3,%xmm0 1075bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm12,%xmm12 1076bc3d5698SJohn Baldwin vpmuludq %xmm7,%xmm3,%xmm1 1077bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm11,%xmm11 1078bc3d5698SJohn Baldwin vpmuludq %xmm6,%xmm3,%xmm3 1079bc3d5698SJohn Baldwin vpaddq %xmm3,%xmm10,%xmm10 1080bc3d5698SJohn Baldwin 1081bc3d5698SJohn Baldwin jz .Lshort_tail_avx 1082bc3d5698SJohn Baldwin 1083bc3d5698SJohn Baldwin vmovdqu 0(%rsi),%xmm0 1084bc3d5698SJohn Baldwin vmovdqu 16(%rsi),%xmm1 1085bc3d5698SJohn Baldwin 1086bc3d5698SJohn Baldwin vpsrldq $6,%xmm0,%xmm2 1087bc3d5698SJohn Baldwin vpsrldq $6,%xmm1,%xmm3 1088bc3d5698SJohn Baldwin vpunpckhqdq %xmm1,%xmm0,%xmm4 1089bc3d5698SJohn Baldwin vpunpcklqdq %xmm1,%xmm0,%xmm0 1090bc3d5698SJohn Baldwin vpunpcklqdq %xmm3,%xmm2,%xmm3 1091bc3d5698SJohn Baldwin 1092bc3d5698SJohn Baldwin vpsrlq $40,%xmm4,%xmm4 1093bc3d5698SJohn Baldwin vpsrlq $26,%xmm0,%xmm1 1094bc3d5698SJohn Baldwin vpand %xmm15,%xmm0,%xmm0 1095bc3d5698SJohn Baldwin vpsrlq $4,%xmm3,%xmm2 1096bc3d5698SJohn Baldwin vpand %xmm15,%xmm1,%xmm1 
1097bc3d5698SJohn Baldwin vpsrlq $30,%xmm3,%xmm3 1098bc3d5698SJohn Baldwin vpand %xmm15,%xmm2,%xmm2 1099bc3d5698SJohn Baldwin vpand %xmm15,%xmm3,%xmm3 1100bc3d5698SJohn Baldwin vpor 32(%rcx),%xmm4,%xmm4 1101bc3d5698SJohn Baldwin 1102bc3d5698SJohn Baldwin vpshufd $0x32,-64(%rdi),%xmm9 1103bc3d5698SJohn Baldwin vpaddq 0(%r11),%xmm0,%xmm0 1104bc3d5698SJohn Baldwin vpaddq 16(%r11),%xmm1,%xmm1 1105bc3d5698SJohn Baldwin vpaddq 32(%r11),%xmm2,%xmm2 1106bc3d5698SJohn Baldwin vpaddq 48(%r11),%xmm3,%xmm3 1107bc3d5698SJohn Baldwin vpaddq 64(%r11),%xmm4,%xmm4 1108bc3d5698SJohn Baldwin 1109bc3d5698SJohn Baldwin 1110bc3d5698SJohn Baldwin 1111bc3d5698SJohn Baldwin 1112bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm9,%xmm5 1113bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm10,%xmm10 1114bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm9,%xmm6 1115bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm11,%xmm11 1116bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm9,%xmm5 1117bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm12,%xmm12 1118bc3d5698SJohn Baldwin vpshufd $0x32,-48(%rdi),%xmm7 1119bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm9,%xmm6 1120bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm13,%xmm13 1121bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm9,%xmm9 1122bc3d5698SJohn Baldwin vpaddq %xmm9,%xmm14,%xmm14 1123bc3d5698SJohn Baldwin 1124bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm7,%xmm5 1125bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm14,%xmm14 1126bc3d5698SJohn Baldwin vpshufd $0x32,-32(%rdi),%xmm8 1127bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm7,%xmm6 1128bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm13,%xmm13 1129bc3d5698SJohn Baldwin vpshufd $0x32,-16(%rdi),%xmm9 1130bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm7,%xmm5 1131bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm12,%xmm12 1132bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm7,%xmm7 1133bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm11,%xmm11 1134bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm8,%xmm8 1135bc3d5698SJohn Baldwin vpaddq %xmm8,%xmm10,%xmm10 1136bc3d5698SJohn Baldwin 1137bc3d5698SJohn Baldwin vpshufd $0x32,0(%rdi),%xmm7 
1138bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm9,%xmm6 1139bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm14,%xmm14 1140bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm9,%xmm5 1141bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm13,%xmm13 1142bc3d5698SJohn Baldwin vpshufd $0x32,16(%rdi),%xmm8 1143bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm9,%xmm9 1144bc3d5698SJohn Baldwin vpaddq %xmm9,%xmm12,%xmm12 1145bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm7,%xmm6 1146bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm11,%xmm11 1147bc3d5698SJohn Baldwin vpshufd $0x32,32(%rdi),%xmm9 1148bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm7,%xmm7 1149bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm10,%xmm10 1150bc3d5698SJohn Baldwin 1151bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm8,%xmm5 1152bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm14,%xmm14 1153bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm8,%xmm8 1154bc3d5698SJohn Baldwin vpaddq %xmm8,%xmm13,%xmm13 1155bc3d5698SJohn Baldwin vpshufd $0x32,48(%rdi),%xmm7 1156bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm9,%xmm6 1157bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm12,%xmm12 1158bc3d5698SJohn Baldwin vpshufd $0x32,64(%rdi),%xmm8 1159bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm9,%xmm5 1160bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm11,%xmm11 1161bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm9,%xmm9 1162bc3d5698SJohn Baldwin vpaddq %xmm9,%xmm10,%xmm10 1163bc3d5698SJohn Baldwin 1164bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm7,%xmm7 1165bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm14,%xmm14 1166bc3d5698SJohn Baldwin vpmuludq %xmm4,%xmm8,%xmm6 1167bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm13,%xmm13 1168bc3d5698SJohn Baldwin vpmuludq %xmm3,%xmm8,%xmm5 1169bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm12,%xmm12 1170bc3d5698SJohn Baldwin vpmuludq %xmm2,%xmm8,%xmm6 1171bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm11,%xmm11 1172bc3d5698SJohn Baldwin vpmuludq %xmm1,%xmm8,%xmm8 1173bc3d5698SJohn Baldwin vpaddq %xmm8,%xmm10,%xmm10 1174bc3d5698SJohn Baldwin 1175bc3d5698SJohn Baldwin.Lshort_tail_avx: 1176bc3d5698SJohn Baldwin 1177bc3d5698SJohn Baldwin 
1178bc3d5698SJohn Baldwin 1179bc3d5698SJohn Baldwin vpsrldq $8,%xmm14,%xmm9 1180bc3d5698SJohn Baldwin vpsrldq $8,%xmm13,%xmm8 1181bc3d5698SJohn Baldwin vpsrldq $8,%xmm11,%xmm6 1182bc3d5698SJohn Baldwin vpsrldq $8,%xmm10,%xmm5 1183bc3d5698SJohn Baldwin vpsrldq $8,%xmm12,%xmm7 1184bc3d5698SJohn Baldwin vpaddq %xmm8,%xmm13,%xmm13 1185bc3d5698SJohn Baldwin vpaddq %xmm9,%xmm14,%xmm14 1186bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm10,%xmm10 1187bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm11,%xmm11 1188bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm12,%xmm12 1189bc3d5698SJohn Baldwin 1190bc3d5698SJohn Baldwin 1191bc3d5698SJohn Baldwin 1192bc3d5698SJohn Baldwin 1193bc3d5698SJohn Baldwin vpsrlq $26,%xmm13,%xmm3 1194bc3d5698SJohn Baldwin vpand %xmm15,%xmm13,%xmm13 1195bc3d5698SJohn Baldwin vpaddq %xmm3,%xmm14,%xmm14 1196bc3d5698SJohn Baldwin 1197bc3d5698SJohn Baldwin vpsrlq $26,%xmm10,%xmm0 1198bc3d5698SJohn Baldwin vpand %xmm15,%xmm10,%xmm10 1199bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm11,%xmm11 1200bc3d5698SJohn Baldwin 1201bc3d5698SJohn Baldwin vpsrlq $26,%xmm14,%xmm4 1202bc3d5698SJohn Baldwin vpand %xmm15,%xmm14,%xmm14 1203bc3d5698SJohn Baldwin 1204bc3d5698SJohn Baldwin vpsrlq $26,%xmm11,%xmm1 1205bc3d5698SJohn Baldwin vpand %xmm15,%xmm11,%xmm11 1206bc3d5698SJohn Baldwin vpaddq %xmm1,%xmm12,%xmm12 1207bc3d5698SJohn Baldwin 1208bc3d5698SJohn Baldwin vpaddq %xmm4,%xmm10,%xmm10 1209bc3d5698SJohn Baldwin vpsllq $2,%xmm4,%xmm4 1210bc3d5698SJohn Baldwin vpaddq %xmm4,%xmm10,%xmm10 1211bc3d5698SJohn Baldwin 1212bc3d5698SJohn Baldwin vpsrlq $26,%xmm12,%xmm2 1213bc3d5698SJohn Baldwin vpand %xmm15,%xmm12,%xmm12 1214bc3d5698SJohn Baldwin vpaddq %xmm2,%xmm13,%xmm13 1215bc3d5698SJohn Baldwin 1216bc3d5698SJohn Baldwin vpsrlq $26,%xmm10,%xmm0 1217bc3d5698SJohn Baldwin vpand %xmm15,%xmm10,%xmm10 1218bc3d5698SJohn Baldwin vpaddq %xmm0,%xmm11,%xmm11 1219bc3d5698SJohn Baldwin 1220bc3d5698SJohn Baldwin vpsrlq $26,%xmm13,%xmm3 1221bc3d5698SJohn Baldwin vpand %xmm15,%xmm13,%xmm13 1222bc3d5698SJohn Baldwin vpaddq 
%xmm3,%xmm14,%xmm14 1223bc3d5698SJohn Baldwin 1224bc3d5698SJohn Baldwin vmovd %xmm10,-112(%rdi) 1225bc3d5698SJohn Baldwin vmovd %xmm11,-108(%rdi) 1226bc3d5698SJohn Baldwin vmovd %xmm12,-104(%rdi) 1227bc3d5698SJohn Baldwin vmovd %xmm13,-100(%rdi) 1228bc3d5698SJohn Baldwin vmovd %xmm14,-96(%rdi) 1229bc3d5698SJohn Baldwin leaq 88(%r11),%rsp 1230bc3d5698SJohn Baldwin.cfi_def_cfa %rsp,8 1231bc3d5698SJohn Baldwin vzeroupper 1232bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1233bc3d5698SJohn Baldwin.cfi_endproc 1234bc3d5698SJohn Baldwin.size poly1305_blocks_avx,.-poly1305_blocks_avx 1235bc3d5698SJohn Baldwin /* poly1305_emit_avx: tests the 32-bit word at 20(%rdi); when it is zero, control transfers to .Lemit (a label defined outside this excerpt). Otherwise the five 32-bit words at 0..16(%rdi) are treated as limbs spaced 26 bits apart and packed into %r8 (bits 0-63), %r9 (bits 64-127) and %r10 (bits 128 and up); the bits above 129 are folded back in multiplied by 5, value+5 is selected when it reaches bit 130, the 128-bit value at 0/8(%rdx) is added, and the 16-byte result is stored at 0/8(%rsi). Writes %rax, %rcx, %r8-%r11 and the flags. NOTE(review): %rdi/%rsi/%rdx are presumably the context, the output buffer and the nonce of the C-level emit routine - confirm against the caller. */ 1236bc3d5698SJohn Baldwin.type poly1305_emit_avx,@function 1237bc3d5698SJohn Baldwin.align 32 1238bc3d5698SJohn Baldwinpoly1305_emit_avx: 1239bc3d5698SJohn Baldwin.cfi_startproc 1240bc3d5698SJohn Baldwin cmpl $0,20(%rdi) /* NOTE(review): looks like the "accumulator is stored as 26-bit limbs" flag; .Linit_avx2 stores 1 here - confirm */ 1241bc3d5698SJohn Baldwin je .Lemit /* flag clear: continue at .Lemit instead */ 1242bc3d5698SJohn Baldwin 1243bc3d5698SJohn Baldwin movl 0(%rdi),%eax /* limb 0; each 32-bit load zero-extends into the full 64-bit register */ 1244bc3d5698SJohn Baldwin movl 4(%rdi),%ecx /* limb 1 */ 1245bc3d5698SJohn Baldwin movl 8(%rdi),%r8d /* limb 2 */ 1246bc3d5698SJohn Baldwin movl 12(%rdi),%r11d /* limb 3 */ 1247bc3d5698SJohn Baldwin movl 16(%rdi),%r10d /* limb 4 */ 1248bc3d5698SJohn Baldwin 1249bc3d5698SJohn Baldwin shlq $26,%rcx /* limb1 << 26 */ 1250bc3d5698SJohn Baldwin movq %r8,%r9 1251bc3d5698SJohn Baldwin shlq $52,%r8 /* low 12 bits of limb 2 land in bits 52-63 */ 1252bc3d5698SJohn Baldwin addq %rcx,%rax /* limb0 + (limb1 << 26) */ 1253bc3d5698SJohn Baldwin shrq $12,%r9 /* remaining bits of limb 2 start the second word */ 1254bc3d5698SJohn Baldwin addq %rax,%r8 /* %r8 = bits 0-63 */ 1255bc3d5698SJohn Baldwin adcq $0,%r9 /* carry into the second word */ 1256bc3d5698SJohn Baldwin 1257bc3d5698SJohn Baldwin shlq $14,%r11 /* limb 3 sits at bit 78 = 64 + 14 */ 1258bc3d5698SJohn Baldwin movq %r10,%rax 1259bc3d5698SJohn Baldwin shrq $24,%r10 /* part of limb 4 at or above bit 128 = 104 + 24 */ 1260bc3d5698SJohn Baldwin addq %r11,%r9 1261bc3d5698SJohn Baldwin shlq $40,%rax /* limb 4 sits at bit 104 = 64 + 40 */ 1262bc3d5698SJohn Baldwin addq %rax,%r9 /* %r9 = bits 64-127 */ 1263bc3d5698SJohn Baldwin adcq $0,%r10 /* %r10 = bits 128 and up */ 1264bc3d5698SJohn Baldwin 1265bc3d5698SJohn Baldwin movq %r10,%rax 1266bc3d5698SJohn Baldwin movq %r10,%rcx 1267bc3d5698SJohn Baldwin andq $3,%r10 /* keep only bits 128-129 in the top word */ 1268bc3d5698SJohn Baldwin shrq $2,%rax 1269bc3d5698SJohn Baldwin andq $-4,%rcx 1270bc3d5698SJohn Baldwin addq %rcx,%rax /* (x >> 2) + (x & ~3) = 5 * (x >> 2), x being the previous %r10 */ 
1271bc3d5698SJohn Baldwin addq %rax,%r8 /* fold the bits above 129 back into the low word, times 5 */ 1272bc3d5698SJohn Baldwin adcq $0,%r9 1273bc3d5698SJohn Baldwin adcq $0,%r10 1274bc3d5698SJohn Baldwin 1275bc3d5698SJohn Baldwin movq %r8,%rax /* keep the unmodified low word */ 1276bc3d5698SJohn Baldwin addq $5,%r8 /* start computing value + 5 */ 1277bc3d5698SJohn Baldwin movq %r9,%rcx /* keep the unmodified second word; mov leaves CF intact for the adcq that follows */ 1278bc3d5698SJohn Baldwin adcq $0,%r9 1279bc3d5698SJohn Baldwin adcq $0,%r10 1280bc3d5698SJohn Baldwin shrq $2,%r10 /* nonzero iff value + 5 reached bit 130, i.e. value >= 2^130 - 5 */ 1281bc3d5698SJohn Baldwin cmovnzq %r8,%rax /* in that case use the low 128 bits of value + 5 */ 1282bc3d5698SJohn Baldwin cmovnzq %r9,%rcx 1283bc3d5698SJohn Baldwin 1284bc3d5698SJohn Baldwin addq 0(%rdx),%rax /* add the 128-bit value at (%rdx) */ 1285bc3d5698SJohn Baldwin adcq 8(%rdx),%rcx /* the carry out of bit 127 is not used afterwards */ 1286bc3d5698SJohn Baldwin movq %rax,0(%rsi) /* store the low 8 bytes of the result */ 1287bc3d5698SJohn Baldwin movq %rcx,8(%rsi) /* store the high 8 bytes of the result */ 1288bc3d5698SJohn Baldwin 1289bc3d5698SJohn Baldwin .byte 0xf3,0xc3 /* machine-code bytes for "repz ret" */ 1290bc3d5698SJohn Baldwin.cfi_endproc 1291bc3d5698SJohn Baldwin.size poly1305_emit_avx,.-poly1305_emit_avx 1292bc3d5698SJohn Baldwin.type poly1305_blocks_avx2,@function 1293bc3d5698SJohn Baldwin.align 32 1294bc3d5698SJohn Baldwinpoly1305_blocks_avx2: 1295bc3d5698SJohn Baldwin.cfi_startproc 1296bc3d5698SJohn Baldwin movl 20(%rdi),%r8d 1297bc3d5698SJohn Baldwin cmpq $128,%rdx 1298bc3d5698SJohn Baldwin jae .Lblocks_avx2 1299bc3d5698SJohn Baldwin testl %r8d,%r8d 1300bc3d5698SJohn Baldwin jz .Lblocks 1301bc3d5698SJohn Baldwin 1302bc3d5698SJohn Baldwin.Lblocks_avx2: 1303bc3d5698SJohn Baldwin andq $-16,%rdx 1304bc3d5698SJohn Baldwin jz .Lno_data_avx2 1305bc3d5698SJohn Baldwin 1306bc3d5698SJohn Baldwin vzeroupper 1307bc3d5698SJohn Baldwin 1308bc3d5698SJohn Baldwin testl %r8d,%r8d 1309bc3d5698SJohn Baldwin jz .Lbase2_64_avx2 1310bc3d5698SJohn Baldwin 1311bc3d5698SJohn Baldwin testq $63,%rdx 1312bc3d5698SJohn Baldwin jz .Leven_avx2 1313bc3d5698SJohn Baldwin 1314bc3d5698SJohn Baldwin pushq %rbx 1315bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1316bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 1317bc3d5698SJohn Baldwin pushq %rbp 1318bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1319bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 1320bc3d5698SJohn Baldwin pushq %r12 
1321bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1322bc3d5698SJohn Baldwin.cfi_offset %r12,-32 1323bc3d5698SJohn Baldwin pushq %r13 1324bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1325bc3d5698SJohn Baldwin.cfi_offset %r13,-40 1326bc3d5698SJohn Baldwin pushq %r14 1327bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1328bc3d5698SJohn Baldwin.cfi_offset %r14,-48 1329bc3d5698SJohn Baldwin pushq %r15 1330bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1331bc3d5698SJohn Baldwin.cfi_offset %r15,-56 1332bc3d5698SJohn Baldwin.Lblocks_avx2_body: 1333bc3d5698SJohn Baldwin 1334bc3d5698SJohn Baldwin movq %rdx,%r15 1335bc3d5698SJohn Baldwin 1336bc3d5698SJohn Baldwin movq 0(%rdi),%r8 1337bc3d5698SJohn Baldwin movq 8(%rdi),%r9 1338bc3d5698SJohn Baldwin movl 16(%rdi),%ebp 1339bc3d5698SJohn Baldwin 1340bc3d5698SJohn Baldwin movq 24(%rdi),%r11 1341bc3d5698SJohn Baldwin movq 32(%rdi),%r13 1342bc3d5698SJohn Baldwin 1343bc3d5698SJohn Baldwin 1344bc3d5698SJohn Baldwin movl %r8d,%r14d 1345bc3d5698SJohn Baldwin andq $-2147483648,%r8 1346bc3d5698SJohn Baldwin movq %r9,%r12 1347bc3d5698SJohn Baldwin movl %r9d,%ebx 1348bc3d5698SJohn Baldwin andq $-2147483648,%r9 1349bc3d5698SJohn Baldwin 1350bc3d5698SJohn Baldwin shrq $6,%r8 1351bc3d5698SJohn Baldwin shlq $52,%r12 1352bc3d5698SJohn Baldwin addq %r8,%r14 1353bc3d5698SJohn Baldwin shrq $12,%rbx 1354bc3d5698SJohn Baldwin shrq $18,%r9 1355bc3d5698SJohn Baldwin addq %r12,%r14 1356bc3d5698SJohn Baldwin adcq %r9,%rbx 1357bc3d5698SJohn Baldwin 1358bc3d5698SJohn Baldwin movq %rbp,%r8 1359bc3d5698SJohn Baldwin shlq $40,%r8 1360bc3d5698SJohn Baldwin shrq $24,%rbp 1361bc3d5698SJohn Baldwin addq %r8,%rbx 1362bc3d5698SJohn Baldwin adcq $0,%rbp 1363bc3d5698SJohn Baldwin 1364bc3d5698SJohn Baldwin movq $-4,%r9 1365bc3d5698SJohn Baldwin movq %rbp,%r8 1366bc3d5698SJohn Baldwin andq %rbp,%r9 1367bc3d5698SJohn Baldwin shrq $2,%r8 1368bc3d5698SJohn Baldwin andq $3,%rbp 1369bc3d5698SJohn Baldwin addq %r9,%r8 1370bc3d5698SJohn Baldwin addq %r8,%r14 1371bc3d5698SJohn 
Baldwin adcq $0,%rbx 1372bc3d5698SJohn Baldwin adcq $0,%rbp 1373bc3d5698SJohn Baldwin 1374bc3d5698SJohn Baldwin movq %r13,%r12 1375bc3d5698SJohn Baldwin movq %r13,%rax 1376bc3d5698SJohn Baldwin shrq $2,%r13 1377bc3d5698SJohn Baldwin addq %r12,%r13 1378bc3d5698SJohn Baldwin 1379bc3d5698SJohn Baldwin.Lbase2_26_pre_avx2: 1380bc3d5698SJohn Baldwin addq 0(%rsi),%r14 1381bc3d5698SJohn Baldwin adcq 8(%rsi),%rbx 1382bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 1383bc3d5698SJohn Baldwin adcq %rcx,%rbp 1384bc3d5698SJohn Baldwin subq $16,%r15 1385bc3d5698SJohn Baldwin 1386bc3d5698SJohn Baldwin call __poly1305_block 1387bc3d5698SJohn Baldwin movq %r12,%rax 1388bc3d5698SJohn Baldwin 1389bc3d5698SJohn Baldwin testq $63,%r15 1390bc3d5698SJohn Baldwin jnz .Lbase2_26_pre_avx2 1391bc3d5698SJohn Baldwin 1392bc3d5698SJohn Baldwin testq %rcx,%rcx 1393bc3d5698SJohn Baldwin jz .Lstore_base2_64_avx2 1394bc3d5698SJohn Baldwin 1395bc3d5698SJohn Baldwin 1396bc3d5698SJohn Baldwin movq %r14,%rax 1397bc3d5698SJohn Baldwin movq %r14,%rdx 1398bc3d5698SJohn Baldwin shrq $52,%r14 1399bc3d5698SJohn Baldwin movq %rbx,%r11 1400bc3d5698SJohn Baldwin movq %rbx,%r12 1401bc3d5698SJohn Baldwin shrq $26,%rdx 1402bc3d5698SJohn Baldwin andq $0x3ffffff,%rax 1403bc3d5698SJohn Baldwin shlq $12,%r11 1404bc3d5698SJohn Baldwin andq $0x3ffffff,%rdx 1405bc3d5698SJohn Baldwin shrq $14,%rbx 1406bc3d5698SJohn Baldwin orq %r11,%r14 1407bc3d5698SJohn Baldwin shlq $24,%rbp 1408bc3d5698SJohn Baldwin andq $0x3ffffff,%r14 1409bc3d5698SJohn Baldwin shrq $40,%r12 1410bc3d5698SJohn Baldwin andq $0x3ffffff,%rbx 1411bc3d5698SJohn Baldwin orq %r12,%rbp 1412bc3d5698SJohn Baldwin 1413bc3d5698SJohn Baldwin testq %r15,%r15 1414bc3d5698SJohn Baldwin jz .Lstore_base2_26_avx2 1415bc3d5698SJohn Baldwin 1416bc3d5698SJohn Baldwin vmovd %eax,%xmm0 1417bc3d5698SJohn Baldwin vmovd %edx,%xmm1 1418bc3d5698SJohn Baldwin vmovd %r14d,%xmm2 1419bc3d5698SJohn Baldwin vmovd %ebx,%xmm3 1420bc3d5698SJohn Baldwin vmovd %ebp,%xmm4 1421bc3d5698SJohn Baldwin jmp 
.Lproceed_avx2 1422bc3d5698SJohn Baldwin 1423bc3d5698SJohn Baldwin.align 32 1424bc3d5698SJohn Baldwin.Lstore_base2_64_avx2: 1425bc3d5698SJohn Baldwin movq %r14,0(%rdi) 1426bc3d5698SJohn Baldwin movq %rbx,8(%rdi) 1427bc3d5698SJohn Baldwin movq %rbp,16(%rdi) 1428bc3d5698SJohn Baldwin jmp .Ldone_avx2 1429bc3d5698SJohn Baldwin 1430bc3d5698SJohn Baldwin.align 16 1431bc3d5698SJohn Baldwin.Lstore_base2_26_avx2: 1432bc3d5698SJohn Baldwin movl %eax,0(%rdi) 1433bc3d5698SJohn Baldwin movl %edx,4(%rdi) 1434bc3d5698SJohn Baldwin movl %r14d,8(%rdi) 1435bc3d5698SJohn Baldwin movl %ebx,12(%rdi) 1436bc3d5698SJohn Baldwin movl %ebp,16(%rdi) 1437bc3d5698SJohn Baldwin.align 16 1438bc3d5698SJohn Baldwin.Ldone_avx2: 1439bc3d5698SJohn Baldwin movq 0(%rsp),%r15 1440bc3d5698SJohn Baldwin.cfi_restore %r15 1441bc3d5698SJohn Baldwin movq 8(%rsp),%r14 1442bc3d5698SJohn Baldwin.cfi_restore %r14 1443bc3d5698SJohn Baldwin movq 16(%rsp),%r13 1444bc3d5698SJohn Baldwin.cfi_restore %r13 1445bc3d5698SJohn Baldwin movq 24(%rsp),%r12 1446bc3d5698SJohn Baldwin.cfi_restore %r12 1447bc3d5698SJohn Baldwin movq 32(%rsp),%rbp 1448bc3d5698SJohn Baldwin.cfi_restore %rbp 1449bc3d5698SJohn Baldwin movq 40(%rsp),%rbx 1450bc3d5698SJohn Baldwin.cfi_restore %rbx 1451bc3d5698SJohn Baldwin leaq 48(%rsp),%rsp 1452bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset -48 1453bc3d5698SJohn Baldwin.Lno_data_avx2: 1454bc3d5698SJohn Baldwin.Lblocks_avx2_epilogue: 1455bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1456bc3d5698SJohn Baldwin.cfi_endproc 1457bc3d5698SJohn Baldwin 1458bc3d5698SJohn Baldwin.align 32 1459bc3d5698SJohn Baldwin.Lbase2_64_avx2: 1460bc3d5698SJohn Baldwin.cfi_startproc 1461bc3d5698SJohn Baldwin pushq %rbx 1462bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1463bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 1464bc3d5698SJohn Baldwin pushq %rbp 1465bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1466bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 1467bc3d5698SJohn Baldwin pushq %r12 1468bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 
1469bc3d5698SJohn Baldwin.cfi_offset %r12,-32 1470bc3d5698SJohn Baldwin pushq %r13 1471bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1472bc3d5698SJohn Baldwin.cfi_offset %r13,-40 1473bc3d5698SJohn Baldwin pushq %r14 1474bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1475bc3d5698SJohn Baldwin.cfi_offset %r14,-48 1476bc3d5698SJohn Baldwin pushq %r15 1477bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 1478bc3d5698SJohn Baldwin.cfi_offset %r15,-56 1479bc3d5698SJohn Baldwin.Lbase2_64_avx2_body: 1480bc3d5698SJohn Baldwin 1481bc3d5698SJohn Baldwin movq %rdx,%r15 1482bc3d5698SJohn Baldwin 1483bc3d5698SJohn Baldwin movq 24(%rdi),%r11 1484bc3d5698SJohn Baldwin movq 32(%rdi),%r13 1485bc3d5698SJohn Baldwin 1486bc3d5698SJohn Baldwin movq 0(%rdi),%r14 1487bc3d5698SJohn Baldwin movq 8(%rdi),%rbx 1488bc3d5698SJohn Baldwin movl 16(%rdi),%ebp 1489bc3d5698SJohn Baldwin 1490bc3d5698SJohn Baldwin movq %r13,%r12 1491bc3d5698SJohn Baldwin movq %r13,%rax 1492bc3d5698SJohn Baldwin shrq $2,%r13 1493bc3d5698SJohn Baldwin addq %r12,%r13 1494bc3d5698SJohn Baldwin 1495bc3d5698SJohn Baldwin testq $63,%rdx 1496bc3d5698SJohn Baldwin jz .Linit_avx2 1497bc3d5698SJohn Baldwin 1498bc3d5698SJohn Baldwin.Lbase2_64_pre_avx2: 1499bc3d5698SJohn Baldwin addq 0(%rsi),%r14 1500bc3d5698SJohn Baldwin adcq 8(%rsi),%rbx 1501bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 1502bc3d5698SJohn Baldwin adcq %rcx,%rbp 1503bc3d5698SJohn Baldwin subq $16,%r15 1504bc3d5698SJohn Baldwin 1505bc3d5698SJohn Baldwin call __poly1305_block 1506bc3d5698SJohn Baldwin movq %r12,%rax 1507bc3d5698SJohn Baldwin 1508bc3d5698SJohn Baldwin testq $63,%r15 1509bc3d5698SJohn Baldwin jnz .Lbase2_64_pre_avx2 1510bc3d5698SJohn Baldwin 1511bc3d5698SJohn Baldwin.Linit_avx2: 1512bc3d5698SJohn Baldwin 1513bc3d5698SJohn Baldwin movq %r14,%rax 1514bc3d5698SJohn Baldwin movq %r14,%rdx 1515bc3d5698SJohn Baldwin shrq $52,%r14 1516bc3d5698SJohn Baldwin movq %rbx,%r8 1517bc3d5698SJohn Baldwin movq %rbx,%r9 1518bc3d5698SJohn Baldwin shrq $26,%rdx 1519bc3d5698SJohn 
Baldwin andq $0x3ffffff,%rax 1520bc3d5698SJohn Baldwin shlq $12,%r8 1521bc3d5698SJohn Baldwin andq $0x3ffffff,%rdx 1522bc3d5698SJohn Baldwin shrq $14,%rbx 1523bc3d5698SJohn Baldwin orq %r8,%r14 1524bc3d5698SJohn Baldwin shlq $24,%rbp 1525bc3d5698SJohn Baldwin andq $0x3ffffff,%r14 1526bc3d5698SJohn Baldwin shrq $40,%r9 1527bc3d5698SJohn Baldwin andq $0x3ffffff,%rbx 1528bc3d5698SJohn Baldwin orq %r9,%rbp 1529bc3d5698SJohn Baldwin 1530bc3d5698SJohn Baldwin vmovd %eax,%xmm0 1531bc3d5698SJohn Baldwin vmovd %edx,%xmm1 1532bc3d5698SJohn Baldwin vmovd %r14d,%xmm2 1533bc3d5698SJohn Baldwin vmovd %ebx,%xmm3 1534bc3d5698SJohn Baldwin vmovd %ebp,%xmm4 1535bc3d5698SJohn Baldwin movl $1,20(%rdi) 1536bc3d5698SJohn Baldwin 1537bc3d5698SJohn Baldwin call __poly1305_init_avx 1538bc3d5698SJohn Baldwin 1539bc3d5698SJohn Baldwin.Lproceed_avx2: 1540bc3d5698SJohn Baldwin movq %r15,%rdx 1541bc3d5698SJohn Baldwin movl OPENSSL_ia32cap_P+8(%rip),%r10d 1542bc3d5698SJohn Baldwin movl $3221291008,%r11d 1543bc3d5698SJohn Baldwin 1544bc3d5698SJohn Baldwin movq 0(%rsp),%r15 1545bc3d5698SJohn Baldwin.cfi_restore %r15 1546bc3d5698SJohn Baldwin movq 8(%rsp),%r14 1547bc3d5698SJohn Baldwin.cfi_restore %r14 1548bc3d5698SJohn Baldwin movq 16(%rsp),%r13 1549bc3d5698SJohn Baldwin.cfi_restore %r13 1550bc3d5698SJohn Baldwin movq 24(%rsp),%r12 1551bc3d5698SJohn Baldwin.cfi_restore %r12 1552bc3d5698SJohn Baldwin movq 32(%rsp),%rbp 1553bc3d5698SJohn Baldwin.cfi_restore %rbp 1554bc3d5698SJohn Baldwin movq 40(%rsp),%rbx 1555bc3d5698SJohn Baldwin.cfi_restore %rbx 1556bc3d5698SJohn Baldwin leaq 48(%rsp),%rax 1557bc3d5698SJohn Baldwin leaq 48(%rsp),%rsp 1558bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset -48 1559bc3d5698SJohn Baldwin.Lbase2_64_avx2_epilogue: 1560bc3d5698SJohn Baldwin jmp .Ldo_avx2 1561bc3d5698SJohn Baldwin.cfi_endproc 1562bc3d5698SJohn Baldwin 1563bc3d5698SJohn Baldwin.align 32 1564bc3d5698SJohn Baldwin.Leven_avx2: 1565bc3d5698SJohn Baldwin.cfi_startproc 1566bc3d5698SJohn Baldwin movl 
OPENSSL_ia32cap_P+8(%rip),%r10d 1567bc3d5698SJohn Baldwin vmovd 0(%rdi),%xmm0 1568bc3d5698SJohn Baldwin vmovd 4(%rdi),%xmm1 1569bc3d5698SJohn Baldwin vmovd 8(%rdi),%xmm2 1570bc3d5698SJohn Baldwin vmovd 12(%rdi),%xmm3 1571bc3d5698SJohn Baldwin vmovd 16(%rdi),%xmm4 1572bc3d5698SJohn Baldwin 1573bc3d5698SJohn Baldwin.Ldo_avx2: 1574bc3d5698SJohn Baldwin leaq -8(%rsp),%r11 1575bc3d5698SJohn Baldwin.cfi_def_cfa %r11,16 1576bc3d5698SJohn Baldwin subq $0x128,%rsp 1577bc3d5698SJohn Baldwin leaq .Lconst(%rip),%rcx 1578bc3d5698SJohn Baldwin leaq 48+64(%rdi),%rdi 1579bc3d5698SJohn Baldwin vmovdqa 96(%rcx),%ymm7 1580bc3d5698SJohn Baldwin 1581bc3d5698SJohn Baldwin 1582bc3d5698SJohn Baldwin vmovdqu -64(%rdi),%xmm9 1583bc3d5698SJohn Baldwin andq $-512,%rsp 1584bc3d5698SJohn Baldwin vmovdqu -48(%rdi),%xmm10 1585bc3d5698SJohn Baldwin vmovdqu -32(%rdi),%xmm6 1586bc3d5698SJohn Baldwin vmovdqu -16(%rdi),%xmm11 1587bc3d5698SJohn Baldwin vmovdqu 0(%rdi),%xmm12 1588bc3d5698SJohn Baldwin vmovdqu 16(%rdi),%xmm13 1589bc3d5698SJohn Baldwin leaq 144(%rsp),%rax 1590bc3d5698SJohn Baldwin vmovdqu 32(%rdi),%xmm14 1591bc3d5698SJohn Baldwin vpermd %ymm9,%ymm7,%ymm9 1592bc3d5698SJohn Baldwin vmovdqu 48(%rdi),%xmm15 1593bc3d5698SJohn Baldwin vpermd %ymm10,%ymm7,%ymm10 1594bc3d5698SJohn Baldwin vmovdqu 64(%rdi),%xmm5 1595bc3d5698SJohn Baldwin vpermd %ymm6,%ymm7,%ymm6 1596bc3d5698SJohn Baldwin vmovdqa %ymm9,0(%rsp) 1597bc3d5698SJohn Baldwin vpermd %ymm11,%ymm7,%ymm11 1598bc3d5698SJohn Baldwin vmovdqa %ymm10,32-144(%rax) 1599bc3d5698SJohn Baldwin vpermd %ymm12,%ymm7,%ymm12 1600bc3d5698SJohn Baldwin vmovdqa %ymm6,64-144(%rax) 1601bc3d5698SJohn Baldwin vpermd %ymm13,%ymm7,%ymm13 1602bc3d5698SJohn Baldwin vmovdqa %ymm11,96-144(%rax) 1603bc3d5698SJohn Baldwin vpermd %ymm14,%ymm7,%ymm14 1604bc3d5698SJohn Baldwin vmovdqa %ymm12,128-144(%rax) 1605bc3d5698SJohn Baldwin vpermd %ymm15,%ymm7,%ymm15 1606bc3d5698SJohn Baldwin vmovdqa %ymm13,160-144(%rax) 1607bc3d5698SJohn Baldwin vpermd %ymm5,%ymm7,%ymm5 
1608bc3d5698SJohn Baldwin vmovdqa %ymm14,192-144(%rax) 1609bc3d5698SJohn Baldwin vmovdqa %ymm15,224-144(%rax) 1610bc3d5698SJohn Baldwin vmovdqa %ymm5,256-144(%rax) 1611bc3d5698SJohn Baldwin vmovdqa 64(%rcx),%ymm5 1612bc3d5698SJohn Baldwin 1613bc3d5698SJohn Baldwin 1614bc3d5698SJohn Baldwin 1615bc3d5698SJohn Baldwin vmovdqu 0(%rsi),%xmm7 1616bc3d5698SJohn Baldwin vmovdqu 16(%rsi),%xmm8 1617bc3d5698SJohn Baldwin vinserti128 $1,32(%rsi),%ymm7,%ymm7 1618bc3d5698SJohn Baldwin vinserti128 $1,48(%rsi),%ymm8,%ymm8 1619bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 1620bc3d5698SJohn Baldwin 1621bc3d5698SJohn Baldwin vpsrldq $6,%ymm7,%ymm9 1622bc3d5698SJohn Baldwin vpsrldq $6,%ymm8,%ymm10 1623bc3d5698SJohn Baldwin vpunpckhqdq %ymm8,%ymm7,%ymm6 1624bc3d5698SJohn Baldwin vpunpcklqdq %ymm10,%ymm9,%ymm9 1625bc3d5698SJohn Baldwin vpunpcklqdq %ymm8,%ymm7,%ymm7 1626bc3d5698SJohn Baldwin 1627bc3d5698SJohn Baldwin vpsrlq $30,%ymm9,%ymm10 1628bc3d5698SJohn Baldwin vpsrlq $4,%ymm9,%ymm9 1629bc3d5698SJohn Baldwin vpsrlq $26,%ymm7,%ymm8 1630bc3d5698SJohn Baldwin vpsrlq $40,%ymm6,%ymm6 1631bc3d5698SJohn Baldwin vpand %ymm5,%ymm9,%ymm9 1632bc3d5698SJohn Baldwin vpand %ymm5,%ymm7,%ymm7 1633bc3d5698SJohn Baldwin vpand %ymm5,%ymm8,%ymm8 1634bc3d5698SJohn Baldwin vpand %ymm5,%ymm10,%ymm10 1635bc3d5698SJohn Baldwin vpor 32(%rcx),%ymm6,%ymm6 1636bc3d5698SJohn Baldwin 1637bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm9,%ymm2 1638bc3d5698SJohn Baldwin subq $64,%rdx 1639bc3d5698SJohn Baldwin jz .Ltail_avx2 1640bc3d5698SJohn Baldwin jmp .Loop_avx2 1641bc3d5698SJohn Baldwin 1642bc3d5698SJohn Baldwin.align 32 1643bc3d5698SJohn Baldwin.Loop_avx2: 1644bc3d5698SJohn Baldwin 1645bc3d5698SJohn Baldwin 1646bc3d5698SJohn Baldwin 1647bc3d5698SJohn Baldwin 1648bc3d5698SJohn Baldwin 1649bc3d5698SJohn Baldwin 1650bc3d5698SJohn Baldwin 1651bc3d5698SJohn Baldwin 1652bc3d5698SJohn Baldwin vpaddq %ymm0,%ymm7,%ymm0 1653bc3d5698SJohn Baldwin vmovdqa 0(%rsp),%ymm7 1654bc3d5698SJohn Baldwin vpaddq %ymm1,%ymm8,%ymm1 1655bc3d5698SJohn 
Baldwin vmovdqa 32(%rsp),%ymm8 1656bc3d5698SJohn Baldwin vpaddq %ymm3,%ymm10,%ymm3 1657bc3d5698SJohn Baldwin vmovdqa 96(%rsp),%ymm9 1658bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm6,%ymm4 1659bc3d5698SJohn Baldwin vmovdqa 48(%rax),%ymm10 1660bc3d5698SJohn Baldwin vmovdqa 112(%rax),%ymm5 1661bc3d5698SJohn Baldwin 1662bc3d5698SJohn Baldwin 1663bc3d5698SJohn Baldwin 1664bc3d5698SJohn Baldwin 1665bc3d5698SJohn Baldwin 1666bc3d5698SJohn Baldwin 1667bc3d5698SJohn Baldwin 1668bc3d5698SJohn Baldwin 1669bc3d5698SJohn Baldwin 1670bc3d5698SJohn Baldwin 1671bc3d5698SJohn Baldwin 1672bc3d5698SJohn Baldwin 1673bc3d5698SJohn Baldwin 1674bc3d5698SJohn Baldwin 1675bc3d5698SJohn Baldwin 1676bc3d5698SJohn Baldwin 1677bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm7,%ymm13 1678bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm8,%ymm14 1679bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm9,%ymm15 1680bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm10,%ymm11 1681bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm5,%ymm12 1682bc3d5698SJohn Baldwin 1683bc3d5698SJohn Baldwin vpmuludq %ymm0,%ymm8,%ymm6 1684bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm8,%ymm2 1685bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm12,%ymm12 1686bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm13,%ymm13 1687bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm8,%ymm6 1688bc3d5698SJohn Baldwin vpmuludq 64(%rsp),%ymm4,%ymm2 1689bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm15,%ymm15 1690bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm11,%ymm11 1691bc3d5698SJohn Baldwin vmovdqa -16(%rax),%ymm8 1692bc3d5698SJohn Baldwin 1693bc3d5698SJohn Baldwin vpmuludq %ymm0,%ymm7,%ymm6 1694bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm7,%ymm2 1695bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm11,%ymm11 1696bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm12,%ymm12 1697bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm7,%ymm6 1698bc3d5698SJohn Baldwin vpmuludq %ymm4,%ymm7,%ymm2 1699bc3d5698SJohn Baldwin vmovdqu 0(%rsi),%xmm7 1700bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm14,%ymm14 1701bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm15,%ymm15 1702bc3d5698SJohn Baldwin 
vinserti128 $1,32(%rsi),%ymm7,%ymm7 1703bc3d5698SJohn Baldwin 1704bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm8,%ymm6 1705bc3d5698SJohn Baldwin vpmuludq %ymm4,%ymm8,%ymm2 1706bc3d5698SJohn Baldwin vmovdqu 16(%rsi),%xmm8 1707bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm11,%ymm11 1708bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm12,%ymm12 1709bc3d5698SJohn Baldwin vmovdqa 16(%rax),%ymm2 1710bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm9,%ymm6 1711bc3d5698SJohn Baldwin vpmuludq %ymm0,%ymm9,%ymm9 1712bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm14,%ymm14 1713bc3d5698SJohn Baldwin vpaddq %ymm9,%ymm13,%ymm13 1714bc3d5698SJohn Baldwin vinserti128 $1,48(%rsi),%ymm8,%ymm8 1715bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 1716bc3d5698SJohn Baldwin 1717bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm2,%ymm6 1718bc3d5698SJohn Baldwin vpmuludq %ymm0,%ymm2,%ymm2 1719bc3d5698SJohn Baldwin vpsrldq $6,%ymm7,%ymm9 1720bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm15,%ymm15 1721bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm14,%ymm14 1722bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm10,%ymm6 1723bc3d5698SJohn Baldwin vpmuludq %ymm4,%ymm10,%ymm2 1724bc3d5698SJohn Baldwin vpsrldq $6,%ymm8,%ymm10 1725bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm12,%ymm12 1726bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm13,%ymm13 1727bc3d5698SJohn Baldwin vpunpckhqdq %ymm8,%ymm7,%ymm6 1728bc3d5698SJohn Baldwin 1729bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm5,%ymm3 1730bc3d5698SJohn Baldwin vpmuludq %ymm4,%ymm5,%ymm4 1731bc3d5698SJohn Baldwin vpunpcklqdq %ymm8,%ymm7,%ymm7 1732bc3d5698SJohn Baldwin vpaddq %ymm3,%ymm13,%ymm2 1733bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm14,%ymm3 1734bc3d5698SJohn Baldwin vpunpcklqdq %ymm10,%ymm9,%ymm10 1735bc3d5698SJohn Baldwin vpmuludq 80(%rax),%ymm0,%ymm4 1736bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm5,%ymm0 1737bc3d5698SJohn Baldwin vmovdqa 64(%rcx),%ymm5 1738bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm15,%ymm4 1739bc3d5698SJohn Baldwin vpaddq %ymm0,%ymm11,%ymm0 1740bc3d5698SJohn Baldwin 1741bc3d5698SJohn Baldwin 1742bc3d5698SJohn Baldwin 
1743bc3d5698SJohn Baldwin 1744bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm14 1745bc3d5698SJohn Baldwin vpand %ymm5,%ymm3,%ymm3 1746bc3d5698SJohn Baldwin vpaddq %ymm14,%ymm4,%ymm4 1747bc3d5698SJohn Baldwin 1748bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm11 1749bc3d5698SJohn Baldwin vpand %ymm5,%ymm0,%ymm0 1750bc3d5698SJohn Baldwin vpaddq %ymm11,%ymm12,%ymm1 1751bc3d5698SJohn Baldwin 1752bc3d5698SJohn Baldwin vpsrlq $26,%ymm4,%ymm15 1753bc3d5698SJohn Baldwin vpand %ymm5,%ymm4,%ymm4 1754bc3d5698SJohn Baldwin 1755bc3d5698SJohn Baldwin vpsrlq $4,%ymm10,%ymm9 1756bc3d5698SJohn Baldwin 1757bc3d5698SJohn Baldwin vpsrlq $26,%ymm1,%ymm12 1758bc3d5698SJohn Baldwin vpand %ymm5,%ymm1,%ymm1 1759bc3d5698SJohn Baldwin vpaddq %ymm12,%ymm2,%ymm2 1760bc3d5698SJohn Baldwin 1761bc3d5698SJohn Baldwin vpaddq %ymm15,%ymm0,%ymm0 1762bc3d5698SJohn Baldwin vpsllq $2,%ymm15,%ymm15 1763bc3d5698SJohn Baldwin vpaddq %ymm15,%ymm0,%ymm0 1764bc3d5698SJohn Baldwin 1765bc3d5698SJohn Baldwin vpand %ymm5,%ymm9,%ymm9 1766bc3d5698SJohn Baldwin vpsrlq $26,%ymm7,%ymm8 1767bc3d5698SJohn Baldwin 1768bc3d5698SJohn Baldwin vpsrlq $26,%ymm2,%ymm13 1769bc3d5698SJohn Baldwin vpand %ymm5,%ymm2,%ymm2 1770bc3d5698SJohn Baldwin vpaddq %ymm13,%ymm3,%ymm3 1771bc3d5698SJohn Baldwin 1772bc3d5698SJohn Baldwin vpaddq %ymm9,%ymm2,%ymm2 1773bc3d5698SJohn Baldwin vpsrlq $30,%ymm10,%ymm10 1774bc3d5698SJohn Baldwin 1775bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm11 1776bc3d5698SJohn Baldwin vpand %ymm5,%ymm0,%ymm0 1777bc3d5698SJohn Baldwin vpaddq %ymm11,%ymm1,%ymm1 1778bc3d5698SJohn Baldwin 1779bc3d5698SJohn Baldwin vpsrlq $40,%ymm6,%ymm6 1780bc3d5698SJohn Baldwin 1781bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm14 1782bc3d5698SJohn Baldwin vpand %ymm5,%ymm3,%ymm3 1783bc3d5698SJohn Baldwin vpaddq %ymm14,%ymm4,%ymm4 1784bc3d5698SJohn Baldwin 1785bc3d5698SJohn Baldwin vpand %ymm5,%ymm7,%ymm7 1786bc3d5698SJohn Baldwin vpand %ymm5,%ymm8,%ymm8 1787bc3d5698SJohn Baldwin vpand %ymm5,%ymm10,%ymm10 1788bc3d5698SJohn Baldwin vpor 
32(%rcx),%ymm6,%ymm6 1789bc3d5698SJohn Baldwin 1790bc3d5698SJohn Baldwin subq $64,%rdx 1791bc3d5698SJohn Baldwin jnz .Loop_avx2 1792bc3d5698SJohn Baldwin 1793bc3d5698SJohn Baldwin.byte 0x66,0x90 1794bc3d5698SJohn Baldwin.Ltail_avx2: 1795bc3d5698SJohn Baldwin 1796bc3d5698SJohn Baldwin 1797bc3d5698SJohn Baldwin 1798bc3d5698SJohn Baldwin 1799bc3d5698SJohn Baldwin 1800bc3d5698SJohn Baldwin 1801bc3d5698SJohn Baldwin 1802bc3d5698SJohn Baldwin vpaddq %ymm0,%ymm7,%ymm0 1803bc3d5698SJohn Baldwin vmovdqu 4(%rsp),%ymm7 1804bc3d5698SJohn Baldwin vpaddq %ymm1,%ymm8,%ymm1 1805bc3d5698SJohn Baldwin vmovdqu 36(%rsp),%ymm8 1806bc3d5698SJohn Baldwin vpaddq %ymm3,%ymm10,%ymm3 1807bc3d5698SJohn Baldwin vmovdqu 100(%rsp),%ymm9 1808bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm6,%ymm4 1809bc3d5698SJohn Baldwin vmovdqu 52(%rax),%ymm10 1810bc3d5698SJohn Baldwin vmovdqu 116(%rax),%ymm5 1811bc3d5698SJohn Baldwin 1812bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm7,%ymm13 1813bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm8,%ymm14 1814bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm9,%ymm15 1815bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm10,%ymm11 1816bc3d5698SJohn Baldwin vpmuludq %ymm2,%ymm5,%ymm12 1817bc3d5698SJohn Baldwin 1818bc3d5698SJohn Baldwin vpmuludq %ymm0,%ymm8,%ymm6 1819bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm8,%ymm2 1820bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm12,%ymm12 1821bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm13,%ymm13 1822bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm8,%ymm6 1823bc3d5698SJohn Baldwin vpmuludq 68(%rsp),%ymm4,%ymm2 1824bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm15,%ymm15 1825bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm11,%ymm11 1826bc3d5698SJohn Baldwin 1827bc3d5698SJohn Baldwin vpmuludq %ymm0,%ymm7,%ymm6 1828bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm7,%ymm2 1829bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm11,%ymm11 1830bc3d5698SJohn Baldwin vmovdqu -12(%rax),%ymm8 1831bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm12,%ymm12 1832bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm7,%ymm6 1833bc3d5698SJohn Baldwin vpmuludq 
%ymm4,%ymm7,%ymm2 1834bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm14,%ymm14 1835bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm15,%ymm15 1836bc3d5698SJohn Baldwin 1837bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm8,%ymm6 1838bc3d5698SJohn Baldwin vpmuludq %ymm4,%ymm8,%ymm2 1839bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm11,%ymm11 1840bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm12,%ymm12 1841bc3d5698SJohn Baldwin vmovdqu 20(%rax),%ymm2 1842bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm9,%ymm6 1843bc3d5698SJohn Baldwin vpmuludq %ymm0,%ymm9,%ymm9 1844bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm14,%ymm14 1845bc3d5698SJohn Baldwin vpaddq %ymm9,%ymm13,%ymm13 1846bc3d5698SJohn Baldwin 1847bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm2,%ymm6 1848bc3d5698SJohn Baldwin vpmuludq %ymm0,%ymm2,%ymm2 1849bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm15,%ymm15 1850bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm14,%ymm14 1851bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm10,%ymm6 1852bc3d5698SJohn Baldwin vpmuludq %ymm4,%ymm10,%ymm2 1853bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm12,%ymm12 1854bc3d5698SJohn Baldwin vpaddq %ymm2,%ymm13,%ymm13 1855bc3d5698SJohn Baldwin 1856bc3d5698SJohn Baldwin vpmuludq %ymm3,%ymm5,%ymm3 1857bc3d5698SJohn Baldwin vpmuludq %ymm4,%ymm5,%ymm4 1858bc3d5698SJohn Baldwin vpaddq %ymm3,%ymm13,%ymm2 1859bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm14,%ymm3 1860bc3d5698SJohn Baldwin vpmuludq 84(%rax),%ymm0,%ymm4 1861bc3d5698SJohn Baldwin vpmuludq %ymm1,%ymm5,%ymm0 1862bc3d5698SJohn Baldwin vmovdqa 64(%rcx),%ymm5 1863bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm15,%ymm4 1864bc3d5698SJohn Baldwin vpaddq %ymm0,%ymm11,%ymm0 1865bc3d5698SJohn Baldwin 1866bc3d5698SJohn Baldwin 1867bc3d5698SJohn Baldwin 1868bc3d5698SJohn Baldwin 1869bc3d5698SJohn Baldwin vpsrldq $8,%ymm12,%ymm8 1870bc3d5698SJohn Baldwin vpsrldq $8,%ymm2,%ymm9 1871bc3d5698SJohn Baldwin vpsrldq $8,%ymm3,%ymm10 1872bc3d5698SJohn Baldwin vpsrldq $8,%ymm4,%ymm6 1873bc3d5698SJohn Baldwin vpsrldq $8,%ymm0,%ymm7 1874bc3d5698SJohn Baldwin vpaddq %ymm8,%ymm12,%ymm12 1875bc3d5698SJohn 
Baldwin vpaddq %ymm9,%ymm2,%ymm2 1876bc3d5698SJohn Baldwin vpaddq %ymm10,%ymm3,%ymm3 1877bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 1878bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm0,%ymm0 1879bc3d5698SJohn Baldwin 1880bc3d5698SJohn Baldwin vpermq $0x2,%ymm3,%ymm10 1881bc3d5698SJohn Baldwin vpermq $0x2,%ymm4,%ymm6 1882bc3d5698SJohn Baldwin vpermq $0x2,%ymm0,%ymm7 1883bc3d5698SJohn Baldwin vpermq $0x2,%ymm12,%ymm8 1884bc3d5698SJohn Baldwin vpermq $0x2,%ymm2,%ymm9 1885bc3d5698SJohn Baldwin vpaddq %ymm10,%ymm3,%ymm3 1886bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 1887bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm0,%ymm0 1888bc3d5698SJohn Baldwin vpaddq %ymm8,%ymm12,%ymm12 1889bc3d5698SJohn Baldwin vpaddq %ymm9,%ymm2,%ymm2 1890bc3d5698SJohn Baldwin 1891bc3d5698SJohn Baldwin 1892bc3d5698SJohn Baldwin 1893bc3d5698SJohn Baldwin 1894bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm14 1895bc3d5698SJohn Baldwin vpand %ymm5,%ymm3,%ymm3 1896bc3d5698SJohn Baldwin vpaddq %ymm14,%ymm4,%ymm4 1897bc3d5698SJohn Baldwin 1898bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm11 1899bc3d5698SJohn Baldwin vpand %ymm5,%ymm0,%ymm0 1900bc3d5698SJohn Baldwin vpaddq %ymm11,%ymm12,%ymm1 1901bc3d5698SJohn Baldwin 1902bc3d5698SJohn Baldwin vpsrlq $26,%ymm4,%ymm15 1903bc3d5698SJohn Baldwin vpand %ymm5,%ymm4,%ymm4 1904bc3d5698SJohn Baldwin 1905bc3d5698SJohn Baldwin vpsrlq $26,%ymm1,%ymm12 1906bc3d5698SJohn Baldwin vpand %ymm5,%ymm1,%ymm1 1907bc3d5698SJohn Baldwin vpaddq %ymm12,%ymm2,%ymm2 1908bc3d5698SJohn Baldwin 1909bc3d5698SJohn Baldwin vpaddq %ymm15,%ymm0,%ymm0 1910bc3d5698SJohn Baldwin vpsllq $2,%ymm15,%ymm15 1911bc3d5698SJohn Baldwin vpaddq %ymm15,%ymm0,%ymm0 1912bc3d5698SJohn Baldwin 1913bc3d5698SJohn Baldwin vpsrlq $26,%ymm2,%ymm13 1914bc3d5698SJohn Baldwin vpand %ymm5,%ymm2,%ymm2 1915bc3d5698SJohn Baldwin vpaddq %ymm13,%ymm3,%ymm3 1916bc3d5698SJohn Baldwin 1917bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm11 1918bc3d5698SJohn Baldwin vpand %ymm5,%ymm0,%ymm0 1919bc3d5698SJohn Baldwin vpaddq %ymm11,%ymm1,%ymm1 
/*
 * Closing instructions of poly1305_blocks_avx2, the shared constant tables,
 * the xor128_{encrypt,decrypt}_n_pad helpers and the GNU property note.
 */

	/*
	 * Last carry step: the bits of limb 3 above bit 25 are added to limb 4
	 * and limb 3 is masked with %ymm5 (loaded from 64(%rcx), i.e. .Lmask26).
	 */
	vpsrlq	$26,%ymm3,%ymm14
	vpand	%ymm5,%ymm3,%ymm3
	vpaddq	%ymm14,%ymm4,%ymm4

	/*
	 * Store the low dword of each limb register.  %rdi was advanced by
	 * 48+64 = 112 at .Ldo_avx2, so -112(%rdi)..-96(%rdi) are offsets 0..16
	 * relative to the value %rdi had before that adjustment.
	 */
	vmovd	%xmm0,-112(%rdi)
	vmovd	%xmm1,-108(%rdi)
	vmovd	%xmm2,-104(%rdi)
	vmovd	%xmm3,-100(%rdi)
	vmovd	%xmm4,-96(%rdi)
	/* %r11 was set to (%rsp - 8) at .Ldo_avx2; this releases the aligned frame. */
	leaq	8(%r11),%rsp
.cfi_def_cfa	%rsp,8
	vzeroupper
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	poly1305_blocks_avx2,.-poly1305_blocks_avx2

/*
 * Constant tables, addressed by the AVX2 code as byte offsets from .Lconst:
 *    0  .Lmask24     24-bit mask in every qword lane
 *   32  .L129        1<<24 in every qword lane (ORed into the top limb;
 *                    presumably the 2^128 pad bit, 24 + 4*26 = 128 -- confirm)
 *   64  .Lmask26     26-bit limb mask in every qword lane
 *   96  .Lpermd_avx2 dword index vector used as the vpermd control
 */
.align	64
.Lconst:
.Lmask24:
.long	0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
.L129:
.long	16777216,0,16777216,0,16777216,0,16777216,0
.Lmask26:
.long	0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
.Lpermd_avx2:
.long	2,2,2,3,2,0,2,1
.Lpermd_avx512:
.long	0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7

/* Permutation, shift and mask tables with 44/44/42-bit field widths. */
.L2_44_inp_permd:
.long	0,1,1,2,2,3,7,7
.L2_44_inp_shift:
.quad	0,12,24,64
.L2_44_mask:
.quad	0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
.L2_44_shift_rgt:
.quad	44,44,42,64
.L2_44_shift_lft:
.quad	8,8,10,64

/* 44-bit and 42-bit masks replicated over eight qword lanes. */
.align	64
.Lx_mask44:
.quad	0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
.quad	0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
.Lx_mask42:
.quad	0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
.quad	0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
/* ASCII, NUL-terminated: "Poly1305 for x86_64, CRYPTOGAMS by <appro@openssl.org>" */
.byte	80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	16

/*
 * xor128_encrypt_n_pad
 *
 * In:   %rdi  destination buffer
 *       %rsi  source buffer
 *       %rdx  working buffer; read and rewritten in place.  It is accessed
 *             with pxor-from-memory and movdqa, so it must be 16-byte
 *             aligned whenever at least 16 bytes are processed.
 *       %rcx  number of bytes to process
 * Out:  %rax  the working-buffer pointer advanced past everything written
 *             to it, i.e. %rdx + count rounded up to a multiple of 16.
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r10, %xmm0, flags.
 *
 * For every byte i < count: t = source[i] ^ buffer[i]; destination[i] = t;
 * buffer[i] = t.  If count is not a multiple of 16, the buffer is then
 * zero-filled up to the next 16-byte boundary.
 */
.globl	xor128_encrypt_n_pad
.type	xor128_encrypt_n_pad,@function
.align	16
xor128_encrypt_n_pad:
.cfi_startproc
	/*
	 * A zero count returns the buffer pointer unchanged.  Without this
	 * check the byte loop would be entered with %r10 = 0 and its
	 * decq/jnz pair would wrap around instead of terminating.
	 */
	testq	%rcx,%rcx
	jz	.Ldone_enc

	/* Rebase source and destination so that %rdx alone indexes all three buffers. */
	subq	%rdx,%rsi
	subq	%rdx,%rdi
	movq	%rcx,%r10		/* keep the full count for the tail */
	shrq	$4,%rcx			/* %rcx = number of whole 16-byte blocks */
	jz	.Ltail_enc
	.p2align	4		/* keep the block loop 16-byte aligned */
.Loop_enc_xmm:
	movdqu	(%rsi,%rdx,1),%xmm0
	pxor	(%rdx),%xmm0
	movdqu	%xmm0,(%rdi,%rdx,1)
	movdqa	%xmm0,(%rdx)		/* buffer receives the XOR result */
	leaq	16(%rdx),%rdx
	decq	%rcx
	jnz	.Loop_enc_xmm

	andq	$15,%r10		/* bytes left after the whole blocks */
	jz	.Ldone_enc

.Ltail_enc:
	/* Here 0 < %r10 < 16.  %rcx = number of zero bytes needed to fill the block. */
	movq	$16,%rcx
	subq	%r10,%rcx
	xorl	%eax,%eax
.Loop_enc_byte:
	movb	(%rsi,%rdx,1),%al
	xorb	(%rdx),%al
	movb	%al,(%rdi,%rdx,1)
	movb	%al,(%rdx)
	leaq	1(%rdx),%rdx
	decq	%r10
	jnz	.Loop_enc_byte

	xorl	%eax,%eax
.Loop_enc_pad:
	movb	%al,(%rdx)
	leaq	1(%rdx),%rdx
	decq	%rcx
	jnz	.Loop_enc_pad

.Ldone_enc:
	movq	%rdx,%rax
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	xor128_encrypt_n_pad,.-xor128_encrypt_n_pad

/*
 * xor128_decrypt_n_pad
 *
 * Same register interface and return value as xor128_encrypt_n_pad.
 * For every byte i < count: destination[i] = source[i] ^ buffer[i], and
 * buffer[i] is overwritten with source[i] (the input byte, not the XOR
 * result).  If count is not a multiple of 16, the buffer is then
 * zero-filled up to the next 16-byte boundary.
 * Clobbers: %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0, %xmm1, flags.
 */
.globl	xor128_decrypt_n_pad
.type	xor128_decrypt_n_pad,@function
.align	16
xor128_decrypt_n_pad:
.cfi_startproc
	/*
	 * A zero count returns the buffer pointer unchanged.  Without this
	 * check the byte loop would be entered with %r10 = 0 and its
	 * decq/jnz pair would wrap around instead of terminating.
	 */
	testq	%rcx,%rcx
	jz	.Ldone_dec

	/* Rebase source and destination so that %rdx alone indexes all three buffers. */
	subq	%rdx,%rsi
	subq	%rdx,%rdi
	movq	%rcx,%r10		/* keep the full count for the tail */
	shrq	$4,%rcx			/* %rcx = number of whole 16-byte blocks */
	jz	.Ltail_dec
	.p2align	4		/* keep the block loop 16-byte aligned */
.Loop_dec_xmm:
	movdqu	(%rsi,%rdx,1),%xmm0
	movdqa	(%rdx),%xmm1
	pxor	%xmm0,%xmm1
	movdqu	%xmm1,(%rdi,%rdx,1)
	movdqa	%xmm0,(%rdx)		/* buffer receives the input block */
	leaq	16(%rdx),%rdx
	decq	%rcx
	jnz	.Loop_dec_xmm

	/* Clear the register that held the output block (presumably so no data lingers in it). */
	pxor	%xmm1,%xmm1
	andq	$15,%r10		/* bytes left after the whole blocks */
	jz	.Ldone_dec

.Ltail_dec:
	/* Here 0 < %r10 < 16.  %rcx = number of zero bytes needed to fill the block. */
	movq	$16,%rcx
	subq	%r10,%rcx
	xorl	%eax,%eax
	xorq	%r11,%r11
.Loop_dec_byte:
	movb	(%rsi,%rdx,1),%r11b
	movb	(%rdx),%al
	xorb	%r11b,%al
	movb	%al,(%rdi,%rdx,1)
	movb	%r11b,(%rdx)
	leaq	1(%rdx),%rdx
	decq	%r10
	jnz	.Loop_dec_byte

	xorl	%eax,%eax
.Loop_dec_pad:
	movb	%al,(%rdx)
	leaq	1(%rdx),%rdx
	decq	%rcx
	jnz	.Loop_dec_pad

.Ldone_dec:
	movq	%rdx,%rax
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	xor128_decrypt_n_pad,.-xor128_decrypt_n_pad

/*
 * ELF note: name size, descriptor size, type 5 (NT_GNU_PROPERTY_TYPE_0),
 * name "GNU", then one property record: type 0xc0000002
 * (GNU_PROPERTY_X86_FEATURE_1_AND), data size, value 3 (IBT | SHSTK).
 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4: