/* Do not modify.  This file is auto-generated from poly1305-x86.pl. */
/*
 * Syntax: GNU as, AT&T operand order, 32-bit x86, run through cpp.
 * The three exported routines read their arguments from the stack at
 * 20/24/28/32(%esp) after pushing %ebp, %ebx, %esi and %edi, and restore
 * those four registers before returning.
 *
 * Context layout as used by the code in this part of the file:
 *    0..16   accumulator h, five 32-bit words
 *   20       flag word, cleared by poly1305_init
 *   24..36   clamped key r, four 32-bit words
 *   48..191  table written by _poly1305_init_sse2
 *
 * NOTE(review): the length mask at .Lenter_blocks was changed by hand from
 * -15 to -16.  The same change must be made in the generator script,
 * otherwise regenerating this file reverts it.
 */
#ifdef PIC
.text
.align	64

/*
 * poly1305_init(ctx, key, func)
 *   20(%esp) ctx  : context, bytes 0..23 are zeroed
 *   24(%esp) key  : 16 key bytes, may be NULL
 *   28(%esp) func : two-pointer array receiving the blocks/emit routines
 * Returns 0 in %eax when key is NULL (nothing but the zeroing is done),
 * otherwise stores the clamped key at 24..36(ctx), fills func[0..1] and
 * returns 1.
 */
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
poly1305_init:
.L_poly1305_init_begin:
#ifdef __CET__

/* These four bytes encode endbr32. */
.byte	243,15,30,251
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ebp
	/* Clear h (five words) and the flag word; %eax = 0 is also the
	 * return value on the no-key path. */
	xorl	%eax,%eax
	movl	%eax,(%edi)
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)
	cmpl	$0,%esi
	je	.L000nokey
	/* call/pop yields the address of .L001pic_point in %ebx so that all
	 * symbol addresses below are formed position-independently. */
	call	.L001pic_point
.L001pic_point:
	popl	%ebx
	/* Default choice: the integer-only routines. */
	leal	poly1305_blocks-.L001pic_point(%ebx),%eax
	leal	poly1305_emit-.L001pic_point(%ebx),%edx
	leal	OPENSSL_ia32cap_P-.L001pic_point(%ebx),%edi
	/* 83886080 = 0x05000000: bits 24 and 26 of capability word 0 must
	 * both be set to select the routines labelled _sse2. */
	movl	(%edi),%ecx
	andl	$83886080,%ecx
	cmpl	$83886080,%ecx
	jne	.L002no_sse2
	leal	_poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
	leal	_poly1305_emit_sse2-.L001pic_point(%ebx),%edx
	/* Bit 5 of the word at offset 8 upgrades the blocks routine to the
	 * one labelled _avx2; the emit routine stays the _sse2 one. */
	movl	8(%edi),%ecx
	testl	$32,%ecx
	jz	.L002no_sse2
	leal	_poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
	movl	20(%esp),%edi
	movl	%eax,(%ebp)
	movl	%edx,4(%ebp)
	/* Load the 16 key bytes and clamp them: 0x0fffffff for word 0,
	 * 0x0ffffffc for words 1..3. */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	andl	$268435455,%eax
	andl	$268435452,%ebx
	andl	$268435452,%ecx
	andl	$268435452,%edx
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	movl	$1,%eax
.L000nokey:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin

/*
 * poly1305_blocks(ctx, inp, len, padbit)
 *   20(%esp) ctx    : context (h at 0..16, r at 24..36)
 *   24(%esp) inp    : input bytes
 *   28(%esp) len    : byte count, rounded down to a multiple of 16
 *   32(%esp) padbit : added with carry into the top word of h per block
 * For every 16-byte block: h = (h + block + padbit * 2^128) * r, partially
 * reduced so that the top word stays within two bits.
 *
 * .Lenter_blocks is also entered from _poly1305_blocks_sse2 with
 * %edi = ctx, %esi = inp, %ecx = len and the same four pushes on the stack.
 *
 * Frame after subl $64 (offsets from %esp):
 *    0, 12, 16   h0, h3, h4 of the current block
 *   20, 24, 28   product words d0, d1, d2
 *   36..48       r0..r3
 *   52..60       s1..s3 where s = r + (r >> 2)
 *   84           ctx argument
 *   92           end-of-input pointer (overwrites the len argument slot)
 *   96           padbit argument
 */
.globl	poly1305_blocks
.type	poly1305_blocks,@function
.align	16
poly1305_blocks:
.L_poly1305_blocks_begin:
#ifdef __CET__

.byte	243,15,30,251
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.Lenter_blocks:
	/* Round len down to whole 16-byte blocks.  The mask used to be -15,
	 * which kept bit 0: an odd len then produced an end pointer that the
	 * 16-byte stepping cursor never equals, so the loop ran off the end
	 * of the input.  -16 matches _poly1305_blocks_sse2. */
	andl	$-16,%ecx
	jz	.L003nodata
	subl	$64,%esp
	movl	24(%edi),%eax
	movl	28(%edi),%ebx
	leal	(%esi,%ecx,1),%ebp
	movl	32(%edi),%ecx
	movl	36(%edi),%edx
	movl	%ebp,92(%esp)
	movl	%esi,%ebp
	/* Spill r0..r3 and derive s1..s3 = r + (r >> 2). */
	movl	%eax,36(%esp)
	movl	%ebx,%eax
	shrl	$2,%eax
	movl	%ebx,40(%esp)
	addl	%ebx,%eax
	movl	%ecx,%ebx
	shrl	$2,%ebx
	movl	%ecx,44(%esp)
	addl	%ecx,%ebx
	movl	%edx,%ecx
	shrl	$2,%ecx
	movl	%edx,48(%esp)
	addl	%edx,%ecx
	movl	%eax,52(%esp)
	movl	%ebx,56(%esp)
	movl	%ecx,60(%esp)
	/* h0..h4 live in %eax, %ebx, %ecx, %esi, %edi across iterations. */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%esi
	movl	16(%edi),%edi
	jmp	.L004loop
.align	32
.L004loop:
	/* h += block; leal advances the cursor without touching CF so the
	 * carry chain continues into the padbit addition. */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%esi
	leal	16(%ebp),%ebp
	adcl	96(%esp),%edi
	movl	%eax,(%esp)
	movl	%esi,12(%esp)
	/* d0 = h0*r0 + h1*s3 + h2*s2 + h3*s1 */
	mull	36(%esp)
	movl	%edi,16(%esp)
	movl	%eax,%edi
	movl	%ebx,%eax
	movl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	56(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	52(%esp)
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	%edx,%esi
	/* d1 = carry + h0*r1 + h1*r0 + h2*s3 + h3*s2 + h4*s1 */
	mull	40(%esp)
	movl	%edi,20(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	60(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	56(%esp)
	addl	%eax,%esi
	movl	16(%esp),%eax
	adcl	%edx,%edi
	imull	52(%esp),%eax
	addl	%eax,%esi
	movl	(%esp),%eax
	adcl	$0,%edi
	/* d2 = carry + h0*r2 + h1*r1 + h2*r0 + h3*s3 + h4*s2 */
	mull	44(%esp)
	movl	%esi,24(%esp)
	xorl	%esi,%esi
	addl	%eax,%edi
	movl	%ebx,%eax
	adcl	%edx,%esi
	mull	40(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	36(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	16(%esp),%eax
	adcl	%edx,%esi
	imull	56(%esp),%eax
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	$0,%esi
	/* d3 = carry + h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s3 */
	mull	48(%esp)
	movl	%edi,28(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	44(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	40(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	16(%esp),%ecx
	adcl	%edx,%edi
	movl	%ecx,%edx
	imull	60(%esp),%ecx
	addl	%ecx,%esi
	movl	20(%esp),%eax
	adcl	$0,%edi
	/* d4 = carry + h4*r0 */
	imull	36(%esp),%edx
	addl	%edi,%edx
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	/* Partial reduction: keep the low two bits of d4 as the new h4 and
	 * fold (d4 >> 2) * 5 back into the low words. */
	movl	%edx,%edi
	shrl	$2,%edx
	andl	$3,%edi
	leal	(%edx,%edx,4),%edx
	addl	%edx,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%esi
	adcl	$0,%edi
	cmpl	92(%esp),%ebp
	jne	.L004loop
	/* 84(%esp) is the ctx argument (20 + 64); write h back. */
	movl	84(%esp),%edx
	addl	$64,%esp
	movl	%eax,(%edx)
	movl	%ebx,4(%edx)
	movl	%ecx,8(%edx)
	movl	%esi,12(%edx)
	movl	%edi,16(%edx)
.L003nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_blocks,.-.L_poly1305_blocks_begin

/*
 * poly1305_emit(ctx, mac, nonce)
 *   20(%esp) ctx   : context, h read from 0..16
 *   24(%esp) mac   : 16-byte output buffer
 *   28(%esp) nonce : four 32-bit words added to the result
 * Computes h + 5, selects it over h when the addition carries into bit 130,
 * adds the nonce as a 128-bit integer and stores the low 128 bits at mac.
 * The output buffer doubles as scratch for the h + 5 candidate.
 */
.globl	poly1305_emit
.type	poly1305_emit,@function
.align	16
poly1305_emit:
.L_poly1305_emit_begin:
#ifdef __CET__

.byte	243,15,30,251
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
.Lenter_emit:
	movl	24(%esp),%edi
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	/* g = h + 5, carried through all five words. */
	addl	$5,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	/* %esi = -(g4 >> 2): zero when bit 130 of g is clear, all-ones when
	 * the shifted value is 1. */
	shrl	$2,%esi
	negl	%esi
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	/* Complementary mask picks the unmodified h in the other case. */
	notl	%esi
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	28(%esp),%ebp
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	orl	(%edi),%eax
	orl	4(%edi),%ebx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	/* Add the nonce; the carry out of the top word is dropped. */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_emit,.-.L_poly1305_emit_begin

/*
 * _poly1305_init_sse2 - file-local helper, register-based interface.
 *   In:   %edi = context (key read from 24(%edi))
 *         %ebx = constants base; the limb mask is read from 64(%ebx)
 *   Out:  nine 16-byte vectors stored at 48..191 of the context:
 *         limbs 0..4 at +0..+64, then 5 * limbs 1..4 at +80..+128
 *         (offsets relative to context + 48); %edi is restored on return
 *   Clobbers: %ebp (holds the entry %esp), %ecx, %edx, %xmm0-%xmm7, flags
 * Splits the key into five 26-bit limbs, then runs the multiply-and-reduce
 * pass at .L005square twice (%ecx = 2), merging each result with the
 * previous limbs before packing them into the table.
 * Uses a 16-byte aligned scratch area of 224 bytes below the entry %esp.
 */
.align	32
.type	_poly1305_init_sse2,@function
.align	16
_poly1305_init_sse2:
#ifdef __CET__

.byte	243,15,30,251
#endif

	movdqu	24(%edi),%xmm4
	leal	48(%edi),%edi
	/* %ebp keeps the entry %esp (pointing at the return address) so the
	 * aligned scratch frame can be dropped with one move before ret. */
	movl	%esp,%ebp
	subl	$224,%esp
	andl	$-16,%esp
	movq	64(%ebx),%xmm7
	/* Radix conversion: 4 x 32-bit words -> 5 x 26-bit limbs. */
	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	movdqa	%xmm4,%xmm2
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm1
	psrldq	$6,%xmm2
	pand	%xmm7,%xmm1
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	psrldq	$13,%xmm4
	leal	144(%esp),%edx
	movl	$2,%ecx
.L005square:
	/* 0..64(%esp): current limbs; 80..128(%esp): 5 * limbs 1..4. */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqa	%xmm6,80(%esp)
	movdqa	%xmm5,96(%esp)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqa	%xmm6,112(%esp)
	movdqa	%xmm5,128(%esp)
	/* pshufd $68 copies the low quadword into both halves; the copies
	 * at 0..64(%edx) are the multiplicands for the pass below. */
	pshufd	$68,%xmm0,%xmm6
	movdqa	%xmm1,%xmm5
	pshufd	$68,%xmm1,%xmm1
	pshufd	$68,%xmm2,%xmm2
	pshufd	$68,%xmm3,%xmm3
	pshufd	$68,%xmm4,%xmm4
	movdqa	%xmm6,(%edx)
	movdqa	%xmm1,16(%edx)
	movdqa	%xmm2,32(%edx)
	movdqa	%xmm3,48(%edx)
	movdqa	%xmm4,64(%edx)
	/* Schoolbook 5 x 5 limb product accumulated into %xmm0..%xmm4. */
	pmuludq	%xmm0,%xmm4
	pmuludq	%xmm0,%xmm3
	pmuludq	%xmm0,%xmm2
	pmuludq	%xmm0,%xmm1
	pmuludq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	80(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%edx),%xmm6
	movdqa	32(%esp),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	96(%esp),%xmm7
	pmuludq	(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%edx),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%edx),%xmm5
	movdqa	48(%esp),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	112(%esp),%xmm5
	pmuludq	(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%edx),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%edx),%xmm7
	movdqa	64(%esp),%xmm5
	paddq	%xmm6,%xmm1
	movdqa	128(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%edx),%xmm6
	/* Reload the limb mask (%xmm7 was used as scratch) and propagate
	 * carries between limbs; the carry out of limb 4 is multiplied by
	 * five (x + (x << 2)) and added to limb 0. */
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	decl	%ecx
	jz	.L006square_break
	/* First pass done: pair the new limbs with the previous ones and
	 * run the pass once more. */
	punpcklqdq	(%esp),%xmm0
	punpcklqdq	16(%esp),%xmm1
	punpcklqdq	32(%esp),%xmm2
	punpcklqdq	48(%esp),%xmm3
	punpcklqdq	64(%esp),%xmm4
	jmp	.L005square
.L006square_break:
	/* Interleave the final limbs with the saved ones into 32-bit lanes
	 * and reorder them with pshufd $141 for the table layout. */
	psllq	$32,%xmm0
	psllq	$32,%xmm1
	psllq	$32,%xmm2
	psllq	$32,%xmm3
	psllq	$32,%xmm4
	por	(%esp),%xmm0
	por	16(%esp),%xmm1
	por	32(%esp),%xmm2
	por	48(%esp),%xmm3
	por	64(%esp),%xmm4
	pshufd	$141,%xmm0,%xmm0
	pshufd	$141,%xmm1,%xmm1
	pshufd	$141,%xmm2,%xmm2
	pshufd	$141,%xmm3,%xmm3
	pshufd	$141,%xmm4,%xmm4
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	movdqu	%xmm4,64(%edi)
	/* Also store 5 * limbs 1..4 (x + (x << 2)). */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqu	%xmm6,80(%edi)
	movdqu	%xmm5,96(%edi)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqu	%xmm6,112(%edi)
	movdqu	%xmm5,128(%edi)
	/* Drop the aligned scratch frame and undo the +48 bias on %edi. */
	movl	%ebp,%esp
	leal	-48(%edi),%edi
	ret
.size	_poly1305_init_sse2,.-_poly1305_init_sse2
.align	32
.type
_poly1305_blocks_sse2,@function 518bc3d5698SJohn Baldwin.align 16 519bc3d5698SJohn Baldwin_poly1305_blocks_sse2: 520*c0855eaaSJohn Baldwin #ifdef __CET__ 521*c0855eaaSJohn Baldwin 522*c0855eaaSJohn Baldwin.byte 243,15,30,251 523*c0855eaaSJohn Baldwin #endif 524*c0855eaaSJohn Baldwin 525bc3d5698SJohn Baldwin pushl %ebp 526bc3d5698SJohn Baldwin pushl %ebx 527bc3d5698SJohn Baldwin pushl %esi 528bc3d5698SJohn Baldwin pushl %edi 529bc3d5698SJohn Baldwin movl 20(%esp),%edi 530bc3d5698SJohn Baldwin movl 24(%esp),%esi 531bc3d5698SJohn Baldwin movl 28(%esp),%ecx 532bc3d5698SJohn Baldwin movl 20(%edi),%eax 533bc3d5698SJohn Baldwin andl $-16,%ecx 534bc3d5698SJohn Baldwin jz .L007nodata 535bc3d5698SJohn Baldwin cmpl $64,%ecx 536bc3d5698SJohn Baldwin jae .L008enter_sse2 537bc3d5698SJohn Baldwin testl %eax,%eax 538bc3d5698SJohn Baldwin jz .Lenter_blocks 539bc3d5698SJohn Baldwin.align 16 540bc3d5698SJohn Baldwin.L008enter_sse2: 541bc3d5698SJohn Baldwin call .L009pic_point 542bc3d5698SJohn Baldwin.L009pic_point: 543bc3d5698SJohn Baldwin popl %ebx 544bc3d5698SJohn Baldwin leal .Lconst_sse2-.L009pic_point(%ebx),%ebx 545bc3d5698SJohn Baldwin testl %eax,%eax 546bc3d5698SJohn Baldwin jnz .L010base2_26 547bc3d5698SJohn Baldwin call _poly1305_init_sse2 548bc3d5698SJohn Baldwin movl (%edi),%eax 549bc3d5698SJohn Baldwin movl 3(%edi),%ecx 550bc3d5698SJohn Baldwin movl 6(%edi),%edx 551bc3d5698SJohn Baldwin movl 9(%edi),%esi 552bc3d5698SJohn Baldwin movl 13(%edi),%ebp 553bc3d5698SJohn Baldwin movl $1,20(%edi) 554bc3d5698SJohn Baldwin shrl $2,%ecx 555bc3d5698SJohn Baldwin andl $67108863,%eax 556bc3d5698SJohn Baldwin shrl $4,%edx 557bc3d5698SJohn Baldwin andl $67108863,%ecx 558bc3d5698SJohn Baldwin shrl $6,%esi 559bc3d5698SJohn Baldwin andl $67108863,%edx 560bc3d5698SJohn Baldwin movd %eax,%xmm0 561bc3d5698SJohn Baldwin movd %ecx,%xmm1 562bc3d5698SJohn Baldwin movd %edx,%xmm2 563bc3d5698SJohn Baldwin movd %esi,%xmm3 564bc3d5698SJohn Baldwin movd %ebp,%xmm4 565bc3d5698SJohn Baldwin movl 
24(%esp),%esi 566bc3d5698SJohn Baldwin movl 28(%esp),%ecx 567bc3d5698SJohn Baldwin jmp .L011base2_32 568bc3d5698SJohn Baldwin.align 16 569bc3d5698SJohn Baldwin.L010base2_26: 570bc3d5698SJohn Baldwin movd (%edi),%xmm0 571bc3d5698SJohn Baldwin movd 4(%edi),%xmm1 572bc3d5698SJohn Baldwin movd 8(%edi),%xmm2 573bc3d5698SJohn Baldwin movd 12(%edi),%xmm3 574bc3d5698SJohn Baldwin movd 16(%edi),%xmm4 575bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 576bc3d5698SJohn Baldwin.L011base2_32: 577bc3d5698SJohn Baldwin movl 32(%esp),%eax 578bc3d5698SJohn Baldwin movl %esp,%ebp 579bc3d5698SJohn Baldwin subl $528,%esp 580bc3d5698SJohn Baldwin andl $-16,%esp 581bc3d5698SJohn Baldwin leal 48(%edi),%edi 582bc3d5698SJohn Baldwin shll $24,%eax 583bc3d5698SJohn Baldwin testl $31,%ecx 584bc3d5698SJohn Baldwin jz .L012even 585bc3d5698SJohn Baldwin movdqu (%esi),%xmm6 586bc3d5698SJohn Baldwin leal 16(%esi),%esi 587bc3d5698SJohn Baldwin movdqa %xmm6,%xmm5 588bc3d5698SJohn Baldwin pand %xmm7,%xmm6 589bc3d5698SJohn Baldwin paddd %xmm6,%xmm0 590bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 591bc3d5698SJohn Baldwin psrlq $26,%xmm5 592bc3d5698SJohn Baldwin psrldq $6,%xmm6 593bc3d5698SJohn Baldwin pand %xmm7,%xmm5 594bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 595bc3d5698SJohn Baldwin movdqa %xmm6,%xmm5 596bc3d5698SJohn Baldwin psrlq $4,%xmm6 597bc3d5698SJohn Baldwin pand %xmm7,%xmm6 598bc3d5698SJohn Baldwin paddd %xmm6,%xmm2 599bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 600bc3d5698SJohn Baldwin psrlq $30,%xmm5 601bc3d5698SJohn Baldwin pand %xmm7,%xmm5 602bc3d5698SJohn Baldwin psrldq $7,%xmm6 603bc3d5698SJohn Baldwin paddd %xmm5,%xmm3 604bc3d5698SJohn Baldwin movd %eax,%xmm5 605bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 606bc3d5698SJohn Baldwin movd 12(%edi),%xmm6 607bc3d5698SJohn Baldwin paddd %xmm5,%xmm4 608bc3d5698SJohn Baldwin movdqa %xmm0,(%esp) 609bc3d5698SJohn Baldwin movdqa %xmm1,16(%esp) 610bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 611bc3d5698SJohn Baldwin movdqa %xmm3,48(%esp) 612bc3d5698SJohn Baldwin 
movdqa %xmm4,64(%esp) 613bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm0 614bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm1 615bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm2 616bc3d5698SJohn Baldwin movd 28(%edi),%xmm5 617bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm3 618bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm4 619bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 620bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm5 621bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 622bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm6 623bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 624bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 625bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm7 626bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 627bc3d5698SJohn Baldwin movd 92(%edi),%xmm6 628bc3d5698SJohn Baldwin pmuludq (%esp),%xmm5 629bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 630bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm6 631bc3d5698SJohn Baldwin movd 44(%edi),%xmm7 632bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 633bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 634bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm7 635bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 636bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 637bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm5 638bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 639bc3d5698SJohn Baldwin movd 108(%edi),%xmm7 640bc3d5698SJohn Baldwin pmuludq (%esp),%xmm6 641bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 642bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 643bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm7 644bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 645bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm5 646bc3d5698SJohn Baldwin movd 60(%edi),%xmm6 647bc3d5698SJohn Baldwin paddq %xmm7,%xmm1 648bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 649bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm6 650bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 651bc3d5698SJohn Baldwin movd 124(%edi),%xmm5 652bc3d5698SJohn Baldwin pmuludq (%esp),%xmm7 653bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 654bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 655bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm5 656bc3d5698SJohn Baldwin paddq 
%xmm7,%xmm3 657bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 658bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm6 659bc3d5698SJohn Baldwin paddq %xmm5,%xmm2 660bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm7 661bc3d5698SJohn Baldwin movd 76(%edi),%xmm5 662bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 663bc3d5698SJohn Baldwin movd 140(%edi),%xmm6 664bc3d5698SJohn Baldwin pmuludq (%esp),%xmm5 665bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 666bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 667bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm6 668bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 669bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 670bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm7 671bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 672bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 673bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm5 674bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 675bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm6 676bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 677bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 678bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 679bc3d5698SJohn Baldwin movdqa %xmm3,%xmm5 680bc3d5698SJohn Baldwin pand %xmm7,%xmm3 681bc3d5698SJohn Baldwin psrlq $26,%xmm5 682bc3d5698SJohn Baldwin paddq %xmm4,%xmm5 683bc3d5698SJohn Baldwin movdqa %xmm0,%xmm6 684bc3d5698SJohn Baldwin pand %xmm7,%xmm0 685bc3d5698SJohn Baldwin psrlq $26,%xmm6 686bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 687bc3d5698SJohn Baldwin paddq %xmm1,%xmm6 688bc3d5698SJohn Baldwin psrlq $26,%xmm5 689bc3d5698SJohn Baldwin pand %xmm7,%xmm4 690bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 691bc3d5698SJohn Baldwin psrlq $26,%xmm6 692bc3d5698SJohn Baldwin paddd %xmm5,%xmm0 693bc3d5698SJohn Baldwin psllq $2,%xmm5 694bc3d5698SJohn Baldwin paddq %xmm2,%xmm6 695bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 696bc3d5698SJohn Baldwin pand %xmm7,%xmm1 697bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 698bc3d5698SJohn Baldwin psrlq $26,%xmm6 699bc3d5698SJohn Baldwin pand %xmm7,%xmm2 700bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 701bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 702bc3d5698SJohn 
Baldwin psrlq $26,%xmm5 703bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 704bc3d5698SJohn Baldwin psrlq $26,%xmm6 705bc3d5698SJohn Baldwin pand %xmm7,%xmm0 706bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 707bc3d5698SJohn Baldwin pand %xmm7,%xmm3 708bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 709bc3d5698SJohn Baldwin subl $16,%ecx 710bc3d5698SJohn Baldwin jz .L013done 711bc3d5698SJohn Baldwin.L012even: 712bc3d5698SJohn Baldwin leal 384(%esp),%edx 713bc3d5698SJohn Baldwin leal -32(%esi),%eax 714bc3d5698SJohn Baldwin subl $64,%ecx 715bc3d5698SJohn Baldwin movdqu (%edi),%xmm5 716bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 717bc3d5698SJohn Baldwin cmovbl %eax,%esi 718bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 719bc3d5698SJohn Baldwin movdqa %xmm6,(%edx) 720bc3d5698SJohn Baldwin leal 160(%esp),%eax 721bc3d5698SJohn Baldwin movdqu 16(%edi),%xmm6 722bc3d5698SJohn Baldwin movdqa %xmm5,-144(%edx) 723bc3d5698SJohn Baldwin pshufd $68,%xmm6,%xmm5 724bc3d5698SJohn Baldwin pshufd $238,%xmm6,%xmm6 725bc3d5698SJohn Baldwin movdqa %xmm5,16(%edx) 726bc3d5698SJohn Baldwin movdqu 32(%edi),%xmm5 727bc3d5698SJohn Baldwin movdqa %xmm6,-128(%edx) 728bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 729bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 730bc3d5698SJohn Baldwin movdqa %xmm6,32(%edx) 731bc3d5698SJohn Baldwin movdqu 48(%edi),%xmm6 732bc3d5698SJohn Baldwin movdqa %xmm5,-112(%edx) 733bc3d5698SJohn Baldwin pshufd $68,%xmm6,%xmm5 734bc3d5698SJohn Baldwin pshufd $238,%xmm6,%xmm6 735bc3d5698SJohn Baldwin movdqa %xmm5,48(%edx) 736bc3d5698SJohn Baldwin movdqu 64(%edi),%xmm5 737bc3d5698SJohn Baldwin movdqa %xmm6,-96(%edx) 738bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 739bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 740bc3d5698SJohn Baldwin movdqa %xmm6,64(%edx) 741bc3d5698SJohn Baldwin movdqu 80(%edi),%xmm6 742bc3d5698SJohn Baldwin movdqa %xmm5,-80(%edx) 743bc3d5698SJohn Baldwin pshufd $68,%xmm6,%xmm5 744bc3d5698SJohn Baldwin pshufd $238,%xmm6,%xmm6 745bc3d5698SJohn Baldwin movdqa %xmm5,80(%edx) 
746bc3d5698SJohn Baldwin movdqu 96(%edi),%xmm5 747bc3d5698SJohn Baldwin movdqa %xmm6,-64(%edx) 748bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 749bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 750bc3d5698SJohn Baldwin movdqa %xmm6,96(%edx) 751bc3d5698SJohn Baldwin movdqu 112(%edi),%xmm6 752bc3d5698SJohn Baldwin movdqa %xmm5,-48(%edx) 753bc3d5698SJohn Baldwin pshufd $68,%xmm6,%xmm5 754bc3d5698SJohn Baldwin pshufd $238,%xmm6,%xmm6 755bc3d5698SJohn Baldwin movdqa %xmm5,112(%edx) 756bc3d5698SJohn Baldwin movdqu 128(%edi),%xmm5 757bc3d5698SJohn Baldwin movdqa %xmm6,-32(%edx) 758bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 759bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 760bc3d5698SJohn Baldwin movdqa %xmm6,128(%edx) 761bc3d5698SJohn Baldwin movdqa %xmm5,-16(%edx) 762bc3d5698SJohn Baldwin movdqu 32(%esi),%xmm5 763bc3d5698SJohn Baldwin movdqu 48(%esi),%xmm6 764bc3d5698SJohn Baldwin leal 32(%esi),%esi 765bc3d5698SJohn Baldwin movdqa %xmm2,112(%esp) 766bc3d5698SJohn Baldwin movdqa %xmm3,128(%esp) 767bc3d5698SJohn Baldwin movdqa %xmm4,144(%esp) 768bc3d5698SJohn Baldwin movdqa %xmm5,%xmm2 769bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 770bc3d5698SJohn Baldwin psrldq $6,%xmm2 771bc3d5698SJohn Baldwin psrldq $6,%xmm3 772bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 773bc3d5698SJohn Baldwin punpcklqdq %xmm3,%xmm2 774bc3d5698SJohn Baldwin punpckhqdq %xmm6,%xmm4 775bc3d5698SJohn Baldwin punpcklqdq %xmm6,%xmm5 776bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 777bc3d5698SJohn Baldwin psrlq $4,%xmm2 778bc3d5698SJohn Baldwin psrlq $30,%xmm3 779bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 780bc3d5698SJohn Baldwin psrlq $40,%xmm4 781bc3d5698SJohn Baldwin psrlq $26,%xmm6 782bc3d5698SJohn Baldwin pand %xmm7,%xmm5 783bc3d5698SJohn Baldwin pand %xmm7,%xmm6 784bc3d5698SJohn Baldwin pand %xmm7,%xmm2 785bc3d5698SJohn Baldwin pand %xmm7,%xmm3 786bc3d5698SJohn Baldwin por (%ebx),%xmm4 787bc3d5698SJohn Baldwin movdqa %xmm0,80(%esp) 788bc3d5698SJohn Baldwin movdqa %xmm1,96(%esp) 789bc3d5698SJohn Baldwin jbe 
.L014skip_loop 790bc3d5698SJohn Baldwin jmp .L015loop 791bc3d5698SJohn Baldwin.align 32 792bc3d5698SJohn Baldwin.L015loop: 793bc3d5698SJohn Baldwin movdqa -144(%edx),%xmm7 794bc3d5698SJohn Baldwin movdqa %xmm6,16(%eax) 795bc3d5698SJohn Baldwin movdqa %xmm2,32(%eax) 796bc3d5698SJohn Baldwin movdqa %xmm3,48(%eax) 797bc3d5698SJohn Baldwin movdqa %xmm4,64(%eax) 798bc3d5698SJohn Baldwin movdqa %xmm5,%xmm1 799bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm5 800bc3d5698SJohn Baldwin movdqa %xmm6,%xmm0 801bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm6 802bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm2 803bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm3 804bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm4 805bc3d5698SJohn Baldwin pmuludq -16(%edx),%xmm0 806bc3d5698SJohn Baldwin movdqa %xmm1,%xmm7 807bc3d5698SJohn Baldwin pmuludq -128(%edx),%xmm1 808bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 809bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 810bc3d5698SJohn Baldwin pmuludq -112(%edx),%xmm7 811bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 812bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 813bc3d5698SJohn Baldwin pmuludq -96(%edx),%xmm5 814bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 815bc3d5698SJohn Baldwin movdqa 16(%eax),%xmm7 816bc3d5698SJohn Baldwin pmuludq -80(%edx),%xmm6 817bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 818bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 819bc3d5698SJohn Baldwin pmuludq -128(%edx),%xmm7 820bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 821bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 822bc3d5698SJohn Baldwin pmuludq -112(%edx),%xmm5 823bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 824bc3d5698SJohn Baldwin movdqa 32(%eax),%xmm7 825bc3d5698SJohn Baldwin pmuludq -96(%edx),%xmm6 826bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 827bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 828bc3d5698SJohn Baldwin pmuludq -32(%edx),%xmm7 829bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 830bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 831bc3d5698SJohn Baldwin pmuludq -16(%edx),%xmm5 832bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 833bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 
834bc3d5698SJohn Baldwin pmuludq -128(%edx),%xmm6 835bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 836bc3d5698SJohn Baldwin movdqa 48(%eax),%xmm5 837bc3d5698SJohn Baldwin pmuludq -112(%edx),%xmm7 838bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 839bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 840bc3d5698SJohn Baldwin pmuludq -48(%edx),%xmm5 841bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 842bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 843bc3d5698SJohn Baldwin pmuludq -32(%edx),%xmm6 844bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 845bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 846bc3d5698SJohn Baldwin pmuludq -16(%edx),%xmm7 847bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 848bc3d5698SJohn Baldwin movdqa 64(%eax),%xmm6 849bc3d5698SJohn Baldwin pmuludq -128(%edx),%xmm5 850bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 851bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 852bc3d5698SJohn Baldwin pmuludq -16(%edx),%xmm6 853bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 854bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 855bc3d5698SJohn Baldwin pmuludq -64(%edx),%xmm7 856bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 857bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 858bc3d5698SJohn Baldwin pmuludq -48(%edx),%xmm5 859bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 860bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 861bc3d5698SJohn Baldwin pmuludq -32(%edx),%xmm6 862bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 863bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 864bc3d5698SJohn Baldwin movdqu -32(%esi),%xmm5 865bc3d5698SJohn Baldwin movdqu -16(%esi),%xmm6 866bc3d5698SJohn Baldwin leal 32(%esi),%esi 867bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 868bc3d5698SJohn Baldwin movdqa %xmm3,48(%esp) 869bc3d5698SJohn Baldwin movdqa %xmm4,64(%esp) 870bc3d5698SJohn Baldwin movdqa %xmm5,%xmm2 871bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 872bc3d5698SJohn Baldwin psrldq $6,%xmm2 873bc3d5698SJohn Baldwin psrldq $6,%xmm3 874bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 875bc3d5698SJohn Baldwin punpcklqdq %xmm3,%xmm2 876bc3d5698SJohn Baldwin punpckhqdq %xmm6,%xmm4 877bc3d5698SJohn Baldwin punpcklqdq 
%xmm6,%xmm5 878bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 879bc3d5698SJohn Baldwin psrlq $4,%xmm2 880bc3d5698SJohn Baldwin psrlq $30,%xmm3 881bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 882bc3d5698SJohn Baldwin psrlq $40,%xmm4 883bc3d5698SJohn Baldwin psrlq $26,%xmm6 884bc3d5698SJohn Baldwin pand %xmm7,%xmm5 885bc3d5698SJohn Baldwin pand %xmm7,%xmm6 886bc3d5698SJohn Baldwin pand %xmm7,%xmm2 887bc3d5698SJohn Baldwin pand %xmm7,%xmm3 888bc3d5698SJohn Baldwin por (%ebx),%xmm4 889bc3d5698SJohn Baldwin leal -32(%esi),%eax 890bc3d5698SJohn Baldwin subl $64,%ecx 891bc3d5698SJohn Baldwin paddd 80(%esp),%xmm5 892bc3d5698SJohn Baldwin paddd 96(%esp),%xmm6 893bc3d5698SJohn Baldwin paddd 112(%esp),%xmm2 894bc3d5698SJohn Baldwin paddd 128(%esp),%xmm3 895bc3d5698SJohn Baldwin paddd 144(%esp),%xmm4 896bc3d5698SJohn Baldwin cmovbl %eax,%esi 897bc3d5698SJohn Baldwin leal 160(%esp),%eax 898bc3d5698SJohn Baldwin movdqa (%edx),%xmm7 899bc3d5698SJohn Baldwin movdqa %xmm1,16(%esp) 900bc3d5698SJohn Baldwin movdqa %xmm6,16(%eax) 901bc3d5698SJohn Baldwin movdqa %xmm2,32(%eax) 902bc3d5698SJohn Baldwin movdqa %xmm3,48(%eax) 903bc3d5698SJohn Baldwin movdqa %xmm4,64(%eax) 904bc3d5698SJohn Baldwin movdqa %xmm5,%xmm1 905bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm5 906bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 907bc3d5698SJohn Baldwin movdqa %xmm6,%xmm0 908bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm6 909bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm2 910bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm3 911bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm4 912bc3d5698SJohn Baldwin paddq 16(%esp),%xmm6 913bc3d5698SJohn Baldwin paddq 32(%esp),%xmm2 914bc3d5698SJohn Baldwin paddq 48(%esp),%xmm3 915bc3d5698SJohn Baldwin paddq 64(%esp),%xmm4 916bc3d5698SJohn Baldwin pmuludq 128(%edx),%xmm0 917bc3d5698SJohn Baldwin movdqa %xmm1,%xmm7 918bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm1 919bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 920bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 921bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm7 922bc3d5698SJohn Baldwin 
paddq %xmm6,%xmm1 923bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 924bc3d5698SJohn Baldwin pmuludq 48(%edx),%xmm5 925bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 926bc3d5698SJohn Baldwin movdqa 16(%eax),%xmm7 927bc3d5698SJohn Baldwin pmuludq 64(%edx),%xmm6 928bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 929bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 930bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm7 931bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 932bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 933bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm5 934bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 935bc3d5698SJohn Baldwin movdqa 32(%eax),%xmm7 936bc3d5698SJohn Baldwin pmuludq 48(%edx),%xmm6 937bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 938bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 939bc3d5698SJohn Baldwin pmuludq 112(%edx),%xmm7 940bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 941bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 942bc3d5698SJohn Baldwin pmuludq 128(%edx),%xmm5 943bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 944bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 945bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm6 946bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 947bc3d5698SJohn Baldwin movdqa 48(%eax),%xmm5 948bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm7 949bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 950bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 951bc3d5698SJohn Baldwin pmuludq 96(%edx),%xmm5 952bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 953bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 954bc3d5698SJohn Baldwin pmuludq 112(%edx),%xmm6 955bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 956bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 957bc3d5698SJohn Baldwin pmuludq 128(%edx),%xmm7 958bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 959bc3d5698SJohn Baldwin movdqa 64(%eax),%xmm6 960bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm5 961bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 962bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 963bc3d5698SJohn Baldwin pmuludq 128(%edx),%xmm6 964bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 965bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 966bc3d5698SJohn Baldwin 
pmuludq 80(%edx),%xmm7 967bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 968bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 969bc3d5698SJohn Baldwin pmuludq 96(%edx),%xmm5 970bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 971bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 972bc3d5698SJohn Baldwin pmuludq 112(%edx),%xmm6 973bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 974bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 975bc3d5698SJohn Baldwin movdqa %xmm3,%xmm5 976bc3d5698SJohn Baldwin pand %xmm7,%xmm3 977bc3d5698SJohn Baldwin psrlq $26,%xmm5 978bc3d5698SJohn Baldwin paddq %xmm4,%xmm5 979bc3d5698SJohn Baldwin movdqa %xmm0,%xmm6 980bc3d5698SJohn Baldwin pand %xmm7,%xmm0 981bc3d5698SJohn Baldwin psrlq $26,%xmm6 982bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 983bc3d5698SJohn Baldwin paddq %xmm1,%xmm6 984bc3d5698SJohn Baldwin psrlq $26,%xmm5 985bc3d5698SJohn Baldwin pand %xmm7,%xmm4 986bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 987bc3d5698SJohn Baldwin psrlq $26,%xmm6 988bc3d5698SJohn Baldwin paddd %xmm5,%xmm0 989bc3d5698SJohn Baldwin psllq $2,%xmm5 990bc3d5698SJohn Baldwin paddq %xmm2,%xmm6 991bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 992bc3d5698SJohn Baldwin pand %xmm7,%xmm1 993bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 994bc3d5698SJohn Baldwin psrlq $26,%xmm6 995bc3d5698SJohn Baldwin pand %xmm7,%xmm2 996bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 997bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 998bc3d5698SJohn Baldwin psrlq $26,%xmm5 999bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 1000bc3d5698SJohn Baldwin psrlq $26,%xmm6 1001bc3d5698SJohn Baldwin pand %xmm7,%xmm0 1002bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 1003bc3d5698SJohn Baldwin pand %xmm7,%xmm3 1004bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 1005bc3d5698SJohn Baldwin movdqu 32(%esi),%xmm5 1006bc3d5698SJohn Baldwin movdqu 48(%esi),%xmm6 1007bc3d5698SJohn Baldwin leal 32(%esi),%esi 1008bc3d5698SJohn Baldwin movdqa %xmm2,112(%esp) 1009bc3d5698SJohn Baldwin movdqa %xmm3,128(%esp) 1010bc3d5698SJohn Baldwin movdqa %xmm4,144(%esp) 1011bc3d5698SJohn Baldwin movdqa %xmm5,%xmm2 
1012bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 1013bc3d5698SJohn Baldwin psrldq $6,%xmm2 1014bc3d5698SJohn Baldwin psrldq $6,%xmm3 1015bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 1016bc3d5698SJohn Baldwin punpcklqdq %xmm3,%xmm2 1017bc3d5698SJohn Baldwin punpckhqdq %xmm6,%xmm4 1018bc3d5698SJohn Baldwin punpcklqdq %xmm6,%xmm5 1019bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 1020bc3d5698SJohn Baldwin psrlq $4,%xmm2 1021bc3d5698SJohn Baldwin psrlq $30,%xmm3 1022bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1023bc3d5698SJohn Baldwin psrlq $40,%xmm4 1024bc3d5698SJohn Baldwin psrlq $26,%xmm6 1025bc3d5698SJohn Baldwin pand %xmm7,%xmm5 1026bc3d5698SJohn Baldwin pand %xmm7,%xmm6 1027bc3d5698SJohn Baldwin pand %xmm7,%xmm2 1028bc3d5698SJohn Baldwin pand %xmm7,%xmm3 1029bc3d5698SJohn Baldwin por (%ebx),%xmm4 1030bc3d5698SJohn Baldwin movdqa %xmm0,80(%esp) 1031bc3d5698SJohn Baldwin movdqa %xmm1,96(%esp) 1032bc3d5698SJohn Baldwin ja .L015loop 1033bc3d5698SJohn Baldwin.L014skip_loop: 1034bc3d5698SJohn Baldwin pshufd $16,-144(%edx),%xmm7 1035bc3d5698SJohn Baldwin addl $32,%ecx 1036bc3d5698SJohn Baldwin jnz .L016long_tail 1037bc3d5698SJohn Baldwin paddd %xmm0,%xmm5 1038bc3d5698SJohn Baldwin paddd %xmm1,%xmm6 1039bc3d5698SJohn Baldwin paddd 112(%esp),%xmm2 1040bc3d5698SJohn Baldwin paddd 128(%esp),%xmm3 1041bc3d5698SJohn Baldwin paddd 144(%esp),%xmm4 1042bc3d5698SJohn Baldwin.L016long_tail: 1043bc3d5698SJohn Baldwin movdqa %xmm5,(%eax) 1044bc3d5698SJohn Baldwin movdqa %xmm6,16(%eax) 1045bc3d5698SJohn Baldwin movdqa %xmm2,32(%eax) 1046bc3d5698SJohn Baldwin movdqa %xmm3,48(%eax) 1047bc3d5698SJohn Baldwin movdqa %xmm4,64(%eax) 1048bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm5 1049bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm6 1050bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm2 1051bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 1052bc3d5698SJohn Baldwin pshufd $16,-128(%edx),%xmm5 1053bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm3 1054bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 1055bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm4 
1056bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1057bc3d5698SJohn Baldwin pmuludq 48(%eax),%xmm5 1058bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 1059bc3d5698SJohn Baldwin pmuludq 32(%eax),%xmm6 1060bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 1061bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1062bc3d5698SJohn Baldwin pmuludq 16(%eax),%xmm7 1063bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 1064bc3d5698SJohn Baldwin pshufd $16,-64(%edx),%xmm6 1065bc3d5698SJohn Baldwin pmuludq (%eax),%xmm5 1066bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 1067bc3d5698SJohn Baldwin pmuludq 64(%eax),%xmm6 1068bc3d5698SJohn Baldwin pshufd $16,-112(%edx),%xmm7 1069bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 1070bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1071bc3d5698SJohn Baldwin pmuludq 32(%eax),%xmm7 1072bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 1073bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1074bc3d5698SJohn Baldwin pmuludq 16(%eax),%xmm5 1075bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 1076bc3d5698SJohn Baldwin pshufd $16,-48(%edx),%xmm7 1077bc3d5698SJohn Baldwin pmuludq (%eax),%xmm6 1078bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 1079bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1080bc3d5698SJohn Baldwin pmuludq 64(%eax),%xmm7 1081bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 1082bc3d5698SJohn Baldwin pmuludq 48(%eax),%xmm5 1083bc3d5698SJohn Baldwin pshufd $16,-96(%edx),%xmm6 1084bc3d5698SJohn Baldwin paddq %xmm7,%xmm1 1085bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 1086bc3d5698SJohn Baldwin pmuludq 16(%eax),%xmm6 1087bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 1088bc3d5698SJohn Baldwin pshufd $16,-32(%edx),%xmm5 1089bc3d5698SJohn Baldwin pmuludq (%eax),%xmm7 1090bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 1091bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1092bc3d5698SJohn Baldwin pmuludq 64(%eax),%xmm5 1093bc3d5698SJohn Baldwin paddq %xmm7,%xmm3 1094bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 1095bc3d5698SJohn Baldwin pmuludq 48(%eax),%xmm6 1096bc3d5698SJohn Baldwin paddq %xmm5,%xmm2 1097bc3d5698SJohn Baldwin pmuludq 32(%eax),%xmm7 1098bc3d5698SJohn Baldwin 
pshufd $16,-80(%edx),%xmm5 1099bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 1100bc3d5698SJohn Baldwin pshufd $16,-16(%edx),%xmm6 1101bc3d5698SJohn Baldwin pmuludq (%eax),%xmm5 1102bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 1103bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 1104bc3d5698SJohn Baldwin pmuludq 64(%eax),%xmm6 1105bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 1106bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1107bc3d5698SJohn Baldwin pmuludq 16(%eax),%xmm7 1108bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 1109bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1110bc3d5698SJohn Baldwin pmuludq 32(%eax),%xmm5 1111bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 1112bc3d5698SJohn Baldwin pmuludq 48(%eax),%xmm6 1113bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 1114bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 1115bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 1116bc3d5698SJohn Baldwin jz .L017short_tail 1117bc3d5698SJohn Baldwin movdqu -32(%esi),%xmm5 1118bc3d5698SJohn Baldwin movdqu -16(%esi),%xmm6 1119bc3d5698SJohn Baldwin leal 32(%esi),%esi 1120bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 1121bc3d5698SJohn Baldwin movdqa %xmm3,48(%esp) 1122bc3d5698SJohn Baldwin movdqa %xmm4,64(%esp) 1123bc3d5698SJohn Baldwin movdqa %xmm5,%xmm2 1124bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 1125bc3d5698SJohn Baldwin psrldq $6,%xmm2 1126bc3d5698SJohn Baldwin psrldq $6,%xmm3 1127bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 1128bc3d5698SJohn Baldwin punpcklqdq %xmm3,%xmm2 1129bc3d5698SJohn Baldwin punpckhqdq %xmm6,%xmm4 1130bc3d5698SJohn Baldwin punpcklqdq %xmm6,%xmm5 1131bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 1132bc3d5698SJohn Baldwin psrlq $4,%xmm2 1133bc3d5698SJohn Baldwin psrlq $30,%xmm3 1134bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1135bc3d5698SJohn Baldwin psrlq $40,%xmm4 1136bc3d5698SJohn Baldwin psrlq $26,%xmm6 1137bc3d5698SJohn Baldwin pand %xmm7,%xmm5 1138bc3d5698SJohn Baldwin pand %xmm7,%xmm6 1139bc3d5698SJohn Baldwin pand %xmm7,%xmm2 1140bc3d5698SJohn Baldwin pand %xmm7,%xmm3 1141bc3d5698SJohn Baldwin por (%ebx),%xmm4 
1142bc3d5698SJohn Baldwin pshufd $16,(%edx),%xmm7 1143bc3d5698SJohn Baldwin paddd 80(%esp),%xmm5 1144bc3d5698SJohn Baldwin paddd 96(%esp),%xmm6 1145bc3d5698SJohn Baldwin paddd 112(%esp),%xmm2 1146bc3d5698SJohn Baldwin paddd 128(%esp),%xmm3 1147bc3d5698SJohn Baldwin paddd 144(%esp),%xmm4 1148bc3d5698SJohn Baldwin movdqa %xmm5,(%esp) 1149bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm5 1150bc3d5698SJohn Baldwin movdqa %xmm6,16(%esp) 1151bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm6 1152bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 1153bc3d5698SJohn Baldwin movdqa %xmm2,%xmm5 1154bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm2 1155bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 1156bc3d5698SJohn Baldwin movdqa %xmm3,%xmm6 1157bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm3 1158bc3d5698SJohn Baldwin paddq 32(%esp),%xmm2 1159bc3d5698SJohn Baldwin movdqa %xmm5,32(%esp) 1160bc3d5698SJohn Baldwin pshufd $16,16(%edx),%xmm5 1161bc3d5698SJohn Baldwin paddq 48(%esp),%xmm3 1162bc3d5698SJohn Baldwin movdqa %xmm6,48(%esp) 1163bc3d5698SJohn Baldwin movdqa %xmm4,%xmm6 1164bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm4 1165bc3d5698SJohn Baldwin paddq 64(%esp),%xmm4 1166bc3d5698SJohn Baldwin movdqa %xmm6,64(%esp) 1167bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1168bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm5 1169bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 1170bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm6 1171bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 1172bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1173bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm7 1174bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 1175bc3d5698SJohn Baldwin pshufd $16,80(%edx),%xmm6 1176bc3d5698SJohn Baldwin pmuludq (%esp),%xmm5 1177bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 1178bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm6 1179bc3d5698SJohn Baldwin pshufd $16,32(%edx),%xmm7 1180bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 1181bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1182bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm7 1183bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 1184bc3d5698SJohn Baldwin 
movdqa %xmm5,%xmm6 1185bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm5 1186bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 1187bc3d5698SJohn Baldwin pshufd $16,96(%edx),%xmm7 1188bc3d5698SJohn Baldwin pmuludq (%esp),%xmm6 1189bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 1190bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1191bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm7 1192bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 1193bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm5 1194bc3d5698SJohn Baldwin pshufd $16,48(%edx),%xmm6 1195bc3d5698SJohn Baldwin paddq %xmm7,%xmm1 1196bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 1197bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm6 1198bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 1199bc3d5698SJohn Baldwin pshufd $16,112(%edx),%xmm5 1200bc3d5698SJohn Baldwin pmuludq (%esp),%xmm7 1201bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 1202bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1203bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm5 1204bc3d5698SJohn Baldwin paddq %xmm7,%xmm3 1205bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 1206bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm6 1207bc3d5698SJohn Baldwin paddq %xmm5,%xmm2 1208bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm7 1209bc3d5698SJohn Baldwin pshufd $16,64(%edx),%xmm5 1210bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 1211bc3d5698SJohn Baldwin pshufd $16,128(%edx),%xmm6 1212bc3d5698SJohn Baldwin pmuludq (%esp),%xmm5 1213bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 1214bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 1215bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm6 1216bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 1217bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 1218bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm7 1219bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 1220bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 1221bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm5 1222bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 1223bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm6 1224bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 1225bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 1226bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 
1227bc3d5698SJohn Baldwin.L017short_tail: 1228bc3d5698SJohn Baldwin pshufd $78,%xmm4,%xmm6 1229bc3d5698SJohn Baldwin pshufd $78,%xmm3,%xmm5 1230bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 1231bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 1232bc3d5698SJohn Baldwin pshufd $78,%xmm0,%xmm6 1233bc3d5698SJohn Baldwin pshufd $78,%xmm1,%xmm5 1234bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 1235bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 1236bc3d5698SJohn Baldwin pshufd $78,%xmm2,%xmm6 1237bc3d5698SJohn Baldwin movdqa %xmm3,%xmm5 1238bc3d5698SJohn Baldwin pand %xmm7,%xmm3 1239bc3d5698SJohn Baldwin psrlq $26,%xmm5 1240bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 1241bc3d5698SJohn Baldwin paddq %xmm4,%xmm5 1242bc3d5698SJohn Baldwin movdqa %xmm0,%xmm6 1243bc3d5698SJohn Baldwin pand %xmm7,%xmm0 1244bc3d5698SJohn Baldwin psrlq $26,%xmm6 1245bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 1246bc3d5698SJohn Baldwin paddq %xmm1,%xmm6 1247bc3d5698SJohn Baldwin psrlq $26,%xmm5 1248bc3d5698SJohn Baldwin pand %xmm7,%xmm4 1249bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 1250bc3d5698SJohn Baldwin psrlq $26,%xmm6 1251bc3d5698SJohn Baldwin paddd %xmm5,%xmm0 1252bc3d5698SJohn Baldwin psllq $2,%xmm5 1253bc3d5698SJohn Baldwin paddq %xmm2,%xmm6 1254bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 1255bc3d5698SJohn Baldwin pand %xmm7,%xmm1 1256bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 1257bc3d5698SJohn Baldwin psrlq $26,%xmm6 1258bc3d5698SJohn Baldwin pand %xmm7,%xmm2 1259bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 1260bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 1261bc3d5698SJohn Baldwin psrlq $26,%xmm5 1262bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 1263bc3d5698SJohn Baldwin psrlq $26,%xmm6 1264bc3d5698SJohn Baldwin pand %xmm7,%xmm0 1265bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 1266bc3d5698SJohn Baldwin pand %xmm7,%xmm3 1267bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 1268bc3d5698SJohn Baldwin.L013done: 1269bc3d5698SJohn Baldwin movd %xmm0,-48(%edi) 1270bc3d5698SJohn Baldwin movd %xmm1,-44(%edi) 1271bc3d5698SJohn Baldwin movd %xmm2,-40(%edi) 
1272bc3d5698SJohn Baldwin movd %xmm3,-36(%edi) 1273bc3d5698SJohn Baldwin movd %xmm4,-32(%edi) 1274bc3d5698SJohn Baldwin movl %ebp,%esp 1275bc3d5698SJohn Baldwin.L007nodata: 1276bc3d5698SJohn Baldwin popl %edi 1277bc3d5698SJohn Baldwin popl %esi 1278bc3d5698SJohn Baldwin popl %ebx 1279bc3d5698SJohn Baldwin popl %ebp 1280bc3d5698SJohn Baldwin ret 1281bc3d5698SJohn Baldwin.size _poly1305_blocks_sse2,.-_poly1305_blocks_sse2 1282bc3d5698SJohn Baldwin.align 32/* _poly1305_emit_sse2: reads five 32-bit words at offsets 0..16 from the pointer found at 20(%esp) after the four pushes (presumably the first cdecl stack argument), packs and reduces them to a 128-bit value, adds the 16 bytes at the pointer from 28(%esp) and stores the 16-byte sum at the pointer from 24(%esp). Saves and restores ebp, ebx, esi, edi. Uses eax, ecx, edx and xmm0..xmm3 as scratch. */ 1283bc3d5698SJohn Baldwin.type _poly1305_emit_sse2,@function 1284bc3d5698SJohn Baldwin.align 16 1285bc3d5698SJohn Baldwin_poly1305_emit_sse2: 1286*c0855eaaSJohn Baldwin #ifdef __CET__ 1287*c0855eaaSJohn Baldwin 1288*c0855eaaSJohn Baldwin.byte 243,15,30,251/* bytes F3 0F 1E FB are the encoding of endbr32, emitted only when __CET__ is defined */ 1289*c0855eaaSJohn Baldwin #endif 1290*c0855eaaSJohn Baldwin 1291bc3d5698SJohn Baldwin pushl %ebp 1292bc3d5698SJohn Baldwin pushl %ebx 1293bc3d5698SJohn Baldwin pushl %esi 1294bc3d5698SJohn Baldwin pushl %edi 1295bc3d5698SJohn Baldwin movl 20(%esp),%ebp 1296bc3d5698SJohn Baldwin cmpl $0,20(%ebp) 1297bc3d5698SJohn Baldwin je .Lenter_emit/* word at offset 20 is zero: continue at .Lenter_emit, a label defined outside this chunk, with the four registers still pushed and ebp already loaded. NOTE(review): offset 20 looks like a flag selecting the 26-bit word layout handled below, confirm against the code that writes it */ 1298bc3d5698SJohn Baldwin movl (%ebp),%eax 1299bc3d5698SJohn Baldwin movl 4(%ebp),%edi 1300bc3d5698SJohn Baldwin movl 8(%ebp),%ecx 1301bc3d5698SJohn Baldwin movl 12(%ebp),%edx 1302bc3d5698SJohn Baldwin movl 16(%ebp),%esi/* pack: word k is placed at bit 26k. Each shll/shrl pair (26/6, 20/12, 14/18, 8/24) splits one word across two neighbouring 32-bit registers and the following adcl carries into the upper one. Result: eax, ebx, ecx, edx hold bits 0..127 and esi holds bits 128 and up */ 1303bc3d5698SJohn Baldwin movl %edi,%ebx 1304bc3d5698SJohn Baldwin shll $26,%edi 1305bc3d5698SJohn Baldwin shrl $6,%ebx 1306bc3d5698SJohn Baldwin addl %edi,%eax 1307bc3d5698SJohn Baldwin movl %ecx,%edi 1308bc3d5698SJohn Baldwin adcl $0,%ebx 1309bc3d5698SJohn Baldwin shll $20,%edi 1310bc3d5698SJohn Baldwin shrl $12,%ecx 1311bc3d5698SJohn Baldwin addl %edi,%ebx 1312bc3d5698SJohn Baldwin movl %edx,%edi 1313bc3d5698SJohn Baldwin adcl $0,%ecx 1314bc3d5698SJohn Baldwin shll $14,%edi 1315bc3d5698SJohn Baldwin shrl $18,%edx 1316bc3d5698SJohn Baldwin addl %edi,%ecx 1317bc3d5698SJohn Baldwin movl %esi,%edi 1318bc3d5698SJohn Baldwin adcl $0,%edx 1319bc3d5698SJohn Baldwin shll $8,%edi 1320bc3d5698SJohn Baldwin shrl $24,%esi 
1321bc3d5698SJohn Baldwin addl %edi,%edx 1322bc3d5698SJohn Baldwin adcl $0,%esi/* fold: esi keeps only bits 128..129, everything from bit 130 up is multiplied by 5 (leal edi+edi*4) and added back at bit 0 with carry propagation. In between, edi and ebp are reloaded with the output pointer from 24(%esp) and the addend pointer from 28(%esp) */ 1323bc3d5698SJohn Baldwin movl %esi,%edi 1324bc3d5698SJohn Baldwin andl $3,%esi 1325bc3d5698SJohn Baldwin shrl $2,%edi 1326bc3d5698SJohn Baldwin leal (%edi,%edi,4),%ebp 1327bc3d5698SJohn Baldwin movl 24(%esp),%edi 1328bc3d5698SJohn Baldwin addl %ebp,%eax 1329bc3d5698SJohn Baldwin movl 28(%esp),%ebp 1330bc3d5698SJohn Baldwin adcl $0,%ebx 1331bc3d5698SJohn Baldwin adcl $0,%ecx 1332bc3d5698SJohn Baldwin adcl $0,%edx 1333bc3d5698SJohn Baldwin adcl $0,%esi/* the movd copies keep the current value in xmm0..xmm3 while eax..edx and esi become value plus 5 */ 1334bc3d5698SJohn Baldwin movd %eax,%xmm0 1335bc3d5698SJohn Baldwin addl $5,%eax 1336bc3d5698SJohn Baldwin movd %ebx,%xmm1 1337bc3d5698SJohn Baldwin adcl $0,%ebx 1338bc3d5698SJohn Baldwin movd %ecx,%xmm2 1339bc3d5698SJohn Baldwin adcl $0,%ecx 1340bc3d5698SJohn Baldwin movd %edx,%xmm3 1341bc3d5698SJohn Baldwin adcl $0,%edx 1342bc3d5698SJohn Baldwin adcl $0,%esi/* mask = negated (esi shr 2): all ones when value plus 5 reached bit 130, otherwise zero (assuming esi is below 8 at this point) */ 1343bc3d5698SJohn Baldwin shrl $2,%esi 1344bc3d5698SJohn Baldwin negl %esi 1345bc3d5698SJohn Baldwin andl %esi,%eax 1346bc3d5698SJohn Baldwin andl %esi,%ebx 1347bc3d5698SJohn Baldwin andl %esi,%ecx 1348bc3d5698SJohn Baldwin andl %esi,%edx/* select without branching: the masked value plus 5 is parked in the output buffer, the saved value is reloaded from xmm0..xmm3, masked with the inverted mask and ORed with the parked words */ 1349bc3d5698SJohn Baldwin movl %eax,(%edi) 1350bc3d5698SJohn Baldwin movd %xmm0,%eax 1351bc3d5698SJohn Baldwin movl %ebx,4(%edi) 1352bc3d5698SJohn Baldwin movd %xmm1,%ebx 1353bc3d5698SJohn Baldwin movl %ecx,8(%edi) 1354bc3d5698SJohn Baldwin movd %xmm2,%ecx 1355bc3d5698SJohn Baldwin movl %edx,12(%edi) 1356bc3d5698SJohn Baldwin movd %xmm3,%edx 1357bc3d5698SJohn Baldwin notl %esi 1358bc3d5698SJohn Baldwin andl %esi,%eax 1359bc3d5698SJohn Baldwin andl %esi,%ebx 1360bc3d5698SJohn Baldwin orl (%edi),%eax 1361bc3d5698SJohn Baldwin andl %esi,%ecx 1362bc3d5698SJohn Baldwin orl 4(%edi),%ebx 1363bc3d5698SJohn Baldwin andl %esi,%edx 1364bc3d5698SJohn Baldwin orl 8(%edi),%ecx 1365bc3d5698SJohn Baldwin orl 12(%edi),%edx/* add the four words at (%ebp) with a carry chain and store the low 128 bits to the output buffer, the final carry is discarded */ 1366bc3d5698SJohn Baldwin addl (%ebp),%eax 1367bc3d5698SJohn Baldwin adcl 4(%ebp),%ebx 1368bc3d5698SJohn Baldwin movl %eax,(%edi) 
1369bc3d5698SJohn Baldwin adcl 8(%ebp),%ecx 1370bc3d5698SJohn Baldwin movl %ebx,4(%edi) 1371bc3d5698SJohn Baldwin adcl 12(%ebp),%edx 1372bc3d5698SJohn Baldwin movl %ecx,8(%edi) 1373bc3d5698SJohn Baldwin movl %edx,12(%edi) 1374bc3d5698SJohn Baldwin popl %edi 1375bc3d5698SJohn Baldwin popl %esi 1376bc3d5698SJohn Baldwin popl %ebx 1377bc3d5698SJohn Baldwin popl %ebp 1378bc3d5698SJohn Baldwin ret 1379bc3d5698SJohn Baldwin.size _poly1305_emit_sse2,.-_poly1305_emit_sse2 1380bc3d5698SJohn Baldwin.align 32/* _poly1305_init_avx2: internal helper that takes its inputs in registers and reads no stack arguments. edi is a base pointer: 16 bytes are loaded from 24(%edi) and results are stored at offsets 48..191 of the incoming edi. ebx is the base of a constant area: 64(%ebx) is loaded as the mask. Overwrites ebp (used to hold the entry esp), ecx, edx and xmm0..xmm7, esp and edi are back at their entry values on return. NOTE(review): 24(%edi) is presumably the key words that poly1305_init stores at 24..36 of its context, confirm at the call site */ 1381bc3d5698SJohn Baldwin.type _poly1305_init_avx2,@function 1382bc3d5698SJohn Baldwin.align 16 1383bc3d5698SJohn Baldwin_poly1305_init_avx2: 1384*c0855eaaSJohn Baldwin #ifdef __CET__ 1385*c0855eaaSJohn Baldwin 1386*c0855eaaSJohn Baldwin.byte 243,15,30,251/* endbr32 as raw bytes, only when __CET__ is defined */ 1387*c0855eaaSJohn Baldwin #endif 1388*c0855eaaSJohn Baldwin 1389bc3d5698SJohn Baldwin vmovdqu 24(%edi),%xmm4 1390bc3d5698SJohn Baldwin leal 48(%edi),%edi 1391bc3d5698SJohn Baldwin movl %esp,%ebp 1392bc3d5698SJohn Baldwin subl $224,%esp 1393bc3d5698SJohn Baldwin andl $-16,%esp/* ebp keeps the entry esp. The 224 bytes of 16-byte aligned scratch are used as 0..143(%esp) and, through edx, 144..223(%esp) */ 1394bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%xmm7/* presumably the 26-bit mask, its value is not visible in this chunk */ 1395bc3d5698SJohn Baldwin vpand %xmm7,%xmm4,%xmm0 1396bc3d5698SJohn Baldwin vpsrlq $26,%xmm4,%xmm1 1397bc3d5698SJohn Baldwin vpsrldq $6,%xmm4,%xmm3 1398bc3d5698SJohn Baldwin vpand %xmm7,%xmm1,%xmm1 1399bc3d5698SJohn Baldwin vpsrlq $4,%xmm3,%xmm2 1400bc3d5698SJohn Baldwin vpsrlq $30,%xmm3,%xmm3 1401bc3d5698SJohn Baldwin vpand %xmm7,%xmm2,%xmm2 1402bc3d5698SJohn Baldwin vpand %xmm7,%xmm3,%xmm3 1403bc3d5698SJohn Baldwin vpsrldq $13,%xmm4,%xmm4/* low lanes of xmm0..xmm4 now start at input bits 0, 26, 52, 78 and 104: shifts of 26 bits, 6 bytes plus 4 bits, 6 bytes plus 30 bits, and 13 bytes. xmm0..xmm3 are masked, xmm4 is left unmasked */ 1404bc3d5698SJohn Baldwin leal 144(%esp),%edx 1405bc3d5698SJohn Baldwin movl $2,%ecx/* ecx counts two passes through .L018square */ 1406bc3d5698SJohn Baldwin.L018square: 1407bc3d5698SJohn Baldwin vmovdqa %xmm0,(%esp) 1408bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%esp) 1409bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%esp) 1410bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%esp) 1411bc3d5698SJohn Baldwin vmovdqa %xmm4,64(%esp)/* the five current limbs are saved at 0..64(%esp) */ 1412bc3d5698SJohn Baldwin vpslld $2,%xmm1,%xmm6 1413bc3d5698SJohn Baldwin vpslld $2,%xmm2,%xmm5 
1414bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm6,%xmm6 1415bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm5,%xmm5 1416bc3d5698SJohn Baldwin vmovdqa %xmm6,80(%esp) 1417bc3d5698SJohn Baldwin vmovdqa %xmm5,96(%esp) 1418bc3d5698SJohn Baldwin vpslld $2,%xmm3,%xmm6 1419bc3d5698SJohn Baldwin vpslld $2,%xmm4,%xmm5 1420bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm6,%xmm6 1421bc3d5698SJohn Baldwin vpaddd %xmm4,%xmm5,%xmm5 1422bc3d5698SJohn Baldwin vmovdqa %xmm6,112(%esp) 1423bc3d5698SJohn Baldwin vmovdqa %xmm5,128(%esp)/* 80..128(%esp) = 5 times limbs 1..4, computed as (x shl 2) + x */ 1424bc3d5698SJohn Baldwin vpshufd $68,%xmm0,%xmm5 1425bc3d5698SJohn Baldwin vmovdqa %xmm1,%xmm6 1426bc3d5698SJohn Baldwin vpshufd $68,%xmm1,%xmm1 1427bc3d5698SJohn Baldwin vpshufd $68,%xmm2,%xmm2 1428bc3d5698SJohn Baldwin vpshufd $68,%xmm3,%xmm3 1429bc3d5698SJohn Baldwin vpshufd $68,%xmm4,%xmm4 1430bc3d5698SJohn Baldwin vmovdqa %xmm5,(%edx) 1431bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%edx) 1432bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%edx) 1433bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%edx) 1434bc3d5698SJohn Baldwin vmovdqa %xmm4,64(%edx)/* vpshufd with immediate 68 copies the low qword into both halves. These duplicated limbs are kept at 0..64(%edx), xmm6 keeps the unshuffled limb 1 */ 1435bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm4,%xmm4 1436bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm3,%xmm3 1437bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm2,%xmm2 1438bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm1,%xmm1 1439bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm5,%xmm0/* xmm0..xmm4 start as the unshuffled limb 0 times each duplicated limb. The sequence below accumulates the remaining cross products of the saved limbs with the duplicated limbs, taking the 5x copies at 80..128(%esp) for terms whose limb indices sum past 4 */ 1440bc3d5698SJohn Baldwin vpmuludq 48(%edx),%xmm6,%xmm5 1441bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm4,%xmm4 1442bc3d5698SJohn Baldwin vpmuludq 32(%edx),%xmm6,%xmm7 1443bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm3,%xmm3 1444bc3d5698SJohn Baldwin vpmuludq 16(%edx),%xmm6,%xmm5 1445bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm2,%xmm2 1446bc3d5698SJohn Baldwin vmovdqa 80(%esp),%xmm7 1447bc3d5698SJohn Baldwin vpmuludq (%edx),%xmm6,%xmm6 1448bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm1,%xmm1 1449bc3d5698SJohn Baldwin vmovdqa 32(%esp),%xmm5 1450bc3d5698SJohn Baldwin vpmuludq 64(%edx),%xmm7,%xmm7 1451bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm0,%xmm0 1452bc3d5698SJohn Baldwin vpmuludq 32(%edx),%xmm5,%xmm6 
1453bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm4,%xmm4 1454bc3d5698SJohn Baldwin vpmuludq 16(%edx),%xmm5,%xmm7 1455bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm3,%xmm3 1456bc3d5698SJohn Baldwin vmovdqa 96(%esp),%xmm6 1457bc3d5698SJohn Baldwin vpmuludq (%edx),%xmm5,%xmm5 1458bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm2,%xmm2 1459bc3d5698SJohn Baldwin vpmuludq 64(%edx),%xmm6,%xmm7 1460bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm1,%xmm1 1461bc3d5698SJohn Baldwin vmovdqa 48(%esp),%xmm5 1462bc3d5698SJohn Baldwin vpmuludq 48(%edx),%xmm6,%xmm6 1463bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm0,%xmm0 1464bc3d5698SJohn Baldwin vpmuludq 16(%edx),%xmm5,%xmm7 1465bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm4,%xmm4 1466bc3d5698SJohn Baldwin vmovdqa 112(%esp),%xmm6 1467bc3d5698SJohn Baldwin vpmuludq (%edx),%xmm5,%xmm5 1468bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm3,%xmm3 1469bc3d5698SJohn Baldwin vpmuludq 64(%edx),%xmm6,%xmm7 1470bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm2,%xmm2 1471bc3d5698SJohn Baldwin vpmuludq 48(%edx),%xmm6,%xmm5 1472bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm1,%xmm1 1473bc3d5698SJohn Baldwin vmovdqa 64(%esp),%xmm7 1474bc3d5698SJohn Baldwin vpmuludq 32(%edx),%xmm6,%xmm6 1475bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm0,%xmm0 1476bc3d5698SJohn Baldwin vmovdqa 128(%esp),%xmm5 1477bc3d5698SJohn Baldwin vpmuludq (%edx),%xmm7,%xmm7 1478bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm4,%xmm4 1479bc3d5698SJohn Baldwin vpmuludq 64(%edx),%xmm5,%xmm6 1480bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm3,%xmm3 1481bc3d5698SJohn Baldwin vpmuludq 16(%edx),%xmm5,%xmm7 1482bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm0,%xmm0 1483bc3d5698SJohn Baldwin vpmuludq 32(%edx),%xmm5,%xmm6 1484bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm1,%xmm1 1485bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%xmm7/* xmm7 served as scratch above, so the mask is reloaded here */ 1486bc3d5698SJohn Baldwin vpmuludq 48(%edx),%xmm5,%xmm5 1487bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm2,%xmm2/* carry propagation: each vpsrlq 26 and vpand pair moves the bits above the mask from one limb into the next limb up */ 1488bc3d5698SJohn Baldwin vpsrlq $26,%xmm3,%xmm5 1489bc3d5698SJohn Baldwin vpand %xmm7,%xmm3,%xmm3 1490bc3d5698SJohn Baldwin vpsrlq $26,%xmm0,%xmm6 
1491bc3d5698SJohn Baldwin vpand %xmm7,%xmm0,%xmm0 1492bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm4,%xmm4 1493bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm1,%xmm1 1494bc3d5698SJohn Baldwin vpsrlq $26,%xmm4,%xmm5 1495bc3d5698SJohn Baldwin vpand %xmm7,%xmm4,%xmm4 1496bc3d5698SJohn Baldwin vpsrlq $26,%xmm1,%xmm6 1497bc3d5698SJohn Baldwin vpand %xmm7,%xmm1,%xmm1 1498bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm2,%xmm2 1499bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm0,%xmm0 1500bc3d5698SJohn Baldwin vpsllq $2,%xmm5,%xmm5 1501bc3d5698SJohn Baldwin vpsrlq $26,%xmm2,%xmm6 1502bc3d5698SJohn Baldwin vpand %xmm7,%xmm2,%xmm2 1503bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm0,%xmm0/* the carry out of limb 4 has now been added to limb 0 once as is and once shifted left by 2, which is 5 times the carry in total */ 1504bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm3,%xmm3 1505bc3d5698SJohn Baldwin vpsrlq $26,%xmm3,%xmm6 1506bc3d5698SJohn Baldwin vpsrlq $26,%xmm0,%xmm5 1507bc3d5698SJohn Baldwin vpand %xmm7,%xmm0,%xmm0 1508bc3d5698SJohn Baldwin vpand %xmm7,%xmm3,%xmm3 1509bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm1,%xmm1 1510bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 1511bc3d5698SJohn Baldwin decl %ecx 1512bc3d5698SJohn Baldwin jz .L019square_break/* first pass only: vpunpcklqdq keeps the new limb in the low qword and places the low qword of the limb saved at 0..64(%esp) in the high qword, then the loop body runs once more on the pair */ 1513bc3d5698SJohn Baldwin vpunpcklqdq (%esp),%xmm0,%xmm0 1514bc3d5698SJohn Baldwin vpunpcklqdq 16(%esp),%xmm1,%xmm1 1515bc3d5698SJohn Baldwin vpunpcklqdq 32(%esp),%xmm2,%xmm2 1516bc3d5698SJohn Baldwin vpunpcklqdq 48(%esp),%xmm3,%xmm3 1517bc3d5698SJohn Baldwin vpunpcklqdq 64(%esp),%xmm4,%xmm4 1518bc3d5698SJohn Baldwin jmp .L018square 1519bc3d5698SJohn Baldwin.L019square_break:/* after the second pass: shift each result into the upper dword of its qword, OR in the limbs saved at the start of that pass, then reorder the dwords to 1,3,0,2 with vpshufd immediate 141 */ 1520bc3d5698SJohn Baldwin vpsllq $32,%xmm0,%xmm0 1521bc3d5698SJohn Baldwin vpsllq $32,%xmm1,%xmm1 1522bc3d5698SJohn Baldwin vpsllq $32,%xmm2,%xmm2 1523bc3d5698SJohn Baldwin vpsllq $32,%xmm3,%xmm3 1524bc3d5698SJohn Baldwin vpsllq $32,%xmm4,%xmm4 1525bc3d5698SJohn Baldwin vpor (%esp),%xmm0,%xmm0 1526bc3d5698SJohn Baldwin vpor 16(%esp),%xmm1,%xmm1 1527bc3d5698SJohn Baldwin vpor 32(%esp),%xmm2,%xmm2 1528bc3d5698SJohn Baldwin vpor 48(%esp),%xmm3,%xmm3 1529bc3d5698SJohn Baldwin vpor 64(%esp),%xmm4,%xmm4 1530bc3d5698SJohn Baldwin vpshufd 
$141,%xmm0,%xmm0 1531bc3d5698SJohn Baldwin vpshufd $141,%xmm1,%xmm1 1532bc3d5698SJohn Baldwin vpshufd $141,%xmm2,%xmm2 1533bc3d5698SJohn Baldwin vpshufd $141,%xmm3,%xmm3 1534bc3d5698SJohn Baldwin vpshufd $141,%xmm4,%xmm4 1535bc3d5698SJohn Baldwin vmovdqu %xmm0,(%edi) 1536bc3d5698SJohn Baldwin vmovdqu %xmm1,16(%edi) 1537bc3d5698SJohn Baldwin vmovdqu %xmm2,32(%edi) 1538bc3d5698SJohn Baldwin vmovdqu %xmm3,48(%edi) 1539bc3d5698SJohn Baldwin vmovdqu %xmm4,64(%edi)/* the five packed limb vectors are stored at 0..64 of the advanced edi, which is 48..112 of the incoming edi */ 1540bc3d5698SJohn Baldwin vpslld $2,%xmm1,%xmm6 1541bc3d5698SJohn Baldwin vpslld $2,%xmm2,%xmm5 1542bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm6,%xmm6 1543bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm5,%xmm5 1544bc3d5698SJohn Baldwin vmovdqu %xmm6,80(%edi) 1545bc3d5698SJohn Baldwin vmovdqu %xmm5,96(%edi) 1546bc3d5698SJohn Baldwin vpslld $2,%xmm3,%xmm6 1547bc3d5698SJohn Baldwin vpslld $2,%xmm4,%xmm5 1548bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm6,%xmm6 1549bc3d5698SJohn Baldwin vpaddd %xmm4,%xmm5,%xmm5 1550bc3d5698SJohn Baldwin vmovdqu %xmm6,112(%edi) 1551bc3d5698SJohn Baldwin vmovdqu %xmm5,128(%edi)/* 5 times limb vectors 1..4, again as (x shl 2) + x, stored at 80..128 of the advanced edi */ 1552bc3d5698SJohn Baldwin movl %ebp,%esp 1553bc3d5698SJohn Baldwin leal -48(%edi),%edi/* esp and edi are back at their entry values */ 1554bc3d5698SJohn Baldwin ret 1555bc3d5698SJohn Baldwin.size _poly1305_init_avx2,.-_poly1305_init_avx2 1556bc3d5698SJohn Baldwin.align 32 1557bc3d5698SJohn Baldwin.type _poly1305_blocks_avx2,@function 1558bc3d5698SJohn Baldwin.align 16 1559bc3d5698SJohn Baldwin_poly1305_blocks_avx2: 1560*c0855eaaSJohn Baldwin #ifdef __CET__ 1561*c0855eaaSJohn Baldwin 1562*c0855eaaSJohn Baldwin.byte 243,15,30,251 1563*c0855eaaSJohn Baldwin #endif 1564*c0855eaaSJohn Baldwin 1565bc3d5698SJohn Baldwin pushl %ebp 1566bc3d5698SJohn Baldwin pushl %ebx 1567bc3d5698SJohn Baldwin pushl %esi 1568bc3d5698SJohn Baldwin pushl %edi 1569bc3d5698SJohn Baldwin movl 20(%esp),%edi 1570bc3d5698SJohn Baldwin movl 24(%esp),%esi 1571bc3d5698SJohn Baldwin movl 28(%esp),%ecx 1572bc3d5698SJohn Baldwin movl 20(%edi),%eax 1573bc3d5698SJohn Baldwin andl $-16,%ecx 1574bc3d5698SJohn Baldwin 
jz .L020nodata 1575bc3d5698SJohn Baldwin cmpl $64,%ecx 1576bc3d5698SJohn Baldwin jae .L021enter_avx2 1577bc3d5698SJohn Baldwin testl %eax,%eax 1578bc3d5698SJohn Baldwin jz .Lenter_blocks 1579bc3d5698SJohn Baldwin.L021enter_avx2: 1580bc3d5698SJohn Baldwin vzeroupper 1581bc3d5698SJohn Baldwin call .L022pic_point 1582bc3d5698SJohn Baldwin.L022pic_point: 1583bc3d5698SJohn Baldwin popl %ebx 1584bc3d5698SJohn Baldwin leal .Lconst_sse2-.L022pic_point(%ebx),%ebx 1585bc3d5698SJohn Baldwin testl %eax,%eax 1586bc3d5698SJohn Baldwin jnz .L023base2_26 1587bc3d5698SJohn Baldwin call _poly1305_init_avx2 1588bc3d5698SJohn Baldwin movl (%edi),%eax 1589bc3d5698SJohn Baldwin movl 3(%edi),%ecx 1590bc3d5698SJohn Baldwin movl 6(%edi),%edx 1591bc3d5698SJohn Baldwin movl 9(%edi),%esi 1592bc3d5698SJohn Baldwin movl 13(%edi),%ebp 1593bc3d5698SJohn Baldwin shrl $2,%ecx 1594bc3d5698SJohn Baldwin andl $67108863,%eax 1595bc3d5698SJohn Baldwin shrl $4,%edx 1596bc3d5698SJohn Baldwin andl $67108863,%ecx 1597bc3d5698SJohn Baldwin shrl $6,%esi 1598bc3d5698SJohn Baldwin andl $67108863,%edx 1599bc3d5698SJohn Baldwin movl %eax,(%edi) 1600bc3d5698SJohn Baldwin movl %ecx,4(%edi) 1601bc3d5698SJohn Baldwin movl %edx,8(%edi) 1602bc3d5698SJohn Baldwin movl %esi,12(%edi) 1603bc3d5698SJohn Baldwin movl %ebp,16(%edi) 1604bc3d5698SJohn Baldwin movl $1,20(%edi) 1605bc3d5698SJohn Baldwin movl 24(%esp),%esi 1606bc3d5698SJohn Baldwin movl 28(%esp),%ecx 1607bc3d5698SJohn Baldwin.L023base2_26: 1608bc3d5698SJohn Baldwin movl 32(%esp),%eax 1609bc3d5698SJohn Baldwin movl %esp,%ebp 1610bc3d5698SJohn Baldwin subl $448,%esp 1611bc3d5698SJohn Baldwin andl $-512,%esp 1612bc3d5698SJohn Baldwin vmovdqu 48(%edi),%xmm0 1613bc3d5698SJohn Baldwin leal 288(%esp),%edx 1614bc3d5698SJohn Baldwin vmovdqu 64(%edi),%xmm1 1615bc3d5698SJohn Baldwin vmovdqu 80(%edi),%xmm2 1616bc3d5698SJohn Baldwin vmovdqu 96(%edi),%xmm3 1617bc3d5698SJohn Baldwin vmovdqu 112(%edi),%xmm4 1618bc3d5698SJohn Baldwin leal 48(%edi),%edi 1619bc3d5698SJohn Baldwin 
vpermq $64,%ymm0,%ymm0 1620bc3d5698SJohn Baldwin vpermq $64,%ymm1,%ymm1 1621bc3d5698SJohn Baldwin vpermq $64,%ymm2,%ymm2 1622bc3d5698SJohn Baldwin vpermq $64,%ymm3,%ymm3 1623bc3d5698SJohn Baldwin vpermq $64,%ymm4,%ymm4 1624bc3d5698SJohn Baldwin vpshufd $200,%ymm0,%ymm0 1625bc3d5698SJohn Baldwin vpshufd $200,%ymm1,%ymm1 1626bc3d5698SJohn Baldwin vpshufd $200,%ymm2,%ymm2 1627bc3d5698SJohn Baldwin vpshufd $200,%ymm3,%ymm3 1628bc3d5698SJohn Baldwin vpshufd $200,%ymm4,%ymm4 1629bc3d5698SJohn Baldwin vmovdqa %ymm0,-128(%edx) 1630bc3d5698SJohn Baldwin vmovdqu 80(%edi),%xmm0 1631bc3d5698SJohn Baldwin vmovdqa %ymm1,-96(%edx) 1632bc3d5698SJohn Baldwin vmovdqu 96(%edi),%xmm1 1633bc3d5698SJohn Baldwin vmovdqa %ymm2,-64(%edx) 1634bc3d5698SJohn Baldwin vmovdqu 112(%edi),%xmm2 1635bc3d5698SJohn Baldwin vmovdqa %ymm3,-32(%edx) 1636bc3d5698SJohn Baldwin vmovdqu 128(%edi),%xmm3 1637bc3d5698SJohn Baldwin vmovdqa %ymm4,(%edx) 1638bc3d5698SJohn Baldwin vpermq $64,%ymm0,%ymm0 1639bc3d5698SJohn Baldwin vpermq $64,%ymm1,%ymm1 1640bc3d5698SJohn Baldwin vpermq $64,%ymm2,%ymm2 1641bc3d5698SJohn Baldwin vpermq $64,%ymm3,%ymm3 1642bc3d5698SJohn Baldwin vpshufd $200,%ymm0,%ymm0 1643bc3d5698SJohn Baldwin vpshufd $200,%ymm1,%ymm1 1644bc3d5698SJohn Baldwin vpshufd $200,%ymm2,%ymm2 1645bc3d5698SJohn Baldwin vpshufd $200,%ymm3,%ymm3 1646bc3d5698SJohn Baldwin vmovdqa %ymm0,32(%edx) 1647bc3d5698SJohn Baldwin vmovd -48(%edi),%xmm0 1648bc3d5698SJohn Baldwin vmovdqa %ymm1,64(%edx) 1649bc3d5698SJohn Baldwin vmovd -44(%edi),%xmm1 1650bc3d5698SJohn Baldwin vmovdqa %ymm2,96(%edx) 1651bc3d5698SJohn Baldwin vmovd -40(%edi),%xmm2 1652bc3d5698SJohn Baldwin vmovdqa %ymm3,128(%edx) 1653bc3d5698SJohn Baldwin vmovd -36(%edi),%xmm3 1654bc3d5698SJohn Baldwin vmovd -32(%edi),%xmm4 1655bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%ymm7 1656bc3d5698SJohn Baldwin negl %eax 1657bc3d5698SJohn Baldwin testl $63,%ecx 1658bc3d5698SJohn Baldwin jz .L024even 1659bc3d5698SJohn Baldwin movl %ecx,%edx 1660bc3d5698SJohn Baldwin andl 
$-64,%ecx 1661bc3d5698SJohn Baldwin andl $63,%edx 1662bc3d5698SJohn Baldwin vmovdqu (%esi),%xmm5 1663bc3d5698SJohn Baldwin cmpl $32,%edx 1664bc3d5698SJohn Baldwin jb .L025one 1665bc3d5698SJohn Baldwin vmovdqu 16(%esi),%xmm6 1666bc3d5698SJohn Baldwin je .L026two 1667bc3d5698SJohn Baldwin vinserti128 $1,32(%esi),%ymm5,%ymm5 1668bc3d5698SJohn Baldwin leal 48(%esi),%esi 1669bc3d5698SJohn Baldwin leal 8(%ebx),%ebx 1670bc3d5698SJohn Baldwin leal 296(%esp),%edx 1671bc3d5698SJohn Baldwin jmp .L027tail 1672bc3d5698SJohn Baldwin.L026two: 1673bc3d5698SJohn Baldwin leal 32(%esi),%esi 1674bc3d5698SJohn Baldwin leal 16(%ebx),%ebx 1675bc3d5698SJohn Baldwin leal 304(%esp),%edx 1676bc3d5698SJohn Baldwin jmp .L027tail 1677bc3d5698SJohn Baldwin.L025one: 1678bc3d5698SJohn Baldwin leal 16(%esi),%esi 1679bc3d5698SJohn Baldwin vpxor %ymm6,%ymm6,%ymm6 1680bc3d5698SJohn Baldwin leal 32(%ebx,%eax,8),%ebx 1681bc3d5698SJohn Baldwin leal 312(%esp),%edx 1682bc3d5698SJohn Baldwin jmp .L027tail 1683bc3d5698SJohn Baldwin.align 32 1684bc3d5698SJohn Baldwin.L024even: 1685bc3d5698SJohn Baldwin vmovdqu (%esi),%xmm5 1686bc3d5698SJohn Baldwin vmovdqu 16(%esi),%xmm6 1687bc3d5698SJohn Baldwin vinserti128 $1,32(%esi),%ymm5,%ymm5 1688bc3d5698SJohn Baldwin vinserti128 $1,48(%esi),%ymm6,%ymm6 1689bc3d5698SJohn Baldwin leal 64(%esi),%esi 1690bc3d5698SJohn Baldwin subl $64,%ecx 1691bc3d5698SJohn Baldwin jz .L027tail 1692bc3d5698SJohn Baldwin.L028loop: 1693bc3d5698SJohn Baldwin vmovdqa %ymm2,64(%esp) 1694bc3d5698SJohn Baldwin vpsrldq $6,%ymm5,%ymm2 1695bc3d5698SJohn Baldwin vmovdqa %ymm0,(%esp) 1696bc3d5698SJohn Baldwin vpsrldq $6,%ymm6,%ymm0 1697bc3d5698SJohn Baldwin vmovdqa %ymm1,32(%esp) 1698bc3d5698SJohn Baldwin vpunpckhqdq %ymm6,%ymm5,%ymm1 1699bc3d5698SJohn Baldwin vpunpcklqdq %ymm6,%ymm5,%ymm5 1700bc3d5698SJohn Baldwin vpunpcklqdq %ymm0,%ymm2,%ymm2 1701bc3d5698SJohn Baldwin vpsrlq $30,%ymm2,%ymm0 1702bc3d5698SJohn Baldwin vpsrlq $4,%ymm2,%ymm2 1703bc3d5698SJohn Baldwin vpsrlq $26,%ymm5,%ymm6 
1704bc3d5698SJohn Baldwin vpsrlq $40,%ymm1,%ymm1 1705bc3d5698SJohn Baldwin vpand %ymm7,%ymm2,%ymm2 1706bc3d5698SJohn Baldwin vpand %ymm7,%ymm5,%ymm5 1707bc3d5698SJohn Baldwin vpand %ymm7,%ymm6,%ymm6 1708bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 1709bc3d5698SJohn Baldwin vpor (%ebx),%ymm1,%ymm1 1710bc3d5698SJohn Baldwin vpaddq 64(%esp),%ymm2,%ymm2 1711bc3d5698SJohn Baldwin vpaddq (%esp),%ymm5,%ymm5 1712bc3d5698SJohn Baldwin vpaddq 32(%esp),%ymm6,%ymm6 1713bc3d5698SJohn Baldwin vpaddq %ymm3,%ymm0,%ymm0 1714bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm1,%ymm1 1715bc3d5698SJohn Baldwin vpmuludq -96(%edx),%ymm2,%ymm3 1716bc3d5698SJohn Baldwin vmovdqa %ymm6,32(%esp) 1717bc3d5698SJohn Baldwin vpmuludq -64(%edx),%ymm2,%ymm4 1718bc3d5698SJohn Baldwin vmovdqa %ymm0,96(%esp) 1719bc3d5698SJohn Baldwin vpmuludq 96(%edx),%ymm2,%ymm0 1720bc3d5698SJohn Baldwin vmovdqa %ymm1,128(%esp) 1721bc3d5698SJohn Baldwin vpmuludq 128(%edx),%ymm2,%ymm1 1722bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm2,%ymm2 1723bc3d5698SJohn Baldwin vpmuludq -32(%edx),%ymm5,%ymm7 1724bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm3,%ymm3 1725bc3d5698SJohn Baldwin vpmuludq (%edx),%ymm5,%ymm6 1726bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 1727bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm5,%ymm7 1728bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm0,%ymm0 1729bc3d5698SJohn Baldwin vmovdqa 32(%esp),%ymm7 1730bc3d5698SJohn Baldwin vpmuludq -96(%edx),%ymm5,%ymm6 1731bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 1732bc3d5698SJohn Baldwin vpmuludq -64(%edx),%ymm5,%ymm5 1733bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 1734bc3d5698SJohn Baldwin vpmuludq -64(%edx),%ymm7,%ymm6 1735bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 1736bc3d5698SJohn Baldwin vpmuludq -32(%edx),%ymm7,%ymm5 1737bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 1738bc3d5698SJohn Baldwin vpmuludq 128(%edx),%ymm7,%ymm6 1739bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 1740bc3d5698SJohn Baldwin vmovdqa 96(%esp),%ymm6 1741bc3d5698SJohn Baldwin vpmuludq 
-128(%edx),%ymm7,%ymm5 1742bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 1743bc3d5698SJohn Baldwin vpmuludq -96(%edx),%ymm7,%ymm7 1744bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm2,%ymm2 1745bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm6,%ymm5 1746bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm3,%ymm3 1747bc3d5698SJohn Baldwin vpmuludq -96(%edx),%ymm6,%ymm7 1748bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm4,%ymm4 1749bc3d5698SJohn Baldwin vpmuludq 64(%edx),%ymm6,%ymm5 1750bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 1751bc3d5698SJohn Baldwin vmovdqa 128(%esp),%ymm5 1752bc3d5698SJohn Baldwin vpmuludq 96(%edx),%ymm6,%ymm7 1753bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm1,%ymm1 1754bc3d5698SJohn Baldwin vpmuludq 128(%edx),%ymm6,%ymm6 1755bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 1756bc3d5698SJohn Baldwin vpmuludq 128(%edx),%ymm5,%ymm7 1757bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm3,%ymm3 1758bc3d5698SJohn Baldwin vpmuludq 32(%edx),%ymm5,%ymm6 1759bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 1760bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm5,%ymm7 1761bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm4,%ymm4 1762bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%ymm7 1763bc3d5698SJohn Baldwin vpmuludq 64(%edx),%ymm5,%ymm6 1764bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 1765bc3d5698SJohn Baldwin vpmuludq 96(%edx),%ymm5,%ymm5 1766bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 1767bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm5 1768bc3d5698SJohn Baldwin vpand %ymm7,%ymm3,%ymm3 1769bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm6 1770bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 1771bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 1772bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 1773bc3d5698SJohn Baldwin vpsrlq $26,%ymm4,%ymm5 1774bc3d5698SJohn Baldwin vpand %ymm7,%ymm4,%ymm4 1775bc3d5698SJohn Baldwin vpsrlq $26,%ymm1,%ymm6 1776bc3d5698SJohn Baldwin vpand %ymm7,%ymm1,%ymm1 1777bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 1778bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 1779bc3d5698SJohn Baldwin vpsllq 
$2,%ymm5,%ymm5 1780bc3d5698SJohn Baldwin vpsrlq $26,%ymm2,%ymm6 1781bc3d5698SJohn Baldwin vpand %ymm7,%ymm2,%ymm2 1782bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 1783bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 1784bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm6 1785bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm5 1786bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 1787bc3d5698SJohn Baldwin vpand %ymm7,%ymm3,%ymm3 1788bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 1789bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 1790bc3d5698SJohn Baldwin vmovdqu (%esi),%xmm5 1791bc3d5698SJohn Baldwin vmovdqu 16(%esi),%xmm6 1792bc3d5698SJohn Baldwin vinserti128 $1,32(%esi),%ymm5,%ymm5 1793bc3d5698SJohn Baldwin vinserti128 $1,48(%esi),%ymm6,%ymm6 1794bc3d5698SJohn Baldwin leal 64(%esi),%esi 1795bc3d5698SJohn Baldwin subl $64,%ecx 1796bc3d5698SJohn Baldwin jnz .L028loop 1797bc3d5698SJohn Baldwin.L027tail: 1798bc3d5698SJohn Baldwin vmovdqa %ymm2,64(%esp) 1799bc3d5698SJohn Baldwin vpsrldq $6,%ymm5,%ymm2 1800bc3d5698SJohn Baldwin vmovdqa %ymm0,(%esp) 1801bc3d5698SJohn Baldwin vpsrldq $6,%ymm6,%ymm0 1802bc3d5698SJohn Baldwin vmovdqa %ymm1,32(%esp) 1803bc3d5698SJohn Baldwin vpunpckhqdq %ymm6,%ymm5,%ymm1 1804bc3d5698SJohn Baldwin vpunpcklqdq %ymm6,%ymm5,%ymm5 1805bc3d5698SJohn Baldwin vpunpcklqdq %ymm0,%ymm2,%ymm2 1806bc3d5698SJohn Baldwin vpsrlq $30,%ymm2,%ymm0 1807bc3d5698SJohn Baldwin vpsrlq $4,%ymm2,%ymm2 1808bc3d5698SJohn Baldwin vpsrlq $26,%ymm5,%ymm6 1809bc3d5698SJohn Baldwin vpsrlq $40,%ymm1,%ymm1 1810bc3d5698SJohn Baldwin vpand %ymm7,%ymm2,%ymm2 1811bc3d5698SJohn Baldwin vpand %ymm7,%ymm5,%ymm5 1812bc3d5698SJohn Baldwin vpand %ymm7,%ymm6,%ymm6 1813bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 1814bc3d5698SJohn Baldwin vpor (%ebx),%ymm1,%ymm1 1815bc3d5698SJohn Baldwin andl $-64,%ebx 1816bc3d5698SJohn Baldwin vpaddq 64(%esp),%ymm2,%ymm2 1817bc3d5698SJohn Baldwin vpaddq (%esp),%ymm5,%ymm5 1818bc3d5698SJohn Baldwin vpaddq 32(%esp),%ymm6,%ymm6 1819bc3d5698SJohn Baldwin vpaddq 
%ymm3,%ymm0,%ymm0 1820bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm1,%ymm1 1821bc3d5698SJohn Baldwin vpmuludq -92(%edx),%ymm2,%ymm3 1822bc3d5698SJohn Baldwin vmovdqa %ymm6,32(%esp) 1823bc3d5698SJohn Baldwin vpmuludq -60(%edx),%ymm2,%ymm4 1824bc3d5698SJohn Baldwin vmovdqa %ymm0,96(%esp) 1825bc3d5698SJohn Baldwin vpmuludq 100(%edx),%ymm2,%ymm0 1826bc3d5698SJohn Baldwin vmovdqa %ymm1,128(%esp) 1827bc3d5698SJohn Baldwin vpmuludq 132(%edx),%ymm2,%ymm1 1828bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm2,%ymm2 1829bc3d5698SJohn Baldwin vpmuludq -28(%edx),%ymm5,%ymm7 1830bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm3,%ymm3 1831bc3d5698SJohn Baldwin vpmuludq 4(%edx),%ymm5,%ymm6 1832bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 1833bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm5,%ymm7 1834bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm0,%ymm0 1835bc3d5698SJohn Baldwin vmovdqa 32(%esp),%ymm7 1836bc3d5698SJohn Baldwin vpmuludq -92(%edx),%ymm5,%ymm6 1837bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 1838bc3d5698SJohn Baldwin vpmuludq -60(%edx),%ymm5,%ymm5 1839bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 1840bc3d5698SJohn Baldwin vpmuludq -60(%edx),%ymm7,%ymm6 1841bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 1842bc3d5698SJohn Baldwin vpmuludq -28(%edx),%ymm7,%ymm5 1843bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 1844bc3d5698SJohn Baldwin vpmuludq 132(%edx),%ymm7,%ymm6 1845bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 1846bc3d5698SJohn Baldwin vmovdqa 96(%esp),%ymm6 1847bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm7,%ymm5 1848bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 1849bc3d5698SJohn Baldwin vpmuludq -92(%edx),%ymm7,%ymm7 1850bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm2,%ymm2 1851bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm6,%ymm5 1852bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm3,%ymm3 1853bc3d5698SJohn Baldwin vpmuludq -92(%edx),%ymm6,%ymm7 1854bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm4,%ymm4 1855bc3d5698SJohn Baldwin vpmuludq 68(%edx),%ymm6,%ymm5 1856bc3d5698SJohn Baldwin vpaddq 
%ymm5,%ymm0,%ymm0 1857bc3d5698SJohn Baldwin vmovdqa 128(%esp),%ymm5 1858bc3d5698SJohn Baldwin vpmuludq 100(%edx),%ymm6,%ymm7 1859bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm1,%ymm1 1860bc3d5698SJohn Baldwin vpmuludq 132(%edx),%ymm6,%ymm6 1861bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 1862bc3d5698SJohn Baldwin vpmuludq 132(%edx),%ymm5,%ymm7 1863bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm3,%ymm3 1864bc3d5698SJohn Baldwin vpmuludq 36(%edx),%ymm5,%ymm6 1865bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 1866bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm5,%ymm7 1867bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm4,%ymm4 1868bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%ymm7 1869bc3d5698SJohn Baldwin vpmuludq 68(%edx),%ymm5,%ymm6 1870bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 1871bc3d5698SJohn Baldwin vpmuludq 100(%edx),%ymm5,%ymm5 1872bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 1873bc3d5698SJohn Baldwin vpsrldq $8,%ymm4,%ymm5 1874bc3d5698SJohn Baldwin vpsrldq $8,%ymm3,%ymm6 1875bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 1876bc3d5698SJohn Baldwin vpsrldq $8,%ymm0,%ymm5 1877bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 1878bc3d5698SJohn Baldwin vpsrldq $8,%ymm1,%ymm6 1879bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 1880bc3d5698SJohn Baldwin vpsrldq $8,%ymm2,%ymm5 1881bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 1882bc3d5698SJohn Baldwin vpermq $2,%ymm4,%ymm6 1883bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 1884bc3d5698SJohn Baldwin vpermq $2,%ymm3,%ymm5 1885bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 1886bc3d5698SJohn Baldwin vpermq $2,%ymm0,%ymm6 1887bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm3,%ymm3 1888bc3d5698SJohn Baldwin vpermq $2,%ymm1,%ymm5 1889bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 1890bc3d5698SJohn Baldwin vpermq $2,%ymm2,%ymm6 1891bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 1892bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 1893bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm5 1894bc3d5698SJohn Baldwin vpand %ymm7,%ymm3,%ymm3 1895bc3d5698SJohn Baldwin vpsrlq 
$26,%ymm0,%ymm6 1896bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 1897bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 1898bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 1899bc3d5698SJohn Baldwin vpsrlq $26,%ymm4,%ymm5 1900bc3d5698SJohn Baldwin vpand %ymm7,%ymm4,%ymm4 1901bc3d5698SJohn Baldwin vpsrlq $26,%ymm1,%ymm6 1902bc3d5698SJohn Baldwin vpand %ymm7,%ymm1,%ymm1 1903bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 1904bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 1905bc3d5698SJohn Baldwin vpsllq $2,%ymm5,%ymm5 1906bc3d5698SJohn Baldwin vpsrlq $26,%ymm2,%ymm6 1907bc3d5698SJohn Baldwin vpand %ymm7,%ymm2,%ymm2 1908bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 1909bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 1910bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm6 1911bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm5 1912bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 1913bc3d5698SJohn Baldwin vpand %ymm7,%ymm3,%ymm3 1914bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 1915bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 1916bc3d5698SJohn Baldwin cmpl $0,%ecx 1917bc3d5698SJohn Baldwin je .L029done 1918bc3d5698SJohn Baldwin vpshufd $252,%xmm0,%xmm0 1919bc3d5698SJohn Baldwin leal 288(%esp),%edx 1920bc3d5698SJohn Baldwin vpshufd $252,%xmm1,%xmm1 1921bc3d5698SJohn Baldwin vpshufd $252,%xmm2,%xmm2 1922bc3d5698SJohn Baldwin vpshufd $252,%xmm3,%xmm3 1923bc3d5698SJohn Baldwin vpshufd $252,%xmm4,%xmm4 1924bc3d5698SJohn Baldwin jmp .L024even 1925bc3d5698SJohn Baldwin.align 16 1926bc3d5698SJohn Baldwin.L029done: 1927bc3d5698SJohn Baldwin vmovd %xmm0,-48(%edi) 1928bc3d5698SJohn Baldwin vmovd %xmm1,-44(%edi) 1929bc3d5698SJohn Baldwin vmovd %xmm2,-40(%edi) 1930bc3d5698SJohn Baldwin vmovd %xmm3,-36(%edi) 1931bc3d5698SJohn Baldwin vmovd %xmm4,-32(%edi) 1932bc3d5698SJohn Baldwin vzeroupper 1933bc3d5698SJohn Baldwin movl %ebp,%esp 1934bc3d5698SJohn Baldwin.L020nodata: 1935bc3d5698SJohn Baldwin popl %edi 1936bc3d5698SJohn Baldwin popl %esi 1937bc3d5698SJohn Baldwin popl %ebx 1938bc3d5698SJohn Baldwin 
popl %ebp 1939bc3d5698SJohn Baldwin ret 1940bc3d5698SJohn Baldwin.size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2 1941bc3d5698SJohn Baldwin.align 64 1942bc3d5698SJohn Baldwin.Lconst_sse2: 1943bc3d5698SJohn Baldwin.long 16777216,0,16777216,0,16777216,0,16777216,0 1944bc3d5698SJohn Baldwin.long 0,0,0,0,0,0,0,0 1945bc3d5698SJohn Baldwin.long 67108863,0,67108863,0,67108863,0,67108863,0 1946bc3d5698SJohn Baldwin.long 268435455,268435452,268435452,268435452 1947bc3d5698SJohn Baldwin.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54 1948bc3d5698SJohn Baldwin.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 1949bc3d5698SJohn Baldwin.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 1950bc3d5698SJohn Baldwin.byte 114,103,62,0 1951bc3d5698SJohn Baldwin.align 4 1952bc3d5698SJohn Baldwin.comm OPENSSL_ia32cap_P,16,4 1953*c0855eaaSJohn Baldwin 1954*c0855eaaSJohn Baldwin .section ".note.gnu.property", "a" 1955*c0855eaaSJohn Baldwin .p2align 2 1956*c0855eaaSJohn Baldwin .long 1f - 0f 1957*c0855eaaSJohn Baldwin .long 4f - 1f 1958*c0855eaaSJohn Baldwin .long 5 1959*c0855eaaSJohn Baldwin0: 1960*c0855eaaSJohn Baldwin .asciz "GNU" 1961*c0855eaaSJohn Baldwin1: 1962*c0855eaaSJohn Baldwin .p2align 2 1963*c0855eaaSJohn Baldwin .long 0xc0000002 1964*c0855eaaSJohn Baldwin .long 3f - 2f 1965*c0855eaaSJohn Baldwin2: 1966*c0855eaaSJohn Baldwin .long 3 1967*c0855eaaSJohn Baldwin3: 1968*c0855eaaSJohn Baldwin .p2align 2 1969*c0855eaaSJohn Baldwin4: 1970bc3d5698SJohn Baldwin#else 1971bc3d5698SJohn Baldwin.text 1972bc3d5698SJohn Baldwin.align 64 1973bc3d5698SJohn Baldwin.globl poly1305_init 1974bc3d5698SJohn Baldwin.type poly1305_init,@function 1975bc3d5698SJohn Baldwin.align 16 1976bc3d5698SJohn Baldwinpoly1305_init: 1977bc3d5698SJohn Baldwin.L_poly1305_init_begin: 1978*c0855eaaSJohn Baldwin #ifdef __CET__ 1979*c0855eaaSJohn Baldwin 1980*c0855eaaSJohn Baldwin.byte 243,15,30,251 1981*c0855eaaSJohn Baldwin #endif 1982*c0855eaaSJohn Baldwin 1983bc3d5698SJohn Baldwin 
pushl %ebp 1984bc3d5698SJohn Baldwin pushl %ebx 1985bc3d5698SJohn Baldwin pushl %esi 1986bc3d5698SJohn Baldwin pushl %edi 1987bc3d5698SJohn Baldwin movl 20(%esp),%edi 1988bc3d5698SJohn Baldwin movl 24(%esp),%esi 1989bc3d5698SJohn Baldwin movl 28(%esp),%ebp 1990bc3d5698SJohn Baldwin xorl %eax,%eax 1991bc3d5698SJohn Baldwin movl %eax,(%edi) 1992bc3d5698SJohn Baldwin movl %eax,4(%edi) 1993bc3d5698SJohn Baldwin movl %eax,8(%edi) 1994bc3d5698SJohn Baldwin movl %eax,12(%edi) 1995bc3d5698SJohn Baldwin movl %eax,16(%edi) 1996bc3d5698SJohn Baldwin movl %eax,20(%edi) 1997bc3d5698SJohn Baldwin cmpl $0,%esi 1998bc3d5698SJohn Baldwin je .L000nokey 1999bc3d5698SJohn Baldwin call .L001pic_point 2000bc3d5698SJohn Baldwin.L001pic_point: 2001bc3d5698SJohn Baldwin popl %ebx 2002bc3d5698SJohn Baldwin leal poly1305_blocks-.L001pic_point(%ebx),%eax 2003bc3d5698SJohn Baldwin leal poly1305_emit-.L001pic_point(%ebx),%edx 2004bc3d5698SJohn Baldwin leal OPENSSL_ia32cap_P,%edi 2005bc3d5698SJohn Baldwin movl (%edi),%ecx 2006bc3d5698SJohn Baldwin andl $83886080,%ecx 2007bc3d5698SJohn Baldwin cmpl $83886080,%ecx 2008bc3d5698SJohn Baldwin jne .L002no_sse2 2009bc3d5698SJohn Baldwin leal _poly1305_blocks_sse2-.L001pic_point(%ebx),%eax 2010bc3d5698SJohn Baldwin leal _poly1305_emit_sse2-.L001pic_point(%ebx),%edx 2011bc3d5698SJohn Baldwin movl 8(%edi),%ecx 2012bc3d5698SJohn Baldwin testl $32,%ecx 2013bc3d5698SJohn Baldwin jz .L002no_sse2 2014bc3d5698SJohn Baldwin leal _poly1305_blocks_avx2-.L001pic_point(%ebx),%eax 2015bc3d5698SJohn Baldwin.L002no_sse2: 2016bc3d5698SJohn Baldwin movl 20(%esp),%edi 2017bc3d5698SJohn Baldwin movl %eax,(%ebp) 2018bc3d5698SJohn Baldwin movl %edx,4(%ebp) 2019bc3d5698SJohn Baldwin movl (%esi),%eax 2020bc3d5698SJohn Baldwin movl 4(%esi),%ebx 2021bc3d5698SJohn Baldwin movl 8(%esi),%ecx 2022bc3d5698SJohn Baldwin movl 12(%esi),%edx 2023bc3d5698SJohn Baldwin andl $268435455,%eax 2024bc3d5698SJohn Baldwin andl $268435452,%ebx 2025bc3d5698SJohn Baldwin andl $268435452,%ecx 
2026bc3d5698SJohn Baldwin andl $268435452,%edx 2027bc3d5698SJohn Baldwin movl %eax,24(%edi) 2028bc3d5698SJohn Baldwin movl %ebx,28(%edi) 2029bc3d5698SJohn Baldwin movl %ecx,32(%edi) 2030bc3d5698SJohn Baldwin movl %edx,36(%edi) 2031bc3d5698SJohn Baldwin movl $1,%eax 2032bc3d5698SJohn Baldwin.L000nokey: 2033bc3d5698SJohn Baldwin popl %edi 2034bc3d5698SJohn Baldwin popl %esi 2035bc3d5698SJohn Baldwin popl %ebx 2036bc3d5698SJohn Baldwin popl %ebp 2037bc3d5698SJohn Baldwin ret 2038bc3d5698SJohn Baldwin.size poly1305_init,.-.L_poly1305_init_begin 2039bc3d5698SJohn Baldwin.globl poly1305_blocks 2040bc3d5698SJohn Baldwin.type poly1305_blocks,@function 2041bc3d5698SJohn Baldwin.align 16 2042bc3d5698SJohn Baldwinpoly1305_blocks: 2043bc3d5698SJohn Baldwin.L_poly1305_blocks_begin: 2044*c0855eaaSJohn Baldwin #ifdef __CET__ 2045*c0855eaaSJohn Baldwin 2046*c0855eaaSJohn Baldwin.byte 243,15,30,251 2047*c0855eaaSJohn Baldwin #endif 2048*c0855eaaSJohn Baldwin 2049bc3d5698SJohn Baldwin pushl %ebp 2050bc3d5698SJohn Baldwin pushl %ebx 2051bc3d5698SJohn Baldwin pushl %esi 2052bc3d5698SJohn Baldwin pushl %edi 2053bc3d5698SJohn Baldwin movl 20(%esp),%edi 2054bc3d5698SJohn Baldwin movl 24(%esp),%esi 2055bc3d5698SJohn Baldwin movl 28(%esp),%ecx 2056bc3d5698SJohn Baldwin.Lenter_blocks: 2057bc3d5698SJohn Baldwin andl $-15,%ecx 2058bc3d5698SJohn Baldwin jz .L003nodata 2059bc3d5698SJohn Baldwin subl $64,%esp 2060bc3d5698SJohn Baldwin movl 24(%edi),%eax 2061bc3d5698SJohn Baldwin movl 28(%edi),%ebx 2062bc3d5698SJohn Baldwin leal (%esi,%ecx,1),%ebp 2063bc3d5698SJohn Baldwin movl 32(%edi),%ecx 2064bc3d5698SJohn Baldwin movl 36(%edi),%edx 2065bc3d5698SJohn Baldwin movl %ebp,92(%esp) 2066bc3d5698SJohn Baldwin movl %esi,%ebp 2067bc3d5698SJohn Baldwin movl %eax,36(%esp) 2068bc3d5698SJohn Baldwin movl %ebx,%eax 2069bc3d5698SJohn Baldwin shrl $2,%eax 2070bc3d5698SJohn Baldwin movl %ebx,40(%esp) 2071bc3d5698SJohn Baldwin addl %ebx,%eax 2072bc3d5698SJohn Baldwin movl %ecx,%ebx 2073bc3d5698SJohn Baldwin 
shrl $2,%ebx 2074bc3d5698SJohn Baldwin movl %ecx,44(%esp) 2075bc3d5698SJohn Baldwin addl %ecx,%ebx 2076bc3d5698SJohn Baldwin movl %edx,%ecx 2077bc3d5698SJohn Baldwin shrl $2,%ecx 2078bc3d5698SJohn Baldwin movl %edx,48(%esp) 2079bc3d5698SJohn Baldwin addl %edx,%ecx 2080bc3d5698SJohn Baldwin movl %eax,52(%esp) 2081bc3d5698SJohn Baldwin movl %ebx,56(%esp) 2082bc3d5698SJohn Baldwin movl %ecx,60(%esp) 2083bc3d5698SJohn Baldwin movl (%edi),%eax 2084bc3d5698SJohn Baldwin movl 4(%edi),%ebx 2085bc3d5698SJohn Baldwin movl 8(%edi),%ecx 2086bc3d5698SJohn Baldwin movl 12(%edi),%esi 2087bc3d5698SJohn Baldwin movl 16(%edi),%edi 2088bc3d5698SJohn Baldwin jmp .L004loop 2089bc3d5698SJohn Baldwin.align 32 2090bc3d5698SJohn Baldwin.L004loop: 2091bc3d5698SJohn Baldwin addl (%ebp),%eax 2092bc3d5698SJohn Baldwin adcl 4(%ebp),%ebx 2093bc3d5698SJohn Baldwin adcl 8(%ebp),%ecx 2094bc3d5698SJohn Baldwin adcl 12(%ebp),%esi 2095bc3d5698SJohn Baldwin leal 16(%ebp),%ebp 2096bc3d5698SJohn Baldwin adcl 96(%esp),%edi 2097bc3d5698SJohn Baldwin movl %eax,(%esp) 2098bc3d5698SJohn Baldwin movl %esi,12(%esp) 2099bc3d5698SJohn Baldwin mull 36(%esp) 2100bc3d5698SJohn Baldwin movl %edi,16(%esp) 2101bc3d5698SJohn Baldwin movl %eax,%edi 2102bc3d5698SJohn Baldwin movl %ebx,%eax 2103bc3d5698SJohn Baldwin movl %edx,%esi 2104bc3d5698SJohn Baldwin mull 60(%esp) 2105bc3d5698SJohn Baldwin addl %eax,%edi 2106bc3d5698SJohn Baldwin movl %ecx,%eax 2107bc3d5698SJohn Baldwin adcl %edx,%esi 2108bc3d5698SJohn Baldwin mull 56(%esp) 2109bc3d5698SJohn Baldwin addl %eax,%edi 2110bc3d5698SJohn Baldwin movl 12(%esp),%eax 2111bc3d5698SJohn Baldwin adcl %edx,%esi 2112bc3d5698SJohn Baldwin mull 52(%esp) 2113bc3d5698SJohn Baldwin addl %eax,%edi 2114bc3d5698SJohn Baldwin movl (%esp),%eax 2115bc3d5698SJohn Baldwin adcl %edx,%esi 2116bc3d5698SJohn Baldwin mull 40(%esp) 2117bc3d5698SJohn Baldwin movl %edi,20(%esp) 2118bc3d5698SJohn Baldwin xorl %edi,%edi 2119bc3d5698SJohn Baldwin addl %eax,%esi 2120bc3d5698SJohn Baldwin movl %ebx,%eax 
2121bc3d5698SJohn Baldwin adcl %edx,%edi 2122bc3d5698SJohn Baldwin mull 36(%esp) 2123bc3d5698SJohn Baldwin addl %eax,%esi 2124bc3d5698SJohn Baldwin movl %ecx,%eax 2125bc3d5698SJohn Baldwin adcl %edx,%edi 2126bc3d5698SJohn Baldwin mull 60(%esp) 2127bc3d5698SJohn Baldwin addl %eax,%esi 2128bc3d5698SJohn Baldwin movl 12(%esp),%eax 2129bc3d5698SJohn Baldwin adcl %edx,%edi 2130bc3d5698SJohn Baldwin mull 56(%esp) 2131bc3d5698SJohn Baldwin addl %eax,%esi 2132bc3d5698SJohn Baldwin movl 16(%esp),%eax 2133bc3d5698SJohn Baldwin adcl %edx,%edi 2134bc3d5698SJohn Baldwin imull 52(%esp),%eax 2135bc3d5698SJohn Baldwin addl %eax,%esi 2136bc3d5698SJohn Baldwin movl (%esp),%eax 2137bc3d5698SJohn Baldwin adcl $0,%edi 2138bc3d5698SJohn Baldwin mull 44(%esp) 2139bc3d5698SJohn Baldwin movl %esi,24(%esp) 2140bc3d5698SJohn Baldwin xorl %esi,%esi 2141bc3d5698SJohn Baldwin addl %eax,%edi 2142bc3d5698SJohn Baldwin movl %ebx,%eax 2143bc3d5698SJohn Baldwin adcl %edx,%esi 2144bc3d5698SJohn Baldwin mull 40(%esp) 2145bc3d5698SJohn Baldwin addl %eax,%edi 2146bc3d5698SJohn Baldwin movl %ecx,%eax 2147bc3d5698SJohn Baldwin adcl %edx,%esi 2148bc3d5698SJohn Baldwin mull 36(%esp) 2149bc3d5698SJohn Baldwin addl %eax,%edi 2150bc3d5698SJohn Baldwin movl 12(%esp),%eax 2151bc3d5698SJohn Baldwin adcl %edx,%esi 2152bc3d5698SJohn Baldwin mull 60(%esp) 2153bc3d5698SJohn Baldwin addl %eax,%edi 2154bc3d5698SJohn Baldwin movl 16(%esp),%eax 2155bc3d5698SJohn Baldwin adcl %edx,%esi 2156bc3d5698SJohn Baldwin imull 56(%esp),%eax 2157bc3d5698SJohn Baldwin addl %eax,%edi 2158bc3d5698SJohn Baldwin movl (%esp),%eax 2159bc3d5698SJohn Baldwin adcl $0,%esi 2160bc3d5698SJohn Baldwin mull 48(%esp) 2161bc3d5698SJohn Baldwin movl %edi,28(%esp) 2162bc3d5698SJohn Baldwin xorl %edi,%edi 2163bc3d5698SJohn Baldwin addl %eax,%esi 2164bc3d5698SJohn Baldwin movl %ebx,%eax 2165bc3d5698SJohn Baldwin adcl %edx,%edi 2166bc3d5698SJohn Baldwin mull 44(%esp) 2167bc3d5698SJohn Baldwin addl %eax,%esi 2168bc3d5698SJohn Baldwin movl %ecx,%eax 
2169bc3d5698SJohn Baldwin adcl %edx,%edi 2170bc3d5698SJohn Baldwin mull 40(%esp) 2171bc3d5698SJohn Baldwin addl %eax,%esi 2172bc3d5698SJohn Baldwin movl 12(%esp),%eax 2173bc3d5698SJohn Baldwin adcl %edx,%edi 2174bc3d5698SJohn Baldwin mull 36(%esp) 2175bc3d5698SJohn Baldwin addl %eax,%esi 2176bc3d5698SJohn Baldwin movl 16(%esp),%ecx 2177bc3d5698SJohn Baldwin adcl %edx,%edi 2178bc3d5698SJohn Baldwin movl %ecx,%edx 2179bc3d5698SJohn Baldwin imull 60(%esp),%ecx 2180bc3d5698SJohn Baldwin addl %ecx,%esi 2181bc3d5698SJohn Baldwin movl 20(%esp),%eax 2182bc3d5698SJohn Baldwin adcl $0,%edi 2183bc3d5698SJohn Baldwin imull 36(%esp),%edx 2184bc3d5698SJohn Baldwin addl %edi,%edx 2185bc3d5698SJohn Baldwin movl 24(%esp),%ebx 2186bc3d5698SJohn Baldwin movl 28(%esp),%ecx 2187bc3d5698SJohn Baldwin movl %edx,%edi 2188bc3d5698SJohn Baldwin shrl $2,%edx 2189bc3d5698SJohn Baldwin andl $3,%edi 2190bc3d5698SJohn Baldwin leal (%edx,%edx,4),%edx 2191bc3d5698SJohn Baldwin addl %edx,%eax 2192bc3d5698SJohn Baldwin adcl $0,%ebx 2193bc3d5698SJohn Baldwin adcl $0,%ecx 2194bc3d5698SJohn Baldwin adcl $0,%esi 2195bc3d5698SJohn Baldwin adcl $0,%edi 2196bc3d5698SJohn Baldwin cmpl 92(%esp),%ebp 2197bc3d5698SJohn Baldwin jne .L004loop 2198bc3d5698SJohn Baldwin movl 84(%esp),%edx 2199bc3d5698SJohn Baldwin addl $64,%esp 2200bc3d5698SJohn Baldwin movl %eax,(%edx) 2201bc3d5698SJohn Baldwin movl %ebx,4(%edx) 2202bc3d5698SJohn Baldwin movl %ecx,8(%edx) 2203bc3d5698SJohn Baldwin movl %esi,12(%edx) 2204bc3d5698SJohn Baldwin movl %edi,16(%edx) 2205bc3d5698SJohn Baldwin.L003nodata: 2206bc3d5698SJohn Baldwin popl %edi 2207bc3d5698SJohn Baldwin popl %esi 2208bc3d5698SJohn Baldwin popl %ebx 2209bc3d5698SJohn Baldwin popl %ebp 2210bc3d5698SJohn Baldwin ret 2211bc3d5698SJohn Baldwin.size poly1305_blocks,.-.L_poly1305_blocks_begin 2212bc3d5698SJohn Baldwin.globl poly1305_emit 2213bc3d5698SJohn Baldwin.type poly1305_emit,@function 2214bc3d5698SJohn Baldwin.align 16 2215bc3d5698SJohn Baldwinpoly1305_emit: 
2216bc3d5698SJohn Baldwin.L_poly1305_emit_begin: 2217*c0855eaaSJohn Baldwin #ifdef __CET__ 2218*c0855eaaSJohn Baldwin 2219*c0855eaaSJohn Baldwin.byte 243,15,30,251 2220*c0855eaaSJohn Baldwin #endif 2221*c0855eaaSJohn Baldwin 2222bc3d5698SJohn Baldwin pushl %ebp 2223bc3d5698SJohn Baldwin pushl %ebx 2224bc3d5698SJohn Baldwin pushl %esi 2225bc3d5698SJohn Baldwin pushl %edi 2226bc3d5698SJohn Baldwin movl 20(%esp),%ebp 2227bc3d5698SJohn Baldwin.Lenter_emit: 2228bc3d5698SJohn Baldwin movl 24(%esp),%edi 2229bc3d5698SJohn Baldwin movl (%ebp),%eax 2230bc3d5698SJohn Baldwin movl 4(%ebp),%ebx 2231bc3d5698SJohn Baldwin movl 8(%ebp),%ecx 2232bc3d5698SJohn Baldwin movl 12(%ebp),%edx 2233bc3d5698SJohn Baldwin movl 16(%ebp),%esi 2234bc3d5698SJohn Baldwin addl $5,%eax 2235bc3d5698SJohn Baldwin adcl $0,%ebx 2236bc3d5698SJohn Baldwin adcl $0,%ecx 2237bc3d5698SJohn Baldwin adcl $0,%edx 2238bc3d5698SJohn Baldwin adcl $0,%esi 2239bc3d5698SJohn Baldwin shrl $2,%esi 2240bc3d5698SJohn Baldwin negl %esi 2241bc3d5698SJohn Baldwin andl %esi,%eax 2242bc3d5698SJohn Baldwin andl %esi,%ebx 2243bc3d5698SJohn Baldwin andl %esi,%ecx 2244bc3d5698SJohn Baldwin andl %esi,%edx 2245bc3d5698SJohn Baldwin movl %eax,(%edi) 2246bc3d5698SJohn Baldwin movl %ebx,4(%edi) 2247bc3d5698SJohn Baldwin movl %ecx,8(%edi) 2248bc3d5698SJohn Baldwin movl %edx,12(%edi) 2249bc3d5698SJohn Baldwin notl %esi 2250bc3d5698SJohn Baldwin movl (%ebp),%eax 2251bc3d5698SJohn Baldwin movl 4(%ebp),%ebx 2252bc3d5698SJohn Baldwin movl 8(%ebp),%ecx 2253bc3d5698SJohn Baldwin movl 12(%ebp),%edx 2254bc3d5698SJohn Baldwin movl 28(%esp),%ebp 2255bc3d5698SJohn Baldwin andl %esi,%eax 2256bc3d5698SJohn Baldwin andl %esi,%ebx 2257bc3d5698SJohn Baldwin andl %esi,%ecx 2258bc3d5698SJohn Baldwin andl %esi,%edx 2259bc3d5698SJohn Baldwin orl (%edi),%eax 2260bc3d5698SJohn Baldwin orl 4(%edi),%ebx 2261bc3d5698SJohn Baldwin orl 8(%edi),%ecx 2262bc3d5698SJohn Baldwin orl 12(%edi),%edx 2263bc3d5698SJohn Baldwin addl (%ebp),%eax 2264bc3d5698SJohn Baldwin adcl 
4(%ebp),%ebx 2265bc3d5698SJohn Baldwin adcl 8(%ebp),%ecx 2266bc3d5698SJohn Baldwin adcl 12(%ebp),%edx 2267bc3d5698SJohn Baldwin movl %eax,(%edi) 2268bc3d5698SJohn Baldwin movl %ebx,4(%edi) 2269bc3d5698SJohn Baldwin movl %ecx,8(%edi) 2270bc3d5698SJohn Baldwin movl %edx,12(%edi) 2271bc3d5698SJohn Baldwin popl %edi 2272bc3d5698SJohn Baldwin popl %esi 2273bc3d5698SJohn Baldwin popl %ebx 2274bc3d5698SJohn Baldwin popl %ebp 2275bc3d5698SJohn Baldwin ret 2276bc3d5698SJohn Baldwin.size poly1305_emit,.-.L_poly1305_emit_begin /* _poly1305_init_sse2: file-local helper (no .globl) using a register-based convention rather than stack arguments. In: %edi = context base; the 16 bytes at 24(%edi) are read (poly1305_init stores its masked key words at offsets 24..36 of its first argument, presumably this same context). %ebx = constant-table base; the entry at 64(%ebx) is used as the limb mask (the SSE2 block routine points %ebx at .Lconst_sse2 before calling). Out: five 16-byte limb vectors at context offsets 48..127 and the 5x multiples of limbs 1..4 at offsets 128..191; %edi is returned unchanged. Clobbers %ebp, %ecx, %edx, %xmm0-%xmm7 and flags. Uses 224 bytes of 16-byte-aligned stack scratch, which is not cleared on return. NOTE(review): the label names and two-pass structure suggest this builds a table of powers of the key in base 2^26 for the vector code - confirm against the generator script. */ 2277bc3d5698SJohn Baldwin.align 32 2278bc3d5698SJohn Baldwin.type _poly1305_init_sse2,@function 2279bc3d5698SJohn Baldwin.align 16 2280bc3d5698SJohn Baldwin_poly1305_init_sse2: 2281*c0855eaaSJohn Baldwin #ifdef __CET__ 2282*c0855eaaSJohn Baldwin 2283*c0855eaaSJohn Baldwin.byte 243,15,30,251 /* bytes F3 0F 1E FB = endbr32 */ 2284*c0855eaaSJohn Baldwin #endif 2285*c0855eaaSJohn Baldwin 2286bc3d5698SJohn Baldwin movdqu 24(%edi),%xmm4 2287bc3d5698SJohn Baldwin leal 48(%edi),%edi /* bias %edi by 48 for the stores at the end; undone before ret */ 2288bc3d5698SJohn Baldwin movl %esp,%ebp 2289bc3d5698SJohn Baldwin subl $224,%esp 2290bc3d5698SJohn Baldwin andl $-16,%esp /* %ebp keeps the old %esp; the new %esp is 16-byte aligned so the movdqa spills below are legal */ 2291bc3d5698SJohn Baldwin movq 64(%ebx),%xmm7 /* mask loaded into the low 64 bits only (movq zeroes the upper half); presumably 2^26-1 - TODO confirm against .Lconst_sse2 */ 2292bc3d5698SJohn Baldwin movdqa %xmm4,%xmm0 2293bc3d5698SJohn Baldwin movdqa %xmm4,%xmm1 2294bc3d5698SJohn Baldwin movdqa %xmm4,%xmm2 2295bc3d5698SJohn Baldwin pand %xmm7,%xmm0 2296bc3d5698SJohn Baldwin psrlq $26,%xmm1 2297bc3d5698SJohn Baldwin psrldq $6,%xmm2 2298bc3d5698SJohn Baldwin pand %xmm7,%xmm1 2299bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 2300bc3d5698SJohn Baldwin psrlq $4,%xmm2 2301bc3d5698SJohn Baldwin psrlq $30,%xmm3 2302bc3d5698SJohn Baldwin pand %xmm7,%xmm2 2303bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2304bc3d5698SJohn Baldwin psrldq $13,%xmm4 /* xmm0..xmm4 now start at bits 0, 26, 52 (48+4), 78 (48+30) and 104 (13 bytes) of the 128-bit input; with a 26-bit mask these are its base 2^26 limbs */ 2305bc3d5698SJohn Baldwin leal 144(%esp),%edx 2306bc3d5698SJohn Baldwin movl $2,%ecx /* %edx -> second scratch area at 144(%esp); %ecx = 2 passes through the loop */ 2307bc3d5698SJohn Baldwin.L005square: /* each pass starts by spilling the current limbs to 0..64(%esp) */ 2308bc3d5698SJohn Baldwin movdqa %xmm0,(%esp) 2309bc3d5698SJohn Baldwin movdqa %xmm1,16(%esp) 2310bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 2311bc3d5698SJohn Baldwin movdqa
%xmm3,48(%esp) 2312bc3d5698SJohn Baldwin movdqa %xmm4,64(%esp) 2313bc3d5698SJohn Baldwin movdqa %xmm1,%xmm6 2314bc3d5698SJohn Baldwin movdqa %xmm2,%xmm5 2315bc3d5698SJohn Baldwin pslld $2,%xmm6 2316bc3d5698SJohn Baldwin pslld $2,%xmm5 2317bc3d5698SJohn Baldwin paddd %xmm1,%xmm6 2318bc3d5698SJohn Baldwin paddd %xmm2,%xmm5 2319bc3d5698SJohn Baldwin movdqa %xmm6,80(%esp) 2320bc3d5698SJohn Baldwin movdqa %xmm5,96(%esp) 2321bc3d5698SJohn Baldwin movdqa %xmm3,%xmm6 2322bc3d5698SJohn Baldwin movdqa %xmm4,%xmm5 2323bc3d5698SJohn Baldwin pslld $2,%xmm6 2324bc3d5698SJohn Baldwin pslld $2,%xmm5 2325bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 2326bc3d5698SJohn Baldwin paddd %xmm4,%xmm5 2327bc3d5698SJohn Baldwin movdqa %xmm6,112(%esp) 2328bc3d5698SJohn Baldwin movdqa %xmm5,128(%esp) /* 80..128(%esp) = 5 * limbs 1..4, formed as (x << 2) + x */ 2329bc3d5698SJohn Baldwin pshufd $68,%xmm0,%xmm6 2330bc3d5698SJohn Baldwin movdqa %xmm1,%xmm5 2331bc3d5698SJohn Baldwin pshufd $68,%xmm1,%xmm1 2332bc3d5698SJohn Baldwin pshufd $68,%xmm2,%xmm2 2333bc3d5698SJohn Baldwin pshufd $68,%xmm3,%xmm3 2334bc3d5698SJohn Baldwin pshufd $68,%xmm4,%xmm4 2335bc3d5698SJohn Baldwin movdqa %xmm6,(%edx) 2336bc3d5698SJohn Baldwin movdqa %xmm1,16(%edx) 2337bc3d5698SJohn Baldwin movdqa %xmm2,32(%edx) 2338bc3d5698SJohn Baldwin movdqa %xmm3,48(%edx) 2339bc3d5698SJohn Baldwin movdqa %xmm4,64(%edx) /* pshufd $68 duplicates the low qword into both halves; those copies are kept at 0..64(%edx). From here on: 32x32->64-bit limb products (pmuludq) are summed into xmm0..xmm4 with paddq; the 5x copies at 80..128(%esp) appear to supply the terms that wrap around modulo 2^130-5 */ 2340bc3d5698SJohn Baldwin pmuludq %xmm0,%xmm4 2341bc3d5698SJohn Baldwin pmuludq %xmm0,%xmm3 2342bc3d5698SJohn Baldwin pmuludq %xmm0,%xmm2 2343bc3d5698SJohn Baldwin pmuludq %xmm0,%xmm1 2344bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm0 2345bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2346bc3d5698SJohn Baldwin pmuludq 48(%edx),%xmm5 2347bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2348bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm6 2349bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 2350bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2351bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm7 2352bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 2353bc3d5698SJohn Baldwin movdqa 80(%esp),%xmm6 2354bc3d5698SJohn Baldwin pmuludq (%edx),%xmm5
2355bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 2356bc3d5698SJohn Baldwin pmuludq 64(%edx),%xmm6 2357bc3d5698SJohn Baldwin movdqa 32(%esp),%xmm7 2358bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 2359bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2360bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm7 2361bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 2362bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2363bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm5 2364bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 2365bc3d5698SJohn Baldwin movdqa 96(%esp),%xmm7 2366bc3d5698SJohn Baldwin pmuludq (%edx),%xmm6 2367bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 2368bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2369bc3d5698SJohn Baldwin pmuludq 64(%edx),%xmm7 2370bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 2371bc3d5698SJohn Baldwin pmuludq 48(%edx),%xmm5 2372bc3d5698SJohn Baldwin movdqa 48(%esp),%xmm6 2373bc3d5698SJohn Baldwin paddq %xmm7,%xmm1 2374bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2375bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm6 2376bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 2377bc3d5698SJohn Baldwin movdqa 112(%esp),%xmm5 2378bc3d5698SJohn Baldwin pmuludq (%edx),%xmm7 2379bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 2380bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2381bc3d5698SJohn Baldwin pmuludq 64(%edx),%xmm5 2382bc3d5698SJohn Baldwin paddq %xmm7,%xmm3 2383bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2384bc3d5698SJohn Baldwin pmuludq 48(%edx),%xmm6 2385bc3d5698SJohn Baldwin paddq %xmm5,%xmm2 2386bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm7 2387bc3d5698SJohn Baldwin movdqa 64(%esp),%xmm5 2388bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 2389bc3d5698SJohn Baldwin movdqa 128(%esp),%xmm6 2390bc3d5698SJohn Baldwin pmuludq (%edx),%xmm5 2391bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 2392bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2393bc3d5698SJohn Baldwin pmuludq 64(%edx),%xmm6 2394bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 2395bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2396bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm7 2397bc3d5698SJohn Baldwin paddq %xmm6,%xmm3
2398bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2399bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm5 2400bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 2401bc3d5698SJohn Baldwin pmuludq 48(%edx),%xmm6 2402bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 /* reload the mask, this time all 128 bits, for the carry pass */ 2403bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 2404bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 /* carry pass: each sum is split with psrlq $26 / pand mask and the shifted-out part is added to the next limb */ 2405bc3d5698SJohn Baldwin movdqa %xmm3,%xmm5 2406bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2407bc3d5698SJohn Baldwin psrlq $26,%xmm5 2408bc3d5698SJohn Baldwin paddq %xmm4,%xmm5 2409bc3d5698SJohn Baldwin movdqa %xmm0,%xmm6 2410bc3d5698SJohn Baldwin pand %xmm7,%xmm0 2411bc3d5698SJohn Baldwin psrlq $26,%xmm6 2412bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 2413bc3d5698SJohn Baldwin paddq %xmm1,%xmm6 2414bc3d5698SJohn Baldwin psrlq $26,%xmm5 2415bc3d5698SJohn Baldwin pand %xmm7,%xmm4 2416bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 2417bc3d5698SJohn Baldwin psrlq $26,%xmm6 2418bc3d5698SJohn Baldwin paddd %xmm5,%xmm0 2419bc3d5698SJohn Baldwin psllq $2,%xmm5 2420bc3d5698SJohn Baldwin paddq %xmm2,%xmm6 2421bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 /* the carry c out of limb 4 re-enters limb 0 multiplied by 5: limb0 + c + (c << 2) */ 2422bc3d5698SJohn Baldwin pand %xmm7,%xmm1 2423bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 2424bc3d5698SJohn Baldwin psrlq $26,%xmm6 2425bc3d5698SJohn Baldwin pand %xmm7,%xmm2 2426bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 2427bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 2428bc3d5698SJohn Baldwin psrlq $26,%xmm5 2429bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 2430bc3d5698SJohn Baldwin psrlq $26,%xmm6 2431bc3d5698SJohn Baldwin pand %xmm7,%xmm0 2432bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 2433bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2434bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 2435bc3d5698SJohn Baldwin decl %ecx 2436bc3d5698SJohn Baldwin jz .L006square_break /* taken after the second pass */ 2437bc3d5698SJohn Baldwin punpcklqdq (%esp),%xmm0 2438bc3d5698SJohn Baldwin punpcklqdq 16(%esp),%xmm1 2439bc3d5698SJohn Baldwin punpcklqdq 32(%esp),%xmm2 2440bc3d5698SJohn Baldwin punpcklqdq 48(%esp),%xmm3 2441bc3d5698SJohn Baldwin punpcklqdq 64(%esp),%xmm4 /* after the first pass: low qword keeps the new limbs, high qword takes the low qword of the limbs spilled at loop entry; then go round again */ 2442bc3d5698SJohn Baldwin jmp
.L005square 2443bc3d5698SJohn Baldwin.L006square_break: /* shift the final limbs into the odd dwords, OR in the limbs spilled at the start of the last pass, then reorder dwords with pshufd $141 (source dwords 1,3,0,2) */ 2444bc3d5698SJohn Baldwin psllq $32,%xmm0 2445bc3d5698SJohn Baldwin psllq $32,%xmm1 2446bc3d5698SJohn Baldwin psllq $32,%xmm2 2447bc3d5698SJohn Baldwin psllq $32,%xmm3 2448bc3d5698SJohn Baldwin psllq $32,%xmm4 2449bc3d5698SJohn Baldwin por (%esp),%xmm0 2450bc3d5698SJohn Baldwin por 16(%esp),%xmm1 2451bc3d5698SJohn Baldwin por 32(%esp),%xmm2 2452bc3d5698SJohn Baldwin por 48(%esp),%xmm3 2453bc3d5698SJohn Baldwin por 64(%esp),%xmm4 2454bc3d5698SJohn Baldwin pshufd $141,%xmm0,%xmm0 2455bc3d5698SJohn Baldwin pshufd $141,%xmm1,%xmm1 2456bc3d5698SJohn Baldwin pshufd $141,%xmm2,%xmm2 2457bc3d5698SJohn Baldwin pshufd $141,%xmm3,%xmm3 2458bc3d5698SJohn Baldwin pshufd $141,%xmm4,%xmm4 2459bc3d5698SJohn Baldwin movdqu %xmm0,(%edi) 2460bc3d5698SJohn Baldwin movdqu %xmm1,16(%edi) 2461bc3d5698SJohn Baldwin movdqu %xmm2,32(%edi) 2462bc3d5698SJohn Baldwin movdqu %xmm3,48(%edi) 2463bc3d5698SJohn Baldwin movdqu %xmm4,64(%edi) /* five packed limb vectors written at 0..64(%edi), i.e. context offsets 48..112; movdqu because the context is not known to be aligned */ 2464bc3d5698SJohn Baldwin movdqa %xmm1,%xmm6 2465bc3d5698SJohn Baldwin movdqa %xmm2,%xmm5 2466bc3d5698SJohn Baldwin pslld $2,%xmm6 2467bc3d5698SJohn Baldwin pslld $2,%xmm5 2468bc3d5698SJohn Baldwin paddd %xmm1,%xmm6 2469bc3d5698SJohn Baldwin paddd %xmm2,%xmm5 2470bc3d5698SJohn Baldwin movdqu %xmm6,80(%edi) 2471bc3d5698SJohn Baldwin movdqu %xmm5,96(%edi) 2472bc3d5698SJohn Baldwin movdqa %xmm3,%xmm6 2473bc3d5698SJohn Baldwin movdqa %xmm4,%xmm5 2474bc3d5698SJohn Baldwin pslld $2,%xmm6 2475bc3d5698SJohn Baldwin pslld $2,%xmm5 2476bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 2477bc3d5698SJohn Baldwin paddd %xmm4,%xmm5 2478bc3d5698SJohn Baldwin movdqu %xmm6,112(%edi) 2479bc3d5698SJohn Baldwin movdqu %xmm5,128(%edi) /* 5 * limbs 1..4 written at 80..128(%edi), i.e. context offsets 128..176 */ 2480bc3d5698SJohn Baldwin movl %ebp,%esp 2481bc3d5698SJohn Baldwin leal -48(%edi),%edi /* restore the entry %esp and %edi; %ebp is left holding the old %esp and the stack scratch is not cleared */ 2482bc3d5698SJohn Baldwin ret 2483bc3d5698SJohn Baldwin.size _poly1305_init_sse2,.-_poly1305_init_sse2 2484bc3d5698SJohn Baldwin.align 32 2485bc3d5698SJohn Baldwin.type _poly1305_blocks_sse2,@function 2486bc3d5698SJohn
Baldwin.align 16 2487bc3d5698SJohn Baldwin_poly1305_blocks_sse2: 2488*c0855eaaSJohn Baldwin #ifdef __CET__ 2489*c0855eaaSJohn Baldwin 2490*c0855eaaSJohn Baldwin.byte 243,15,30,251 2491*c0855eaaSJohn Baldwin #endif 2492*c0855eaaSJohn Baldwin 2493bc3d5698SJohn Baldwin pushl %ebp 2494bc3d5698SJohn Baldwin pushl %ebx 2495bc3d5698SJohn Baldwin pushl %esi 2496bc3d5698SJohn Baldwin pushl %edi 2497bc3d5698SJohn Baldwin movl 20(%esp),%edi 2498bc3d5698SJohn Baldwin movl 24(%esp),%esi 2499bc3d5698SJohn Baldwin movl 28(%esp),%ecx 2500bc3d5698SJohn Baldwin movl 20(%edi),%eax 2501bc3d5698SJohn Baldwin andl $-16,%ecx 2502bc3d5698SJohn Baldwin jz .L007nodata 2503bc3d5698SJohn Baldwin cmpl $64,%ecx 2504bc3d5698SJohn Baldwin jae .L008enter_sse2 2505bc3d5698SJohn Baldwin testl %eax,%eax 2506bc3d5698SJohn Baldwin jz .Lenter_blocks 2507bc3d5698SJohn Baldwin.align 16 2508bc3d5698SJohn Baldwin.L008enter_sse2: 2509bc3d5698SJohn Baldwin call .L009pic_point 2510bc3d5698SJohn Baldwin.L009pic_point: 2511bc3d5698SJohn Baldwin popl %ebx 2512bc3d5698SJohn Baldwin leal .Lconst_sse2-.L009pic_point(%ebx),%ebx 2513bc3d5698SJohn Baldwin testl %eax,%eax 2514bc3d5698SJohn Baldwin jnz .L010base2_26 2515bc3d5698SJohn Baldwin call _poly1305_init_sse2 2516bc3d5698SJohn Baldwin movl (%edi),%eax 2517bc3d5698SJohn Baldwin movl 3(%edi),%ecx 2518bc3d5698SJohn Baldwin movl 6(%edi),%edx 2519bc3d5698SJohn Baldwin movl 9(%edi),%esi 2520bc3d5698SJohn Baldwin movl 13(%edi),%ebp 2521bc3d5698SJohn Baldwin movl $1,20(%edi) 2522bc3d5698SJohn Baldwin shrl $2,%ecx 2523bc3d5698SJohn Baldwin andl $67108863,%eax 2524bc3d5698SJohn Baldwin shrl $4,%edx 2525bc3d5698SJohn Baldwin andl $67108863,%ecx 2526bc3d5698SJohn Baldwin shrl $6,%esi 2527bc3d5698SJohn Baldwin andl $67108863,%edx 2528bc3d5698SJohn Baldwin movd %eax,%xmm0 2529bc3d5698SJohn Baldwin movd %ecx,%xmm1 2530bc3d5698SJohn Baldwin movd %edx,%xmm2 2531bc3d5698SJohn Baldwin movd %esi,%xmm3 2532bc3d5698SJohn Baldwin movd %ebp,%xmm4 2533bc3d5698SJohn Baldwin movl 
24(%esp),%esi 2534bc3d5698SJohn Baldwin movl 28(%esp),%ecx 2535bc3d5698SJohn Baldwin jmp .L011base2_32 2536bc3d5698SJohn Baldwin.align 16 2537bc3d5698SJohn Baldwin.L010base2_26: 2538bc3d5698SJohn Baldwin movd (%edi),%xmm0 2539bc3d5698SJohn Baldwin movd 4(%edi),%xmm1 2540bc3d5698SJohn Baldwin movd 8(%edi),%xmm2 2541bc3d5698SJohn Baldwin movd 12(%edi),%xmm3 2542bc3d5698SJohn Baldwin movd 16(%edi),%xmm4 2543bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 2544bc3d5698SJohn Baldwin.L011base2_32: 2545bc3d5698SJohn Baldwin movl 32(%esp),%eax 2546bc3d5698SJohn Baldwin movl %esp,%ebp 2547bc3d5698SJohn Baldwin subl $528,%esp 2548bc3d5698SJohn Baldwin andl $-16,%esp 2549bc3d5698SJohn Baldwin leal 48(%edi),%edi 2550bc3d5698SJohn Baldwin shll $24,%eax 2551bc3d5698SJohn Baldwin testl $31,%ecx 2552bc3d5698SJohn Baldwin jz .L012even 2553bc3d5698SJohn Baldwin movdqu (%esi),%xmm6 2554bc3d5698SJohn Baldwin leal 16(%esi),%esi 2555bc3d5698SJohn Baldwin movdqa %xmm6,%xmm5 2556bc3d5698SJohn Baldwin pand %xmm7,%xmm6 2557bc3d5698SJohn Baldwin paddd %xmm6,%xmm0 2558bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2559bc3d5698SJohn Baldwin psrlq $26,%xmm5 2560bc3d5698SJohn Baldwin psrldq $6,%xmm6 2561bc3d5698SJohn Baldwin pand %xmm7,%xmm5 2562bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 2563bc3d5698SJohn Baldwin movdqa %xmm6,%xmm5 2564bc3d5698SJohn Baldwin psrlq $4,%xmm6 2565bc3d5698SJohn Baldwin pand %xmm7,%xmm6 2566bc3d5698SJohn Baldwin paddd %xmm6,%xmm2 2567bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2568bc3d5698SJohn Baldwin psrlq $30,%xmm5 2569bc3d5698SJohn Baldwin pand %xmm7,%xmm5 2570bc3d5698SJohn Baldwin psrldq $7,%xmm6 2571bc3d5698SJohn Baldwin paddd %xmm5,%xmm3 2572bc3d5698SJohn Baldwin movd %eax,%xmm5 2573bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 2574bc3d5698SJohn Baldwin movd 12(%edi),%xmm6 2575bc3d5698SJohn Baldwin paddd %xmm5,%xmm4 2576bc3d5698SJohn Baldwin movdqa %xmm0,(%esp) 2577bc3d5698SJohn Baldwin movdqa %xmm1,16(%esp) 2578bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 2579bc3d5698SJohn Baldwin 
movdqa %xmm3,48(%esp) 2580bc3d5698SJohn Baldwin movdqa %xmm4,64(%esp) 2581bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm0 2582bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm1 2583bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm2 2584bc3d5698SJohn Baldwin movd 28(%edi),%xmm5 2585bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm3 2586bc3d5698SJohn Baldwin pmuludq %xmm6,%xmm4 2587bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2588bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm5 2589bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2590bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm6 2591bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 2592bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2593bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm7 2594bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 2595bc3d5698SJohn Baldwin movd 92(%edi),%xmm6 2596bc3d5698SJohn Baldwin pmuludq (%esp),%xmm5 2597bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 2598bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm6 2599bc3d5698SJohn Baldwin movd 44(%edi),%xmm7 2600bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 2601bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2602bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm7 2603bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 2604bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2605bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm5 2606bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 2607bc3d5698SJohn Baldwin movd 108(%edi),%xmm7 2608bc3d5698SJohn Baldwin pmuludq (%esp),%xmm6 2609bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 2610bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2611bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm7 2612bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 2613bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm5 2614bc3d5698SJohn Baldwin movd 60(%edi),%xmm6 2615bc3d5698SJohn Baldwin paddq %xmm7,%xmm1 2616bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2617bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm6 2618bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 2619bc3d5698SJohn Baldwin movd 124(%edi),%xmm5 2620bc3d5698SJohn Baldwin pmuludq (%esp),%xmm7 2621bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 2622bc3d5698SJohn Baldwin movdqa 
%xmm5,%xmm6 2623bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm5 2624bc3d5698SJohn Baldwin paddq %xmm7,%xmm3 2625bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2626bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm6 2627bc3d5698SJohn Baldwin paddq %xmm5,%xmm2 2628bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm7 2629bc3d5698SJohn Baldwin movd 76(%edi),%xmm5 2630bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 2631bc3d5698SJohn Baldwin movd 140(%edi),%xmm6 2632bc3d5698SJohn Baldwin pmuludq (%esp),%xmm5 2633bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 2634bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2635bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm6 2636bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 2637bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2638bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm7 2639bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 2640bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2641bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm5 2642bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 2643bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm6 2644bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 2645bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 2646bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 2647bc3d5698SJohn Baldwin movdqa %xmm3,%xmm5 2648bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2649bc3d5698SJohn Baldwin psrlq $26,%xmm5 2650bc3d5698SJohn Baldwin paddq %xmm4,%xmm5 2651bc3d5698SJohn Baldwin movdqa %xmm0,%xmm6 2652bc3d5698SJohn Baldwin pand %xmm7,%xmm0 2653bc3d5698SJohn Baldwin psrlq $26,%xmm6 2654bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 2655bc3d5698SJohn Baldwin paddq %xmm1,%xmm6 2656bc3d5698SJohn Baldwin psrlq $26,%xmm5 2657bc3d5698SJohn Baldwin pand %xmm7,%xmm4 2658bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 2659bc3d5698SJohn Baldwin psrlq $26,%xmm6 2660bc3d5698SJohn Baldwin paddd %xmm5,%xmm0 2661bc3d5698SJohn Baldwin psllq $2,%xmm5 2662bc3d5698SJohn Baldwin paddq %xmm2,%xmm6 2663bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 2664bc3d5698SJohn Baldwin pand %xmm7,%xmm1 2665bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 2666bc3d5698SJohn Baldwin psrlq $26,%xmm6 
2667bc3d5698SJohn Baldwin pand %xmm7,%xmm2 2668bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 2669bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 2670bc3d5698SJohn Baldwin psrlq $26,%xmm5 2671bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 2672bc3d5698SJohn Baldwin psrlq $26,%xmm6 2673bc3d5698SJohn Baldwin pand %xmm7,%xmm0 2674bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 2675bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2676bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 2677bc3d5698SJohn Baldwin subl $16,%ecx 2678bc3d5698SJohn Baldwin jz .L013done 2679bc3d5698SJohn Baldwin.L012even: 2680bc3d5698SJohn Baldwin leal 384(%esp),%edx 2681bc3d5698SJohn Baldwin leal -32(%esi),%eax 2682bc3d5698SJohn Baldwin subl $64,%ecx 2683bc3d5698SJohn Baldwin movdqu (%edi),%xmm5 2684bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 2685bc3d5698SJohn Baldwin cmovbl %eax,%esi 2686bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 2687bc3d5698SJohn Baldwin movdqa %xmm6,(%edx) 2688bc3d5698SJohn Baldwin leal 160(%esp),%eax 2689bc3d5698SJohn Baldwin movdqu 16(%edi),%xmm6 2690bc3d5698SJohn Baldwin movdqa %xmm5,-144(%edx) 2691bc3d5698SJohn Baldwin pshufd $68,%xmm6,%xmm5 2692bc3d5698SJohn Baldwin pshufd $238,%xmm6,%xmm6 2693bc3d5698SJohn Baldwin movdqa %xmm5,16(%edx) 2694bc3d5698SJohn Baldwin movdqu 32(%edi),%xmm5 2695bc3d5698SJohn Baldwin movdqa %xmm6,-128(%edx) 2696bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 2697bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 2698bc3d5698SJohn Baldwin movdqa %xmm6,32(%edx) 2699bc3d5698SJohn Baldwin movdqu 48(%edi),%xmm6 2700bc3d5698SJohn Baldwin movdqa %xmm5,-112(%edx) 2701bc3d5698SJohn Baldwin pshufd $68,%xmm6,%xmm5 2702bc3d5698SJohn Baldwin pshufd $238,%xmm6,%xmm6 2703bc3d5698SJohn Baldwin movdqa %xmm5,48(%edx) 2704bc3d5698SJohn Baldwin movdqu 64(%edi),%xmm5 2705bc3d5698SJohn Baldwin movdqa %xmm6,-96(%edx) 2706bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 2707bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 2708bc3d5698SJohn Baldwin movdqa %xmm6,64(%edx) 2709bc3d5698SJohn Baldwin movdqu 80(%edi),%xmm6 
2710bc3d5698SJohn Baldwin movdqa %xmm5,-80(%edx) 2711bc3d5698SJohn Baldwin pshufd $68,%xmm6,%xmm5 2712bc3d5698SJohn Baldwin pshufd $238,%xmm6,%xmm6 2713bc3d5698SJohn Baldwin movdqa %xmm5,80(%edx) 2714bc3d5698SJohn Baldwin movdqu 96(%edi),%xmm5 2715bc3d5698SJohn Baldwin movdqa %xmm6,-64(%edx) 2716bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 2717bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 2718bc3d5698SJohn Baldwin movdqa %xmm6,96(%edx) 2719bc3d5698SJohn Baldwin movdqu 112(%edi),%xmm6 2720bc3d5698SJohn Baldwin movdqa %xmm5,-48(%edx) 2721bc3d5698SJohn Baldwin pshufd $68,%xmm6,%xmm5 2722bc3d5698SJohn Baldwin pshufd $238,%xmm6,%xmm6 2723bc3d5698SJohn Baldwin movdqa %xmm5,112(%edx) 2724bc3d5698SJohn Baldwin movdqu 128(%edi),%xmm5 2725bc3d5698SJohn Baldwin movdqa %xmm6,-32(%edx) 2726bc3d5698SJohn Baldwin pshufd $68,%xmm5,%xmm6 2727bc3d5698SJohn Baldwin pshufd $238,%xmm5,%xmm5 2728bc3d5698SJohn Baldwin movdqa %xmm6,128(%edx) 2729bc3d5698SJohn Baldwin movdqa %xmm5,-16(%edx) 2730bc3d5698SJohn Baldwin movdqu 32(%esi),%xmm5 2731bc3d5698SJohn Baldwin movdqu 48(%esi),%xmm6 2732bc3d5698SJohn Baldwin leal 32(%esi),%esi 2733bc3d5698SJohn Baldwin movdqa %xmm2,112(%esp) 2734bc3d5698SJohn Baldwin movdqa %xmm3,128(%esp) 2735bc3d5698SJohn Baldwin movdqa %xmm4,144(%esp) 2736bc3d5698SJohn Baldwin movdqa %xmm5,%xmm2 2737bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 2738bc3d5698SJohn Baldwin psrldq $6,%xmm2 2739bc3d5698SJohn Baldwin psrldq $6,%xmm3 2740bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 2741bc3d5698SJohn Baldwin punpcklqdq %xmm3,%xmm2 2742bc3d5698SJohn Baldwin punpckhqdq %xmm6,%xmm4 2743bc3d5698SJohn Baldwin punpcklqdq %xmm6,%xmm5 2744bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 2745bc3d5698SJohn Baldwin psrlq $4,%xmm2 2746bc3d5698SJohn Baldwin psrlq $30,%xmm3 2747bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2748bc3d5698SJohn Baldwin psrlq $40,%xmm4 2749bc3d5698SJohn Baldwin psrlq $26,%xmm6 2750bc3d5698SJohn Baldwin pand %xmm7,%xmm5 2751bc3d5698SJohn Baldwin pand %xmm7,%xmm6 2752bc3d5698SJohn 
Baldwin pand %xmm7,%xmm2 2753bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2754bc3d5698SJohn Baldwin por (%ebx),%xmm4 2755bc3d5698SJohn Baldwin movdqa %xmm0,80(%esp) 2756bc3d5698SJohn Baldwin movdqa %xmm1,96(%esp) 2757bc3d5698SJohn Baldwin jbe .L014skip_loop 2758bc3d5698SJohn Baldwin jmp .L015loop 2759bc3d5698SJohn Baldwin.align 32 2760bc3d5698SJohn Baldwin.L015loop: 2761bc3d5698SJohn Baldwin movdqa -144(%edx),%xmm7 2762bc3d5698SJohn Baldwin movdqa %xmm6,16(%eax) 2763bc3d5698SJohn Baldwin movdqa %xmm2,32(%eax) 2764bc3d5698SJohn Baldwin movdqa %xmm3,48(%eax) 2765bc3d5698SJohn Baldwin movdqa %xmm4,64(%eax) 2766bc3d5698SJohn Baldwin movdqa %xmm5,%xmm1 2767bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm5 2768bc3d5698SJohn Baldwin movdqa %xmm6,%xmm0 2769bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm6 2770bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm2 2771bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm3 2772bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm4 2773bc3d5698SJohn Baldwin pmuludq -16(%edx),%xmm0 2774bc3d5698SJohn Baldwin movdqa %xmm1,%xmm7 2775bc3d5698SJohn Baldwin pmuludq -128(%edx),%xmm1 2776bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 2777bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2778bc3d5698SJohn Baldwin pmuludq -112(%edx),%xmm7 2779bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 2780bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2781bc3d5698SJohn Baldwin pmuludq -96(%edx),%xmm5 2782bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 2783bc3d5698SJohn Baldwin movdqa 16(%eax),%xmm7 2784bc3d5698SJohn Baldwin pmuludq -80(%edx),%xmm6 2785bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 2786bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2787bc3d5698SJohn Baldwin pmuludq -128(%edx),%xmm7 2788bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 2789bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2790bc3d5698SJohn Baldwin pmuludq -112(%edx),%xmm5 2791bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 2792bc3d5698SJohn Baldwin movdqa 32(%eax),%xmm7 2793bc3d5698SJohn Baldwin pmuludq -96(%edx),%xmm6 2794bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 2795bc3d5698SJohn Baldwin movdqa 
%xmm7,%xmm5 2796bc3d5698SJohn Baldwin pmuludq -32(%edx),%xmm7 2797bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 2798bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2799bc3d5698SJohn Baldwin pmuludq -16(%edx),%xmm5 2800bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 2801bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2802bc3d5698SJohn Baldwin pmuludq -128(%edx),%xmm6 2803bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 2804bc3d5698SJohn Baldwin movdqa 48(%eax),%xmm5 2805bc3d5698SJohn Baldwin pmuludq -112(%edx),%xmm7 2806bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 2807bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2808bc3d5698SJohn Baldwin pmuludq -48(%edx),%xmm5 2809bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 2810bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2811bc3d5698SJohn Baldwin pmuludq -32(%edx),%xmm6 2812bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 2813bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2814bc3d5698SJohn Baldwin pmuludq -16(%edx),%xmm7 2815bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 2816bc3d5698SJohn Baldwin movdqa 64(%eax),%xmm6 2817bc3d5698SJohn Baldwin pmuludq -128(%edx),%xmm5 2818bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 2819bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2820bc3d5698SJohn Baldwin pmuludq -16(%edx),%xmm6 2821bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 2822bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2823bc3d5698SJohn Baldwin pmuludq -64(%edx),%xmm7 2824bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 2825bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2826bc3d5698SJohn Baldwin pmuludq -48(%edx),%xmm5 2827bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 2828bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 2829bc3d5698SJohn Baldwin pmuludq -32(%edx),%xmm6 2830bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 2831bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 2832bc3d5698SJohn Baldwin movdqu -32(%esi),%xmm5 2833bc3d5698SJohn Baldwin movdqu -16(%esi),%xmm6 2834bc3d5698SJohn Baldwin leal 32(%esi),%esi 2835bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 2836bc3d5698SJohn Baldwin movdqa %xmm3,48(%esp) 2837bc3d5698SJohn Baldwin movdqa %xmm4,64(%esp) 2838bc3d5698SJohn 
Baldwin movdqa %xmm5,%xmm2 2839bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 2840bc3d5698SJohn Baldwin psrldq $6,%xmm2 2841bc3d5698SJohn Baldwin psrldq $6,%xmm3 2842bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 2843bc3d5698SJohn Baldwin punpcklqdq %xmm3,%xmm2 2844bc3d5698SJohn Baldwin punpckhqdq %xmm6,%xmm4 2845bc3d5698SJohn Baldwin punpcklqdq %xmm6,%xmm5 2846bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 2847bc3d5698SJohn Baldwin psrlq $4,%xmm2 2848bc3d5698SJohn Baldwin psrlq $30,%xmm3 2849bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2850bc3d5698SJohn Baldwin psrlq $40,%xmm4 2851bc3d5698SJohn Baldwin psrlq $26,%xmm6 2852bc3d5698SJohn Baldwin pand %xmm7,%xmm5 2853bc3d5698SJohn Baldwin pand %xmm7,%xmm6 2854bc3d5698SJohn Baldwin pand %xmm7,%xmm2 2855bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2856bc3d5698SJohn Baldwin por (%ebx),%xmm4 2857bc3d5698SJohn Baldwin leal -32(%esi),%eax 2858bc3d5698SJohn Baldwin subl $64,%ecx 2859bc3d5698SJohn Baldwin paddd 80(%esp),%xmm5 2860bc3d5698SJohn Baldwin paddd 96(%esp),%xmm6 2861bc3d5698SJohn Baldwin paddd 112(%esp),%xmm2 2862bc3d5698SJohn Baldwin paddd 128(%esp),%xmm3 2863bc3d5698SJohn Baldwin paddd 144(%esp),%xmm4 2864bc3d5698SJohn Baldwin cmovbl %eax,%esi 2865bc3d5698SJohn Baldwin leal 160(%esp),%eax 2866bc3d5698SJohn Baldwin movdqa (%edx),%xmm7 2867bc3d5698SJohn Baldwin movdqa %xmm1,16(%esp) 2868bc3d5698SJohn Baldwin movdqa %xmm6,16(%eax) 2869bc3d5698SJohn Baldwin movdqa %xmm2,32(%eax) 2870bc3d5698SJohn Baldwin movdqa %xmm3,48(%eax) 2871bc3d5698SJohn Baldwin movdqa %xmm4,64(%eax) 2872bc3d5698SJohn Baldwin movdqa %xmm5,%xmm1 2873bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm5 2874bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 2875bc3d5698SJohn Baldwin movdqa %xmm6,%xmm0 2876bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm6 2877bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm2 2878bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm3 2879bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm4 2880bc3d5698SJohn Baldwin paddq 16(%esp),%xmm6 2881bc3d5698SJohn Baldwin paddq 32(%esp),%xmm2 2882bc3d5698SJohn 
Baldwin paddq 48(%esp),%xmm3 2883bc3d5698SJohn Baldwin paddq 64(%esp),%xmm4 2884bc3d5698SJohn Baldwin pmuludq 128(%edx),%xmm0 2885bc3d5698SJohn Baldwin movdqa %xmm1,%xmm7 2886bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm1 2887bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 2888bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2889bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm7 2890bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 2891bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2892bc3d5698SJohn Baldwin pmuludq 48(%edx),%xmm5 2893bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 2894bc3d5698SJohn Baldwin movdqa 16(%eax),%xmm7 2895bc3d5698SJohn Baldwin pmuludq 64(%edx),%xmm6 2896bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 2897bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2898bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm7 2899bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 2900bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2901bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm5 2902bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 2903bc3d5698SJohn Baldwin movdqa 32(%eax),%xmm7 2904bc3d5698SJohn Baldwin pmuludq 48(%edx),%xmm6 2905bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 2906bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2907bc3d5698SJohn Baldwin pmuludq 112(%edx),%xmm7 2908bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 2909bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2910bc3d5698SJohn Baldwin pmuludq 128(%edx),%xmm5 2911bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 2912bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2913bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm6 2914bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 2915bc3d5698SJohn Baldwin movdqa 48(%eax),%xmm5 2916bc3d5698SJohn Baldwin pmuludq 32(%edx),%xmm7 2917bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 2918bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2919bc3d5698SJohn Baldwin pmuludq 96(%edx),%xmm5 2920bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 2921bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2922bc3d5698SJohn Baldwin pmuludq 112(%edx),%xmm6 2923bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 2924bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2925bc3d5698SJohn 
Baldwin pmuludq 128(%edx),%xmm7 2926bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 2927bc3d5698SJohn Baldwin movdqa 64(%eax),%xmm6 2928bc3d5698SJohn Baldwin pmuludq 16(%edx),%xmm5 2929bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 2930bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 2931bc3d5698SJohn Baldwin pmuludq 128(%edx),%xmm6 2932bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 2933bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 2934bc3d5698SJohn Baldwin pmuludq 80(%edx),%xmm7 2935bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 2936bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2937bc3d5698SJohn Baldwin pmuludq 96(%edx),%xmm5 2938bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 2939bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 2940bc3d5698SJohn Baldwin pmuludq 112(%edx),%xmm6 2941bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 2942bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 2943bc3d5698SJohn Baldwin movdqa %xmm3,%xmm5 2944bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2945bc3d5698SJohn Baldwin psrlq $26,%xmm5 2946bc3d5698SJohn Baldwin paddq %xmm4,%xmm5 2947bc3d5698SJohn Baldwin movdqa %xmm0,%xmm6 2948bc3d5698SJohn Baldwin pand %xmm7,%xmm0 2949bc3d5698SJohn Baldwin psrlq $26,%xmm6 2950bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 2951bc3d5698SJohn Baldwin paddq %xmm1,%xmm6 2952bc3d5698SJohn Baldwin psrlq $26,%xmm5 2953bc3d5698SJohn Baldwin pand %xmm7,%xmm4 2954bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 2955bc3d5698SJohn Baldwin psrlq $26,%xmm6 2956bc3d5698SJohn Baldwin paddd %xmm5,%xmm0 2957bc3d5698SJohn Baldwin psllq $2,%xmm5 2958bc3d5698SJohn Baldwin paddq %xmm2,%xmm6 2959bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 2960bc3d5698SJohn Baldwin pand %xmm7,%xmm1 2961bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 2962bc3d5698SJohn Baldwin psrlq $26,%xmm6 2963bc3d5698SJohn Baldwin pand %xmm7,%xmm2 2964bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 2965bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 2966bc3d5698SJohn Baldwin psrlq $26,%xmm5 2967bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 2968bc3d5698SJohn Baldwin psrlq $26,%xmm6 2969bc3d5698SJohn Baldwin pand %xmm7,%xmm0 
2970bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 2971bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2972bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 2973bc3d5698SJohn Baldwin movdqu 32(%esi),%xmm5 2974bc3d5698SJohn Baldwin movdqu 48(%esi),%xmm6 2975bc3d5698SJohn Baldwin leal 32(%esi),%esi 2976bc3d5698SJohn Baldwin movdqa %xmm2,112(%esp) 2977bc3d5698SJohn Baldwin movdqa %xmm3,128(%esp) 2978bc3d5698SJohn Baldwin movdqa %xmm4,144(%esp) 2979bc3d5698SJohn Baldwin movdqa %xmm5,%xmm2 2980bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 2981bc3d5698SJohn Baldwin psrldq $6,%xmm2 2982bc3d5698SJohn Baldwin psrldq $6,%xmm3 2983bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 2984bc3d5698SJohn Baldwin punpcklqdq %xmm3,%xmm2 2985bc3d5698SJohn Baldwin punpckhqdq %xmm6,%xmm4 2986bc3d5698SJohn Baldwin punpcklqdq %xmm6,%xmm5 2987bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 2988bc3d5698SJohn Baldwin psrlq $4,%xmm2 2989bc3d5698SJohn Baldwin psrlq $30,%xmm3 2990bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 2991bc3d5698SJohn Baldwin psrlq $40,%xmm4 2992bc3d5698SJohn Baldwin psrlq $26,%xmm6 2993bc3d5698SJohn Baldwin pand %xmm7,%xmm5 2994bc3d5698SJohn Baldwin pand %xmm7,%xmm6 2995bc3d5698SJohn Baldwin pand %xmm7,%xmm2 2996bc3d5698SJohn Baldwin pand %xmm7,%xmm3 2997bc3d5698SJohn Baldwin por (%ebx),%xmm4 2998bc3d5698SJohn Baldwin movdqa %xmm0,80(%esp) 2999bc3d5698SJohn Baldwin movdqa %xmm1,96(%esp) 3000bc3d5698SJohn Baldwin ja .L015loop 3001bc3d5698SJohn Baldwin.L014skip_loop: 3002bc3d5698SJohn Baldwin pshufd $16,-144(%edx),%xmm7 3003bc3d5698SJohn Baldwin addl $32,%ecx 3004bc3d5698SJohn Baldwin jnz .L016long_tail 3005bc3d5698SJohn Baldwin paddd %xmm0,%xmm5 3006bc3d5698SJohn Baldwin paddd %xmm1,%xmm6 3007bc3d5698SJohn Baldwin paddd 112(%esp),%xmm2 3008bc3d5698SJohn Baldwin paddd 128(%esp),%xmm3 3009bc3d5698SJohn Baldwin paddd 144(%esp),%xmm4 3010bc3d5698SJohn Baldwin.L016long_tail: 3011bc3d5698SJohn Baldwin movdqa %xmm5,(%eax) 3012bc3d5698SJohn Baldwin movdqa %xmm6,16(%eax) 3013bc3d5698SJohn Baldwin movdqa %xmm2,32(%eax) 
3014bc3d5698SJohn Baldwin movdqa %xmm3,48(%eax) 3015bc3d5698SJohn Baldwin movdqa %xmm4,64(%eax) 3016bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm5 3017bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm6 3018bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm2 3019bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 3020bc3d5698SJohn Baldwin pshufd $16,-128(%edx),%xmm5 3021bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm3 3022bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 3023bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm4 3024bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3025bc3d5698SJohn Baldwin pmuludq 48(%eax),%xmm5 3026bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 3027bc3d5698SJohn Baldwin pmuludq 32(%eax),%xmm6 3028bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 3029bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 3030bc3d5698SJohn Baldwin pmuludq 16(%eax),%xmm7 3031bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 3032bc3d5698SJohn Baldwin pshufd $16,-64(%edx),%xmm6 3033bc3d5698SJohn Baldwin pmuludq (%eax),%xmm5 3034bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 3035bc3d5698SJohn Baldwin pmuludq 64(%eax),%xmm6 3036bc3d5698SJohn Baldwin pshufd $16,-112(%edx),%xmm7 3037bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 3038bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 3039bc3d5698SJohn Baldwin pmuludq 32(%eax),%xmm7 3040bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 3041bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3042bc3d5698SJohn Baldwin pmuludq 16(%eax),%xmm5 3043bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 3044bc3d5698SJohn Baldwin pshufd $16,-48(%edx),%xmm7 3045bc3d5698SJohn Baldwin pmuludq (%eax),%xmm6 3046bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 3047bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 3048bc3d5698SJohn Baldwin pmuludq 64(%eax),%xmm7 3049bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 3050bc3d5698SJohn Baldwin pmuludq 48(%eax),%xmm5 3051bc3d5698SJohn Baldwin pshufd $16,-96(%edx),%xmm6 3052bc3d5698SJohn Baldwin paddq %xmm7,%xmm1 3053bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 3054bc3d5698SJohn Baldwin pmuludq 16(%eax),%xmm6 3055bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 3056bc3d5698SJohn Baldwin 
pshufd $16,-32(%edx),%xmm5 3057bc3d5698SJohn Baldwin pmuludq (%eax),%xmm7 3058bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 3059bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3060bc3d5698SJohn Baldwin pmuludq 64(%eax),%xmm5 3061bc3d5698SJohn Baldwin paddq %xmm7,%xmm3 3062bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 3063bc3d5698SJohn Baldwin pmuludq 48(%eax),%xmm6 3064bc3d5698SJohn Baldwin paddq %xmm5,%xmm2 3065bc3d5698SJohn Baldwin pmuludq 32(%eax),%xmm7 3066bc3d5698SJohn Baldwin pshufd $16,-80(%edx),%xmm5 3067bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 3068bc3d5698SJohn Baldwin pshufd $16,-16(%edx),%xmm6 3069bc3d5698SJohn Baldwin pmuludq (%eax),%xmm5 3070bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 3071bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 3072bc3d5698SJohn Baldwin pmuludq 64(%eax),%xmm6 3073bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 3074bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 3075bc3d5698SJohn Baldwin pmuludq 16(%eax),%xmm7 3076bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 3077bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3078bc3d5698SJohn Baldwin pmuludq 32(%eax),%xmm5 3079bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 3080bc3d5698SJohn Baldwin pmuludq 48(%eax),%xmm6 3081bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 3082bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 3083bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 3084bc3d5698SJohn Baldwin jz .L017short_tail 3085bc3d5698SJohn Baldwin movdqu -32(%esi),%xmm5 3086bc3d5698SJohn Baldwin movdqu -16(%esi),%xmm6 3087bc3d5698SJohn Baldwin leal 32(%esi),%esi 3088bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 3089bc3d5698SJohn Baldwin movdqa %xmm3,48(%esp) 3090bc3d5698SJohn Baldwin movdqa %xmm4,64(%esp) 3091bc3d5698SJohn Baldwin movdqa %xmm5,%xmm2 3092bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 3093bc3d5698SJohn Baldwin psrldq $6,%xmm2 3094bc3d5698SJohn Baldwin psrldq $6,%xmm3 3095bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 3096bc3d5698SJohn Baldwin punpcklqdq %xmm3,%xmm2 3097bc3d5698SJohn Baldwin punpckhqdq %xmm6,%xmm4 3098bc3d5698SJohn Baldwin punpcklqdq %xmm6,%xmm5 3099bc3d5698SJohn 
Baldwin movdqa %xmm2,%xmm3 3100bc3d5698SJohn Baldwin psrlq $4,%xmm2 3101bc3d5698SJohn Baldwin psrlq $30,%xmm3 3102bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3103bc3d5698SJohn Baldwin psrlq $40,%xmm4 3104bc3d5698SJohn Baldwin psrlq $26,%xmm6 3105bc3d5698SJohn Baldwin pand %xmm7,%xmm5 3106bc3d5698SJohn Baldwin pand %xmm7,%xmm6 3107bc3d5698SJohn Baldwin pand %xmm7,%xmm2 3108bc3d5698SJohn Baldwin pand %xmm7,%xmm3 3109bc3d5698SJohn Baldwin por (%ebx),%xmm4 3110bc3d5698SJohn Baldwin pshufd $16,(%edx),%xmm7 3111bc3d5698SJohn Baldwin paddd 80(%esp),%xmm5 3112bc3d5698SJohn Baldwin paddd 96(%esp),%xmm6 3113bc3d5698SJohn Baldwin paddd 112(%esp),%xmm2 3114bc3d5698SJohn Baldwin paddd 128(%esp),%xmm3 3115bc3d5698SJohn Baldwin paddd 144(%esp),%xmm4 3116bc3d5698SJohn Baldwin movdqa %xmm5,(%esp) 3117bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm5 3118bc3d5698SJohn Baldwin movdqa %xmm6,16(%esp) 3119bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm6 3120bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 3121bc3d5698SJohn Baldwin movdqa %xmm2,%xmm5 3122bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm2 3123bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 3124bc3d5698SJohn Baldwin movdqa %xmm3,%xmm6 3125bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm3 3126bc3d5698SJohn Baldwin paddq 32(%esp),%xmm2 3127bc3d5698SJohn Baldwin movdqa %xmm5,32(%esp) 3128bc3d5698SJohn Baldwin pshufd $16,16(%edx),%xmm5 3129bc3d5698SJohn Baldwin paddq 48(%esp),%xmm3 3130bc3d5698SJohn Baldwin movdqa %xmm6,48(%esp) 3131bc3d5698SJohn Baldwin movdqa %xmm4,%xmm6 3132bc3d5698SJohn Baldwin pmuludq %xmm7,%xmm4 3133bc3d5698SJohn Baldwin paddq 64(%esp),%xmm4 3134bc3d5698SJohn Baldwin movdqa %xmm6,64(%esp) 3135bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3136bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm5 3137bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 3138bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm6 3139bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 3140bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 3141bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm7 3142bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 
3143bc3d5698SJohn Baldwin pshufd $16,80(%edx),%xmm6 3144bc3d5698SJohn Baldwin pmuludq (%esp),%xmm5 3145bc3d5698SJohn Baldwin paddq %xmm7,%xmm2 3146bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm6 3147bc3d5698SJohn Baldwin pshufd $16,32(%edx),%xmm7 3148bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 3149bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 3150bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm7 3151bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 3152bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3153bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm5 3154bc3d5698SJohn Baldwin paddq %xmm7,%xmm4 3155bc3d5698SJohn Baldwin pshufd $16,96(%edx),%xmm7 3156bc3d5698SJohn Baldwin pmuludq (%esp),%xmm6 3157bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 3158bc3d5698SJohn Baldwin movdqa %xmm7,%xmm5 3159bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm7 3160bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 3161bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm5 3162bc3d5698SJohn Baldwin pshufd $16,48(%edx),%xmm6 3163bc3d5698SJohn Baldwin paddq %xmm7,%xmm1 3164bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 3165bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm6 3166bc3d5698SJohn Baldwin paddq %xmm5,%xmm0 3167bc3d5698SJohn Baldwin pshufd $16,112(%edx),%xmm5 3168bc3d5698SJohn Baldwin pmuludq (%esp),%xmm7 3169bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 3170bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3171bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm5 3172bc3d5698SJohn Baldwin paddq %xmm7,%xmm3 3173bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 3174bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm6 3175bc3d5698SJohn Baldwin paddq %xmm5,%xmm2 3176bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm7 3177bc3d5698SJohn Baldwin pshufd $16,64(%edx),%xmm5 3178bc3d5698SJohn Baldwin paddq %xmm6,%xmm1 3179bc3d5698SJohn Baldwin pshufd $16,128(%edx),%xmm6 3180bc3d5698SJohn Baldwin pmuludq (%esp),%xmm5 3181bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 3182bc3d5698SJohn Baldwin movdqa %xmm6,%xmm7 3183bc3d5698SJohn Baldwin pmuludq 64(%esp),%xmm6 3184bc3d5698SJohn Baldwin paddq %xmm5,%xmm4 3185bc3d5698SJohn 
Baldwin movdqa %xmm7,%xmm5 3186bc3d5698SJohn Baldwin pmuludq 16(%esp),%xmm7 3187bc3d5698SJohn Baldwin paddq %xmm6,%xmm3 3188bc3d5698SJohn Baldwin movdqa %xmm5,%xmm6 3189bc3d5698SJohn Baldwin pmuludq 32(%esp),%xmm5 3190bc3d5698SJohn Baldwin paddq %xmm7,%xmm0 3191bc3d5698SJohn Baldwin pmuludq 48(%esp),%xmm6 3192bc3d5698SJohn Baldwin movdqa 64(%ebx),%xmm7 3193bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 3194bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 3195bc3d5698SJohn Baldwin.L017short_tail: 3196bc3d5698SJohn Baldwin pshufd $78,%xmm4,%xmm6 3197bc3d5698SJohn Baldwin pshufd $78,%xmm3,%xmm5 3198bc3d5698SJohn Baldwin paddq %xmm6,%xmm4 3199bc3d5698SJohn Baldwin paddq %xmm5,%xmm3 3200bc3d5698SJohn Baldwin pshufd $78,%xmm0,%xmm6 3201bc3d5698SJohn Baldwin pshufd $78,%xmm1,%xmm5 3202bc3d5698SJohn Baldwin paddq %xmm6,%xmm0 3203bc3d5698SJohn Baldwin paddq %xmm5,%xmm1 3204bc3d5698SJohn Baldwin pshufd $78,%xmm2,%xmm6 3205bc3d5698SJohn Baldwin movdqa %xmm3,%xmm5 3206bc3d5698SJohn Baldwin pand %xmm7,%xmm3 3207bc3d5698SJohn Baldwin psrlq $26,%xmm5 3208bc3d5698SJohn Baldwin paddq %xmm6,%xmm2 3209bc3d5698SJohn Baldwin paddq %xmm4,%xmm5 3210bc3d5698SJohn Baldwin movdqa %xmm0,%xmm6 3211bc3d5698SJohn Baldwin pand %xmm7,%xmm0 3212bc3d5698SJohn Baldwin psrlq $26,%xmm6 3213bc3d5698SJohn Baldwin movdqa %xmm5,%xmm4 3214bc3d5698SJohn Baldwin paddq %xmm1,%xmm6 3215bc3d5698SJohn Baldwin psrlq $26,%xmm5 3216bc3d5698SJohn Baldwin pand %xmm7,%xmm4 3217bc3d5698SJohn Baldwin movdqa %xmm6,%xmm1 3218bc3d5698SJohn Baldwin psrlq $26,%xmm6 3219bc3d5698SJohn Baldwin paddd %xmm5,%xmm0 3220bc3d5698SJohn Baldwin psllq $2,%xmm5 3221bc3d5698SJohn Baldwin paddq %xmm2,%xmm6 3222bc3d5698SJohn Baldwin paddq %xmm0,%xmm5 3223bc3d5698SJohn Baldwin pand %xmm7,%xmm1 3224bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 3225bc3d5698SJohn Baldwin psrlq $26,%xmm6 3226bc3d5698SJohn Baldwin pand %xmm7,%xmm2 3227bc3d5698SJohn Baldwin paddd %xmm3,%xmm6 3228bc3d5698SJohn Baldwin movdqa %xmm5,%xmm0 3229bc3d5698SJohn Baldwin psrlq $26,%xmm5 
/* The first part of this line is the end of a definition that begins earlier in the file (its .size directive names _poly1305_blocks_sse2); it is reproduced unchanged. */
3230bc3d5698SJohn Baldwin movdqa %xmm6,%xmm3 3231bc3d5698SJohn Baldwin psrlq $26,%xmm6 3232bc3d5698SJohn Baldwin pand %xmm7,%xmm0 3233bc3d5698SJohn Baldwin paddd %xmm5,%xmm1 3234bc3d5698SJohn Baldwin pand %xmm7,%xmm3 3235bc3d5698SJohn Baldwin paddd %xmm6,%xmm4 3236bc3d5698SJohn Baldwin.L013done: 3237bc3d5698SJohn Baldwin movd %xmm0,-48(%edi) 3238bc3d5698SJohn Baldwin movd %xmm1,-44(%edi) 3239bc3d5698SJohn Baldwin movd %xmm2,-40(%edi) 3240bc3d5698SJohn Baldwin movd %xmm3,-36(%edi) 3241bc3d5698SJohn Baldwin movd %xmm4,-32(%edi) 3242bc3d5698SJohn Baldwin movl %ebp,%esp 3243bc3d5698SJohn Baldwin.L007nodata: 3244bc3d5698SJohn Baldwin popl %edi 3245bc3d5698SJohn Baldwin popl %esi 3246bc3d5698SJohn Baldwin popl %ebx 3247bc3d5698SJohn Baldwin popl %ebp 3248bc3d5698SJohn Baldwin ret 3249bc3d5698SJohn Baldwin.size _poly1305_blocks_sse2,.-_poly1305_blocks_sse2
/*
 * _poly1305_emit_sse2
 *
 * Takes three stack arguments, read after the four register pushes as
 * 20(%esp), 24(%esp) and 28(%esp).
 * NOTE(review): presumably (state, 16-byte output, 16-byte addend) of the
 * Poly1305 emit step -- confirm against the C-side prototype.
 *
 * Flow:
 *   1. Under #ifdef __CET__ the entry starts with the bytes 243,15,30,251
 *      (F3 0F 1E FB, the ENDBR32 encoding).
 *   2. ebp/ebx/esi/edi are saved; ebp = first argument.
 *   3. If the dword at 20(%ebp) is zero, control jumps to .Lenter_emit
 *      (a label defined outside this span) and nothing below runs.
 *   4. Otherwise the five dwords at 0..16(%ebp) are loaded and repacked into
 *      eax:ebx:ecx:edx:esi with the shift pairs 26/6, 20/12, 14/18 and 8/24,
 *      each step propagating the carry with adcl (consistent with five
 *      26-bit limbs being merged into 32-bit words).
 *   5. The bits of the top word above bit 1 are multiplied by 5 and added
 *      back into the low word.
 *   6. value+5 is computed and a branch-free mask picks either it or the
 *      original value.
 *   7. The 16 bytes at the third argument are added and the 16-byte result
 *      is stored through the second argument.
 */
3250bc3d5698SJohn Baldwin.align 32 3251bc3d5698SJohn Baldwin.type _poly1305_emit_sse2,@function 3252bc3d5698SJohn Baldwin.align 16 3253bc3d5698SJohn Baldwin_poly1305_emit_sse2: 3254*c0855eaaSJohn Baldwin #ifdef __CET__ 3255*c0855eaaSJohn Baldwin 3256*c0855eaaSJohn Baldwin.byte 243,15,30,251 3257*c0855eaaSJohn Baldwin #endif 3258*c0855eaaSJohn Baldwin 3259bc3d5698SJohn Baldwin pushl %ebp 3260bc3d5698SJohn Baldwin pushl %ebx 3261bc3d5698SJohn Baldwin pushl %esi 3262bc3d5698SJohn Baldwin pushl %edi 3263bc3d5698SJohn Baldwin movl 20(%esp),%ebp 3264bc3d5698SJohn Baldwin cmpl $0,20(%ebp) 3265bc3d5698SJohn Baldwin je .Lenter_emit 3266bc3d5698SJohn Baldwin movl (%ebp),%eax 3267bc3d5698SJohn Baldwin movl 4(%ebp),%edi 3268bc3d5698SJohn Baldwin movl 8(%ebp),%ecx 3269bc3d5698SJohn Baldwin movl 12(%ebp),%edx 3270bc3d5698SJohn Baldwin movl 16(%ebp),%esi 3271bc3d5698SJohn Baldwin movl %edi,%ebx 3272bc3d5698SJohn Baldwin shll $26,%edi 3273bc3d5698SJohn Baldwin shrl $6,%ebx 3274bc3d5698SJohn Baldwin addl %edi,%eax 3275bc3d5698SJohn Baldwin movl %ecx,%edi 3276bc3d5698SJohn Baldwin adcl $0,%ebx 3277bc3d5698SJohn Baldwin shll $20,%edi
/* Repacking continues: edi is the scratch register holding the left-shifted low part of each input word, and each addl is followed by an adcl $0 into the next word. */
3278bc3d5698SJohn Baldwin shrl $12,%ecx 3279bc3d5698SJohn Baldwin addl %edi,%ebx 3280bc3d5698SJohn Baldwin movl %edx,%edi 3281bc3d5698SJohn Baldwin adcl $0,%ecx 3282bc3d5698SJohn Baldwin shll $14,%edi 3283bc3d5698SJohn Baldwin shrl $18,%edx 3284bc3d5698SJohn Baldwin addl %edi,%ecx 3285bc3d5698SJohn Baldwin movl %esi,%edi 3286bc3d5698SJohn Baldwin adcl $0,%edx 3287bc3d5698SJohn Baldwin shll $8,%edi 3288bc3d5698SJohn Baldwin shrl $24,%esi 3289bc3d5698SJohn Baldwin addl %edi,%edx 3290bc3d5698SJohn Baldwin adcl $0,%esi
/*
 * Split the top word: esi keeps its low 2 bits, edi = the remaining bits
 * shifted right by 2, and ebp = edi*5 (leal). ebp is added into eax and the
 * carry rippled through ebx, ecx, edx, esi (consistent with reduction modulo
 * 2^130-5). Interleaved with that, edi and ebp are reloaded with the second
 * and third stack arguments; movl and leal leave the carry flag untouched.
 */
3291bc3d5698SJohn Baldwin movl %esi,%edi 3292bc3d5698SJohn Baldwin andl $3,%esi 3293bc3d5698SJohn Baldwin shrl $2,%edi 3294bc3d5698SJohn Baldwin leal (%edi,%edi,4),%ebp 3295bc3d5698SJohn Baldwin movl 24(%esp),%edi 3296bc3d5698SJohn Baldwin addl %ebp,%eax 3297bc3d5698SJohn Baldwin movl 28(%esp),%ebp 3298bc3d5698SJohn Baldwin adcl $0,%ebx 3299bc3d5698SJohn Baldwin adcl $0,%ecx 3300bc3d5698SJohn Baldwin adcl $0,%edx 3301bc3d5698SJohn Baldwin adcl $0,%esi
/*
 * Branch-free select between the value and value+5:
 *   - eax..edx are stashed in xmm0..xmm3 while 5 is added with carry into esi;
 *   - esi >> 2 is 0 or 1 at this point, so negl turns it into an all-zero or
 *     all-one mask;
 *   - the masked sum is parked in the 16 output bytes at (%edi), the stashed
 *     originals are reloaded and masked with the complement (notl), and the
 *     two halves are OR-ed back together.
 */
3302bc3d5698SJohn Baldwin movd %eax,%xmm0 3303bc3d5698SJohn Baldwin addl $5,%eax 3304bc3d5698SJohn Baldwin movd %ebx,%xmm1 3305bc3d5698SJohn Baldwin adcl $0,%ebx 3306bc3d5698SJohn Baldwin movd %ecx,%xmm2 3307bc3d5698SJohn Baldwin adcl $0,%ecx 3308bc3d5698SJohn Baldwin movd %edx,%xmm3 3309bc3d5698SJohn Baldwin adcl $0,%edx 3310bc3d5698SJohn Baldwin adcl $0,%esi 3311bc3d5698SJohn Baldwin shrl $2,%esi 3312bc3d5698SJohn Baldwin negl %esi 3313bc3d5698SJohn Baldwin andl %esi,%eax 3314bc3d5698SJohn Baldwin andl %esi,%ebx 3315bc3d5698SJohn Baldwin andl %esi,%ecx 3316bc3d5698SJohn Baldwin andl %esi,%edx 3317bc3d5698SJohn Baldwin movl %eax,(%edi) 3318bc3d5698SJohn Baldwin movd %xmm0,%eax 3319bc3d5698SJohn Baldwin movl %ebx,4(%edi) 3320bc3d5698SJohn Baldwin movd %xmm1,%ebx 3321bc3d5698SJohn Baldwin movl %ecx,8(%edi) 3322bc3d5698SJohn Baldwin movd %xmm2,%ecx 3323bc3d5698SJohn Baldwin movl %edx,12(%edi) 3324bc3d5698SJohn Baldwin movd %xmm3,%edx 3325bc3d5698SJohn Baldwin notl %esi 3326bc3d5698SJohn Baldwin andl %esi,%eax
3327bc3d5698SJohn Baldwin andl %esi,%ebx 3328bc3d5698SJohn Baldwin orl (%edi),%eax 3329bc3d5698SJohn Baldwin andl %esi,%ecx 3330bc3d5698SJohn Baldwin orl 4(%edi),%ebx 3331bc3d5698SJohn Baldwin andl %esi,%edx 3332bc3d5698SJohn Baldwin orl 8(%edi),%ecx 3333bc3d5698SJohn Baldwin orl 12(%edi),%edx
/* Add the four dwords at (%ebp) (third argument) with an addl/adcl chain and store the four result words to (%edi) (second argument); the carry out of the top word is not used. Then restore edi/esi/ebx/ebp and return. */
3334bc3d5698SJohn Baldwin addl (%ebp),%eax 3335bc3d5698SJohn Baldwin adcl 4(%ebp),%ebx 3336bc3d5698SJohn Baldwin movl %eax,(%edi) 3337bc3d5698SJohn Baldwin adcl 8(%ebp),%ecx 3338bc3d5698SJohn Baldwin movl %ebx,4(%edi) 3339bc3d5698SJohn Baldwin adcl 12(%ebp),%edx 3340bc3d5698SJohn Baldwin movl %ecx,8(%edi) 3341bc3d5698SJohn Baldwin movl %edx,12(%edi) 3342bc3d5698SJohn Baldwin popl %edi 3343bc3d5698SJohn Baldwin popl %esi 3344bc3d5698SJohn Baldwin popl %ebx 3345bc3d5698SJohn Baldwin popl %ebp 3346bc3d5698SJohn Baldwin ret 3347bc3d5698SJohn Baldwin.size _poly1305_emit_sse2,.-_poly1305_emit_sse2
/*
 * _poly1305_init_avx2
 *
 * Register-based helper with no stack arguments:
 *   In:  edi = base pointer; 16 bytes are read from 24(%edi).
 *        ebx = pointer to a table whose 16 bytes at offset 64 are used as
 *              an AND mask (presumably 2^26-1 per lane -- the table itself
 *              is not visible here, confirm).
 *   Out: 144 bytes written at offsets 48..191 from the incoming edi;
 *        edi and esp are returned unchanged.
 *   Overwrites without saving: ebp (holds the caller's esp while a 224-byte,
 *        16-byte-aligned scratch frame is in use), ecx, edx, xmm0-xmm7.
 *
 * Entry sequence: the 16 input bytes are split into five fields taken at
 * bit offsets 0, 26, 52, 78 and 104 (byte shifts of 6 and 13 combined with
 * bit shifts of 26, 4 and 30); the first four are ANDed with the mask, the
 * fifth is only shifted. edx is then pointed at esp+144 and ecx set to 2
 * before the loop at .L018square.
 */
3348bc3d5698SJohn Baldwin.align 32 3349bc3d5698SJohn Baldwin.type _poly1305_init_avx2,@function 3350bc3d5698SJohn Baldwin.align 16 3351bc3d5698SJohn Baldwin_poly1305_init_avx2: 3352*c0855eaaSJohn Baldwin #ifdef __CET__ 3353*c0855eaaSJohn Baldwin 3354*c0855eaaSJohn Baldwin.byte 243,15,30,251 3355*c0855eaaSJohn Baldwin #endif 3356*c0855eaaSJohn Baldwin 3357bc3d5698SJohn Baldwin vmovdqu 24(%edi),%xmm4 3358bc3d5698SJohn Baldwin leal 48(%edi),%edi 3359bc3d5698SJohn Baldwin movl %esp,%ebp 3360bc3d5698SJohn Baldwin subl $224,%esp 3361bc3d5698SJohn Baldwin andl $-16,%esp 3362bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%xmm7 3363bc3d5698SJohn Baldwin vpand %xmm7,%xmm4,%xmm0 3364bc3d5698SJohn Baldwin vpsrlq $26,%xmm4,%xmm1 3365bc3d5698SJohn Baldwin vpsrldq $6,%xmm4,%xmm3 3366bc3d5698SJohn Baldwin vpand %xmm7,%xmm1,%xmm1 3367bc3d5698SJohn Baldwin vpsrlq $4,%xmm3,%xmm2 3368bc3d5698SJohn Baldwin vpsrlq $30,%xmm3,%xmm3 3369bc3d5698SJohn Baldwin vpand %xmm7,%xmm2,%xmm2 3370bc3d5698SJohn Baldwin vpand %xmm7,%xmm3,%xmm3 3371bc3d5698SJohn Baldwin vpsrldq $13,%xmm4,%xmm4 3372bc3d5698SJohn Baldwin leal
144(%esp),%edx 3373bc3d5698SJohn Baldwin movl $2,%ecx
/*
 * .L018square -- loop body of _poly1305_init_avx2; it executes twice
 * (ecx starts at 2 and is decremented at the bottom of the body).
 * Set-up for each pass:
 *   - spill xmm0..xmm4 to 0..64(%esp);
 *   - spill 5*xmm1 .. 5*xmm4 (computed as (x << 2) + x per dword) to
 *     80..128(%esp);
 *   - keep an unshuffled copy of xmm1 in xmm6;
 *   - vpshufd $68 duplicates the low qword of each of xmm0..xmm4 into both
 *     halves, and the duplicated copies are stored at 0..64(%edx).
 */
3374bc3d5698SJohn Baldwin.L018square: 3375bc3d5698SJohn Baldwin vmovdqa %xmm0,(%esp) 3376bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%esp) 3377bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%esp) 3378bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%esp) 3379bc3d5698SJohn Baldwin vmovdqa %xmm4,64(%esp) 3380bc3d5698SJohn Baldwin vpslld $2,%xmm1,%xmm6 3381bc3d5698SJohn Baldwin vpslld $2,%xmm2,%xmm5 3382bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm6,%xmm6 3383bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm5,%xmm5 3384bc3d5698SJohn Baldwin vmovdqa %xmm6,80(%esp) 3385bc3d5698SJohn Baldwin vmovdqa %xmm5,96(%esp) 3386bc3d5698SJohn Baldwin vpslld $2,%xmm3,%xmm6 3387bc3d5698SJohn Baldwin vpslld $2,%xmm4,%xmm5 3388bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm6,%xmm6 3389bc3d5698SJohn Baldwin vpaddd %xmm4,%xmm5,%xmm5 3390bc3d5698SJohn Baldwin vmovdqa %xmm6,112(%esp) 3391bc3d5698SJohn Baldwin vmovdqa %xmm5,128(%esp) 3392bc3d5698SJohn Baldwin vpshufd $68,%xmm0,%xmm5 3393bc3d5698SJohn Baldwin vmovdqa %xmm1,%xmm6 3394bc3d5698SJohn Baldwin vpshufd $68,%xmm1,%xmm1 3395bc3d5698SJohn Baldwin vpshufd $68,%xmm2,%xmm2 3396bc3d5698SJohn Baldwin vpshufd $68,%xmm3,%xmm3 3397bc3d5698SJohn Baldwin vpshufd $68,%xmm4,%xmm4 3398bc3d5698SJohn Baldwin vmovdqa %xmm5,(%edx) 3399bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%edx) 3400bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%edx) 3401bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%edx) 3402bc3d5698SJohn Baldwin vmovdqa %xmm4,64(%edx)
/*
 * Multiply-accumulate: xmm0..xmm4 become five sums of 32x32->64-bit products
 * (vpmuludq, summed with vpaddq). One factor of each product comes from the
 * values spilled at 0..128(%esp) (or their register copies), the other from
 * the duplicated copies at 0..64(%edx). xmm5..xmm7 are scratch; xmm7 is
 * reloaded with the mask from 64(%ebx) near the end of this sequence.
 */
3403bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm4,%xmm4 3404bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm3,%xmm3 3405bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm2,%xmm2 3406bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm1,%xmm1 3407bc3d5698SJohn Baldwin vpmuludq %xmm0,%xmm5,%xmm0 3408bc3d5698SJohn Baldwin vpmuludq 48(%edx),%xmm6,%xmm5 3409bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm4,%xmm4 3410bc3d5698SJohn Baldwin vpmuludq 32(%edx),%xmm6,%xmm7 3411bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm3,%xmm3 3412bc3d5698SJohn Baldwin vpmuludq
16(%edx),%xmm6,%xmm5 3413bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm2,%xmm2 3414bc3d5698SJohn Baldwin vmovdqa 80(%esp),%xmm7 3415bc3d5698SJohn Baldwin vpmuludq (%edx),%xmm6,%xmm6 3416bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm1,%xmm1 3417bc3d5698SJohn Baldwin vmovdqa 32(%esp),%xmm5 3418bc3d5698SJohn Baldwin vpmuludq 64(%edx),%xmm7,%xmm7 3419bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm0,%xmm0 3420bc3d5698SJohn Baldwin vpmuludq 32(%edx),%xmm5,%xmm6 3421bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm4,%xmm4 3422bc3d5698SJohn Baldwin vpmuludq 16(%edx),%xmm5,%xmm7 3423bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm3,%xmm3 3424bc3d5698SJohn Baldwin vmovdqa 96(%esp),%xmm6 3425bc3d5698SJohn Baldwin vpmuludq (%edx),%xmm5,%xmm5 3426bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm2,%xmm2 3427bc3d5698SJohn Baldwin vpmuludq 64(%edx),%xmm6,%xmm7 3428bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm1,%xmm1 3429bc3d5698SJohn Baldwin vmovdqa 48(%esp),%xmm5 3430bc3d5698SJohn Baldwin vpmuludq 48(%edx),%xmm6,%xmm6 3431bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm0,%xmm0 3432bc3d5698SJohn Baldwin vpmuludq 16(%edx),%xmm5,%xmm7 3433bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm4,%xmm4 3434bc3d5698SJohn Baldwin vmovdqa 112(%esp),%xmm6 3435bc3d5698SJohn Baldwin vpmuludq (%edx),%xmm5,%xmm5 3436bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm3,%xmm3 3437bc3d5698SJohn Baldwin vpmuludq 64(%edx),%xmm6,%xmm7 3438bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm2,%xmm2 3439bc3d5698SJohn Baldwin vpmuludq 48(%edx),%xmm6,%xmm5 3440bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm1,%xmm1 3441bc3d5698SJohn Baldwin vmovdqa 64(%esp),%xmm7 3442bc3d5698SJohn Baldwin vpmuludq 32(%edx),%xmm6,%xmm6 3443bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm0,%xmm0 3444bc3d5698SJohn Baldwin vmovdqa 128(%esp),%xmm5 3445bc3d5698SJohn Baldwin vpmuludq (%edx),%xmm7,%xmm7 3446bc3d5698SJohn Baldwin vpaddq %xmm7,%xmm4,%xmm4 3447bc3d5698SJohn Baldwin vpmuludq 64(%edx),%xmm5,%xmm6 3448bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm3,%xmm3 3449bc3d5698SJohn Baldwin vpmuludq 16(%edx),%xmm5,%xmm7 3450bc3d5698SJohn Baldwin vpaddq
%xmm7,%xmm0,%xmm0 3451bc3d5698SJohn Baldwin vpmuludq 32(%edx),%xmm5,%xmm6 3452bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm1,%xmm1 3453bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%xmm7 3454bc3d5698SJohn Baldwin vpmuludq 48(%edx),%xmm5,%xmm5 3455bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm2,%xmm2
/*
 * Carry propagation: each accumulator is shifted right by 26 to obtain its
 * carry and ANDed with the mask in xmm7 to keep the low part. Carries flow
 * xmm3 -> xmm4 -> xmm0 and xmm0 -> xmm1 -> xmm2 -> xmm3 -> xmm4. The carry
 * leaving xmm4 is added to xmm0 once as-is and once shifted left by 2,
 * i.e. multiplied by 5.
 */
3456bc3d5698SJohn Baldwin vpsrlq $26,%xmm3,%xmm5 3457bc3d5698SJohn Baldwin vpand %xmm7,%xmm3,%xmm3 3458bc3d5698SJohn Baldwin vpsrlq $26,%xmm0,%xmm6 3459bc3d5698SJohn Baldwin vpand %xmm7,%xmm0,%xmm0 3460bc3d5698SJohn Baldwin vpaddq %xmm5,%xmm4,%xmm4 3461bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm1,%xmm1 3462bc3d5698SJohn Baldwin vpsrlq $26,%xmm4,%xmm5 3463bc3d5698SJohn Baldwin vpand %xmm7,%xmm4,%xmm4 3464bc3d5698SJohn Baldwin vpsrlq $26,%xmm1,%xmm6 3465bc3d5698SJohn Baldwin vpand %xmm7,%xmm1,%xmm1 3466bc3d5698SJohn Baldwin vpaddq %xmm6,%xmm2,%xmm2 3467bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm0,%xmm0 3468bc3d5698SJohn Baldwin vpsllq $2,%xmm5,%xmm5 3469bc3d5698SJohn Baldwin vpsrlq $26,%xmm2,%xmm6 3470bc3d5698SJohn Baldwin vpand %xmm7,%xmm2,%xmm2 3471bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm0,%xmm0 3472bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm3,%xmm3 3473bc3d5698SJohn Baldwin vpsrlq $26,%xmm3,%xmm6 3474bc3d5698SJohn Baldwin vpsrlq $26,%xmm0,%xmm5 3475bc3d5698SJohn Baldwin vpand %xmm7,%xmm0,%xmm0 3476bc3d5698SJohn Baldwin vpand %xmm7,%xmm3,%xmm3 3477bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm1,%xmm1 3478bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4
/* Loop control: after the second pass ecx reaches zero and control leaves through .L019square_break. After the first pass, vpunpcklqdq pairs the low qword of each new result (low half) with the low qword of the value spilled at the start of the pass (high half), and the body is repeated. */
3479bc3d5698SJohn Baldwin decl %ecx 3480bc3d5698SJohn Baldwin jz .L019square_break 3481bc3d5698SJohn Baldwin vpunpcklqdq (%esp),%xmm0,%xmm0 3482bc3d5698SJohn Baldwin vpunpcklqdq 16(%esp),%xmm1,%xmm1 3483bc3d5698SJohn Baldwin vpunpcklqdq 32(%esp),%xmm2,%xmm2 3484bc3d5698SJohn Baldwin vpunpcklqdq 48(%esp),%xmm3,%xmm3 3485bc3d5698SJohn Baldwin vpunpcklqdq 64(%esp),%xmm4,%xmm4 3486bc3d5698SJohn Baldwin jmp .L018square
/* .L019square_break: move each second-pass result into the upper dword of its qword (shift left by 32), OR in the values spilled at 0..64(%esp), reorder the dwords with vpshufd $141 (selectors 1,3,0,2) and store the five vectors unaligned at 0..64(%edi). */
3487bc3d5698SJohn Baldwin.L019square_break: 3488bc3d5698SJohn Baldwin vpsllq $32,%xmm0,%xmm0 3489bc3d5698SJohn Baldwin vpsllq $32,%xmm1,%xmm1
3490bc3d5698SJohn Baldwin vpsllq $32,%xmm2,%xmm2 3491bc3d5698SJohn Baldwin vpsllq $32,%xmm3,%xmm3 3492bc3d5698SJohn Baldwin vpsllq $32,%xmm4,%xmm4 3493bc3d5698SJohn Baldwin vpor (%esp),%xmm0,%xmm0 3494bc3d5698SJohn Baldwin vpor 16(%esp),%xmm1,%xmm1 3495bc3d5698SJohn Baldwin vpor 32(%esp),%xmm2,%xmm2 3496bc3d5698SJohn Baldwin vpor 48(%esp),%xmm3,%xmm3 3497bc3d5698SJohn Baldwin vpor 64(%esp),%xmm4,%xmm4 3498bc3d5698SJohn Baldwin vpshufd $141,%xmm0,%xmm0 3499bc3d5698SJohn Baldwin vpshufd $141,%xmm1,%xmm1 3500bc3d5698SJohn Baldwin vpshufd $141,%xmm2,%xmm2 3501bc3d5698SJohn Baldwin vpshufd $141,%xmm3,%xmm3 3502bc3d5698SJohn Baldwin vpshufd $141,%xmm4,%xmm4 3503bc3d5698SJohn Baldwin vmovdqu %xmm0,(%edi) 3504bc3d5698SJohn Baldwin vmovdqu %xmm1,16(%edi) 3505bc3d5698SJohn Baldwin vmovdqu %xmm2,32(%edi) 3506bc3d5698SJohn Baldwin vmovdqu %xmm3,48(%edi) 3507bc3d5698SJohn Baldwin vmovdqu %xmm4,64(%edi)
/* Also store 5*xmm1 .. 5*xmm4 (again (x << 2) + x per dword) at 80..128(%edi). */
3508bc3d5698SJohn Baldwin vpslld $2,%xmm1,%xmm6 3509bc3d5698SJohn Baldwin vpslld $2,%xmm2,%xmm5 3510bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm6,%xmm6 3511bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm5,%xmm5 3512bc3d5698SJohn Baldwin vmovdqu %xmm6,80(%edi) 3513bc3d5698SJohn Baldwin vmovdqu %xmm5,96(%edi) 3514bc3d5698SJohn Baldwin vpslld $2,%xmm3,%xmm6 3515bc3d5698SJohn Baldwin vpslld $2,%xmm4,%xmm5 3516bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm6,%xmm6 3517bc3d5698SJohn Baldwin vpaddd %xmm4,%xmm5,%xmm5 3518bc3d5698SJohn Baldwin vmovdqu %xmm6,112(%edi) 3519bc3d5698SJohn Baldwin vmovdqu %xmm5,128(%edi)
/* Release the scratch frame (esp = value saved in ebp), step edi back by 48 and return; ebp is left holding the caller's esp rather than its entry value. */
3520bc3d5698SJohn Baldwin movl %ebp,%esp 3521bc3d5698SJohn Baldwin leal -48(%edi),%edi 3522bc3d5698SJohn Baldwin ret 3523bc3d5698SJohn Baldwin.size _poly1305_init_avx2,.-_poly1305_init_avx2
/* Start of _poly1305_blocks_avx2; its body continues on the following lines and is reproduced unchanged here. */
3524bc3d5698SJohn Baldwin.align 32 3525bc3d5698SJohn Baldwin.type _poly1305_blocks_avx2,@function 3526bc3d5698SJohn Baldwin.align 16 3527bc3d5698SJohn Baldwin_poly1305_blocks_avx2: 3528*c0855eaaSJohn Baldwin #ifdef __CET__ 3529*c0855eaaSJohn Baldwin 3530*c0855eaaSJohn Baldwin.byte 243,15,30,251
3531*c0855eaaSJohn Baldwin #endif 3532*c0855eaaSJohn Baldwin 3533bc3d5698SJohn Baldwin pushl %ebp 3534bc3d5698SJohn Baldwin pushl %ebx 3535bc3d5698SJohn Baldwin pushl %esi 3536bc3d5698SJohn Baldwin pushl %edi 3537bc3d5698SJohn Baldwin movl 20(%esp),%edi 3538bc3d5698SJohn Baldwin movl 24(%esp),%esi 3539bc3d5698SJohn Baldwin movl 28(%esp),%ecx 3540bc3d5698SJohn Baldwin movl 20(%edi),%eax 3541bc3d5698SJohn Baldwin andl $-16,%ecx 3542bc3d5698SJohn Baldwin jz .L020nodata 3543bc3d5698SJohn Baldwin cmpl $64,%ecx 3544bc3d5698SJohn Baldwin jae .L021enter_avx2 3545bc3d5698SJohn Baldwin testl %eax,%eax 3546bc3d5698SJohn Baldwin jz .Lenter_blocks 3547bc3d5698SJohn Baldwin.L021enter_avx2: 3548bc3d5698SJohn Baldwin vzeroupper 3549bc3d5698SJohn Baldwin call .L022pic_point 3550bc3d5698SJohn Baldwin.L022pic_point: 3551bc3d5698SJohn Baldwin popl %ebx 3552bc3d5698SJohn Baldwin leal .Lconst_sse2-.L022pic_point(%ebx),%ebx 3553bc3d5698SJohn Baldwin testl %eax,%eax 3554bc3d5698SJohn Baldwin jnz .L023base2_26 3555bc3d5698SJohn Baldwin call _poly1305_init_avx2 3556bc3d5698SJohn Baldwin movl (%edi),%eax 3557bc3d5698SJohn Baldwin movl 3(%edi),%ecx 3558bc3d5698SJohn Baldwin movl 6(%edi),%edx 3559bc3d5698SJohn Baldwin movl 9(%edi),%esi 3560bc3d5698SJohn Baldwin movl 13(%edi),%ebp 3561bc3d5698SJohn Baldwin shrl $2,%ecx 3562bc3d5698SJohn Baldwin andl $67108863,%eax 3563bc3d5698SJohn Baldwin shrl $4,%edx 3564bc3d5698SJohn Baldwin andl $67108863,%ecx 3565bc3d5698SJohn Baldwin shrl $6,%esi 3566bc3d5698SJohn Baldwin andl $67108863,%edx 3567bc3d5698SJohn Baldwin movl %eax,(%edi) 3568bc3d5698SJohn Baldwin movl %ecx,4(%edi) 3569bc3d5698SJohn Baldwin movl %edx,8(%edi) 3570bc3d5698SJohn Baldwin movl %esi,12(%edi) 3571bc3d5698SJohn Baldwin movl %ebp,16(%edi) 3572bc3d5698SJohn Baldwin movl $1,20(%edi) 3573bc3d5698SJohn Baldwin movl 24(%esp),%esi 3574bc3d5698SJohn Baldwin movl 28(%esp),%ecx 3575bc3d5698SJohn Baldwin.L023base2_26: 3576bc3d5698SJohn Baldwin movl 32(%esp),%eax 3577bc3d5698SJohn Baldwin movl 
%esp,%ebp 3578bc3d5698SJohn Baldwin subl $448,%esp 3579bc3d5698SJohn Baldwin andl $-512,%esp 3580bc3d5698SJohn Baldwin vmovdqu 48(%edi),%xmm0 3581bc3d5698SJohn Baldwin leal 288(%esp),%edx 3582bc3d5698SJohn Baldwin vmovdqu 64(%edi),%xmm1 3583bc3d5698SJohn Baldwin vmovdqu 80(%edi),%xmm2 3584bc3d5698SJohn Baldwin vmovdqu 96(%edi),%xmm3 3585bc3d5698SJohn Baldwin vmovdqu 112(%edi),%xmm4 3586bc3d5698SJohn Baldwin leal 48(%edi),%edi 3587bc3d5698SJohn Baldwin vpermq $64,%ymm0,%ymm0 3588bc3d5698SJohn Baldwin vpermq $64,%ymm1,%ymm1 3589bc3d5698SJohn Baldwin vpermq $64,%ymm2,%ymm2 3590bc3d5698SJohn Baldwin vpermq $64,%ymm3,%ymm3 3591bc3d5698SJohn Baldwin vpermq $64,%ymm4,%ymm4 3592bc3d5698SJohn Baldwin vpshufd $200,%ymm0,%ymm0 3593bc3d5698SJohn Baldwin vpshufd $200,%ymm1,%ymm1 3594bc3d5698SJohn Baldwin vpshufd $200,%ymm2,%ymm2 3595bc3d5698SJohn Baldwin vpshufd $200,%ymm3,%ymm3 3596bc3d5698SJohn Baldwin vpshufd $200,%ymm4,%ymm4 3597bc3d5698SJohn Baldwin vmovdqa %ymm0,-128(%edx) 3598bc3d5698SJohn Baldwin vmovdqu 80(%edi),%xmm0 3599bc3d5698SJohn Baldwin vmovdqa %ymm1,-96(%edx) 3600bc3d5698SJohn Baldwin vmovdqu 96(%edi),%xmm1 3601bc3d5698SJohn Baldwin vmovdqa %ymm2,-64(%edx) 3602bc3d5698SJohn Baldwin vmovdqu 112(%edi),%xmm2 3603bc3d5698SJohn Baldwin vmovdqa %ymm3,-32(%edx) 3604bc3d5698SJohn Baldwin vmovdqu 128(%edi),%xmm3 3605bc3d5698SJohn Baldwin vmovdqa %ymm4,(%edx) 3606bc3d5698SJohn Baldwin vpermq $64,%ymm0,%ymm0 3607bc3d5698SJohn Baldwin vpermq $64,%ymm1,%ymm1 3608bc3d5698SJohn Baldwin vpermq $64,%ymm2,%ymm2 3609bc3d5698SJohn Baldwin vpermq $64,%ymm3,%ymm3 3610bc3d5698SJohn Baldwin vpshufd $200,%ymm0,%ymm0 3611bc3d5698SJohn Baldwin vpshufd $200,%ymm1,%ymm1 3612bc3d5698SJohn Baldwin vpshufd $200,%ymm2,%ymm2 3613bc3d5698SJohn Baldwin vpshufd $200,%ymm3,%ymm3 3614bc3d5698SJohn Baldwin vmovdqa %ymm0,32(%edx) 3615bc3d5698SJohn Baldwin vmovd -48(%edi),%xmm0 3616bc3d5698SJohn Baldwin vmovdqa %ymm1,64(%edx) 3617bc3d5698SJohn Baldwin vmovd -44(%edi),%xmm1 3618bc3d5698SJohn Baldwin 
vmovdqa %ymm2,96(%edx) 3619bc3d5698SJohn Baldwin vmovd -40(%edi),%xmm2 3620bc3d5698SJohn Baldwin vmovdqa %ymm3,128(%edx) 3621bc3d5698SJohn Baldwin vmovd -36(%edi),%xmm3 3622bc3d5698SJohn Baldwin vmovd -32(%edi),%xmm4 3623bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%ymm7 3624bc3d5698SJohn Baldwin negl %eax 3625bc3d5698SJohn Baldwin testl $63,%ecx 3626bc3d5698SJohn Baldwin jz .L024even 3627bc3d5698SJohn Baldwin movl %ecx,%edx 3628bc3d5698SJohn Baldwin andl $-64,%ecx 3629bc3d5698SJohn Baldwin andl $63,%edx 3630bc3d5698SJohn Baldwin vmovdqu (%esi),%xmm5 3631bc3d5698SJohn Baldwin cmpl $32,%edx 3632bc3d5698SJohn Baldwin jb .L025one 3633bc3d5698SJohn Baldwin vmovdqu 16(%esi),%xmm6 3634bc3d5698SJohn Baldwin je .L026two 3635bc3d5698SJohn Baldwin vinserti128 $1,32(%esi),%ymm5,%ymm5 3636bc3d5698SJohn Baldwin leal 48(%esi),%esi 3637bc3d5698SJohn Baldwin leal 8(%ebx),%ebx 3638bc3d5698SJohn Baldwin leal 296(%esp),%edx 3639bc3d5698SJohn Baldwin jmp .L027tail 3640bc3d5698SJohn Baldwin.L026two: 3641bc3d5698SJohn Baldwin leal 32(%esi),%esi 3642bc3d5698SJohn Baldwin leal 16(%ebx),%ebx 3643bc3d5698SJohn Baldwin leal 304(%esp),%edx 3644bc3d5698SJohn Baldwin jmp .L027tail 3645bc3d5698SJohn Baldwin.L025one: 3646bc3d5698SJohn Baldwin leal 16(%esi),%esi 3647bc3d5698SJohn Baldwin vpxor %ymm6,%ymm6,%ymm6 3648bc3d5698SJohn Baldwin leal 32(%ebx,%eax,8),%ebx 3649bc3d5698SJohn Baldwin leal 312(%esp),%edx 3650bc3d5698SJohn Baldwin jmp .L027tail 3651bc3d5698SJohn Baldwin.align 32 3652bc3d5698SJohn Baldwin.L024even: 3653bc3d5698SJohn Baldwin vmovdqu (%esi),%xmm5 3654bc3d5698SJohn Baldwin vmovdqu 16(%esi),%xmm6 3655bc3d5698SJohn Baldwin vinserti128 $1,32(%esi),%ymm5,%ymm5 3656bc3d5698SJohn Baldwin vinserti128 $1,48(%esi),%ymm6,%ymm6 3657bc3d5698SJohn Baldwin leal 64(%esi),%esi 3658bc3d5698SJohn Baldwin subl $64,%ecx 3659bc3d5698SJohn Baldwin jz .L027tail 3660bc3d5698SJohn Baldwin.L028loop: 3661bc3d5698SJohn Baldwin vmovdqa %ymm2,64(%esp) 3662bc3d5698SJohn Baldwin vpsrldq $6,%ymm5,%ymm2 3663bc3d5698SJohn 
Baldwin vmovdqa %ymm0,(%esp) 3664bc3d5698SJohn Baldwin vpsrldq $6,%ymm6,%ymm0 3665bc3d5698SJohn Baldwin vmovdqa %ymm1,32(%esp) 3666bc3d5698SJohn Baldwin vpunpckhqdq %ymm6,%ymm5,%ymm1 3667bc3d5698SJohn Baldwin vpunpcklqdq %ymm6,%ymm5,%ymm5 3668bc3d5698SJohn Baldwin vpunpcklqdq %ymm0,%ymm2,%ymm2 3669bc3d5698SJohn Baldwin vpsrlq $30,%ymm2,%ymm0 3670bc3d5698SJohn Baldwin vpsrlq $4,%ymm2,%ymm2 3671bc3d5698SJohn Baldwin vpsrlq $26,%ymm5,%ymm6 3672bc3d5698SJohn Baldwin vpsrlq $40,%ymm1,%ymm1 3673bc3d5698SJohn Baldwin vpand %ymm7,%ymm2,%ymm2 3674bc3d5698SJohn Baldwin vpand %ymm7,%ymm5,%ymm5 3675bc3d5698SJohn Baldwin vpand %ymm7,%ymm6,%ymm6 3676bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 3677bc3d5698SJohn Baldwin vpor (%ebx),%ymm1,%ymm1 3678bc3d5698SJohn Baldwin vpaddq 64(%esp),%ymm2,%ymm2 3679bc3d5698SJohn Baldwin vpaddq (%esp),%ymm5,%ymm5 3680bc3d5698SJohn Baldwin vpaddq 32(%esp),%ymm6,%ymm6 3681bc3d5698SJohn Baldwin vpaddq %ymm3,%ymm0,%ymm0 3682bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm1,%ymm1 3683bc3d5698SJohn Baldwin vpmuludq -96(%edx),%ymm2,%ymm3 3684bc3d5698SJohn Baldwin vmovdqa %ymm6,32(%esp) 3685bc3d5698SJohn Baldwin vpmuludq -64(%edx),%ymm2,%ymm4 3686bc3d5698SJohn Baldwin vmovdqa %ymm0,96(%esp) 3687bc3d5698SJohn Baldwin vpmuludq 96(%edx),%ymm2,%ymm0 3688bc3d5698SJohn Baldwin vmovdqa %ymm1,128(%esp) 3689bc3d5698SJohn Baldwin vpmuludq 128(%edx),%ymm2,%ymm1 3690bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm2,%ymm2 3691bc3d5698SJohn Baldwin vpmuludq -32(%edx),%ymm5,%ymm7 3692bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm3,%ymm3 3693bc3d5698SJohn Baldwin vpmuludq (%edx),%ymm5,%ymm6 3694bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 3695bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm5,%ymm7 3696bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm0,%ymm0 3697bc3d5698SJohn Baldwin vmovdqa 32(%esp),%ymm7 3698bc3d5698SJohn Baldwin vpmuludq -96(%edx),%ymm5,%ymm6 3699bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 3700bc3d5698SJohn Baldwin vpmuludq -64(%edx),%ymm5,%ymm5 3701bc3d5698SJohn Baldwin 
vpaddq %ymm5,%ymm2,%ymm2 3702bc3d5698SJohn Baldwin vpmuludq -64(%edx),%ymm7,%ymm6 3703bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 3704bc3d5698SJohn Baldwin vpmuludq -32(%edx),%ymm7,%ymm5 3705bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 3706bc3d5698SJohn Baldwin vpmuludq 128(%edx),%ymm7,%ymm6 3707bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 3708bc3d5698SJohn Baldwin vmovdqa 96(%esp),%ymm6 3709bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm7,%ymm5 3710bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 3711bc3d5698SJohn Baldwin vpmuludq -96(%edx),%ymm7,%ymm7 3712bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm2,%ymm2 3713bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm6,%ymm5 3714bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm3,%ymm3 3715bc3d5698SJohn Baldwin vpmuludq -96(%edx),%ymm6,%ymm7 3716bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm4,%ymm4 3717bc3d5698SJohn Baldwin vpmuludq 64(%edx),%ymm6,%ymm5 3718bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 3719bc3d5698SJohn Baldwin vmovdqa 128(%esp),%ymm5 3720bc3d5698SJohn Baldwin vpmuludq 96(%edx),%ymm6,%ymm7 3721bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm1,%ymm1 3722bc3d5698SJohn Baldwin vpmuludq 128(%edx),%ymm6,%ymm6 3723bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 3724bc3d5698SJohn Baldwin vpmuludq 128(%edx),%ymm5,%ymm7 3725bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm3,%ymm3 3726bc3d5698SJohn Baldwin vpmuludq 32(%edx),%ymm5,%ymm6 3727bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 3728bc3d5698SJohn Baldwin vpmuludq -128(%edx),%ymm5,%ymm7 3729bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm4,%ymm4 3730bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%ymm7 3731bc3d5698SJohn Baldwin vpmuludq 64(%edx),%ymm5,%ymm6 3732bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 3733bc3d5698SJohn Baldwin vpmuludq 96(%edx),%ymm5,%ymm5 3734bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 3735bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm5 3736bc3d5698SJohn Baldwin vpand %ymm7,%ymm3,%ymm3 3737bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm6 3738bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 
3739bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 3740bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 3741bc3d5698SJohn Baldwin vpsrlq $26,%ymm4,%ymm5 3742bc3d5698SJohn Baldwin vpand %ymm7,%ymm4,%ymm4 3743bc3d5698SJohn Baldwin vpsrlq $26,%ymm1,%ymm6 3744bc3d5698SJohn Baldwin vpand %ymm7,%ymm1,%ymm1 3745bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 3746bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 3747bc3d5698SJohn Baldwin vpsllq $2,%ymm5,%ymm5 3748bc3d5698SJohn Baldwin vpsrlq $26,%ymm2,%ymm6 3749bc3d5698SJohn Baldwin vpand %ymm7,%ymm2,%ymm2 3750bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 3751bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 3752bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm6 3753bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm5 3754bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 3755bc3d5698SJohn Baldwin vpand %ymm7,%ymm3,%ymm3 3756bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 3757bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 3758bc3d5698SJohn Baldwin vmovdqu (%esi),%xmm5 3759bc3d5698SJohn Baldwin vmovdqu 16(%esi),%xmm6 3760bc3d5698SJohn Baldwin vinserti128 $1,32(%esi),%ymm5,%ymm5 3761bc3d5698SJohn Baldwin vinserti128 $1,48(%esi),%ymm6,%ymm6 3762bc3d5698SJohn Baldwin leal 64(%esi),%esi 3763bc3d5698SJohn Baldwin subl $64,%ecx 3764bc3d5698SJohn Baldwin jnz .L028loop 3765bc3d5698SJohn Baldwin.L027tail: 3766bc3d5698SJohn Baldwin vmovdqa %ymm2,64(%esp) 3767bc3d5698SJohn Baldwin vpsrldq $6,%ymm5,%ymm2 3768bc3d5698SJohn Baldwin vmovdqa %ymm0,(%esp) 3769bc3d5698SJohn Baldwin vpsrldq $6,%ymm6,%ymm0 3770bc3d5698SJohn Baldwin vmovdqa %ymm1,32(%esp) 3771bc3d5698SJohn Baldwin vpunpckhqdq %ymm6,%ymm5,%ymm1 3772bc3d5698SJohn Baldwin vpunpcklqdq %ymm6,%ymm5,%ymm5 3773bc3d5698SJohn Baldwin vpunpcklqdq %ymm0,%ymm2,%ymm2 3774bc3d5698SJohn Baldwin vpsrlq $30,%ymm2,%ymm0 3775bc3d5698SJohn Baldwin vpsrlq $4,%ymm2,%ymm2 3776bc3d5698SJohn Baldwin vpsrlq $26,%ymm5,%ymm6 3777bc3d5698SJohn Baldwin vpsrlq $40,%ymm1,%ymm1 3778bc3d5698SJohn Baldwin vpand %ymm7,%ymm2,%ymm2 
3779bc3d5698SJohn Baldwin vpand %ymm7,%ymm5,%ymm5 3780bc3d5698SJohn Baldwin vpand %ymm7,%ymm6,%ymm6 3781bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 3782bc3d5698SJohn Baldwin vpor (%ebx),%ymm1,%ymm1 3783bc3d5698SJohn Baldwin andl $-64,%ebx 3784bc3d5698SJohn Baldwin vpaddq 64(%esp),%ymm2,%ymm2 3785bc3d5698SJohn Baldwin vpaddq (%esp),%ymm5,%ymm5 3786bc3d5698SJohn Baldwin vpaddq 32(%esp),%ymm6,%ymm6 3787bc3d5698SJohn Baldwin vpaddq %ymm3,%ymm0,%ymm0 3788bc3d5698SJohn Baldwin vpaddq %ymm4,%ymm1,%ymm1 3789bc3d5698SJohn Baldwin vpmuludq -92(%edx),%ymm2,%ymm3 3790bc3d5698SJohn Baldwin vmovdqa %ymm6,32(%esp) 3791bc3d5698SJohn Baldwin vpmuludq -60(%edx),%ymm2,%ymm4 3792bc3d5698SJohn Baldwin vmovdqa %ymm0,96(%esp) 3793bc3d5698SJohn Baldwin vpmuludq 100(%edx),%ymm2,%ymm0 3794bc3d5698SJohn Baldwin vmovdqa %ymm1,128(%esp) 3795bc3d5698SJohn Baldwin vpmuludq 132(%edx),%ymm2,%ymm1 3796bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm2,%ymm2 3797bc3d5698SJohn Baldwin vpmuludq -28(%edx),%ymm5,%ymm7 3798bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm3,%ymm3 3799bc3d5698SJohn Baldwin vpmuludq 4(%edx),%ymm5,%ymm6 3800bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 3801bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm5,%ymm7 3802bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm0,%ymm0 3803bc3d5698SJohn Baldwin vmovdqa 32(%esp),%ymm7 3804bc3d5698SJohn Baldwin vpmuludq -92(%edx),%ymm5,%ymm6 3805bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 3806bc3d5698SJohn Baldwin vpmuludq -60(%edx),%ymm5,%ymm5 3807bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 3808bc3d5698SJohn Baldwin vpmuludq -60(%edx),%ymm7,%ymm6 3809bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 3810bc3d5698SJohn Baldwin vpmuludq -28(%edx),%ymm7,%ymm5 3811bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 3812bc3d5698SJohn Baldwin vpmuludq 132(%edx),%ymm7,%ymm6 3813bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 3814bc3d5698SJohn Baldwin vmovdqa 96(%esp),%ymm6 3815bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm7,%ymm5 3816bc3d5698SJohn Baldwin vpaddq 
%ymm5,%ymm1,%ymm1 3817bc3d5698SJohn Baldwin vpmuludq -92(%edx),%ymm7,%ymm7 3818bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm2,%ymm2 3819bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm6,%ymm5 3820bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm3,%ymm3 3821bc3d5698SJohn Baldwin vpmuludq -92(%edx),%ymm6,%ymm7 3822bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm4,%ymm4 3823bc3d5698SJohn Baldwin vpmuludq 68(%edx),%ymm6,%ymm5 3824bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 3825bc3d5698SJohn Baldwin vmovdqa 128(%esp),%ymm5 3826bc3d5698SJohn Baldwin vpmuludq 100(%edx),%ymm6,%ymm7 3827bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm1,%ymm1 3828bc3d5698SJohn Baldwin vpmuludq 132(%edx),%ymm6,%ymm6 3829bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 3830bc3d5698SJohn Baldwin vpmuludq 132(%edx),%ymm5,%ymm7 3831bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm3,%ymm3 3832bc3d5698SJohn Baldwin vpmuludq 36(%edx),%ymm5,%ymm6 3833bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 3834bc3d5698SJohn Baldwin vpmuludq -124(%edx),%ymm5,%ymm7 3835bc3d5698SJohn Baldwin vpaddq %ymm7,%ymm4,%ymm4 3836bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%ymm7 3837bc3d5698SJohn Baldwin vpmuludq 68(%edx),%ymm5,%ymm6 3838bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 3839bc3d5698SJohn Baldwin vpmuludq 100(%edx),%ymm5,%ymm5 3840bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 3841bc3d5698SJohn Baldwin vpsrldq $8,%ymm4,%ymm5 3842bc3d5698SJohn Baldwin vpsrldq $8,%ymm3,%ymm6 3843bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 3844bc3d5698SJohn Baldwin vpsrldq $8,%ymm0,%ymm5 3845bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 3846bc3d5698SJohn Baldwin vpsrldq $8,%ymm1,%ymm6 3847bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 3848bc3d5698SJohn Baldwin vpsrldq $8,%ymm2,%ymm5 3849bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 3850bc3d5698SJohn Baldwin vpermq $2,%ymm4,%ymm6 3851bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm2,%ymm2 3852bc3d5698SJohn Baldwin vpermq $2,%ymm3,%ymm5 3853bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 3854bc3d5698SJohn Baldwin vpermq $2,%ymm0,%ymm6 
3855bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm3,%ymm3 3856bc3d5698SJohn Baldwin vpermq $2,%ymm1,%ymm5 3857bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm0,%ymm0 3858bc3d5698SJohn Baldwin vpermq $2,%ymm2,%ymm6 3859bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 3860bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 3861bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm5 3862bc3d5698SJohn Baldwin vpand %ymm7,%ymm3,%ymm3 3863bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm6 3864bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 3865bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm4,%ymm4 3866bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm1,%ymm1 3867bc3d5698SJohn Baldwin vpsrlq $26,%ymm4,%ymm5 3868bc3d5698SJohn Baldwin vpand %ymm7,%ymm4,%ymm4 3869bc3d5698SJohn Baldwin vpsrlq $26,%ymm1,%ymm6 3870bc3d5698SJohn Baldwin vpand %ymm7,%ymm1,%ymm1 3871bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm2,%ymm2 3872bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 3873bc3d5698SJohn Baldwin vpsllq $2,%ymm5,%ymm5 3874bc3d5698SJohn Baldwin vpsrlq $26,%ymm2,%ymm6 3875bc3d5698SJohn Baldwin vpand %ymm7,%ymm2,%ymm2 3876bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm0,%ymm0 3877bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm3,%ymm3 3878bc3d5698SJohn Baldwin vpsrlq $26,%ymm3,%ymm6 3879bc3d5698SJohn Baldwin vpsrlq $26,%ymm0,%ymm5 3880bc3d5698SJohn Baldwin vpand %ymm7,%ymm0,%ymm0 3881bc3d5698SJohn Baldwin vpand %ymm7,%ymm3,%ymm3 3882bc3d5698SJohn Baldwin vpaddq %ymm5,%ymm1,%ymm1 3883bc3d5698SJohn Baldwin vpaddq %ymm6,%ymm4,%ymm4 3884bc3d5698SJohn Baldwin cmpl $0,%ecx 3885bc3d5698SJohn Baldwin je .L029done 3886bc3d5698SJohn Baldwin vpshufd $252,%xmm0,%xmm0 3887bc3d5698SJohn Baldwin leal 288(%esp),%edx 3888bc3d5698SJohn Baldwin vpshufd $252,%xmm1,%xmm1 3889bc3d5698SJohn Baldwin vpshufd $252,%xmm2,%xmm2 3890bc3d5698SJohn Baldwin vpshufd $252,%xmm3,%xmm3 3891bc3d5698SJohn Baldwin vpshufd $252,%xmm4,%xmm4 3892bc3d5698SJohn Baldwin jmp .L024even 3893bc3d5698SJohn Baldwin.align 16 3894bc3d5698SJohn Baldwin.L029done: 3895bc3d5698SJohn Baldwin vmovd %xmm0,-48(%edi) 
/*
 * Tail of _poly1305_blocks_avx2 (name taken from the .size directive below;
 * the function begins before this excerpt).  The vmovd stores write the low
 * dword of xmm1..xmm4 to -44(%edi) .. -32(%edi).
 */
3896bc3d5698SJohn Baldwin vmovd %xmm1,-44(%edi)
3897bc3d5698SJohn Baldwin vmovd %xmm2,-40(%edi)
3898bc3d5698SJohn Baldwin vmovd %xmm3,-36(%edi)
3899bc3d5698SJohn Baldwin vmovd %xmm4,-32(%edi)
/* Zero the upper 128 bits of every ymm register before leaving AVX code. */
3900bc3d5698SJohn Baldwin vzeroupper
/*
 * Reload %esp from %ebp.
 * NOTE(review): presumably %ebp holds the stack pointer saved before the
 * frame was set up; that save is not visible in this excerpt - confirm.
 */
3901bc3d5698SJohn Baldwin movl %ebp,%esp
/* Exit path: pop edi, esi, ebx, ebp (in that order) and return. */
3902bc3d5698SJohn Baldwin.L020nodata:
3903bc3d5698SJohn Baldwin popl %edi
3904bc3d5698SJohn Baldwin popl %esi
3905bc3d5698SJohn Baldwin popl %ebx
3906bc3d5698SJohn Baldwin popl %ebp
3907bc3d5698SJohn Baldwin ret
/* Symbol size = distance from _poly1305_blocks_avx2 to this point. */
3908bc3d5698SJohn Baldwin.size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2
/*
 * .Lconst_sse2: 64-byte-aligned constant table.  Offsets are relative to the
 * label.
 * NOTE(review): the vector code above reads (%ebx) and 64(%ebx); %ebx
 * presumably points into this table, but the instruction that loads it is
 * outside this excerpt - confirm.
 */
3909bc3d5698SJohn Baldwin.align 64
3910bc3d5698SJohn Baldwin.Lconst_sse2:
/*
 * +0: four 64-bit lanes, each 16777216 = 1<<24.
 * NOTE(review): presumably the 2^128 pad bit expressed in the top 26-bit
 * limb (4*26 + 24 = 128), OR-ed in by "vpor (%ebx)" - confirm.
 */
3911bc3d5698SJohn Baldwin.long 16777216,0,16777216,0,16777216,0,16777216,0
/* +32: 32 bytes of zero. */
3912bc3d5698SJohn Baldwin.long 0,0,0,0,0,0,0,0
/*
 * +64: four 64-bit lanes, each 67108863 = 2^26 - 1 (0x3ffffff).  The value
 * matches the "vpsrlq $26 / vpand %ymm7" steps, where %ymm7 is loaded from
 * 64(%ebx).
 */
3913bc3d5698SJohn Baldwin.long 67108863,0,67108863,0,67108863,0,67108863,0
/*
 * +96: 0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc - the same four masks
 * that poly1305_init applies to the key words with andl.
 */
3914bc3d5698SJohn Baldwin.long 268435455,268435452,268435452,268435452
/*
 * NUL-terminated ASCII identification string:
 * "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>"
 */
3915bc3d5698SJohn Baldwin.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
3916bc3d5698SJohn Baldwin.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
3917bc3d5698SJohn Baldwin.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
3918bc3d5698SJohn Baldwin.byte 114,103,62,0
3919bc3d5698SJohn Baldwin.align 4
/*
 * Common symbol OPENSSL_ia32cap_P: 16 bytes, 4-byte alignment.
 * poly1305_init reads the dwords at offsets 0 and 8 of it to pick the
 * blocks/emit implementation.
 */
3920bc3d5698SJohn Baldwin.comm OPENSSL_ia32cap_P,16,4
3921*c0855eaaSJohn Baldwin
/*
 * ELF note in the allocatable section .note.gnu.property, 4-byte aligned.
 * Header: namesz, descsz, type.  The numeric local labels delimit the fields
 * so the sizes are computed by the assembler.
 */
3922*c0855eaaSJohn Baldwin .section ".note.gnu.property", "a"
3923*c0855eaaSJohn Baldwin .p2align 2
/* namesz: bytes between labels 0 and 1, i.e. "GNU" plus its NUL. */
3924*c0855eaaSJohn Baldwin .long 1f - 0f
/* descsz: bytes between labels 1 and 4 (the property descriptor). */
3925*c0855eaaSJohn Baldwin .long 4f - 1f
/* Note type 5 (NT_GNU_PROPERTY_TYPE_0). */
3926*c0855eaaSJohn Baldwin .long 5
3927*c0855eaaSJohn Baldwin0:
3928*c0855eaaSJohn Baldwin .asciz "GNU"
3929*c0855eaaSJohn Baldwin1:
3930*c0855eaaSJohn Baldwin .p2align 2
/* Property type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND). */
3931*c0855eaaSJohn Baldwin .long 0xc0000002
/* Property data size: bytes between labels 2 and 3. */
3932*c0855eaaSJohn Baldwin .long 3f - 2f
3933*c0855eaaSJohn Baldwin2:
/* Feature bits: 3 = bit 0 (IBT) | bit 1 (SHSTK). */
3934*c0855eaaSJohn Baldwin .long 3
3935*c0855eaaSJohn Baldwin3:
3936*c0855eaaSJohn Baldwin .p2align 2
3937*c0855eaaSJohn Baldwin4:
/*
 * Closes a preprocessor conditional opened before this excerpt.
 * NOTE(review): presumably the "#ifdef PIC" at the top of the file - confirm.
 */
3938bc3d5698SJohn Baldwin#endif 3939