/* Do not modify. This file is auto-generated from chacha-x86_64.pl. */
/*
 * Review note: the blame-view residue (line number, commit hash and author
 * fused onto every statement) has been stripped and one statement per line
 * restored.  Only comments were added; the instruction stream is unchanged.
 * Any functional change belongs in chacha-x86_64.pl, not in this file.
 *
 * Syntax: GNU as, AT&T operand order (source, destination).
 */
.text



/*
 * Constant tables.  They live in .text and are addressed RIP-relative.
 * Tables loaded with movdqa rely on the 16-byte alignment that follows
 * from the .align 64 directives and the sizes of the preceding tables.
 *
 *   .Lone    {1,0,0,0}: added to the 16-byte counter block to step its
 *            first 32-bit word by one.
 *   .Linc    {0,1,2,3} and .Lfour {4,4,4,4}: used by ChaCha20_4x.
 *   .Lrot16  pshufb mask rotating every 32-bit lane left by 16 bits.
 *   .Lrot24  pshufb mask rotating every 32-bit lane left by 8 bits
 *            (equivalently right by 24).
 *   .Lsigma  the ASCII bytes "expand 32-byte k" plus a NUL, followed by
 *            the NUL-terminated CRYPTOGAMS attribution string.
 *
 * .Lzero, .Lincy, .Leight, .Ltwoy, .Lzeroz, .Lfourz, .Lincz and .Lsixteen
 * are not referenced by the code visible in this section.
 */
.align	64
.Lzero:
.long	0,0,0,0
.Lone:
.long	1,0,0,0
.Linc:
.long	0,1,2,3
.Lfour:
.long	4,4,4,4
.Lincy:
.long	0,2,4,6,1,3,5,7
.Leight:
.long	8,8,8,8,8,8,8,8
.Lrot16:
.byte	0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd
.Lrot24:
.byte	0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
.Ltwoy:
.long	2,0,0,0, 2,0,0,0
.align	64
.Lzeroz:
.long	0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
.Lfourz:
.long	4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
.Lincz:
.long	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.Lsixteen:
.long	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
.Lsigma:
.byte	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0

/*
 * ChaCha20_ctr32 -- global entry point; integer-only fallback path.
 *
 * Register arguments as used by the code (System V AMD64 order):
 *   rdi  output buffer            rsi  input buffer
 *   rdx  length in bytes          rcx  32 bytes of key material
 *   r8   16-byte counter block (its first 32-bit word is the block counter)
 * NOTE(review): this presumably matches OpenSSL's C prototype for
 * ChaCha20_ctr32 -- confirm against the caller's header.
 *
 * Behaviour:
 *   - Returns at once when rdx is zero.
 *   - Loads the capability word at OPENSSL_ia32cap_P+4 into r10 and jumps to
 *     .LChaCha20_ssse3 when bit 9 is set (the SSSE3 flag in OpenSSL's
 *     OPENSSL_ia32cap layout).  The SIMD paths keep reading r10 afterwards.
 *   - Otherwise processes 64 bytes per .Loop_outer pass and finishes a
 *     trailing partial block byte by byte in .Loop_tail.
 *   - The counter word is advanced with paddd, so it wraps at 32 bits.
 *
 * Stack frame after the prologue (rsp is 16-byte aligned):
 *    0..15   used only by .Ltail (keystream words 0-3)
 *   16..31   key bytes 0-15
 *   32..47   key bytes 16-31; doubles as spill space for words 8-11
 *   48..63   counter block
 *   64,72,80 saved length, input pointer, output pointer
 *
 * Working state inside .Loop (word numbers index the 16-word state):
 *   eax,ebx,ecx,edx  words 0-3      r8d..r11d   words 4-7
 *   esi,edi          words 8,9 or 10,11 (the other pair is on the stack)
 *   r12d..r15d       words 12-15    ebp         double-round counter
 *
 * Callee-saved rbx, rbp and r12-r15 are pushed here and restored at .Ldone.
 */
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	64
ChaCha20_ctr32:
.cfi_startproc
	cmpq	$0,%rdx
	je	.Lno_data
	movq	OPENSSL_ia32cap_P+4(%rip),%r10
	testl	$512,%r10d
	jnz	.LChaCha20_ssse3

	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$64+24,%rsp
.cfi_adjust_cfa_offset	64+24
.Lctr32_body:


	movdqu	(%rcx),%xmm1
	movdqu	16(%rcx),%xmm2
	movdqu	(%r8),%xmm3
	movdqa	.Lone(%rip),%xmm4


	movdqa	%xmm1,16(%rsp)
	movdqa	%xmm2,32(%rsp)
	movdqa	%xmm3,48(%rsp)
	movq	%rdx,%rbp
	jmp	.Loop_outer

/* One pass per 64-byte block; rbp holds the bytes still to process. */
.align	32
.Loop_outer:
	/* Words 0-3: the same four bytes groups as .Lsigma, little-endian. */
	movl	$0x61707865,%eax
	movl	$0x3320646e,%ebx
	movl	$0x79622d32,%ecx
	movl	$0x6b206574,%edx
	movl	16(%rsp),%r8d
	movl	20(%rsp),%r9d
	movl	24(%rsp),%r10d
	movl	28(%rsp),%r11d
	movd	%xmm3,%r12d
	movl	52(%rsp),%r13d
	movl	56(%rsp),%r14d
	movl	60(%rsp),%r15d

	/* rbp, rsi and rdi are reused below, so park len/in/out first. */
	movq	%rbp,64+0(%rsp)
	movl	$10,%ebp
	movq	%rsi,64+8(%rsp)
.byte	102,72,15,126,214		/* movq %xmm2,%rsi : words 8 and 9 */
	movq	%rdi,64+16(%rsp)
	movq	%rsi,%rdi
	shrq	$32,%rdi		/* edi = word 9, esi = word 8 */
	jmp	.Loop

/*
 * Ten iterations; each runs four column quarter-rounds followed by four
 * diagonal quarter-rounds, two of them interleaved at a time.  A
 * quarter-round on (a,b,c,d) is:
 *   a+=b; d^=a; d<<<=16;  c+=d; b^=c; b<<<=12;
 *   a+=b; d^=a; d<<<=8;   c+=d; b^=c; b<<<=7;
 */
.align	32
.Loop:
	/* columns (0,4,8,12) and (1,5,9,13) */
	addl	%r8d,%eax
	xorl	%eax,%r12d
	roll	$16,%r12d
	addl	%r9d,%ebx
	xorl	%ebx,%r13d
	roll	$16,%r13d
	addl	%r12d,%esi
	xorl	%esi,%r8d
	roll	$12,%r8d
	addl	%r13d,%edi
	xorl	%edi,%r9d
	roll	$12,%r9d
	addl	%r8d,%eax
	xorl	%eax,%r12d
	roll	$8,%r12d
	addl	%r9d,%ebx
	xorl	%ebx,%r13d
	roll	$8,%r13d
	addl	%r12d,%esi
	xorl	%esi,%r8d
	roll	$7,%r8d
	addl	%r13d,%edi
	xorl	%edi,%r9d
	roll	$7,%r9d
	/* swap the register-resident pair: spill words 8,9, load 10,11 */
	movl	%esi,32(%rsp)
	movl	%edi,36(%rsp)
	movl	40(%rsp),%esi
	movl	44(%rsp),%edi
	/* columns (2,6,10,14) and (3,7,11,15) */
	addl	%r10d,%ecx
	xorl	%ecx,%r14d
	roll	$16,%r14d
	addl	%r11d,%edx
	xorl	%edx,%r15d
	roll	$16,%r15d
	addl	%r14d,%esi
	xorl	%esi,%r10d
	roll	$12,%r10d
	addl	%r15d,%edi
	xorl	%edi,%r11d
	roll	$12,%r11d
	addl	%r10d,%ecx
	xorl	%ecx,%r14d
	roll	$8,%r14d
	addl	%r11d,%edx
	xorl	%edx,%r15d
	roll	$8,%r15d
	addl	%r14d,%esi
	xorl	%esi,%r10d
	roll	$7,%r10d
	addl	%r15d,%edi
	xorl	%edi,%r11d
	roll	$7,%r11d
	/* diagonals (0,5,10,15) and (1,6,11,12) */
	addl	%r9d,%eax
	xorl	%eax,%r15d
	roll	$16,%r15d
	addl	%r10d,%ebx
	xorl	%ebx,%r12d
	roll	$16,%r12d
	addl	%r15d,%esi
	xorl	%esi,%r9d
	roll	$12,%r9d
	addl	%r12d,%edi
	xorl	%edi,%r10d
	roll	$12,%r10d
	addl	%r9d,%eax
	xorl	%eax,%r15d
	roll	$8,%r15d
	addl	%r10d,%ebx
	xorl	%ebx,%r12d
	roll	$8,%r12d
	addl	%r15d,%esi
	xorl	%esi,%r9d
	roll	$7,%r9d
	addl	%r12d,%edi
	xorl	%edi,%r10d
	roll	$7,%r10d
	/* swap back: spill words 10,11, reload 8,9 */
	movl	%esi,40(%rsp)
	movl	%edi,44(%rsp)
	movl	32(%rsp),%esi
	movl	36(%rsp),%edi
	/* diagonals (2,7,8,13) and (3,4,9,14) */
	addl	%r11d,%ecx
	xorl	%ecx,%r13d
	roll	$16,%r13d
	addl	%r8d,%edx
	xorl	%edx,%r14d
	roll	$16,%r14d
	addl	%r13d,%esi
	xorl	%esi,%r11d
	roll	$12,%r11d
	addl	%r14d,%edi
	xorl	%edi,%r8d
	roll	$12,%r8d
	addl	%r11d,%ecx
	xorl	%ecx,%r13d
	roll	$8,%r13d
	addl	%r8d,%edx
	xorl	%edx,%r14d
	roll	$8,%r14d
	addl	%r13d,%esi
	xorl	%esi,%r11d
	roll	$7,%r11d
	addl	%r14d,%edi
	xorl	%edi,%r8d
	roll	$7,%r8d
	decl	%ebp
	jnz	.Loop
	/* Flush words 8,9 so 32..47(%rsp) holds the permuted words 8-11. */
	movl	%edi,36(%rsp)
	movl	%esi,32(%rsp)
	movq	64(%rsp),%rbp
	movdqa	%xmm2,%xmm1		/* xmm2 still holds key bytes 16-31 */
	movq	64+8(%rsp),%rsi
	paddd	%xmm4,%xmm3		/* step the block counter by one */
	movq	64+16(%rsp),%rdi

	/* Feed-forward: add the input state to the permuted state. */
	addl	$0x61707865,%eax
	addl	$0x3320646e,%ebx
	addl	$0x79622d32,%ecx
	addl	$0x6b206574,%edx
	addl	16(%rsp),%r8d
	addl	20(%rsp),%r9d
	addl	24(%rsp),%r10d
	addl	28(%rsp),%r11d
	addl	48(%rsp),%r12d
	addl	52(%rsp),%r13d
	addl	56(%rsp),%r14d
	addl	60(%rsp),%r15d
	paddd	32(%rsp),%xmm1

	cmpq	$64,%rbp
	jb	.Ltail

	/* Full block: XOR 64 input bytes with the keystream and store. */
	xorl	0(%rsi),%eax
	xorl	4(%rsi),%ebx
	xorl	8(%rsi),%ecx
	xorl	12(%rsi),%edx
	xorl	16(%rsi),%r8d
	xorl	20(%rsi),%r9d
	xorl	24(%rsi),%r10d
	xorl	28(%rsi),%r11d
	movdqu	32(%rsi),%xmm0
	xorl	48(%rsi),%r12d
	xorl	52(%rsi),%r13d
	xorl	56(%rsi),%r14d
	xorl	60(%rsi),%r15d
	leaq	64(%rsi),%rsi
	pxor	%xmm1,%xmm0

	/* Restore key bytes 16-31 and publish the new counter word. */
	movdqa	%xmm2,32(%rsp)
	movd	%xmm3,48(%rsp)

	movl	%eax,0(%rdi)
	movl	%ebx,4(%rdi)
	movl	%ecx,8(%rdi)
	movl	%edx,12(%rdi)
	movl	%r8d,16(%rdi)
	movl	%r9d,20(%rdi)
	movl	%r10d,24(%rdi)
	movl	%r11d,28(%rdi)
	movdqu	%xmm0,32(%rdi)
	movl	%r12d,48(%rdi)
	movl	%r13d,52(%rdi)
	movl	%r14d,56(%rdi)
	movl	%r15d,60(%rdi)
	leaq	64(%rdi),%rdi

	subq	$64,%rbp
	jnz	.Loop_outer

	jmp	.Ldone

/*
 * Between 1 and 63 bytes remain (rbp is non-zero here): write the whole
 * keystream block to 0..63(%rsp), then XOR it into the output one byte at
 * a time with rbx as the byte index.
 */
.align	16
.Ltail:
	movl	%eax,0(%rsp)
	movl	%ebx,4(%rsp)
	xorq	%rbx,%rbx
	movl	%ecx,8(%rsp)
	movl	%edx,12(%rsp)
	movl	%r8d,16(%rsp)
	movl	%r9d,20(%rsp)
	movl	%r10d,24(%rsp)
	movl	%r11d,28(%rsp)
	movdqa	%xmm1,32(%rsp)
	movl	%r12d,48(%rsp)
	movl	%r13d,52(%rsp)
	movl	%r14d,56(%rsp)
	movl	%r15d,60(%rsp)

.Loop_tail:
	movzbl	(%rsi,%rbx,1),%eax
	movzbl	(%rsp,%rbx,1),%edx
	leaq	1(%rbx),%rbx
	xorl	%edx,%eax
	movb	%al,-1(%rdi,%rbx,1)
	decq	%rbp
	jnz	.Loop_tail

/* rsi = rsp + frame (64+24) + six pushes (48) = stack pointer at entry. */
.Ldone:
	leaq	64+24+48(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lno_data:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ChaCha20_ctr32,.-ChaCha20_ctr32

/*
 * ChaCha20_ssse3 -- one 64-byte block per pass using 128-bit SIMD.
 *
 * Not exported; it is entered by a jump to .LChaCha20_ssse3 from
 * ChaCha20_ctr32, so rdi/rsi/rdx/rcx/r8 still hold that function's
 * arguments and r10 still holds the capability word loaded there.
 *
 * Dispatch on entry:
 *   - bit 11 of r10d set (the XOP flag in OpenSSL's OPENSSL_ia32cap
 *     layout)                     -> .LChaCha20_4xop
 *   - length exactly 128 bytes    -> .LChaCha20_128
 *   - length above 128 bytes      -> .LChaCha20_4x
 *   - otherwise fall through to .Ldo_sse3_after_all, which ChaCha20_4x
 *     also jumps back to.
 *
 * r9 keeps the entry stack pointer for the epilogue; the 64+8 byte frame
 * leaves rsp 16-byte aligned and stores the four input rows at
 * 0/16/32/48(%rsp).  Working rows: xmm0 constants, xmm1 and xmm2 key,
 * xmm3 counter block.  xmm6 and xmm7 hold the .Lrot16 and .Lrot24 masks,
 * xmm4 and xmm5 are scratch, r8 counts double rounds, rdx counts bytes left.
 */
.type	ChaCha20_ssse3,@function
.align	32
ChaCha20_ssse3:
.cfi_startproc
.LChaCha20_ssse3:
	movq	%rsp,%r9
.cfi_def_cfa_register	%r9
	testl	$2048,%r10d
	jnz	.LChaCha20_4xop
	cmpq	$128,%rdx
	je	.LChaCha20_128
	ja	.LChaCha20_4x

.Ldo_sse3_after_all:
	subq	$64+8,%rsp
	movdqa	.Lsigma(%rip),%xmm0
	movdqu	(%rcx),%xmm1
	movdqu	16(%rcx),%xmm2
	movdqu	(%r8),%xmm3
	movdqa	.Lrot16(%rip),%xmm6
	movdqa	.Lrot24(%rip),%xmm7

	movdqa	%xmm0,0(%rsp)
	movdqa	%xmm1,16(%rsp)
	movdqa	%xmm2,32(%rsp)
	movdqa	%xmm3,48(%rsp)
	movq	$10,%r8
	jmp	.Loop_ssse3

/* Later blocks: reload the saved rows and step the stored counter by one. */
.align	32
.Loop_outer_ssse3:
	movdqa	.Lone(%rip),%xmm3
	movdqa	0(%rsp),%xmm0
	movdqa	16(%rsp),%xmm1
	movdqa	32(%rsp),%xmm2
	paddd	48(%rsp),%xmm3
	movq	$10,%r8
	movdqa	%xmm3,48(%rsp)
	jmp	.Loop_ssse3

/*
 * One double round per iteration.  Rotations by 16 and 8 use pshufb with
 * the masks in xmm6/xmm7; rotations by 12 and 7 use a shift pair plus por.
 * The pshufd triples rotate rows 1-3 so the second half works on the
 * diagonals, then rotate them back.
 */
.align	32
.Loop_ssse3:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222			/* pshufb %xmm6,%xmm3 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222			/* pshufb %xmm6,%xmm3 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decq	%r8
	jnz	.Loop_ssse3
	/* Feed-forward with the rows saved on the stack. */
	paddd	0(%rsp),%xmm0
	paddd	16(%rsp),%xmm1
	paddd	32(%rsp),%xmm2
	paddd	48(%rsp),%xmm3

	cmpq	$64,%rdx
	jb	.Ltail_ssse3

	movdqu	0(%rsi),%xmm4
	movdqu	16(%rsi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%rsi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%rsi),%xmm5
	leaq	64(%rsi),%rsi
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3

	movdqu	%xmm0,0(%rdi)
	movdqu	%xmm1,16(%rdi)
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)
	leaq	64(%rdi),%rdi

	subq	$64,%rdx
	jnz	.Loop_outer_ssse3

	jmp	.Ldone_ssse3

/*
 * Between 1 and 63 bytes remain: overwrite the saved rows with the
 * keystream block and XOR byte by byte, r8 being the byte index.
 */
.align	16
.Ltail_ssse3:
	movdqa	%xmm0,0(%rsp)
	movdqa	%xmm1,16(%rsp)
	movdqa	%xmm2,32(%rsp)
	movdqa	%xmm3,48(%rsp)
	xorq	%r8,%r8

.Loop_tail_ssse3:
	movzbl	(%rsi,%r8,1),%eax
	movzbl	(%rsp,%r8,1),%ecx
	leaq	1(%r8),%r8
	xorl	%ecx,%eax
	movb	%al,-1(%rdi,%r8,1)
	decq	%rdx
	jnz	.Loop_tail_ssse3

.Ldone_ssse3:
	leaq	(%r9),%rsp
.cfi_def_cfa_register	%rsp
.Lssse3_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ChaCha20_ssse3,.-ChaCha20_ssse3

/*
 * ChaCha20_128 -- exactly 128 bytes: two blocks computed side by side.
 *
 * Not exported; reached from ChaCha20_ssse3 when rdx equals 128, with the
 * same argument registers still live.  There is no length loop and no tail.
 *
 * Block A rows: xmm8, xmm9, xmm2, xmm3   (counter block as loaded)
 * Block B rows: xmm10, xmm11, xmm0, xmm1 (counter block plus .Lone)
 * xmm6/xmm7 hold the .Lrot16/.Lrot24 masks, xmm4/xmm5 are scratch, r8
 * counts double rounds and r9 keeps the entry stack pointer.  The input
 * rows are saved at 0/16/32/48(%rsp) for the feed-forward.
 */
.type	ChaCha20_128,@function
.align	32
ChaCha20_128:
.cfi_startproc
.LChaCha20_128:
	movq	%rsp,%r9
.cfi_def_cfa_register	%r9
	subq	$64+8,%rsp
	movdqa	.Lsigma(%rip),%xmm8
	movdqu	(%rcx),%xmm9
	movdqu	16(%rcx),%xmm2
	movdqu	(%r8),%xmm3
	movdqa	.Lone(%rip),%xmm1
	movdqa	.Lrot16(%rip),%xmm6
	movdqa	.Lrot24(%rip),%xmm7

	movdqa	%xmm8,%xmm10
	movdqa	%xmm8,0(%rsp)
	movdqa	%xmm9,%xmm11
	movdqa	%xmm9,16(%rsp)
	movdqa	%xmm2,%xmm0
	movdqa	%xmm2,32(%rsp)
	paddd	%xmm3,%xmm1		/* block B counter = block A counter + 1 */
	movdqa	%xmm3,48(%rsp)
	movq	$10,%r8
	jmp	.Loop_128

/* Same double round as .Loop_ssse3, with blocks A and B interleaved. */
.align	32
.Loop_128:
	paddd	%xmm9,%xmm8
	pxor	%xmm8,%xmm3
	paddd	%xmm11,%xmm10
	pxor	%xmm10,%xmm1
.byte	102,15,56,0,222			/* pshufb %xmm6,%xmm3 */
.byte	102,15,56,0,206			/* pshufb %xmm6,%xmm1 */
	paddd	%xmm3,%xmm2
	paddd	%xmm1,%xmm0
	pxor	%xmm2,%xmm9
	pxor	%xmm0,%xmm11
	movdqa	%xmm9,%xmm4
	psrld	$20,%xmm9
	movdqa	%xmm11,%xmm5
	pslld	$12,%xmm4
	psrld	$20,%xmm11
	por	%xmm4,%xmm9
	pslld	$12,%xmm5
	por	%xmm5,%xmm11
	paddd	%xmm9,%xmm8
	pxor	%xmm8,%xmm3
	paddd	%xmm11,%xmm10
	pxor	%xmm10,%xmm1
.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 */
.byte	102,15,56,0,207			/* pshufb %xmm7,%xmm1 */
	paddd	%xmm3,%xmm2
	paddd	%xmm1,%xmm0
	pxor	%xmm2,%xmm9
	pxor	%xmm0,%xmm11
	movdqa	%xmm9,%xmm4
	psrld	$25,%xmm9
	movdqa	%xmm11,%xmm5
	pslld	$7,%xmm4
	psrld	$25,%xmm11
	por	%xmm4,%xmm9
	pslld	$7,%xmm5
	por	%xmm5,%xmm11
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm9,%xmm9
	pshufd	$147,%xmm3,%xmm3
	pshufd	$78,%xmm0,%xmm0
	pshufd	$57,%xmm11,%xmm11
	pshufd	$147,%xmm1,%xmm1
	paddd	%xmm9,%xmm8
	pxor	%xmm8,%xmm3
	paddd	%xmm11,%xmm10
	pxor	%xmm10,%xmm1
.byte	102,15,56,0,222			/* pshufb %xmm6,%xmm3 */
.byte	102,15,56,0,206			/* pshufb %xmm6,%xmm1 */
	paddd	%xmm3,%xmm2
	paddd	%xmm1,%xmm0
	pxor	%xmm2,%xmm9
	pxor	%xmm0,%xmm11
	movdqa	%xmm9,%xmm4
	psrld	$20,%xmm9
	movdqa	%xmm11,%xmm5
	pslld	$12,%xmm4
	psrld	$20,%xmm11
	por	%xmm4,%xmm9
	pslld	$12,%xmm5
	por	%xmm5,%xmm11
	paddd	%xmm9,%xmm8
	pxor	%xmm8,%xmm3
	paddd	%xmm11,%xmm10
	pxor	%xmm10,%xmm1
.byte	102,15,56,0,223			/* pshufb %xmm7,%xmm3 */
.byte	102,15,56,0,207			/* pshufb %xmm7,%xmm1 */
	paddd	%xmm3,%xmm2
	paddd	%xmm1,%xmm0
	pxor	%xmm2,%xmm9
	pxor	%xmm0,%xmm11
	movdqa	%xmm9,%xmm4
	psrld	$25,%xmm9
	movdqa	%xmm11,%xmm5
	pslld	$7,%xmm4
	psrld	$25,%xmm11
	por	%xmm4,%xmm9
	pslld	$7,%xmm5
	por	%xmm5,%xmm11
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm9,%xmm9
	pshufd	$57,%xmm3,%xmm3
	pshufd	$78,%xmm0,%xmm0
	pshufd	$147,%xmm11,%xmm11
	pshufd	$57,%xmm1,%xmm1
	decq	%r8
	jnz	.Loop_128
	/*
	 * Feed-forward.  Block B adds .Lone on top of the saved counter row,
	 * matching the counter it started from.
	 */
	paddd	0(%rsp),%xmm8
	paddd	16(%rsp),%xmm9
	paddd	32(%rsp),%xmm2
	paddd	48(%rsp),%xmm3
	paddd	.Lone(%rip),%xmm1
	paddd	0(%rsp),%xmm10
	paddd	16(%rsp),%xmm11
	paddd	32(%rsp),%xmm0
	paddd	48(%rsp),%xmm1

	movdqu	0(%rsi),%xmm4
	movdqu	16(%rsi),%xmm5
	pxor	%xmm4,%xmm8
	movdqu	32(%rsi),%xmm4
	pxor	%xmm5,%xmm9
	movdqu	48(%rsi),%xmm5
	pxor	%xmm4,%xmm2
	movdqu	64(%rsi),%xmm4
	pxor	%xmm5,%xmm3
	movdqu	80(%rsi),%xmm5
	pxor	%xmm4,%xmm10
	movdqu	96(%rsi),%xmm4
	pxor	%xmm5,%xmm11
	movdqu	112(%rsi),%xmm5
	pxor	%xmm4,%xmm0
	pxor	%xmm5,%xmm1

	movdqu	%xmm8,0(%rdi)
	movdqu	%xmm9,16(%rdi)
	movdqu	%xmm2,32(%rdi)
	movdqu	%xmm3,48(%rdi)
	movdqu	%xmm10,64(%rdi)
	movdqu	%xmm11,80(%rdi)
	movdqu	%xmm0,96(%rdi)
	movdqu	%xmm1,112(%rdi)
	leaq	(%r9),%rsp
.cfi_def_cfa_register	%rsp
.L128_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ChaCha20_128,.-ChaCha20_128
/*
 * ChaCha20_4x starts here but continues past the end of this section, so
 * the remainder of the text is carried over exactly as found.
 */
622bc3d5698SJohn Baldwin.type ChaCha20_4x,@function 623bc3d5698SJohn Baldwin.align 32 624bc3d5698SJohn BaldwinChaCha20_4x: 625bc3d5698SJohn Baldwin.cfi_startproc 626bc3d5698SJohn Baldwin.LChaCha20_4x: 627bc3d5698SJohn Baldwin movq %rsp,%r9 628bc3d5698SJohn Baldwin.cfi_def_cfa_register %r9 629bc3d5698SJohn Baldwin movq %r10,%r11 630bc3d5698SJohn Baldwin shrq $32,%r10 631bc3d5698SJohn Baldwin testq $32,%r10 632bc3d5698SJohn Baldwin jnz .LChaCha20_8x 633bc3d5698SJohn Baldwin cmpq $192,%rdx 634bc3d5698SJohn Baldwin ja .Lproceed4x 635bc3d5698SJohn Baldwin 636bc3d5698SJohn Baldwin andq $71303168,%r11 637bc3d5698SJohn Baldwin cmpq $4194304,%r11 638bc3d5698SJohn Baldwin je .Ldo_sse3_after_all 639bc3d5698SJohn Baldwin 640bc3d5698SJohn Baldwin.Lproceed4x: 641bc3d5698SJohn Baldwin subq $0x140+8,%rsp 642bc3d5698SJohn Baldwin movdqa .Lsigma(%rip),%xmm11 643bc3d5698SJohn Baldwin movdqu (%rcx),%xmm15 644bc3d5698SJohn Baldwin movdqu 16(%rcx),%xmm7 645bc3d5698SJohn Baldwin movdqu (%r8),%xmm3 646bc3d5698SJohn Baldwin leaq 256(%rsp),%rcx 647bc3d5698SJohn Baldwin leaq .Lrot16(%rip),%r10 648bc3d5698SJohn Baldwin leaq .Lrot24(%rip),%r11 649bc3d5698SJohn Baldwin 650bc3d5698SJohn Baldwin pshufd $0x00,%xmm11,%xmm8 651bc3d5698SJohn Baldwin pshufd $0x55,%xmm11,%xmm9 652bc3d5698SJohn Baldwin movdqa %xmm8,64(%rsp) 653bc3d5698SJohn Baldwin pshufd $0xaa,%xmm11,%xmm10 654bc3d5698SJohn Baldwin movdqa %xmm9,80(%rsp) 655bc3d5698SJohn Baldwin pshufd $0xff,%xmm11,%xmm11 656bc3d5698SJohn Baldwin movdqa %xmm10,96(%rsp) 657bc3d5698SJohn Baldwin movdqa %xmm11,112(%rsp) 658bc3d5698SJohn Baldwin 659bc3d5698SJohn Baldwin pshufd 
$0x00,%xmm15,%xmm12 660bc3d5698SJohn Baldwin pshufd $0x55,%xmm15,%xmm13 661bc3d5698SJohn Baldwin movdqa %xmm12,128-256(%rcx) 662bc3d5698SJohn Baldwin pshufd $0xaa,%xmm15,%xmm14 663bc3d5698SJohn Baldwin movdqa %xmm13,144-256(%rcx) 664bc3d5698SJohn Baldwin pshufd $0xff,%xmm15,%xmm15 665bc3d5698SJohn Baldwin movdqa %xmm14,160-256(%rcx) 666bc3d5698SJohn Baldwin movdqa %xmm15,176-256(%rcx) 667bc3d5698SJohn Baldwin 668bc3d5698SJohn Baldwin pshufd $0x00,%xmm7,%xmm4 669bc3d5698SJohn Baldwin pshufd $0x55,%xmm7,%xmm5 670bc3d5698SJohn Baldwin movdqa %xmm4,192-256(%rcx) 671bc3d5698SJohn Baldwin pshufd $0xaa,%xmm7,%xmm6 672bc3d5698SJohn Baldwin movdqa %xmm5,208-256(%rcx) 673bc3d5698SJohn Baldwin pshufd $0xff,%xmm7,%xmm7 674bc3d5698SJohn Baldwin movdqa %xmm6,224-256(%rcx) 675bc3d5698SJohn Baldwin movdqa %xmm7,240-256(%rcx) 676bc3d5698SJohn Baldwin 677bc3d5698SJohn Baldwin pshufd $0x00,%xmm3,%xmm0 678bc3d5698SJohn Baldwin pshufd $0x55,%xmm3,%xmm1 679bc3d5698SJohn Baldwin paddd .Linc(%rip),%xmm0 680bc3d5698SJohn Baldwin pshufd $0xaa,%xmm3,%xmm2 681bc3d5698SJohn Baldwin movdqa %xmm1,272-256(%rcx) 682bc3d5698SJohn Baldwin pshufd $0xff,%xmm3,%xmm3 683bc3d5698SJohn Baldwin movdqa %xmm2,288-256(%rcx) 684bc3d5698SJohn Baldwin movdqa %xmm3,304-256(%rcx) 685bc3d5698SJohn Baldwin 686bc3d5698SJohn Baldwin jmp .Loop_enter4x 687bc3d5698SJohn Baldwin 688bc3d5698SJohn Baldwin.align 32 689bc3d5698SJohn Baldwin.Loop_outer4x: 690bc3d5698SJohn Baldwin movdqa 64(%rsp),%xmm8 691bc3d5698SJohn Baldwin movdqa 80(%rsp),%xmm9 692bc3d5698SJohn Baldwin movdqa 96(%rsp),%xmm10 693bc3d5698SJohn Baldwin movdqa 112(%rsp),%xmm11 694bc3d5698SJohn Baldwin movdqa 128-256(%rcx),%xmm12 695bc3d5698SJohn Baldwin movdqa 144-256(%rcx),%xmm13 696bc3d5698SJohn Baldwin movdqa 160-256(%rcx),%xmm14 697bc3d5698SJohn Baldwin movdqa 176-256(%rcx),%xmm15 698bc3d5698SJohn Baldwin movdqa 192-256(%rcx),%xmm4 699bc3d5698SJohn Baldwin movdqa 208-256(%rcx),%xmm5 700bc3d5698SJohn Baldwin movdqa 224-256(%rcx),%xmm6 701bc3d5698SJohn 
Baldwin movdqa 240-256(%rcx),%xmm7 702bc3d5698SJohn Baldwin movdqa 256-256(%rcx),%xmm0 703bc3d5698SJohn Baldwin movdqa 272-256(%rcx),%xmm1 704bc3d5698SJohn Baldwin movdqa 288-256(%rcx),%xmm2 705bc3d5698SJohn Baldwin movdqa 304-256(%rcx),%xmm3 706bc3d5698SJohn Baldwin paddd .Lfour(%rip),%xmm0 707bc3d5698SJohn Baldwin 708bc3d5698SJohn Baldwin.Loop_enter4x: 709bc3d5698SJohn Baldwin movdqa %xmm6,32(%rsp) 710bc3d5698SJohn Baldwin movdqa %xmm7,48(%rsp) 711bc3d5698SJohn Baldwin movdqa (%r10),%xmm7 712bc3d5698SJohn Baldwin movl $10,%eax 713bc3d5698SJohn Baldwin movdqa %xmm0,256-256(%rcx) 714bc3d5698SJohn Baldwin jmp .Loop4x 715bc3d5698SJohn Baldwin 716bc3d5698SJohn Baldwin.align 32 717bc3d5698SJohn Baldwin.Loop4x: 718bc3d5698SJohn Baldwin paddd %xmm12,%xmm8 719bc3d5698SJohn Baldwin paddd %xmm13,%xmm9 720bc3d5698SJohn Baldwin pxor %xmm8,%xmm0 721bc3d5698SJohn Baldwin pxor %xmm9,%xmm1 722bc3d5698SJohn Baldwin.byte 102,15,56,0,199 723bc3d5698SJohn Baldwin.byte 102,15,56,0,207 724bc3d5698SJohn Baldwin paddd %xmm0,%xmm4 725bc3d5698SJohn Baldwin paddd %xmm1,%xmm5 726bc3d5698SJohn Baldwin pxor %xmm4,%xmm12 727bc3d5698SJohn Baldwin pxor %xmm5,%xmm13 728bc3d5698SJohn Baldwin movdqa %xmm12,%xmm6 729bc3d5698SJohn Baldwin pslld $12,%xmm12 730bc3d5698SJohn Baldwin psrld $20,%xmm6 731bc3d5698SJohn Baldwin movdqa %xmm13,%xmm7 732bc3d5698SJohn Baldwin pslld $12,%xmm13 733bc3d5698SJohn Baldwin por %xmm6,%xmm12 734bc3d5698SJohn Baldwin psrld $20,%xmm7 735bc3d5698SJohn Baldwin movdqa (%r11),%xmm6 736bc3d5698SJohn Baldwin por %xmm7,%xmm13 737bc3d5698SJohn Baldwin paddd %xmm12,%xmm8 738bc3d5698SJohn Baldwin paddd %xmm13,%xmm9 739bc3d5698SJohn Baldwin pxor %xmm8,%xmm0 740bc3d5698SJohn Baldwin pxor %xmm9,%xmm1 741bc3d5698SJohn Baldwin.byte 102,15,56,0,198 742bc3d5698SJohn Baldwin.byte 102,15,56,0,206 743bc3d5698SJohn Baldwin paddd %xmm0,%xmm4 744bc3d5698SJohn Baldwin paddd %xmm1,%xmm5 745bc3d5698SJohn Baldwin pxor %xmm4,%xmm12 746bc3d5698SJohn Baldwin pxor %xmm5,%xmm13 747bc3d5698SJohn Baldwin 
movdqa %xmm12,%xmm7 748bc3d5698SJohn Baldwin pslld $7,%xmm12 749bc3d5698SJohn Baldwin psrld $25,%xmm7 750bc3d5698SJohn Baldwin movdqa %xmm13,%xmm6 751bc3d5698SJohn Baldwin pslld $7,%xmm13 752bc3d5698SJohn Baldwin por %xmm7,%xmm12 753bc3d5698SJohn Baldwin psrld $25,%xmm6 754bc3d5698SJohn Baldwin movdqa (%r10),%xmm7 755bc3d5698SJohn Baldwin por %xmm6,%xmm13 756bc3d5698SJohn Baldwin movdqa %xmm4,0(%rsp) 757bc3d5698SJohn Baldwin movdqa %xmm5,16(%rsp) 758bc3d5698SJohn Baldwin movdqa 32(%rsp),%xmm4 759bc3d5698SJohn Baldwin movdqa 48(%rsp),%xmm5 760bc3d5698SJohn Baldwin paddd %xmm14,%xmm10 761bc3d5698SJohn Baldwin paddd %xmm15,%xmm11 762bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 763bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 764bc3d5698SJohn Baldwin.byte 102,15,56,0,215 765bc3d5698SJohn Baldwin.byte 102,15,56,0,223 766bc3d5698SJohn Baldwin paddd %xmm2,%xmm4 767bc3d5698SJohn Baldwin paddd %xmm3,%xmm5 768bc3d5698SJohn Baldwin pxor %xmm4,%xmm14 769bc3d5698SJohn Baldwin pxor %xmm5,%xmm15 770bc3d5698SJohn Baldwin movdqa %xmm14,%xmm6 771bc3d5698SJohn Baldwin pslld $12,%xmm14 772bc3d5698SJohn Baldwin psrld $20,%xmm6 773bc3d5698SJohn Baldwin movdqa %xmm15,%xmm7 774bc3d5698SJohn Baldwin pslld $12,%xmm15 775bc3d5698SJohn Baldwin por %xmm6,%xmm14 776bc3d5698SJohn Baldwin psrld $20,%xmm7 777bc3d5698SJohn Baldwin movdqa (%r11),%xmm6 778bc3d5698SJohn Baldwin por %xmm7,%xmm15 779bc3d5698SJohn Baldwin paddd %xmm14,%xmm10 780bc3d5698SJohn Baldwin paddd %xmm15,%xmm11 781bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 782bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 783bc3d5698SJohn Baldwin.byte 102,15,56,0,214 784bc3d5698SJohn Baldwin.byte 102,15,56,0,222 785bc3d5698SJohn Baldwin paddd %xmm2,%xmm4 786bc3d5698SJohn Baldwin paddd %xmm3,%xmm5 787bc3d5698SJohn Baldwin pxor %xmm4,%xmm14 788bc3d5698SJohn Baldwin pxor %xmm5,%xmm15 789bc3d5698SJohn Baldwin movdqa %xmm14,%xmm7 790bc3d5698SJohn Baldwin pslld $7,%xmm14 791bc3d5698SJohn Baldwin psrld $25,%xmm7 792bc3d5698SJohn Baldwin movdqa %xmm15,%xmm6 793bc3d5698SJohn 
Baldwin pslld $7,%xmm15 794bc3d5698SJohn Baldwin por %xmm7,%xmm14 795bc3d5698SJohn Baldwin psrld $25,%xmm6 796bc3d5698SJohn Baldwin movdqa (%r10),%xmm7 797bc3d5698SJohn Baldwin por %xmm6,%xmm15 798bc3d5698SJohn Baldwin paddd %xmm13,%xmm8 799bc3d5698SJohn Baldwin paddd %xmm14,%xmm9 800bc3d5698SJohn Baldwin pxor %xmm8,%xmm3 801bc3d5698SJohn Baldwin pxor %xmm9,%xmm0 802bc3d5698SJohn Baldwin.byte 102,15,56,0,223 803bc3d5698SJohn Baldwin.byte 102,15,56,0,199 804bc3d5698SJohn Baldwin paddd %xmm3,%xmm4 805bc3d5698SJohn Baldwin paddd %xmm0,%xmm5 806bc3d5698SJohn Baldwin pxor %xmm4,%xmm13 807bc3d5698SJohn Baldwin pxor %xmm5,%xmm14 808bc3d5698SJohn Baldwin movdqa %xmm13,%xmm6 809bc3d5698SJohn Baldwin pslld $12,%xmm13 810bc3d5698SJohn Baldwin psrld $20,%xmm6 811bc3d5698SJohn Baldwin movdqa %xmm14,%xmm7 812bc3d5698SJohn Baldwin pslld $12,%xmm14 813bc3d5698SJohn Baldwin por %xmm6,%xmm13 814bc3d5698SJohn Baldwin psrld $20,%xmm7 815bc3d5698SJohn Baldwin movdqa (%r11),%xmm6 816bc3d5698SJohn Baldwin por %xmm7,%xmm14 817bc3d5698SJohn Baldwin paddd %xmm13,%xmm8 818bc3d5698SJohn Baldwin paddd %xmm14,%xmm9 819bc3d5698SJohn Baldwin pxor %xmm8,%xmm3 820bc3d5698SJohn Baldwin pxor %xmm9,%xmm0 821bc3d5698SJohn Baldwin.byte 102,15,56,0,222 822bc3d5698SJohn Baldwin.byte 102,15,56,0,198 823bc3d5698SJohn Baldwin paddd %xmm3,%xmm4 824bc3d5698SJohn Baldwin paddd %xmm0,%xmm5 825bc3d5698SJohn Baldwin pxor %xmm4,%xmm13 826bc3d5698SJohn Baldwin pxor %xmm5,%xmm14 827bc3d5698SJohn Baldwin movdqa %xmm13,%xmm7 828bc3d5698SJohn Baldwin pslld $7,%xmm13 829bc3d5698SJohn Baldwin psrld $25,%xmm7 830bc3d5698SJohn Baldwin movdqa %xmm14,%xmm6 831bc3d5698SJohn Baldwin pslld $7,%xmm14 832bc3d5698SJohn Baldwin por %xmm7,%xmm13 833bc3d5698SJohn Baldwin psrld $25,%xmm6 834bc3d5698SJohn Baldwin movdqa (%r10),%xmm7 835bc3d5698SJohn Baldwin por %xmm6,%xmm14 836bc3d5698SJohn Baldwin movdqa %xmm4,32(%rsp) 837bc3d5698SJohn Baldwin movdqa %xmm5,48(%rsp) 838bc3d5698SJohn Baldwin movdqa 0(%rsp),%xmm4 839bc3d5698SJohn Baldwin 
movdqa 16(%rsp),%xmm5 840bc3d5698SJohn Baldwin paddd %xmm15,%xmm10 841bc3d5698SJohn Baldwin paddd %xmm12,%xmm11 842bc3d5698SJohn Baldwin pxor %xmm10,%xmm1 843bc3d5698SJohn Baldwin pxor %xmm11,%xmm2 844bc3d5698SJohn Baldwin.byte 102,15,56,0,207 845bc3d5698SJohn Baldwin.byte 102,15,56,0,215 846bc3d5698SJohn Baldwin paddd %xmm1,%xmm4 847bc3d5698SJohn Baldwin paddd %xmm2,%xmm5 848bc3d5698SJohn Baldwin pxor %xmm4,%xmm15 849bc3d5698SJohn Baldwin pxor %xmm5,%xmm12 850bc3d5698SJohn Baldwin movdqa %xmm15,%xmm6 851bc3d5698SJohn Baldwin pslld $12,%xmm15 852bc3d5698SJohn Baldwin psrld $20,%xmm6 853bc3d5698SJohn Baldwin movdqa %xmm12,%xmm7 854bc3d5698SJohn Baldwin pslld $12,%xmm12 855bc3d5698SJohn Baldwin por %xmm6,%xmm15 856bc3d5698SJohn Baldwin psrld $20,%xmm7 857bc3d5698SJohn Baldwin movdqa (%r11),%xmm6 858bc3d5698SJohn Baldwin por %xmm7,%xmm12 859bc3d5698SJohn Baldwin paddd %xmm15,%xmm10 860bc3d5698SJohn Baldwin paddd %xmm12,%xmm11 861bc3d5698SJohn Baldwin pxor %xmm10,%xmm1 862bc3d5698SJohn Baldwin pxor %xmm11,%xmm2 863bc3d5698SJohn Baldwin.byte 102,15,56,0,206 864bc3d5698SJohn Baldwin.byte 102,15,56,0,214 865bc3d5698SJohn Baldwin paddd %xmm1,%xmm4 866bc3d5698SJohn Baldwin paddd %xmm2,%xmm5 867bc3d5698SJohn Baldwin pxor %xmm4,%xmm15 868bc3d5698SJohn Baldwin pxor %xmm5,%xmm12 869bc3d5698SJohn Baldwin movdqa %xmm15,%xmm7 870bc3d5698SJohn Baldwin pslld $7,%xmm15 871bc3d5698SJohn Baldwin psrld $25,%xmm7 872bc3d5698SJohn Baldwin movdqa %xmm12,%xmm6 873bc3d5698SJohn Baldwin pslld $7,%xmm12 874bc3d5698SJohn Baldwin por %xmm7,%xmm15 875bc3d5698SJohn Baldwin psrld $25,%xmm6 876bc3d5698SJohn Baldwin movdqa (%r10),%xmm7 877bc3d5698SJohn Baldwin por %xmm6,%xmm12 878bc3d5698SJohn Baldwin decl %eax 879bc3d5698SJohn Baldwin jnz .Loop4x 880bc3d5698SJohn Baldwin 881bc3d5698SJohn Baldwin paddd 64(%rsp),%xmm8 882bc3d5698SJohn Baldwin paddd 80(%rsp),%xmm9 883bc3d5698SJohn Baldwin paddd 96(%rsp),%xmm10 884bc3d5698SJohn Baldwin paddd 112(%rsp),%xmm11 885bc3d5698SJohn Baldwin 886bc3d5698SJohn 
Baldwin movdqa %xmm8,%xmm6 887bc3d5698SJohn Baldwin punpckldq %xmm9,%xmm8 888bc3d5698SJohn Baldwin movdqa %xmm10,%xmm7 889bc3d5698SJohn Baldwin punpckldq %xmm11,%xmm10 890bc3d5698SJohn Baldwin punpckhdq %xmm9,%xmm6 891bc3d5698SJohn Baldwin punpckhdq %xmm11,%xmm7 892bc3d5698SJohn Baldwin movdqa %xmm8,%xmm9 893bc3d5698SJohn Baldwin punpcklqdq %xmm10,%xmm8 894bc3d5698SJohn Baldwin movdqa %xmm6,%xmm11 895bc3d5698SJohn Baldwin punpcklqdq %xmm7,%xmm6 896bc3d5698SJohn Baldwin punpckhqdq %xmm10,%xmm9 897bc3d5698SJohn Baldwin punpckhqdq %xmm7,%xmm11 898bc3d5698SJohn Baldwin paddd 128-256(%rcx),%xmm12 899bc3d5698SJohn Baldwin paddd 144-256(%rcx),%xmm13 900bc3d5698SJohn Baldwin paddd 160-256(%rcx),%xmm14 901bc3d5698SJohn Baldwin paddd 176-256(%rcx),%xmm15 902bc3d5698SJohn Baldwin 903bc3d5698SJohn Baldwin movdqa %xmm8,0(%rsp) 904bc3d5698SJohn Baldwin movdqa %xmm9,16(%rsp) 905bc3d5698SJohn Baldwin movdqa 32(%rsp),%xmm8 906bc3d5698SJohn Baldwin movdqa 48(%rsp),%xmm9 907bc3d5698SJohn Baldwin 908bc3d5698SJohn Baldwin movdqa %xmm12,%xmm10 909bc3d5698SJohn Baldwin punpckldq %xmm13,%xmm12 910bc3d5698SJohn Baldwin movdqa %xmm14,%xmm7 911bc3d5698SJohn Baldwin punpckldq %xmm15,%xmm14 912bc3d5698SJohn Baldwin punpckhdq %xmm13,%xmm10 913bc3d5698SJohn Baldwin punpckhdq %xmm15,%xmm7 914bc3d5698SJohn Baldwin movdqa %xmm12,%xmm13 915bc3d5698SJohn Baldwin punpcklqdq %xmm14,%xmm12 916bc3d5698SJohn Baldwin movdqa %xmm10,%xmm15 917bc3d5698SJohn Baldwin punpcklqdq %xmm7,%xmm10 918bc3d5698SJohn Baldwin punpckhqdq %xmm14,%xmm13 919bc3d5698SJohn Baldwin punpckhqdq %xmm7,%xmm15 920bc3d5698SJohn Baldwin paddd 192-256(%rcx),%xmm4 921bc3d5698SJohn Baldwin paddd 208-256(%rcx),%xmm5 922bc3d5698SJohn Baldwin paddd 224-256(%rcx),%xmm8 923bc3d5698SJohn Baldwin paddd 240-256(%rcx),%xmm9 924bc3d5698SJohn Baldwin 925bc3d5698SJohn Baldwin movdqa %xmm6,32(%rsp) 926bc3d5698SJohn Baldwin movdqa %xmm11,48(%rsp) 927bc3d5698SJohn Baldwin 928bc3d5698SJohn Baldwin movdqa %xmm4,%xmm14 929bc3d5698SJohn Baldwin punpckldq 
%xmm5,%xmm4 930bc3d5698SJohn Baldwin movdqa %xmm8,%xmm7 931bc3d5698SJohn Baldwin punpckldq %xmm9,%xmm8 932bc3d5698SJohn Baldwin punpckhdq %xmm5,%xmm14 933bc3d5698SJohn Baldwin punpckhdq %xmm9,%xmm7 934bc3d5698SJohn Baldwin movdqa %xmm4,%xmm5 935bc3d5698SJohn Baldwin punpcklqdq %xmm8,%xmm4 936bc3d5698SJohn Baldwin movdqa %xmm14,%xmm9 937bc3d5698SJohn Baldwin punpcklqdq %xmm7,%xmm14 938bc3d5698SJohn Baldwin punpckhqdq %xmm8,%xmm5 939bc3d5698SJohn Baldwin punpckhqdq %xmm7,%xmm9 940bc3d5698SJohn Baldwin paddd 256-256(%rcx),%xmm0 941bc3d5698SJohn Baldwin paddd 272-256(%rcx),%xmm1 942bc3d5698SJohn Baldwin paddd 288-256(%rcx),%xmm2 943bc3d5698SJohn Baldwin paddd 304-256(%rcx),%xmm3 944bc3d5698SJohn Baldwin 945bc3d5698SJohn Baldwin movdqa %xmm0,%xmm8 946bc3d5698SJohn Baldwin punpckldq %xmm1,%xmm0 947bc3d5698SJohn Baldwin movdqa %xmm2,%xmm7 948bc3d5698SJohn Baldwin punpckldq %xmm3,%xmm2 949bc3d5698SJohn Baldwin punpckhdq %xmm1,%xmm8 950bc3d5698SJohn Baldwin punpckhdq %xmm3,%xmm7 951bc3d5698SJohn Baldwin movdqa %xmm0,%xmm1 952bc3d5698SJohn Baldwin punpcklqdq %xmm2,%xmm0 953bc3d5698SJohn Baldwin movdqa %xmm8,%xmm3 954bc3d5698SJohn Baldwin punpcklqdq %xmm7,%xmm8 955bc3d5698SJohn Baldwin punpckhqdq %xmm2,%xmm1 956bc3d5698SJohn Baldwin punpckhqdq %xmm7,%xmm3 957bc3d5698SJohn Baldwin cmpq $256,%rdx 958bc3d5698SJohn Baldwin jb .Ltail4x 959bc3d5698SJohn Baldwin 960bc3d5698SJohn Baldwin movdqu 0(%rsi),%xmm6 961bc3d5698SJohn Baldwin movdqu 16(%rsi),%xmm11 962bc3d5698SJohn Baldwin movdqu 32(%rsi),%xmm2 963bc3d5698SJohn Baldwin movdqu 48(%rsi),%xmm7 964bc3d5698SJohn Baldwin pxor 0(%rsp),%xmm6 965bc3d5698SJohn Baldwin pxor %xmm12,%xmm11 966bc3d5698SJohn Baldwin pxor %xmm4,%xmm2 967bc3d5698SJohn Baldwin pxor %xmm0,%xmm7 968bc3d5698SJohn Baldwin 969bc3d5698SJohn Baldwin movdqu %xmm6,0(%rdi) 970bc3d5698SJohn Baldwin movdqu 64(%rsi),%xmm6 971bc3d5698SJohn Baldwin movdqu %xmm11,16(%rdi) 972bc3d5698SJohn Baldwin movdqu 80(%rsi),%xmm11 973bc3d5698SJohn Baldwin movdqu %xmm2,32(%rdi) 
974bc3d5698SJohn Baldwin movdqu 96(%rsi),%xmm2 975bc3d5698SJohn Baldwin movdqu %xmm7,48(%rdi) 976bc3d5698SJohn Baldwin movdqu 112(%rsi),%xmm7 977bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 978bc3d5698SJohn Baldwin pxor 16(%rsp),%xmm6 979bc3d5698SJohn Baldwin pxor %xmm13,%xmm11 980bc3d5698SJohn Baldwin pxor %xmm5,%xmm2 981bc3d5698SJohn Baldwin pxor %xmm1,%xmm7 982bc3d5698SJohn Baldwin 983bc3d5698SJohn Baldwin movdqu %xmm6,64(%rdi) 984bc3d5698SJohn Baldwin movdqu 0(%rsi),%xmm6 985bc3d5698SJohn Baldwin movdqu %xmm11,80(%rdi) 986bc3d5698SJohn Baldwin movdqu 16(%rsi),%xmm11 987bc3d5698SJohn Baldwin movdqu %xmm2,96(%rdi) 988bc3d5698SJohn Baldwin movdqu 32(%rsi),%xmm2 989bc3d5698SJohn Baldwin movdqu %xmm7,112(%rdi) 990bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 991bc3d5698SJohn Baldwin movdqu 48(%rsi),%xmm7 992bc3d5698SJohn Baldwin pxor 32(%rsp),%xmm6 993bc3d5698SJohn Baldwin pxor %xmm10,%xmm11 994bc3d5698SJohn Baldwin pxor %xmm14,%xmm2 995bc3d5698SJohn Baldwin pxor %xmm8,%xmm7 996bc3d5698SJohn Baldwin 997bc3d5698SJohn Baldwin movdqu %xmm6,0(%rdi) 998bc3d5698SJohn Baldwin movdqu 64(%rsi),%xmm6 999bc3d5698SJohn Baldwin movdqu %xmm11,16(%rdi) 1000bc3d5698SJohn Baldwin movdqu 80(%rsi),%xmm11 1001bc3d5698SJohn Baldwin movdqu %xmm2,32(%rdi) 1002bc3d5698SJohn Baldwin movdqu 96(%rsi),%xmm2 1003bc3d5698SJohn Baldwin movdqu %xmm7,48(%rdi) 1004bc3d5698SJohn Baldwin movdqu 112(%rsi),%xmm7 1005bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 1006bc3d5698SJohn Baldwin pxor 48(%rsp),%xmm6 1007bc3d5698SJohn Baldwin pxor %xmm15,%xmm11 1008bc3d5698SJohn Baldwin pxor %xmm9,%xmm2 1009bc3d5698SJohn Baldwin pxor %xmm3,%xmm7 1010bc3d5698SJohn Baldwin movdqu %xmm6,64(%rdi) 1011bc3d5698SJohn Baldwin movdqu %xmm11,80(%rdi) 1012bc3d5698SJohn Baldwin movdqu %xmm2,96(%rdi) 1013bc3d5698SJohn Baldwin movdqu %xmm7,112(%rdi) 1014bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1015bc3d5698SJohn Baldwin 1016bc3d5698SJohn Baldwin subq $256,%rdx 1017bc3d5698SJohn Baldwin jnz .Loop_outer4x 1018bc3d5698SJohn Baldwin 
1019bc3d5698SJohn Baldwin jmp .Ldone4x 1020bc3d5698SJohn Baldwin 1021bc3d5698SJohn Baldwin.Ltail4x: 1022bc3d5698SJohn Baldwin cmpq $192,%rdx 1023bc3d5698SJohn Baldwin jae .L192_or_more4x 1024bc3d5698SJohn Baldwin cmpq $128,%rdx 1025bc3d5698SJohn Baldwin jae .L128_or_more4x 1026bc3d5698SJohn Baldwin cmpq $64,%rdx 1027bc3d5698SJohn Baldwin jae .L64_or_more4x 1028bc3d5698SJohn Baldwin 1029bc3d5698SJohn Baldwin 1030bc3d5698SJohn Baldwin xorq %r10,%r10 1031bc3d5698SJohn Baldwin 1032bc3d5698SJohn Baldwin movdqa %xmm12,16(%rsp) 1033bc3d5698SJohn Baldwin movdqa %xmm4,32(%rsp) 1034bc3d5698SJohn Baldwin movdqa %xmm0,48(%rsp) 1035bc3d5698SJohn Baldwin jmp .Loop_tail4x 1036bc3d5698SJohn Baldwin 1037bc3d5698SJohn Baldwin.align 32 1038bc3d5698SJohn Baldwin.L64_or_more4x: 1039bc3d5698SJohn Baldwin movdqu 0(%rsi),%xmm6 1040bc3d5698SJohn Baldwin movdqu 16(%rsi),%xmm11 1041bc3d5698SJohn Baldwin movdqu 32(%rsi),%xmm2 1042bc3d5698SJohn Baldwin movdqu 48(%rsi),%xmm7 1043bc3d5698SJohn Baldwin pxor 0(%rsp),%xmm6 1044bc3d5698SJohn Baldwin pxor %xmm12,%xmm11 1045bc3d5698SJohn Baldwin pxor %xmm4,%xmm2 1046bc3d5698SJohn Baldwin pxor %xmm0,%xmm7 1047bc3d5698SJohn Baldwin movdqu %xmm6,0(%rdi) 1048bc3d5698SJohn Baldwin movdqu %xmm11,16(%rdi) 1049bc3d5698SJohn Baldwin movdqu %xmm2,32(%rdi) 1050bc3d5698SJohn Baldwin movdqu %xmm7,48(%rdi) 1051bc3d5698SJohn Baldwin je .Ldone4x 1052bc3d5698SJohn Baldwin 1053bc3d5698SJohn Baldwin movdqa 16(%rsp),%xmm6 1054bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 1055bc3d5698SJohn Baldwin xorq %r10,%r10 1056bc3d5698SJohn Baldwin movdqa %xmm6,0(%rsp) 1057bc3d5698SJohn Baldwin movdqa %xmm13,16(%rsp) 1058bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 1059bc3d5698SJohn Baldwin movdqa %xmm5,32(%rsp) 1060bc3d5698SJohn Baldwin subq $64,%rdx 1061bc3d5698SJohn Baldwin movdqa %xmm1,48(%rsp) 1062bc3d5698SJohn Baldwin jmp .Loop_tail4x 1063bc3d5698SJohn Baldwin 1064bc3d5698SJohn Baldwin.align 32 1065bc3d5698SJohn Baldwin.L128_or_more4x: 1066bc3d5698SJohn Baldwin movdqu 0(%rsi),%xmm6 
1067bc3d5698SJohn Baldwin movdqu 16(%rsi),%xmm11 1068bc3d5698SJohn Baldwin movdqu 32(%rsi),%xmm2 1069bc3d5698SJohn Baldwin movdqu 48(%rsi),%xmm7 1070bc3d5698SJohn Baldwin pxor 0(%rsp),%xmm6 1071bc3d5698SJohn Baldwin pxor %xmm12,%xmm11 1072bc3d5698SJohn Baldwin pxor %xmm4,%xmm2 1073bc3d5698SJohn Baldwin pxor %xmm0,%xmm7 1074bc3d5698SJohn Baldwin 1075bc3d5698SJohn Baldwin movdqu %xmm6,0(%rdi) 1076bc3d5698SJohn Baldwin movdqu 64(%rsi),%xmm6 1077bc3d5698SJohn Baldwin movdqu %xmm11,16(%rdi) 1078bc3d5698SJohn Baldwin movdqu 80(%rsi),%xmm11 1079bc3d5698SJohn Baldwin movdqu %xmm2,32(%rdi) 1080bc3d5698SJohn Baldwin movdqu 96(%rsi),%xmm2 1081bc3d5698SJohn Baldwin movdqu %xmm7,48(%rdi) 1082bc3d5698SJohn Baldwin movdqu 112(%rsi),%xmm7 1083bc3d5698SJohn Baldwin pxor 16(%rsp),%xmm6 1084bc3d5698SJohn Baldwin pxor %xmm13,%xmm11 1085bc3d5698SJohn Baldwin pxor %xmm5,%xmm2 1086bc3d5698SJohn Baldwin pxor %xmm1,%xmm7 1087bc3d5698SJohn Baldwin movdqu %xmm6,64(%rdi) 1088bc3d5698SJohn Baldwin movdqu %xmm11,80(%rdi) 1089bc3d5698SJohn Baldwin movdqu %xmm2,96(%rdi) 1090bc3d5698SJohn Baldwin movdqu %xmm7,112(%rdi) 1091bc3d5698SJohn Baldwin je .Ldone4x 1092bc3d5698SJohn Baldwin 1093bc3d5698SJohn Baldwin movdqa 32(%rsp),%xmm6 1094bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 1095bc3d5698SJohn Baldwin xorq %r10,%r10 1096bc3d5698SJohn Baldwin movdqa %xmm6,0(%rsp) 1097bc3d5698SJohn Baldwin movdqa %xmm10,16(%rsp) 1098bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1099bc3d5698SJohn Baldwin movdqa %xmm14,32(%rsp) 1100bc3d5698SJohn Baldwin subq $128,%rdx 1101bc3d5698SJohn Baldwin movdqa %xmm8,48(%rsp) 1102bc3d5698SJohn Baldwin jmp .Loop_tail4x 1103bc3d5698SJohn Baldwin 1104bc3d5698SJohn Baldwin.align 32 1105bc3d5698SJohn Baldwin.L192_or_more4x: 1106bc3d5698SJohn Baldwin movdqu 0(%rsi),%xmm6 1107bc3d5698SJohn Baldwin movdqu 16(%rsi),%xmm11 1108bc3d5698SJohn Baldwin movdqu 32(%rsi),%xmm2 1109bc3d5698SJohn Baldwin movdqu 48(%rsi),%xmm7 1110bc3d5698SJohn Baldwin pxor 0(%rsp),%xmm6 1111bc3d5698SJohn Baldwin pxor 
%xmm12,%xmm11 1112bc3d5698SJohn Baldwin pxor %xmm4,%xmm2 1113bc3d5698SJohn Baldwin pxor %xmm0,%xmm7 1114bc3d5698SJohn Baldwin 1115bc3d5698SJohn Baldwin movdqu %xmm6,0(%rdi) 1116bc3d5698SJohn Baldwin movdqu 64(%rsi),%xmm6 1117bc3d5698SJohn Baldwin movdqu %xmm11,16(%rdi) 1118bc3d5698SJohn Baldwin movdqu 80(%rsi),%xmm11 1119bc3d5698SJohn Baldwin movdqu %xmm2,32(%rdi) 1120bc3d5698SJohn Baldwin movdqu 96(%rsi),%xmm2 1121bc3d5698SJohn Baldwin movdqu %xmm7,48(%rdi) 1122bc3d5698SJohn Baldwin movdqu 112(%rsi),%xmm7 1123bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 1124bc3d5698SJohn Baldwin pxor 16(%rsp),%xmm6 1125bc3d5698SJohn Baldwin pxor %xmm13,%xmm11 1126bc3d5698SJohn Baldwin pxor %xmm5,%xmm2 1127bc3d5698SJohn Baldwin pxor %xmm1,%xmm7 1128bc3d5698SJohn Baldwin 1129bc3d5698SJohn Baldwin movdqu %xmm6,64(%rdi) 1130bc3d5698SJohn Baldwin movdqu 0(%rsi),%xmm6 1131bc3d5698SJohn Baldwin movdqu %xmm11,80(%rdi) 1132bc3d5698SJohn Baldwin movdqu 16(%rsi),%xmm11 1133bc3d5698SJohn Baldwin movdqu %xmm2,96(%rdi) 1134bc3d5698SJohn Baldwin movdqu 32(%rsi),%xmm2 1135bc3d5698SJohn Baldwin movdqu %xmm7,112(%rdi) 1136bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1137bc3d5698SJohn Baldwin movdqu 48(%rsi),%xmm7 1138bc3d5698SJohn Baldwin pxor 32(%rsp),%xmm6 1139bc3d5698SJohn Baldwin pxor %xmm10,%xmm11 1140bc3d5698SJohn Baldwin pxor %xmm14,%xmm2 1141bc3d5698SJohn Baldwin pxor %xmm8,%xmm7 1142bc3d5698SJohn Baldwin movdqu %xmm6,0(%rdi) 1143bc3d5698SJohn Baldwin movdqu %xmm11,16(%rdi) 1144bc3d5698SJohn Baldwin movdqu %xmm2,32(%rdi) 1145bc3d5698SJohn Baldwin movdqu %xmm7,48(%rdi) 1146bc3d5698SJohn Baldwin je .Ldone4x 1147bc3d5698SJohn Baldwin 1148bc3d5698SJohn Baldwin movdqa 48(%rsp),%xmm6 1149bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 1150bc3d5698SJohn Baldwin xorq %r10,%r10 1151bc3d5698SJohn Baldwin movdqa %xmm6,0(%rsp) 1152bc3d5698SJohn Baldwin movdqa %xmm15,16(%rsp) 1153bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 1154bc3d5698SJohn Baldwin movdqa %xmm9,32(%rsp) 1155bc3d5698SJohn Baldwin subq $192,%rdx 
1156bc3d5698SJohn Baldwin movdqa %xmm3,48(%rsp) 1157bc3d5698SJohn Baldwin 1158bc3d5698SJohn Baldwin.Loop_tail4x: 1159bc3d5698SJohn Baldwin movzbl (%rsi,%r10,1),%eax 1160bc3d5698SJohn Baldwin movzbl (%rsp,%r10,1),%ecx 1161bc3d5698SJohn Baldwin leaq 1(%r10),%r10 1162bc3d5698SJohn Baldwin xorl %ecx,%eax 1163bc3d5698SJohn Baldwin movb %al,-1(%rdi,%r10,1) 1164bc3d5698SJohn Baldwin decq %rdx 1165bc3d5698SJohn Baldwin jnz .Loop_tail4x 1166bc3d5698SJohn Baldwin 1167bc3d5698SJohn Baldwin.Ldone4x: 1168bc3d5698SJohn Baldwin leaq (%r9),%rsp 1169bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 1170bc3d5698SJohn Baldwin.L4x_epilogue: 1171bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1172bc3d5698SJohn Baldwin.cfi_endproc 1173bc3d5698SJohn Baldwin.size ChaCha20_4x,.-ChaCha20_4x 1174bc3d5698SJohn Baldwin.type ChaCha20_4xop,@function 1175bc3d5698SJohn Baldwin.align 32 1176bc3d5698SJohn BaldwinChaCha20_4xop: 1177bc3d5698SJohn Baldwin.cfi_startproc 1178bc3d5698SJohn Baldwin.LChaCha20_4xop: 1179bc3d5698SJohn Baldwin movq %rsp,%r9 1180bc3d5698SJohn Baldwin.cfi_def_cfa_register %r9 1181bc3d5698SJohn Baldwin subq $0x140+8,%rsp 1182bc3d5698SJohn Baldwin vzeroupper 1183bc3d5698SJohn Baldwin 1184bc3d5698SJohn Baldwin vmovdqa .Lsigma(%rip),%xmm11 1185bc3d5698SJohn Baldwin vmovdqu (%rcx),%xmm3 1186bc3d5698SJohn Baldwin vmovdqu 16(%rcx),%xmm15 1187bc3d5698SJohn Baldwin vmovdqu (%r8),%xmm7 1188bc3d5698SJohn Baldwin leaq 256(%rsp),%rcx 1189bc3d5698SJohn Baldwin 1190bc3d5698SJohn Baldwin vpshufd $0x00,%xmm11,%xmm8 1191bc3d5698SJohn Baldwin vpshufd $0x55,%xmm11,%xmm9 1192bc3d5698SJohn Baldwin vmovdqa %xmm8,64(%rsp) 1193bc3d5698SJohn Baldwin vpshufd $0xaa,%xmm11,%xmm10 1194bc3d5698SJohn Baldwin vmovdqa %xmm9,80(%rsp) 1195bc3d5698SJohn Baldwin vpshufd $0xff,%xmm11,%xmm11 1196bc3d5698SJohn Baldwin vmovdqa %xmm10,96(%rsp) 1197bc3d5698SJohn Baldwin vmovdqa %xmm11,112(%rsp) 1198bc3d5698SJohn Baldwin 1199bc3d5698SJohn Baldwin vpshufd $0x00,%xmm3,%xmm0 1200bc3d5698SJohn Baldwin vpshufd $0x55,%xmm3,%xmm1 
1201bc3d5698SJohn Baldwin vmovdqa %xmm0,128-256(%rcx) 1202bc3d5698SJohn Baldwin vpshufd $0xaa,%xmm3,%xmm2 1203bc3d5698SJohn Baldwin vmovdqa %xmm1,144-256(%rcx) 1204bc3d5698SJohn Baldwin vpshufd $0xff,%xmm3,%xmm3 1205bc3d5698SJohn Baldwin vmovdqa %xmm2,160-256(%rcx) 1206bc3d5698SJohn Baldwin vmovdqa %xmm3,176-256(%rcx) 1207bc3d5698SJohn Baldwin 1208bc3d5698SJohn Baldwin vpshufd $0x00,%xmm15,%xmm12 1209bc3d5698SJohn Baldwin vpshufd $0x55,%xmm15,%xmm13 1210bc3d5698SJohn Baldwin vmovdqa %xmm12,192-256(%rcx) 1211bc3d5698SJohn Baldwin vpshufd $0xaa,%xmm15,%xmm14 1212bc3d5698SJohn Baldwin vmovdqa %xmm13,208-256(%rcx) 1213bc3d5698SJohn Baldwin vpshufd $0xff,%xmm15,%xmm15 1214bc3d5698SJohn Baldwin vmovdqa %xmm14,224-256(%rcx) 1215bc3d5698SJohn Baldwin vmovdqa %xmm15,240-256(%rcx) 1216bc3d5698SJohn Baldwin 1217bc3d5698SJohn Baldwin vpshufd $0x00,%xmm7,%xmm4 1218bc3d5698SJohn Baldwin vpshufd $0x55,%xmm7,%xmm5 1219bc3d5698SJohn Baldwin vpaddd .Linc(%rip),%xmm4,%xmm4 1220bc3d5698SJohn Baldwin vpshufd $0xaa,%xmm7,%xmm6 1221bc3d5698SJohn Baldwin vmovdqa %xmm5,272-256(%rcx) 1222bc3d5698SJohn Baldwin vpshufd $0xff,%xmm7,%xmm7 1223bc3d5698SJohn Baldwin vmovdqa %xmm6,288-256(%rcx) 1224bc3d5698SJohn Baldwin vmovdqa %xmm7,304-256(%rcx) 1225bc3d5698SJohn Baldwin 1226bc3d5698SJohn Baldwin jmp .Loop_enter4xop 1227bc3d5698SJohn Baldwin 1228bc3d5698SJohn Baldwin.align 32 1229bc3d5698SJohn Baldwin.Loop_outer4xop: 1230bc3d5698SJohn Baldwin vmovdqa 64(%rsp),%xmm8 1231bc3d5698SJohn Baldwin vmovdqa 80(%rsp),%xmm9 1232bc3d5698SJohn Baldwin vmovdqa 96(%rsp),%xmm10 1233bc3d5698SJohn Baldwin vmovdqa 112(%rsp),%xmm11 1234bc3d5698SJohn Baldwin vmovdqa 128-256(%rcx),%xmm0 1235bc3d5698SJohn Baldwin vmovdqa 144-256(%rcx),%xmm1 1236bc3d5698SJohn Baldwin vmovdqa 160-256(%rcx),%xmm2 1237bc3d5698SJohn Baldwin vmovdqa 176-256(%rcx),%xmm3 1238bc3d5698SJohn Baldwin vmovdqa 192-256(%rcx),%xmm12 1239bc3d5698SJohn Baldwin vmovdqa 208-256(%rcx),%xmm13 1240bc3d5698SJohn Baldwin vmovdqa 224-256(%rcx),%xmm14 
1241bc3d5698SJohn Baldwin vmovdqa 240-256(%rcx),%xmm15 1242bc3d5698SJohn Baldwin vmovdqa 256-256(%rcx),%xmm4 1243bc3d5698SJohn Baldwin vmovdqa 272-256(%rcx),%xmm5 1244bc3d5698SJohn Baldwin vmovdqa 288-256(%rcx),%xmm6 1245bc3d5698SJohn Baldwin vmovdqa 304-256(%rcx),%xmm7 1246bc3d5698SJohn Baldwin vpaddd .Lfour(%rip),%xmm4,%xmm4 1247bc3d5698SJohn Baldwin 1248bc3d5698SJohn Baldwin.Loop_enter4xop: 1249bc3d5698SJohn Baldwin movl $10,%eax 1250bc3d5698SJohn Baldwin vmovdqa %xmm4,256-256(%rcx) 1251bc3d5698SJohn Baldwin jmp .Loop4xop 1252bc3d5698SJohn Baldwin 1253bc3d5698SJohn Baldwin.align 32 1254bc3d5698SJohn Baldwin.Loop4xop: 1255bc3d5698SJohn Baldwin vpaddd %xmm0,%xmm8,%xmm8 1256bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm9,%xmm9 1257bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm10,%xmm10 1258bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm11,%xmm11 1259bc3d5698SJohn Baldwin vpxor %xmm4,%xmm8,%xmm4 1260bc3d5698SJohn Baldwin vpxor %xmm5,%xmm9,%xmm5 1261bc3d5698SJohn Baldwin vpxor %xmm6,%xmm10,%xmm6 1262bc3d5698SJohn Baldwin vpxor %xmm7,%xmm11,%xmm7 1263bc3d5698SJohn Baldwin.byte 143,232,120,194,228,16 1264bc3d5698SJohn Baldwin.byte 143,232,120,194,237,16 1265bc3d5698SJohn Baldwin.byte 143,232,120,194,246,16 1266bc3d5698SJohn Baldwin.byte 143,232,120,194,255,16 1267bc3d5698SJohn Baldwin vpaddd %xmm4,%xmm12,%xmm12 1268bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm13,%xmm13 1269bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm14,%xmm14 1270bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm15,%xmm15 1271bc3d5698SJohn Baldwin vpxor %xmm0,%xmm12,%xmm0 1272bc3d5698SJohn Baldwin vpxor %xmm1,%xmm13,%xmm1 1273bc3d5698SJohn Baldwin vpxor %xmm14,%xmm2,%xmm2 1274bc3d5698SJohn Baldwin vpxor %xmm15,%xmm3,%xmm3 1275bc3d5698SJohn Baldwin.byte 143,232,120,194,192,12 1276bc3d5698SJohn Baldwin.byte 143,232,120,194,201,12 1277bc3d5698SJohn Baldwin.byte 143,232,120,194,210,12 1278bc3d5698SJohn Baldwin.byte 143,232,120,194,219,12 1279bc3d5698SJohn Baldwin vpaddd %xmm8,%xmm0,%xmm8 1280bc3d5698SJohn Baldwin vpaddd %xmm9,%xmm1,%xmm9 
1281bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm10,%xmm10 1282bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm11,%xmm11 1283bc3d5698SJohn Baldwin vpxor %xmm4,%xmm8,%xmm4 1284bc3d5698SJohn Baldwin vpxor %xmm5,%xmm9,%xmm5 1285bc3d5698SJohn Baldwin vpxor %xmm6,%xmm10,%xmm6 1286bc3d5698SJohn Baldwin vpxor %xmm7,%xmm11,%xmm7 1287bc3d5698SJohn Baldwin.byte 143,232,120,194,228,8 1288bc3d5698SJohn Baldwin.byte 143,232,120,194,237,8 1289bc3d5698SJohn Baldwin.byte 143,232,120,194,246,8 1290bc3d5698SJohn Baldwin.byte 143,232,120,194,255,8 1291bc3d5698SJohn Baldwin vpaddd %xmm4,%xmm12,%xmm12 1292bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm13,%xmm13 1293bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm14,%xmm14 1294bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm15,%xmm15 1295bc3d5698SJohn Baldwin vpxor %xmm0,%xmm12,%xmm0 1296bc3d5698SJohn Baldwin vpxor %xmm1,%xmm13,%xmm1 1297bc3d5698SJohn Baldwin vpxor %xmm14,%xmm2,%xmm2 1298bc3d5698SJohn Baldwin vpxor %xmm15,%xmm3,%xmm3 1299bc3d5698SJohn Baldwin.byte 143,232,120,194,192,7 1300bc3d5698SJohn Baldwin.byte 143,232,120,194,201,7 1301bc3d5698SJohn Baldwin.byte 143,232,120,194,210,7 1302bc3d5698SJohn Baldwin.byte 143,232,120,194,219,7 1303bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm8,%xmm8 1304bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm9,%xmm9 1305bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm10,%xmm10 1306bc3d5698SJohn Baldwin vpaddd %xmm0,%xmm11,%xmm11 1307bc3d5698SJohn Baldwin vpxor %xmm7,%xmm8,%xmm7 1308bc3d5698SJohn Baldwin vpxor %xmm4,%xmm9,%xmm4 1309bc3d5698SJohn Baldwin vpxor %xmm5,%xmm10,%xmm5 1310bc3d5698SJohn Baldwin vpxor %xmm6,%xmm11,%xmm6 1311bc3d5698SJohn Baldwin.byte 143,232,120,194,255,16 1312bc3d5698SJohn Baldwin.byte 143,232,120,194,228,16 1313bc3d5698SJohn Baldwin.byte 143,232,120,194,237,16 1314bc3d5698SJohn Baldwin.byte 143,232,120,194,246,16 1315bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm14,%xmm14 1316bc3d5698SJohn Baldwin vpaddd %xmm4,%xmm15,%xmm15 1317bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm12,%xmm12 1318bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm13,%xmm13 
1319bc3d5698SJohn Baldwin vpxor %xmm1,%xmm14,%xmm1 1320bc3d5698SJohn Baldwin vpxor %xmm2,%xmm15,%xmm2 1321bc3d5698SJohn Baldwin vpxor %xmm12,%xmm3,%xmm3 1322bc3d5698SJohn Baldwin vpxor %xmm13,%xmm0,%xmm0 1323bc3d5698SJohn Baldwin.byte 143,232,120,194,201,12 1324bc3d5698SJohn Baldwin.byte 143,232,120,194,210,12 1325bc3d5698SJohn Baldwin.byte 143,232,120,194,219,12 1326bc3d5698SJohn Baldwin.byte 143,232,120,194,192,12 1327bc3d5698SJohn Baldwin vpaddd %xmm8,%xmm1,%xmm8 1328bc3d5698SJohn Baldwin vpaddd %xmm9,%xmm2,%xmm9 1329bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm10,%xmm10 1330bc3d5698SJohn Baldwin vpaddd %xmm0,%xmm11,%xmm11 1331bc3d5698SJohn Baldwin vpxor %xmm7,%xmm8,%xmm7 1332bc3d5698SJohn Baldwin vpxor %xmm4,%xmm9,%xmm4 1333bc3d5698SJohn Baldwin vpxor %xmm5,%xmm10,%xmm5 1334bc3d5698SJohn Baldwin vpxor %xmm6,%xmm11,%xmm6 1335bc3d5698SJohn Baldwin.byte 143,232,120,194,255,8 1336bc3d5698SJohn Baldwin.byte 143,232,120,194,228,8 1337bc3d5698SJohn Baldwin.byte 143,232,120,194,237,8 1338bc3d5698SJohn Baldwin.byte 143,232,120,194,246,8 1339bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm14,%xmm14 1340bc3d5698SJohn Baldwin vpaddd %xmm4,%xmm15,%xmm15 1341bc3d5698SJohn Baldwin vpaddd %xmm5,%xmm12,%xmm12 1342bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm13,%xmm13 1343bc3d5698SJohn Baldwin vpxor %xmm1,%xmm14,%xmm1 1344bc3d5698SJohn Baldwin vpxor %xmm2,%xmm15,%xmm2 1345bc3d5698SJohn Baldwin vpxor %xmm12,%xmm3,%xmm3 1346bc3d5698SJohn Baldwin vpxor %xmm13,%xmm0,%xmm0 1347bc3d5698SJohn Baldwin.byte 143,232,120,194,201,7 1348bc3d5698SJohn Baldwin.byte 143,232,120,194,210,7 1349bc3d5698SJohn Baldwin.byte 143,232,120,194,219,7 1350bc3d5698SJohn Baldwin.byte 143,232,120,194,192,7 1351bc3d5698SJohn Baldwin decl %eax 1352bc3d5698SJohn Baldwin jnz .Loop4xop 1353bc3d5698SJohn Baldwin 1354bc3d5698SJohn Baldwin vpaddd 64(%rsp),%xmm8,%xmm8 1355bc3d5698SJohn Baldwin vpaddd 80(%rsp),%xmm9,%xmm9 1356bc3d5698SJohn Baldwin vpaddd 96(%rsp),%xmm10,%xmm10 1357bc3d5698SJohn Baldwin vpaddd 112(%rsp),%xmm11,%xmm11 
1358bc3d5698SJohn Baldwin 1359bc3d5698SJohn Baldwin vmovdqa %xmm14,32(%rsp) 1360bc3d5698SJohn Baldwin vmovdqa %xmm15,48(%rsp) 1361bc3d5698SJohn Baldwin 1362bc3d5698SJohn Baldwin vpunpckldq %xmm9,%xmm8,%xmm14 1363bc3d5698SJohn Baldwin vpunpckldq %xmm11,%xmm10,%xmm15 1364bc3d5698SJohn Baldwin vpunpckhdq %xmm9,%xmm8,%xmm8 1365bc3d5698SJohn Baldwin vpunpckhdq %xmm11,%xmm10,%xmm10 1366bc3d5698SJohn Baldwin vpunpcklqdq %xmm15,%xmm14,%xmm9 1367bc3d5698SJohn Baldwin vpunpckhqdq %xmm15,%xmm14,%xmm14 1368bc3d5698SJohn Baldwin vpunpcklqdq %xmm10,%xmm8,%xmm11 1369bc3d5698SJohn Baldwin vpunpckhqdq %xmm10,%xmm8,%xmm8 1370bc3d5698SJohn Baldwin vpaddd 128-256(%rcx),%xmm0,%xmm0 1371bc3d5698SJohn Baldwin vpaddd 144-256(%rcx),%xmm1,%xmm1 1372bc3d5698SJohn Baldwin vpaddd 160-256(%rcx),%xmm2,%xmm2 1373bc3d5698SJohn Baldwin vpaddd 176-256(%rcx),%xmm3,%xmm3 1374bc3d5698SJohn Baldwin 1375bc3d5698SJohn Baldwin vmovdqa %xmm9,0(%rsp) 1376bc3d5698SJohn Baldwin vmovdqa %xmm14,16(%rsp) 1377bc3d5698SJohn Baldwin vmovdqa 32(%rsp),%xmm9 1378bc3d5698SJohn Baldwin vmovdqa 48(%rsp),%xmm14 1379bc3d5698SJohn Baldwin 1380bc3d5698SJohn Baldwin vpunpckldq %xmm1,%xmm0,%xmm10 1381bc3d5698SJohn Baldwin vpunpckldq %xmm3,%xmm2,%xmm15 1382bc3d5698SJohn Baldwin vpunpckhdq %xmm1,%xmm0,%xmm0 1383bc3d5698SJohn Baldwin vpunpckhdq %xmm3,%xmm2,%xmm2 1384bc3d5698SJohn Baldwin vpunpcklqdq %xmm15,%xmm10,%xmm1 1385bc3d5698SJohn Baldwin vpunpckhqdq %xmm15,%xmm10,%xmm10 1386bc3d5698SJohn Baldwin vpunpcklqdq %xmm2,%xmm0,%xmm3 1387bc3d5698SJohn Baldwin vpunpckhqdq %xmm2,%xmm0,%xmm0 1388bc3d5698SJohn Baldwin vpaddd 192-256(%rcx),%xmm12,%xmm12 1389bc3d5698SJohn Baldwin vpaddd 208-256(%rcx),%xmm13,%xmm13 1390bc3d5698SJohn Baldwin vpaddd 224-256(%rcx),%xmm9,%xmm9 1391bc3d5698SJohn Baldwin vpaddd 240-256(%rcx),%xmm14,%xmm14 1392bc3d5698SJohn Baldwin 1393bc3d5698SJohn Baldwin vpunpckldq %xmm13,%xmm12,%xmm2 1394bc3d5698SJohn Baldwin vpunpckldq %xmm14,%xmm9,%xmm15 1395bc3d5698SJohn Baldwin vpunpckhdq %xmm13,%xmm12,%xmm12 
1396bc3d5698SJohn Baldwin vpunpckhdq %xmm14,%xmm9,%xmm9 1397bc3d5698SJohn Baldwin vpunpcklqdq %xmm15,%xmm2,%xmm13 1398bc3d5698SJohn Baldwin vpunpckhqdq %xmm15,%xmm2,%xmm2 1399bc3d5698SJohn Baldwin vpunpcklqdq %xmm9,%xmm12,%xmm14 1400bc3d5698SJohn Baldwin vpunpckhqdq %xmm9,%xmm12,%xmm12 1401bc3d5698SJohn Baldwin vpaddd 256-256(%rcx),%xmm4,%xmm4 1402bc3d5698SJohn Baldwin vpaddd 272-256(%rcx),%xmm5,%xmm5 1403bc3d5698SJohn Baldwin vpaddd 288-256(%rcx),%xmm6,%xmm6 1404bc3d5698SJohn Baldwin vpaddd 304-256(%rcx),%xmm7,%xmm7 1405bc3d5698SJohn Baldwin 1406bc3d5698SJohn Baldwin vpunpckldq %xmm5,%xmm4,%xmm9 1407bc3d5698SJohn Baldwin vpunpckldq %xmm7,%xmm6,%xmm15 1408bc3d5698SJohn Baldwin vpunpckhdq %xmm5,%xmm4,%xmm4 1409bc3d5698SJohn Baldwin vpunpckhdq %xmm7,%xmm6,%xmm6 1410bc3d5698SJohn Baldwin vpunpcklqdq %xmm15,%xmm9,%xmm5 1411bc3d5698SJohn Baldwin vpunpckhqdq %xmm15,%xmm9,%xmm9 1412bc3d5698SJohn Baldwin vpunpcklqdq %xmm6,%xmm4,%xmm7 1413bc3d5698SJohn Baldwin vpunpckhqdq %xmm6,%xmm4,%xmm4 1414bc3d5698SJohn Baldwin vmovdqa 0(%rsp),%xmm6 1415bc3d5698SJohn Baldwin vmovdqa 16(%rsp),%xmm15 1416bc3d5698SJohn Baldwin 1417bc3d5698SJohn Baldwin cmpq $256,%rdx 1418bc3d5698SJohn Baldwin jb .Ltail4xop 1419bc3d5698SJohn Baldwin 1420bc3d5698SJohn Baldwin vpxor 0(%rsi),%xmm6,%xmm6 1421bc3d5698SJohn Baldwin vpxor 16(%rsi),%xmm1,%xmm1 1422bc3d5698SJohn Baldwin vpxor 32(%rsi),%xmm13,%xmm13 1423bc3d5698SJohn Baldwin vpxor 48(%rsi),%xmm5,%xmm5 1424bc3d5698SJohn Baldwin vpxor 64(%rsi),%xmm15,%xmm15 1425bc3d5698SJohn Baldwin vpxor 80(%rsi),%xmm10,%xmm10 1426bc3d5698SJohn Baldwin vpxor 96(%rsi),%xmm2,%xmm2 1427bc3d5698SJohn Baldwin vpxor 112(%rsi),%xmm9,%xmm9 1428bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 1429bc3d5698SJohn Baldwin vpxor 0(%rsi),%xmm11,%xmm11 1430bc3d5698SJohn Baldwin vpxor 16(%rsi),%xmm3,%xmm3 1431bc3d5698SJohn Baldwin vpxor 32(%rsi),%xmm14,%xmm14 1432bc3d5698SJohn Baldwin vpxor 48(%rsi),%xmm7,%xmm7 1433bc3d5698SJohn Baldwin vpxor 64(%rsi),%xmm8,%xmm8 1434bc3d5698SJohn Baldwin 
vpxor 80(%rsi),%xmm0,%xmm0 1435bc3d5698SJohn Baldwin vpxor 96(%rsi),%xmm12,%xmm12 1436bc3d5698SJohn Baldwin vpxor 112(%rsi),%xmm4,%xmm4 1437bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 1438bc3d5698SJohn Baldwin 1439bc3d5698SJohn Baldwin vmovdqu %xmm6,0(%rdi) 1440bc3d5698SJohn Baldwin vmovdqu %xmm1,16(%rdi) 1441bc3d5698SJohn Baldwin vmovdqu %xmm13,32(%rdi) 1442bc3d5698SJohn Baldwin vmovdqu %xmm5,48(%rdi) 1443bc3d5698SJohn Baldwin vmovdqu %xmm15,64(%rdi) 1444bc3d5698SJohn Baldwin vmovdqu %xmm10,80(%rdi) 1445bc3d5698SJohn Baldwin vmovdqu %xmm2,96(%rdi) 1446bc3d5698SJohn Baldwin vmovdqu %xmm9,112(%rdi) 1447bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1448bc3d5698SJohn Baldwin vmovdqu %xmm11,0(%rdi) 1449bc3d5698SJohn Baldwin vmovdqu %xmm3,16(%rdi) 1450bc3d5698SJohn Baldwin vmovdqu %xmm14,32(%rdi) 1451bc3d5698SJohn Baldwin vmovdqu %xmm7,48(%rdi) 1452bc3d5698SJohn Baldwin vmovdqu %xmm8,64(%rdi) 1453bc3d5698SJohn Baldwin vmovdqu %xmm0,80(%rdi) 1454bc3d5698SJohn Baldwin vmovdqu %xmm12,96(%rdi) 1455bc3d5698SJohn Baldwin vmovdqu %xmm4,112(%rdi) 1456bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1457bc3d5698SJohn Baldwin 1458bc3d5698SJohn Baldwin subq $256,%rdx 1459bc3d5698SJohn Baldwin jnz .Loop_outer4xop 1460bc3d5698SJohn Baldwin 1461bc3d5698SJohn Baldwin jmp .Ldone4xop 1462bc3d5698SJohn Baldwin 1463bc3d5698SJohn Baldwin.align 32 1464bc3d5698SJohn Baldwin.Ltail4xop: 1465bc3d5698SJohn Baldwin cmpq $192,%rdx 1466bc3d5698SJohn Baldwin jae .L192_or_more4xop 1467bc3d5698SJohn Baldwin cmpq $128,%rdx 1468bc3d5698SJohn Baldwin jae .L128_or_more4xop 1469bc3d5698SJohn Baldwin cmpq $64,%rdx 1470bc3d5698SJohn Baldwin jae .L64_or_more4xop 1471bc3d5698SJohn Baldwin 1472bc3d5698SJohn Baldwin xorq %r10,%r10 1473bc3d5698SJohn Baldwin vmovdqa %xmm6,0(%rsp) 1474bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%rsp) 1475bc3d5698SJohn Baldwin vmovdqa %xmm13,32(%rsp) 1476bc3d5698SJohn Baldwin vmovdqa %xmm5,48(%rsp) 1477bc3d5698SJohn Baldwin jmp .Loop_tail4xop 1478bc3d5698SJohn Baldwin 1479bc3d5698SJohn Baldwin.align 
32 1480bc3d5698SJohn Baldwin.L64_or_more4xop: 1481bc3d5698SJohn Baldwin vpxor 0(%rsi),%xmm6,%xmm6 1482bc3d5698SJohn Baldwin vpxor 16(%rsi),%xmm1,%xmm1 1483bc3d5698SJohn Baldwin vpxor 32(%rsi),%xmm13,%xmm13 1484bc3d5698SJohn Baldwin vpxor 48(%rsi),%xmm5,%xmm5 1485bc3d5698SJohn Baldwin vmovdqu %xmm6,0(%rdi) 1486bc3d5698SJohn Baldwin vmovdqu %xmm1,16(%rdi) 1487bc3d5698SJohn Baldwin vmovdqu %xmm13,32(%rdi) 1488bc3d5698SJohn Baldwin vmovdqu %xmm5,48(%rdi) 1489bc3d5698SJohn Baldwin je .Ldone4xop 1490bc3d5698SJohn Baldwin 1491bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 1492bc3d5698SJohn Baldwin vmovdqa %xmm15,0(%rsp) 1493bc3d5698SJohn Baldwin xorq %r10,%r10 1494bc3d5698SJohn Baldwin vmovdqa %xmm10,16(%rsp) 1495bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 1496bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%rsp) 1497bc3d5698SJohn Baldwin subq $64,%rdx 1498bc3d5698SJohn Baldwin vmovdqa %xmm9,48(%rsp) 1499bc3d5698SJohn Baldwin jmp .Loop_tail4xop 1500bc3d5698SJohn Baldwin 1501bc3d5698SJohn Baldwin.align 32 1502bc3d5698SJohn Baldwin.L128_or_more4xop: 1503bc3d5698SJohn Baldwin vpxor 0(%rsi),%xmm6,%xmm6 1504bc3d5698SJohn Baldwin vpxor 16(%rsi),%xmm1,%xmm1 1505bc3d5698SJohn Baldwin vpxor 32(%rsi),%xmm13,%xmm13 1506bc3d5698SJohn Baldwin vpxor 48(%rsi),%xmm5,%xmm5 1507bc3d5698SJohn Baldwin vpxor 64(%rsi),%xmm15,%xmm15 1508bc3d5698SJohn Baldwin vpxor 80(%rsi),%xmm10,%xmm10 1509bc3d5698SJohn Baldwin vpxor 96(%rsi),%xmm2,%xmm2 1510bc3d5698SJohn Baldwin vpxor 112(%rsi),%xmm9,%xmm9 1511bc3d5698SJohn Baldwin 1512bc3d5698SJohn Baldwin vmovdqu %xmm6,0(%rdi) 1513bc3d5698SJohn Baldwin vmovdqu %xmm1,16(%rdi) 1514bc3d5698SJohn Baldwin vmovdqu %xmm13,32(%rdi) 1515bc3d5698SJohn Baldwin vmovdqu %xmm5,48(%rdi) 1516bc3d5698SJohn Baldwin vmovdqu %xmm15,64(%rdi) 1517bc3d5698SJohn Baldwin vmovdqu %xmm10,80(%rdi) 1518bc3d5698SJohn Baldwin vmovdqu %xmm2,96(%rdi) 1519bc3d5698SJohn Baldwin vmovdqu %xmm9,112(%rdi) 1520bc3d5698SJohn Baldwin je .Ldone4xop 1521bc3d5698SJohn Baldwin 1522bc3d5698SJohn Baldwin leaq 
128(%rsi),%rsi 1523bc3d5698SJohn Baldwin vmovdqa %xmm11,0(%rsp) 1524bc3d5698SJohn Baldwin xorq %r10,%r10 1525bc3d5698SJohn Baldwin vmovdqa %xmm3,16(%rsp) 1526bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1527bc3d5698SJohn Baldwin vmovdqa %xmm14,32(%rsp) 1528bc3d5698SJohn Baldwin subq $128,%rdx 1529bc3d5698SJohn Baldwin vmovdqa %xmm7,48(%rsp) 1530bc3d5698SJohn Baldwin jmp .Loop_tail4xop 1531bc3d5698SJohn Baldwin 1532bc3d5698SJohn Baldwin.align 32 1533bc3d5698SJohn Baldwin.L192_or_more4xop: 1534bc3d5698SJohn Baldwin vpxor 0(%rsi),%xmm6,%xmm6 1535bc3d5698SJohn Baldwin vpxor 16(%rsi),%xmm1,%xmm1 1536bc3d5698SJohn Baldwin vpxor 32(%rsi),%xmm13,%xmm13 1537bc3d5698SJohn Baldwin vpxor 48(%rsi),%xmm5,%xmm5 1538bc3d5698SJohn Baldwin vpxor 64(%rsi),%xmm15,%xmm15 1539bc3d5698SJohn Baldwin vpxor 80(%rsi),%xmm10,%xmm10 1540bc3d5698SJohn Baldwin vpxor 96(%rsi),%xmm2,%xmm2 1541bc3d5698SJohn Baldwin vpxor 112(%rsi),%xmm9,%xmm9 1542bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 1543bc3d5698SJohn Baldwin vpxor 0(%rsi),%xmm11,%xmm11 1544bc3d5698SJohn Baldwin vpxor 16(%rsi),%xmm3,%xmm3 1545bc3d5698SJohn Baldwin vpxor 32(%rsi),%xmm14,%xmm14 1546bc3d5698SJohn Baldwin vpxor 48(%rsi),%xmm7,%xmm7 1547bc3d5698SJohn Baldwin 1548bc3d5698SJohn Baldwin vmovdqu %xmm6,0(%rdi) 1549bc3d5698SJohn Baldwin vmovdqu %xmm1,16(%rdi) 1550bc3d5698SJohn Baldwin vmovdqu %xmm13,32(%rdi) 1551bc3d5698SJohn Baldwin vmovdqu %xmm5,48(%rdi) 1552bc3d5698SJohn Baldwin vmovdqu %xmm15,64(%rdi) 1553bc3d5698SJohn Baldwin vmovdqu %xmm10,80(%rdi) 1554bc3d5698SJohn Baldwin vmovdqu %xmm2,96(%rdi) 1555bc3d5698SJohn Baldwin vmovdqu %xmm9,112(%rdi) 1556bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1557bc3d5698SJohn Baldwin vmovdqu %xmm11,0(%rdi) 1558bc3d5698SJohn Baldwin vmovdqu %xmm3,16(%rdi) 1559bc3d5698SJohn Baldwin vmovdqu %xmm14,32(%rdi) 1560bc3d5698SJohn Baldwin vmovdqu %xmm7,48(%rdi) 1561bc3d5698SJohn Baldwin je .Ldone4xop 1562bc3d5698SJohn Baldwin 1563bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 1564bc3d5698SJohn Baldwin vmovdqa 
%xmm8,0(%rsp)
1565bc3d5698SJohn Baldwin xorq %r10,%r10
1566bc3d5698SJohn Baldwin vmovdqa %xmm0,16(%rsp)
1567bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi
1568bc3d5698SJohn Baldwin vmovdqa %xmm12,32(%rsp)
1569bc3d5698SJohn Baldwin subq $192,%rdx
1570bc3d5698SJohn Baldwin vmovdqa %xmm4,48(%rsp)
1571bc3d5698SJohn Baldwin
1572bc3d5698SJohn Baldwin.Loop_tail4xop:
1573bc3d5698SJohn Baldwin movzbl (%rsi,%r10,1),%eax
1574bc3d5698SJohn Baldwin movzbl (%rsp,%r10,1),%ecx
1575bc3d5698SJohn Baldwin leaq 1(%r10),%r10
1576bc3d5698SJohn Baldwin xorl %ecx,%eax
1577bc3d5698SJohn Baldwin movb %al,-1(%rdi,%r10,1)
1578bc3d5698SJohn Baldwin decq %rdx
1579bc3d5698SJohn Baldwin jnz .Loop_tail4xop
1580bc3d5698SJohn Baldwin
1581bc3d5698SJohn Baldwin.Ldone4xop:
1582bc3d5698SJohn Baldwin vzeroupper
1583bc3d5698SJohn Baldwin leaq (%r9),%rsp
1584bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp
1585bc3d5698SJohn Baldwin.L4xop_epilogue:
1586bc3d5698SJohn Baldwin .byte 0xf3,0xc3
1587bc3d5698SJohn Baldwin.cfi_endproc
1588bc3d5698SJohn Baldwin.size ChaCha20_4xop,.-ChaCha20_4xop
/*
 * ChaCha20_8x: AVX2 (256-bit ymm) ChaCha20 path. Each outer iteration computes
 * eight 64-byte keystream blocks (512 bytes) in parallel, one block per 32-bit
 * lane, and XORs them with the input.
 * Control presumably arrives by a jump to .LChaCha20_8x from the dispatcher,
 * which is not visible in this part of the file (confirm).
 *
 * Register use as read from the code below:
 *   rdi       output pointer (stores)
 *   rsi       input pointer (loads)
 *   rdx       bytes remaining
 *   rcx       on entry a pointer to 32 bytes of state (presumably the key,
 *             confirm), afterwards reused as rsp+256
 *   r8        on entry a pointer to 16 bytes of state (presumably counter and
 *             nonce, confirm)
 *   r9        caller rsp, restored at .Ldone8x
 *   r10, r11  addresses of the .Lrot16 and .Lrot24 vpshufb masks
 *   rax       rsp+512 outside the round loop, pass counter (eax) inside it
 *
 * Stack frame, 32-byte aligned:
 *   0..127(%rsp)    scratch: spilled state words during the rounds, later the
 *                   64 keystream bytes consumed by the byte tail
 *   128..639(%rsp)  sixteen 32-byte vectors, state word N replicated in every
 *                   lane at offset 128+32*N. Word 12 also carries the per-lane
 *                   block counter offsets.
 */
1589bc3d5698SJohn Baldwin.type ChaCha20_8x,@function
1590bc3d5698SJohn Baldwin.align 32
1591bc3d5698SJohn BaldwinChaCha20_8x:
1592bc3d5698SJohn Baldwin.cfi_startproc
1593bc3d5698SJohn Baldwin.LChaCha20_8x:
/* Keep the caller stack pointer in r9. The frame is realigned with andq below,
   so the old value cannot be recovered by adding a constant. */
1594bc3d5698SJohn Baldwin movq %rsp,%r9
1595bc3d5698SJohn Baldwin.cfi_def_cfa_register %r9
/* Reserve 0x280+8 bytes and round rsp down to a multiple of 32 so the aligned
   vmovdqa accesses to 32-byte ymm slots in this frame are valid. */
1596bc3d5698SJohn Baldwin subq $0x280+8,%rsp
1597bc3d5698SJohn Baldwin andq $-32,%rsp
1598bc3d5698SJohn Baldwin vzeroupper
1599bc3d5698SJohn Baldwin
1600bc3d5698SJohn Baldwin
1601bc3d5698SJohn Baldwin
1602bc3d5698SJohn Baldwin
1603bc3d5698SJohn Baldwin
1604bc3d5698SJohn Baldwin
1605bc3d5698SJohn Baldwin
1606bc3d5698SJohn Baldwin
1607bc3d5698SJohn Baldwin
1608bc3d5698SJohn Baldwin
/* Load the four 16-byte state rows, each duplicated into both 128-bit halves.
   Row 0 is the .Lsigma constant, rows 1 and 2 come from rcx, row 3 from r8. */
1609bc3d5698SJohn Baldwin vbroadcasti128 .Lsigma(%rip),%ymm11
1610bc3d5698SJohn Baldwin vbroadcasti128 (%rcx),%ymm3
1611bc3d5698SJohn Baldwin vbroadcasti128 16(%rcx),%ymm15
1612bc3d5698SJohn Baldwin vbroadcasti128 (%r8),%ymm7
/* rcx and rax become frame pointers biased by 256 and 512. With them every
   state slot is addressed using a displacement between -128 and 96. */
1613bc3d5698SJohn Baldwin leaq 256(%rsp),%rcx
1614bc3d5698SJohn Baldwin leaq 512(%rsp),%rax
1615bc3d5698SJohn Baldwin leaq .Lrot16(%rip),%r10
1616bc3d5698SJohn Baldwin leaq .Lrot24(%rip),%r11
1617bc3d5698SJohn Baldwin
/* vpshufd with 0x00, 0x55, 0xaa, 0xff replicates dword 0, 1, 2, 3 of each
   128-bit half, leaving one state word per register in all eight lanes.
   Words 0..3 (row 0) are saved as the starting state for every iteration. */
1618bc3d5698SJohn Baldwin vpshufd $0x00,%ymm11,%ymm8
1619bc3d5698SJohn Baldwin vpshufd $0x55,%ymm11,%ymm9
1620bc3d5698SJohn Baldwin vmovdqa %ymm8,128-256(%rcx)
1621bc3d5698SJohn Baldwin vpshufd $0xaa,%ymm11,%ymm10
1622bc3d5698SJohn Baldwin vmovdqa %ymm9,160-256(%rcx)
1623bc3d5698SJohn Baldwin vpshufd $0xff,%ymm11,%ymm11
1624bc3d5698SJohn Baldwin vmovdqa %ymm10,192-256(%rcx)
1625bc3d5698SJohn Baldwin vmovdqa %ymm11,224-256(%rcx)
1626bc3d5698SJohn Baldwin
/* Words 4..7 (row 1). */
1627bc3d5698SJohn Baldwin vpshufd $0x00,%ymm3,%ymm0
1628bc3d5698SJohn Baldwin vpshufd $0x55,%ymm3,%ymm1
1629bc3d5698SJohn Baldwin vmovdqa %ymm0,256-256(%rcx)
1630bc3d5698SJohn Baldwin vpshufd $0xaa,%ymm3,%ymm2
1631bc3d5698SJohn Baldwin vmovdqa %ymm1,288-256(%rcx)
1632bc3d5698SJohn Baldwin vpshufd $0xff,%ymm3,%ymm3
1633bc3d5698SJohn Baldwin vmovdqa %ymm2,320-256(%rcx)
1634bc3d5698SJohn Baldwin vmovdqa %ymm3,352-256(%rcx)
1635bc3d5698SJohn Baldwin
/* Words 8..11 (row 2). */
1636bc3d5698SJohn Baldwin vpshufd $0x00,%ymm15,%ymm12
1637bc3d5698SJohn Baldwin vpshufd $0x55,%ymm15,%ymm13
1638bc3d5698SJohn Baldwin vmovdqa %ymm12,384-512(%rax)
1639bc3d5698SJohn Baldwin vpshufd $0xaa,%ymm15,%ymm14
1640bc3d5698SJohn Baldwin vmovdqa %ymm13,416-512(%rax)
1641bc3d5698SJohn Baldwin vpshufd $0xff,%ymm15,%ymm15
1642bc3d5698SJohn Baldwin vmovdqa %ymm14,448-512(%rax)
1643bc3d5698SJohn Baldwin vmovdqa %ymm15,480-512(%rax)
1644bc3d5698SJohn Baldwin
/* Words 12..15 (row 3). Only word 12 gets the .Lincy lane offsets
   0,2,4,6,1,3,5,7 added, so each lane works on a different block counter.
   Word 12 is not stored here, that happens at .Loop_enter8x. */
1645bc3d5698SJohn Baldwin vpshufd $0x00,%ymm7,%ymm4
1646bc3d5698SJohn Baldwin vpshufd $0x55,%ymm7,%ymm5
1647bc3d5698SJohn Baldwin vpaddd .Lincy(%rip),%ymm4,%ymm4
1648bc3d5698SJohn Baldwin vpshufd $0xaa,%ymm7,%ymm6
1649bc3d5698SJohn Baldwin vmovdqa %ymm5,544-512(%rax)
1650bc3d5698SJohn Baldwin vpshufd $0xff,%ymm7,%ymm7
1651bc3d5698SJohn Baldwin vmovdqa %ymm6,576-512(%rax)
1652bc3d5698SJohn Baldwin vmovdqa %ymm7,608-512(%rax)
1653bc3d5698SJohn Baldwin
1654bc3d5698SJohn Baldwin jmp .Loop_enter8x
1655bc3d5698SJohn Baldwin
1656bc3d5698SJohn Baldwin.align 32
/* Second and later 512-byte iterations: reload the saved starting state and
   advance every lane of word 12 by 8 (.Leight). */
1657bc3d5698SJohn Baldwin.Loop_outer8x:
1658bc3d5698SJohn Baldwin vmovdqa 128-256(%rcx),%ymm8
1659bc3d5698SJohn Baldwin vmovdqa 160-256(%rcx),%ymm9
1660bc3d5698SJohn Baldwin vmovdqa 192-256(%rcx),%ymm10
1661bc3d5698SJohn Baldwin vmovdqa 224-256(%rcx),%ymm11
1662bc3d5698SJohn Baldwin vmovdqa 256-256(%rcx),%ymm0
1663bc3d5698SJohn Baldwin vmovdqa 288-256(%rcx),%ymm1
1664bc3d5698SJohn Baldwin vmovdqa 320-256(%rcx),%ymm2
1665bc3d5698SJohn Baldwin vmovdqa 352-256(%rcx),%ymm3
1666bc3d5698SJohn Baldwin vmovdqa 384-512(%rax),%ymm12
1667bc3d5698SJohn Baldwin vmovdqa 416-512(%rax),%ymm13
1668bc3d5698SJohn Baldwin vmovdqa 448-512(%rax),%ymm14
1669bc3d5698SJohn Baldwin vmovdqa 480-512(%rax),%ymm15
1670bc3d5698SJohn Baldwin vmovdqa 512-512(%rax),%ymm4
1671bc3d5698SJohn Baldwin vmovdqa 544-512(%rax),%ymm5
1672bc3d5698SJohn Baldwin vmovdqa 576-512(%rax),%ymm6
1673bc3d5698SJohn Baldwin vmovdqa 608-512(%rax),%ymm7
1674bc3d5698SJohn Baldwin vpaddd .Leight(%rip),%ymm4,%ymm4
1675bc3d5698SJohn Baldwin
/* Sixteen state words plus temporaries do not fit in sixteen ymm registers.
   ymm14 and ymm15 (words 10 and 11) are spilled so those registers can hold
   temporaries and the shuffle masks. The current counter vector is written
   back as the saved word 12. eax aliases the rax base pointer, so rax is
   rebuilt after the loop. The count is 10 passes of two rounds each. */
1676bc3d5698SJohn Baldwin.Loop_enter8x:
1677bc3d5698SJohn Baldwin vmovdqa %ymm14,64(%rsp)
1678bc3d5698SJohn Baldwin vmovdqa %ymm15,96(%rsp)
1679bc3d5698SJohn Baldwin vbroadcasti128 (%r10),%ymm15
1680bc3d5698SJohn Baldwin vmovdqa %ymm4,512-512(%rax)
1681bc3d5698SJohn Baldwin movl $10,%eax
1682bc3d5698SJohn Baldwin jmp .Loop8x
1683bc3d5698SJohn Baldwin
1684bc3d5698SJohn Baldwin.align 32
1685bc3d5698SJohn Baldwin.Loop8x:
/* First round of the pass. Two quarter-rounds are interleaved, on
   (ymm8,ymm0,ymm12,ymm4) and (ymm9,ymm1,ymm13,ymm5), which are state words
   (0,4,8,12) and (1,5,9,13). vpshufb with the .Lrot16 mask rotates each dword
   by 16, the shift pairs 12/20 and 7/25 rotate left by 12 and by 7, and
   vpshufb with the .Lrot24 mask rotates left by 8. ymm14 and ymm15 alternate
   between shift temporary and mask, which is why the masks are reloaded. */
1686bc3d5698SJohn Baldwin vpaddd %ymm0,%ymm8,%ymm8
1687bc3d5698SJohn Baldwin vpxor %ymm4,%ymm8,%ymm4
1688bc3d5698SJohn Baldwin vpshufb %ymm15,%ymm4,%ymm4
1689bc3d5698SJohn Baldwin vpaddd %ymm1,%ymm9,%ymm9
1690bc3d5698SJohn Baldwin vpxor %ymm5,%ymm9,%ymm5
1691bc3d5698SJohn Baldwin vpshufb %ymm15,%ymm5,%ymm5
1692bc3d5698SJohn Baldwin vpaddd %ymm4,%ymm12,%ymm12
1693bc3d5698SJohn Baldwin vpxor %ymm0,%ymm12,%ymm0
1694bc3d5698SJohn Baldwin vpslld $12,%ymm0,%ymm14
1695bc3d5698SJohn Baldwin vpsrld $20,%ymm0,%ymm0
1696bc3d5698SJohn Baldwin vpor %ymm0,%ymm14,%ymm0
1697bc3d5698SJohn Baldwin vbroadcasti128 (%r11),%ymm14
1698bc3d5698SJohn Baldwin vpaddd %ymm5,%ymm13,%ymm13
1699bc3d5698SJohn Baldwin vpxor %ymm1,%ymm13,%ymm1
1700bc3d5698SJohn Baldwin vpslld $12,%ymm1,%ymm15
1701bc3d5698SJohn Baldwin vpsrld $20,%ymm1,%ymm1
1702bc3d5698SJohn Baldwin vpor %ymm1,%ymm15,%ymm1
1703bc3d5698SJohn Baldwin vpaddd %ymm0,%ymm8,%ymm8
1704bc3d5698SJohn Baldwin vpxor %ymm4,%ymm8,%ymm4
1705bc3d5698SJohn Baldwin vpshufb %ymm14,%ymm4,%ymm4
1706bc3d5698SJohn Baldwin vpaddd %ymm1,%ymm9,%ymm9
1707bc3d5698SJohn Baldwin vpxor %ymm5,%ymm9,%ymm5
1708bc3d5698SJohn Baldwin vpshufb %ymm14,%ymm5,%ymm5
1709bc3d5698SJohn Baldwin vpaddd %ymm4,%ymm12,%ymm12
1710bc3d5698SJohn Baldwin vpxor %ymm0,%ymm12,%ymm0
1711bc3d5698SJohn Baldwin vpslld $7,%ymm0,%ymm15
1712bc3d5698SJohn Baldwin vpsrld $25,%ymm0,%ymm0
1713bc3d5698SJohn Baldwin vpor %ymm0,%ymm15,%ymm0
1714bc3d5698SJohn Baldwin vbroadcasti128 (%r10),%ymm15
1715bc3d5698SJohn Baldwin vpaddd %ymm5,%ymm13,%ymm13
1716bc3d5698SJohn Baldwin vpxor %ymm1,%ymm13,%ymm1
1717bc3d5698SJohn Baldwin vpslld $7,%ymm1,%ymm14
1718bc3d5698SJohn Baldwin vpsrld $25,%ymm1,%ymm1
1719bc3d5698SJohn Baldwin vpor %ymm1,%ymm14,%ymm1
/* Swap the register-resident pair: words 8 and 9 go to 0(%rsp) and 32(%rsp),
   words 10 and 11 come back from 64(%rsp) and 96(%rsp). */
1720bc3d5698SJohn Baldwin vmovdqa %ymm12,0(%rsp)
1721bc3d5698SJohn Baldwin vmovdqa %ymm13,32(%rsp)
1722bc3d5698SJohn Baldwin vmovdqa 64(%rsp),%ymm12
1723bc3d5698SJohn Baldwin vmovdqa 96(%rsp),%ymm13
/* Same round for (ymm10,ymm2,ymm12,ymm6) and (ymm11,ymm3,ymm13,ymm7),
   state words (2,6,10,14) and (3,7,11,15). */
1724bc3d5698SJohn Baldwin vpaddd %ymm2,%ymm10,%ymm10
1725bc3d5698SJohn Baldwin vpxor %ymm6,%ymm10,%ymm6
1726bc3d5698SJohn Baldwin vpshufb %ymm15,%ymm6,%ymm6
1727bc3d5698SJohn Baldwin vpaddd %ymm3,%ymm11,%ymm11
1728bc3d5698SJohn Baldwin vpxor %ymm7,%ymm11,%ymm7
1729bc3d5698SJohn Baldwin vpshufb %ymm15,%ymm7,%ymm7
1730bc3d5698SJohn Baldwin vpaddd %ymm6,%ymm12,%ymm12
1731bc3d5698SJohn Baldwin vpxor %ymm2,%ymm12,%ymm2
1732bc3d5698SJohn Baldwin vpslld $12,%ymm2,%ymm14
1733bc3d5698SJohn Baldwin vpsrld $20,%ymm2,%ymm2
1734bc3d5698SJohn Baldwin vpor %ymm2,%ymm14,%ymm2
1735bc3d5698SJohn Baldwin vbroadcasti128 (%r11),%ymm14
1736bc3d5698SJohn Baldwin vpaddd %ymm7,%ymm13,%ymm13
1737bc3d5698SJohn Baldwin vpxor %ymm3,%ymm13,%ymm3
1738bc3d5698SJohn Baldwin vpslld $12,%ymm3,%ymm15
1739bc3d5698SJohn Baldwin vpsrld $20,%ymm3,%ymm3
1740bc3d5698SJohn Baldwin vpor %ymm3,%ymm15,%ymm3
1741bc3d5698SJohn Baldwin vpaddd %ymm2,%ymm10,%ymm10
1742bc3d5698SJohn Baldwin vpxor %ymm6,%ymm10,%ymm6
1743bc3d5698SJohn Baldwin vpshufb %ymm14,%ymm6,%ymm6
1744bc3d5698SJohn Baldwin vpaddd %ymm3,%ymm11,%ymm11
1745bc3d5698SJohn Baldwin vpxor %ymm7,%ymm11,%ymm7
1746bc3d5698SJohn Baldwin vpshufb %ymm14,%ymm7,%ymm7
1747bc3d5698SJohn Baldwin vpaddd %ymm6,%ymm12,%ymm12
1748bc3d5698SJohn Baldwin vpxor %ymm2,%ymm12,%ymm2
1749bc3d5698SJohn Baldwin vpslld $7,%ymm2,%ymm15
1750bc3d5698SJohn Baldwin vpsrld $25,%ymm2,%ymm2
1751bc3d5698SJohn Baldwin vpor %ymm2,%ymm15,%ymm2
1752bc3d5698SJohn Baldwin vbroadcasti128 (%r10),%ymm15
1753bc3d5698SJohn Baldwin vpaddd %ymm7,%ymm13,%ymm13
1754bc3d5698SJohn Baldwin vpxor %ymm3,%ymm13,%ymm3
1755bc3d5698SJohn Baldwin vpslld $7,%ymm3,%ymm14
1756bc3d5698SJohn Baldwin vpsrld $25,%ymm3,%ymm3
1757bc3d5698SJohn Baldwin vpor %ymm3,%ymm14,%ymm3
/* Second round of the pass, diagonal pairing: (ymm8,ymm1,ymm12,ymm7) and
   (ymm9,ymm2,ymm13,ymm4), state words (0,5,10,15) and (1,6,11,12). */
1758bc3d5698SJohn Baldwin vpaddd %ymm1,%ymm8,%ymm8
1759bc3d5698SJohn Baldwin vpxor %ymm7,%ymm8,%ymm7
1760bc3d5698SJohn Baldwin vpshufb %ymm15,%ymm7,%ymm7
1761bc3d5698SJohn Baldwin vpaddd %ymm2,%ymm9,%ymm9
1762bc3d5698SJohn Baldwin vpxor %ymm4,%ymm9,%ymm4
1763bc3d5698SJohn Baldwin vpshufb %ymm15,%ymm4,%ymm4
1764bc3d5698SJohn Baldwin vpaddd %ymm7,%ymm12,%ymm12
1765bc3d5698SJohn Baldwin vpxor %ymm1,%ymm12,%ymm1
1766bc3d5698SJohn Baldwin vpslld $12,%ymm1,%ymm14
1767bc3d5698SJohn Baldwin vpsrld $20,%ymm1,%ymm1
1768bc3d5698SJohn Baldwin vpor %ymm1,%ymm14,%ymm1
1769bc3d5698SJohn Baldwin vbroadcasti128 (%r11),%ymm14
1770bc3d5698SJohn Baldwin vpaddd %ymm4,%ymm13,%ymm13
1771bc3d5698SJohn Baldwin vpxor %ymm2,%ymm13,%ymm2
1772bc3d5698SJohn Baldwin vpslld $12,%ymm2,%ymm15
1773bc3d5698SJohn Baldwin vpsrld $20,%ymm2,%ymm2
1774bc3d5698SJohn Baldwin vpor %ymm2,%ymm15,%ymm2
1775bc3d5698SJohn Baldwin vpaddd %ymm1,%ymm8,%ymm8
1776bc3d5698SJohn Baldwin vpxor %ymm7,%ymm8,%ymm7
1777bc3d5698SJohn Baldwin vpshufb %ymm14,%ymm7,%ymm7
1778bc3d5698SJohn Baldwin vpaddd %ymm2,%ymm9,%ymm9
1779bc3d5698SJohn Baldwin vpxor %ymm4,%ymm9,%ymm4
1780bc3d5698SJohn Baldwin vpshufb %ymm14,%ymm4,%ymm4
1781bc3d5698SJohn Baldwin vpaddd %ymm7,%ymm12,%ymm12
1782bc3d5698SJohn Baldwin vpxor %ymm1,%ymm12,%ymm1
1783bc3d5698SJohn Baldwin vpslld $7,%ymm1,%ymm15
1784bc3d5698SJohn Baldwin vpsrld $25,%ymm1,%ymm1
1785bc3d5698SJohn Baldwin vpor %ymm1,%ymm15,%ymm1
1786bc3d5698SJohn Baldwin vbroadcasti128 (%r10),%ymm15
1787bc3d5698SJohn Baldwin vpaddd %ymm4,%ymm13,%ymm13
1788bc3d5698SJohn Baldwin vpxor %ymm2,%ymm13,%ymm2
1789bc3d5698SJohn Baldwin vpslld $7,%ymm2,%ymm14
1790bc3d5698SJohn Baldwin vpsrld $25,%ymm2,%ymm2
1791bc3d5698SJohn Baldwin vpor %ymm2,%ymm14,%ymm2
/* Swap back: words 10 and 11 return to 64(%rsp) and 96(%rsp), words 8 and 9
   are reloaded from 0(%rsp) and 32(%rsp). */
1792bc3d5698SJohn Baldwin vmovdqa %ymm12,64(%rsp)
1793bc3d5698SJohn Baldwin vmovdqa %ymm13,96(%rsp)
1794bc3d5698SJohn Baldwin vmovdqa 0(%rsp),%ymm12
1795bc3d5698SJohn Baldwin vmovdqa 32(%rsp),%ymm13
/* Remaining diagonals: (ymm10,ymm3,ymm12,ymm5) and (ymm11,ymm0,ymm13,ymm6),
   state words (2,7,8,13) and (3,4,9,14). */
1796bc3d5698SJohn Baldwin vpaddd %ymm3,%ymm10,%ymm10
1797bc3d5698SJohn Baldwin vpxor %ymm5,%ymm10,%ymm5
1798bc3d5698SJohn Baldwin vpshufb %ymm15,%ymm5,%ymm5
1799bc3d5698SJohn Baldwin vpaddd %ymm0,%ymm11,%ymm11
1800bc3d5698SJohn Baldwin vpxor %ymm6,%ymm11,%ymm6
1801bc3d5698SJohn Baldwin vpshufb %ymm15,%ymm6,%ymm6
1802bc3d5698SJohn Baldwin vpaddd %ymm5,%ymm12,%ymm12
1803bc3d5698SJohn Baldwin vpxor %ymm3,%ymm12,%ymm3
1804bc3d5698SJohn Baldwin vpslld $12,%ymm3,%ymm14
1805bc3d5698SJohn Baldwin vpsrld $20,%ymm3,%ymm3
1806bc3d5698SJohn Baldwin vpor %ymm3,%ymm14,%ymm3
1807bc3d5698SJohn Baldwin vbroadcasti128 (%r11),%ymm14
1808bc3d5698SJohn Baldwin vpaddd %ymm6,%ymm13,%ymm13
1809bc3d5698SJohn Baldwin vpxor %ymm0,%ymm13,%ymm0
1810bc3d5698SJohn Baldwin vpslld $12,%ymm0,%ymm15
1811bc3d5698SJohn Baldwin vpsrld $20,%ymm0,%ymm0
1812bc3d5698SJohn Baldwin vpor %ymm0,%ymm15,%ymm0
1813bc3d5698SJohn Baldwin vpaddd %ymm3,%ymm10,%ymm10
1814bc3d5698SJohn Baldwin vpxor %ymm5,%ymm10,%ymm5
1815bc3d5698SJohn Baldwin vpshufb %ymm14,%ymm5,%ymm5
1816bc3d5698SJohn Baldwin vpaddd %ymm0,%ymm11,%ymm11
1817bc3d5698SJohn Baldwin vpxor %ymm6,%ymm11,%ymm6
1818bc3d5698SJohn Baldwin vpshufb %ymm14,%ymm6,%ymm6
1819bc3d5698SJohn Baldwin vpaddd %ymm5,%ymm12,%ymm12
1820bc3d5698SJohn Baldwin vpxor %ymm3,%ymm12,%ymm3
1821bc3d5698SJohn Baldwin vpslld $7,%ymm3,%ymm15
1822bc3d5698SJohn Baldwin vpsrld $25,%ymm3,%ymm3
1823bc3d5698SJohn Baldwin vpor %ymm3,%ymm15,%ymm3
1824bc3d5698SJohn Baldwin vbroadcasti128 (%r10),%ymm15
1825bc3d5698SJohn Baldwin vpaddd %ymm6,%ymm13,%ymm13
1826bc3d5698SJohn Baldwin vpxor %ymm0,%ymm13,%ymm0
1827bc3d5698SJohn Baldwin vpslld $7,%ymm0,%ymm14
1828bc3d5698SJohn Baldwin vpsrld $25,%ymm0,%ymm0
1829bc3d5698SJohn Baldwin vpor %ymm0,%ymm14,%ymm0
1830bc3d5698SJohn Baldwin decl %eax
1831bc3d5698SJohn Baldwin jnz .Loop8x
1832bc3d5698SJohn Baldwin
/* rax served as the pass counter, so rebuild the rsp+512 base. Then add the
   saved starting state to every word and regroup the data. The dword and qword
   unpacks transpose 4x4 groups inside each 128-bit half, and vperm2i128 with
   0x20 and 0x31 joins the two low halves and the two high halves of a register
   pair. Each resulting register is XORed against 32 consecutive input bytes
   further down. */
1833bc3d5698SJohn Baldwin leaq 512(%rsp),%rax
1834bc3d5698SJohn Baldwin vpaddd 128-256(%rcx),%ymm8,%ymm8
1835bc3d5698SJohn Baldwin vpaddd 160-256(%rcx),%ymm9,%ymm9
1836bc3d5698SJohn Baldwin vpaddd 192-256(%rcx),%ymm10,%ymm10
1837bc3d5698SJohn Baldwin vpaddd 224-256(%rcx),%ymm11,%ymm11
1838bc3d5698SJohn Baldwin
1839bc3d5698SJohn Baldwin vpunpckldq %ymm9,%ymm8,%ymm14
1840bc3d5698SJohn Baldwin vpunpckldq %ymm11,%ymm10,%ymm15
1841bc3d5698SJohn Baldwin vpunpckhdq %ymm9,%ymm8,%ymm8
1842bc3d5698SJohn Baldwin vpunpckhdq %ymm11,%ymm10,%ymm10
1843bc3d5698SJohn Baldwin vpunpcklqdq %ymm15,%ymm14,%ymm9
1844bc3d5698SJohn Baldwin vpunpckhqdq %ymm15,%ymm14,%ymm14
1845bc3d5698SJohn Baldwin vpunpcklqdq %ymm10,%ymm8,%ymm11
1846bc3d5698SJohn Baldwin vpunpckhqdq %ymm10,%ymm8,%ymm8
1847bc3d5698SJohn Baldwin vpaddd 256-256(%rcx),%ymm0,%ymm0
1848bc3d5698SJohn Baldwin vpaddd 288-256(%rcx),%ymm1,%ymm1
1849bc3d5698SJohn Baldwin vpaddd 320-256(%rcx),%ymm2,%ymm2
1850bc3d5698SJohn Baldwin vpaddd 352-256(%rcx),%ymm3,%ymm3
1851bc3d5698SJohn Baldwin
1852bc3d5698SJohn Baldwin vpunpckldq %ymm1,%ymm0,%ymm10
1853bc3d5698SJohn Baldwin vpunpckldq %ymm3,%ymm2,%ymm15
1854bc3d5698SJohn Baldwin vpunpckhdq %ymm1,%ymm0,%ymm0
1855bc3d5698SJohn Baldwin vpunpckhdq %ymm3,%ymm2,%ymm2
1856bc3d5698SJohn Baldwin vpunpcklqdq %ymm15,%ymm10,%ymm1
1857bc3d5698SJohn Baldwin vpunpckhqdq %ymm15,%ymm10,%ymm10
1858bc3d5698SJohn Baldwin vpunpcklqdq %ymm2,%ymm0,%ymm3
1859bc3d5698SJohn Baldwin vpunpckhqdq %ymm2,%ymm0,%ymm0
1860bc3d5698SJohn Baldwin vperm2i128 $0x20,%ymm1,%ymm9,%ymm15
1861bc3d5698SJohn Baldwin vperm2i128 $0x31,%ymm1,%ymm9,%ymm1
1862bc3d5698SJohn Baldwin vperm2i128 $0x20,%ymm10,%ymm14,%ymm9
1863bc3d5698SJohn Baldwin vperm2i128 $0x31,%ymm10,%ymm14,%ymm10
1864bc3d5698SJohn Baldwin vperm2i128 $0x20,%ymm3,%ymm11,%ymm14
1865bc3d5698SJohn Baldwin vperm2i128 $0x31,%ymm3,%ymm11,%ymm3
1866bc3d5698SJohn Baldwin vperm2i128 $0x20,%ymm0,%ymm8,%ymm11
1867bc3d5698SJohn Baldwin vperm2i128 $0x31,%ymm0,%ymm8,%ymm0
/* Two finished vectors are parked at 0(%rsp) and 32(%rsp) so that ymm15 and
   ymm9 can receive words 10 and 11, which are still on the stack. */
1868bc3d5698SJohn Baldwin vmovdqa %ymm15,0(%rsp)
1869bc3d5698SJohn Baldwin vmovdqa %ymm9,32(%rsp)
1870bc3d5698SJohn Baldwin vmovdqa 64(%rsp),%ymm15
1871bc3d5698SJohn Baldwin vmovdqa 96(%rsp),%ymm9
1872bc3d5698SJohn Baldwin
1873bc3d5698SJohn Baldwin vpaddd 384-512(%rax),%ymm12,%ymm12
1874bc3d5698SJohn Baldwin vpaddd 416-512(%rax),%ymm13,%ymm13
1875bc3d5698SJohn Baldwin vpaddd 448-512(%rax),%ymm15,%ymm15
1876bc3d5698SJohn Baldwin vpaddd 480-512(%rax),%ymm9,%ymm9
1877bc3d5698SJohn Baldwin
1878bc3d5698SJohn Baldwin vpunpckldq %ymm13,%ymm12,%ymm2
1879bc3d5698SJohn Baldwin vpunpckldq %ymm9,%ymm15,%ymm8
1880bc3d5698SJohn Baldwin vpunpckhdq %ymm13,%ymm12,%ymm12
1881bc3d5698SJohn Baldwin vpunpckhdq %ymm9,%ymm15,%ymm15
1882bc3d5698SJohn Baldwin vpunpcklqdq %ymm8,%ymm2,%ymm13
1883bc3d5698SJohn Baldwin vpunpckhqdq %ymm8,%ymm2,%ymm2
1884bc3d5698SJohn Baldwin vpunpcklqdq %ymm15,%ymm12,%ymm9
1885bc3d5698SJohn Baldwin vpunpckhqdq %ymm15,%ymm12,%ymm12
1886bc3d5698SJohn Baldwin vpaddd 512-512(%rax),%ymm4,%ymm4
1887bc3d5698SJohn Baldwin vpaddd 544-512(%rax),%ymm5,%ymm5
1888bc3d5698SJohn Baldwin vpaddd 576-512(%rax),%ymm6,%ymm6
1889bc3d5698SJohn Baldwin vpaddd 608-512(%rax),%ymm7,%ymm7
1890bc3d5698SJohn Baldwin
1891bc3d5698SJohn Baldwin vpunpckldq %ymm5,%ymm4,%ymm15
1892bc3d5698SJohn Baldwin vpunpckldq %ymm7,%ymm6,%ymm8
1893bc3d5698SJohn Baldwin vpunpckhdq %ymm5,%ymm4,%ymm4
1894bc3d5698SJohn Baldwin vpunpckhdq %ymm7,%ymm6,%ymm6
1895bc3d5698SJohn Baldwin vpunpcklqdq %ymm8,%ymm15,%ymm5
1896bc3d5698SJohn Baldwin vpunpckhqdq %ymm8,%ymm15,%ymm15
1897bc3d5698SJohn Baldwin vpunpcklqdq %ymm6,%ymm4,%ymm7
1898bc3d5698SJohn Baldwin vpunpckhqdq %ymm6,%ymm4,%ymm4
1899bc3d5698SJohn Baldwin vperm2i128 $0x20,%ymm5,%ymm13,%ymm8
1900bc3d5698SJohn Baldwin vperm2i128 $0x31,%ymm5,%ymm13,%ymm5
1901bc3d5698SJohn Baldwin vperm2i128 $0x20,%ymm15,%ymm2,%ymm13
1902bc3d5698SJohn Baldwin vperm2i128 $0x31,%ymm15,%ymm2,%ymm15
1903bc3d5698SJohn Baldwin vperm2i128 $0x20,%ymm7,%ymm9,%ymm2
1904bc3d5698SJohn Baldwin vperm2i128 $0x31,%ymm7,%ymm9,%ymm7
1905bc3d5698SJohn Baldwin vperm2i128 $0x20,%ymm4,%ymm12,%ymm9
1906bc3d5698SJohn Baldwin vperm2i128 $0x31,%ymm4,%ymm12,%ymm4
/* Bring the two parked vectors back as ymm6 and ymm12. */
1907bc3d5698SJohn Baldwin vmovdqa 0(%rsp),%ymm6
1908bc3d5698SJohn Baldwin vmovdqa 32(%rsp),%ymm12
1909bc3d5698SJohn Baldwin
/* Fewer than 512 bytes left: take the partial path. Otherwise XOR 512 bytes
   in four 128-byte groups. Keystream register order is ymm6,8,1,5 then
   ymm12,13,10,15 then ymm14,2,3,7 then ymm11,9,0,4. */
1910bc3d5698SJohn Baldwin cmpq $512,%rdx
1911bc3d5698SJohn Baldwin jb .Ltail8x
1912bc3d5698SJohn Baldwin
1913bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm6,%ymm6
1914bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm8,%ymm8
1915bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm1,%ymm1
1916bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm5,%ymm5
1917bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi
1918bc3d5698SJohn Baldwin vmovdqu %ymm6,0(%rdi)
1919bc3d5698SJohn Baldwin vmovdqu %ymm8,32(%rdi)
1920bc3d5698SJohn Baldwin vmovdqu %ymm1,64(%rdi)
1921bc3d5698SJohn Baldwin vmovdqu %ymm5,96(%rdi)
1922bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi
1923bc3d5698SJohn Baldwin
1924bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm12,%ymm12
1925bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm13,%ymm13
1926bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm10,%ymm10
1927bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm15,%ymm15
1928bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi
1929bc3d5698SJohn Baldwin vmovdqu %ymm12,0(%rdi)
1930bc3d5698SJohn Baldwin vmovdqu %ymm13,32(%rdi)
1931bc3d5698SJohn Baldwin vmovdqu %ymm10,64(%rdi)
1932bc3d5698SJohn Baldwin vmovdqu %ymm15,96(%rdi)
1933bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi
1934bc3d5698SJohn Baldwin
1935bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm14,%ymm14
1936bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm2,%ymm2
1937bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm3,%ymm3
1938bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm7,%ymm7
1939bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi
1940bc3d5698SJohn Baldwin vmovdqu %ymm14,0(%rdi)
1941bc3d5698SJohn Baldwin vmovdqu %ymm2,32(%rdi)
1942bc3d5698SJohn Baldwin vmovdqu %ymm3,64(%rdi)
1943bc3d5698SJohn Baldwin vmovdqu %ymm7,96(%rdi)
1944bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi
1945bc3d5698SJohn Baldwin
1946bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm11,%ymm11
1947bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm9,%ymm9
1948bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm0,%ymm0
1949bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm4,%ymm4
1950bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi
1951bc3d5698SJohn Baldwin vmovdqu %ymm11,0(%rdi)
1952bc3d5698SJohn Baldwin vmovdqu %ymm9,32(%rdi)
1953bc3d5698SJohn Baldwin vmovdqu %ymm0,64(%rdi)
1954bc3d5698SJohn Baldwin vmovdqu %ymm4,96(%rdi)
1955bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi
1956bc3d5698SJohn Baldwin
/* Continue while input remains. A length that is an exact multiple of 512
   finishes here without touching the tail code. */
1957bc3d5698SJohn Baldwin subq $512,%rdx
1958bc3d5698SJohn Baldwin jnz .Loop_outer8x
1959bc3d5698SJohn Baldwin
1960bc3d5698SJohn Baldwin jmp .Ldone8x
1961bc3d5698SJohn Baldwin
/* Fewer than 512 bytes remain. Dispatch on the largest multiple of 64 that
   does not exceed rdx. Each .LNNN_or_more8x block XORs NNN bytes with vector
   instructions. Its je then tests the flags left by the matching cmpq here,
   since the AVX instructions in between do not write flags, and so it exits
   when rdx equals NNN exactly. Otherwise the block advances both pointers,
   copies the next 64 keystream bytes to 0(%rsp), clears the index r10,
   reduces rdx by NNN and hands over to .Loop_tail8x for the last 1..63 bytes. */
1962bc3d5698SJohn Baldwin.Ltail8x:
1963bc3d5698SJohn Baldwin cmpq $448,%rdx
1964bc3d5698SJohn Baldwin jae .L448_or_more8x
1965bc3d5698SJohn Baldwin cmpq $384,%rdx
1966bc3d5698SJohn Baldwin jae .L384_or_more8x
1967bc3d5698SJohn Baldwin cmpq $320,%rdx
1968bc3d5698SJohn Baldwin jae .L320_or_more8x
1969bc3d5698SJohn Baldwin cmpq $256,%rdx
1970bc3d5698SJohn Baldwin jae .L256_or_more8x
1971bc3d5698SJohn Baldwin cmpq $192,%rdx
1972bc3d5698SJohn Baldwin jae .L192_or_more8x
1973bc3d5698SJohn Baldwin cmpq $128,%rdx
1974bc3d5698SJohn Baldwin jae .L128_or_more8x
1975bc3d5698SJohn Baldwin cmpq $64,%rdx
1976bc3d5698SJohn Baldwin jae .L64_or_more8x
1977bc3d5698SJohn Baldwin
/* Under 64 bytes: no whole vector can be used, stage the first 64 keystream
   bytes for the byte loop. rdx is expected to be nonzero here, a zero count
   would make the decq loop wrap (ChaCha20_ctr32 rejects a zero length before
   dispatch, confirm that every path into this function does). */
1978bc3d5698SJohn Baldwin xorq %r10,%r10
1979bc3d5698SJohn Baldwin vmovdqa %ymm6,0(%rsp)
1980bc3d5698SJohn Baldwin vmovdqa %ymm8,32(%rsp)
1981bc3d5698SJohn Baldwin jmp .Loop_tail8x
1982bc3d5698SJohn Baldwin
1983bc3d5698SJohn Baldwin.align 32
1984bc3d5698SJohn Baldwin.L64_or_more8x:
1985bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm6,%ymm6
1986bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm8,%ymm8
1987bc3d5698SJohn Baldwin vmovdqu %ymm6,0(%rdi)
1988bc3d5698SJohn Baldwin vmovdqu %ymm8,32(%rdi)
1989bc3d5698SJohn Baldwin je .Ldone8x
1990bc3d5698SJohn Baldwin
1991bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi
1992bc3d5698SJohn Baldwin xorq %r10,%r10
1993bc3d5698SJohn Baldwin vmovdqa %ymm1,0(%rsp)
1994bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi
1995bc3d5698SJohn Baldwin subq $64,%rdx
1996bc3d5698SJohn Baldwin vmovdqa %ymm5,32(%rsp)
1997bc3d5698SJohn Baldwin jmp .Loop_tail8x
1998bc3d5698SJohn Baldwin
1999bc3d5698SJohn Baldwin.align 32
2000bc3d5698SJohn Baldwin.L128_or_more8x:
2001bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm6,%ymm6
2002bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm8,%ymm8
2003bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm1,%ymm1
2004bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm5,%ymm5
2005bc3d5698SJohn Baldwin vmovdqu %ymm6,0(%rdi)
2006bc3d5698SJohn Baldwin vmovdqu %ymm8,32(%rdi)
2007bc3d5698SJohn Baldwin vmovdqu %ymm1,64(%rdi)
2008bc3d5698SJohn Baldwin vmovdqu %ymm5,96(%rdi)
2009bc3d5698SJohn Baldwin je .Ldone8x
2010bc3d5698SJohn Baldwin
2011bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi
2012bc3d5698SJohn Baldwin xorq %r10,%r10
2013bc3d5698SJohn Baldwin vmovdqa %ymm12,0(%rsp)
2014bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi
2015bc3d5698SJohn Baldwin subq $128,%rdx
2016bc3d5698SJohn Baldwin vmovdqa %ymm13,32(%rsp)
2017bc3d5698SJohn Baldwin jmp .Loop_tail8x
2018bc3d5698SJohn Baldwin
2019bc3d5698SJohn Baldwin.align 32
2020bc3d5698SJohn Baldwin.L192_or_more8x:
2021bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm6,%ymm6
2022bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm8,%ymm8
2023bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm1,%ymm1
2024bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm5,%ymm5
2025bc3d5698SJohn Baldwin vpxor 128(%rsi),%ymm12,%ymm12
2026bc3d5698SJohn Baldwin vpxor 160(%rsi),%ymm13,%ymm13
2027bc3d5698SJohn Baldwin vmovdqu %ymm6,0(%rdi)
2028bc3d5698SJohn Baldwin vmovdqu %ymm8,32(%rdi)
2029bc3d5698SJohn Baldwin vmovdqu %ymm1,64(%rdi)
2030bc3d5698SJohn Baldwin vmovdqu %ymm5,96(%rdi)
2031bc3d5698SJohn Baldwin vmovdqu %ymm12,128(%rdi)
2032bc3d5698SJohn Baldwin vmovdqu %ymm13,160(%rdi)
2033bc3d5698SJohn Baldwin je .Ldone8x
2034bc3d5698SJohn Baldwin
2035bc3d5698SJohn Baldwin leaq 192(%rsi),%rsi
2036bc3d5698SJohn Baldwin xorq %r10,%r10
2037bc3d5698SJohn Baldwin vmovdqa %ymm10,0(%rsp)
2038bc3d5698SJohn Baldwin leaq 192(%rdi),%rdi
2039bc3d5698SJohn Baldwin subq $192,%rdx
2040bc3d5698SJohn Baldwin vmovdqa %ymm15,32(%rsp)
2041bc3d5698SJohn Baldwin jmp .Loop_tail8x
2042bc3d5698SJohn Baldwin
2043bc3d5698SJohn Baldwin.align 32
2044bc3d5698SJohn Baldwin.L256_or_more8x:
2045bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm6,%ymm6
2046bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm8,%ymm8
2047bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm1,%ymm1
2048bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm5,%ymm5
2049bc3d5698SJohn Baldwin vpxor 128(%rsi),%ymm12,%ymm12
2050bc3d5698SJohn Baldwin vpxor 160(%rsi),%ymm13,%ymm13
2051bc3d5698SJohn Baldwin vpxor 192(%rsi),%ymm10,%ymm10
2052bc3d5698SJohn Baldwin vpxor 224(%rsi),%ymm15,%ymm15
2053bc3d5698SJohn Baldwin vmovdqu %ymm6,0(%rdi)
2054bc3d5698SJohn Baldwin vmovdqu %ymm8,32(%rdi)
2055bc3d5698SJohn Baldwin vmovdqu %ymm1,64(%rdi)
2056bc3d5698SJohn Baldwin vmovdqu %ymm5,96(%rdi)
2057bc3d5698SJohn Baldwin vmovdqu %ymm12,128(%rdi)
2058bc3d5698SJohn Baldwin vmovdqu %ymm13,160(%rdi)
2059bc3d5698SJohn Baldwin vmovdqu %ymm10,192(%rdi)
2060bc3d5698SJohn Baldwin vmovdqu %ymm15,224(%rdi)
2061bc3d5698SJohn Baldwin je .Ldone8x
2062bc3d5698SJohn Baldwin
2063bc3d5698SJohn Baldwin leaq 256(%rsi),%rsi
2064bc3d5698SJohn Baldwin xorq %r10,%r10
2065bc3d5698SJohn Baldwin vmovdqa %ymm14,0(%rsp)
2066bc3d5698SJohn Baldwin leaq 256(%rdi),%rdi
2067bc3d5698SJohn Baldwin subq $256,%rdx
2068bc3d5698SJohn Baldwin vmovdqa %ymm2,32(%rsp)
2069bc3d5698SJohn Baldwin jmp .Loop_tail8x
2070bc3d5698SJohn Baldwin
2071bc3d5698SJohn Baldwin.align 32
2072bc3d5698SJohn Baldwin.L320_or_more8x:
2073bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm6,%ymm6
2074bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm8,%ymm8
2075bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm1,%ymm1
2076bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm5,%ymm5
2077bc3d5698SJohn Baldwin vpxor 128(%rsi),%ymm12,%ymm12
2078bc3d5698SJohn Baldwin vpxor 160(%rsi),%ymm13,%ymm13
2079bc3d5698SJohn Baldwin vpxor 192(%rsi),%ymm10,%ymm10
2080bc3d5698SJohn Baldwin vpxor 224(%rsi),%ymm15,%ymm15
2081bc3d5698SJohn Baldwin vpxor 256(%rsi),%ymm14,%ymm14
2082bc3d5698SJohn Baldwin vpxor 288(%rsi),%ymm2,%ymm2
2083bc3d5698SJohn Baldwin vmovdqu %ymm6,0(%rdi)
2084bc3d5698SJohn Baldwin vmovdqu %ymm8,32(%rdi)
2085bc3d5698SJohn Baldwin vmovdqu %ymm1,64(%rdi)
2086bc3d5698SJohn Baldwin vmovdqu %ymm5,96(%rdi)
2087bc3d5698SJohn Baldwin vmovdqu %ymm12,128(%rdi)
2088bc3d5698SJohn Baldwin vmovdqu %ymm13,160(%rdi)
2089bc3d5698SJohn Baldwin vmovdqu %ymm10,192(%rdi)
2090bc3d5698SJohn Baldwin vmovdqu %ymm15,224(%rdi)
2091bc3d5698SJohn Baldwin vmovdqu %ymm14,256(%rdi)
2092bc3d5698SJohn Baldwin vmovdqu %ymm2,288(%rdi)
2093bc3d5698SJohn Baldwin je .Ldone8x
2094bc3d5698SJohn Baldwin
2095bc3d5698SJohn Baldwin leaq 320(%rsi),%rsi
2096bc3d5698SJohn Baldwin xorq %r10,%r10
2097bc3d5698SJohn Baldwin vmovdqa %ymm3,0(%rsp)
2098bc3d5698SJohn Baldwin leaq 320(%rdi),%rdi
2099bc3d5698SJohn Baldwin subq $320,%rdx
2100bc3d5698SJohn Baldwin vmovdqa %ymm7,32(%rsp)
2101bc3d5698SJohn Baldwin jmp .Loop_tail8x
2102bc3d5698SJohn Baldwin
2103bc3d5698SJohn Baldwin.align 32
2104bc3d5698SJohn Baldwin.L384_or_more8x:
2105bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm6,%ymm6
2106bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm8,%ymm8
2107bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm1,%ymm1
2108bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm5,%ymm5
2109bc3d5698SJohn Baldwin vpxor 128(%rsi),%ymm12,%ymm12
2110bc3d5698SJohn Baldwin vpxor 160(%rsi),%ymm13,%ymm13
2111bc3d5698SJohn Baldwin vpxor 192(%rsi),%ymm10,%ymm10
2112bc3d5698SJohn Baldwin vpxor 224(%rsi),%ymm15,%ymm15
2113bc3d5698SJohn Baldwin vpxor 256(%rsi),%ymm14,%ymm14
2114bc3d5698SJohn Baldwin vpxor 288(%rsi),%ymm2,%ymm2
2115bc3d5698SJohn Baldwin vpxor 320(%rsi),%ymm3,%ymm3
2116bc3d5698SJohn Baldwin vpxor 352(%rsi),%ymm7,%ymm7
2117bc3d5698SJohn Baldwin vmovdqu %ymm6,0(%rdi)
2118bc3d5698SJohn Baldwin vmovdqu %ymm8,32(%rdi)
2119bc3d5698SJohn Baldwin vmovdqu %ymm1,64(%rdi)
2120bc3d5698SJohn Baldwin vmovdqu %ymm5,96(%rdi)
2121bc3d5698SJohn Baldwin vmovdqu %ymm12,128(%rdi)
2122bc3d5698SJohn Baldwin vmovdqu %ymm13,160(%rdi)
2123bc3d5698SJohn Baldwin vmovdqu %ymm10,192(%rdi)
2124bc3d5698SJohn Baldwin vmovdqu %ymm15,224(%rdi)
2125bc3d5698SJohn Baldwin vmovdqu %ymm14,256(%rdi)
2126bc3d5698SJohn Baldwin vmovdqu %ymm2,288(%rdi)
2127bc3d5698SJohn Baldwin vmovdqu %ymm3,320(%rdi)
2128bc3d5698SJohn Baldwin vmovdqu %ymm7,352(%rdi)
2129bc3d5698SJohn Baldwin je .Ldone8x
2130bc3d5698SJohn Baldwin
2131bc3d5698SJohn Baldwin leaq 384(%rsi),%rsi
2132bc3d5698SJohn Baldwin xorq %r10,%r10
2133bc3d5698SJohn Baldwin vmovdqa %ymm11,0(%rsp)
2134bc3d5698SJohn Baldwin leaq 384(%rdi),%rdi
2135bc3d5698SJohn Baldwin subq $384,%rdx
2136bc3d5698SJohn Baldwin vmovdqa %ymm9,32(%rsp)
2137bc3d5698SJohn Baldwin jmp .Loop_tail8x
2138bc3d5698SJohn Baldwin
2139bc3d5698SJohn Baldwin.align 32
2140bc3d5698SJohn Baldwin.L448_or_more8x:
2141bc3d5698SJohn Baldwin vpxor 0(%rsi),%ymm6,%ymm6
2142bc3d5698SJohn Baldwin vpxor 32(%rsi),%ymm8,%ymm8
2143bc3d5698SJohn Baldwin vpxor 64(%rsi),%ymm1,%ymm1
2144bc3d5698SJohn Baldwin vpxor 96(%rsi),%ymm5,%ymm5
2145bc3d5698SJohn Baldwin vpxor 128(%rsi),%ymm12,%ymm12
2146bc3d5698SJohn Baldwin vpxor 160(%rsi),%ymm13,%ymm13
2147bc3d5698SJohn Baldwin vpxor 192(%rsi),%ymm10,%ymm10
2148bc3d5698SJohn Baldwin vpxor 224(%rsi),%ymm15,%ymm15
2149bc3d5698SJohn Baldwin vpxor 256(%rsi),%ymm14,%ymm14
2150bc3d5698SJohn Baldwin vpxor 288(%rsi),%ymm2,%ymm2
2151bc3d5698SJohn Baldwin vpxor 320(%rsi),%ymm3,%ymm3
2152bc3d5698SJohn Baldwin vpxor 352(%rsi),%ymm7,%ymm7
2153bc3d5698SJohn Baldwin vpxor 384(%rsi),%ymm11,%ymm11
2154bc3d5698SJohn Baldwin vpxor 416(%rsi),%ymm9,%ymm9
2155bc3d5698SJohn Baldwin vmovdqu %ymm6,0(%rdi)
2156bc3d5698SJohn Baldwin vmovdqu %ymm8,32(%rdi)
2157bc3d5698SJohn Baldwin vmovdqu %ymm1,64(%rdi)
2158bc3d5698SJohn Baldwin vmovdqu %ymm5,96(%rdi)
2159bc3d5698SJohn Baldwin vmovdqu %ymm12,128(%rdi)
2160bc3d5698SJohn Baldwin vmovdqu %ymm13,160(%rdi)
2161bc3d5698SJohn Baldwin vmovdqu %ymm10,192(%rdi)
2162bc3d5698SJohn Baldwin vmovdqu %ymm15,224(%rdi)
2163bc3d5698SJohn Baldwin vmovdqu %ymm14,256(%rdi)
2164bc3d5698SJohn Baldwin vmovdqu %ymm2,288(%rdi)
2165bc3d5698SJohn Baldwin vmovdqu %ymm3,320(%rdi)
2166bc3d5698SJohn Baldwin vmovdqu %ymm7,352(%rdi)
2167bc3d5698SJohn Baldwin vmovdqu %ymm11,384(%rdi)
2168bc3d5698SJohn Baldwin vmovdqu %ymm9,416(%rdi)
2169bc3d5698SJohn Baldwin je .Ldone8x
2170bc3d5698SJohn Baldwin
2171bc3d5698SJohn Baldwin leaq 448(%rsi),%rsi
2172bc3d5698SJohn Baldwin xorq %r10,%r10
2173bc3d5698SJohn Baldwin vmovdqa %ymm0,0(%rsp)
2174bc3d5698SJohn Baldwin leaq 448(%rdi),%rdi
2175bc3d5698SJohn Baldwin subq $448,%rdx
2176bc3d5698SJohn Baldwin vmovdqa %ymm4,32(%rsp)
2177bc3d5698SJohn Baldwin
/* Byte loop for the last rdx bytes: out[r10] = in[r10] xor the keystream byte
   at r10 in the stack scratch. eax and ecx are plain scratch at this point.
   The store uses -1(%rdi,%r10) because r10 was already incremented. */
2178bc3d5698SJohn Baldwin.Loop_tail8x:
2179bc3d5698SJohn Baldwin movzbl (%rsi,%r10,1),%eax
2180bc3d5698SJohn Baldwin movzbl (%rsp,%r10,1),%ecx
2181bc3d5698SJohn Baldwin leaq 1(%r10),%r10
2182bc3d5698SJohn Baldwin xorl %ecx,%eax
2183bc3d5698SJohn Baldwin movb %al,-1(%rdi,%r10,1)
2184bc3d5698SJohn Baldwin decq %rdx
2185bc3d5698SJohn Baldwin jnz .Loop_tail8x
2186bc3d5698SJohn Baldwin
/* Common exit. vzeroall clears every ymm register, which held state and
   keystream. rsp is restored from r9, and the .byte pair 0xf3,0xc3 encodes
   rep ret.
   NOTE(review): the stack copies of the state and of the last keystream block
   in this frame are not overwritten before returning. Confirm whether that is
   acceptable for the callers. */
2187bc3d5698SJohn Baldwin.Ldone8x:
2188bc3d5698SJohn Baldwin vzeroall
2189bc3d5698SJohn Baldwin leaq (%r9),%rsp
2190bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp
2191bc3d5698SJohn Baldwin.L8x_epilogue:
2192bc3d5698SJohn Baldwin .byte 0xf3,0xc3
2193bc3d5698SJohn Baldwin.cfi_endproc
2194bc3d5698SJohn Baldwin.size ChaCha20_8x,.-ChaCha20_8x
/* ELF note placed in .note.gnu.property. Layout as emitted below: name size
   (1f - 0f), descriptor size (4f - 1f), note type 5, the name bytes "GNU" with
   a terminating zero, then one property record with type 0xc0000002, a 4-byte
   payload and the value 3. Presumably this is the x86 feature-1 property
   advertising IBT and SHSTK support, confirm against the x86-64 psABI. */
2195*c0855eaaSJohn Baldwin .section ".note.gnu.property", "a"
2196*c0855eaaSJohn Baldwin .p2align 3
2197*c0855eaaSJohn Baldwin .long 1f - 0f
2198*c0855eaaSJohn Baldwin .long 4f - 1f
2199*c0855eaaSJohn Baldwin .long 5
2200*c0855eaaSJohn Baldwin0:
2201*c0855eaaSJohn Baldwin # "GNU" encoded with .byte, since .asciz isn't supported 2202*c0855eaaSJohn Baldwin # on Solaris. 2203*c0855eaaSJohn Baldwin .byte 0x47 2204*c0855eaaSJohn Baldwin .byte 0x4e 2205*c0855eaaSJohn Baldwin .byte 0x55 2206*c0855eaaSJohn Baldwin .byte 0 2207*c0855eaaSJohn Baldwin1: 2208*c0855eaaSJohn Baldwin .p2align 3 2209*c0855eaaSJohn Baldwin .long 0xc0000002 2210*c0855eaaSJohn Baldwin .long 3f - 2f 2211*c0855eaaSJohn Baldwin2: 2212*c0855eaaSJohn Baldwin .long 3 2213*c0855eaaSJohn Baldwin3: 2214*c0855eaaSJohn Baldwin .p2align 3 2215*c0855eaaSJohn Baldwin4: 2216