/* Do not modify. This file is auto-generated from chacha-x86.pl. */
#ifdef PIC
.text
/*
 * ChaCha20_ctr32 -- i386 entry point, AT&T syntax, arguments on the stack.
 *
 * Argument slots as seen after the four register pushes below:
 *   20(%esp)  output pointer
 *   24(%esp)  input pointer
 *   28(%esp)  length in bytes (the function returns at once when it is 0)
 *   32(%esp)  pointer to eight 32-bit key words
 *   36(%esp)  pointer to four 32-bit counter/nonce words
 * NOTE(review): this matches the usual OpenSSL prototype
 *   ChaCha20_ctr32(out, inp, len, key, counter) -- confirm against the
 *   C declaration, which is not visible in this file.
 *
 * The routine first tests two bits of OPENSSL_ia32cap_P (bit 24 of word 0
 * and bit 9 of word 1; presumably FXSR and SSSE3 in OpenSSL's capability
 * layout -- confirm against the OPENSSL_ia32cap documentation).  When both
 * are set it jumps into ChaCha20_ssse3 at .Lssse3_shortcut with
 *   %eax = run-time address of .Lpic_point
 *   %ebp = address of OPENSSL_ia32cap_P
 * Otherwise it runs the scalar path below.
 *
 * Scalar path frame after "subl $132,%esp":
 *   0..60(%esp)     16-word working state (words not currently in registers)
 *   80..108(%esp)   copy of the eight key words
 *   112..124(%esp)  copy of the four counter words; word 112 is stored
 *                   decremented by one and incremented once per block
 *   128(%esp)       spill slot for the round-loop counter
 *   152/156/160(%esp) the out/inp/len argument slots (20/24/28 + 132)
 *
 * %ebp, %ebx, %esi and %edi are saved on entry and restored before ret.
 */
.globl	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.align	16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* len == 0: nothing to do */
	xorl	%eax,%eax
	cmpl	28(%esp),%eax
	je	.L000no_data
	/* call/pop pair yields the run-time address of .Lpic_point in %eax */
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	/* 16777216 = 1<<24 in capability word 0 */
	testl	$16777216,(%ebp)
	jz	.L001x86
	/* 512 = 1<<9 in capability word 1 */
	testl	$512,4(%ebp)
	jz	.L001x86
	jmp	.Lssse3_shortcut
.L001x86:
	/* %esi = key pointer, %edi = counter pointer; then build the frame */
	movl	32(%esp),%esi
	movl	36(%esp),%edi
	subl	$132,%esp
	/* copy key words 0..7 to 80..108(%esp) */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	/* copy counter words to 112..124(%esp); word 0 is stored minus one
	   because .L002entry adds one before every block */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.align	16
.L003outer_loop:
	/* write the advanced inp/out pointers and remaining length back to
	   their argument slots */
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	/* Load the block's initial state.  The four immediates are
	   0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 (the ASCII string
	   "expand 32-byte k" as little-endian words). */
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	/* bump the block counter and remember it for the next block */
	addl	$1,%edx
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	/* 10 iterations; each iteration below contains eight
	   add/xor/rotate(16,12,8,7) groups */
	movl	$10,%ebx
	jmp	.L004loop
.align	16
.L004loop:
	addl	%ebp,%eax
	/* the iteration counter lives in 128(%esp) while %ebx is reused */
	movl	%ebx,128(%esp)
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	/* reload the iteration counter spilled at the top of the loop */
	movl	128(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop
	/* Rounds done: add the initial state back in.  %ebx = remaining
	   length; fewer than 64 bytes goes to the byte-wise tail. */
	movl	160(%esp),%ebx
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail
	/* Full block: %ebx = input pointer, %eax = output pointer (word 0 of
	   the result is parked at (%esp) while %eax is reused). */
	movl	156(%esp),%ebx
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)
	movl	152(%esp),%eax
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	/* advance input by one block */
	leal	64(%ebx),%ebx
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	/* advance output by one block; len -= 64 and loop while non-zero */
	leal	64(%eax),%eax
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done
.L005tail:
	/* Partial block: finish the state additions, leaving the 64-byte
	   key stream at 0..60(%esp). */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	/* %ebp = input pointer, %ecx = output pointer, %esi = byte index;
	   %ebx still holds the remaining length (1..63) */
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx
	movl	%esi,44(%esp)
	xorl	%esi,%esi
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	/* out[i] = inp[i] ^ keystream[i], one byte at a time */
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
/*
 * ChaCha20_ssse3 -- SSE path processing one 64-byte block per iteration.
 *
 * Normally reached through .Lssse3_shortcut from ChaCha20_ctr32, with the
 * same five stack arguments still in place.
 * NOTE(review): the code at .Lssse3_shortcut reads 4(%ebp) and uses %eax as
 * the address of .Lpic_point; only ChaCha20_ctr32 sets those registers.  The
 * global ChaCha20_ssse3 prologue below does not, so entering via the global
 * label relies on state this file does not establish -- confirm that no
 * caller does so.
 *
 * Frame: the incoming %esp is kept in %ebp, 524 bytes are reserved, %esp is
 * aligned down to 64 bytes, and the incoming %esp is saved at 512(%esp) for
 * the epilogue.  0..63(%esp) holds the four 16-byte rows of the input state
 * (constants, key low, key high, counter row).
 *
 * Register roles in the loop: %edi = out, %esi = inp, %ecx = bytes left,
 * %edx = iteration counter, %eax = .Lssse3_data, %xmm0-%xmm3 = state rows,
 * %xmm6/%xmm7 = byte-shuffle masks (rows 0 and 1 of .Lssse3_data).
 */
.globl	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.align	16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lssse3_shortcut:
	/* 2048 = 1<<11 in capability word 1: take the XOP path when set */
	testl	$2048,4(%ebp)
	jnz	.Lxop_shortcut
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	/* %eax: address of .Lpic_point -> address of .Lssse3_data */
	leal	.Lssse3_data-.Lpic_point(%eax),%eax
	movdqu	(%ebx),%xmm3
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	/* this dword store is overwritten by the 16-byte store to
	   48(%esp) four instructions below */
	movl	%ebp,48(%esp)
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L009loop1x
.align	16
.L010outer1x:
	/* next block: counter row += {1,0,0,0} (row at .Lssse3_data+80),
	   then reload the other three rows from the frame */
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L009loop1x
.align	16
.L009loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	/* 66 0F 38 00 DE = pshufb %xmm6,%xmm3 (mask row 0: rotate each
	   dword left by 16) */
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	/* shift pair + por = rotate each dword left by 12 */
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	/* 66 0F 38 00 DF = pshufb %xmm7,%xmm3 (mask row 1: rotate each
	   dword left by 8) */
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	/* shift pair + por = rotate each dword left by 7 */
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	/* rotate the lanes of rows 1..3 by 1, 2 and 3 positions */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	/* undo the lane rotation (rows 1 and 3 use the opposite shuffles) */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L009loop1x
	/* add the saved input state to get the key-stream block */
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L011tail
	/* full block: out = inp ^ keystream, unaligned loads and stores */
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L010outer1x
	jmp	.L012done
.L011tail:
	/* partial block: spill the key stream to 0..63(%esp) and XOR the
	   remaining %ecx bytes one at a time (%ebp = byte index) */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp
.L013tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L013tail_loop
.L012done:
	/* restore the %esp saved in the prologue */
	movl	512(%esp),%esp
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
/*
 * Constant table shared by the SSE and XOP paths (addressed relative to
 * .Lpic_point).  Offsets:
 *     0  byte-shuffle mask: rotate each dword left by 16
 *    16  byte-shuffle mask: rotate each dword left by 8
 *    32  the four state constants ("expand 32-byte k")
 *    48  {0,1,2,3}
 *    64  {4,4,4,4}
 *    80  {1,0,0,0}   (per-block counter increment used at .L010outer1x)
 *    96  {4,0,0,0}
 *   112  {0,-1,-1,-1}
 * The bytes after the second .align spell the NUL-terminated string
 * "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>".
 */
.align	64
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	64
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
/*
 * ChaCha20_xop -- start of the XOP/AVX path; the function continues in the
 * lines that follow.  It is reached through .Lxop_shortcut from
 * .Lssse3_shortcut and reads the same five stack arguments.
 */
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
	#ifdef __CET__

.byte	243,15,30,251
	#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lxop_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
566bc3d5698SJohn Baldwin movl %ebp,512(%esp) 567bc3d5698SJohn Baldwin leal .Lssse3_data-.Lpic_point(%eax),%eax 568bc3d5698SJohn Baldwin vmovdqu (%ebx),%xmm3 569bc3d5698SJohn Baldwin cmpl $256,%ecx 570bc3d5698SJohn Baldwin jb .L0141x 571bc3d5698SJohn Baldwin movl %edx,516(%esp) 572bc3d5698SJohn Baldwin movl %ebx,520(%esp) 573bc3d5698SJohn Baldwin subl $256,%ecx 574bc3d5698SJohn Baldwin leal 384(%esp),%ebp 575bc3d5698SJohn Baldwin vmovdqu (%edx),%xmm7 576bc3d5698SJohn Baldwin vpshufd $0,%xmm3,%xmm0 577bc3d5698SJohn Baldwin vpshufd $85,%xmm3,%xmm1 578bc3d5698SJohn Baldwin vpshufd $170,%xmm3,%xmm2 579bc3d5698SJohn Baldwin vpshufd $255,%xmm3,%xmm3 580bc3d5698SJohn Baldwin vpaddd 48(%eax),%xmm0,%xmm0 581bc3d5698SJohn Baldwin vpshufd $0,%xmm7,%xmm4 582bc3d5698SJohn Baldwin vpshufd $85,%xmm7,%xmm5 583bc3d5698SJohn Baldwin vpsubd 64(%eax),%xmm0,%xmm0 584bc3d5698SJohn Baldwin vpshufd $170,%xmm7,%xmm6 585bc3d5698SJohn Baldwin vpshufd $255,%xmm7,%xmm7 586bc3d5698SJohn Baldwin vmovdqa %xmm0,64(%ebp) 587bc3d5698SJohn Baldwin vmovdqa %xmm1,80(%ebp) 588bc3d5698SJohn Baldwin vmovdqa %xmm2,96(%ebp) 589bc3d5698SJohn Baldwin vmovdqa %xmm3,112(%ebp) 590bc3d5698SJohn Baldwin vmovdqu 16(%edx),%xmm3 591bc3d5698SJohn Baldwin vmovdqa %xmm4,-64(%ebp) 592bc3d5698SJohn Baldwin vmovdqa %xmm5,-48(%ebp) 593bc3d5698SJohn Baldwin vmovdqa %xmm6,-32(%ebp) 594bc3d5698SJohn Baldwin vmovdqa %xmm7,-16(%ebp) 595bc3d5698SJohn Baldwin vmovdqa 32(%eax),%xmm7 596bc3d5698SJohn Baldwin leal 128(%esp),%ebx 597bc3d5698SJohn Baldwin vpshufd $0,%xmm3,%xmm0 598bc3d5698SJohn Baldwin vpshufd $85,%xmm3,%xmm1 599bc3d5698SJohn Baldwin vpshufd $170,%xmm3,%xmm2 600bc3d5698SJohn Baldwin vpshufd $255,%xmm3,%xmm3 601bc3d5698SJohn Baldwin vpshufd $0,%xmm7,%xmm4 602bc3d5698SJohn Baldwin vpshufd $85,%xmm7,%xmm5 603bc3d5698SJohn Baldwin vpshufd $170,%xmm7,%xmm6 604bc3d5698SJohn Baldwin vpshufd $255,%xmm7,%xmm7 605bc3d5698SJohn Baldwin vmovdqa %xmm0,(%ebp) 606bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%ebp) 607bc3d5698SJohn Baldwin 
vmovdqa %xmm2,32(%ebp) 608bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%ebp) 609bc3d5698SJohn Baldwin vmovdqa %xmm4,-128(%ebp) 610bc3d5698SJohn Baldwin vmovdqa %xmm5,-112(%ebp) 611bc3d5698SJohn Baldwin vmovdqa %xmm6,-96(%ebp) 612bc3d5698SJohn Baldwin vmovdqa %xmm7,-80(%ebp) 613bc3d5698SJohn Baldwin leal 128(%esi),%esi 614bc3d5698SJohn Baldwin leal 128(%edi),%edi 615bc3d5698SJohn Baldwin jmp .L015outer_loop 616bc3d5698SJohn Baldwin.align 32 617bc3d5698SJohn Baldwin.L015outer_loop: 618bc3d5698SJohn Baldwin vmovdqa -112(%ebp),%xmm1 619bc3d5698SJohn Baldwin vmovdqa -96(%ebp),%xmm2 620bc3d5698SJohn Baldwin vmovdqa -80(%ebp),%xmm3 621bc3d5698SJohn Baldwin vmovdqa -48(%ebp),%xmm5 622bc3d5698SJohn Baldwin vmovdqa -32(%ebp),%xmm6 623bc3d5698SJohn Baldwin vmovdqa -16(%ebp),%xmm7 624bc3d5698SJohn Baldwin vmovdqa %xmm1,-112(%ebx) 625bc3d5698SJohn Baldwin vmovdqa %xmm2,-96(%ebx) 626bc3d5698SJohn Baldwin vmovdqa %xmm3,-80(%ebx) 627bc3d5698SJohn Baldwin vmovdqa %xmm5,-48(%ebx) 628bc3d5698SJohn Baldwin vmovdqa %xmm6,-32(%ebx) 629bc3d5698SJohn Baldwin vmovdqa %xmm7,-16(%ebx) 630bc3d5698SJohn Baldwin vmovdqa 32(%ebp),%xmm2 631bc3d5698SJohn Baldwin vmovdqa 48(%ebp),%xmm3 632bc3d5698SJohn Baldwin vmovdqa 64(%ebp),%xmm4 633bc3d5698SJohn Baldwin vmovdqa 80(%ebp),%xmm5 634bc3d5698SJohn Baldwin vmovdqa 96(%ebp),%xmm6 635bc3d5698SJohn Baldwin vmovdqa 112(%ebp),%xmm7 636bc3d5698SJohn Baldwin vpaddd 64(%eax),%xmm4,%xmm4 637bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%ebx) 638bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%ebx) 639bc3d5698SJohn Baldwin vmovdqa %xmm4,64(%ebx) 640bc3d5698SJohn Baldwin vmovdqa %xmm5,80(%ebx) 641bc3d5698SJohn Baldwin vmovdqa %xmm6,96(%ebx) 642bc3d5698SJohn Baldwin vmovdqa %xmm7,112(%ebx) 643bc3d5698SJohn Baldwin vmovdqa %xmm4,64(%ebp) 644bc3d5698SJohn Baldwin vmovdqa -128(%ebp),%xmm0 645bc3d5698SJohn Baldwin vmovdqa %xmm4,%xmm6 646bc3d5698SJohn Baldwin vmovdqa -64(%ebp),%xmm3 647bc3d5698SJohn Baldwin vmovdqa (%ebp),%xmm4 648bc3d5698SJohn Baldwin vmovdqa 16(%ebp),%xmm5 
649bc3d5698SJohn Baldwin movl $10,%edx 650bc3d5698SJohn Baldwin nop 651bc3d5698SJohn Baldwin.align 32 652bc3d5698SJohn Baldwin.L016loop: 653bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm0,%xmm0 654bc3d5698SJohn Baldwin vpxor %xmm0,%xmm6,%xmm6 655bc3d5698SJohn Baldwin.byte 143,232,120,194,246,16 656bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 657bc3d5698SJohn Baldwin vpxor %xmm4,%xmm3,%xmm2 658bc3d5698SJohn Baldwin vmovdqa -112(%ebx),%xmm1 659bc3d5698SJohn Baldwin.byte 143,232,120,194,210,12 660bc3d5698SJohn Baldwin vmovdqa -48(%ebx),%xmm3 661bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm0,%xmm0 662bc3d5698SJohn Baldwin vmovdqa 80(%ebx),%xmm7 663bc3d5698SJohn Baldwin vpxor %xmm0,%xmm6,%xmm6 664bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm1,%xmm1 665bc3d5698SJohn Baldwin.byte 143,232,120,194,246,8 666bc3d5698SJohn Baldwin vmovdqa %xmm0,-128(%ebx) 667bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 668bc3d5698SJohn Baldwin vmovdqa %xmm6,64(%ebx) 669bc3d5698SJohn Baldwin vpxor %xmm4,%xmm2,%xmm2 670bc3d5698SJohn Baldwin vpxor %xmm1,%xmm7,%xmm7 671bc3d5698SJohn Baldwin.byte 143,232,120,194,210,7 672bc3d5698SJohn Baldwin vmovdqa %xmm4,(%ebx) 673bc3d5698SJohn Baldwin.byte 143,232,120,194,255,16 674bc3d5698SJohn Baldwin vmovdqa %xmm2,-64(%ebx) 675bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm5,%xmm5 676bc3d5698SJohn Baldwin vmovdqa 32(%ebx),%xmm4 677bc3d5698SJohn Baldwin vpxor %xmm5,%xmm3,%xmm3 678bc3d5698SJohn Baldwin vmovdqa -96(%ebx),%xmm0 679bc3d5698SJohn Baldwin.byte 143,232,120,194,219,12 680bc3d5698SJohn Baldwin vmovdqa -32(%ebx),%xmm2 681bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm1,%xmm1 682bc3d5698SJohn Baldwin vmovdqa 96(%ebx),%xmm6 683bc3d5698SJohn Baldwin vpxor %xmm1,%xmm7,%xmm7 684bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm0,%xmm0 685bc3d5698SJohn Baldwin.byte 143,232,120,194,255,8 686bc3d5698SJohn Baldwin vmovdqa %xmm1,-112(%ebx) 687bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm5,%xmm5 688bc3d5698SJohn Baldwin vmovdqa %xmm7,80(%ebx) 689bc3d5698SJohn Baldwin vpxor %xmm5,%xmm3,%xmm3 690bc3d5698SJohn 
Baldwin vpxor %xmm0,%xmm6,%xmm6 691bc3d5698SJohn Baldwin.byte 143,232,120,194,219,7 692bc3d5698SJohn Baldwin vmovdqa %xmm5,16(%ebx) 693bc3d5698SJohn Baldwin.byte 143,232,120,194,246,16 694bc3d5698SJohn Baldwin vmovdqa %xmm3,-48(%ebx) 695bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 696bc3d5698SJohn Baldwin vmovdqa 48(%ebx),%xmm5 697bc3d5698SJohn Baldwin vpxor %xmm4,%xmm2,%xmm2 698bc3d5698SJohn Baldwin vmovdqa -80(%ebx),%xmm1 699bc3d5698SJohn Baldwin.byte 143,232,120,194,210,12 700bc3d5698SJohn Baldwin vmovdqa -16(%ebx),%xmm3 701bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm0,%xmm0 702bc3d5698SJohn Baldwin vmovdqa 112(%ebx),%xmm7 703bc3d5698SJohn Baldwin vpxor %xmm0,%xmm6,%xmm6 704bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm1,%xmm1 705bc3d5698SJohn Baldwin.byte 143,232,120,194,246,8 706bc3d5698SJohn Baldwin vmovdqa %xmm0,-96(%ebx) 707bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 708bc3d5698SJohn Baldwin vmovdqa %xmm6,96(%ebx) 709bc3d5698SJohn Baldwin vpxor %xmm4,%xmm2,%xmm2 710bc3d5698SJohn Baldwin vpxor %xmm1,%xmm7,%xmm7 711bc3d5698SJohn Baldwin.byte 143,232,120,194,210,7 712bc3d5698SJohn Baldwin.byte 143,232,120,194,255,16 713bc3d5698SJohn Baldwin vmovdqa %xmm2,-32(%ebx) 714bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm5,%xmm5 715bc3d5698SJohn Baldwin vpxor %xmm5,%xmm3,%xmm3 716bc3d5698SJohn Baldwin vmovdqa -128(%ebx),%xmm0 717bc3d5698SJohn Baldwin.byte 143,232,120,194,219,12 718bc3d5698SJohn Baldwin vmovdqa -48(%ebx),%xmm2 719bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm1,%xmm1 720bc3d5698SJohn Baldwin vpxor %xmm1,%xmm7,%xmm7 721bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm0,%xmm0 722bc3d5698SJohn Baldwin.byte 143,232,120,194,255,8 723bc3d5698SJohn Baldwin vmovdqa %xmm1,-80(%ebx) 724bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm5,%xmm5 725bc3d5698SJohn Baldwin vpxor %xmm5,%xmm3,%xmm3 726bc3d5698SJohn Baldwin vpxor %xmm0,%xmm7,%xmm6 727bc3d5698SJohn Baldwin.byte 143,232,120,194,219,7 728bc3d5698SJohn Baldwin.byte 143,232,120,194,246,16 729bc3d5698SJohn Baldwin vmovdqa %xmm3,-16(%ebx) 
730bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 731bc3d5698SJohn Baldwin vpxor %xmm4,%xmm2,%xmm2 732bc3d5698SJohn Baldwin vmovdqa -112(%ebx),%xmm1 733bc3d5698SJohn Baldwin.byte 143,232,120,194,210,12 734bc3d5698SJohn Baldwin vmovdqa -32(%ebx),%xmm3 735bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm0,%xmm0 736bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%xmm7 737bc3d5698SJohn Baldwin vpxor %xmm0,%xmm6,%xmm6 738bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm1,%xmm1 739bc3d5698SJohn Baldwin.byte 143,232,120,194,246,8 740bc3d5698SJohn Baldwin vmovdqa %xmm0,-128(%ebx) 741bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 742bc3d5698SJohn Baldwin vmovdqa %xmm6,112(%ebx) 743bc3d5698SJohn Baldwin vpxor %xmm4,%xmm2,%xmm2 744bc3d5698SJohn Baldwin vpxor %xmm1,%xmm7,%xmm7 745bc3d5698SJohn Baldwin.byte 143,232,120,194,210,7 746bc3d5698SJohn Baldwin vmovdqa %xmm4,32(%ebx) 747bc3d5698SJohn Baldwin.byte 143,232,120,194,255,16 748bc3d5698SJohn Baldwin vmovdqa %xmm2,-48(%ebx) 749bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm5,%xmm5 750bc3d5698SJohn Baldwin vmovdqa (%ebx),%xmm4 751bc3d5698SJohn Baldwin vpxor %xmm5,%xmm3,%xmm3 752bc3d5698SJohn Baldwin vmovdqa -96(%ebx),%xmm0 753bc3d5698SJohn Baldwin.byte 143,232,120,194,219,12 754bc3d5698SJohn Baldwin vmovdqa -16(%ebx),%xmm2 755bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm1,%xmm1 756bc3d5698SJohn Baldwin vmovdqa 80(%ebx),%xmm6 757bc3d5698SJohn Baldwin vpxor %xmm1,%xmm7,%xmm7 758bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm0,%xmm0 759bc3d5698SJohn Baldwin.byte 143,232,120,194,255,8 760bc3d5698SJohn Baldwin vmovdqa %xmm1,-112(%ebx) 761bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm5,%xmm5 762bc3d5698SJohn Baldwin vmovdqa %xmm7,64(%ebx) 763bc3d5698SJohn Baldwin vpxor %xmm5,%xmm3,%xmm3 764bc3d5698SJohn Baldwin vpxor %xmm0,%xmm6,%xmm6 765bc3d5698SJohn Baldwin.byte 143,232,120,194,219,7 766bc3d5698SJohn Baldwin vmovdqa %xmm5,48(%ebx) 767bc3d5698SJohn Baldwin.byte 143,232,120,194,246,16 768bc3d5698SJohn Baldwin vmovdqa %xmm3,-32(%ebx) 769bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 
770bc3d5698SJohn Baldwin vmovdqa 16(%ebx),%xmm5 771bc3d5698SJohn Baldwin vpxor %xmm4,%xmm2,%xmm2 772bc3d5698SJohn Baldwin vmovdqa -80(%ebx),%xmm1 773bc3d5698SJohn Baldwin.byte 143,232,120,194,210,12 774bc3d5698SJohn Baldwin vmovdqa -64(%ebx),%xmm3 775bc3d5698SJohn Baldwin vpaddd %xmm2,%xmm0,%xmm0 776bc3d5698SJohn Baldwin vmovdqa 96(%ebx),%xmm7 777bc3d5698SJohn Baldwin vpxor %xmm0,%xmm6,%xmm6 778bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm1,%xmm1 779bc3d5698SJohn Baldwin.byte 143,232,120,194,246,8 780bc3d5698SJohn Baldwin vmovdqa %xmm0,-96(%ebx) 781bc3d5698SJohn Baldwin vpaddd %xmm6,%xmm4,%xmm4 782bc3d5698SJohn Baldwin vmovdqa %xmm6,80(%ebx) 783bc3d5698SJohn Baldwin vpxor %xmm4,%xmm2,%xmm2 784bc3d5698SJohn Baldwin vpxor %xmm1,%xmm7,%xmm7 785bc3d5698SJohn Baldwin.byte 143,232,120,194,210,7 786bc3d5698SJohn Baldwin.byte 143,232,120,194,255,16 787bc3d5698SJohn Baldwin vmovdqa %xmm2,-16(%ebx) 788bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm5,%xmm5 789bc3d5698SJohn Baldwin vpxor %xmm5,%xmm3,%xmm3 790bc3d5698SJohn Baldwin vmovdqa -128(%ebx),%xmm0 791bc3d5698SJohn Baldwin.byte 143,232,120,194,219,12 792bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm1,%xmm1 793bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%xmm6 794bc3d5698SJohn Baldwin vpxor %xmm1,%xmm7,%xmm7 795bc3d5698SJohn Baldwin.byte 143,232,120,194,255,8 796bc3d5698SJohn Baldwin vmovdqa %xmm1,-80(%ebx) 797bc3d5698SJohn Baldwin vpaddd %xmm7,%xmm5,%xmm5 798bc3d5698SJohn Baldwin vmovdqa %xmm7,96(%ebx) 799bc3d5698SJohn Baldwin vpxor %xmm5,%xmm3,%xmm3 800bc3d5698SJohn Baldwin.byte 143,232,120,194,219,7 801bc3d5698SJohn Baldwin decl %edx 802bc3d5698SJohn Baldwin jnz .L016loop 803bc3d5698SJohn Baldwin vmovdqa %xmm3,-64(%ebx) 804bc3d5698SJohn Baldwin vmovdqa %xmm4,(%ebx) 805bc3d5698SJohn Baldwin vmovdqa %xmm5,16(%ebx) 806bc3d5698SJohn Baldwin vmovdqa %xmm6,64(%ebx) 807bc3d5698SJohn Baldwin vmovdqa %xmm7,96(%ebx) 808bc3d5698SJohn Baldwin vmovdqa -112(%ebx),%xmm1 809bc3d5698SJohn Baldwin vmovdqa -96(%ebx),%xmm2 810bc3d5698SJohn Baldwin vmovdqa 
-80(%ebx),%xmm3 811bc3d5698SJohn Baldwin vpaddd -128(%ebp),%xmm0,%xmm0 812bc3d5698SJohn Baldwin vpaddd -112(%ebp),%xmm1,%xmm1 813bc3d5698SJohn Baldwin vpaddd -96(%ebp),%xmm2,%xmm2 814bc3d5698SJohn Baldwin vpaddd -80(%ebp),%xmm3,%xmm3 815bc3d5698SJohn Baldwin vpunpckldq %xmm1,%xmm0,%xmm6 816bc3d5698SJohn Baldwin vpunpckldq %xmm3,%xmm2,%xmm7 817bc3d5698SJohn Baldwin vpunpckhdq %xmm1,%xmm0,%xmm0 818bc3d5698SJohn Baldwin vpunpckhdq %xmm3,%xmm2,%xmm2 819bc3d5698SJohn Baldwin vpunpcklqdq %xmm7,%xmm6,%xmm1 820bc3d5698SJohn Baldwin vpunpckhqdq %xmm7,%xmm6,%xmm6 821bc3d5698SJohn Baldwin vpunpcklqdq %xmm2,%xmm0,%xmm7 822bc3d5698SJohn Baldwin vpunpckhqdq %xmm2,%xmm0,%xmm3 823bc3d5698SJohn Baldwin vpxor -128(%esi),%xmm1,%xmm4 824bc3d5698SJohn Baldwin vpxor -64(%esi),%xmm6,%xmm5 825bc3d5698SJohn Baldwin vpxor (%esi),%xmm7,%xmm6 826bc3d5698SJohn Baldwin vpxor 64(%esi),%xmm3,%xmm7 827bc3d5698SJohn Baldwin leal 16(%esi),%esi 828bc3d5698SJohn Baldwin vmovdqa -64(%ebx),%xmm0 829bc3d5698SJohn Baldwin vmovdqa -48(%ebx),%xmm1 830bc3d5698SJohn Baldwin vmovdqa -32(%ebx),%xmm2 831bc3d5698SJohn Baldwin vmovdqa -16(%ebx),%xmm3 832bc3d5698SJohn Baldwin vmovdqu %xmm4,-128(%edi) 833bc3d5698SJohn Baldwin vmovdqu %xmm5,-64(%edi) 834bc3d5698SJohn Baldwin vmovdqu %xmm6,(%edi) 835bc3d5698SJohn Baldwin vmovdqu %xmm7,64(%edi) 836bc3d5698SJohn Baldwin leal 16(%edi),%edi 837bc3d5698SJohn Baldwin vpaddd -64(%ebp),%xmm0,%xmm0 838bc3d5698SJohn Baldwin vpaddd -48(%ebp),%xmm1,%xmm1 839bc3d5698SJohn Baldwin vpaddd -32(%ebp),%xmm2,%xmm2 840bc3d5698SJohn Baldwin vpaddd -16(%ebp),%xmm3,%xmm3 841bc3d5698SJohn Baldwin vpunpckldq %xmm1,%xmm0,%xmm6 842bc3d5698SJohn Baldwin vpunpckldq %xmm3,%xmm2,%xmm7 843bc3d5698SJohn Baldwin vpunpckhdq %xmm1,%xmm0,%xmm0 844bc3d5698SJohn Baldwin vpunpckhdq %xmm3,%xmm2,%xmm2 845bc3d5698SJohn Baldwin vpunpcklqdq %xmm7,%xmm6,%xmm1 846bc3d5698SJohn Baldwin vpunpckhqdq %xmm7,%xmm6,%xmm6 847bc3d5698SJohn Baldwin vpunpcklqdq %xmm2,%xmm0,%xmm7 848bc3d5698SJohn Baldwin vpunpckhqdq 
%xmm2,%xmm0,%xmm3 849bc3d5698SJohn Baldwin vpxor -128(%esi),%xmm1,%xmm4 850bc3d5698SJohn Baldwin vpxor -64(%esi),%xmm6,%xmm5 851bc3d5698SJohn Baldwin vpxor (%esi),%xmm7,%xmm6 852bc3d5698SJohn Baldwin vpxor 64(%esi),%xmm3,%xmm7 853bc3d5698SJohn Baldwin leal 16(%esi),%esi 854bc3d5698SJohn Baldwin vmovdqa (%ebx),%xmm0 855bc3d5698SJohn Baldwin vmovdqa 16(%ebx),%xmm1 856bc3d5698SJohn Baldwin vmovdqa 32(%ebx),%xmm2 857bc3d5698SJohn Baldwin vmovdqa 48(%ebx),%xmm3 858bc3d5698SJohn Baldwin vmovdqu %xmm4,-128(%edi) 859bc3d5698SJohn Baldwin vmovdqu %xmm5,-64(%edi) 860bc3d5698SJohn Baldwin vmovdqu %xmm6,(%edi) 861bc3d5698SJohn Baldwin vmovdqu %xmm7,64(%edi) 862bc3d5698SJohn Baldwin leal 16(%edi),%edi 863bc3d5698SJohn Baldwin vpaddd (%ebp),%xmm0,%xmm0 864bc3d5698SJohn Baldwin vpaddd 16(%ebp),%xmm1,%xmm1 865bc3d5698SJohn Baldwin vpaddd 32(%ebp),%xmm2,%xmm2 866bc3d5698SJohn Baldwin vpaddd 48(%ebp),%xmm3,%xmm3 867bc3d5698SJohn Baldwin vpunpckldq %xmm1,%xmm0,%xmm6 868bc3d5698SJohn Baldwin vpunpckldq %xmm3,%xmm2,%xmm7 869bc3d5698SJohn Baldwin vpunpckhdq %xmm1,%xmm0,%xmm0 870bc3d5698SJohn Baldwin vpunpckhdq %xmm3,%xmm2,%xmm2 871bc3d5698SJohn Baldwin vpunpcklqdq %xmm7,%xmm6,%xmm1 872bc3d5698SJohn Baldwin vpunpckhqdq %xmm7,%xmm6,%xmm6 873bc3d5698SJohn Baldwin vpunpcklqdq %xmm2,%xmm0,%xmm7 874bc3d5698SJohn Baldwin vpunpckhqdq %xmm2,%xmm0,%xmm3 875bc3d5698SJohn Baldwin vpxor -128(%esi),%xmm1,%xmm4 876bc3d5698SJohn Baldwin vpxor -64(%esi),%xmm6,%xmm5 877bc3d5698SJohn Baldwin vpxor (%esi),%xmm7,%xmm6 878bc3d5698SJohn Baldwin vpxor 64(%esi),%xmm3,%xmm7 879bc3d5698SJohn Baldwin leal 16(%esi),%esi 880bc3d5698SJohn Baldwin vmovdqa 64(%ebx),%xmm0 881bc3d5698SJohn Baldwin vmovdqa 80(%ebx),%xmm1 882bc3d5698SJohn Baldwin vmovdqa 96(%ebx),%xmm2 883bc3d5698SJohn Baldwin vmovdqa 112(%ebx),%xmm3 884bc3d5698SJohn Baldwin vmovdqu %xmm4,-128(%edi) 885bc3d5698SJohn Baldwin vmovdqu %xmm5,-64(%edi) 886bc3d5698SJohn Baldwin vmovdqu %xmm6,(%edi) 887bc3d5698SJohn Baldwin vmovdqu %xmm7,64(%edi) 888bc3d5698SJohn 
Baldwin leal 16(%edi),%edi 889bc3d5698SJohn Baldwin vpaddd 64(%ebp),%xmm0,%xmm0 890bc3d5698SJohn Baldwin vpaddd 80(%ebp),%xmm1,%xmm1 891bc3d5698SJohn Baldwin vpaddd 96(%ebp),%xmm2,%xmm2 892bc3d5698SJohn Baldwin vpaddd 112(%ebp),%xmm3,%xmm3 893bc3d5698SJohn Baldwin vpunpckldq %xmm1,%xmm0,%xmm6 894bc3d5698SJohn Baldwin vpunpckldq %xmm3,%xmm2,%xmm7 895bc3d5698SJohn Baldwin vpunpckhdq %xmm1,%xmm0,%xmm0 896bc3d5698SJohn Baldwin vpunpckhdq %xmm3,%xmm2,%xmm2 897bc3d5698SJohn Baldwin vpunpcklqdq %xmm7,%xmm6,%xmm1 898bc3d5698SJohn Baldwin vpunpckhqdq %xmm7,%xmm6,%xmm6 899bc3d5698SJohn Baldwin vpunpcklqdq %xmm2,%xmm0,%xmm7 900bc3d5698SJohn Baldwin vpunpckhqdq %xmm2,%xmm0,%xmm3 901bc3d5698SJohn Baldwin vpxor -128(%esi),%xmm1,%xmm4 902bc3d5698SJohn Baldwin vpxor -64(%esi),%xmm6,%xmm5 903bc3d5698SJohn Baldwin vpxor (%esi),%xmm7,%xmm6 904bc3d5698SJohn Baldwin vpxor 64(%esi),%xmm3,%xmm7 905bc3d5698SJohn Baldwin leal 208(%esi),%esi 906bc3d5698SJohn Baldwin vmovdqu %xmm4,-128(%edi) 907bc3d5698SJohn Baldwin vmovdqu %xmm5,-64(%edi) 908bc3d5698SJohn Baldwin vmovdqu %xmm6,(%edi) 909bc3d5698SJohn Baldwin vmovdqu %xmm7,64(%edi) 910bc3d5698SJohn Baldwin leal 208(%edi),%edi 911bc3d5698SJohn Baldwin subl $256,%ecx 912bc3d5698SJohn Baldwin jnc .L015outer_loop 913bc3d5698SJohn Baldwin addl $256,%ecx 914bc3d5698SJohn Baldwin jz .L017done 915bc3d5698SJohn Baldwin movl 520(%esp),%ebx 916bc3d5698SJohn Baldwin leal -128(%esi),%esi 917bc3d5698SJohn Baldwin movl 516(%esp),%edx 918bc3d5698SJohn Baldwin leal -128(%edi),%edi 919bc3d5698SJohn Baldwin vmovd 64(%ebp),%xmm2 920bc3d5698SJohn Baldwin vmovdqu (%ebx),%xmm3 921bc3d5698SJohn Baldwin vpaddd 96(%eax),%xmm2,%xmm2 922bc3d5698SJohn Baldwin vpand 112(%eax),%xmm3,%xmm3 923bc3d5698SJohn Baldwin vpor %xmm2,%xmm3,%xmm3 924bc3d5698SJohn Baldwin.L0141x: 925bc3d5698SJohn Baldwin vmovdqa 32(%eax),%xmm0 926bc3d5698SJohn Baldwin vmovdqu (%edx),%xmm1 927bc3d5698SJohn Baldwin vmovdqu 16(%edx),%xmm2 928bc3d5698SJohn Baldwin vmovdqa (%eax),%xmm6 929bc3d5698SJohn 
Baldwin vmovdqa 16(%eax),%xmm7 930bc3d5698SJohn Baldwin movl %ebp,48(%esp) 931bc3d5698SJohn Baldwin vmovdqa %xmm0,(%esp) 932bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%esp) 933bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%esp) 934bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%esp) 935bc3d5698SJohn Baldwin movl $10,%edx 936bc3d5698SJohn Baldwin jmp .L018loop1x 937bc3d5698SJohn Baldwin.align 16 938bc3d5698SJohn Baldwin.L019outer1x: 939bc3d5698SJohn Baldwin vmovdqa 80(%eax),%xmm3 940bc3d5698SJohn Baldwin vmovdqa (%esp),%xmm0 941bc3d5698SJohn Baldwin vmovdqa 16(%esp),%xmm1 942bc3d5698SJohn Baldwin vmovdqa 32(%esp),%xmm2 943bc3d5698SJohn Baldwin vpaddd 48(%esp),%xmm3,%xmm3 944bc3d5698SJohn Baldwin movl $10,%edx 945bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%esp) 946bc3d5698SJohn Baldwin jmp .L018loop1x 947bc3d5698SJohn Baldwin.align 16 948bc3d5698SJohn Baldwin.L018loop1x: 949bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm0,%xmm0 950bc3d5698SJohn Baldwin vpxor %xmm0,%xmm3,%xmm3 951bc3d5698SJohn Baldwin.byte 143,232,120,194,219,16 952bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm2,%xmm2 953bc3d5698SJohn Baldwin vpxor %xmm2,%xmm1,%xmm1 954bc3d5698SJohn Baldwin.byte 143,232,120,194,201,12 955bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm0,%xmm0 956bc3d5698SJohn Baldwin vpxor %xmm0,%xmm3,%xmm3 957bc3d5698SJohn Baldwin.byte 143,232,120,194,219,8 958bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm2,%xmm2 959bc3d5698SJohn Baldwin vpxor %xmm2,%xmm1,%xmm1 960bc3d5698SJohn Baldwin.byte 143,232,120,194,201,7 961bc3d5698SJohn Baldwin vpshufd $78,%xmm2,%xmm2 962bc3d5698SJohn Baldwin vpshufd $57,%xmm1,%xmm1 963bc3d5698SJohn Baldwin vpshufd $147,%xmm3,%xmm3 964bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm0,%xmm0 965bc3d5698SJohn Baldwin vpxor %xmm0,%xmm3,%xmm3 966bc3d5698SJohn Baldwin.byte 143,232,120,194,219,16 967bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm2,%xmm2 968bc3d5698SJohn Baldwin vpxor %xmm2,%xmm1,%xmm1 969bc3d5698SJohn Baldwin.byte 143,232,120,194,201,12 970bc3d5698SJohn Baldwin vpaddd %xmm1,%xmm0,%xmm0 971bc3d5698SJohn Baldwin vpxor 
%xmm0,%xmm3,%xmm3 972bc3d5698SJohn Baldwin.byte 143,232,120,194,219,8 973bc3d5698SJohn Baldwin vpaddd %xmm3,%xmm2,%xmm2 974bc3d5698SJohn Baldwin vpxor %xmm2,%xmm1,%xmm1 975bc3d5698SJohn Baldwin.byte 143,232,120,194,201,7 976bc3d5698SJohn Baldwin vpshufd $78,%xmm2,%xmm2 977bc3d5698SJohn Baldwin vpshufd $147,%xmm1,%xmm1 978bc3d5698SJohn Baldwin vpshufd $57,%xmm3,%xmm3 979bc3d5698SJohn Baldwin decl %edx 980bc3d5698SJohn Baldwin jnz .L018loop1x 981bc3d5698SJohn Baldwin vpaddd (%esp),%xmm0,%xmm0 982bc3d5698SJohn Baldwin vpaddd 16(%esp),%xmm1,%xmm1 983bc3d5698SJohn Baldwin vpaddd 32(%esp),%xmm2,%xmm2 984bc3d5698SJohn Baldwin vpaddd 48(%esp),%xmm3,%xmm3 985bc3d5698SJohn Baldwin cmpl $64,%ecx 986bc3d5698SJohn Baldwin jb .L020tail 987bc3d5698SJohn Baldwin vpxor (%esi),%xmm0,%xmm0 988bc3d5698SJohn Baldwin vpxor 16(%esi),%xmm1,%xmm1 989bc3d5698SJohn Baldwin vpxor 32(%esi),%xmm2,%xmm2 990bc3d5698SJohn Baldwin vpxor 48(%esi),%xmm3,%xmm3 991bc3d5698SJohn Baldwin leal 64(%esi),%esi 992bc3d5698SJohn Baldwin vmovdqu %xmm0,(%edi) 993bc3d5698SJohn Baldwin vmovdqu %xmm1,16(%edi) 994bc3d5698SJohn Baldwin vmovdqu %xmm2,32(%edi) 995bc3d5698SJohn Baldwin vmovdqu %xmm3,48(%edi) 996bc3d5698SJohn Baldwin leal 64(%edi),%edi 997bc3d5698SJohn Baldwin subl $64,%ecx 998bc3d5698SJohn Baldwin jnz .L019outer1x 999bc3d5698SJohn Baldwin jmp .L017done 1000bc3d5698SJohn Baldwin.L020tail: 1001bc3d5698SJohn Baldwin vmovdqa %xmm0,(%esp) 1002bc3d5698SJohn Baldwin vmovdqa %xmm1,16(%esp) 1003bc3d5698SJohn Baldwin vmovdqa %xmm2,32(%esp) 1004bc3d5698SJohn Baldwin vmovdqa %xmm3,48(%esp) 1005bc3d5698SJohn Baldwin xorl %eax,%eax 1006bc3d5698SJohn Baldwin xorl %edx,%edx 1007bc3d5698SJohn Baldwin xorl %ebp,%ebp 1008bc3d5698SJohn Baldwin.L021tail_loop: 1009bc3d5698SJohn Baldwin movb (%esp,%ebp,1),%al 1010bc3d5698SJohn Baldwin movb (%esi,%ebp,1),%dl 1011bc3d5698SJohn Baldwin leal 1(%ebp),%ebp 1012bc3d5698SJohn Baldwin xorb %dl,%al 1013bc3d5698SJohn Baldwin movb %al,-1(%edi,%ebp,1) 1014bc3d5698SJohn Baldwin decl %ecx 
1015bc3d5698SJohn Baldwin jnz .L021tail_loop 1016bc3d5698SJohn Baldwin.L017done: 1017bc3d5698SJohn Baldwin vzeroupper 1018bc3d5698SJohn Baldwin movl 512(%esp),%esp 1019bc3d5698SJohn Baldwin popl %edi 1020bc3d5698SJohn Baldwin popl %esi 1021bc3d5698SJohn Baldwin popl %ebx 1022bc3d5698SJohn Baldwin popl %ebp 1023bc3d5698SJohn Baldwin ret 1024bc3d5698SJohn Baldwin.size ChaCha20_xop,.-.L_ChaCha20_xop_begin 1025bc3d5698SJohn Baldwin.comm OPENSSL_ia32cap_P,16,4 1026*c0855eaaSJohn Baldwin 1027*c0855eaaSJohn Baldwin .section ".note.gnu.property", "a" 1028*c0855eaaSJohn Baldwin .p2align 2 1029*c0855eaaSJohn Baldwin .long 1f - 0f 1030*c0855eaaSJohn Baldwin .long 4f - 1f 1031*c0855eaaSJohn Baldwin .long 5 1032*c0855eaaSJohn Baldwin0: 1033*c0855eaaSJohn Baldwin .asciz "GNU" 1034*c0855eaaSJohn Baldwin1: 1035*c0855eaaSJohn Baldwin .p2align 2 1036*c0855eaaSJohn Baldwin .long 0xc0000002 1037*c0855eaaSJohn Baldwin .long 3f - 2f 1038*c0855eaaSJohn Baldwin2: 1039*c0855eaaSJohn Baldwin .long 3 1040*c0855eaaSJohn Baldwin3: 1041*c0855eaaSJohn Baldwin .p2align 2 1042*c0855eaaSJohn Baldwin4: 1043bc3d5698SJohn Baldwin#else 1044bc3d5698SJohn Baldwin.text 1045bc3d5698SJohn Baldwin.globl ChaCha20_ctr32 1046bc3d5698SJohn Baldwin.type ChaCha20_ctr32,@function 1047bc3d5698SJohn Baldwin.align 16 1048bc3d5698SJohn BaldwinChaCha20_ctr32: 1049bc3d5698SJohn Baldwin.L_ChaCha20_ctr32_begin: 1050*c0855eaaSJohn Baldwin #ifdef __CET__ 1051*c0855eaaSJohn Baldwin 1052*c0855eaaSJohn Baldwin.byte 243,15,30,251 1053*c0855eaaSJohn Baldwin #endif 1054*c0855eaaSJohn Baldwin 1055bc3d5698SJohn Baldwin pushl %ebp 1056bc3d5698SJohn Baldwin pushl %ebx 1057bc3d5698SJohn Baldwin pushl %esi 1058bc3d5698SJohn Baldwin pushl %edi 1059bc3d5698SJohn Baldwin xorl %eax,%eax 1060bc3d5698SJohn Baldwin cmpl 28(%esp),%eax 1061bc3d5698SJohn Baldwin je .L000no_data 1062bc3d5698SJohn Baldwin call .Lpic_point 1063bc3d5698SJohn Baldwin.Lpic_point: 1064bc3d5698SJohn Baldwin popl %eax 1065bc3d5698SJohn Baldwin leal OPENSSL_ia32cap_P,%ebp 
1066bc3d5698SJohn Baldwin testl $16777216,(%ebp) 1067bc3d5698SJohn Baldwin jz .L001x86 1068bc3d5698SJohn Baldwin testl $512,4(%ebp) 1069bc3d5698SJohn Baldwin jz .L001x86 1070bc3d5698SJohn Baldwin jmp .Lssse3_shortcut 1071bc3d5698SJohn Baldwin.L001x86: 1072bc3d5698SJohn Baldwin movl 32(%esp),%esi 1073bc3d5698SJohn Baldwin movl 36(%esp),%edi 1074bc3d5698SJohn Baldwin subl $132,%esp 1075bc3d5698SJohn Baldwin movl (%esi),%eax 1076bc3d5698SJohn Baldwin movl 4(%esi),%ebx 1077bc3d5698SJohn Baldwin movl 8(%esi),%ecx 1078bc3d5698SJohn Baldwin movl 12(%esi),%edx 1079bc3d5698SJohn Baldwin movl %eax,80(%esp) 1080bc3d5698SJohn Baldwin movl %ebx,84(%esp) 1081bc3d5698SJohn Baldwin movl %ecx,88(%esp) 1082bc3d5698SJohn Baldwin movl %edx,92(%esp) 1083bc3d5698SJohn Baldwin movl 16(%esi),%eax 1084bc3d5698SJohn Baldwin movl 20(%esi),%ebx 1085bc3d5698SJohn Baldwin movl 24(%esi),%ecx 1086bc3d5698SJohn Baldwin movl 28(%esi),%edx 1087bc3d5698SJohn Baldwin movl %eax,96(%esp) 1088bc3d5698SJohn Baldwin movl %ebx,100(%esp) 1089bc3d5698SJohn Baldwin movl %ecx,104(%esp) 1090bc3d5698SJohn Baldwin movl %edx,108(%esp) 1091bc3d5698SJohn Baldwin movl (%edi),%eax 1092bc3d5698SJohn Baldwin movl 4(%edi),%ebx 1093bc3d5698SJohn Baldwin movl 8(%edi),%ecx 1094bc3d5698SJohn Baldwin movl 12(%edi),%edx 1095bc3d5698SJohn Baldwin subl $1,%eax 1096bc3d5698SJohn Baldwin movl %eax,112(%esp) 1097bc3d5698SJohn Baldwin movl %ebx,116(%esp) 1098bc3d5698SJohn Baldwin movl %ecx,120(%esp) 1099bc3d5698SJohn Baldwin movl %edx,124(%esp) 1100bc3d5698SJohn Baldwin jmp .L002entry 1101bc3d5698SJohn Baldwin.align 16 1102bc3d5698SJohn Baldwin.L003outer_loop: 1103bc3d5698SJohn Baldwin movl %ebx,156(%esp) 1104bc3d5698SJohn Baldwin movl %eax,152(%esp) 1105bc3d5698SJohn Baldwin movl %ecx,160(%esp) 1106bc3d5698SJohn Baldwin.L002entry: 1107bc3d5698SJohn Baldwin movl $1634760805,%eax 1108bc3d5698SJohn Baldwin movl $857760878,4(%esp) 1109bc3d5698SJohn Baldwin movl $2036477234,8(%esp) 1110bc3d5698SJohn Baldwin movl $1797285236,12(%esp) 
1111bc3d5698SJohn Baldwin movl 84(%esp),%ebx 1112bc3d5698SJohn Baldwin movl 88(%esp),%ebp 1113bc3d5698SJohn Baldwin movl 104(%esp),%ecx 1114bc3d5698SJohn Baldwin movl 108(%esp),%esi 1115bc3d5698SJohn Baldwin movl 116(%esp),%edx 1116bc3d5698SJohn Baldwin movl 120(%esp),%edi 1117bc3d5698SJohn Baldwin movl %ebx,20(%esp) 1118bc3d5698SJohn Baldwin movl %ebp,24(%esp) 1119bc3d5698SJohn Baldwin movl %ecx,40(%esp) 1120bc3d5698SJohn Baldwin movl %esi,44(%esp) 1121bc3d5698SJohn Baldwin movl %edx,52(%esp) 1122bc3d5698SJohn Baldwin movl %edi,56(%esp) 1123bc3d5698SJohn Baldwin movl 92(%esp),%ebx 1124bc3d5698SJohn Baldwin movl 124(%esp),%edi 1125bc3d5698SJohn Baldwin movl 112(%esp),%edx 1126bc3d5698SJohn Baldwin movl 80(%esp),%ebp 1127bc3d5698SJohn Baldwin movl 96(%esp),%ecx 1128bc3d5698SJohn Baldwin movl 100(%esp),%esi 1129bc3d5698SJohn Baldwin addl $1,%edx 1130bc3d5698SJohn Baldwin movl %ebx,28(%esp) 1131bc3d5698SJohn Baldwin movl %edi,60(%esp) 1132bc3d5698SJohn Baldwin movl %edx,112(%esp) 1133bc3d5698SJohn Baldwin movl $10,%ebx 1134bc3d5698SJohn Baldwin jmp .L004loop 1135bc3d5698SJohn Baldwin.align 16 1136bc3d5698SJohn Baldwin.L004loop: 1137bc3d5698SJohn Baldwin addl %ebp,%eax 1138bc3d5698SJohn Baldwin movl %ebx,128(%esp) 1139bc3d5698SJohn Baldwin movl %ebp,%ebx 1140bc3d5698SJohn Baldwin xorl %eax,%edx 1141bc3d5698SJohn Baldwin roll $16,%edx 1142bc3d5698SJohn Baldwin addl %edx,%ecx 1143bc3d5698SJohn Baldwin xorl %ecx,%ebx 1144bc3d5698SJohn Baldwin movl 52(%esp),%edi 1145bc3d5698SJohn Baldwin roll $12,%ebx 1146bc3d5698SJohn Baldwin movl 20(%esp),%ebp 1147bc3d5698SJohn Baldwin addl %ebx,%eax 1148bc3d5698SJohn Baldwin xorl %eax,%edx 1149bc3d5698SJohn Baldwin movl %eax,(%esp) 1150bc3d5698SJohn Baldwin roll $8,%edx 1151bc3d5698SJohn Baldwin movl 4(%esp),%eax 1152bc3d5698SJohn Baldwin addl %edx,%ecx 1153bc3d5698SJohn Baldwin movl %edx,48(%esp) 1154bc3d5698SJohn Baldwin xorl %ecx,%ebx 1155bc3d5698SJohn Baldwin addl %ebp,%eax 1156bc3d5698SJohn Baldwin roll $7,%ebx 1157bc3d5698SJohn 
Baldwin xorl %eax,%edi 1158bc3d5698SJohn Baldwin movl %ecx,32(%esp) 1159bc3d5698SJohn Baldwin roll $16,%edi 1160bc3d5698SJohn Baldwin movl %ebx,16(%esp) 1161bc3d5698SJohn Baldwin addl %edi,%esi 1162bc3d5698SJohn Baldwin movl 40(%esp),%ecx 1163bc3d5698SJohn Baldwin xorl %esi,%ebp 1164bc3d5698SJohn Baldwin movl 56(%esp),%edx 1165bc3d5698SJohn Baldwin roll $12,%ebp 1166bc3d5698SJohn Baldwin movl 24(%esp),%ebx 1167bc3d5698SJohn Baldwin addl %ebp,%eax 1168bc3d5698SJohn Baldwin xorl %eax,%edi 1169bc3d5698SJohn Baldwin movl %eax,4(%esp) 1170bc3d5698SJohn Baldwin roll $8,%edi 1171bc3d5698SJohn Baldwin movl 8(%esp),%eax 1172bc3d5698SJohn Baldwin addl %edi,%esi 1173bc3d5698SJohn Baldwin movl %edi,52(%esp) 1174bc3d5698SJohn Baldwin xorl %esi,%ebp 1175bc3d5698SJohn Baldwin addl %ebx,%eax 1176bc3d5698SJohn Baldwin roll $7,%ebp 1177bc3d5698SJohn Baldwin xorl %eax,%edx 1178bc3d5698SJohn Baldwin movl %esi,36(%esp) 1179bc3d5698SJohn Baldwin roll $16,%edx 1180bc3d5698SJohn Baldwin movl %ebp,20(%esp) 1181bc3d5698SJohn Baldwin addl %edx,%ecx 1182bc3d5698SJohn Baldwin movl 44(%esp),%esi 1183bc3d5698SJohn Baldwin xorl %ecx,%ebx 1184bc3d5698SJohn Baldwin movl 60(%esp),%edi 1185bc3d5698SJohn Baldwin roll $12,%ebx 1186bc3d5698SJohn Baldwin movl 28(%esp),%ebp 1187bc3d5698SJohn Baldwin addl %ebx,%eax 1188bc3d5698SJohn Baldwin xorl %eax,%edx 1189bc3d5698SJohn Baldwin movl %eax,8(%esp) 1190bc3d5698SJohn Baldwin roll $8,%edx 1191bc3d5698SJohn Baldwin movl 12(%esp),%eax 1192bc3d5698SJohn Baldwin addl %edx,%ecx 1193bc3d5698SJohn Baldwin movl %edx,56(%esp) 1194bc3d5698SJohn Baldwin xorl %ecx,%ebx 1195bc3d5698SJohn Baldwin addl %ebp,%eax 1196bc3d5698SJohn Baldwin roll $7,%ebx 1197bc3d5698SJohn Baldwin xorl %eax,%edi 1198bc3d5698SJohn Baldwin roll $16,%edi 1199bc3d5698SJohn Baldwin movl %ebx,24(%esp) 1200bc3d5698SJohn Baldwin addl %edi,%esi 1201bc3d5698SJohn Baldwin xorl %esi,%ebp 1202bc3d5698SJohn Baldwin roll $12,%ebp 1203bc3d5698SJohn Baldwin movl 20(%esp),%ebx 1204bc3d5698SJohn Baldwin addl 
%ebp,%eax 1205bc3d5698SJohn Baldwin xorl %eax,%edi 1206bc3d5698SJohn Baldwin movl %eax,12(%esp) 1207bc3d5698SJohn Baldwin roll $8,%edi 1208bc3d5698SJohn Baldwin movl (%esp),%eax 1209bc3d5698SJohn Baldwin addl %edi,%esi 1210bc3d5698SJohn Baldwin movl %edi,%edx 1211bc3d5698SJohn Baldwin xorl %esi,%ebp 1212bc3d5698SJohn Baldwin addl %ebx,%eax 1213bc3d5698SJohn Baldwin roll $7,%ebp 1214bc3d5698SJohn Baldwin xorl %eax,%edx 1215bc3d5698SJohn Baldwin roll $16,%edx 1216bc3d5698SJohn Baldwin movl %ebp,28(%esp) 1217bc3d5698SJohn Baldwin addl %edx,%ecx 1218bc3d5698SJohn Baldwin xorl %ecx,%ebx 1219bc3d5698SJohn Baldwin movl 48(%esp),%edi 1220bc3d5698SJohn Baldwin roll $12,%ebx 1221bc3d5698SJohn Baldwin movl 24(%esp),%ebp 1222bc3d5698SJohn Baldwin addl %ebx,%eax 1223bc3d5698SJohn Baldwin xorl %eax,%edx 1224bc3d5698SJohn Baldwin movl %eax,(%esp) 1225bc3d5698SJohn Baldwin roll $8,%edx 1226bc3d5698SJohn Baldwin movl 4(%esp),%eax 1227bc3d5698SJohn Baldwin addl %edx,%ecx 1228bc3d5698SJohn Baldwin movl %edx,60(%esp) 1229bc3d5698SJohn Baldwin xorl %ecx,%ebx 1230bc3d5698SJohn Baldwin addl %ebp,%eax 1231bc3d5698SJohn Baldwin roll $7,%ebx 1232bc3d5698SJohn Baldwin xorl %eax,%edi 1233bc3d5698SJohn Baldwin movl %ecx,40(%esp) 1234bc3d5698SJohn Baldwin roll $16,%edi 1235bc3d5698SJohn Baldwin movl %ebx,20(%esp) 1236bc3d5698SJohn Baldwin addl %edi,%esi 1237bc3d5698SJohn Baldwin movl 32(%esp),%ecx 1238bc3d5698SJohn Baldwin xorl %esi,%ebp 1239bc3d5698SJohn Baldwin movl 52(%esp),%edx 1240bc3d5698SJohn Baldwin roll $12,%ebp 1241bc3d5698SJohn Baldwin movl 28(%esp),%ebx 1242bc3d5698SJohn Baldwin addl %ebp,%eax 1243bc3d5698SJohn Baldwin xorl %eax,%edi 1244bc3d5698SJohn Baldwin movl %eax,4(%esp) 1245bc3d5698SJohn Baldwin roll $8,%edi 1246bc3d5698SJohn Baldwin movl 8(%esp),%eax 1247bc3d5698SJohn Baldwin addl %edi,%esi 1248bc3d5698SJohn Baldwin movl %edi,48(%esp) 1249bc3d5698SJohn Baldwin xorl %esi,%ebp 1250bc3d5698SJohn Baldwin addl %ebx,%eax 1251bc3d5698SJohn Baldwin roll $7,%ebp 1252bc3d5698SJohn 
Baldwin xorl %eax,%edx 1253bc3d5698SJohn Baldwin movl %esi,44(%esp) 1254bc3d5698SJohn Baldwin roll $16,%edx 1255bc3d5698SJohn Baldwin movl %ebp,24(%esp) 1256bc3d5698SJohn Baldwin addl %edx,%ecx 1257bc3d5698SJohn Baldwin movl 36(%esp),%esi 1258bc3d5698SJohn Baldwin xorl %ecx,%ebx 1259bc3d5698SJohn Baldwin movl 56(%esp),%edi 1260bc3d5698SJohn Baldwin roll $12,%ebx 1261bc3d5698SJohn Baldwin movl 16(%esp),%ebp 1262bc3d5698SJohn Baldwin addl %ebx,%eax 1263bc3d5698SJohn Baldwin xorl %eax,%edx 1264bc3d5698SJohn Baldwin movl %eax,8(%esp) 1265bc3d5698SJohn Baldwin roll $8,%edx 1266bc3d5698SJohn Baldwin movl 12(%esp),%eax 1267bc3d5698SJohn Baldwin addl %edx,%ecx 1268bc3d5698SJohn Baldwin movl %edx,52(%esp) 1269bc3d5698SJohn Baldwin xorl %ecx,%ebx 1270bc3d5698SJohn Baldwin addl %ebp,%eax 1271bc3d5698SJohn Baldwin roll $7,%ebx 1272bc3d5698SJohn Baldwin xorl %eax,%edi 1273bc3d5698SJohn Baldwin roll $16,%edi 1274bc3d5698SJohn Baldwin movl %ebx,28(%esp) 1275bc3d5698SJohn Baldwin addl %edi,%esi 1276bc3d5698SJohn Baldwin xorl %esi,%ebp 1277bc3d5698SJohn Baldwin movl 48(%esp),%edx 1278bc3d5698SJohn Baldwin roll $12,%ebp 1279bc3d5698SJohn Baldwin movl 128(%esp),%ebx 1280bc3d5698SJohn Baldwin addl %ebp,%eax 1281bc3d5698SJohn Baldwin xorl %eax,%edi 1282bc3d5698SJohn Baldwin movl %eax,12(%esp) 1283bc3d5698SJohn Baldwin roll $8,%edi 1284bc3d5698SJohn Baldwin movl (%esp),%eax 1285bc3d5698SJohn Baldwin addl %edi,%esi 1286bc3d5698SJohn Baldwin movl %edi,56(%esp) 1287bc3d5698SJohn Baldwin xorl %esi,%ebp 1288bc3d5698SJohn Baldwin roll $7,%ebp 1289bc3d5698SJohn Baldwin decl %ebx 1290bc3d5698SJohn Baldwin jnz .L004loop 1291bc3d5698SJohn Baldwin movl 160(%esp),%ebx 1292bc3d5698SJohn Baldwin addl $1634760805,%eax 1293bc3d5698SJohn Baldwin addl 80(%esp),%ebp 1294bc3d5698SJohn Baldwin addl 96(%esp),%ecx 1295bc3d5698SJohn Baldwin addl 100(%esp),%esi 1296bc3d5698SJohn Baldwin cmpl $64,%ebx 1297bc3d5698SJohn Baldwin jb .L005tail 1298bc3d5698SJohn Baldwin movl 156(%esp),%ebx 1299bc3d5698SJohn Baldwin 
addl 112(%esp),%edx 1300bc3d5698SJohn Baldwin addl 120(%esp),%edi 1301bc3d5698SJohn Baldwin xorl (%ebx),%eax 1302bc3d5698SJohn Baldwin xorl 16(%ebx),%ebp 1303bc3d5698SJohn Baldwin movl %eax,(%esp) 1304bc3d5698SJohn Baldwin movl 152(%esp),%eax 1305bc3d5698SJohn Baldwin xorl 32(%ebx),%ecx 1306bc3d5698SJohn Baldwin xorl 36(%ebx),%esi 1307bc3d5698SJohn Baldwin xorl 48(%ebx),%edx 1308bc3d5698SJohn Baldwin xorl 56(%ebx),%edi 1309bc3d5698SJohn Baldwin movl %ebp,16(%eax) 1310bc3d5698SJohn Baldwin movl %ecx,32(%eax) 1311bc3d5698SJohn Baldwin movl %esi,36(%eax) 1312bc3d5698SJohn Baldwin movl %edx,48(%eax) 1313bc3d5698SJohn Baldwin movl %edi,56(%eax) 1314bc3d5698SJohn Baldwin movl 4(%esp),%ebp 1315bc3d5698SJohn Baldwin movl 8(%esp),%ecx 1316bc3d5698SJohn Baldwin movl 12(%esp),%esi 1317bc3d5698SJohn Baldwin movl 20(%esp),%edx 1318bc3d5698SJohn Baldwin movl 24(%esp),%edi 1319bc3d5698SJohn Baldwin addl $857760878,%ebp 1320bc3d5698SJohn Baldwin addl $2036477234,%ecx 1321bc3d5698SJohn Baldwin addl $1797285236,%esi 1322bc3d5698SJohn Baldwin addl 84(%esp),%edx 1323bc3d5698SJohn Baldwin addl 88(%esp),%edi 1324bc3d5698SJohn Baldwin xorl 4(%ebx),%ebp 1325bc3d5698SJohn Baldwin xorl 8(%ebx),%ecx 1326bc3d5698SJohn Baldwin xorl 12(%ebx),%esi 1327bc3d5698SJohn Baldwin xorl 20(%ebx),%edx 1328bc3d5698SJohn Baldwin xorl 24(%ebx),%edi 1329bc3d5698SJohn Baldwin movl %ebp,4(%eax) 1330bc3d5698SJohn Baldwin movl %ecx,8(%eax) 1331bc3d5698SJohn Baldwin movl %esi,12(%eax) 1332bc3d5698SJohn Baldwin movl %edx,20(%eax) 1333bc3d5698SJohn Baldwin movl %edi,24(%eax) 1334bc3d5698SJohn Baldwin movl 28(%esp),%ebp 1335bc3d5698SJohn Baldwin movl 40(%esp),%ecx 1336bc3d5698SJohn Baldwin movl 44(%esp),%esi 1337bc3d5698SJohn Baldwin movl 52(%esp),%edx 1338bc3d5698SJohn Baldwin movl 60(%esp),%edi 1339bc3d5698SJohn Baldwin addl 92(%esp),%ebp 1340bc3d5698SJohn Baldwin addl 104(%esp),%ecx 1341bc3d5698SJohn Baldwin addl 108(%esp),%esi 1342bc3d5698SJohn Baldwin addl 116(%esp),%edx 1343bc3d5698SJohn Baldwin addl 
124(%esp),%edi 1344bc3d5698SJohn Baldwin xorl 28(%ebx),%ebp 1345bc3d5698SJohn Baldwin xorl 40(%ebx),%ecx 1346bc3d5698SJohn Baldwin xorl 44(%ebx),%esi 1347bc3d5698SJohn Baldwin xorl 52(%ebx),%edx 1348bc3d5698SJohn Baldwin xorl 60(%ebx),%edi 1349bc3d5698SJohn Baldwin leal 64(%ebx),%ebx 1350bc3d5698SJohn Baldwin movl %ebp,28(%eax) 1351bc3d5698SJohn Baldwin movl (%esp),%ebp 1352bc3d5698SJohn Baldwin movl %ecx,40(%eax) 1353bc3d5698SJohn Baldwin movl 160(%esp),%ecx 1354bc3d5698SJohn Baldwin movl %esi,44(%eax) 1355bc3d5698SJohn Baldwin movl %edx,52(%eax) 1356bc3d5698SJohn Baldwin movl %edi,60(%eax) 1357bc3d5698SJohn Baldwin movl %ebp,(%eax) 1358bc3d5698SJohn Baldwin leal 64(%eax),%eax 1359bc3d5698SJohn Baldwin subl $64,%ecx 1360bc3d5698SJohn Baldwin jnz .L003outer_loop 1361bc3d5698SJohn Baldwin jmp .L006done 1362bc3d5698SJohn Baldwin.L005tail: 1363bc3d5698SJohn Baldwin addl 112(%esp),%edx 1364bc3d5698SJohn Baldwin addl 120(%esp),%edi 1365bc3d5698SJohn Baldwin movl %eax,(%esp) 1366bc3d5698SJohn Baldwin movl %ebp,16(%esp) 1367bc3d5698SJohn Baldwin movl %ecx,32(%esp) 1368bc3d5698SJohn Baldwin movl %esi,36(%esp) 1369bc3d5698SJohn Baldwin movl %edx,48(%esp) 1370bc3d5698SJohn Baldwin movl %edi,56(%esp) 1371bc3d5698SJohn Baldwin movl 4(%esp),%ebp 1372bc3d5698SJohn Baldwin movl 8(%esp),%ecx 1373bc3d5698SJohn Baldwin movl 12(%esp),%esi 1374bc3d5698SJohn Baldwin movl 20(%esp),%edx 1375bc3d5698SJohn Baldwin movl 24(%esp),%edi 1376bc3d5698SJohn Baldwin addl $857760878,%ebp 1377bc3d5698SJohn Baldwin addl $2036477234,%ecx 1378bc3d5698SJohn Baldwin addl $1797285236,%esi 1379bc3d5698SJohn Baldwin addl 84(%esp),%edx 1380bc3d5698SJohn Baldwin addl 88(%esp),%edi 1381bc3d5698SJohn Baldwin movl %ebp,4(%esp) 1382bc3d5698SJohn Baldwin movl %ecx,8(%esp) 1383bc3d5698SJohn Baldwin movl %esi,12(%esp) 1384bc3d5698SJohn Baldwin movl %edx,20(%esp) 1385bc3d5698SJohn Baldwin movl %edi,24(%esp) 1386bc3d5698SJohn Baldwin movl 28(%esp),%ebp 1387bc3d5698SJohn Baldwin movl 40(%esp),%ecx 1388bc3d5698SJohn 
Baldwin movl 44(%esp),%esi 1389bc3d5698SJohn Baldwin movl 52(%esp),%edx 1390bc3d5698SJohn Baldwin movl 60(%esp),%edi 1391bc3d5698SJohn Baldwin addl 92(%esp),%ebp 1392bc3d5698SJohn Baldwin addl 104(%esp),%ecx 1393bc3d5698SJohn Baldwin addl 108(%esp),%esi 1394bc3d5698SJohn Baldwin addl 116(%esp),%edx 1395bc3d5698SJohn Baldwin addl 124(%esp),%edi 1396bc3d5698SJohn Baldwin movl %ebp,28(%esp) 1397bc3d5698SJohn Baldwin movl 156(%esp),%ebp 1398bc3d5698SJohn Baldwin movl %ecx,40(%esp) 1399bc3d5698SJohn Baldwin movl 152(%esp),%ecx 1400bc3d5698SJohn Baldwin movl %esi,44(%esp) 1401bc3d5698SJohn Baldwin xorl %esi,%esi 1402bc3d5698SJohn Baldwin movl %edx,52(%esp) 1403bc3d5698SJohn Baldwin movl %edi,60(%esp) 1404bc3d5698SJohn Baldwin xorl %eax,%eax 1405bc3d5698SJohn Baldwin xorl %edx,%edx 1406bc3d5698SJohn Baldwin.L007tail_loop: 1407bc3d5698SJohn Baldwin movb (%esi,%ebp,1),%al 1408bc3d5698SJohn Baldwin movb (%esp,%esi,1),%dl 1409bc3d5698SJohn Baldwin leal 1(%esi),%esi 1410bc3d5698SJohn Baldwin xorb %dl,%al 1411bc3d5698SJohn Baldwin movb %al,-1(%ecx,%esi,1) 1412bc3d5698SJohn Baldwin decl %ebx 1413bc3d5698SJohn Baldwin jnz .L007tail_loop 1414bc3d5698SJohn Baldwin.L006done: 1415bc3d5698SJohn Baldwin addl $132,%esp 1416bc3d5698SJohn Baldwin.L000no_data: 1417bc3d5698SJohn Baldwin popl %edi 1418bc3d5698SJohn Baldwin popl %esi 1419bc3d5698SJohn Baldwin popl %ebx 1420bc3d5698SJohn Baldwin popl %ebp 1421bc3d5698SJohn Baldwin ret 1422bc3d5698SJohn Baldwin.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin 1423bc3d5698SJohn Baldwin.globl ChaCha20_ssse3 1424bc3d5698SJohn Baldwin.type ChaCha20_ssse3,@function 1425bc3d5698SJohn Baldwin.align 16 1426bc3d5698SJohn BaldwinChaCha20_ssse3: 1427bc3d5698SJohn Baldwin.L_ChaCha20_ssse3_begin: 1428*c0855eaaSJohn Baldwin #ifdef __CET__ 1429*c0855eaaSJohn Baldwin 1430*c0855eaaSJohn Baldwin.byte 243,15,30,251 1431*c0855eaaSJohn Baldwin #endif 1432*c0855eaaSJohn Baldwin 1433bc3d5698SJohn Baldwin pushl %ebp 1434bc3d5698SJohn Baldwin pushl %ebx 1435bc3d5698SJohn 
Baldwin pushl %esi 1436bc3d5698SJohn Baldwin pushl %edi 1437bc3d5698SJohn Baldwin.Lssse3_shortcut: 1438bc3d5698SJohn Baldwin testl $2048,4(%ebp) 1439bc3d5698SJohn Baldwin jnz .Lxop_shortcut 1440bc3d5698SJohn Baldwin movl 20(%esp),%edi 1441bc3d5698SJohn Baldwin movl 24(%esp),%esi 1442bc3d5698SJohn Baldwin movl 28(%esp),%ecx 1443bc3d5698SJohn Baldwin movl 32(%esp),%edx 1444bc3d5698SJohn Baldwin movl 36(%esp),%ebx 1445bc3d5698SJohn Baldwin movl %esp,%ebp 1446bc3d5698SJohn Baldwin subl $524,%esp 1447bc3d5698SJohn Baldwin andl $-64,%esp 1448bc3d5698SJohn Baldwin movl %ebp,512(%esp) 1449bc3d5698SJohn Baldwin leal .Lssse3_data-.Lpic_point(%eax),%eax 1450bc3d5698SJohn Baldwin movdqu (%ebx),%xmm3 1451bc3d5698SJohn Baldwin.L0081x: 1452bc3d5698SJohn Baldwin movdqa 32(%eax),%xmm0 1453bc3d5698SJohn Baldwin movdqu (%edx),%xmm1 1454bc3d5698SJohn Baldwin movdqu 16(%edx),%xmm2 1455bc3d5698SJohn Baldwin movdqa (%eax),%xmm6 1456bc3d5698SJohn Baldwin movdqa 16(%eax),%xmm7 1457bc3d5698SJohn Baldwin movl %ebp,48(%esp) 1458bc3d5698SJohn Baldwin movdqa %xmm0,(%esp) 1459bc3d5698SJohn Baldwin movdqa %xmm1,16(%esp) 1460bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 1461bc3d5698SJohn Baldwin movdqa %xmm3,48(%esp) 1462bc3d5698SJohn Baldwin movl $10,%edx 1463bc3d5698SJohn Baldwin jmp .L009loop1x 1464bc3d5698SJohn Baldwin.align 16 1465bc3d5698SJohn Baldwin.L010outer1x: 1466bc3d5698SJohn Baldwin movdqa 80(%eax),%xmm3 1467bc3d5698SJohn Baldwin movdqa (%esp),%xmm0 1468bc3d5698SJohn Baldwin movdqa 16(%esp),%xmm1 1469bc3d5698SJohn Baldwin movdqa 32(%esp),%xmm2 1470bc3d5698SJohn Baldwin paddd 48(%esp),%xmm3 1471bc3d5698SJohn Baldwin movl $10,%edx 1472bc3d5698SJohn Baldwin movdqa %xmm3,48(%esp) 1473bc3d5698SJohn Baldwin jmp .L009loop1x 1474bc3d5698SJohn Baldwin.align 16 1475bc3d5698SJohn Baldwin.L009loop1x: 1476bc3d5698SJohn Baldwin paddd %xmm1,%xmm0 1477bc3d5698SJohn Baldwin pxor %xmm0,%xmm3 1478bc3d5698SJohn Baldwin.byte 102,15,56,0,222 1479bc3d5698SJohn Baldwin paddd %xmm3,%xmm2 1480bc3d5698SJohn 
Baldwin pxor %xmm2,%xmm1 1481bc3d5698SJohn Baldwin movdqa %xmm1,%xmm4 1482bc3d5698SJohn Baldwin psrld $20,%xmm1 1483bc3d5698SJohn Baldwin pslld $12,%xmm4 1484bc3d5698SJohn Baldwin por %xmm4,%xmm1 1485bc3d5698SJohn Baldwin paddd %xmm1,%xmm0 1486bc3d5698SJohn Baldwin pxor %xmm0,%xmm3 1487bc3d5698SJohn Baldwin.byte 102,15,56,0,223 1488bc3d5698SJohn Baldwin paddd %xmm3,%xmm2 1489bc3d5698SJohn Baldwin pxor %xmm2,%xmm1 1490bc3d5698SJohn Baldwin movdqa %xmm1,%xmm4 1491bc3d5698SJohn Baldwin psrld $25,%xmm1 1492bc3d5698SJohn Baldwin pslld $7,%xmm4 1493bc3d5698SJohn Baldwin por %xmm4,%xmm1 1494bc3d5698SJohn Baldwin pshufd $78,%xmm2,%xmm2 1495bc3d5698SJohn Baldwin pshufd $57,%xmm1,%xmm1 1496bc3d5698SJohn Baldwin pshufd $147,%xmm3,%xmm3 1497bc3d5698SJohn Baldwin nop 1498bc3d5698SJohn Baldwin paddd %xmm1,%xmm0 1499bc3d5698SJohn Baldwin pxor %xmm0,%xmm3 1500bc3d5698SJohn Baldwin.byte 102,15,56,0,222 1501bc3d5698SJohn Baldwin paddd %xmm3,%xmm2 1502bc3d5698SJohn Baldwin pxor %xmm2,%xmm1 1503bc3d5698SJohn Baldwin movdqa %xmm1,%xmm4 1504bc3d5698SJohn Baldwin psrld $20,%xmm1 1505bc3d5698SJohn Baldwin pslld $12,%xmm4 1506bc3d5698SJohn Baldwin por %xmm4,%xmm1 1507bc3d5698SJohn Baldwin paddd %xmm1,%xmm0 1508bc3d5698SJohn Baldwin pxor %xmm0,%xmm3 1509bc3d5698SJohn Baldwin.byte 102,15,56,0,223 1510bc3d5698SJohn Baldwin paddd %xmm3,%xmm2 1511bc3d5698SJohn Baldwin pxor %xmm2,%xmm1 1512bc3d5698SJohn Baldwin movdqa %xmm1,%xmm4 1513bc3d5698SJohn Baldwin psrld $25,%xmm1 1514bc3d5698SJohn Baldwin pslld $7,%xmm4 1515bc3d5698SJohn Baldwin por %xmm4,%xmm1 1516bc3d5698SJohn Baldwin pshufd $78,%xmm2,%xmm2 1517bc3d5698SJohn Baldwin pshufd $147,%xmm1,%xmm1 1518bc3d5698SJohn Baldwin pshufd $57,%xmm3,%xmm3 1519bc3d5698SJohn Baldwin decl %edx 1520bc3d5698SJohn Baldwin jnz .L009loop1x 1521bc3d5698SJohn Baldwin paddd (%esp),%xmm0 1522bc3d5698SJohn Baldwin paddd 16(%esp),%xmm1 1523bc3d5698SJohn Baldwin paddd 32(%esp),%xmm2 1524bc3d5698SJohn Baldwin paddd 48(%esp),%xmm3 1525bc3d5698SJohn Baldwin cmpl $64,%ecx 
1526bc3d5698SJohn Baldwin jb .L011tail 1527bc3d5698SJohn Baldwin movdqu (%esi),%xmm4 1528bc3d5698SJohn Baldwin movdqu 16(%esi),%xmm5 1529bc3d5698SJohn Baldwin pxor %xmm4,%xmm0 1530bc3d5698SJohn Baldwin movdqu 32(%esi),%xmm4 1531bc3d5698SJohn Baldwin pxor %xmm5,%xmm1 1532bc3d5698SJohn Baldwin movdqu 48(%esi),%xmm5 1533bc3d5698SJohn Baldwin pxor %xmm4,%xmm2 1534bc3d5698SJohn Baldwin pxor %xmm5,%xmm3 1535bc3d5698SJohn Baldwin leal 64(%esi),%esi 1536bc3d5698SJohn Baldwin movdqu %xmm0,(%edi) 1537bc3d5698SJohn Baldwin movdqu %xmm1,16(%edi) 1538bc3d5698SJohn Baldwin movdqu %xmm2,32(%edi) 1539bc3d5698SJohn Baldwin movdqu %xmm3,48(%edi) 1540bc3d5698SJohn Baldwin leal 64(%edi),%edi 1541bc3d5698SJohn Baldwin subl $64,%ecx 1542bc3d5698SJohn Baldwin jnz .L010outer1x 1543bc3d5698SJohn Baldwin jmp .L012done 1544bc3d5698SJohn Baldwin.L011tail: 1545bc3d5698SJohn Baldwin movdqa %xmm0,(%esp) 1546bc3d5698SJohn Baldwin movdqa %xmm1,16(%esp) 1547bc3d5698SJohn Baldwin movdqa %xmm2,32(%esp) 1548bc3d5698SJohn Baldwin movdqa %xmm3,48(%esp) 1549bc3d5698SJohn Baldwin xorl %eax,%eax 1550bc3d5698SJohn Baldwin xorl %edx,%edx 1551bc3d5698SJohn Baldwin xorl %ebp,%ebp 1552bc3d5698SJohn Baldwin.L013tail_loop: 1553bc3d5698SJohn Baldwin movb (%esp,%ebp,1),%al 1554bc3d5698SJohn Baldwin movb (%esi,%ebp,1),%dl 1555bc3d5698SJohn Baldwin leal 1(%ebp),%ebp 1556bc3d5698SJohn Baldwin xorb %dl,%al 1557bc3d5698SJohn Baldwin movb %al,-1(%edi,%ebp,1) 1558bc3d5698SJohn Baldwin decl %ecx 1559bc3d5698SJohn Baldwin jnz .L013tail_loop 1560bc3d5698SJohn Baldwin.L012done: 1561bc3d5698SJohn Baldwin movl 512(%esp),%esp 1562bc3d5698SJohn Baldwin popl %edi 1563bc3d5698SJohn Baldwin popl %esi 1564bc3d5698SJohn Baldwin popl %ebx 1565bc3d5698SJohn Baldwin popl %ebp 1566bc3d5698SJohn Baldwin ret 1567bc3d5698SJohn Baldwin.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin 1568bc3d5698SJohn Baldwin.align 64 1569bc3d5698SJohn Baldwin.Lssse3_data: 1570bc3d5698SJohn Baldwin.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 1571bc3d5698SJohn 
/*
 * Remaining rows of the .Lssse3_data constant table.  The label and the
 * first 16-byte row (offset 0) precede this point; offsets below are
 * relative to .Lssse3_data.
 */
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14		/* +16: byte-shuffle mask */
.long	1634760805,857760878,2036477234,1797285236	/* +32: state row 0 constants */
.long	0,1,2,3						/* +48: per-lane counter offsets */
.long	4,4,4,4						/* +64: counter step, 4 blocks */
.long	1,0,0,0						/* +80: counter step, 1 block */
.long	4,0,0,0						/* +96: counter step after a 4-block batch */
.long	0,-1,-1,-1					/* +112: mask clearing dword 0 */
.align	64
/* ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>\0" */
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0

/*
 * ChaCha20_xop -- AVX/XOP code path (rotations done with XOP vprotd,
 * emitted as raw .byte sequences).
 *
 * Stack arguments, as read after the four register pushes:
 *   20(%esp) -> %edi  destination pointer (every result store uses %edi)
 *   24(%esp) -> %esi  source pointer (XORed with the generated blocks)
 *   28(%esp) -> %ecx  byte count
 *   32(%esp) -> %edx  32 bytes loaded into state rows 1-2 (presumably the key)
 *   36(%esp) -> %ebx  16 bytes loaded into state row 3 (presumably
 *                     counter||nonce; dword 0 is the one that gets incremented)
 *
 * Frame (after %esp is lowered by 524 and aligned down to 64 bytes):
 *   0..255(%esp)    working state, addressed as -128..127(%ebx)
 *   256..511(%esp)  per-word broadcast input state, -128..127(%ebp)
 *   512(%esp)       caller %esp, reloaded at .L017done
 *   516(%esp)       saved %edx, 520(%esp) saved %ebx (4-block path only)
 * The single-block path reuses 0..63(%esp) for the 64-byte input state and,
 * in the tail, for the final keystream block.
 *
 * NOTE(review): .Lxop_shortcut expects %eax == address of .Lpic_point (it is
 * turned into the address of .Lssse3_data below).  Nothing between the
 * ChaCha20_xop label and that leal writes %eax, so the value must come from
 * whoever jumps to .Lxop_shortcut -- confirm before calling ChaCha20_xop
 * directly.
 * NOTE(review): there is no zero-length check on this path; with %ecx == 0
 * the byte loop at .L021tail_loop would decrement through the full 32-bit
 * range.  Callers appear to be expected to filter that case.
 */
.globl	ChaCha20_xop
.type	ChaCha20_xop,@function
.align	16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
#ifdef __CET__
.byte	243,15,30,251			/* F3 0F 1E FB = endbr32 */
#endif
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
.Lxop_shortcut:
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	movl	36(%esp),%ebx
	vzeroupper
	movl	%esp,%ebp		/* remember caller %esp */
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)
	leal	.Lssse3_data-.Lpic_point(%eax),%eax	/* %eax = &.Lssse3_data */
	vmovdqu	(%ebx),%xmm3		/* state row 3 */
	cmpl	$256,%ecx
	jb	.L0141x			/* fewer than 256 bytes: one block at a time */

	/* ---- 4-block path: broadcast every state word across four lanes ---- */
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx		/* length kept biased by -256 while looping */
	leal	384(%esp),%ebp
	vmovdqu	(%edx),%xmm7
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpaddd	48(%eax),%xmm0,%xmm0	/* lanes get counter+0..3 */
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpsubd	64(%eax),%xmm0,%xmm0	/* pre-subtract 4; the loop adds 4 first */
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,64(%ebp)
	vmovdqa	%xmm1,80(%ebp)
	vmovdqa	%xmm2,96(%ebp)
	vmovdqa	%xmm3,112(%ebp)
	vmovdqu	16(%edx),%xmm3
	vmovdqa	%xmm4,-64(%ebp)
	vmovdqa	%xmm5,-48(%ebp)
	vmovdqa	%xmm6,-32(%ebp)
	vmovdqa	%xmm7,-16(%ebp)
	vmovdqa	32(%eax),%xmm7		/* row 0 constants */
	leal	128(%esp),%ebx
	vpshufd	$0,%xmm3,%xmm0
	vpshufd	$85,%xmm3,%xmm1
	vpshufd	$170,%xmm3,%xmm2
	vpshufd	$255,%xmm3,%xmm3
	vpshufd	$0,%xmm7,%xmm4
	vpshufd	$85,%xmm7,%xmm5
	vpshufd	$170,%xmm7,%xmm6
	vpshufd	$255,%xmm7,%xmm7
	vmovdqa	%xmm0,(%ebp)
	vmovdqa	%xmm1,16(%ebp)
	vmovdqa	%xmm2,32(%ebp)
	vmovdqa	%xmm3,48(%ebp)
	vmovdqa	%xmm4,-128(%ebp)
	vmovdqa	%xmm5,-112(%ebp)
	vmovdqa	%xmm6,-96(%ebp)
	vmovdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi		/* bias pointers so +-128 displacements fit in a byte */
	leal	128(%edi),%edi
	jmp	.L015outer_loop
.align	32
.L015outer_loop:
	/* Copy the input state (%ebp) into the working area (%ebx); words that
	   start the first quarter-round stay in registers instead. */
	vmovdqa	-112(%ebp),%xmm1
	vmovdqa	-96(%ebp),%xmm2
	vmovdqa	-80(%ebp),%xmm3
	vmovdqa	-48(%ebp),%xmm5
	vmovdqa	-32(%ebp),%xmm6
	vmovdqa	-16(%ebp),%xmm7
	vmovdqa	%xmm1,-112(%ebx)
	vmovdqa	%xmm2,-96(%ebx)
	vmovdqa	%xmm3,-80(%ebx)
	vmovdqa	%xmm5,-48(%ebx)
	vmovdqa	%xmm6,-32(%ebx)
	vmovdqa	%xmm7,-16(%ebx)
	vmovdqa	32(%ebp),%xmm2
	vmovdqa	48(%ebp),%xmm3
	vmovdqa	64(%ebp),%xmm4
	vmovdqa	80(%ebp),%xmm5
	vmovdqa	96(%ebp),%xmm6
	vmovdqa	112(%ebp),%xmm7
	vpaddd	64(%eax),%xmm4,%xmm4	/* advance all four counters by 4 */
	vmovdqa	%xmm2,32(%ebx)
	vmovdqa	%xmm3,48(%ebx)
	vmovdqa	%xmm4,64(%ebx)
	vmovdqa	%xmm5,80(%ebx)
	vmovdqa	%xmm6,96(%ebx)
	vmovdqa	%xmm7,112(%ebx)
	vmovdqa	%xmm4,64(%ebp)		/* persist the advanced counters */
	vmovdqa	-128(%ebp),%xmm0
	vmovdqa	%xmm4,%xmm6
	vmovdqa	-64(%ebp),%xmm3
	vmovdqa	(%ebp),%xmm4
	vmovdqa	16(%ebp),%xmm5
	movl	$10,%edx		/* 10 iterations of the loop below */
	nop
.align	32
.L016loop:
	/* One iteration = eight interleaved quarter-rounds (rotations by
	   16, 12, 8 and 7 each), with state words spilled to / reloaded from
	   the working area at (%ebx) as registers run out. */
	vpaddd	%xmm3,%xmm0,%xmm0
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,246,16		/* vprotd $16,%xmm6,%xmm6 */
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm3,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12		/* vprotd $12,%xmm2,%xmm2 */
	vmovdqa	-48(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	80(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8		/* vprotd $8,%xmm6,%xmm6 */
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,64(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7		/* vprotd $7,%xmm2,%xmm2 */
	vmovdqa	%xmm4,(%ebx)
.byte	143,232,120,194,255,16		/* vprotd $16,%xmm7,%xmm7 */
	vmovdqa	%xmm2,-64(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	32(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12		/* vprotd $12,%xmm3,%xmm3 */
	vmovdqa	-32(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	96(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8		/* vprotd $8,%xmm7,%xmm7 */
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,80(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7		/* vprotd $7,%xmm3,%xmm3 */
	vmovdqa	%xmm5,16(%ebx)
.byte	143,232,120,194,246,16		/* vprotd $16,%xmm6,%xmm6 */
	vmovdqa	%xmm3,-48(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	48(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12		/* vprotd $12,%xmm2,%xmm2 */
	vmovdqa	-16(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	112(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8		/* vprotd $8,%xmm6,%xmm6 */
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,96(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7		/* vprotd $7,%xmm2,%xmm2 */
.byte	143,232,120,194,255,16		/* vprotd $16,%xmm7,%xmm7 */
	vmovdqa	%xmm2,-32(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12		/* vprotd $12,%xmm3,%xmm3 */
	vmovdqa	-48(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8		/* vprotd $8,%xmm7,%xmm7 */
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm7,%xmm6	/* %xmm7 feeds the next quarter-round directly */
.byte	143,232,120,194,219,7		/* vprotd $7,%xmm3,%xmm3 */
.byte	143,232,120,194,246,16		/* vprotd $16,%xmm6,%xmm6 */
	vmovdqa	%xmm3,-16(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-112(%ebx),%xmm1
.byte	143,232,120,194,210,12		/* vprotd $12,%xmm2,%xmm2 */
	vmovdqa	-32(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	64(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8		/* vprotd $8,%xmm6,%xmm6 */
	vmovdqa	%xmm0,-128(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,112(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7		/* vprotd $7,%xmm2,%xmm2 */
	vmovdqa	%xmm4,32(%ebx)
.byte	143,232,120,194,255,16		/* vprotd $16,%xmm7,%xmm7 */
	vmovdqa	%xmm2,-48(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	(%ebx),%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-96(%ebx),%xmm0
.byte	143,232,120,194,219,12		/* vprotd $12,%xmm3,%xmm3 */
	vmovdqa	-16(%ebx),%xmm2
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	80(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
	vpaddd	%xmm2,%xmm0,%xmm0
.byte	143,232,120,194,255,8		/* vprotd $8,%xmm7,%xmm7 */
	vmovdqa	%xmm1,-112(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,64(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
	vpxor	%xmm0,%xmm6,%xmm6
.byte	143,232,120,194,219,7		/* vprotd $7,%xmm3,%xmm3 */
	vmovdqa	%xmm5,48(%ebx)
.byte	143,232,120,194,246,16		/* vprotd $16,%xmm6,%xmm6 */
	vmovdqa	%xmm3,-32(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	16(%ebx),%xmm5
	vpxor	%xmm4,%xmm2,%xmm2
	vmovdqa	-80(%ebx),%xmm1
.byte	143,232,120,194,210,12		/* vprotd $12,%xmm2,%xmm2 */
	vmovdqa	-64(%ebx),%xmm3
	vpaddd	%xmm2,%xmm0,%xmm0
	vmovdqa	96(%ebx),%xmm7
	vpxor	%xmm0,%xmm6,%xmm6
	vpaddd	%xmm3,%xmm1,%xmm1
.byte	143,232,120,194,246,8		/* vprotd $8,%xmm6,%xmm6 */
	vmovdqa	%xmm0,-96(%ebx)
	vpaddd	%xmm6,%xmm4,%xmm4
	vmovdqa	%xmm6,80(%ebx)
	vpxor	%xmm4,%xmm2,%xmm2
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,210,7		/* vprotd $7,%xmm2,%xmm2 */
.byte	143,232,120,194,255,16		/* vprotd $16,%xmm7,%xmm7 */
	vmovdqa	%xmm2,-16(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqa	-128(%ebx),%xmm0
.byte	143,232,120,194,219,12		/* vprotd $12,%xmm3,%xmm3 */
	vpaddd	%xmm3,%xmm1,%xmm1
	vmovdqa	64(%ebx),%xmm6
	vpxor	%xmm1,%xmm7,%xmm7
.byte	143,232,120,194,255,8		/* vprotd $8,%xmm7,%xmm7 */
	vmovdqa	%xmm1,-80(%ebx)
	vpaddd	%xmm7,%xmm5,%xmm5
	vmovdqa	%xmm7,96(%ebx)
	vpxor	%xmm5,%xmm3,%xmm3
.byte	143,232,120,194,219,7		/* vprotd $7,%xmm3,%xmm3 */
	decl	%edx
	jnz	.L016loop

	/* Flush the words still held in registers back to the working area. */
	vmovdqa	%xmm3,-64(%ebx)
	vmovdqa	%xmm4,(%ebx)
	vmovdqa	%xmm5,16(%ebx)
	vmovdqa	%xmm6,64(%ebx)
	vmovdqa	%xmm7,96(%ebx)

	/* Output stage, four state words at a time: add the input state,
	   transpose the 4x4 dword matrix with the unpack sequence so each
	   register holds 16 consecutive bytes of one block, XOR with the source
	   and store.  The four blocks are 64 bytes apart. */
	vmovdqa	-112(%ebx),%xmm1
	vmovdqa	-96(%ebx),%xmm2
	vmovdqa	-80(%ebx),%xmm3
	vpaddd	-128(%ebp),%xmm0,%xmm0
	vpaddd	-112(%ebp),%xmm1,%xmm1
	vpaddd	-96(%ebp),%xmm2,%xmm2
	vpaddd	-80(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	-64(%ebx),%xmm0
	vmovdqa	-48(%ebx),%xmm1
	vmovdqa	-32(%ebx),%xmm2
	vmovdqa	-16(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	-64(%ebp),%xmm0,%xmm0
	vpaddd	-48(%ebp),%xmm1,%xmm1
	vpaddd	-32(%ebp),%xmm2,%xmm2
	vpaddd	-16(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	(%ebx),%xmm0
	vmovdqa	16(%ebx),%xmm1
	vmovdqa	32(%ebx),%xmm2
	vmovdqa	48(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	(%ebp),%xmm0,%xmm0
	vpaddd	16(%ebp),%xmm1,%xmm1
	vpaddd	32(%ebp),%xmm2,%xmm2
	vpaddd	48(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	16(%esi),%esi
	vmovdqa	64(%ebx),%xmm0
	vmovdqa	80(%ebx),%xmm1
	vmovdqa	96(%ebx),%xmm2
	vmovdqa	112(%ebx),%xmm3
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	vpaddd	64(%ebp),%xmm0,%xmm0
	vpaddd	80(%ebp),%xmm1,%xmm1
	vpaddd	96(%ebp),%xmm2,%xmm2
	vpaddd	112(%ebp),%xmm3,%xmm3
	vpunpckldq	%xmm1,%xmm0,%xmm6
	vpunpckldq	%xmm3,%xmm2,%xmm7
	vpunpckhdq	%xmm1,%xmm0,%xmm0
	vpunpckhdq	%xmm3,%xmm2,%xmm2
	vpunpcklqdq	%xmm7,%xmm6,%xmm1
	vpunpckhqdq	%xmm7,%xmm6,%xmm6
	vpunpcklqdq	%xmm2,%xmm0,%xmm7
	vpunpckhqdq	%xmm2,%xmm0,%xmm3
	vpxor	-128(%esi),%xmm1,%xmm4
	vpxor	-64(%esi),%xmm6,%xmm5
	vpxor	(%esi),%xmm7,%xmm6
	vpxor	64(%esi),%xmm3,%xmm7
	leal	208(%esi),%esi		/* 3*16 + 208 = 256 bytes per batch */
	vmovdqu	%xmm4,-128(%edi)
	vmovdqu	%xmm5,-64(%edi)
	vmovdqu	%xmm6,(%edi)
	vmovdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L015outer_loop		/* no borrow: another full 256-byte batch */

	addl	$256,%ecx		/* undo the bias; %ecx = bytes left (< 256) */
	jz	.L017done
	/* Hand over to the single-block path: restore the argument pointers,
	   remove the +128 pointer bias, and rebuild state row 3 with dword 0
	   replaced by (lane-0 counter of the last batch) + 4. */
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	vmovd	64(%ebp),%xmm2
	vmovdqu	(%ebx),%xmm3
	vpaddd	96(%eax),%xmm2,%xmm2
	vpand	112(%eax),%xmm3,%xmm3
	vpor	%xmm2,%xmm3,%xmm3

	/* ---- single-block path: %xmm0..%xmm3 = state rows 0..3 ---- */
.L0141x:
	vmovdqa	32(%eax),%xmm0
	vmovdqu	(%edx),%xmm1
	vmovdqu	16(%edx),%xmm2
	/* %xmm6/%xmm7 receive the two shuffle masks but are not referenced
	   again in this function (rotations use vprotd here). */
	vmovdqa	(%eax),%xmm6
	vmovdqa	16(%eax),%xmm7
	/* NOTE(review): this store is overwritten by the vmovdqa to 48(%esp)
	   four instructions below with no intervening read -- looks like a
	   dead store kept from the generator; left as-is. */
	movl	%ebp,48(%esp)
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L018loop1x
.align	16
.L019outer1x:
	/* Next block: reload rows 0-2, add {1,0,0,0} to the saved row 3. */
	vmovdqa	80(%eax),%xmm3
	vmovdqa	(%esp),%xmm0
	vmovdqa	16(%esp),%xmm1
	vmovdqa	32(%esp),%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	movl	$10,%edx
	vmovdqa	%xmm3,48(%esp)
	jmp	.L018loop1x
.align	16
.L018loop1x:
	/* Four quarter-rounds at once on whole rows ... */
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16		/* vprotd $16,%xmm3,%xmm3 */
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12		/* vprotd $12,%xmm1,%xmm1 */
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8		/* vprotd $8,%xmm3,%xmm3 */
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7		/* vprotd $7,%xmm1,%xmm1 */
	/* ... rotate rows 1-3 by 1, 2 and 3 dwords ... */
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$57,%xmm1,%xmm1
	vpshufd	$147,%xmm3,%xmm3
	/* ... four more quarter-rounds on the rotated rows ... */
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,16		/* vprotd $16,%xmm3,%xmm3 */
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,12		/* vprotd $12,%xmm1,%xmm1 */
	vpaddd	%xmm1,%xmm0,%xmm0
	vpxor	%xmm0,%xmm3,%xmm3
.byte	143,232,120,194,219,8		/* vprotd $8,%xmm3,%xmm3 */
	vpaddd	%xmm3,%xmm2,%xmm2
	vpxor	%xmm2,%xmm1,%xmm1
.byte	143,232,120,194,201,7		/* vprotd $7,%xmm1,%xmm1 */
	/* ... and rotate the rows back. */
	vpshufd	$78,%xmm2,%xmm2
	vpshufd	$147,%xmm1,%xmm1
	vpshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	.L018loop1x
	/* Add the saved input state to obtain the output block. */
	vpaddd	(%esp),%xmm0,%xmm0
	vpaddd	16(%esp),%xmm1,%xmm1
	vpaddd	32(%esp),%xmm2,%xmm2
	vpaddd	48(%esp),%xmm3,%xmm3
	cmpl	$64,%ecx
	jb	.L020tail		/* partial final block */
	vpxor	(%esi),%xmm0,%xmm0
	vpxor	16(%esi),%xmm1,%xmm1
	vpxor	32(%esi),%xmm2,%xmm2
	vpxor	48(%esi),%xmm3,%xmm3
	leal	64(%esi),%esi
	vmovdqu	%xmm0,(%edi)
	vmovdqu	%xmm1,16(%edi)
	vmovdqu	%xmm2,32(%edi)
	vmovdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L019outer1x
	jmp	.L017done
.L020tail:
	/* Spill the output block to the stack and XOR it into the remaining
	   %ecx (< 64) bytes one byte at a time.  %eax is reused as scratch
	   from here on, so the constant-table pointer is gone. */
	vmovdqa	%xmm0,(%esp)
	vmovdqa	%xmm1,16(%esp)
	vmovdqa	%xmm2,32(%esp)
	vmovdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp		/* %ebp = byte index */
.L021tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)	/* -1: index was already advanced */
	decl	%ecx
	jnz	.L021tail_loop
.L017done:
	vzeroupper
	movl	512(%esp),%esp		/* restore caller %esp saved in the prologue */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm	OPENSSL_ia32cap_P,16,4

	/* ELF property note: type 5 (NT_GNU_PROPERTY_TYPE_0), owner "GNU",
	   property 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND) with value 3
	   (IBT | SHSTK). */
	.section ".note.gnu.property", "a"
	.p2align 2
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	.asciz "GNU"
1:
	.p2align 2
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 2
4:
#endif