/* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
/*
 * Several instructions below are emitted as raw bytes:
 *   .byte 243,15,30,250      F3 0F 1E FA      endbr64
 *   .byte 102,15,56,220,M    66 0F 38 DC /r   aesenc
 *   .byte 102,15,56,221,M    66 0F 38 DD /r   aesenclast
 *   .byte 102,15,56,222,M    66 0F 38 DE /r   aesdec
 *   .byte 102,15,56,223,M    66 0F 38 DF /r   aesdeclast
 *   .byte 102,68,15,56,...   same opcodes with a REX.R prefix (0x44), so
 *                            the destination is %xmm8 or %xmm9
 *   .byte 0x0f,0x1f,0x00     nopl (%rax)
 *   .byte 0xf3,0xc3          rep ret
 * ModRM byte M without the prefix:
 *   209,217,225,233,241,249  round key %xmm1 applied to %xmm2..%xmm7
 *   208,216,224,232,240,248  round key %xmm0 applied to %xmm2..%xmm7
 * ModRM byte M with the 0x44 prefix:
 *   193,201                  round key %xmm1 applied to %xmm8,%xmm9
 *   192,200                  round key %xmm0 applied to %xmm8,%xmm9
 */
.text

/*
 * aesni_encrypt: encrypt one 16-byte block.
 *   %rdi  address of the 16-byte input block
 *   %rsi  address the 16-byte result is stored to
 *   %rdx  address of the round-key array; the 32-bit value at offset 240
 *         is loaded into %eax and used as the loop count
 * Modifies %eax, %rdx and flags; %xmm0-%xmm2 are zeroed before returning.
 */
.globl	aesni_encrypt
.type	aesni_encrypt,@function
.align	16
aesni_encrypt:
.cfi_startproc
.byte	243,15,30,250
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
.Loop_enc1_1:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_enc1_1
.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_encrypt,.-aesni_encrypt

/*
 * aesni_decrypt: decrypt one 16-byte block.
 * Same register usage as aesni_encrypt above, with aesdec/aesdeclast
 * in place of aesenc/aesenclast.
 */
.globl	aesni_decrypt
.type	aesni_decrypt,@function
.align	16
aesni_decrypt:
.cfi_startproc
.byte	243,15,30,250
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
.Loop_dec1_2:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	.Loop_dec1_2
.byte	102,15,56,223,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3
.cfi_endproc
.size	aesni_decrypt, .-aesni_decrypt
/*
 * _aesni_encrypt2: helper without a .globl directive; encrypts the two
 * blocks held in %xmm2 and %xmm3 in place.
 *   %rcx  address of the round-key array
 *   %eax  count; shifted left by 4 to form a byte offset
 * Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 * %rcx is advanced past the keys and %rax becomes a negative offset that
 * steps up by 32 per iteration; each iteration applies two round keys
 * (first %xmm1, then %xmm0) and the loop ends when %rax reaches zero.
 */
.type	_aesni_encrypt2,@function
.align	16
_aesni_encrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop2:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop2

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,221,208
.byte	102,15,56,221,216
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2: decrypts the two blocks in %xmm2 and %xmm3 in place.
 * Same inputs, loop shape and modified registers as _aesni_encrypt2.
 */
.type	_aesni_decrypt2,@function
.align	16
_aesni_decrypt2:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop2:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop2

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,223,208
.byte	102,15,56,223,216
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3: encrypts the three blocks in %xmm2-%xmm4 in place.
 * %rcx = round-key array, %eax = count; modifies %rax, %rcx, %xmm0,
 * %xmm1 and flags.
 */
.type	_aesni_encrypt3,@function
.align	16
_aesni_encrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Lenc_loop3:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop3

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3: decrypts the three blocks in %xmm2-%xmm4 in place.
 * Same inputs and modified registers as _aesni_encrypt3.
 */
.type	_aesni_decrypt3,@function
.align	16
_aesni_decrypt3:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

.Ldec_loop3:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop3

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4: encrypts the four blocks in %xmm2-%xmm5 in place.
 * %rcx = round-key array, %eax = count; modifies %rax, %rcx, %xmm0,
 * %xmm1 and flags. A 3-byte nop precedes the loop setup's final add.
 */
.type	_aesni_encrypt4,@function
.align	16
_aesni_encrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

.Lenc_loop4:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4: decrypts the four blocks in %xmm2-%xmm5 in place.
 * Same inputs and modified registers as _aesni_encrypt4.
 */
.type	_aesni_decrypt4,@function
.align	16
_aesni_decrypt4:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	0x0f,0x1f,0x00
	addq	$16,%rax

.Ldec_loop4:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop4

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6: encrypts the six blocks in %xmm2-%xmm7 in place.
 * %rcx = round-key array, %eax = count; modifies %rax, %rcx, %xmm0,
 * %xmm1 and flags.
 * The first round for %xmm2-%xmm4 is interleaved with the initial key
 * XOR of %xmm5-%xmm7, so the loop is entered at .Lenc_loop6_enter,
 * past the three instructions already issued.
 */
.type	_aesni_encrypt6,@function
.align	16
_aesni_encrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,220,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop6_enter
.align	16
.Lenc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.Lenc_loop6_enter:
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6: decrypts the six blocks in %xmm2-%xmm7 in place.
 * Same inputs, entry interleaving and modified registers as
 * _aesni_encrypt6.
 */
.type	_aesni_decrypt6,@function
.align	16
_aesni_decrypt6:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
.byte	102,15,56,222,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
.byte	102,15,56,222,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop6_enter
.align	16
.Ldec_loop6:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.Ldec_loop6_enter:
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop6

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_decrypt6,.-_aesni_decrypt6
/*
 * _aesni_encrypt8: encrypts the eight blocks in %xmm2-%xmm9 in place.
 * %rcx = round-key array, %eax = count; modifies %rax, %rcx, %xmm0,
 * %xmm1 and flags.
 * The first round for %xmm2 and %xmm3 is interleaved with the initial
 * key XOR of %xmm7-%xmm9, so the loop is entered at .Lenc_loop8_inner.
 */
.type	_aesni_encrypt8,@function
.align	16
_aesni_encrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,220,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,220,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Lenc_loop8_inner
.align	16
.Lenc_loop8:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.Lenc_loop8_inner:
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.Lenc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
.byte	102,68,15,56,220,192
.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Lenc_loop8

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
.byte	102,68,15,56,220,193
.byte	102,68,15,56,220,201
.byte	102,15,56,221,208
.byte	102,15,56,221,216
.byte	102,15,56,221,224
.byte	102,15,56,221,232
.byte	102,15,56,221,240
.byte	102,15,56,221,248
.byte	102,68,15,56,221,192
.byte	102,68,15,56,221,200
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_encrypt8,.-_aesni_encrypt8
/*
 * _aesni_decrypt8: decrypts the eight blocks in %xmm2-%xmm9 in place.
 * Same inputs, entry interleaving and modified registers as
 * _aesni_encrypt8.
 */
.type	_aesni_decrypt8,@function
.align	16
_aesni_decrypt8:
.cfi_startproc
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
.byte	102,15,56,222,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
.byte	102,15,56,222,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	.Ldec_loop8_inner
.align	16
.Ldec_loop8:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.Ldec_loop8_inner:
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.Ldec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Ldec_loop8

.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
.byte	102,15,56,223,208
.byte	102,15,56,223,216
.byte	102,15,56,223,224
.byte	102,15,56,223,232
.byte	102,15,56,223,240
.byte	102,15,56,223,248
.byte	102,68,15,56,223,192
.byte	102,68,15,56,223,200
	.byte	0xf3,0xc3
.cfi_endproc
.size	_aesni_decrypt8,.-_aesni_decrypt8
/*
 * aesni_ecb_encrypt: process whole 16-byte blocks from (%rdi) to (%rsi).
 *   %rdi  input address
 *   %rsi  output address
 *   %rdx  byte count; masked down to a multiple of 16, and a zero
 *         result jumps straight to .Lecb_ret
 *   %rcx  address of the round-key array (32-bit count at offset 240)
 *   %r8d  zero selects the decrypt path, non-zero the encrypt path
 * %r11 and %r10d hold copies of %rcx and the count so both can be
 * reloaded after a helper call (the helpers modify %rcx and %rax).
 * Inputs of 0x80 bytes or more run through the 8-block loop. A
 * remainder of 1-7 blocks is dispatched to the 1/2/3/4/6/8-block paths;
 * the 5- and 7-block cases clear the unused register (%xmm7 / %xmm9)
 * and call the 6- and 8-block helpers.
 * The decrypt path zeroes each data register after storing it.
 * NOTE(review): this function runs past the end of the visible text; the
 * operands of the final .byte directive below are not visible here.
 */
.globl	aesni_ecb_encrypt
.type	aesni_ecb_encrypt,@function
.align	16
aesni_ecb_encrypt:
.cfi_startproc
.byte	243,15,30,250
	andq	$-16,%rdx
	jz	.Lecb_ret

	movl	240(%rcx),%eax
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%eax,%r10d
	testl	%r8d,%r8d
	jz	.Lecb_decrypt

	cmpq	$0x80,%rdx
	jb	.Lecb_enc_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_enc_loop8_enter
.align	16
.Lecb_enc_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$0x80,%rdx
	jnc	.Lecb_enc_loop8

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx
	jz	.Lecb_ret

.Lecb_enc_tail:
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_enc_one
	movups	16(%rdi),%xmm3
	je	.Lecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_enc_three
	movups	48(%rdi),%xmm5
	je	.Lecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_enc_five
	movups	80(%rdi),%xmm7
	je	.Lecb_enc_six
	movdqu	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_3:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_3
.byte	102,15,56,221,209
	movups	%xmm2,(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_five:
	xorps	%xmm7,%xmm7
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	.Lecb_ret
.align	16
.Lecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	.Lecb_ret

.align	16
.Lecb_decrypt:
	cmpq	$0x80,%rdx
	jb	.Lecb_dec_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$0x80,%rdx
	jmp	.Lecb_dec_loop8_enter
.align	16
.Lecb_dec_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
.Lecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$0x80,%rdx
	jnc	.Lecb_dec_loop8

	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$0x80,%rdx
	jz	.Lecb_ret

.Lecb_dec_tail:
	movups	(%rdi),%xmm2
	cmpq	$0x20,%rdx
	jb	.Lecb_dec_one
	movups	16(%rdi),%xmm3
	je	.Lecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$0x40,%rdx
	jb	.Lecb_dec_three
	movups	48(%rdi),%xmm5
	je	.Lecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$0x60,%rdx
	jb	.Lecb_dec_five
	movups	80(%rdi),%xmm7
	je	.Lecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lecb_ret
.align	16
.Lecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_4:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_4
.byte	102,15,56,223,209
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lecb_ret
.align	16
.Lecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	.Lecb_ret
.align	16
.Lecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	.Lecb_ret
.align	16
.Lecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	.Lecb_ret
.align	16
.Lecb_dec_five:
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	.Lecb_ret
.align	16
.Lecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

.Lecb_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	.byte
0xf3,0xc3 862bc3d5698SJohn Baldwin.cfi_endproc 863bc3d5698SJohn Baldwin.size aesni_ecb_encrypt,.-aesni_ecb_encrypt 864bc3d5698SJohn Baldwin.globl aesni_ccm64_encrypt_blocks 865bc3d5698SJohn Baldwin.type aesni_ccm64_encrypt_blocks,@function 866bc3d5698SJohn Baldwin.align 16 867bc3d5698SJohn Baldwinaesni_ccm64_encrypt_blocks: 868bc3d5698SJohn Baldwin.cfi_startproc 869*c0855eaaSJohn Baldwin.byte 243,15,30,250 870bc3d5698SJohn Baldwin movl 240(%rcx),%eax 871bc3d5698SJohn Baldwin movdqu (%r8),%xmm6 872bc3d5698SJohn Baldwin movdqa .Lincrement64(%rip),%xmm9 873bc3d5698SJohn Baldwin movdqa .Lbswap_mask(%rip),%xmm7 874bc3d5698SJohn Baldwin 875bc3d5698SJohn Baldwin shll $4,%eax 876bc3d5698SJohn Baldwin movl $16,%r10d 877bc3d5698SJohn Baldwin leaq 0(%rcx),%r11 878bc3d5698SJohn Baldwin movdqu (%r9),%xmm3 879bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 880bc3d5698SJohn Baldwin leaq 32(%rcx,%rax,1),%rcx 881bc3d5698SJohn Baldwin.byte 102,15,56,0,247 882bc3d5698SJohn Baldwin subq %rax,%r10 883bc3d5698SJohn Baldwin jmp .Lccm64_enc_outer 884bc3d5698SJohn Baldwin.align 16 885bc3d5698SJohn Baldwin.Lccm64_enc_outer: 886bc3d5698SJohn Baldwin movups (%r11),%xmm0 887bc3d5698SJohn Baldwin movq %r10,%rax 888bc3d5698SJohn Baldwin movups (%rdi),%xmm8 889bc3d5698SJohn Baldwin 890bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 891bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 892bc3d5698SJohn Baldwin xorps %xmm8,%xmm0 893bc3d5698SJohn Baldwin xorps %xmm0,%xmm3 894bc3d5698SJohn Baldwin movups 32(%r11),%xmm0 895bc3d5698SJohn Baldwin 896bc3d5698SJohn Baldwin.Lccm64_enc2_loop: 897bc3d5698SJohn Baldwin.byte 102,15,56,220,209 898bc3d5698SJohn Baldwin.byte 102,15,56,220,217 899bc3d5698SJohn Baldwin movups (%rcx,%rax,1),%xmm1 900bc3d5698SJohn Baldwin addq $32,%rax 901bc3d5698SJohn Baldwin.byte 102,15,56,220,208 902bc3d5698SJohn Baldwin.byte 102,15,56,220,216 903bc3d5698SJohn Baldwin movups -16(%rcx,%rax,1),%xmm0 904bc3d5698SJohn Baldwin jnz .Lccm64_enc2_loop 905bc3d5698SJohn Baldwin.byte 102,15,56,220,209 
906bc3d5698SJohn Baldwin.byte 102,15,56,220,217 907bc3d5698SJohn Baldwin paddq %xmm9,%xmm6 908bc3d5698SJohn Baldwin decq %rdx 909bc3d5698SJohn Baldwin.byte 102,15,56,221,208 910bc3d5698SJohn Baldwin.byte 102,15,56,221,216 911bc3d5698SJohn Baldwin 912bc3d5698SJohn Baldwin leaq 16(%rdi),%rdi 913bc3d5698SJohn Baldwin xorps %xmm2,%xmm8 914bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 915bc3d5698SJohn Baldwin movups %xmm8,(%rsi) 916bc3d5698SJohn Baldwin.byte 102,15,56,0,215 917bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 918bc3d5698SJohn Baldwin jnz .Lccm64_enc_outer 919bc3d5698SJohn Baldwin 920bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 921bc3d5698SJohn Baldwin pxor %xmm1,%xmm1 922bc3d5698SJohn Baldwin pxor %xmm2,%xmm2 923bc3d5698SJohn Baldwin movups %xmm3,(%r9) 924bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 925bc3d5698SJohn Baldwin pxor %xmm8,%xmm8 926bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 927bc3d5698SJohn Baldwin .byte 0xf3,0xc3 928bc3d5698SJohn Baldwin.cfi_endproc 929bc3d5698SJohn Baldwin.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks 930bc3d5698SJohn Baldwin.globl aesni_ccm64_decrypt_blocks 931bc3d5698SJohn Baldwin.type aesni_ccm64_decrypt_blocks,@function 932bc3d5698SJohn Baldwin.align 16 933bc3d5698SJohn Baldwinaesni_ccm64_decrypt_blocks: 934bc3d5698SJohn Baldwin.cfi_startproc 935*c0855eaaSJohn Baldwin.byte 243,15,30,250 936bc3d5698SJohn Baldwin movl 240(%rcx),%eax 937bc3d5698SJohn Baldwin movups (%r8),%xmm6 938bc3d5698SJohn Baldwin movdqu (%r9),%xmm3 939bc3d5698SJohn Baldwin movdqa .Lincrement64(%rip),%xmm9 940bc3d5698SJohn Baldwin movdqa .Lbswap_mask(%rip),%xmm7 941bc3d5698SJohn Baldwin 942bc3d5698SJohn Baldwin movaps %xmm6,%xmm2 943bc3d5698SJohn Baldwin movl %eax,%r10d 944bc3d5698SJohn Baldwin movq %rcx,%r11 945bc3d5698SJohn Baldwin.byte 102,15,56,0,247 946bc3d5698SJohn Baldwin movups (%rcx),%xmm0 947bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 948bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 949bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 950bc3d5698SJohn 
Baldwin.Loop_enc1_5: 951bc3d5698SJohn Baldwin.byte 102,15,56,220,209 952bc3d5698SJohn Baldwin decl %eax 953bc3d5698SJohn Baldwin movups (%rcx),%xmm1 954bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 955bc3d5698SJohn Baldwin jnz .Loop_enc1_5 956bc3d5698SJohn Baldwin.byte 102,15,56,221,209 957bc3d5698SJohn Baldwin shll $4,%r10d 958bc3d5698SJohn Baldwin movl $16,%eax 959bc3d5698SJohn Baldwin movups (%rdi),%xmm8 960bc3d5698SJohn Baldwin paddq %xmm9,%xmm6 961bc3d5698SJohn Baldwin leaq 16(%rdi),%rdi 962bc3d5698SJohn Baldwin subq %r10,%rax 963bc3d5698SJohn Baldwin leaq 32(%r11,%r10,1),%rcx 964bc3d5698SJohn Baldwin movq %rax,%r10 965bc3d5698SJohn Baldwin jmp .Lccm64_dec_outer 966bc3d5698SJohn Baldwin.align 16 967bc3d5698SJohn Baldwin.Lccm64_dec_outer: 968bc3d5698SJohn Baldwin xorps %xmm2,%xmm8 969bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 970bc3d5698SJohn Baldwin movups %xmm8,(%rsi) 971bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 972bc3d5698SJohn Baldwin.byte 102,15,56,0,215 973bc3d5698SJohn Baldwin 974bc3d5698SJohn Baldwin subq $1,%rdx 975bc3d5698SJohn Baldwin jz .Lccm64_dec_break 976bc3d5698SJohn Baldwin 977bc3d5698SJohn Baldwin movups (%r11),%xmm0 978bc3d5698SJohn Baldwin movq %r10,%rax 979bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 980bc3d5698SJohn Baldwin xorps %xmm0,%xmm8 981bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 982bc3d5698SJohn Baldwin xorps %xmm8,%xmm3 983bc3d5698SJohn Baldwin movups 32(%r11),%xmm0 984bc3d5698SJohn Baldwin jmp .Lccm64_dec2_loop 985bc3d5698SJohn Baldwin.align 16 986bc3d5698SJohn Baldwin.Lccm64_dec2_loop: 987bc3d5698SJohn Baldwin.byte 102,15,56,220,209 988bc3d5698SJohn Baldwin.byte 102,15,56,220,217 989bc3d5698SJohn Baldwin movups (%rcx,%rax,1),%xmm1 990bc3d5698SJohn Baldwin addq $32,%rax 991bc3d5698SJohn Baldwin.byte 102,15,56,220,208 992bc3d5698SJohn Baldwin.byte 102,15,56,220,216 993bc3d5698SJohn Baldwin movups -16(%rcx,%rax,1),%xmm0 994bc3d5698SJohn Baldwin jnz .Lccm64_dec2_loop 995bc3d5698SJohn Baldwin movups (%rdi),%xmm8 996bc3d5698SJohn Baldwin paddq 
%xmm9,%xmm6 997bc3d5698SJohn Baldwin.byte 102,15,56,220,209 998bc3d5698SJohn Baldwin.byte 102,15,56,220,217 999bc3d5698SJohn Baldwin.byte 102,15,56,221,208 1000bc3d5698SJohn Baldwin.byte 102,15,56,221,216 1001bc3d5698SJohn Baldwin leaq 16(%rdi),%rdi 1002bc3d5698SJohn Baldwin jmp .Lccm64_dec_outer 1003bc3d5698SJohn Baldwin 1004bc3d5698SJohn Baldwin.align 16 1005bc3d5698SJohn Baldwin.Lccm64_dec_break: 1006bc3d5698SJohn Baldwin 1007bc3d5698SJohn Baldwin movl 240(%r11),%eax 1008bc3d5698SJohn Baldwin movups (%r11),%xmm0 1009bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 1010bc3d5698SJohn Baldwin xorps %xmm0,%xmm8 1011bc3d5698SJohn Baldwin leaq 32(%r11),%r11 1012bc3d5698SJohn Baldwin xorps %xmm8,%xmm3 1013bc3d5698SJohn Baldwin.Loop_enc1_6: 1014bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1015bc3d5698SJohn Baldwin decl %eax 1016bc3d5698SJohn Baldwin movups (%r11),%xmm1 1017bc3d5698SJohn Baldwin leaq 16(%r11),%r11 1018bc3d5698SJohn Baldwin jnz .Loop_enc1_6 1019bc3d5698SJohn Baldwin.byte 102,15,56,221,217 1020bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 1021bc3d5698SJohn Baldwin pxor %xmm1,%xmm1 1022bc3d5698SJohn Baldwin pxor %xmm2,%xmm2 1023bc3d5698SJohn Baldwin movups %xmm3,(%r9) 1024bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 1025bc3d5698SJohn Baldwin pxor %xmm8,%xmm8 1026bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 1027bc3d5698SJohn Baldwin .byte 0xf3,0xc3 1028bc3d5698SJohn Baldwin.cfi_endproc 1029bc3d5698SJohn Baldwin.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks 1030bc3d5698SJohn Baldwin.globl aesni_ctr32_encrypt_blocks 1031bc3d5698SJohn Baldwin.type aesni_ctr32_encrypt_blocks,@function 1032bc3d5698SJohn Baldwin.align 16 1033bc3d5698SJohn Baldwinaesni_ctr32_encrypt_blocks: 1034bc3d5698SJohn Baldwin.cfi_startproc 1035*c0855eaaSJohn Baldwin.byte 243,15,30,250 1036bc3d5698SJohn Baldwin cmpq $1,%rdx 1037bc3d5698SJohn Baldwin jne .Lctr32_bulk 1038bc3d5698SJohn Baldwin 1039bc3d5698SJohn Baldwin 1040bc3d5698SJohn Baldwin 1041bc3d5698SJohn Baldwin movups (%r8),%xmm2 
1042bc3d5698SJohn Baldwin movups (%rdi),%xmm3 1043bc3d5698SJohn Baldwin movl 240(%rcx),%edx 1044bc3d5698SJohn Baldwin movups (%rcx),%xmm0 1045bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 1046bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 1047bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 1048bc3d5698SJohn Baldwin.Loop_enc1_7: 1049bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1050bc3d5698SJohn Baldwin decl %edx 1051bc3d5698SJohn Baldwin movups (%rcx),%xmm1 1052bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 1053bc3d5698SJohn Baldwin jnz .Loop_enc1_7 1054bc3d5698SJohn Baldwin.byte 102,15,56,221,209 1055bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 1056bc3d5698SJohn Baldwin pxor %xmm1,%xmm1 1057bc3d5698SJohn Baldwin xorps %xmm3,%xmm2 1058bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 1059bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 1060bc3d5698SJohn Baldwin xorps %xmm2,%xmm2 1061bc3d5698SJohn Baldwin jmp .Lctr32_epilogue 1062bc3d5698SJohn Baldwin 1063bc3d5698SJohn Baldwin.align 16 1064bc3d5698SJohn Baldwin.Lctr32_bulk: 1065bc3d5698SJohn Baldwin leaq (%rsp),%r11 1066bc3d5698SJohn Baldwin.cfi_def_cfa_register %r11 1067bc3d5698SJohn Baldwin pushq %rbp 1068bc3d5698SJohn Baldwin.cfi_offset %rbp,-16 1069bc3d5698SJohn Baldwin subq $128,%rsp 1070bc3d5698SJohn Baldwin andq $-16,%rsp 1071bc3d5698SJohn Baldwin 1072bc3d5698SJohn Baldwin 1073bc3d5698SJohn Baldwin 1074bc3d5698SJohn Baldwin 1075bc3d5698SJohn Baldwin movdqu (%r8),%xmm2 1076bc3d5698SJohn Baldwin movdqu (%rcx),%xmm0 1077bc3d5698SJohn Baldwin movl 12(%r8),%r8d 1078bc3d5698SJohn Baldwin pxor %xmm0,%xmm2 1079bc3d5698SJohn Baldwin movl 12(%rcx),%ebp 1080bc3d5698SJohn Baldwin movdqa %xmm2,0(%rsp) 1081bc3d5698SJohn Baldwin bswapl %r8d 1082bc3d5698SJohn Baldwin movdqa %xmm2,%xmm3 1083bc3d5698SJohn Baldwin movdqa %xmm2,%xmm4 1084bc3d5698SJohn Baldwin movdqa %xmm2,%xmm5 1085bc3d5698SJohn Baldwin movdqa %xmm2,64(%rsp) 1086bc3d5698SJohn Baldwin movdqa %xmm2,80(%rsp) 1087bc3d5698SJohn Baldwin movdqa %xmm2,96(%rsp) 1088bc3d5698SJohn Baldwin movq %rdx,%r10 
1089bc3d5698SJohn Baldwin movdqa %xmm2,112(%rsp) 1090bc3d5698SJohn Baldwin 1091bc3d5698SJohn Baldwin leaq 1(%r8),%rax 1092bc3d5698SJohn Baldwin leaq 2(%r8),%rdx 1093bc3d5698SJohn Baldwin bswapl %eax 1094bc3d5698SJohn Baldwin bswapl %edx 1095bc3d5698SJohn Baldwin xorl %ebp,%eax 1096bc3d5698SJohn Baldwin xorl %ebp,%edx 1097bc3d5698SJohn Baldwin.byte 102,15,58,34,216,3 1098bc3d5698SJohn Baldwin leaq 3(%r8),%rax 1099bc3d5698SJohn Baldwin movdqa %xmm3,16(%rsp) 1100bc3d5698SJohn Baldwin.byte 102,15,58,34,226,3 1101bc3d5698SJohn Baldwin bswapl %eax 1102bc3d5698SJohn Baldwin movq %r10,%rdx 1103bc3d5698SJohn Baldwin leaq 4(%r8),%r10 1104bc3d5698SJohn Baldwin movdqa %xmm4,32(%rsp) 1105bc3d5698SJohn Baldwin xorl %ebp,%eax 1106bc3d5698SJohn Baldwin bswapl %r10d 1107bc3d5698SJohn Baldwin.byte 102,15,58,34,232,3 1108bc3d5698SJohn Baldwin xorl %ebp,%r10d 1109bc3d5698SJohn Baldwin movdqa %xmm5,48(%rsp) 1110bc3d5698SJohn Baldwin leaq 5(%r8),%r9 1111bc3d5698SJohn Baldwin movl %r10d,64+12(%rsp) 1112bc3d5698SJohn Baldwin bswapl %r9d 1113bc3d5698SJohn Baldwin leaq 6(%r8),%r10 1114bc3d5698SJohn Baldwin movl 240(%rcx),%eax 1115bc3d5698SJohn Baldwin xorl %ebp,%r9d 1116bc3d5698SJohn Baldwin bswapl %r10d 1117bc3d5698SJohn Baldwin movl %r9d,80+12(%rsp) 1118bc3d5698SJohn Baldwin xorl %ebp,%r10d 1119bc3d5698SJohn Baldwin leaq 7(%r8),%r9 1120bc3d5698SJohn Baldwin movl %r10d,96+12(%rsp) 1121bc3d5698SJohn Baldwin bswapl %r9d 1122bc3d5698SJohn Baldwin movl OPENSSL_ia32cap_P+4(%rip),%r10d 1123bc3d5698SJohn Baldwin xorl %ebp,%r9d 1124bc3d5698SJohn Baldwin andl $71303168,%r10d 1125bc3d5698SJohn Baldwin movl %r9d,112+12(%rsp) 1126bc3d5698SJohn Baldwin 1127bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 1128bc3d5698SJohn Baldwin 1129bc3d5698SJohn Baldwin movdqa 64(%rsp),%xmm6 1130bc3d5698SJohn Baldwin movdqa 80(%rsp),%xmm7 1131bc3d5698SJohn Baldwin 1132bc3d5698SJohn Baldwin cmpq $8,%rdx 1133bc3d5698SJohn Baldwin jb .Lctr32_tail 1134bc3d5698SJohn Baldwin 1135bc3d5698SJohn Baldwin subq $6,%rdx 
1136bc3d5698SJohn Baldwin cmpl $4194304,%r10d 1137bc3d5698SJohn Baldwin je .Lctr32_6x 1138bc3d5698SJohn Baldwin 1139bc3d5698SJohn Baldwin leaq 128(%rcx),%rcx 1140bc3d5698SJohn Baldwin subq $2,%rdx 1141bc3d5698SJohn Baldwin jmp .Lctr32_loop8 1142bc3d5698SJohn Baldwin 1143bc3d5698SJohn Baldwin.align 16 1144bc3d5698SJohn Baldwin.Lctr32_6x: 1145bc3d5698SJohn Baldwin shll $4,%eax 1146bc3d5698SJohn Baldwin movl $48,%r10d 1147bc3d5698SJohn Baldwin bswapl %ebp 1148bc3d5698SJohn Baldwin leaq 32(%rcx,%rax,1),%rcx 1149bc3d5698SJohn Baldwin subq %rax,%r10 1150bc3d5698SJohn Baldwin jmp .Lctr32_loop6 1151bc3d5698SJohn Baldwin 1152bc3d5698SJohn Baldwin.align 16 1153bc3d5698SJohn Baldwin.Lctr32_loop6: 1154bc3d5698SJohn Baldwin addl $6,%r8d 1155bc3d5698SJohn Baldwin movups -48(%rcx,%r10,1),%xmm0 1156bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1157bc3d5698SJohn Baldwin movl %r8d,%eax 1158bc3d5698SJohn Baldwin xorl %ebp,%eax 1159bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1160bc3d5698SJohn Baldwin.byte 0x0f,0x38,0xf1,0x44,0x24,12 1161bc3d5698SJohn Baldwin leal 1(%r8),%eax 1162bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1163bc3d5698SJohn Baldwin xorl %ebp,%eax 1164bc3d5698SJohn Baldwin.byte 0x0f,0x38,0xf1,0x44,0x24,28 1165bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1166bc3d5698SJohn Baldwin leal 2(%r8),%eax 1167bc3d5698SJohn Baldwin xorl %ebp,%eax 1168bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1169bc3d5698SJohn Baldwin.byte 0x0f,0x38,0xf1,0x44,0x24,44 1170bc3d5698SJohn Baldwin leal 3(%r8),%eax 1171bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1172bc3d5698SJohn Baldwin movups -32(%rcx,%r10,1),%xmm1 1173bc3d5698SJohn Baldwin xorl %ebp,%eax 1174bc3d5698SJohn Baldwin 1175bc3d5698SJohn Baldwin.byte 102,15,56,220,208 1176bc3d5698SJohn Baldwin.byte 0x0f,0x38,0xf1,0x44,0x24,60 1177bc3d5698SJohn Baldwin leal 4(%r8),%eax 1178bc3d5698SJohn Baldwin.byte 102,15,56,220,216 1179bc3d5698SJohn Baldwin xorl %ebp,%eax 1180bc3d5698SJohn Baldwin.byte 0x0f,0x38,0xf1,0x44,0x24,76 1181bc3d5698SJohn 
Baldwin.byte 102,15,56,220,224 1182bc3d5698SJohn Baldwin leal 5(%r8),%eax 1183bc3d5698SJohn Baldwin xorl %ebp,%eax 1184bc3d5698SJohn Baldwin.byte 102,15,56,220,232 1185bc3d5698SJohn Baldwin.byte 0x0f,0x38,0xf1,0x44,0x24,92 1186bc3d5698SJohn Baldwin movq %r10,%rax 1187bc3d5698SJohn Baldwin.byte 102,15,56,220,240 1188bc3d5698SJohn Baldwin.byte 102,15,56,220,248 1189bc3d5698SJohn Baldwin movups -16(%rcx,%r10,1),%xmm0 1190bc3d5698SJohn Baldwin 1191bc3d5698SJohn Baldwin call .Lenc_loop6 1192bc3d5698SJohn Baldwin 1193bc3d5698SJohn Baldwin movdqu (%rdi),%xmm8 1194bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm9 1195bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm10 1196bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm11 1197bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm12 1198bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm13 1199bc3d5698SJohn Baldwin leaq 96(%rdi),%rdi 1200bc3d5698SJohn Baldwin movups -64(%rcx,%r10,1),%xmm1 1201bc3d5698SJohn Baldwin pxor %xmm2,%xmm8 1202bc3d5698SJohn Baldwin movaps 0(%rsp),%xmm2 1203bc3d5698SJohn Baldwin pxor %xmm3,%xmm9 1204bc3d5698SJohn Baldwin movaps 16(%rsp),%xmm3 1205bc3d5698SJohn Baldwin pxor %xmm4,%xmm10 1206bc3d5698SJohn Baldwin movaps 32(%rsp),%xmm4 1207bc3d5698SJohn Baldwin pxor %xmm5,%xmm11 1208bc3d5698SJohn Baldwin movaps 48(%rsp),%xmm5 1209bc3d5698SJohn Baldwin pxor %xmm6,%xmm12 1210bc3d5698SJohn Baldwin movaps 64(%rsp),%xmm6 1211bc3d5698SJohn Baldwin pxor %xmm7,%xmm13 1212bc3d5698SJohn Baldwin movaps 80(%rsp),%xmm7 1213bc3d5698SJohn Baldwin movdqu %xmm8,(%rsi) 1214bc3d5698SJohn Baldwin movdqu %xmm9,16(%rsi) 1215bc3d5698SJohn Baldwin movdqu %xmm10,32(%rsi) 1216bc3d5698SJohn Baldwin movdqu %xmm11,48(%rsi) 1217bc3d5698SJohn Baldwin movdqu %xmm12,64(%rsi) 1218bc3d5698SJohn Baldwin movdqu %xmm13,80(%rsi) 1219bc3d5698SJohn Baldwin leaq 96(%rsi),%rsi 1220bc3d5698SJohn Baldwin 1221bc3d5698SJohn Baldwin subq $6,%rdx 1222bc3d5698SJohn Baldwin jnc .Lctr32_loop6 1223bc3d5698SJohn Baldwin 1224bc3d5698SJohn Baldwin addq $6,%rdx 1225bc3d5698SJohn Baldwin jz 
.Lctr32_done 1226bc3d5698SJohn Baldwin 1227bc3d5698SJohn Baldwin leal -48(%r10),%eax 1228bc3d5698SJohn Baldwin leaq -80(%rcx,%r10,1),%rcx 1229bc3d5698SJohn Baldwin negl %eax 1230bc3d5698SJohn Baldwin shrl $4,%eax 1231bc3d5698SJohn Baldwin jmp .Lctr32_tail 1232bc3d5698SJohn Baldwin 1233bc3d5698SJohn Baldwin.align 32 1234bc3d5698SJohn Baldwin.Lctr32_loop8: 1235bc3d5698SJohn Baldwin addl $8,%r8d 1236bc3d5698SJohn Baldwin movdqa 96(%rsp),%xmm8 1237bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1238bc3d5698SJohn Baldwin movl %r8d,%r9d 1239bc3d5698SJohn Baldwin movdqa 112(%rsp),%xmm9 1240bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1241bc3d5698SJohn Baldwin bswapl %r9d 1242bc3d5698SJohn Baldwin movups 32-128(%rcx),%xmm0 1243bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1244bc3d5698SJohn Baldwin xorl %ebp,%r9d 1245bc3d5698SJohn Baldwin nop 1246bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1247bc3d5698SJohn Baldwin movl %r9d,0+12(%rsp) 1248bc3d5698SJohn Baldwin leaq 1(%r8),%r9 1249bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1250bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1251bc3d5698SJohn Baldwin.byte 102,68,15,56,220,193 1252bc3d5698SJohn Baldwin.byte 102,68,15,56,220,201 1253bc3d5698SJohn Baldwin movups 48-128(%rcx),%xmm1 1254bc3d5698SJohn Baldwin bswapl %r9d 1255bc3d5698SJohn Baldwin.byte 102,15,56,220,208 1256bc3d5698SJohn Baldwin.byte 102,15,56,220,216 1257bc3d5698SJohn Baldwin xorl %ebp,%r9d 1258bc3d5698SJohn Baldwin.byte 0x66,0x90 1259bc3d5698SJohn Baldwin.byte 102,15,56,220,224 1260bc3d5698SJohn Baldwin.byte 102,15,56,220,232 1261bc3d5698SJohn Baldwin movl %r9d,16+12(%rsp) 1262bc3d5698SJohn Baldwin leaq 2(%r8),%r9 1263bc3d5698SJohn Baldwin.byte 102,15,56,220,240 1264bc3d5698SJohn Baldwin.byte 102,15,56,220,248 1265bc3d5698SJohn Baldwin.byte 102,68,15,56,220,192 1266bc3d5698SJohn Baldwin.byte 102,68,15,56,220,200 1267bc3d5698SJohn Baldwin movups 64-128(%rcx),%xmm0 1268bc3d5698SJohn Baldwin bswapl %r9d 1269bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1270bc3d5698SJohn 
Baldwin.byte 102,15,56,220,217 1271bc3d5698SJohn Baldwin xorl %ebp,%r9d 1272bc3d5698SJohn Baldwin.byte 0x66,0x90 1273bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1274bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1275bc3d5698SJohn Baldwin movl %r9d,32+12(%rsp) 1276bc3d5698SJohn Baldwin leaq 3(%r8),%r9 1277bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1278bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1279bc3d5698SJohn Baldwin.byte 102,68,15,56,220,193 1280bc3d5698SJohn Baldwin.byte 102,68,15,56,220,201 1281bc3d5698SJohn Baldwin movups 80-128(%rcx),%xmm1 1282bc3d5698SJohn Baldwin bswapl %r9d 1283bc3d5698SJohn Baldwin.byte 102,15,56,220,208 1284bc3d5698SJohn Baldwin.byte 102,15,56,220,216 1285bc3d5698SJohn Baldwin xorl %ebp,%r9d 1286bc3d5698SJohn Baldwin.byte 0x66,0x90 1287bc3d5698SJohn Baldwin.byte 102,15,56,220,224 1288bc3d5698SJohn Baldwin.byte 102,15,56,220,232 1289bc3d5698SJohn Baldwin movl %r9d,48+12(%rsp) 1290bc3d5698SJohn Baldwin leaq 4(%r8),%r9 1291bc3d5698SJohn Baldwin.byte 102,15,56,220,240 1292bc3d5698SJohn Baldwin.byte 102,15,56,220,248 1293bc3d5698SJohn Baldwin.byte 102,68,15,56,220,192 1294bc3d5698SJohn Baldwin.byte 102,68,15,56,220,200 1295bc3d5698SJohn Baldwin movups 96-128(%rcx),%xmm0 1296bc3d5698SJohn Baldwin bswapl %r9d 1297bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1298bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1299bc3d5698SJohn Baldwin xorl %ebp,%r9d 1300bc3d5698SJohn Baldwin.byte 0x66,0x90 1301bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1302bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1303bc3d5698SJohn Baldwin movl %r9d,64+12(%rsp) 1304bc3d5698SJohn Baldwin leaq 5(%r8),%r9 1305bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1306bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1307bc3d5698SJohn Baldwin.byte 102,68,15,56,220,193 1308bc3d5698SJohn Baldwin.byte 102,68,15,56,220,201 1309bc3d5698SJohn Baldwin movups 112-128(%rcx),%xmm1 1310bc3d5698SJohn Baldwin bswapl %r9d 1311bc3d5698SJohn Baldwin.byte 102,15,56,220,208 1312bc3d5698SJohn Baldwin.byte 
102,15,56,220,216 1313bc3d5698SJohn Baldwin xorl %ebp,%r9d 1314bc3d5698SJohn Baldwin.byte 0x66,0x90 1315bc3d5698SJohn Baldwin.byte 102,15,56,220,224 1316bc3d5698SJohn Baldwin.byte 102,15,56,220,232 1317bc3d5698SJohn Baldwin movl %r9d,80+12(%rsp) 1318bc3d5698SJohn Baldwin leaq 6(%r8),%r9 1319bc3d5698SJohn Baldwin.byte 102,15,56,220,240 1320bc3d5698SJohn Baldwin.byte 102,15,56,220,248 1321bc3d5698SJohn Baldwin.byte 102,68,15,56,220,192 1322bc3d5698SJohn Baldwin.byte 102,68,15,56,220,200 1323bc3d5698SJohn Baldwin movups 128-128(%rcx),%xmm0 1324bc3d5698SJohn Baldwin bswapl %r9d 1325bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1326bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1327bc3d5698SJohn Baldwin xorl %ebp,%r9d 1328bc3d5698SJohn Baldwin.byte 0x66,0x90 1329bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1330bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1331bc3d5698SJohn Baldwin movl %r9d,96+12(%rsp) 1332bc3d5698SJohn Baldwin leaq 7(%r8),%r9 1333bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1334bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1335bc3d5698SJohn Baldwin.byte 102,68,15,56,220,193 1336bc3d5698SJohn Baldwin.byte 102,68,15,56,220,201 1337bc3d5698SJohn Baldwin movups 144-128(%rcx),%xmm1 1338bc3d5698SJohn Baldwin bswapl %r9d 1339bc3d5698SJohn Baldwin.byte 102,15,56,220,208 1340bc3d5698SJohn Baldwin.byte 102,15,56,220,216 1341bc3d5698SJohn Baldwin.byte 102,15,56,220,224 1342bc3d5698SJohn Baldwin xorl %ebp,%r9d 1343bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm10 1344bc3d5698SJohn Baldwin.byte 102,15,56,220,232 1345bc3d5698SJohn Baldwin movl %r9d,112+12(%rsp) 1346bc3d5698SJohn Baldwin cmpl $11,%eax 1347bc3d5698SJohn Baldwin.byte 102,15,56,220,240 1348bc3d5698SJohn Baldwin.byte 102,15,56,220,248 1349bc3d5698SJohn Baldwin.byte 102,68,15,56,220,192 1350bc3d5698SJohn Baldwin.byte 102,68,15,56,220,200 1351bc3d5698SJohn Baldwin movups 160-128(%rcx),%xmm0 1352bc3d5698SJohn Baldwin 1353bc3d5698SJohn Baldwin jb .Lctr32_enc_done 1354bc3d5698SJohn Baldwin 1355bc3d5698SJohn Baldwin.byte 
102,15,56,220,209 1356bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1357bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1358bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1359bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1360bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1361bc3d5698SJohn Baldwin.byte 102,68,15,56,220,193 1362bc3d5698SJohn Baldwin.byte 102,68,15,56,220,201 1363bc3d5698SJohn Baldwin movups 176-128(%rcx),%xmm1 1364bc3d5698SJohn Baldwin 1365bc3d5698SJohn Baldwin.byte 102,15,56,220,208 1366bc3d5698SJohn Baldwin.byte 102,15,56,220,216 1367bc3d5698SJohn Baldwin.byte 102,15,56,220,224 1368bc3d5698SJohn Baldwin.byte 102,15,56,220,232 1369bc3d5698SJohn Baldwin.byte 102,15,56,220,240 1370bc3d5698SJohn Baldwin.byte 102,15,56,220,248 1371bc3d5698SJohn Baldwin.byte 102,68,15,56,220,192 1372bc3d5698SJohn Baldwin.byte 102,68,15,56,220,200 1373bc3d5698SJohn Baldwin movups 192-128(%rcx),%xmm0 1374bc3d5698SJohn Baldwin je .Lctr32_enc_done 1375bc3d5698SJohn Baldwin 1376bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1377bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1378bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1379bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1380bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1381bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1382bc3d5698SJohn Baldwin.byte 102,68,15,56,220,193 1383bc3d5698SJohn Baldwin.byte 102,68,15,56,220,201 1384bc3d5698SJohn Baldwin movups 208-128(%rcx),%xmm1 1385bc3d5698SJohn Baldwin 1386bc3d5698SJohn Baldwin.byte 102,15,56,220,208 1387bc3d5698SJohn Baldwin.byte 102,15,56,220,216 1388bc3d5698SJohn Baldwin.byte 102,15,56,220,224 1389bc3d5698SJohn Baldwin.byte 102,15,56,220,232 1390bc3d5698SJohn Baldwin.byte 102,15,56,220,240 1391bc3d5698SJohn Baldwin.byte 102,15,56,220,248 1392bc3d5698SJohn Baldwin.byte 102,68,15,56,220,192 1393bc3d5698SJohn Baldwin.byte 102,68,15,56,220,200 1394bc3d5698SJohn Baldwin movups 224-128(%rcx),%xmm0 1395bc3d5698SJohn Baldwin jmp .Lctr32_enc_done 1396bc3d5698SJohn Baldwin 1397bc3d5698SJohn 
Baldwin.align 16 1398bc3d5698SJohn Baldwin.Lctr32_enc_done: 1399bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm11 1400bc3d5698SJohn Baldwin pxor %xmm0,%xmm10 1401bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm12 1402bc3d5698SJohn Baldwin pxor %xmm0,%xmm11 1403bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm13 1404bc3d5698SJohn Baldwin pxor %xmm0,%xmm12 1405bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm14 1406bc3d5698SJohn Baldwin pxor %xmm0,%xmm13 1407bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm15 1408bc3d5698SJohn Baldwin pxor %xmm0,%xmm14 1409bc3d5698SJohn Baldwin pxor %xmm0,%xmm15 1410bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1411bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1412bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1413bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1414bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1415bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1416bc3d5698SJohn Baldwin.byte 102,68,15,56,220,193 1417bc3d5698SJohn Baldwin.byte 102,68,15,56,220,201 1418bc3d5698SJohn Baldwin movdqu 96(%rdi),%xmm1 1419bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 1420bc3d5698SJohn Baldwin 1421bc3d5698SJohn Baldwin.byte 102,65,15,56,221,210 1422bc3d5698SJohn Baldwin pxor %xmm0,%xmm1 1423bc3d5698SJohn Baldwin movdqu 112-128(%rdi),%xmm10 1424bc3d5698SJohn Baldwin.byte 102,65,15,56,221,219 1425bc3d5698SJohn Baldwin pxor %xmm0,%xmm10 1426bc3d5698SJohn Baldwin movdqa 0(%rsp),%xmm11 1427bc3d5698SJohn Baldwin.byte 102,65,15,56,221,228 1428bc3d5698SJohn Baldwin.byte 102,65,15,56,221,237 1429bc3d5698SJohn Baldwin movdqa 16(%rsp),%xmm12 1430bc3d5698SJohn Baldwin movdqa 32(%rsp),%xmm13 1431bc3d5698SJohn Baldwin.byte 102,65,15,56,221,246 1432bc3d5698SJohn Baldwin.byte 102,65,15,56,221,255 1433bc3d5698SJohn Baldwin movdqa 48(%rsp),%xmm14 1434bc3d5698SJohn Baldwin movdqa 64(%rsp),%xmm15 1435bc3d5698SJohn Baldwin.byte 102,68,15,56,221,193 1436bc3d5698SJohn Baldwin movdqa 80(%rsp),%xmm0 1437bc3d5698SJohn Baldwin movups 16-128(%rcx),%xmm1 1438bc3d5698SJohn Baldwin.byte 102,69,15,56,221,202 
1439bc3d5698SJohn Baldwin 1440bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 1441bc3d5698SJohn Baldwin movdqa %xmm11,%xmm2 1442bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 1443bc3d5698SJohn Baldwin movdqa %xmm12,%xmm3 1444bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 1445bc3d5698SJohn Baldwin movdqa %xmm13,%xmm4 1446bc3d5698SJohn Baldwin movups %xmm5,48(%rsi) 1447bc3d5698SJohn Baldwin movdqa %xmm14,%xmm5 1448bc3d5698SJohn Baldwin movups %xmm6,64(%rsi) 1449bc3d5698SJohn Baldwin movdqa %xmm15,%xmm6 1450bc3d5698SJohn Baldwin movups %xmm7,80(%rsi) 1451bc3d5698SJohn Baldwin movdqa %xmm0,%xmm7 1452bc3d5698SJohn Baldwin movups %xmm8,96(%rsi) 1453bc3d5698SJohn Baldwin movups %xmm9,112(%rsi) 1454bc3d5698SJohn Baldwin leaq 128(%rsi),%rsi 1455bc3d5698SJohn Baldwin 1456bc3d5698SJohn Baldwin subq $8,%rdx 1457bc3d5698SJohn Baldwin jnc .Lctr32_loop8 1458bc3d5698SJohn Baldwin 1459bc3d5698SJohn Baldwin addq $8,%rdx 1460bc3d5698SJohn Baldwin jz .Lctr32_done 1461bc3d5698SJohn Baldwin leaq -128(%rcx),%rcx 1462bc3d5698SJohn Baldwin 1463bc3d5698SJohn Baldwin.Lctr32_tail: 1464bc3d5698SJohn Baldwin 1465bc3d5698SJohn Baldwin 1466bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 1467bc3d5698SJohn Baldwin cmpq $4,%rdx 1468bc3d5698SJohn Baldwin jb .Lctr32_loop3 1469bc3d5698SJohn Baldwin je .Lctr32_loop4 1470bc3d5698SJohn Baldwin 1471bc3d5698SJohn Baldwin 1472bc3d5698SJohn Baldwin shll $4,%eax 1473bc3d5698SJohn Baldwin movdqa 96(%rsp),%xmm8 1474bc3d5698SJohn Baldwin pxor %xmm9,%xmm9 1475bc3d5698SJohn Baldwin 1476bc3d5698SJohn Baldwin movups 16(%rcx),%xmm0 1477bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1478bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1479bc3d5698SJohn Baldwin leaq 32-16(%rcx,%rax,1),%rcx 1480bc3d5698SJohn Baldwin negq %rax 1481bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1482bc3d5698SJohn Baldwin addq $16,%rax 1483bc3d5698SJohn Baldwin movups (%rdi),%xmm10 1484bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1485bc3d5698SJohn Baldwin.byte 102,15,56,220,241 1486bc3d5698SJohn Baldwin movups 
16(%rdi),%xmm11 1487bc3d5698SJohn Baldwin movups 32(%rdi),%xmm12 1488bc3d5698SJohn Baldwin.byte 102,15,56,220,249 1489bc3d5698SJohn Baldwin.byte 102,68,15,56,220,193 1490bc3d5698SJohn Baldwin 1491bc3d5698SJohn Baldwin call .Lenc_loop8_enter 1492bc3d5698SJohn Baldwin 1493bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm13 1494bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 1495bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm10 1496bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 1497bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 1498bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 1499bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 1500bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 1501bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 1502bc3d5698SJohn Baldwin pxor %xmm10,%xmm6 1503bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi) 1504bc3d5698SJohn Baldwin movdqu %xmm6,64(%rsi) 1505bc3d5698SJohn Baldwin cmpq $6,%rdx 1506bc3d5698SJohn Baldwin jb .Lctr32_done 1507bc3d5698SJohn Baldwin 1508bc3d5698SJohn Baldwin movups 80(%rdi),%xmm11 1509bc3d5698SJohn Baldwin xorps %xmm11,%xmm7 1510bc3d5698SJohn Baldwin movups %xmm7,80(%rsi) 1511bc3d5698SJohn Baldwin je .Lctr32_done 1512bc3d5698SJohn Baldwin 1513bc3d5698SJohn Baldwin movups 96(%rdi),%xmm12 1514bc3d5698SJohn Baldwin xorps %xmm12,%xmm8 1515bc3d5698SJohn Baldwin movups %xmm8,96(%rsi) 1516bc3d5698SJohn Baldwin jmp .Lctr32_done 1517bc3d5698SJohn Baldwin 1518bc3d5698SJohn Baldwin.align 32 1519bc3d5698SJohn Baldwin.Lctr32_loop4: 1520bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1521bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 1522bc3d5698SJohn Baldwin decl %eax 1523bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1524bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1525bc3d5698SJohn Baldwin.byte 102,15,56,220,233 1526bc3d5698SJohn Baldwin movups (%rcx),%xmm1 1527bc3d5698SJohn Baldwin jnz .Lctr32_loop4 1528bc3d5698SJohn Baldwin.byte 102,15,56,221,209 1529bc3d5698SJohn Baldwin.byte 102,15,56,221,217 1530bc3d5698SJohn Baldwin movups (%rdi),%xmm10 1531bc3d5698SJohn Baldwin movups 16(%rdi),%xmm11 
1532bc3d5698SJohn Baldwin.byte 102,15,56,221,225 1533bc3d5698SJohn Baldwin.byte 102,15,56,221,233 1534bc3d5698SJohn Baldwin movups 32(%rdi),%xmm12 1535bc3d5698SJohn Baldwin movups 48(%rdi),%xmm13 1536bc3d5698SJohn Baldwin 1537bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 1538bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 1539bc3d5698SJohn Baldwin xorps %xmm11,%xmm3 1540bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 1541bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 1542bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 1543bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 1544bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi) 1545bc3d5698SJohn Baldwin jmp .Lctr32_done 1546bc3d5698SJohn Baldwin 1547bc3d5698SJohn Baldwin.align 32 1548bc3d5698SJohn Baldwin.Lctr32_loop3: 1549bc3d5698SJohn Baldwin.byte 102,15,56,220,209 1550bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 1551bc3d5698SJohn Baldwin decl %eax 1552bc3d5698SJohn Baldwin.byte 102,15,56,220,217 1553bc3d5698SJohn Baldwin.byte 102,15,56,220,225 1554bc3d5698SJohn Baldwin movups (%rcx),%xmm1 1555bc3d5698SJohn Baldwin jnz .Lctr32_loop3 1556bc3d5698SJohn Baldwin.byte 102,15,56,221,209 1557bc3d5698SJohn Baldwin.byte 102,15,56,221,217 1558bc3d5698SJohn Baldwin.byte 102,15,56,221,225 1559bc3d5698SJohn Baldwin 1560bc3d5698SJohn Baldwin movups (%rdi),%xmm10 1561bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 1562bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 1563bc3d5698SJohn Baldwin cmpq $2,%rdx 1564bc3d5698SJohn Baldwin jb .Lctr32_done 1565bc3d5698SJohn Baldwin 1566bc3d5698SJohn Baldwin movups 16(%rdi),%xmm11 1567bc3d5698SJohn Baldwin xorps %xmm11,%xmm3 1568bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 1569bc3d5698SJohn Baldwin je .Lctr32_done 1570bc3d5698SJohn Baldwin 1571bc3d5698SJohn Baldwin movups 32(%rdi),%xmm12 1572bc3d5698SJohn Baldwin xorps %xmm12,%xmm4 1573bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 1574bc3d5698SJohn Baldwin 1575bc3d5698SJohn Baldwin.Lctr32_done: 1576bc3d5698SJohn Baldwin xorps %xmm0,%xmm0 1577bc3d5698SJohn Baldwin xorl %ebp,%ebp 1578bc3d5698SJohn Baldwin 
pxor %xmm1,%xmm1
1579bc3d5698SJohn Baldwin pxor %xmm2,%xmm2
1580bc3d5698SJohn Baldwin pxor %xmm3,%xmm3
1581bc3d5698SJohn Baldwin pxor %xmm4,%xmm4
1582bc3d5698SJohn Baldwin pxor %xmm5,%xmm5
1583bc3d5698SJohn Baldwin pxor %xmm6,%xmm6
1584bc3d5698SJohn Baldwin pxor %xmm7,%xmm7
1585bc3d5698SJohn Baldwin movaps %xmm0,0(%rsp)
1586bc3d5698SJohn Baldwin pxor %xmm8,%xmm8
1587bc3d5698SJohn Baldwin movaps %xmm0,16(%rsp)
1588bc3d5698SJohn Baldwin pxor %xmm9,%xmm9
1589bc3d5698SJohn Baldwin movaps %xmm0,32(%rsp)
1590bc3d5698SJohn Baldwin pxor %xmm10,%xmm10
1591bc3d5698SJohn Baldwin movaps %xmm0,48(%rsp)
1592bc3d5698SJohn Baldwin pxor %xmm11,%xmm11
1593bc3d5698SJohn Baldwin movaps %xmm0,64(%rsp)
1594bc3d5698SJohn Baldwin pxor %xmm12,%xmm12
1595bc3d5698SJohn Baldwin movaps %xmm0,80(%rsp)
1596bc3d5698SJohn Baldwin pxor %xmm13,%xmm13
1597bc3d5698SJohn Baldwin movaps %xmm0,96(%rsp)
1598bc3d5698SJohn Baldwin pxor %xmm14,%xmm14
1599bc3d5698SJohn Baldwin movaps %xmm0,112(%rsp)
1600bc3d5698SJohn Baldwin pxor %xmm15,%xmm15
1601bc3d5698SJohn Baldwin movq -8(%r11),%rbp
1602bc3d5698SJohn Baldwin.cfi_restore %rbp
1603bc3d5698SJohn Baldwin leaq (%r11),%rsp
1604bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp
1605bc3d5698SJohn Baldwin.Lctr32_epilogue:
1606bc3d5698SJohn Baldwin .byte 0xf3,0xc3
1607bc3d5698SJohn Baldwin.cfi_endproc
1608bc3d5698SJohn Baldwin.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * aesni_xts_encrypt
 *
 * Registers read on entry by the code below (the order matches the
 * System V AMD64 integer argument registers):
 *   %rdi  input pointer, read 16 bytes at a time and advanced
 *   %rsi  output pointer, written 16 bytes at a time and advanced
 *   %rdx  length in bytes. Whole 16-byte blocks are handled first. The
 *         low four bits (copy kept in %r9) select the byte-swapping
 *         tail at .Lxts_enc_steal
 *   %rcx  key schedule for the data blocks, round count at offset 240
 *   %r8   key schedule used once, to encrypt the initial 16-byte value
 *   %r9   pointer to the initial 16-byte value
 *
 * Frame: %r11 holds the entry %rsp, %rbp is saved at -8(%r11), and
 * 112 bytes of 16-byte aligned scratch live at (%rsp). Slots 0..80 hold
 * per-block masks for the final round, slot 96 holds
 * (last round key ^ round key 0).
 *
 * Register roles after setup:
 *   %rbp            data key schedule base
 *   %xmm10..%xmm14  tweaks for the next blocks, %xmm15 the running tweak
 *   %xmm8           tweak-doubling mask loaded from .Lxts_magic
 *   %xmm9           shifted copy of the tweak sign bits used for carries
 *
 * Instructions emitted as raw bytes:
 *   102,15,56,220,modrm  aesenc      (66 0F 38 DC)
 *   102,15,56,221,modrm  aesenclast  (66 0F 38 DD)
 *   243,15,30,250        endbr64     (F3 0F 1E FA)
 *   0xf3,0xc3            rep ret
 *
 * All xmm registers and the 112-byte scratch area are zeroed before
 * returning.
 *
 * NOTE(review): .Lxts_magic and the _aesni_encrypt3/4/6 helpers are
 * defined outside this chunk. Their behaviour is assumed from the call
 * sites here - TODO confirm.
 */
1609bc3d5698SJohn Baldwin.globl aesni_xts_encrypt
1610bc3d5698SJohn Baldwin.type aesni_xts_encrypt,@function
1611bc3d5698SJohn Baldwin.align 16
1612bc3d5698SJohn Baldwinaesni_xts_encrypt:
1613bc3d5698SJohn Baldwin.cfi_startproc
/* endbr64 landing pad */
1614*c0855eaaSJohn Baldwin.byte 243,15,30,250
/* Prologue: remember entry %rsp in %r11, save %rbp, carve out aligned scratch. */
1615bc3d5698SJohn Baldwin leaq (%rsp),%r11
1616bc3d5698SJohn Baldwin.cfi_def_cfa_register %r11
1617bc3d5698SJohn Baldwin pushq %rbp
1618bc3d5698SJohn Baldwin.cfi_offset %rbp,-16
1619bc3d5698SJohn Baldwin subq $112,%rsp
1620bc3d5698SJohn Baldwin andq $-16,%rsp
/*
 * Encrypt the 16 bytes at (%r9) with the key schedule at %r8, one block,
 * one aesenc per loop pass. The result in %xmm2 is the first tweak.
 * %eax counts rounds for this schedule, %r10d holds the data key's count.
 */
1621bc3d5698SJohn Baldwin movups (%r9),%xmm2
1622bc3d5698SJohn Baldwin movl 240(%r8),%eax
1623bc3d5698SJohn Baldwin movl 240(%rcx),%r10d
1624bc3d5698SJohn Baldwin movups (%r8),%xmm0
1625bc3d5698SJohn Baldwin movups 16(%r8),%xmm1
1626bc3d5698SJohn Baldwin leaq 32(%r8),%r8
1627bc3d5698SJohn Baldwin xorps %xmm0,%xmm2
1628bc3d5698SJohn Baldwin.Loop_enc1_8:
1629bc3d5698SJohn Baldwin.byte 102,15,56,220,209
1630bc3d5698SJohn Baldwin decl %eax
1631bc3d5698SJohn Baldwin movups (%r8),%xmm1
1632bc3d5698SJohn Baldwin leaq 16(%r8),%r8
1633bc3d5698SJohn Baldwin jnz .Loop_enc1_8
1634bc3d5698SJohn Baldwin.byte 102,15,56,221,209
/*
 * Switch to the data key: %xmm0 = round key 0, %rbp = schedule base,
 * %eax = round count, %r10d = round count * 16. %r9 keeps the full
 * length while %rdx is rounded down to a multiple of 16.
 */
1635bc3d5698SJohn Baldwin movups (%rcx),%xmm0
1636bc3d5698SJohn Baldwin movq %rcx,%rbp
1637bc3d5698SJohn Baldwin movl %r10d,%eax
1638bc3d5698SJohn Baldwin shll $4,%r10d
1639bc3d5698SJohn Baldwin movq %rdx,%r9
1640bc3d5698SJohn Baldwin andq $-16,%rdx
1641bc3d5698SJohn Baldwin
/* %xmm1 = last round key. It is XORed with round key 0 below and parked at 96(%rsp). */
1642bc3d5698SJohn Baldwin movups 16(%rcx,%r10,1),%xmm1
1643bc3d5698SJohn Baldwin
/*
 * Derive five consecutive tweaks into %xmm10..%xmm14, each already XORed
 * with round key 0, leaving the sixth (not XORed) in %xmm15.
 * Each step doubles %xmm15 as two 64-bit halves (paddq) and repairs the
 * carries: pshufd 0x5f copies dwords 3 and 1 into %xmm9, psrad 31 turns
 * their sign bits into all-ones masks, pand with %xmm8 selects the
 * correction bits, pxor applies them. paddd on %xmm9 exposes the next
 * sign bit for the following step.
 */
1644bc3d5698SJohn Baldwin movdqa .Lxts_magic(%rip),%xmm8
1645bc3d5698SJohn Baldwin movdqa %xmm2,%xmm15
1646bc3d5698SJohn Baldwin pshufd $0x5f,%xmm2,%xmm9
1647bc3d5698SJohn Baldwin pxor %xmm0,%xmm1
1648bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14
1649bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1650bc3d5698SJohn Baldwin movdqa %xmm15,%xmm10
1651bc3d5698SJohn Baldwin psrad $31,%xmm14
1652bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1653bc3d5698SJohn Baldwin pand %xmm8,%xmm14
1654bc3d5698SJohn Baldwin pxor %xmm0,%xmm10
1655bc3d5698SJohn Baldwin pxor %xmm14,%xmm15
1656bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14
1657bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1658bc3d5698SJohn Baldwin movdqa %xmm15,%xmm11
1659bc3d5698SJohn Baldwin psrad $31,%xmm14
1660bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1661bc3d5698SJohn Baldwin pand %xmm8,%xmm14
1662bc3d5698SJohn Baldwin pxor %xmm0,%xmm11
1663bc3d5698SJohn Baldwin pxor %xmm14,%xmm15
1664bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14
1665bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1666bc3d5698SJohn Baldwin movdqa %xmm15,%xmm12
1667bc3d5698SJohn Baldwin psrad $31,%xmm14
1668bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1669bc3d5698SJohn Baldwin pand %xmm8,%xmm14
1670bc3d5698SJohn Baldwin pxor %xmm0,%xmm12
1671bc3d5698SJohn Baldwin pxor %xmm14,%xmm15
1672bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14
1673bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1674bc3d5698SJohn Baldwin movdqa %xmm15,%xmm13
1675bc3d5698SJohn Baldwin psrad $31,%xmm14
1676bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1677bc3d5698SJohn Baldwin pand %xmm8,%xmm14
1678bc3d5698SJohn Baldwin pxor %xmm0,%xmm13
1679bc3d5698SJohn Baldwin pxor %xmm14,%xmm15
1680bc3d5698SJohn Baldwin movdqa %xmm15,%xmm14
1681bc3d5698SJohn Baldwin psrad $31,%xmm9
1682bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1683bc3d5698SJohn Baldwin pand %xmm8,%xmm9
1684bc3d5698SJohn Baldwin pxor %xmm0,%xmm14
1685bc3d5698SJohn Baldwin pxor %xmm9,%xmm15
1686bc3d5698SJohn Baldwin movaps %xmm1,96(%rsp)
1687bc3d5698SJohn Baldwin
/* Fewer than six whole blocks: skip the bulk loop. %eax and %rcx are still the round count and key base. */
1688bc3d5698SJohn Baldwin subq $96,%rdx
1689bc3d5698SJohn Baldwin jc .Lxts_enc_short
1690bc3d5698SJohn Baldwin
/*
 * Bulk-loop setup: %rcx points 32 bytes past the last round key offset
 * and %rax = 112 - 16*rounds is a negative index that the inner loop
 * steps by 32 up to zero. %r10 keeps that start value for reloading,
 * %r8 points at .Lxts_magic.
 */
1691bc3d5698SJohn Baldwin movl $16+96,%eax
1692bc3d5698SJohn Baldwin leaq 32(%rbp,%r10,1),%rcx
1693bc3d5698SJohn Baldwin subq %r10,%rax
1694bc3d5698SJohn Baldwin movups 16(%rbp),%xmm1
1695bc3d5698SJohn Baldwin movq %rax,%r10
1696bc3d5698SJohn Baldwin leaq .Lxts_magic(%rip),%r8
1697bc3d5698SJohn Baldwin jmp .Lxts_enc_grandloop
1698bc3d5698SJohn Baldwin
1699bc3d5698SJohn Baldwin.align 32
/*
 * Six blocks per pass. Inputs go to %xmm2..%xmm7 and are XORed with
 * (tweak ^ round key 0), which applies the tweak and the first
 * AddRoundKey together. The first rounds are interleaved with turning
 * each tweak register into (tweak ^ last round key) via 96(%rsp) and
 * storing it at 0..80(%rsp) for the final round.
 */
1700bc3d5698SJohn Baldwin.Lxts_enc_grandloop:
1701bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm2
1702bc3d5698SJohn Baldwin movdqa %xmm0,%xmm8
1703bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3
1704bc3d5698SJohn Baldwin pxor %xmm10,%xmm2
1705bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4
1706bc3d5698SJohn Baldwin pxor %xmm11,%xmm3
1707bc3d5698SJohn Baldwin.byte 102,15,56,220,209
1708bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5
1709bc3d5698SJohn Baldwin pxor %xmm12,%xmm4
1710bc3d5698SJohn Baldwin.byte 102,15,56,220,217
1711bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6
1712bc3d5698SJohn Baldwin pxor %xmm13,%xmm5
1713bc3d5698SJohn Baldwin.byte 102,15,56,220,225
1714bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm7
1715bc3d5698SJohn Baldwin pxor %xmm15,%xmm8
1716bc3d5698SJohn Baldwin movdqa 96(%rsp),%xmm9
1717bc3d5698SJohn Baldwin pxor %xmm14,%xmm6
1718bc3d5698SJohn Baldwin.byte 102,15,56,220,233
1719bc3d5698SJohn Baldwin movups 32(%rbp),%xmm0
1720bc3d5698SJohn Baldwin leaq 96(%rdi),%rdi
1721bc3d5698SJohn Baldwin pxor %xmm8,%xmm7
1722bc3d5698SJohn Baldwin
1723bc3d5698SJohn Baldwin pxor %xmm9,%xmm10
1724bc3d5698SJohn Baldwin.byte 102,15,56,220,241
1725bc3d5698SJohn Baldwin pxor %xmm9,%xmm11
1726bc3d5698SJohn Baldwin movdqa %xmm10,0(%rsp)
1727bc3d5698SJohn Baldwin.byte 102,15,56,220,249
1728bc3d5698SJohn Baldwin movups 48(%rbp),%xmm1
1729bc3d5698SJohn Baldwin pxor %xmm9,%xmm12
1730bc3d5698SJohn Baldwin
1731bc3d5698SJohn Baldwin.byte 102,15,56,220,208
1732bc3d5698SJohn Baldwin pxor %xmm9,%xmm13
1733bc3d5698SJohn Baldwin movdqa %xmm11,16(%rsp)
1734bc3d5698SJohn Baldwin.byte 102,15,56,220,216
1735bc3d5698SJohn Baldwin pxor %xmm9,%xmm14
1736bc3d5698SJohn Baldwin movdqa %xmm12,32(%rsp)
1737bc3d5698SJohn Baldwin.byte 102,15,56,220,224
1738bc3d5698SJohn Baldwin.byte 102,15,56,220,232
1739bc3d5698SJohn Baldwin pxor %xmm9,%xmm8
1740bc3d5698SJohn Baldwin movdqa %xmm14,64(%rsp)
1741bc3d5698SJohn Baldwin.byte 102,15,56,220,240
1742bc3d5698SJohn Baldwin.byte 102,15,56,220,248
1743bc3d5698SJohn Baldwin movups 64(%rbp),%xmm0
1744bc3d5698SJohn Baldwin movdqa %xmm8,80(%rsp)
1745bc3d5698SJohn Baldwin pshufd $0x5f,%xmm15,%xmm9
1746bc3d5698SJohn Baldwin jmp .Lxts_enc_loop6
1747bc3d5698SJohn Baldwin.align 32
/*
 * Middle rounds: two rounds per pass on all six blocks, alternating the
 * round keys held in %xmm1 and %xmm0. %rax climbs by 32 to zero, and the
 * jnz tests the flags from that addq (movups does not alter flags).
 */
1748bc3d5698SJohn Baldwin.Lxts_enc_loop6:
1749bc3d5698SJohn Baldwin.byte 102,15,56,220,209
1750bc3d5698SJohn Baldwin.byte 102,15,56,220,217
1751bc3d5698SJohn Baldwin.byte 102,15,56,220,225
1752bc3d5698SJohn Baldwin.byte 102,15,56,220,233
1753bc3d5698SJohn Baldwin.byte 102,15,56,220,241
1754bc3d5698SJohn Baldwin.byte 102,15,56,220,249
1755bc3d5698SJohn Baldwin movups -64(%rcx,%rax,1),%xmm1
1756bc3d5698SJohn Baldwin addq $32,%rax
1757bc3d5698SJohn Baldwin
1758bc3d5698SJohn Baldwin.byte 102,15,56,220,208
1759bc3d5698SJohn Baldwin.byte 102,15,56,220,216
1760bc3d5698SJohn Baldwin.byte 102,15,56,220,224
1761bc3d5698SJohn Baldwin.byte 102,15,56,220,232
1762bc3d5698SJohn Baldwin.byte 102,15,56,220,240
1763bc3d5698SJohn Baldwin.byte 102,15,56,220,248
1764bc3d5698SJohn Baldwin movups -80(%rcx,%rax,1),%xmm0
1765bc3d5698SJohn Baldwin jnz .Lxts_enc_loop6
1766bc3d5698SJohn Baldwin
/*
 * Last rounds, interleaved with computing the next six tweaks.
 * Round key 0 is reloaded from (%rbp) and each new tweak is XORed with
 * it in %xmm10..%xmm14. %xmm13 still holds the current pass's fourth
 * mask until it is stored to 48(%rsp) below, then it is reused.
 */
1767bc3d5698SJohn Baldwin movdqa (%r8),%xmm8
1768bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14
1769bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1770bc3d5698SJohn Baldwin.byte 102,15,56,220,209
1771bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1772bc3d5698SJohn Baldwin psrad $31,%xmm14
1773bc3d5698SJohn Baldwin.byte 102,15,56,220,217
1774bc3d5698SJohn Baldwin pand %xmm8,%xmm14
1775bc3d5698SJohn Baldwin movups (%rbp),%xmm10
1776bc3d5698SJohn Baldwin.byte 102,15,56,220,225
1777bc3d5698SJohn Baldwin.byte 102,15,56,220,233
1778bc3d5698SJohn Baldwin.byte 102,15,56,220,241
1779bc3d5698SJohn Baldwin pxor %xmm14,%xmm15
1780bc3d5698SJohn Baldwin movaps %xmm10,%xmm11
1781bc3d5698SJohn Baldwin.byte 102,15,56,220,249
1782bc3d5698SJohn Baldwin movups -64(%rcx),%xmm1
1783bc3d5698SJohn Baldwin
1784bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14
1785bc3d5698SJohn Baldwin.byte 102,15,56,220,208
1786bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1787bc3d5698SJohn Baldwin pxor %xmm15,%xmm10
1788bc3d5698SJohn Baldwin.byte 102,15,56,220,216
1789bc3d5698SJohn Baldwin psrad $31,%xmm14
1790bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1791bc3d5698SJohn Baldwin.byte 102,15,56,220,224
1792bc3d5698SJohn Baldwin.byte 102,15,56,220,232
1793bc3d5698SJohn Baldwin pand %xmm8,%xmm14
1794bc3d5698SJohn Baldwin movaps %xmm11,%xmm12
1795bc3d5698SJohn Baldwin.byte 102,15,56,220,240
1796bc3d5698SJohn Baldwin pxor %xmm14,%xmm15
1797bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14
1798bc3d5698SJohn Baldwin.byte 102,15,56,220,248
1799bc3d5698SJohn Baldwin movups -48(%rcx),%xmm0
1800bc3d5698SJohn Baldwin
1801bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1802bc3d5698SJohn Baldwin.byte 102,15,56,220,209
1803bc3d5698SJohn Baldwin pxor %xmm15,%xmm11
1804bc3d5698SJohn Baldwin psrad $31,%xmm14
1805bc3d5698SJohn Baldwin.byte 102,15,56,220,217
1806bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1807bc3d5698SJohn Baldwin pand %xmm8,%xmm14
1808bc3d5698SJohn Baldwin.byte 102,15,56,220,225
1809bc3d5698SJohn Baldwin.byte 102,15,56,220,233
1810bc3d5698SJohn Baldwin movdqa %xmm13,48(%rsp)
1811bc3d5698SJohn Baldwin pxor %xmm14,%xmm15
1812bc3d5698SJohn Baldwin.byte 102,15,56,220,241
1813bc3d5698SJohn Baldwin movaps %xmm12,%xmm13
1814bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14
1815bc3d5698SJohn Baldwin.byte 102,15,56,220,249
1816bc3d5698SJohn Baldwin movups -32(%rcx),%xmm1
1817bc3d5698SJohn Baldwin
1818bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1819bc3d5698SJohn Baldwin.byte 102,15,56,220,208
1820bc3d5698SJohn Baldwin pxor %xmm15,%xmm12
1821bc3d5698SJohn Baldwin psrad $31,%xmm14
1822bc3d5698SJohn Baldwin.byte 102,15,56,220,216
1823bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1824bc3d5698SJohn Baldwin pand %xmm8,%xmm14
1825bc3d5698SJohn Baldwin.byte 102,15,56,220,224
1826bc3d5698SJohn Baldwin.byte 102,15,56,220,232
1827bc3d5698SJohn Baldwin.byte 102,15,56,220,240
1828bc3d5698SJohn Baldwin pxor %xmm14,%xmm15
1829bc3d5698SJohn Baldwin movaps %xmm13,%xmm14
1830bc3d5698SJohn Baldwin.byte 102,15,56,220,248
1831bc3d5698SJohn Baldwin
1832bc3d5698SJohn Baldwin movdqa %xmm9,%xmm0
1833bc3d5698SJohn Baldwin paddd %xmm9,%xmm9
1834bc3d5698SJohn Baldwin.byte 102,15,56,220,209
1835bc3d5698SJohn Baldwin pxor %xmm15,%xmm13
1836bc3d5698SJohn Baldwin psrad $31,%xmm0
1837bc3d5698SJohn Baldwin.byte 102,15,56,220,217
1838bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1839bc3d5698SJohn Baldwin pand %xmm8,%xmm0
1840bc3d5698SJohn Baldwin.byte 102,15,56,220,225
1841bc3d5698SJohn Baldwin.byte 102,15,56,220,233
1842bc3d5698SJohn Baldwin pxor %xmm0,%xmm15
1843bc3d5698SJohn Baldwin movups (%rbp),%xmm0
1844bc3d5698SJohn Baldwin.byte 102,15,56,220,241
1845bc3d5698SJohn Baldwin.byte 102,15,56,220,249
1846bc3d5698SJohn Baldwin movups 16(%rbp),%xmm1
1847bc3d5698SJohn Baldwin
/*
 * Final round: aesenclast with a memory operand at 0..80(%rsp). Those
 * slots hold (tweak ^ last round key), so the closing tweak XOR is done
 * by the instruction itself. %rax is reloaded from %r10 for the next pass.
 */
1848bc3d5698SJohn Baldwin pxor %xmm15,%xmm14
1849bc3d5698SJohn Baldwin.byte 102,15,56,221,84,36,0
1850bc3d5698SJohn Baldwin psrad $31,%xmm9
1851bc3d5698SJohn Baldwin paddq %xmm15,%xmm15
1852bc3d5698SJohn Baldwin.byte 102,15,56,221,92,36,16
1853bc3d5698SJohn Baldwin.byte 102,15,56,221,100,36,32
1854bc3d5698SJohn Baldwin pand %xmm8,%xmm9
1855bc3d5698SJohn Baldwin movq %r10,%rax
1856bc3d5698SJohn Baldwin.byte 102,15,56,221,108,36,48
1857bc3d5698SJohn Baldwin.byte 102,15,56,221,116,36,64
1858bc3d5698SJohn Baldwin.byte 102,15,56,221,124,36,80
1859bc3d5698SJohn Baldwin pxor %xmm9,%xmm15
1860bc3d5698SJohn Baldwin
/* Store six output blocks and loop while at least 96 bytes remain. */
1861bc3d5698SJohn Baldwin leaq 96(%rsi),%rsi
1862bc3d5698SJohn Baldwin movups %xmm2,-96(%rsi)
1863bc3d5698SJohn Baldwin movups %xmm3,-80(%rsi)
1864bc3d5698SJohn Baldwin movups %xmm4,-64(%rsi)
1865bc3d5698SJohn Baldwin movups %xmm5,-48(%rsi)
1866bc3d5698SJohn Baldwin movups %xmm6,-32(%rsi)
1867bc3d5698SJohn Baldwin movups %xmm7,-16(%rsi)
1868bc3d5698SJohn Baldwin subq $96,%rdx
1869bc3d5698SJohn Baldwin jnc .Lxts_enc_grandloop
1870bc3d5698SJohn Baldwin
/* Leaving the bulk loop: recover the round count, (112 - %r10d) / 16, into %eax and the key base into %rcx. */
1871bc3d5698SJohn Baldwin movl $16+96,%eax
1872bc3d5698SJohn Baldwin subl %r10d,%eax
1873bc3d5698SJohn Baldwin movq %rbp,%rcx
1874bc3d5698SJohn Baldwin shrl $4,%eax
1875bc3d5698SJohn Baldwin
/*
 * Zero to five whole blocks remain (%rdx after the addq is 0..80).
 * %r10d now keeps the round count for the stealing step. Each pxor with
 * %xmm0 (round key 0) strips the pre-applied key from a tweak just
 * before it may be needed. The cmpq/jb/je pairs dispatch on the block
 * count (pxor does not alter flags).
 */
1876bc3d5698SJohn Baldwin.Lxts_enc_short:
1877bc3d5698SJohn Baldwin
1878bc3d5698SJohn Baldwin movl %eax,%r10d
1879bc3d5698SJohn Baldwin pxor %xmm0,%xmm10
1880bc3d5698SJohn Baldwin addq $96,%rdx
1881bc3d5698SJohn Baldwin jz .Lxts_enc_done
1882bc3d5698SJohn Baldwin
1883bc3d5698SJohn Baldwin pxor %xmm0,%xmm11
1884bc3d5698SJohn Baldwin cmpq $0x20,%rdx
1885bc3d5698SJohn Baldwin jb .Lxts_enc_one
1886bc3d5698SJohn Baldwin pxor %xmm0,%xmm12
1887bc3d5698SJohn Baldwin je .Lxts_enc_two
1888bc3d5698SJohn Baldwin
1889bc3d5698SJohn Baldwin pxor %xmm0,%xmm13
1890bc3d5698SJohn Baldwin cmpq $0x40,%rdx
1891bc3d5698SJohn Baldwin jb .Lxts_enc_three
1892bc3d5698SJohn Baldwin pxor %xmm0,%xmm14
1893bc3d5698SJohn Baldwin je .Lxts_enc_four
1894bc3d5698SJohn Baldwin
/*
 * Five blocks: pre-XOR with tweaks, run the six-wide helper with %xmm7
 * zeroed as a dummy sixth block, post-XOR with the same tweaks.
 * %xmm10 then takes the next tweak (%xmm15) for a possible stealing step.
 */
1895bc3d5698SJohn Baldwin movdqu (%rdi),%xmm2
1896bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3
1897bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4
1898bc3d5698SJohn Baldwin pxor %xmm10,%xmm2
1899bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5
1900bc3d5698SJohn Baldwin pxor %xmm11,%xmm3
1901bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6
1902bc3d5698SJohn Baldwin leaq 80(%rdi),%rdi
1903bc3d5698SJohn Baldwin pxor %xmm12,%xmm4
1904bc3d5698SJohn Baldwin pxor %xmm13,%xmm5
1905bc3d5698SJohn Baldwin pxor %xmm14,%xmm6
1906bc3d5698SJohn Baldwin pxor %xmm7,%xmm7
1907bc3d5698SJohn Baldwin
1908bc3d5698SJohn Baldwin call _aesni_encrypt6
1909bc3d5698SJohn Baldwin
1910bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
1911bc3d5698SJohn Baldwin movdqa %xmm15,%xmm10
1912bc3d5698SJohn Baldwin xorps %xmm11,%xmm3
1913bc3d5698SJohn Baldwin xorps %xmm12,%xmm4
1914bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi)
1915bc3d5698SJohn Baldwin xorps %xmm13,%xmm5
1916bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi)
1917bc3d5698SJohn Baldwin xorps %xmm14,%xmm6
1918bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi)
1919bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi)
1920bc3d5698SJohn Baldwin movdqu %xmm6,64(%rsi)
1921bc3d5698SJohn Baldwin leaq 80(%rsi),%rsi
1922bc3d5698SJohn Baldwin jmp .Lxts_enc_done
1923bc3d5698SJohn Baldwin
1924bc3d5698SJohn Baldwin.align 16
/* One block: inline single-block encryption (key at %rcx, %eax rounds). %xmm10 becomes the next tweak (%xmm11). */
1925bc3d5698SJohn Baldwin.Lxts_enc_one:
1926bc3d5698SJohn Baldwin movups (%rdi),%xmm2
1927bc3d5698SJohn Baldwin leaq 16(%rdi),%rdi
1928bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
1929bc3d5698SJohn Baldwin movups (%rcx),%xmm0
1930bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1
1931bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx
1932bc3d5698SJohn Baldwin xorps %xmm0,%xmm2
1933bc3d5698SJohn Baldwin.Loop_enc1_9:
1934bc3d5698SJohn Baldwin.byte 102,15,56,220,209
1935bc3d5698SJohn Baldwin decl %eax
1936bc3d5698SJohn Baldwin movups (%rcx),%xmm1
1937bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx
1938bc3d5698SJohn Baldwin jnz .Loop_enc1_9
1939bc3d5698SJohn Baldwin.byte 102,15,56,221,209
1940bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
1941bc3d5698SJohn Baldwin movdqa %xmm11,%xmm10
1942bc3d5698SJohn Baldwin movups %xmm2,(%rsi)
1943bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi
1944bc3d5698SJohn Baldwin jmp .Lxts_enc_done
1945bc3d5698SJohn Baldwin
1946bc3d5698SJohn Baldwin.align 16
/* Two blocks via _aesni_encrypt2 (blocks in %xmm2/%xmm3, key at %rcx, rounds in %eax). %xmm10 becomes the next tweak (%xmm12). */
1947bc3d5698SJohn Baldwin.Lxts_enc_two:
1948bc3d5698SJohn Baldwin movups (%rdi),%xmm2
1949bc3d5698SJohn Baldwin movups 16(%rdi),%xmm3
1950bc3d5698SJohn Baldwin leaq 32(%rdi),%rdi
1951bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
1952bc3d5698SJohn Baldwin xorps %xmm11,%xmm3
1953bc3d5698SJohn Baldwin
1954bc3d5698SJohn Baldwin call _aesni_encrypt2
1955bc3d5698SJohn Baldwin
1956bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
1957bc3d5698SJohn Baldwin movdqa %xmm12,%xmm10
1958bc3d5698SJohn Baldwin xorps %xmm11,%xmm3
1959bc3d5698SJohn Baldwin movups %xmm2,(%rsi)
1960bc3d5698SJohn Baldwin movups %xmm3,16(%rsi)
1961bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi
1962bc3d5698SJohn Baldwin jmp .Lxts_enc_done
1963bc3d5698SJohn Baldwin
1964bc3d5698SJohn Baldwin.align 16
/* Three blocks via _aesni_encrypt3. %xmm10 becomes the next tweak (%xmm13). */
1965bc3d5698SJohn Baldwin.Lxts_enc_three:
1966bc3d5698SJohn Baldwin movups (%rdi),%xmm2
1967bc3d5698SJohn Baldwin movups 16(%rdi),%xmm3
1968bc3d5698SJohn Baldwin movups 32(%rdi),%xmm4
1969bc3d5698SJohn Baldwin leaq 48(%rdi),%rdi
1970bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
1971bc3d5698SJohn Baldwin xorps %xmm11,%xmm3
1972bc3d5698SJohn Baldwin xorps %xmm12,%xmm4
1973bc3d5698SJohn Baldwin
1974bc3d5698SJohn Baldwin call _aesni_encrypt3
1975bc3d5698SJohn Baldwin
1976bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
1977bc3d5698SJohn Baldwin movdqa %xmm13,%xmm10
1978bc3d5698SJohn Baldwin xorps %xmm11,%xmm3
1979bc3d5698SJohn Baldwin xorps %xmm12,%xmm4
1980bc3d5698SJohn Baldwin movups %xmm2,(%rsi)
1981bc3d5698SJohn Baldwin movups %xmm3,16(%rsi)
1982bc3d5698SJohn Baldwin movups %xmm4,32(%rsi)
1983bc3d5698SJohn Baldwin leaq 48(%rsi),%rsi
1984bc3d5698SJohn Baldwin jmp .Lxts_enc_done
1985bc3d5698SJohn Baldwin
1986bc3d5698SJohn Baldwin.align 16
/* Four blocks via _aesni_encrypt4. %xmm10 becomes the next tweak (%xmm14). */
1987bc3d5698SJohn Baldwin.Lxts_enc_four:
1988bc3d5698SJohn Baldwin movups (%rdi),%xmm2
1989bc3d5698SJohn Baldwin movups 16(%rdi),%xmm3
1990bc3d5698SJohn Baldwin movups 32(%rdi),%xmm4
1991bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
1992bc3d5698SJohn Baldwin movups 48(%rdi),%xmm5
1993bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi
1994bc3d5698SJohn Baldwin xorps %xmm11,%xmm3
1995bc3d5698SJohn Baldwin xorps %xmm12,%xmm4
1996bc3d5698SJohn Baldwin xorps %xmm13,%xmm5
1997bc3d5698SJohn Baldwin
1998bc3d5698SJohn Baldwin call _aesni_encrypt4
1999bc3d5698SJohn Baldwin
2000bc3d5698SJohn Baldwin pxor %xmm10,%xmm2
2001bc3d5698SJohn Baldwin movdqa %xmm14,%xmm10
2002bc3d5698SJohn Baldwin pxor %xmm11,%xmm3
2003bc3d5698SJohn Baldwin pxor %xmm12,%xmm4
2004bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi)
2005bc3d5698SJohn Baldwin pxor %xmm13,%xmm5
2006bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi)
2007bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi)
2008bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi)
2009bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi
2010bc3d5698SJohn Baldwin jmp .Lxts_enc_done
2011bc3d5698SJohn Baldwin
2012bc3d5698SJohn Baldwin.align 16
/* All whole blocks are written. %r9 & 15 is the count of leftover input bytes, zero means finished. */
2013bc3d5698SJohn Baldwin.Lxts_enc_done:
2014bc3d5698SJohn Baldwin andq $15,%r9
2015bc3d5698SJohn Baldwin jz .Lxts_enc_ret
2016bc3d5698SJohn Baldwin movq %r9,%rdx
2017bc3d5698SJohn Baldwin
/*
 * Ciphertext stealing, one byte per pass: the leading bytes of the last
 * full output block move to the partial output at (%rsi), and the
 * leftover input bytes take their place in that block.
 * NOTE(review): this reads and writes -16(%rsi), so it assumes at least
 * one full block was output, i.e. the caller passes a length of at
 * least 16 - confirm against callers.
 */
2018bc3d5698SJohn Baldwin.Lxts_enc_steal:
2019bc3d5698SJohn Baldwin movzbl (%rdi),%eax
2020bc3d5698SJohn Baldwin movzbl -16(%rsi),%ecx
2021bc3d5698SJohn Baldwin leaq 1(%rdi),%rdi
2022bc3d5698SJohn Baldwin movb %al,-16(%rsi)
2023bc3d5698SJohn Baldwin movb %cl,0(%rsi)
2024bc3d5698SJohn Baldwin leaq 1(%rsi),%rsi
2025bc3d5698SJohn Baldwin subq $1,%rdx
2026bc3d5698SJohn Baldwin jnz .Lxts_enc_steal
2027bc3d5698SJohn Baldwin
/* Rewind %rsi and encrypt the patched block at -16(%rsi) in place with the tweak in %xmm10 (key %rbp, rounds %r10d). */
2028bc3d5698SJohn Baldwin subq %r9,%rsi
2029bc3d5698SJohn Baldwin movq %rbp,%rcx
2030bc3d5698SJohn Baldwin movl %r10d,%eax
2031bc3d5698SJohn Baldwin
2032bc3d5698SJohn Baldwin movups -16(%rsi),%xmm2
2033bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
2034bc3d5698SJohn Baldwin movups (%rcx),%xmm0
2035bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1
2036bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx
2037bc3d5698SJohn Baldwin xorps %xmm0,%xmm2
2038bc3d5698SJohn Baldwin.Loop_enc1_10:
2039bc3d5698SJohn Baldwin.byte 102,15,56,220,209
2040bc3d5698SJohn Baldwin decl %eax
2041bc3d5698SJohn Baldwin movups (%rcx),%xmm1
2042bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx
2043bc3d5698SJohn Baldwin jnz .Loop_enc1_10
2044bc3d5698SJohn Baldwin.byte 102,15,56,221,209
2045bc3d5698SJohn Baldwin xorps %xmm10,%xmm2
2046bc3d5698SJohn Baldwin movups %xmm2,-16(%rsi)
2047bc3d5698SJohn Baldwin
/* Exit: zero every xmm register and the scratch slots at 0..96(%rsp), restore %rbp and the entry %rsp, return. */
2048bc3d5698SJohn Baldwin.Lxts_enc_ret:
2049bc3d5698SJohn Baldwin xorps %xmm0,%xmm0
2050bc3d5698SJohn Baldwin pxor %xmm1,%xmm1
2051bc3d5698SJohn Baldwin pxor %xmm2,%xmm2
2052bc3d5698SJohn Baldwin pxor %xmm3,%xmm3
2053bc3d5698SJohn Baldwin pxor %xmm4,%xmm4
2054bc3d5698SJohn Baldwin pxor %xmm5,%xmm5
2055bc3d5698SJohn Baldwin pxor %xmm6,%xmm6
2056bc3d5698SJohn Baldwin pxor %xmm7,%xmm7
2057bc3d5698SJohn Baldwin movaps %xmm0,0(%rsp)
2058bc3d5698SJohn Baldwin pxor %xmm8,%xmm8
2059bc3d5698SJohn Baldwin movaps %xmm0,16(%rsp)
2060bc3d5698SJohn Baldwin pxor %xmm9,%xmm9
2061bc3d5698SJohn Baldwin movaps %xmm0,32(%rsp)
2062bc3d5698SJohn Baldwin pxor %xmm10,%xmm10
2063bc3d5698SJohn Baldwin movaps %xmm0,48(%rsp)
2064bc3d5698SJohn Baldwin pxor %xmm11,%xmm11
2065bc3d5698SJohn Baldwin movaps %xmm0,64(%rsp)
2066bc3d5698SJohn Baldwin pxor %xmm12,%xmm12
2067bc3d5698SJohn Baldwin movaps %xmm0,80(%rsp)
2068bc3d5698SJohn Baldwin pxor %xmm13,%xmm13
2069bc3d5698SJohn Baldwin movaps %xmm0,96(%rsp)
2070bc3d5698SJohn Baldwin pxor %xmm14,%xmm14
2071bc3d5698SJohn Baldwin pxor %xmm15,%xmm15
2072bc3d5698SJohn Baldwin movq -8(%r11),%rbp
2073bc3d5698SJohn Baldwin.cfi_restore %rbp
2074bc3d5698SJohn Baldwin leaq (%r11),%rsp
2075bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp
2076bc3d5698SJohn Baldwin.Lxts_enc_epilogue:
2077bc3d5698SJohn Baldwin .byte 0xf3,0xc3
2078bc3d5698SJohn Baldwin.cfi_endproc
2079bc3d5698SJohn Baldwin.size aesni_xts_encrypt,.-aesni_xts_encrypt
2080bc3d5698SJohn Baldwin.globl aesni_xts_decrypt
2081bc3d5698SJohn Baldwin.type aesni_xts_decrypt,@function
2082bc3d5698SJohn Baldwin.align 16
2083bc3d5698SJohn Baldwinaesni_xts_decrypt:
2084bc3d5698SJohn Baldwin.cfi_startproc
2085*c0855eaaSJohn Baldwin.byte 243,15,30,250
2086bc3d5698SJohn Baldwin leaq (%rsp),%r11
2087bc3d5698SJohn Baldwin.cfi_def_cfa_register %r11
2088bc3d5698SJohn Baldwin pushq %rbp
2089bc3d5698SJohn Baldwin.cfi_offset %rbp,-16
2090bc3d5698SJohn Baldwin subq $112,%rsp
2091bc3d5698SJohn Baldwin andq $-16,%rsp
2092bc3d5698SJohn Baldwin movups (%r9),%xmm2
2093bc3d5698SJohn Baldwin movl 240(%r8),%eax
2094bc3d5698SJohn Baldwin movl 240(%rcx),%r10d
2095bc3d5698SJohn Baldwin movups (%r8),%xmm0
2096bc3d5698SJohn Baldwin movups 16(%r8),%xmm1
2097bc3d5698SJohn Baldwin leaq 32(%r8),%r8
2098bc3d5698SJohn Baldwin xorps %xmm0,%xmm2
2099bc3d5698SJohn Baldwin.Loop_enc1_11:
2100bc3d5698SJohn Baldwin.byte 102,15,56,220,209
2101bc3d5698SJohn Baldwin decl %eax
2102bc3d5698SJohn Baldwin movups (%r8),%xmm1
2103bc3d5698SJohn Baldwin leaq 16(%r8),%r8
2104bc3d5698SJohn Baldwin jnz .Loop_enc1_11
2105bc3d5698SJohn Baldwin.byte 102,15,56,221,209
2106bc3d5698SJohn Baldwin xorl %eax,%eax
2107bc3d5698SJohn Baldwin testq $15,%rdx
2108bc3d5698SJohn Baldwin setnz %al
2109bc3d5698SJohn Baldwin shlq $4,%rax
2110bc3d5698SJohn Baldwin subq %rax,%rdx
2111bc3d5698SJohn Baldwin
2112bc3d5698SJohn Baldwin movups (%rcx),%xmm0
2113bc3d5698SJohn Baldwin movq %rcx,%rbp
2114bc3d5698SJohn Baldwin movl %r10d,%eax
2115bc3d5698SJohn Baldwin shll $4,%r10d
2116bc3d5698SJohn Baldwin movq %rdx,%r9
2117bc3d5698SJohn Baldwin andq $-16,%rdx
2118bc3d5698SJohn Baldwin
2119bc3d5698SJohn Baldwin movups 16(%rcx,%r10,1),%xmm1
2120bc3d5698SJohn Baldwin
2121bc3d5698SJohn Baldwin movdqa .Lxts_magic(%rip),%xmm8 2122bc3d5698SJohn Baldwin movdqa %xmm2,%xmm15 2123bc3d5698SJohn Baldwin pshufd $0x5f,%xmm2,%xmm9 2124bc3d5698SJohn Baldwin pxor %xmm0,%xmm1 2125bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14 2126bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2127bc3d5698SJohn Baldwin movdqa %xmm15,%xmm10 2128bc3d5698SJohn Baldwin psrad $31,%xmm14 2129bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2130bc3d5698SJohn Baldwin pand %xmm8,%xmm14 2131bc3d5698SJohn Baldwin pxor %xmm0,%xmm10 2132bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2133bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14 2134bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2135bc3d5698SJohn Baldwin movdqa %xmm15,%xmm11 2136bc3d5698SJohn Baldwin psrad $31,%xmm14 2137bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2138bc3d5698SJohn Baldwin pand %xmm8,%xmm14 2139bc3d5698SJohn Baldwin pxor %xmm0,%xmm11 2140bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2141bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14 2142bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2143bc3d5698SJohn Baldwin movdqa %xmm15,%xmm12 2144bc3d5698SJohn Baldwin psrad $31,%xmm14 2145bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2146bc3d5698SJohn Baldwin pand %xmm8,%xmm14 2147bc3d5698SJohn Baldwin pxor %xmm0,%xmm12 2148bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2149bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14 2150bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2151bc3d5698SJohn Baldwin movdqa %xmm15,%xmm13 2152bc3d5698SJohn Baldwin psrad $31,%xmm14 2153bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2154bc3d5698SJohn Baldwin pand %xmm8,%xmm14 2155bc3d5698SJohn Baldwin pxor %xmm0,%xmm13 2156bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2157bc3d5698SJohn Baldwin movdqa %xmm15,%xmm14 2158bc3d5698SJohn Baldwin psrad $31,%xmm9 2159bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2160bc3d5698SJohn Baldwin pand %xmm8,%xmm9 2161bc3d5698SJohn Baldwin pxor %xmm0,%xmm14 2162bc3d5698SJohn Baldwin pxor %xmm9,%xmm15 2163bc3d5698SJohn Baldwin movaps %xmm1,96(%rsp) 2164bc3d5698SJohn Baldwin 2165bc3d5698SJohn Baldwin 
subq $96,%rdx 2166bc3d5698SJohn Baldwin jc .Lxts_dec_short 2167bc3d5698SJohn Baldwin 2168bc3d5698SJohn Baldwin movl $16+96,%eax 2169bc3d5698SJohn Baldwin leaq 32(%rbp,%r10,1),%rcx 2170bc3d5698SJohn Baldwin subq %r10,%rax 2171bc3d5698SJohn Baldwin movups 16(%rbp),%xmm1 2172bc3d5698SJohn Baldwin movq %rax,%r10 2173bc3d5698SJohn Baldwin leaq .Lxts_magic(%rip),%r8 2174bc3d5698SJohn Baldwin jmp .Lxts_dec_grandloop 2175bc3d5698SJohn Baldwin 2176bc3d5698SJohn Baldwin.align 32 2177bc3d5698SJohn Baldwin.Lxts_dec_grandloop: 2178bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm2 2179bc3d5698SJohn Baldwin movdqa %xmm0,%xmm8 2180bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3 2181bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 2182bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4 2183bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 2184bc3d5698SJohn Baldwin.byte 102,15,56,222,209 2185bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5 2186bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 2187bc3d5698SJohn Baldwin.byte 102,15,56,222,217 2188bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6 2189bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 2190bc3d5698SJohn Baldwin.byte 102,15,56,222,225 2191bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm7 2192bc3d5698SJohn Baldwin pxor %xmm15,%xmm8 2193bc3d5698SJohn Baldwin movdqa 96(%rsp),%xmm9 2194bc3d5698SJohn Baldwin pxor %xmm14,%xmm6 2195bc3d5698SJohn Baldwin.byte 102,15,56,222,233 2196bc3d5698SJohn Baldwin movups 32(%rbp),%xmm0 2197bc3d5698SJohn Baldwin leaq 96(%rdi),%rdi 2198bc3d5698SJohn Baldwin pxor %xmm8,%xmm7 2199bc3d5698SJohn Baldwin 2200bc3d5698SJohn Baldwin pxor %xmm9,%xmm10 2201bc3d5698SJohn Baldwin.byte 102,15,56,222,241 2202bc3d5698SJohn Baldwin pxor %xmm9,%xmm11 2203bc3d5698SJohn Baldwin movdqa %xmm10,0(%rsp) 2204bc3d5698SJohn Baldwin.byte 102,15,56,222,249 2205bc3d5698SJohn Baldwin movups 48(%rbp),%xmm1 2206bc3d5698SJohn Baldwin pxor %xmm9,%xmm12 2207bc3d5698SJohn Baldwin 2208bc3d5698SJohn Baldwin.byte 102,15,56,222,208 2209bc3d5698SJohn Baldwin pxor %xmm9,%xmm13 2210bc3d5698SJohn Baldwin movdqa 
%xmm11,16(%rsp) 2211bc3d5698SJohn Baldwin.byte 102,15,56,222,216 2212bc3d5698SJohn Baldwin pxor %xmm9,%xmm14 2213bc3d5698SJohn Baldwin movdqa %xmm12,32(%rsp) 2214bc3d5698SJohn Baldwin.byte 102,15,56,222,224 2215bc3d5698SJohn Baldwin.byte 102,15,56,222,232 2216bc3d5698SJohn Baldwin pxor %xmm9,%xmm8 2217bc3d5698SJohn Baldwin movdqa %xmm14,64(%rsp) 2218bc3d5698SJohn Baldwin.byte 102,15,56,222,240 2219bc3d5698SJohn Baldwin.byte 102,15,56,222,248 2220bc3d5698SJohn Baldwin movups 64(%rbp),%xmm0 2221bc3d5698SJohn Baldwin movdqa %xmm8,80(%rsp) 2222bc3d5698SJohn Baldwin pshufd $0x5f,%xmm15,%xmm9 2223bc3d5698SJohn Baldwin jmp .Lxts_dec_loop6 2224bc3d5698SJohn Baldwin.align 32 2225bc3d5698SJohn Baldwin.Lxts_dec_loop6: 2226bc3d5698SJohn Baldwin.byte 102,15,56,222,209 2227bc3d5698SJohn Baldwin.byte 102,15,56,222,217 2228bc3d5698SJohn Baldwin.byte 102,15,56,222,225 2229bc3d5698SJohn Baldwin.byte 102,15,56,222,233 2230bc3d5698SJohn Baldwin.byte 102,15,56,222,241 2231bc3d5698SJohn Baldwin.byte 102,15,56,222,249 2232bc3d5698SJohn Baldwin movups -64(%rcx,%rax,1),%xmm1 2233bc3d5698SJohn Baldwin addq $32,%rax 2234bc3d5698SJohn Baldwin 2235bc3d5698SJohn Baldwin.byte 102,15,56,222,208 2236bc3d5698SJohn Baldwin.byte 102,15,56,222,216 2237bc3d5698SJohn Baldwin.byte 102,15,56,222,224 2238bc3d5698SJohn Baldwin.byte 102,15,56,222,232 2239bc3d5698SJohn Baldwin.byte 102,15,56,222,240 2240bc3d5698SJohn Baldwin.byte 102,15,56,222,248 2241bc3d5698SJohn Baldwin movups -80(%rcx,%rax,1),%xmm0 2242bc3d5698SJohn Baldwin jnz .Lxts_dec_loop6 2243bc3d5698SJohn Baldwin 2244bc3d5698SJohn Baldwin movdqa (%r8),%xmm8 2245bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14 2246bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2247bc3d5698SJohn Baldwin.byte 102,15,56,222,209 2248bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2249bc3d5698SJohn Baldwin psrad $31,%xmm14 2250bc3d5698SJohn Baldwin.byte 102,15,56,222,217 2251bc3d5698SJohn Baldwin pand %xmm8,%xmm14 2252bc3d5698SJohn Baldwin movups (%rbp),%xmm10 2253bc3d5698SJohn Baldwin.byte 
102,15,56,222,225 2254bc3d5698SJohn Baldwin.byte 102,15,56,222,233 2255bc3d5698SJohn Baldwin.byte 102,15,56,222,241 2256bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2257bc3d5698SJohn Baldwin movaps %xmm10,%xmm11 2258bc3d5698SJohn Baldwin.byte 102,15,56,222,249 2259bc3d5698SJohn Baldwin movups -64(%rcx),%xmm1 2260bc3d5698SJohn Baldwin 2261bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14 2262bc3d5698SJohn Baldwin.byte 102,15,56,222,208 2263bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2264bc3d5698SJohn Baldwin pxor %xmm15,%xmm10 2265bc3d5698SJohn Baldwin.byte 102,15,56,222,216 2266bc3d5698SJohn Baldwin psrad $31,%xmm14 2267bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2268bc3d5698SJohn Baldwin.byte 102,15,56,222,224 2269bc3d5698SJohn Baldwin.byte 102,15,56,222,232 2270bc3d5698SJohn Baldwin pand %xmm8,%xmm14 2271bc3d5698SJohn Baldwin movaps %xmm11,%xmm12 2272bc3d5698SJohn Baldwin.byte 102,15,56,222,240 2273bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2274bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14 2275bc3d5698SJohn Baldwin.byte 102,15,56,222,248 2276bc3d5698SJohn Baldwin movups -48(%rcx),%xmm0 2277bc3d5698SJohn Baldwin 2278bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2279bc3d5698SJohn Baldwin.byte 102,15,56,222,209 2280bc3d5698SJohn Baldwin pxor %xmm15,%xmm11 2281bc3d5698SJohn Baldwin psrad $31,%xmm14 2282bc3d5698SJohn Baldwin.byte 102,15,56,222,217 2283bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2284bc3d5698SJohn Baldwin pand %xmm8,%xmm14 2285bc3d5698SJohn Baldwin.byte 102,15,56,222,225 2286bc3d5698SJohn Baldwin.byte 102,15,56,222,233 2287bc3d5698SJohn Baldwin movdqa %xmm13,48(%rsp) 2288bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2289bc3d5698SJohn Baldwin.byte 102,15,56,222,241 2290bc3d5698SJohn Baldwin movaps %xmm12,%xmm13 2291bc3d5698SJohn Baldwin movdqa %xmm9,%xmm14 2292bc3d5698SJohn Baldwin.byte 102,15,56,222,249 2293bc3d5698SJohn Baldwin movups -32(%rcx),%xmm1 2294bc3d5698SJohn Baldwin 2295bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2296bc3d5698SJohn Baldwin.byte 102,15,56,222,208 2297bc3d5698SJohn 
Baldwin pxor %xmm15,%xmm12 2298bc3d5698SJohn Baldwin psrad $31,%xmm14 2299bc3d5698SJohn Baldwin.byte 102,15,56,222,216 2300bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2301bc3d5698SJohn Baldwin pand %xmm8,%xmm14 2302bc3d5698SJohn Baldwin.byte 102,15,56,222,224 2303bc3d5698SJohn Baldwin.byte 102,15,56,222,232 2304bc3d5698SJohn Baldwin.byte 102,15,56,222,240 2305bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2306bc3d5698SJohn Baldwin movaps %xmm13,%xmm14 2307bc3d5698SJohn Baldwin.byte 102,15,56,222,248 2308bc3d5698SJohn Baldwin 2309bc3d5698SJohn Baldwin movdqa %xmm9,%xmm0 2310bc3d5698SJohn Baldwin paddd %xmm9,%xmm9 2311bc3d5698SJohn Baldwin.byte 102,15,56,222,209 2312bc3d5698SJohn Baldwin pxor %xmm15,%xmm13 2313bc3d5698SJohn Baldwin psrad $31,%xmm0 2314bc3d5698SJohn Baldwin.byte 102,15,56,222,217 2315bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2316bc3d5698SJohn Baldwin pand %xmm8,%xmm0 2317bc3d5698SJohn Baldwin.byte 102,15,56,222,225 2318bc3d5698SJohn Baldwin.byte 102,15,56,222,233 2319bc3d5698SJohn Baldwin pxor %xmm0,%xmm15 2320bc3d5698SJohn Baldwin movups (%rbp),%xmm0 2321bc3d5698SJohn Baldwin.byte 102,15,56,222,241 2322bc3d5698SJohn Baldwin.byte 102,15,56,222,249 2323bc3d5698SJohn Baldwin movups 16(%rbp),%xmm1 2324bc3d5698SJohn Baldwin 2325bc3d5698SJohn Baldwin pxor %xmm15,%xmm14 2326bc3d5698SJohn Baldwin.byte 102,15,56,223,84,36,0 2327bc3d5698SJohn Baldwin psrad $31,%xmm9 2328bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2329bc3d5698SJohn Baldwin.byte 102,15,56,223,92,36,16 2330bc3d5698SJohn Baldwin.byte 102,15,56,223,100,36,32 2331bc3d5698SJohn Baldwin pand %xmm8,%xmm9 2332bc3d5698SJohn Baldwin movq %r10,%rax 2333bc3d5698SJohn Baldwin.byte 102,15,56,223,108,36,48 2334bc3d5698SJohn Baldwin.byte 102,15,56,223,116,36,64 2335bc3d5698SJohn Baldwin.byte 102,15,56,223,124,36,80 2336bc3d5698SJohn Baldwin pxor %xmm9,%xmm15 2337bc3d5698SJohn Baldwin 2338bc3d5698SJohn Baldwin leaq 96(%rsi),%rsi 2339bc3d5698SJohn Baldwin movups %xmm2,-96(%rsi) 2340bc3d5698SJohn Baldwin movups 
%xmm3,-80(%rsi) 2341bc3d5698SJohn Baldwin movups %xmm4,-64(%rsi) 2342bc3d5698SJohn Baldwin movups %xmm5,-48(%rsi) 2343bc3d5698SJohn Baldwin movups %xmm6,-32(%rsi) 2344bc3d5698SJohn Baldwin movups %xmm7,-16(%rsi) 2345bc3d5698SJohn Baldwin subq $96,%rdx 2346bc3d5698SJohn Baldwin jnc .Lxts_dec_grandloop 2347bc3d5698SJohn Baldwin 2348bc3d5698SJohn Baldwin movl $16+96,%eax 2349bc3d5698SJohn Baldwin subl %r10d,%eax 2350bc3d5698SJohn Baldwin movq %rbp,%rcx 2351bc3d5698SJohn Baldwin shrl $4,%eax 2352bc3d5698SJohn Baldwin 2353bc3d5698SJohn Baldwin.Lxts_dec_short: 2354bc3d5698SJohn Baldwin 2355bc3d5698SJohn Baldwin movl %eax,%r10d 2356bc3d5698SJohn Baldwin pxor %xmm0,%xmm10 2357bc3d5698SJohn Baldwin pxor %xmm0,%xmm11 2358bc3d5698SJohn Baldwin addq $96,%rdx 2359bc3d5698SJohn Baldwin jz .Lxts_dec_done 2360bc3d5698SJohn Baldwin 2361bc3d5698SJohn Baldwin pxor %xmm0,%xmm12 2362bc3d5698SJohn Baldwin cmpq $0x20,%rdx 2363bc3d5698SJohn Baldwin jb .Lxts_dec_one 2364bc3d5698SJohn Baldwin pxor %xmm0,%xmm13 2365bc3d5698SJohn Baldwin je .Lxts_dec_two 2366bc3d5698SJohn Baldwin 2367bc3d5698SJohn Baldwin pxor %xmm0,%xmm14 2368bc3d5698SJohn Baldwin cmpq $0x40,%rdx 2369bc3d5698SJohn Baldwin jb .Lxts_dec_three 2370bc3d5698SJohn Baldwin je .Lxts_dec_four 2371bc3d5698SJohn Baldwin 2372bc3d5698SJohn Baldwin movdqu (%rdi),%xmm2 2373bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3 2374bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4 2375bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 2376bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5 2377bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 2378bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6 2379bc3d5698SJohn Baldwin leaq 80(%rdi),%rdi 2380bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 2381bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 2382bc3d5698SJohn Baldwin pxor %xmm14,%xmm6 2383bc3d5698SJohn Baldwin 2384bc3d5698SJohn Baldwin call _aesni_decrypt6 2385bc3d5698SJohn Baldwin 2386bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2387bc3d5698SJohn Baldwin xorps %xmm11,%xmm3 2388bc3d5698SJohn Baldwin xorps 
%xmm12,%xmm4 2389bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 2390bc3d5698SJohn Baldwin xorps %xmm13,%xmm5 2391bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 2392bc3d5698SJohn Baldwin xorps %xmm14,%xmm6 2393bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 2394bc3d5698SJohn Baldwin pxor %xmm14,%xmm14 2395bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi) 2396bc3d5698SJohn Baldwin pcmpgtd %xmm15,%xmm14 2397bc3d5698SJohn Baldwin movdqu %xmm6,64(%rsi) 2398bc3d5698SJohn Baldwin leaq 80(%rsi),%rsi 2399bc3d5698SJohn Baldwin pshufd $0x13,%xmm14,%xmm11 2400bc3d5698SJohn Baldwin andq $15,%r9 2401bc3d5698SJohn Baldwin jz .Lxts_dec_ret 2402bc3d5698SJohn Baldwin 2403bc3d5698SJohn Baldwin movdqa %xmm15,%xmm10 2404bc3d5698SJohn Baldwin paddq %xmm15,%xmm15 2405bc3d5698SJohn Baldwin pand %xmm8,%xmm11 2406bc3d5698SJohn Baldwin pxor %xmm15,%xmm11 2407bc3d5698SJohn Baldwin jmp .Lxts_dec_done2 2408bc3d5698SJohn Baldwin 2409bc3d5698SJohn Baldwin.align 16 2410bc3d5698SJohn Baldwin.Lxts_dec_one: 2411bc3d5698SJohn Baldwin movups (%rdi),%xmm2 2412bc3d5698SJohn Baldwin leaq 16(%rdi),%rdi 2413bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2414bc3d5698SJohn Baldwin movups (%rcx),%xmm0 2415bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 2416bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 2417bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 2418bc3d5698SJohn Baldwin.Loop_dec1_12: 2419bc3d5698SJohn Baldwin.byte 102,15,56,222,209 2420bc3d5698SJohn Baldwin decl %eax 2421bc3d5698SJohn Baldwin movups (%rcx),%xmm1 2422bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 2423bc3d5698SJohn Baldwin jnz .Loop_dec1_12 2424bc3d5698SJohn Baldwin.byte 102,15,56,223,209 2425bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2426bc3d5698SJohn Baldwin movdqa %xmm11,%xmm10 2427bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 2428bc3d5698SJohn Baldwin movdqa %xmm12,%xmm11 2429bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 2430bc3d5698SJohn Baldwin jmp .Lxts_dec_done 2431bc3d5698SJohn Baldwin 2432bc3d5698SJohn Baldwin.align 16 2433bc3d5698SJohn Baldwin.Lxts_dec_two: 2434bc3d5698SJohn Baldwin 
movups (%rdi),%xmm2 2435bc3d5698SJohn Baldwin movups 16(%rdi),%xmm3 2436bc3d5698SJohn Baldwin leaq 32(%rdi),%rdi 2437bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2438bc3d5698SJohn Baldwin xorps %xmm11,%xmm3 2439bc3d5698SJohn Baldwin 2440bc3d5698SJohn Baldwin call _aesni_decrypt2 2441bc3d5698SJohn Baldwin 2442bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2443bc3d5698SJohn Baldwin movdqa %xmm12,%xmm10 2444bc3d5698SJohn Baldwin xorps %xmm11,%xmm3 2445bc3d5698SJohn Baldwin movdqa %xmm13,%xmm11 2446bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 2447bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 2448bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 2449bc3d5698SJohn Baldwin jmp .Lxts_dec_done 2450bc3d5698SJohn Baldwin 2451bc3d5698SJohn Baldwin.align 16 2452bc3d5698SJohn Baldwin.Lxts_dec_three: 2453bc3d5698SJohn Baldwin movups (%rdi),%xmm2 2454bc3d5698SJohn Baldwin movups 16(%rdi),%xmm3 2455bc3d5698SJohn Baldwin movups 32(%rdi),%xmm4 2456bc3d5698SJohn Baldwin leaq 48(%rdi),%rdi 2457bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2458bc3d5698SJohn Baldwin xorps %xmm11,%xmm3 2459bc3d5698SJohn Baldwin xorps %xmm12,%xmm4 2460bc3d5698SJohn Baldwin 2461bc3d5698SJohn Baldwin call _aesni_decrypt3 2462bc3d5698SJohn Baldwin 2463bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2464bc3d5698SJohn Baldwin movdqa %xmm13,%xmm10 2465bc3d5698SJohn Baldwin xorps %xmm11,%xmm3 2466bc3d5698SJohn Baldwin movdqa %xmm14,%xmm11 2467bc3d5698SJohn Baldwin xorps %xmm12,%xmm4 2468bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 2469bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 2470bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 2471bc3d5698SJohn Baldwin leaq 48(%rsi),%rsi 2472bc3d5698SJohn Baldwin jmp .Lxts_dec_done 2473bc3d5698SJohn Baldwin 2474bc3d5698SJohn Baldwin.align 16 2475bc3d5698SJohn Baldwin.Lxts_dec_four: 2476bc3d5698SJohn Baldwin movups (%rdi),%xmm2 2477bc3d5698SJohn Baldwin movups 16(%rdi),%xmm3 2478bc3d5698SJohn Baldwin movups 32(%rdi),%xmm4 2479bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2480bc3d5698SJohn Baldwin movups 48(%rdi),%xmm5 
2481bc3d5698SJohn Baldwin leaq 64(%rdi),%rdi 2482bc3d5698SJohn Baldwin xorps %xmm11,%xmm3 2483bc3d5698SJohn Baldwin xorps %xmm12,%xmm4 2484bc3d5698SJohn Baldwin xorps %xmm13,%xmm5 2485bc3d5698SJohn Baldwin 2486bc3d5698SJohn Baldwin call _aesni_decrypt4 2487bc3d5698SJohn Baldwin 2488bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 2489bc3d5698SJohn Baldwin movdqa %xmm14,%xmm10 2490bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 2491bc3d5698SJohn Baldwin movdqa %xmm15,%xmm11 2492bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 2493bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 2494bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 2495bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 2496bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 2497bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi) 2498bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 2499bc3d5698SJohn Baldwin jmp .Lxts_dec_done 2500bc3d5698SJohn Baldwin 2501bc3d5698SJohn Baldwin.align 16 2502bc3d5698SJohn Baldwin.Lxts_dec_done: 2503bc3d5698SJohn Baldwin andq $15,%r9 2504bc3d5698SJohn Baldwin jz .Lxts_dec_ret 2505bc3d5698SJohn Baldwin.Lxts_dec_done2: 2506bc3d5698SJohn Baldwin movq %r9,%rdx 2507bc3d5698SJohn Baldwin movq %rbp,%rcx 2508bc3d5698SJohn Baldwin movl %r10d,%eax 2509bc3d5698SJohn Baldwin 2510bc3d5698SJohn Baldwin movups (%rdi),%xmm2 2511bc3d5698SJohn Baldwin xorps %xmm11,%xmm2 2512bc3d5698SJohn Baldwin movups (%rcx),%xmm0 2513bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 2514bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 2515bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 2516bc3d5698SJohn Baldwin.Loop_dec1_13: 2517bc3d5698SJohn Baldwin.byte 102,15,56,222,209 2518bc3d5698SJohn Baldwin decl %eax 2519bc3d5698SJohn Baldwin movups (%rcx),%xmm1 2520bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 2521bc3d5698SJohn Baldwin jnz .Loop_dec1_13 2522bc3d5698SJohn Baldwin.byte 102,15,56,223,209 2523bc3d5698SJohn Baldwin xorps %xmm11,%xmm2 2524bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 2525bc3d5698SJohn Baldwin 2526bc3d5698SJohn Baldwin.Lxts_dec_steal: 2527bc3d5698SJohn Baldwin movzbl 16(%rdi),%eax 
2528bc3d5698SJohn Baldwin movzbl (%rsi),%ecx 2529bc3d5698SJohn Baldwin leaq 1(%rdi),%rdi 2530bc3d5698SJohn Baldwin movb %al,(%rsi) 2531bc3d5698SJohn Baldwin movb %cl,16(%rsi) 2532bc3d5698SJohn Baldwin leaq 1(%rsi),%rsi 2533bc3d5698SJohn Baldwin subq $1,%rdx 2534bc3d5698SJohn Baldwin jnz .Lxts_dec_steal 2535bc3d5698SJohn Baldwin 2536bc3d5698SJohn Baldwin subq %r9,%rsi 2537bc3d5698SJohn Baldwin movq %rbp,%rcx 2538bc3d5698SJohn Baldwin movl %r10d,%eax 2539bc3d5698SJohn Baldwin 2540bc3d5698SJohn Baldwin movups (%rsi),%xmm2 2541bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2542bc3d5698SJohn Baldwin movups (%rcx),%xmm0 2543bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 2544bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 2545bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 2546bc3d5698SJohn Baldwin.Loop_dec1_14: 2547bc3d5698SJohn Baldwin.byte 102,15,56,222,209 2548bc3d5698SJohn Baldwin decl %eax 2549bc3d5698SJohn Baldwin movups (%rcx),%xmm1 2550bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 2551bc3d5698SJohn Baldwin jnz .Loop_dec1_14 2552bc3d5698SJohn Baldwin.byte 102,15,56,223,209 2553bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 2554bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 2555bc3d5698SJohn Baldwin 2556bc3d5698SJohn Baldwin.Lxts_dec_ret: 2557bc3d5698SJohn Baldwin xorps %xmm0,%xmm0 2558bc3d5698SJohn Baldwin pxor %xmm1,%xmm1 2559bc3d5698SJohn Baldwin pxor %xmm2,%xmm2 2560bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 2561bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 2562bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 2563bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 2564bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 2565bc3d5698SJohn Baldwin movaps %xmm0,0(%rsp) 2566bc3d5698SJohn Baldwin pxor %xmm8,%xmm8 2567bc3d5698SJohn Baldwin movaps %xmm0,16(%rsp) 2568bc3d5698SJohn Baldwin pxor %xmm9,%xmm9 2569bc3d5698SJohn Baldwin movaps %xmm0,32(%rsp) 2570bc3d5698SJohn Baldwin pxor %xmm10,%xmm10 2571bc3d5698SJohn Baldwin movaps %xmm0,48(%rsp) 2572bc3d5698SJohn Baldwin pxor %xmm11,%xmm11 2573bc3d5698SJohn Baldwin movaps %xmm0,64(%rsp) 2574bc3d5698SJohn 
/*
 * NOTE: in this listing many original source lines are fused onto each
 * physical line (together with per-line annotation residue), so function
 * boundaries fall in the middle of a physical line.
 *
 * The physical line below first finishes aesni_xts_decrypt: it zeroes
 * %xmm12-%xmm15, stores %xmm0 to 80(%rsp) and 96(%rsp), reloads %rbp from
 * -8(%r11), restores %rsp from %r11 and returns (.byte 0xf3,0xc3 encodes
 * "rep ret").  It then opens aesni_ocb_encrypt.
 *
 * aesni_ocb_encrypt -- register usage as visible in the code:
 *   %rdi  input pointer; 16-byte blocks are loaded from it
 *   %rsi  output pointer; 16-byte blocks are stored to it
 *   %rdx  remaining block count (reduced by 1 or by 6 per step)
 *   %rcx  key schedule base on entry (copied to %r11).  240(%rcx) is read
 *         as a 32-bit value and multiplied by 16 -- presumably the round
 *         count; TODO confirm against the key-schedule layout
 *   %r8   counter whose lowest set bit (bsf) selects a 16-byte table entry;
 *         presumably the block number -- confirm against the caller
 *   %r9   pointer to a 16-byte value loaded into %xmm15 and written back
 *         at exit
 *   8(%rsp) and 16(%rsp) at entry: two more pointer arguments, loaded into
 *         %rbx (table of 16-byte entries) and %rbp (16-byte accumulator
 *         kept in %xmm8 and stored back at exit)
 *
 * .byte 243,15,30,250 encodes endbr64.  %rbx, %rbp, %r12, %r13 and %r14 are
 * pushed here and reloaded in the epilogue.
 */
Baldwin pxor %xmm12,%xmm12 2575bc3d5698SJohn Baldwin movaps %xmm0,80(%rsp) 2576bc3d5698SJohn Baldwin pxor %xmm13,%xmm13 2577bc3d5698SJohn Baldwin movaps %xmm0,96(%rsp) 2578bc3d5698SJohn Baldwin pxor %xmm14,%xmm14 2579bc3d5698SJohn Baldwin pxor %xmm15,%xmm15 2580bc3d5698SJohn Baldwin movq -8(%r11),%rbp 2581bc3d5698SJohn Baldwin.cfi_restore %rbp 2582bc3d5698SJohn Baldwin leaq (%r11),%rsp 2583bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 2584bc3d5698SJohn Baldwin.Lxts_dec_epilogue: 2585bc3d5698SJohn Baldwin .byte 0xf3,0xc3 2586bc3d5698SJohn Baldwin.cfi_endproc 2587bc3d5698SJohn Baldwin.size aesni_xts_decrypt,.-aesni_xts_decrypt 2588bc3d5698SJohn Baldwin.globl aesni_ocb_encrypt 2589bc3d5698SJohn Baldwin.type aesni_ocb_encrypt,@function 2590bc3d5698SJohn Baldwin.align 32 2591bc3d5698SJohn Baldwinaesni_ocb_encrypt: 2592bc3d5698SJohn Baldwin.cfi_startproc 2593*c0855eaaSJohn Baldwin.byte 243,15,30,250 2594bc3d5698SJohn Baldwin leaq (%rsp),%rax 2595bc3d5698SJohn Baldwin pushq %rbx 2596bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 2597bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 2598bc3d5698SJohn Baldwin pushq %rbp 2599bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 2600bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 2601bc3d5698SJohn Baldwin pushq %r12 2602bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 2603bc3d5698SJohn Baldwin.cfi_offset %r12,-32 2604bc3d5698SJohn Baldwin pushq %r13 2605bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 2606bc3d5698SJohn Baldwin.cfi_offset %r13,-40 2607bc3d5698SJohn Baldwin pushq %r14 2608bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 2609bc3d5698SJohn Baldwin.cfi_offset %r14,-48 2610bc3d5698SJohn Baldwin movq 8(%rax),%rbx 2611bc3d5698SJohn Baldwin movq 8+8(%rax),%rbp 2612bc3d5698SJohn Baldwin 2613bc3d5698SJohn Baldwin movl 240(%rcx),%r10d 2614bc3d5698SJohn Baldwin movq %rcx,%r11 2615bc3d5698SJohn Baldwin shll $4,%r10d 2616bc3d5698SJohn Baldwin movups (%rcx),%xmm9 2617bc3d5698SJohn Baldwin movups 16(%rcx,%r10,1),%xmm1 2618bc3d5698SJohn Baldwin
/*
 * Setup continues on the next physical line:
 *   - the key at 16(%rcx,%r10) (loaded above) is XORed into both %xmm9
 *     (the key at offset 0) and %xmm15 (the value read through %r9);
 *   - %rcx is repointed to 32(%r11,%r10) and %rax = 48 - %r10 is saved in
 *     %r10: this is the key offset, relative to %rcx, that the round loops
 *     in the helpers step by 32 until it reaches zero;
 *   - %xmm1 = key at 16(%r11), %xmm10 = table entry 0 at (%rbx),
 *     %xmm8 = accumulator read through %rbp.
 * If bit 0 of %r8 is clear, one block is handled alone through
 * __ocb_encrypt1 with the table entry at offset 16*bsf(%r8) in %xmm7, and
 * %r8 is incremented.
 * .Locb_enc_odd computes 16*bsf(%r8+1), 16*bsf(%r8+3), 16*bsf(%r8+5) into
 * %r12, %r13, %r14, adds 6 to %r8, and enters the six-block loop unless
 * subtracting 6 from %rdx borrows (then .Locb_enc_short).
 */
2619bc3d5698SJohn Baldwin movdqu (%r9),%xmm15 2620bc3d5698SJohn Baldwin pxor %xmm1,%xmm9 2621bc3d5698SJohn Baldwin pxor %xmm1,%xmm15 2622bc3d5698SJohn Baldwin 2623bc3d5698SJohn Baldwin movl $16+32,%eax 2624bc3d5698SJohn Baldwin leaq 32(%r11,%r10,1),%rcx 2625bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 2626bc3d5698SJohn Baldwin subq %r10,%rax 2627bc3d5698SJohn Baldwin movq %rax,%r10 2628bc3d5698SJohn Baldwin 2629bc3d5698SJohn Baldwin movdqu (%rbx),%xmm10 2630bc3d5698SJohn Baldwin movdqu (%rbp),%xmm8 2631bc3d5698SJohn Baldwin 2632bc3d5698SJohn Baldwin testq $1,%r8 2633bc3d5698SJohn Baldwin jnz .Locb_enc_odd 2634bc3d5698SJohn Baldwin 2635bc3d5698SJohn Baldwin bsfq %r8,%r12 2636bc3d5698SJohn Baldwin addq $1,%r8 2637bc3d5698SJohn Baldwin shlq $4,%r12 2638bc3d5698SJohn Baldwin movdqu (%rbx,%r12,1),%xmm7 2639bc3d5698SJohn Baldwin movdqu (%rdi),%xmm2 2640bc3d5698SJohn Baldwin leaq 16(%rdi),%rdi 2641bc3d5698SJohn Baldwin 2642bc3d5698SJohn Baldwin call __ocb_encrypt1 2643bc3d5698SJohn Baldwin 2644bc3d5698SJohn Baldwin movdqa %xmm7,%xmm15 2645bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 2646bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 2647bc3d5698SJohn Baldwin subq $1,%rdx 2648bc3d5698SJohn Baldwin jz .Locb_enc_done 2649bc3d5698SJohn Baldwin 2650bc3d5698SJohn Baldwin.Locb_enc_odd: 2651bc3d5698SJohn Baldwin leaq 1(%r8),%r12 2652bc3d5698SJohn Baldwin leaq 3(%r8),%r13 2653bc3d5698SJohn Baldwin leaq 5(%r8),%r14 2654bc3d5698SJohn Baldwin leaq 6(%r8),%r8 2655bc3d5698SJohn Baldwin bsfq %r12,%r12 2656bc3d5698SJohn Baldwin bsfq %r13,%r13 2657bc3d5698SJohn Baldwin bsfq %r14,%r14 2658bc3d5698SJohn Baldwin shlq $4,%r12 2659bc3d5698SJohn Baldwin shlq $4,%r13 2660bc3d5698SJohn Baldwin shlq $4,%r14 2661bc3d5698SJohn Baldwin 2662bc3d5698SJohn Baldwin subq $6,%rdx 2663bc3d5698SJohn Baldwin jc .Locb_enc_short 2664bc3d5698SJohn Baldwin jmp .Locb_enc_grandloop 2665bc3d5698SJohn Baldwin 2666bc3d5698SJohn Baldwin.align 32 2667bc3d5698SJohn Baldwin.Locb_enc_grandloop: 2668bc3d5698SJohn Baldwin movdqu
/*
 * .Locb_enc_grandloop (label is at the end of the previous physical line):
 * loads six blocks from %rdi into %xmm2-%xmm7, advances %rdi by 96, calls
 * __ocb_encrypt6, stores the six results to %rsi and advances it by 96.
 * The loop repeats while "subq $6,%rdx" does not borrow.
 *
 * .Locb_enc_short: adds the 6 back; zero means done.  Otherwise blocks are
 * loaded one at a time while %rdx is compared against 2 and 4 to pick the
 * one/two/three/four-block paths.  The fall-through (five-block) path
 * zeroes %xmm7, reuses __ocb_encrypt6, and takes %xmm14 as the new %xmm15.
 */
0(%rdi),%xmm2 2669bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3 2670bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4 2671bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5 2672bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6 2673bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm7 2674bc3d5698SJohn Baldwin leaq 96(%rdi),%rdi 2675bc3d5698SJohn Baldwin 2676bc3d5698SJohn Baldwin call __ocb_encrypt6 2677bc3d5698SJohn Baldwin 2678bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 2679bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 2680bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 2681bc3d5698SJohn Baldwin movups %xmm5,48(%rsi) 2682bc3d5698SJohn Baldwin movups %xmm6,64(%rsi) 2683bc3d5698SJohn Baldwin movups %xmm7,80(%rsi) 2684bc3d5698SJohn Baldwin leaq 96(%rsi),%rsi 2685bc3d5698SJohn Baldwin subq $6,%rdx 2686bc3d5698SJohn Baldwin jnc .Locb_enc_grandloop 2687bc3d5698SJohn Baldwin 2688bc3d5698SJohn Baldwin.Locb_enc_short: 2689bc3d5698SJohn Baldwin addq $6,%rdx 2690bc3d5698SJohn Baldwin jz .Locb_enc_done 2691bc3d5698SJohn Baldwin 2692bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm2 2693bc3d5698SJohn Baldwin cmpq $2,%rdx 2694bc3d5698SJohn Baldwin jb .Locb_enc_one 2695bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3 2696bc3d5698SJohn Baldwin je .Locb_enc_two 2697bc3d5698SJohn Baldwin 2698bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4 2699bc3d5698SJohn Baldwin cmpq $4,%rdx 2700bc3d5698SJohn Baldwin jb .Locb_enc_three 2701bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5 2702bc3d5698SJohn Baldwin je .Locb_enc_four 2703bc3d5698SJohn Baldwin 2704bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6 2705bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 2706bc3d5698SJohn Baldwin 2707bc3d5698SJohn Baldwin call __ocb_encrypt6 2708bc3d5698SJohn Baldwin 2709bc3d5698SJohn Baldwin movdqa %xmm14,%xmm15 2710bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 2711bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 2712bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 2713bc3d5698SJohn Baldwin movups %xmm5,48(%rsi) 2714bc3d5698SJohn Baldwin movups %xmm6,64(%rsi) 2715bc3d5698SJohn Baldwin
/*
 * Tail paths for one to four blocks:
 *   .Locb_enc_one    %xmm7 = %xmm10, call __ocb_encrypt1, %xmm15 = %xmm7
 *   .Locb_enc_two    zero %xmm4/%xmm5, call __ocb_encrypt4, %xmm15 = %xmm11
 *   .Locb_enc_three  zero %xmm5,       call __ocb_encrypt4, %xmm15 = %xmm12
 *   .Locb_enc_four                     call __ocb_encrypt4, %xmm15 = %xmm13
 * Only the blocks that were actually loaded are stored to %rsi.
 * .Locb_enc_done XORs %xmm0 into %xmm15 and writes %xmm8 back through %rbp;
 * the store of %xmm15 through %r9 and the epilogue are on the following
 * physical line.
 */
2716bc3d5698SJohn Baldwin jmp .Locb_enc_done 2717bc3d5698SJohn Baldwin 2718bc3d5698SJohn Baldwin.align 16 2719bc3d5698SJohn Baldwin.Locb_enc_one: 2720bc3d5698SJohn Baldwin movdqa %xmm10,%xmm7 2721bc3d5698SJohn Baldwin 2722bc3d5698SJohn Baldwin call __ocb_encrypt1 2723bc3d5698SJohn Baldwin 2724bc3d5698SJohn Baldwin movdqa %xmm7,%xmm15 2725bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 2726bc3d5698SJohn Baldwin jmp .Locb_enc_done 2727bc3d5698SJohn Baldwin 2728bc3d5698SJohn Baldwin.align 16 2729bc3d5698SJohn Baldwin.Locb_enc_two: 2730bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 2731bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 2732bc3d5698SJohn Baldwin 2733bc3d5698SJohn Baldwin call __ocb_encrypt4 2734bc3d5698SJohn Baldwin 2735bc3d5698SJohn Baldwin movdqa %xmm11,%xmm15 2736bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 2737bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 2738bc3d5698SJohn Baldwin 2739bc3d5698SJohn Baldwin jmp .Locb_enc_done 2740bc3d5698SJohn Baldwin 2741bc3d5698SJohn Baldwin.align 16 2742bc3d5698SJohn Baldwin.Locb_enc_three: 2743bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 2744bc3d5698SJohn Baldwin 2745bc3d5698SJohn Baldwin call __ocb_encrypt4 2746bc3d5698SJohn Baldwin 2747bc3d5698SJohn Baldwin movdqa %xmm12,%xmm15 2748bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 2749bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 2750bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 2751bc3d5698SJohn Baldwin 2752bc3d5698SJohn Baldwin jmp .Locb_enc_done 2753bc3d5698SJohn Baldwin 2754bc3d5698SJohn Baldwin.align 16 2755bc3d5698SJohn Baldwin.Locb_enc_four: 2756bc3d5698SJohn Baldwin call __ocb_encrypt4 2757bc3d5698SJohn Baldwin 2758bc3d5698SJohn Baldwin movdqa %xmm13,%xmm15 2759bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 2760bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 2761bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 2762bc3d5698SJohn Baldwin movups %xmm5,48(%rsi) 2763bc3d5698SJohn Baldwin 2764bc3d5698SJohn Baldwin.Locb_enc_done: 2765bc3d5698SJohn Baldwin pxor %xmm0,%xmm15 2766bc3d5698SJohn Baldwin movdqu %xmm8,(%rbp)
/*
 * NOTE: several original source lines are fused onto each physical line in
 * this listing, so function boundaries fall mid-line.
 *
 * The physical line below first completes aesni_ocb_encrypt: it stores
 * %xmm15 through %r9, zeroes %xmm0-%xmm15, reloads %r14, %r13, %r12, %rbp
 * and %rbx from the five 8-byte slots below %rsp+40, restores %rsp and
 * returns (.byte 0xf3,0xc3 encodes "rep ret").
 *
 * It then opens __ocb_encrypt6, a file-local helper (no .globl) that
 * processes the six blocks held in %xmm2-%xmm7.
 *
 * Values it reads, as visible in the code:
 *   %xmm15       running 16-byte value carried between calls
 *   %xmm9        16-byte value XORed into %xmm15 on entry and into every
 *                mask before the final round
 *   %xmm10       expected to hold table entry 0 from (%rbx); it is
 *                reloaded from (%rbx) before returning
 *   %r12-%r14    byte offsets into the table at %rbx
 *   %xmm8        accumulator: each input block is XORed into it before
 *                the block is masked
 *   %xmm1        first-round key; reloaded from 16(%r11) before returning
 *   %r11         key schedule base; %rcx/%rax key cursor for the loop
 *   %r10         saved start value of %rax, copied back before returning
 *   %r8          counter; advanced by 6 here
 *
 * Results: %xmm2-%xmm7 processed blocks; %xmm11-%xmm15 the masks of blocks
 * 2..6 with %xmm9 folded in; %r12-%r14 offsets for the next batch
 * (16*bsf of %r8+1, %r8+3, %r8+5 taken before %r8 is advanced).
 */
2767bc3d5698SJohn Baldwin movdqu %xmm15,(%r9) 2768bc3d5698SJohn Baldwin 2769bc3d5698SJohn Baldwin xorps %xmm0,%xmm0 2770bc3d5698SJohn Baldwin pxor %xmm1,%xmm1 2771bc3d5698SJohn Baldwin pxor %xmm2,%xmm2 2772bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 2773bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 2774bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 2775bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 2776bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 2777bc3d5698SJohn Baldwin pxor %xmm8,%xmm8 2778bc3d5698SJohn Baldwin pxor %xmm9,%xmm9 2779bc3d5698SJohn Baldwin pxor %xmm10,%xmm10 2780bc3d5698SJohn Baldwin pxor %xmm11,%xmm11 2781bc3d5698SJohn Baldwin pxor %xmm12,%xmm12 2782bc3d5698SJohn Baldwin pxor %xmm13,%xmm13 2783bc3d5698SJohn Baldwin pxor %xmm14,%xmm14 2784bc3d5698SJohn Baldwin pxor %xmm15,%xmm15 2785bc3d5698SJohn Baldwin leaq 40(%rsp),%rax 2786bc3d5698SJohn Baldwin.cfi_def_cfa %rax,8 2787bc3d5698SJohn Baldwin movq -40(%rax),%r14 2788bc3d5698SJohn Baldwin.cfi_restore %r14 2789bc3d5698SJohn Baldwin movq -32(%rax),%r13 2790bc3d5698SJohn Baldwin.cfi_restore %r13 2791bc3d5698SJohn Baldwin movq -24(%rax),%r12 2792bc3d5698SJohn Baldwin.cfi_restore %r12 2793bc3d5698SJohn Baldwin movq -16(%rax),%rbp 2794bc3d5698SJohn Baldwin.cfi_restore %rbp 2795bc3d5698SJohn Baldwin movq -8(%rax),%rbx 2796bc3d5698SJohn Baldwin.cfi_restore %rbx 2797bc3d5698SJohn Baldwin leaq (%rax),%rsp 2798bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 2799bc3d5698SJohn Baldwin.Locb_enc_epilogue: 2800bc3d5698SJohn Baldwin .byte 0xf3,0xc3 2801bc3d5698SJohn Baldwin.cfi_endproc 2802bc3d5698SJohn Baldwin.size aesni_ocb_encrypt,.-aesni_ocb_encrypt 2803bc3d5698SJohn Baldwin 2804bc3d5698SJohn Baldwin.type __ocb_encrypt6,@function 2805bc3d5698SJohn Baldwin.align 32 2806bc3d5698SJohn Baldwin__ocb_encrypt6: 2807bc3d5698SJohn Baldwin.cfi_startproc 2808bc3d5698SJohn Baldwin pxor %xmm9,%xmm15 2809bc3d5698SJohn Baldwin movdqu (%rbx,%r12,1),%xmm11 2810bc3d5698SJohn Baldwin movdqa %xmm10,%xmm12 2811bc3d5698SJohn Baldwin movdqu (%rbx,%r13,1),%xmm13
/*
 * Mask chain (continues from the loads above).  Each mask is the previous
 * mask XOR one table value:
 *   %xmm10 = %xmm10 ^ %xmm15            -> block in %xmm2
 *   %xmm11 = table[%r12] ^ %xmm10       -> %xmm3
 *   %xmm12 = old %xmm10 ^ %xmm11        -> %xmm4
 *   %xmm13 = table[%r13] ^ %xmm12       -> %xmm5
 *   %xmm14 = old %xmm10 ^ %xmm13        -> %xmm6
 *   %xmm15 = table[%r14] ^ %xmm14       -> %xmm7
 * Every block is XORed into %xmm8 first, then XORed with its mask.
 * While the first rounds run, %r12-%r14 are recomputed from %r8 for the
 * next batch, %r8 is advanced by 6, and %xmm9 is XORed into each mask so
 * the masks can later serve as the last-round operand.
 * .byte 102,15,56,220,N encodes aesenc with %xmm1 (N = 209,217,...,249) or
 * with %xmm0 (N = 208,216,...,248) applied to %xmm2..%xmm7.
 */
2812bc3d5698SJohn Baldwin movdqa %xmm10,%xmm14 2813bc3d5698SJohn Baldwin pxor %xmm15,%xmm10 2814bc3d5698SJohn Baldwin movdqu (%rbx,%r14,1),%xmm15 2815bc3d5698SJohn Baldwin pxor %xmm10,%xmm11 2816bc3d5698SJohn Baldwin pxor %xmm2,%xmm8 2817bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 2818bc3d5698SJohn Baldwin pxor %xmm11,%xmm12 2819bc3d5698SJohn Baldwin pxor %xmm3,%xmm8 2820bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 2821bc3d5698SJohn Baldwin pxor %xmm12,%xmm13 2822bc3d5698SJohn Baldwin pxor %xmm4,%xmm8 2823bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 2824bc3d5698SJohn Baldwin pxor %xmm13,%xmm14 2825bc3d5698SJohn Baldwin pxor %xmm5,%xmm8 2826bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 2827bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 2828bc3d5698SJohn Baldwin pxor %xmm6,%xmm8 2829bc3d5698SJohn Baldwin pxor %xmm14,%xmm6 2830bc3d5698SJohn Baldwin pxor %xmm7,%xmm8 2831bc3d5698SJohn Baldwin pxor %xmm15,%xmm7 2832bc3d5698SJohn Baldwin movups 32(%r11),%xmm0 2833bc3d5698SJohn Baldwin 2834bc3d5698SJohn Baldwin leaq 1(%r8),%r12 2835bc3d5698SJohn Baldwin leaq 3(%r8),%r13 2836bc3d5698SJohn Baldwin leaq 5(%r8),%r14 2837bc3d5698SJohn Baldwin addq $6,%r8 2838bc3d5698SJohn Baldwin pxor %xmm9,%xmm10 2839bc3d5698SJohn Baldwin bsfq %r12,%r12 2840bc3d5698SJohn Baldwin bsfq %r13,%r13 2841bc3d5698SJohn Baldwin bsfq %r14,%r14 2842bc3d5698SJohn Baldwin 2843bc3d5698SJohn Baldwin.byte 102,15,56,220,209 2844bc3d5698SJohn Baldwin.byte 102,15,56,220,217 2845bc3d5698SJohn Baldwin.byte 102,15,56,220,225 2846bc3d5698SJohn Baldwin.byte 102,15,56,220,233 2847bc3d5698SJohn Baldwin pxor %xmm9,%xmm11 2848bc3d5698SJohn Baldwin pxor %xmm9,%xmm12 2849bc3d5698SJohn Baldwin.byte 102,15,56,220,241 2850bc3d5698SJohn Baldwin pxor %xmm9,%xmm13 2851bc3d5698SJohn Baldwin pxor %xmm9,%xmm14 2852bc3d5698SJohn Baldwin.byte 102,15,56,220,249 2853bc3d5698SJohn Baldwin movups 48(%r11),%xmm1 2854bc3d5698SJohn Baldwin pxor %xmm9,%xmm15 2855bc3d5698SJohn Baldwin 2856bc3d5698SJohn Baldwin.byte 102,15,56,220,208 2857bc3d5698SJohn Baldwin.byte
/*
 * .Locb_enc_loop6: two aesenc rounds per iteration on all six blocks,
 * alternating %xmm1 and %xmm0; keys are fetched from (%rcx,%rax) and
 * -16(%rcx,%rax) and %rax is stepped by 32 until the add yields zero.
 * After the loop: one more aesenc round with %xmm1, %xmm1 is reloaded from
 * 16(%r11), %r14 is scaled by 16, and the final rounds begin.
 * .byte 102,65,15,56,221,N encodes aesenclast with %xmm10..%xmm15 as the
 * source (N = 210,219,228,237,246,255) into %xmm2..%xmm7.  %xmm10 is
 * reloaded from (%rbx) and %rax from %r10 in between.  The last three
 * aesenclast instructions and the return are on the following physical
 * line.
 */
102,15,56,220,216 2858bc3d5698SJohn Baldwin.byte 102,15,56,220,224 2859bc3d5698SJohn Baldwin.byte 102,15,56,220,232 2860bc3d5698SJohn Baldwin.byte 102,15,56,220,240 2861bc3d5698SJohn Baldwin.byte 102,15,56,220,248 2862bc3d5698SJohn Baldwin movups 64(%r11),%xmm0 2863bc3d5698SJohn Baldwin shlq $4,%r12 2864bc3d5698SJohn Baldwin shlq $4,%r13 2865bc3d5698SJohn Baldwin jmp .Locb_enc_loop6 2866bc3d5698SJohn Baldwin 2867bc3d5698SJohn Baldwin.align 32 2868bc3d5698SJohn Baldwin.Locb_enc_loop6: 2869bc3d5698SJohn Baldwin.byte 102,15,56,220,209 2870bc3d5698SJohn Baldwin.byte 102,15,56,220,217 2871bc3d5698SJohn Baldwin.byte 102,15,56,220,225 2872bc3d5698SJohn Baldwin.byte 102,15,56,220,233 2873bc3d5698SJohn Baldwin.byte 102,15,56,220,241 2874bc3d5698SJohn Baldwin.byte 102,15,56,220,249 2875bc3d5698SJohn Baldwin movups (%rcx,%rax,1),%xmm1 2876bc3d5698SJohn Baldwin addq $32,%rax 2877bc3d5698SJohn Baldwin 2878bc3d5698SJohn Baldwin.byte 102,15,56,220,208 2879bc3d5698SJohn Baldwin.byte 102,15,56,220,216 2880bc3d5698SJohn Baldwin.byte 102,15,56,220,224 2881bc3d5698SJohn Baldwin.byte 102,15,56,220,232 2882bc3d5698SJohn Baldwin.byte 102,15,56,220,240 2883bc3d5698SJohn Baldwin.byte 102,15,56,220,248 2884bc3d5698SJohn Baldwin movups -16(%rcx,%rax,1),%xmm0 2885bc3d5698SJohn Baldwin jnz .Locb_enc_loop6 2886bc3d5698SJohn Baldwin 2887bc3d5698SJohn Baldwin.byte 102,15,56,220,209 2888bc3d5698SJohn Baldwin.byte 102,15,56,220,217 2889bc3d5698SJohn Baldwin.byte 102,15,56,220,225 2890bc3d5698SJohn Baldwin.byte 102,15,56,220,233 2891bc3d5698SJohn Baldwin.byte 102,15,56,220,241 2892bc3d5698SJohn Baldwin.byte 102,15,56,220,249 2893bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 2894bc3d5698SJohn Baldwin shlq $4,%r14 2895bc3d5698SJohn Baldwin 2896bc3d5698SJohn Baldwin.byte 102,65,15,56,221,210 2897bc3d5698SJohn Baldwin movdqu (%rbx),%xmm10 2898bc3d5698SJohn Baldwin movq %r10,%rax 2899bc3d5698SJohn Baldwin.byte 102,65,15,56,221,219 2900bc3d5698SJohn Baldwin.byte 102,65,15,56,221,228 2901bc3d5698SJohn
/*
 * NOTE: several original source lines are fused onto each physical line in
 * this listing, so function boundaries fall mid-line.
 *
 * The physical line below first completes __ocb_encrypt6 (aesenclast with
 * %xmm13, %xmm14, %xmm15 into %xmm5, %xmm6, %xmm7, then "rep ret"), and
 * then opens __ocb_encrypt4, the four-block counterpart operating on
 * %xmm2-%xmm5.
 *
 * __ocb_encrypt4 as visible in the code:
 *   - %xmm15 ^= %xmm9, then the masks are chained:
 *       %xmm10 ^= %xmm15, %xmm11 = table[%r12] ^ %xmm10,
 *       %xmm12 = old %xmm10 ^ %xmm11, %xmm13 = table[%r13] ^ %xmm12
 *     (table = 16-byte entries at %rbx);
 *   - each of %xmm2-%xmm5 is XORed into the accumulator %xmm8 and then
 *     XORed with its mask;
 *   - %xmm9 is XORed into %xmm10-%xmm13 before they are used by the final
 *     aesenclast instructions;
 *   - unlike the six-block helper it does not modify %r8, %r12, %r13 or
 *     %r14, and does not reload %xmm10 from (%rbx).
 * .byte 102,15,56,220,N encodes aesenc with %xmm1 (N = 209,...) or %xmm0
 * (N = 208,...).  The round loop, the final rounds and the return are on
 * the following physical line.
 */
Baldwin.byte 102,65,15,56,221,237 2902bc3d5698SJohn Baldwin.byte 102,65,15,56,221,246 2903bc3d5698SJohn Baldwin.byte 102,65,15,56,221,255 2904bc3d5698SJohn Baldwin .byte 0xf3,0xc3 2905bc3d5698SJohn Baldwin.cfi_endproc 2906bc3d5698SJohn Baldwin.size __ocb_encrypt6,.-__ocb_encrypt6 2907bc3d5698SJohn Baldwin 2908bc3d5698SJohn Baldwin.type __ocb_encrypt4,@function 2909bc3d5698SJohn Baldwin.align 32 2910bc3d5698SJohn Baldwin__ocb_encrypt4: 2911bc3d5698SJohn Baldwin.cfi_startproc 2912bc3d5698SJohn Baldwin pxor %xmm9,%xmm15 2913bc3d5698SJohn Baldwin movdqu (%rbx,%r12,1),%xmm11 2914bc3d5698SJohn Baldwin movdqa %xmm10,%xmm12 2915bc3d5698SJohn Baldwin movdqu (%rbx,%r13,1),%xmm13 2916bc3d5698SJohn Baldwin pxor %xmm15,%xmm10 2917bc3d5698SJohn Baldwin pxor %xmm10,%xmm11 2918bc3d5698SJohn Baldwin pxor %xmm2,%xmm8 2919bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 2920bc3d5698SJohn Baldwin pxor %xmm11,%xmm12 2921bc3d5698SJohn Baldwin pxor %xmm3,%xmm8 2922bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 2923bc3d5698SJohn Baldwin pxor %xmm12,%xmm13 2924bc3d5698SJohn Baldwin pxor %xmm4,%xmm8 2925bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 2926bc3d5698SJohn Baldwin pxor %xmm5,%xmm8 2927bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 2928bc3d5698SJohn Baldwin movups 32(%r11),%xmm0 2929bc3d5698SJohn Baldwin 2930bc3d5698SJohn Baldwin pxor %xmm9,%xmm10 2931bc3d5698SJohn Baldwin pxor %xmm9,%xmm11 2932bc3d5698SJohn Baldwin pxor %xmm9,%xmm12 2933bc3d5698SJohn Baldwin pxor %xmm9,%xmm13 2934bc3d5698SJohn Baldwin 2935bc3d5698SJohn Baldwin.byte 102,15,56,220,209 2936bc3d5698SJohn Baldwin.byte 102,15,56,220,217 2937bc3d5698SJohn Baldwin.byte 102,15,56,220,225 2938bc3d5698SJohn Baldwin.byte 102,15,56,220,233 2939bc3d5698SJohn Baldwin movups 48(%r11),%xmm1 2940bc3d5698SJohn Baldwin 2941bc3d5698SJohn Baldwin.byte 102,15,56,220,208 2942bc3d5698SJohn Baldwin.byte 102,15,56,220,216 2943bc3d5698SJohn Baldwin.byte 102,15,56,220,224 2944bc3d5698SJohn Baldwin.byte 102,15,56,220,232 2945bc3d5698SJohn Baldwin movups 64(%r11),%xmm0
/*
 * NOTE: several original source lines are fused onto each physical line in
 * this listing, so function boundaries fall mid-line.
 *
 * The physical line below first completes __ocb_encrypt4:
 * .Locb_enc_loop4 runs two aesenc rounds per iteration on %xmm2-%xmm5
 * (keys from (%rcx,%rax) and -16(%rcx,%rax), %rax stepped by 32 until
 * zero), followed by one more aesenc with %xmm1, a reload of %xmm1 from
 * 16(%r11) and of %rax from %r10, aesenclast with %xmm10-%xmm13
 * (.byte 102,65,15,56,221,N) and "rep ret".
 *
 * It then opens __ocb_encrypt1, the single-block helper:
 *   in:  %xmm2 = block, %xmm7 = table entry chosen by the caller,
 *        %xmm15 = running value, %xmm9 = value folded into the mask,
 *        %xmm8 = accumulator, %xmm1 = first-round key, %r11 = key
 *        schedule base, %rcx/%rax = key cursor, %r10 = saved %rax
 *   - %xmm7 ^= %xmm15 ^ %xmm9 becomes the input mask; the block is XORed
 *     into %xmm8 and then XORed with the mask;
 *   - %xmm9 is XORed into %xmm7 a second time (cancelling the first), and
 *     that value is the operand of the final aesenclast.
 */
2946bc3d5698SJohn Baldwin jmp .Locb_enc_loop4 2947bc3d5698SJohn Baldwin 2948bc3d5698SJohn Baldwin.align 32 2949bc3d5698SJohn Baldwin.Locb_enc_loop4: 2950bc3d5698SJohn Baldwin.byte 102,15,56,220,209 2951bc3d5698SJohn Baldwin.byte 102,15,56,220,217 2952bc3d5698SJohn Baldwin.byte 102,15,56,220,225 2953bc3d5698SJohn Baldwin.byte 102,15,56,220,233 2954bc3d5698SJohn Baldwin movups (%rcx,%rax,1),%xmm1 2955bc3d5698SJohn Baldwin addq $32,%rax 2956bc3d5698SJohn Baldwin 2957bc3d5698SJohn Baldwin.byte 102,15,56,220,208 2958bc3d5698SJohn Baldwin.byte 102,15,56,220,216 2959bc3d5698SJohn Baldwin.byte 102,15,56,220,224 2960bc3d5698SJohn Baldwin.byte 102,15,56,220,232 2961bc3d5698SJohn Baldwin movups -16(%rcx,%rax,1),%xmm0 2962bc3d5698SJohn Baldwin jnz .Locb_enc_loop4 2963bc3d5698SJohn Baldwin 2964bc3d5698SJohn Baldwin.byte 102,15,56,220,209 2965bc3d5698SJohn Baldwin.byte 102,15,56,220,217 2966bc3d5698SJohn Baldwin.byte 102,15,56,220,225 2967bc3d5698SJohn Baldwin.byte 102,15,56,220,233 2968bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 2969bc3d5698SJohn Baldwin movq %r10,%rax 2970bc3d5698SJohn Baldwin 2971bc3d5698SJohn Baldwin.byte 102,65,15,56,221,210 2972bc3d5698SJohn Baldwin.byte 102,65,15,56,221,219 2973bc3d5698SJohn Baldwin.byte 102,65,15,56,221,228 2974bc3d5698SJohn Baldwin.byte 102,65,15,56,221,237 2975bc3d5698SJohn Baldwin .byte 0xf3,0xc3 2976bc3d5698SJohn Baldwin.cfi_endproc 2977bc3d5698SJohn Baldwin.size __ocb_encrypt4,.-__ocb_encrypt4 2978bc3d5698SJohn Baldwin 2979bc3d5698SJohn Baldwin.type __ocb_encrypt1,@function 2980bc3d5698SJohn Baldwin.align 32 2981bc3d5698SJohn Baldwin__ocb_encrypt1: 2982bc3d5698SJohn Baldwin.cfi_startproc 2983bc3d5698SJohn Baldwin pxor %xmm15,%xmm7 2984bc3d5698SJohn Baldwin pxor %xmm9,%xmm7 2985bc3d5698SJohn Baldwin pxor %xmm2,%xmm8 2986bc3d5698SJohn Baldwin pxor %xmm7,%xmm2 2987bc3d5698SJohn Baldwin movups 32(%r11),%xmm0 2988bc3d5698SJohn Baldwin 2989bc3d5698SJohn Baldwin.byte 102,15,56,220,209 2990bc3d5698SJohn Baldwin movups 48(%r11),%xmm1
/*
 * Remainder of __ocb_encrypt1: .Locb_enc_loop1 applies two aesenc rounds
 * per iteration to %xmm2 (.byte 102,15,56,220,209 uses %xmm1,
 * .byte 102,15,56,220,208 uses %xmm0), stepping %rax by 32 until it is
 * zero.  After one more aesenc, %xmm1 is reloaded from 16(%r11) and %rax
 * from %r10; .byte 102,15,56,221,215 encodes aesenclast %xmm7,%xmm2, and
 * .byte 0xf3,0xc3 is "rep ret".  On return %xmm2 holds the result and
 * %xmm7 the mask without %xmm9.
 *
 * The same physical line then opens aesni_ocb_decrypt (endbr64, pushes of
 * %rbx, %rbp, %r12-%r14); that function continues past this block and is
 * left untouched here.
 */
2991bc3d5698SJohn Baldwin pxor %xmm9,%xmm7 2992bc3d5698SJohn Baldwin 2993bc3d5698SJohn Baldwin.byte 102,15,56,220,208 2994bc3d5698SJohn Baldwin movups 64(%r11),%xmm0 2995bc3d5698SJohn Baldwin jmp .Locb_enc_loop1 2996bc3d5698SJohn Baldwin 2997bc3d5698SJohn Baldwin.align 32 2998bc3d5698SJohn Baldwin.Locb_enc_loop1: 2999bc3d5698SJohn Baldwin.byte 102,15,56,220,209 3000bc3d5698SJohn Baldwin movups (%rcx,%rax,1),%xmm1 3001bc3d5698SJohn Baldwin addq $32,%rax 3002bc3d5698SJohn Baldwin 3003bc3d5698SJohn Baldwin.byte 102,15,56,220,208 3004bc3d5698SJohn Baldwin movups -16(%rcx,%rax,1),%xmm0 3005bc3d5698SJohn Baldwin jnz .Locb_enc_loop1 3006bc3d5698SJohn Baldwin 3007bc3d5698SJohn Baldwin.byte 102,15,56,220,209 3008bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 3009bc3d5698SJohn Baldwin movq %r10,%rax 3010bc3d5698SJohn Baldwin 3011bc3d5698SJohn Baldwin.byte 102,15,56,221,215 3012bc3d5698SJohn Baldwin .byte 0xf3,0xc3 3013bc3d5698SJohn Baldwin.cfi_endproc 3014bc3d5698SJohn Baldwin.size __ocb_encrypt1,.-__ocb_encrypt1 3015bc3d5698SJohn Baldwin 3016bc3d5698SJohn Baldwin.globl aesni_ocb_decrypt 3017bc3d5698SJohn Baldwin.type aesni_ocb_decrypt,@function 3018bc3d5698SJohn Baldwin.align 32 3019bc3d5698SJohn Baldwinaesni_ocb_decrypt: 3020bc3d5698SJohn Baldwin.cfi_startproc 3021*c0855eaaSJohn Baldwin.byte 243,15,30,250 3022bc3d5698SJohn Baldwin leaq (%rsp),%rax 3023bc3d5698SJohn Baldwin pushq %rbx 3024bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 3025bc3d5698SJohn Baldwin.cfi_offset %rbx,-16 3026bc3d5698SJohn Baldwin pushq %rbp 3027bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 3028bc3d5698SJohn Baldwin.cfi_offset %rbp,-24 3029bc3d5698SJohn Baldwin pushq %r12 3030bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 3031bc3d5698SJohn Baldwin.cfi_offset %r12,-32 3032bc3d5698SJohn Baldwin pushq %r13 3033bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 3034bc3d5698SJohn Baldwin.cfi_offset %r13,-40 3035bc3d5698SJohn Baldwin pushq %r14 3036bc3d5698SJohn Baldwin.cfi_adjust_cfa_offset 8 3037bc3d5698SJohn
Baldwin.cfi_offset %r14,-48 3038bc3d5698SJohn Baldwin movq 8(%rax),%rbx 3039bc3d5698SJohn Baldwin movq 8+8(%rax),%rbp 3040bc3d5698SJohn Baldwin 3041bc3d5698SJohn Baldwin movl 240(%rcx),%r10d 3042bc3d5698SJohn Baldwin movq %rcx,%r11 3043bc3d5698SJohn Baldwin shll $4,%r10d 3044bc3d5698SJohn Baldwin movups (%rcx),%xmm9 3045bc3d5698SJohn Baldwin movups 16(%rcx,%r10,1),%xmm1 3046bc3d5698SJohn Baldwin 3047bc3d5698SJohn Baldwin movdqu (%r9),%xmm15 3048bc3d5698SJohn Baldwin pxor %xmm1,%xmm9 3049bc3d5698SJohn Baldwin pxor %xmm1,%xmm15 3050bc3d5698SJohn Baldwin 3051bc3d5698SJohn Baldwin movl $16+32,%eax 3052bc3d5698SJohn Baldwin leaq 32(%r11,%r10,1),%rcx 3053bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 3054bc3d5698SJohn Baldwin subq %r10,%rax 3055bc3d5698SJohn Baldwin movq %rax,%r10 3056bc3d5698SJohn Baldwin 3057bc3d5698SJohn Baldwin movdqu (%rbx),%xmm10 3058bc3d5698SJohn Baldwin movdqu (%rbp),%xmm8 3059bc3d5698SJohn Baldwin 3060bc3d5698SJohn Baldwin testq $1,%r8 3061bc3d5698SJohn Baldwin jnz .Locb_dec_odd 3062bc3d5698SJohn Baldwin 3063bc3d5698SJohn Baldwin bsfq %r8,%r12 3064bc3d5698SJohn Baldwin addq $1,%r8 3065bc3d5698SJohn Baldwin shlq $4,%r12 3066bc3d5698SJohn Baldwin movdqu (%rbx,%r12,1),%xmm7 3067bc3d5698SJohn Baldwin movdqu (%rdi),%xmm2 3068bc3d5698SJohn Baldwin leaq 16(%rdi),%rdi 3069bc3d5698SJohn Baldwin 3070bc3d5698SJohn Baldwin call __ocb_decrypt1 3071bc3d5698SJohn Baldwin 3072bc3d5698SJohn Baldwin movdqa %xmm7,%xmm15 3073bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 3074bc3d5698SJohn Baldwin xorps %xmm2,%xmm8 3075bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 3076bc3d5698SJohn Baldwin subq $1,%rdx 3077bc3d5698SJohn Baldwin jz .Locb_dec_done 3078bc3d5698SJohn Baldwin 3079bc3d5698SJohn Baldwin.Locb_dec_odd: 3080bc3d5698SJohn Baldwin leaq 1(%r8),%r12 3081bc3d5698SJohn Baldwin leaq 3(%r8),%r13 3082bc3d5698SJohn Baldwin leaq 5(%r8),%r14 3083bc3d5698SJohn Baldwin leaq 6(%r8),%r8 3084bc3d5698SJohn Baldwin bsfq %r12,%r12 3085bc3d5698SJohn Baldwin bsfq %r13,%r13 3086bc3d5698SJohn 
Baldwin bsfq %r14,%r14 3087bc3d5698SJohn Baldwin shlq $4,%r12 3088bc3d5698SJohn Baldwin shlq $4,%r13 3089bc3d5698SJohn Baldwin shlq $4,%r14 3090bc3d5698SJohn Baldwin 3091bc3d5698SJohn Baldwin subq $6,%rdx 3092bc3d5698SJohn Baldwin jc .Locb_dec_short 3093bc3d5698SJohn Baldwin jmp .Locb_dec_grandloop 3094bc3d5698SJohn Baldwin 3095bc3d5698SJohn Baldwin.align 32 3096bc3d5698SJohn Baldwin.Locb_dec_grandloop: 3097bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm2 3098bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3 3099bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4 3100bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5 3101bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6 3102bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm7 3103bc3d5698SJohn Baldwin leaq 96(%rdi),%rdi 3104bc3d5698SJohn Baldwin 3105bc3d5698SJohn Baldwin call __ocb_decrypt6 3106bc3d5698SJohn Baldwin 3107bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 3108bc3d5698SJohn Baldwin pxor %xmm2,%xmm8 3109bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 3110bc3d5698SJohn Baldwin pxor %xmm3,%xmm8 3111bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 3112bc3d5698SJohn Baldwin pxor %xmm4,%xmm8 3113bc3d5698SJohn Baldwin movups %xmm5,48(%rsi) 3114bc3d5698SJohn Baldwin pxor %xmm5,%xmm8 3115bc3d5698SJohn Baldwin movups %xmm6,64(%rsi) 3116bc3d5698SJohn Baldwin pxor %xmm6,%xmm8 3117bc3d5698SJohn Baldwin movups %xmm7,80(%rsi) 3118bc3d5698SJohn Baldwin pxor %xmm7,%xmm8 3119bc3d5698SJohn Baldwin leaq 96(%rsi),%rsi 3120bc3d5698SJohn Baldwin subq $6,%rdx 3121bc3d5698SJohn Baldwin jnc .Locb_dec_grandloop 3122bc3d5698SJohn Baldwin 3123bc3d5698SJohn Baldwin.Locb_dec_short: 3124bc3d5698SJohn Baldwin addq $6,%rdx 3125bc3d5698SJohn Baldwin jz .Locb_dec_done 3126bc3d5698SJohn Baldwin 3127bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm2 3128bc3d5698SJohn Baldwin cmpq $2,%rdx 3129bc3d5698SJohn Baldwin jb .Locb_dec_one 3130bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3 3131bc3d5698SJohn Baldwin je .Locb_dec_two 3132bc3d5698SJohn Baldwin 3133bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4 
3134bc3d5698SJohn Baldwin cmpq $4,%rdx 3135bc3d5698SJohn Baldwin jb .Locb_dec_three 3136bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5 3137bc3d5698SJohn Baldwin je .Locb_dec_four 3138bc3d5698SJohn Baldwin 3139bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6 3140bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 3141bc3d5698SJohn Baldwin 3142bc3d5698SJohn Baldwin call __ocb_decrypt6 3143bc3d5698SJohn Baldwin 3144bc3d5698SJohn Baldwin movdqa %xmm14,%xmm15 3145bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 3146bc3d5698SJohn Baldwin pxor %xmm2,%xmm8 3147bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 3148bc3d5698SJohn Baldwin pxor %xmm3,%xmm8 3149bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 3150bc3d5698SJohn Baldwin pxor %xmm4,%xmm8 3151bc3d5698SJohn Baldwin movups %xmm5,48(%rsi) 3152bc3d5698SJohn Baldwin pxor %xmm5,%xmm8 3153bc3d5698SJohn Baldwin movups %xmm6,64(%rsi) 3154bc3d5698SJohn Baldwin pxor %xmm6,%xmm8 3155bc3d5698SJohn Baldwin 3156bc3d5698SJohn Baldwin jmp .Locb_dec_done 3157bc3d5698SJohn Baldwin 3158bc3d5698SJohn Baldwin.align 16 3159bc3d5698SJohn Baldwin.Locb_dec_one: 3160bc3d5698SJohn Baldwin movdqa %xmm10,%xmm7 3161bc3d5698SJohn Baldwin 3162bc3d5698SJohn Baldwin call __ocb_decrypt1 3163bc3d5698SJohn Baldwin 3164bc3d5698SJohn Baldwin movdqa %xmm7,%xmm15 3165bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 3166bc3d5698SJohn Baldwin xorps %xmm2,%xmm8 3167bc3d5698SJohn Baldwin jmp .Locb_dec_done 3168bc3d5698SJohn Baldwin 3169bc3d5698SJohn Baldwin.align 16 3170bc3d5698SJohn Baldwin.Locb_dec_two: 3171bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 3172bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 3173bc3d5698SJohn Baldwin 3174bc3d5698SJohn Baldwin call __ocb_decrypt4 3175bc3d5698SJohn Baldwin 3176bc3d5698SJohn Baldwin movdqa %xmm11,%xmm15 3177bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 3178bc3d5698SJohn Baldwin xorps %xmm2,%xmm8 3179bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 3180bc3d5698SJohn Baldwin xorps %xmm3,%xmm8 3181bc3d5698SJohn Baldwin 3182bc3d5698SJohn Baldwin jmp .Locb_dec_done 3183bc3d5698SJohn 
Baldwin 3184bc3d5698SJohn Baldwin.align 16 3185bc3d5698SJohn Baldwin.Locb_dec_three: 3186bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 3187bc3d5698SJohn Baldwin 3188bc3d5698SJohn Baldwin call __ocb_decrypt4 3189bc3d5698SJohn Baldwin 3190bc3d5698SJohn Baldwin movdqa %xmm12,%xmm15 3191bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 3192bc3d5698SJohn Baldwin xorps %xmm2,%xmm8 3193bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 3194bc3d5698SJohn Baldwin xorps %xmm3,%xmm8 3195bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 3196bc3d5698SJohn Baldwin xorps %xmm4,%xmm8 3197bc3d5698SJohn Baldwin 3198bc3d5698SJohn Baldwin jmp .Locb_dec_done 3199bc3d5698SJohn Baldwin 3200bc3d5698SJohn Baldwin.align 16 3201bc3d5698SJohn Baldwin.Locb_dec_four: 3202bc3d5698SJohn Baldwin call __ocb_decrypt4 3203bc3d5698SJohn Baldwin 3204bc3d5698SJohn Baldwin movdqa %xmm13,%xmm15 3205bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 3206bc3d5698SJohn Baldwin pxor %xmm2,%xmm8 3207bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 3208bc3d5698SJohn Baldwin pxor %xmm3,%xmm8 3209bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 3210bc3d5698SJohn Baldwin pxor %xmm4,%xmm8 3211bc3d5698SJohn Baldwin movups %xmm5,48(%rsi) 3212bc3d5698SJohn Baldwin pxor %xmm5,%xmm8 3213bc3d5698SJohn Baldwin 3214bc3d5698SJohn Baldwin.Locb_dec_done: 3215bc3d5698SJohn Baldwin pxor %xmm0,%xmm15 3216bc3d5698SJohn Baldwin movdqu %xmm8,(%rbp) 3217bc3d5698SJohn Baldwin movdqu %xmm15,(%r9) 3218bc3d5698SJohn Baldwin 3219bc3d5698SJohn Baldwin xorps %xmm0,%xmm0 3220bc3d5698SJohn Baldwin pxor %xmm1,%xmm1 3221bc3d5698SJohn Baldwin pxor %xmm2,%xmm2 3222bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 3223bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 3224bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 3225bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 3226bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 3227bc3d5698SJohn Baldwin pxor %xmm8,%xmm8 3228bc3d5698SJohn Baldwin pxor %xmm9,%xmm9 3229bc3d5698SJohn Baldwin pxor %xmm10,%xmm10 3230bc3d5698SJohn Baldwin pxor %xmm11,%xmm11 3231bc3d5698SJohn Baldwin pxor %xmm12,%xmm12 
3232bc3d5698SJohn Baldwin pxor %xmm13,%xmm13 3233bc3d5698SJohn Baldwin pxor %xmm14,%xmm14 3234bc3d5698SJohn Baldwin pxor %xmm15,%xmm15 3235bc3d5698SJohn Baldwin leaq 40(%rsp),%rax 3236bc3d5698SJohn Baldwin.cfi_def_cfa %rax,8 3237bc3d5698SJohn Baldwin movq -40(%rax),%r14 3238bc3d5698SJohn Baldwin.cfi_restore %r14 3239bc3d5698SJohn Baldwin movq -32(%rax),%r13 3240bc3d5698SJohn Baldwin.cfi_restore %r13 3241bc3d5698SJohn Baldwin movq -24(%rax),%r12 3242bc3d5698SJohn Baldwin.cfi_restore %r12 3243bc3d5698SJohn Baldwin movq -16(%rax),%rbp 3244bc3d5698SJohn Baldwin.cfi_restore %rbp 3245bc3d5698SJohn Baldwin movq -8(%rax),%rbx 3246bc3d5698SJohn Baldwin.cfi_restore %rbx 3247bc3d5698SJohn Baldwin leaq (%rax),%rsp 3248bc3d5698SJohn Baldwin.cfi_def_cfa_register %rsp 3249bc3d5698SJohn Baldwin.Locb_dec_epilogue: 3250bc3d5698SJohn Baldwin .byte 0xf3,0xc3 3251bc3d5698SJohn Baldwin.cfi_endproc 3252bc3d5698SJohn Baldwin.size aesni_ocb_decrypt,.-aesni_ocb_decrypt 3253bc3d5698SJohn Baldwin 3254bc3d5698SJohn Baldwin.type __ocb_decrypt6,@function 3255bc3d5698SJohn Baldwin.align 32 3256bc3d5698SJohn Baldwin__ocb_decrypt6: 3257bc3d5698SJohn Baldwin.cfi_startproc 3258bc3d5698SJohn Baldwin pxor %xmm9,%xmm15 3259bc3d5698SJohn Baldwin movdqu (%rbx,%r12,1),%xmm11 3260bc3d5698SJohn Baldwin movdqa %xmm10,%xmm12 3261bc3d5698SJohn Baldwin movdqu (%rbx,%r13,1),%xmm13 3262bc3d5698SJohn Baldwin movdqa %xmm10,%xmm14 3263bc3d5698SJohn Baldwin pxor %xmm15,%xmm10 3264bc3d5698SJohn Baldwin movdqu (%rbx,%r14,1),%xmm15 3265bc3d5698SJohn Baldwin pxor %xmm10,%xmm11 3266bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3267bc3d5698SJohn Baldwin pxor %xmm11,%xmm12 3268bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 3269bc3d5698SJohn Baldwin pxor %xmm12,%xmm13 3270bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 3271bc3d5698SJohn Baldwin pxor %xmm13,%xmm14 3272bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 3273bc3d5698SJohn Baldwin pxor %xmm14,%xmm15 3274bc3d5698SJohn Baldwin pxor %xmm14,%xmm6 3275bc3d5698SJohn Baldwin pxor %xmm15,%xmm7 
3276bc3d5698SJohn Baldwin movups 32(%r11),%xmm0 3277bc3d5698SJohn Baldwin 3278bc3d5698SJohn Baldwin leaq 1(%r8),%r12 3279bc3d5698SJohn Baldwin leaq 3(%r8),%r13 3280bc3d5698SJohn Baldwin leaq 5(%r8),%r14 3281bc3d5698SJohn Baldwin addq $6,%r8 3282bc3d5698SJohn Baldwin pxor %xmm9,%xmm10 3283bc3d5698SJohn Baldwin bsfq %r12,%r12 3284bc3d5698SJohn Baldwin bsfq %r13,%r13 3285bc3d5698SJohn Baldwin bsfq %r14,%r14 3286bc3d5698SJohn Baldwin 3287bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3288bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3289bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3290bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3291bc3d5698SJohn Baldwin pxor %xmm9,%xmm11 3292bc3d5698SJohn Baldwin pxor %xmm9,%xmm12 3293bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3294bc3d5698SJohn Baldwin pxor %xmm9,%xmm13 3295bc3d5698SJohn Baldwin pxor %xmm9,%xmm14 3296bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3297bc3d5698SJohn Baldwin movups 48(%r11),%xmm1 3298bc3d5698SJohn Baldwin pxor %xmm9,%xmm15 3299bc3d5698SJohn Baldwin 3300bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3301bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3302bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3303bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3304bc3d5698SJohn Baldwin.byte 102,15,56,222,240 3305bc3d5698SJohn Baldwin.byte 102,15,56,222,248 3306bc3d5698SJohn Baldwin movups 64(%r11),%xmm0 3307bc3d5698SJohn Baldwin shlq $4,%r12 3308bc3d5698SJohn Baldwin shlq $4,%r13 3309bc3d5698SJohn Baldwin jmp .Locb_dec_loop6 3310bc3d5698SJohn Baldwin 3311bc3d5698SJohn Baldwin.align 32 3312bc3d5698SJohn Baldwin.Locb_dec_loop6: 3313bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3314bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3315bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3316bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3317bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3318bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3319bc3d5698SJohn Baldwin movups (%rcx,%rax,1),%xmm1 3320bc3d5698SJohn Baldwin addq $32,%rax 3321bc3d5698SJohn 
Baldwin 3322bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3323bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3324bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3325bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3326bc3d5698SJohn Baldwin.byte 102,15,56,222,240 3327bc3d5698SJohn Baldwin.byte 102,15,56,222,248 3328bc3d5698SJohn Baldwin movups -16(%rcx,%rax,1),%xmm0 3329bc3d5698SJohn Baldwin jnz .Locb_dec_loop6 3330bc3d5698SJohn Baldwin 3331bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3332bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3333bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3334bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3335bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3336bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3337bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 3338bc3d5698SJohn Baldwin shlq $4,%r14 3339bc3d5698SJohn Baldwin 3340bc3d5698SJohn Baldwin.byte 102,65,15,56,223,210 3341bc3d5698SJohn Baldwin movdqu (%rbx),%xmm10 3342bc3d5698SJohn Baldwin movq %r10,%rax 3343bc3d5698SJohn Baldwin.byte 102,65,15,56,223,219 3344bc3d5698SJohn Baldwin.byte 102,65,15,56,223,228 3345bc3d5698SJohn Baldwin.byte 102,65,15,56,223,237 3346bc3d5698SJohn Baldwin.byte 102,65,15,56,223,246 3347bc3d5698SJohn Baldwin.byte 102,65,15,56,223,255 3348bc3d5698SJohn Baldwin .byte 0xf3,0xc3 3349bc3d5698SJohn Baldwin.cfi_endproc 3350bc3d5698SJohn Baldwin.size __ocb_decrypt6,.-__ocb_decrypt6 3351bc3d5698SJohn Baldwin 3352bc3d5698SJohn Baldwin.type __ocb_decrypt4,@function 3353bc3d5698SJohn Baldwin.align 32 3354bc3d5698SJohn Baldwin__ocb_decrypt4: 3355bc3d5698SJohn Baldwin.cfi_startproc 3356bc3d5698SJohn Baldwin pxor %xmm9,%xmm15 3357bc3d5698SJohn Baldwin movdqu (%rbx,%r12,1),%xmm11 3358bc3d5698SJohn Baldwin movdqa %xmm10,%xmm12 3359bc3d5698SJohn Baldwin movdqu (%rbx,%r13,1),%xmm13 3360bc3d5698SJohn Baldwin pxor %xmm15,%xmm10 3361bc3d5698SJohn Baldwin pxor %xmm10,%xmm11 3362bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3363bc3d5698SJohn Baldwin pxor %xmm11,%xmm12 3364bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 
3365bc3d5698SJohn Baldwin pxor %xmm12,%xmm13 3366bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 3367bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 3368bc3d5698SJohn Baldwin movups 32(%r11),%xmm0 3369bc3d5698SJohn Baldwin 3370bc3d5698SJohn Baldwin pxor %xmm9,%xmm10 3371bc3d5698SJohn Baldwin pxor %xmm9,%xmm11 3372bc3d5698SJohn Baldwin pxor %xmm9,%xmm12 3373bc3d5698SJohn Baldwin pxor %xmm9,%xmm13 3374bc3d5698SJohn Baldwin 3375bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3376bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3377bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3378bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3379bc3d5698SJohn Baldwin movups 48(%r11),%xmm1 3380bc3d5698SJohn Baldwin 3381bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3382bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3383bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3384bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3385bc3d5698SJohn Baldwin movups 64(%r11),%xmm0 3386bc3d5698SJohn Baldwin jmp .Locb_dec_loop4 3387bc3d5698SJohn Baldwin 3388bc3d5698SJohn Baldwin.align 32 3389bc3d5698SJohn Baldwin.Locb_dec_loop4: 3390bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3391bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3392bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3393bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3394bc3d5698SJohn Baldwin movups (%rcx,%rax,1),%xmm1 3395bc3d5698SJohn Baldwin addq $32,%rax 3396bc3d5698SJohn Baldwin 3397bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3398bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3399bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3400bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3401bc3d5698SJohn Baldwin movups -16(%rcx,%rax,1),%xmm0 3402bc3d5698SJohn Baldwin jnz .Locb_dec_loop4 3403bc3d5698SJohn Baldwin 3404bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3405bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3406bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3407bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3408bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 3409bc3d5698SJohn Baldwin movq %r10,%rax 
3410bc3d5698SJohn Baldwin 3411bc3d5698SJohn Baldwin.byte 102,65,15,56,223,210 3412bc3d5698SJohn Baldwin.byte 102,65,15,56,223,219 3413bc3d5698SJohn Baldwin.byte 102,65,15,56,223,228 3414bc3d5698SJohn Baldwin.byte 102,65,15,56,223,237 3415bc3d5698SJohn Baldwin .byte 0xf3,0xc3 3416bc3d5698SJohn Baldwin.cfi_endproc 3417bc3d5698SJohn Baldwin.size __ocb_decrypt4,.-__ocb_decrypt4 3418bc3d5698SJohn Baldwin 3419bc3d5698SJohn Baldwin.type __ocb_decrypt1,@function 3420bc3d5698SJohn Baldwin.align 32 3421bc3d5698SJohn Baldwin__ocb_decrypt1: 3422bc3d5698SJohn Baldwin.cfi_startproc 3423bc3d5698SJohn Baldwin pxor %xmm15,%xmm7 3424bc3d5698SJohn Baldwin pxor %xmm9,%xmm7 3425bc3d5698SJohn Baldwin pxor %xmm7,%xmm2 3426bc3d5698SJohn Baldwin movups 32(%r11),%xmm0 3427bc3d5698SJohn Baldwin 3428bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3429bc3d5698SJohn Baldwin movups 48(%r11),%xmm1 3430bc3d5698SJohn Baldwin pxor %xmm9,%xmm7 3431bc3d5698SJohn Baldwin 3432bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3433bc3d5698SJohn Baldwin movups 64(%r11),%xmm0 3434bc3d5698SJohn Baldwin jmp .Locb_dec_loop1 3435bc3d5698SJohn Baldwin 3436bc3d5698SJohn Baldwin.align 32 3437bc3d5698SJohn Baldwin.Locb_dec_loop1: 3438bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3439bc3d5698SJohn Baldwin movups (%rcx,%rax,1),%xmm1 3440bc3d5698SJohn Baldwin addq $32,%rax 3441bc3d5698SJohn Baldwin 3442bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3443bc3d5698SJohn Baldwin movups -16(%rcx,%rax,1),%xmm0 3444bc3d5698SJohn Baldwin jnz .Locb_dec_loop1 3445bc3d5698SJohn Baldwin 3446bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3447bc3d5698SJohn Baldwin movups 16(%r11),%xmm1 3448bc3d5698SJohn Baldwin movq %r10,%rax 3449bc3d5698SJohn Baldwin 3450bc3d5698SJohn Baldwin.byte 102,15,56,223,215 3451bc3d5698SJohn Baldwin .byte 0xf3,0xc3 3452bc3d5698SJohn Baldwin.cfi_endproc 3453bc3d5698SJohn Baldwin.size __ocb_decrypt1,.-__ocb_decrypt1 3454bc3d5698SJohn Baldwin.globl aesni_cbc_encrypt 3455bc3d5698SJohn Baldwin.type 
aesni_cbc_encrypt,@function 3456bc3d5698SJohn Baldwin.align 16 3457bc3d5698SJohn Baldwinaesni_cbc_encrypt: 3458bc3d5698SJohn Baldwin.cfi_startproc 3459*c0855eaaSJohn Baldwin.byte 243,15,30,250 3460bc3d5698SJohn Baldwin testq %rdx,%rdx 3461bc3d5698SJohn Baldwin jz .Lcbc_ret 3462bc3d5698SJohn Baldwin 3463bc3d5698SJohn Baldwin movl 240(%rcx),%r10d 3464bc3d5698SJohn Baldwin movq %rcx,%r11 3465bc3d5698SJohn Baldwin testl %r9d,%r9d 3466bc3d5698SJohn Baldwin jz .Lcbc_decrypt 3467bc3d5698SJohn Baldwin 3468bc3d5698SJohn Baldwin movups (%r8),%xmm2 3469bc3d5698SJohn Baldwin movl %r10d,%eax 3470bc3d5698SJohn Baldwin cmpq $16,%rdx 3471bc3d5698SJohn Baldwin jb .Lcbc_enc_tail 3472bc3d5698SJohn Baldwin subq $16,%rdx 3473bc3d5698SJohn Baldwin jmp .Lcbc_enc_loop 3474bc3d5698SJohn Baldwin.align 16 3475bc3d5698SJohn Baldwin.Lcbc_enc_loop: 3476bc3d5698SJohn Baldwin movups (%rdi),%xmm3 3477bc3d5698SJohn Baldwin leaq 16(%rdi),%rdi 3478bc3d5698SJohn Baldwin 3479bc3d5698SJohn Baldwin movups (%rcx),%xmm0 3480bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 3481bc3d5698SJohn Baldwin xorps %xmm0,%xmm3 3482bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 3483bc3d5698SJohn Baldwin xorps %xmm3,%xmm2 3484bc3d5698SJohn Baldwin.Loop_enc1_15: 3485bc3d5698SJohn Baldwin.byte 102,15,56,220,209 3486bc3d5698SJohn Baldwin decl %eax 3487bc3d5698SJohn Baldwin movups (%rcx),%xmm1 3488bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 3489bc3d5698SJohn Baldwin jnz .Loop_enc1_15 3490bc3d5698SJohn Baldwin.byte 102,15,56,221,209 3491bc3d5698SJohn Baldwin movl %r10d,%eax 3492bc3d5698SJohn Baldwin movq %r11,%rcx 3493bc3d5698SJohn Baldwin movups %xmm2,0(%rsi) 3494bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 3495bc3d5698SJohn Baldwin subq $16,%rdx 3496bc3d5698SJohn Baldwin jnc .Lcbc_enc_loop 3497bc3d5698SJohn Baldwin addq $16,%rdx 3498bc3d5698SJohn Baldwin jnz .Lcbc_enc_tail 3499bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 3500bc3d5698SJohn Baldwin pxor %xmm1,%xmm1 3501bc3d5698SJohn Baldwin movups %xmm2,(%r8) 3502bc3d5698SJohn Baldwin pxor 
%xmm2,%xmm2 3503bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 3504bc3d5698SJohn Baldwin jmp .Lcbc_ret 3505bc3d5698SJohn Baldwin 3506bc3d5698SJohn Baldwin.Lcbc_enc_tail: 3507bc3d5698SJohn Baldwin movq %rdx,%rcx 3508bc3d5698SJohn Baldwin xchgq %rdi,%rsi 3509bc3d5698SJohn Baldwin.long 0x9066A4F3 3510bc3d5698SJohn Baldwin movl $16,%ecx 3511bc3d5698SJohn Baldwin subq %rdx,%rcx 3512bc3d5698SJohn Baldwin xorl %eax,%eax 3513bc3d5698SJohn Baldwin.long 0x9066AAF3 3514bc3d5698SJohn Baldwin leaq -16(%rdi),%rdi 3515bc3d5698SJohn Baldwin movl %r10d,%eax 3516bc3d5698SJohn Baldwin movq %rdi,%rsi 3517bc3d5698SJohn Baldwin movq %r11,%rcx 3518bc3d5698SJohn Baldwin xorq %rdx,%rdx 3519bc3d5698SJohn Baldwin jmp .Lcbc_enc_loop 3520bc3d5698SJohn Baldwin 3521bc3d5698SJohn Baldwin.align 16 3522bc3d5698SJohn Baldwin.Lcbc_decrypt: 3523bc3d5698SJohn Baldwin cmpq $16,%rdx 3524bc3d5698SJohn Baldwin jne .Lcbc_decrypt_bulk 3525bc3d5698SJohn Baldwin 3526bc3d5698SJohn Baldwin 3527bc3d5698SJohn Baldwin 3528bc3d5698SJohn Baldwin movdqu (%rdi),%xmm2 3529bc3d5698SJohn Baldwin movdqu (%r8),%xmm3 3530bc3d5698SJohn Baldwin movdqa %xmm2,%xmm4 3531bc3d5698SJohn Baldwin movups (%rcx),%xmm0 3532bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 3533bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 3534bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 3535bc3d5698SJohn Baldwin.Loop_dec1_16: 3536bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3537bc3d5698SJohn Baldwin decl %r10d 3538bc3d5698SJohn Baldwin movups (%rcx),%xmm1 3539bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 3540bc3d5698SJohn Baldwin jnz .Loop_dec1_16 3541bc3d5698SJohn Baldwin.byte 102,15,56,223,209 3542bc3d5698SJohn Baldwin pxor %xmm0,%xmm0 3543bc3d5698SJohn Baldwin pxor %xmm1,%xmm1 3544bc3d5698SJohn Baldwin movdqu %xmm4,(%r8) 3545bc3d5698SJohn Baldwin xorps %xmm3,%xmm2 3546bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 3547bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 3548bc3d5698SJohn Baldwin pxor %xmm2,%xmm2 3549bc3d5698SJohn Baldwin jmp .Lcbc_ret 3550bc3d5698SJohn Baldwin.align 16 
3551bc3d5698SJohn Baldwin.Lcbc_decrypt_bulk: 3552bc3d5698SJohn Baldwin leaq (%rsp),%r11 3553bc3d5698SJohn Baldwin.cfi_def_cfa_register %r11 3554bc3d5698SJohn Baldwin pushq %rbp 3555bc3d5698SJohn Baldwin.cfi_offset %rbp,-16 3556bc3d5698SJohn Baldwin subq $16,%rsp 3557bc3d5698SJohn Baldwin andq $-16,%rsp 3558bc3d5698SJohn Baldwin movq %rcx,%rbp 3559bc3d5698SJohn Baldwin movups (%r8),%xmm10 3560bc3d5698SJohn Baldwin movl %r10d,%eax 3561bc3d5698SJohn Baldwin cmpq $0x50,%rdx 3562bc3d5698SJohn Baldwin jbe .Lcbc_dec_tail 3563bc3d5698SJohn Baldwin 3564bc3d5698SJohn Baldwin movups (%rcx),%xmm0 3565bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm2 3566bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3 3567bc3d5698SJohn Baldwin movdqa %xmm2,%xmm11 3568bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4 3569bc3d5698SJohn Baldwin movdqa %xmm3,%xmm12 3570bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5 3571bc3d5698SJohn Baldwin movdqa %xmm4,%xmm13 3572bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6 3573bc3d5698SJohn Baldwin movdqa %xmm5,%xmm14 3574bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm7 3575bc3d5698SJohn Baldwin movdqa %xmm6,%xmm15 3576bc3d5698SJohn Baldwin movl OPENSSL_ia32cap_P+4(%rip),%r9d 3577bc3d5698SJohn Baldwin cmpq $0x70,%rdx 3578bc3d5698SJohn Baldwin jbe .Lcbc_dec_six_or_seven 3579bc3d5698SJohn Baldwin 3580bc3d5698SJohn Baldwin andl $71303168,%r9d 3581bc3d5698SJohn Baldwin subq $0x50,%rdx 3582bc3d5698SJohn Baldwin cmpl $4194304,%r9d 3583bc3d5698SJohn Baldwin je .Lcbc_dec_loop6_enter 3584bc3d5698SJohn Baldwin subq $0x20,%rdx 3585bc3d5698SJohn Baldwin leaq 112(%rcx),%rcx 3586bc3d5698SJohn Baldwin jmp .Lcbc_dec_loop8_enter 3587bc3d5698SJohn Baldwin.align 16 3588bc3d5698SJohn Baldwin.Lcbc_dec_loop8: 3589bc3d5698SJohn Baldwin movups %xmm9,(%rsi) 3590bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 3591bc3d5698SJohn Baldwin.Lcbc_dec_loop8_enter: 3592bc3d5698SJohn Baldwin movdqu 96(%rdi),%xmm8 3593bc3d5698SJohn Baldwin pxor %xmm0,%xmm2 3594bc3d5698SJohn Baldwin movdqu 112(%rdi),%xmm9 3595bc3d5698SJohn Baldwin 
pxor %xmm0,%xmm3 3596bc3d5698SJohn Baldwin movups 16-112(%rcx),%xmm1 3597bc3d5698SJohn Baldwin pxor %xmm0,%xmm4 3598bc3d5698SJohn Baldwin movq $-1,%rbp 3599bc3d5698SJohn Baldwin cmpq $0x70,%rdx 3600bc3d5698SJohn Baldwin pxor %xmm0,%xmm5 3601bc3d5698SJohn Baldwin pxor %xmm0,%xmm6 3602bc3d5698SJohn Baldwin pxor %xmm0,%xmm7 3603bc3d5698SJohn Baldwin pxor %xmm0,%xmm8 3604bc3d5698SJohn Baldwin 3605bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3606bc3d5698SJohn Baldwin pxor %xmm0,%xmm9 3607bc3d5698SJohn Baldwin movups 32-112(%rcx),%xmm0 3608bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3609bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3610bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3611bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3612bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3613bc3d5698SJohn Baldwin.byte 102,68,15,56,222,193 3614bc3d5698SJohn Baldwin adcq $0,%rbp 3615bc3d5698SJohn Baldwin andq $128,%rbp 3616bc3d5698SJohn Baldwin.byte 102,68,15,56,222,201 3617bc3d5698SJohn Baldwin addq %rdi,%rbp 3618bc3d5698SJohn Baldwin movups 48-112(%rcx),%xmm1 3619bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3620bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3621bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3622bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3623bc3d5698SJohn Baldwin.byte 102,15,56,222,240 3624bc3d5698SJohn Baldwin.byte 102,15,56,222,248 3625bc3d5698SJohn Baldwin.byte 102,68,15,56,222,192 3626bc3d5698SJohn Baldwin.byte 102,68,15,56,222,200 3627bc3d5698SJohn Baldwin movups 64-112(%rcx),%xmm0 3628bc3d5698SJohn Baldwin nop 3629bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3630bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3631bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3632bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3633bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3634bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3635bc3d5698SJohn Baldwin.byte 102,68,15,56,222,193 3636bc3d5698SJohn Baldwin.byte 102,68,15,56,222,201 3637bc3d5698SJohn Baldwin movups 80-112(%rcx),%xmm1 
3638bc3d5698SJohn Baldwin nop 3639bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3640bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3641bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3642bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3643bc3d5698SJohn Baldwin.byte 102,15,56,222,240 3644bc3d5698SJohn Baldwin.byte 102,15,56,222,248 3645bc3d5698SJohn Baldwin.byte 102,68,15,56,222,192 3646bc3d5698SJohn Baldwin.byte 102,68,15,56,222,200 3647bc3d5698SJohn Baldwin movups 96-112(%rcx),%xmm0 3648bc3d5698SJohn Baldwin nop 3649bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3650bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3651bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3652bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3653bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3654bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3655bc3d5698SJohn Baldwin.byte 102,68,15,56,222,193 3656bc3d5698SJohn Baldwin.byte 102,68,15,56,222,201 3657bc3d5698SJohn Baldwin movups 112-112(%rcx),%xmm1 3658bc3d5698SJohn Baldwin nop 3659bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3660bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3661bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3662bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3663bc3d5698SJohn Baldwin.byte 102,15,56,222,240 3664bc3d5698SJohn Baldwin.byte 102,15,56,222,248 3665bc3d5698SJohn Baldwin.byte 102,68,15,56,222,192 3666bc3d5698SJohn Baldwin.byte 102,68,15,56,222,200 3667bc3d5698SJohn Baldwin movups 128-112(%rcx),%xmm0 3668bc3d5698SJohn Baldwin nop 3669bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3670bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3671bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3672bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3673bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3674bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3675bc3d5698SJohn Baldwin.byte 102,68,15,56,222,193 3676bc3d5698SJohn Baldwin.byte 102,68,15,56,222,201 3677bc3d5698SJohn Baldwin movups 144-112(%rcx),%xmm1 3678bc3d5698SJohn Baldwin cmpl $11,%eax 3679bc3d5698SJohn Baldwin.byte 
102,15,56,222,208 3680bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3681bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3682bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3683bc3d5698SJohn Baldwin.byte 102,15,56,222,240 3684bc3d5698SJohn Baldwin.byte 102,15,56,222,248 3685bc3d5698SJohn Baldwin.byte 102,68,15,56,222,192 3686bc3d5698SJohn Baldwin.byte 102,68,15,56,222,200 3687bc3d5698SJohn Baldwin movups 160-112(%rcx),%xmm0 3688bc3d5698SJohn Baldwin jb .Lcbc_dec_done 3689bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3690bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3691bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3692bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3693bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3694bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3695bc3d5698SJohn Baldwin.byte 102,68,15,56,222,193 3696bc3d5698SJohn Baldwin.byte 102,68,15,56,222,201 3697bc3d5698SJohn Baldwin movups 176-112(%rcx),%xmm1 3698bc3d5698SJohn Baldwin nop 3699bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3700bc3d5698SJohn Baldwin.byte 102,15,56,222,216 3701bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3702bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3703bc3d5698SJohn Baldwin.byte 102,15,56,222,240 3704bc3d5698SJohn Baldwin.byte 102,15,56,222,248 3705bc3d5698SJohn Baldwin.byte 102,68,15,56,222,192 3706bc3d5698SJohn Baldwin.byte 102,68,15,56,222,200 3707bc3d5698SJohn Baldwin movups 192-112(%rcx),%xmm0 3708bc3d5698SJohn Baldwin je .Lcbc_dec_done 3709bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3710bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3711bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3712bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3713bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3714bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3715bc3d5698SJohn Baldwin.byte 102,68,15,56,222,193 3716bc3d5698SJohn Baldwin.byte 102,68,15,56,222,201 3717bc3d5698SJohn Baldwin movups 208-112(%rcx),%xmm1 3718bc3d5698SJohn Baldwin nop 3719bc3d5698SJohn Baldwin.byte 102,15,56,222,208 3720bc3d5698SJohn 
Baldwin.byte 102,15,56,222,216 3721bc3d5698SJohn Baldwin.byte 102,15,56,222,224 3722bc3d5698SJohn Baldwin.byte 102,15,56,222,232 3723bc3d5698SJohn Baldwin.byte 102,15,56,222,240 3724bc3d5698SJohn Baldwin.byte 102,15,56,222,248 3725bc3d5698SJohn Baldwin.byte 102,68,15,56,222,192 3726bc3d5698SJohn Baldwin.byte 102,68,15,56,222,200 3727bc3d5698SJohn Baldwin movups 224-112(%rcx),%xmm0 3728bc3d5698SJohn Baldwin jmp .Lcbc_dec_done 3729bc3d5698SJohn Baldwin.align 16 3730bc3d5698SJohn Baldwin.Lcbc_dec_done: 3731bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3732bc3d5698SJohn Baldwin.byte 102,15,56,222,217 3733bc3d5698SJohn Baldwin pxor %xmm0,%xmm10 3734bc3d5698SJohn Baldwin pxor %xmm0,%xmm11 3735bc3d5698SJohn Baldwin.byte 102,15,56,222,225 3736bc3d5698SJohn Baldwin.byte 102,15,56,222,233 3737bc3d5698SJohn Baldwin pxor %xmm0,%xmm12 3738bc3d5698SJohn Baldwin pxor %xmm0,%xmm13 3739bc3d5698SJohn Baldwin.byte 102,15,56,222,241 3740bc3d5698SJohn Baldwin.byte 102,15,56,222,249 3741bc3d5698SJohn Baldwin pxor %xmm0,%xmm14 3742bc3d5698SJohn Baldwin pxor %xmm0,%xmm15 3743bc3d5698SJohn Baldwin.byte 102,68,15,56,222,193 3744bc3d5698SJohn Baldwin.byte 102,68,15,56,222,201 3745bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm1 3746bc3d5698SJohn Baldwin 3747bc3d5698SJohn Baldwin.byte 102,65,15,56,223,210 3748bc3d5698SJohn Baldwin movdqu 96(%rdi),%xmm10 3749bc3d5698SJohn Baldwin pxor %xmm0,%xmm1 3750bc3d5698SJohn Baldwin.byte 102,65,15,56,223,219 3751bc3d5698SJohn Baldwin pxor %xmm0,%xmm10 3752bc3d5698SJohn Baldwin movdqu 112(%rdi),%xmm0 3753bc3d5698SJohn Baldwin.byte 102,65,15,56,223,228 3754bc3d5698SJohn Baldwin leaq 128(%rdi),%rdi 3755bc3d5698SJohn Baldwin movdqu 0(%rbp),%xmm11 3756bc3d5698SJohn Baldwin.byte 102,65,15,56,223,237 3757bc3d5698SJohn Baldwin.byte 102,65,15,56,223,246 3758bc3d5698SJohn Baldwin movdqu 16(%rbp),%xmm12 3759bc3d5698SJohn Baldwin movdqu 32(%rbp),%xmm13 3760bc3d5698SJohn Baldwin.byte 102,65,15,56,223,255 3761bc3d5698SJohn Baldwin.byte 102,68,15,56,223,193 3762bc3d5698SJohn 
Baldwin movdqu 48(%rbp),%xmm14 3763bc3d5698SJohn Baldwin movdqu 64(%rbp),%xmm15 3764bc3d5698SJohn Baldwin.byte 102,69,15,56,223,202 3765bc3d5698SJohn Baldwin movdqa %xmm0,%xmm10 3766bc3d5698SJohn Baldwin movdqu 80(%rbp),%xmm1 3767bc3d5698SJohn Baldwin movups -112(%rcx),%xmm0 3768bc3d5698SJohn Baldwin 3769bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 3770bc3d5698SJohn Baldwin movdqa %xmm11,%xmm2 3771bc3d5698SJohn Baldwin movups %xmm3,16(%rsi) 3772bc3d5698SJohn Baldwin movdqa %xmm12,%xmm3 3773bc3d5698SJohn Baldwin movups %xmm4,32(%rsi) 3774bc3d5698SJohn Baldwin movdqa %xmm13,%xmm4 3775bc3d5698SJohn Baldwin movups %xmm5,48(%rsi) 3776bc3d5698SJohn Baldwin movdqa %xmm14,%xmm5 3777bc3d5698SJohn Baldwin movups %xmm6,64(%rsi) 3778bc3d5698SJohn Baldwin movdqa %xmm15,%xmm6 3779bc3d5698SJohn Baldwin movups %xmm7,80(%rsi) 3780bc3d5698SJohn Baldwin movdqa %xmm1,%xmm7 3781bc3d5698SJohn Baldwin movups %xmm8,96(%rsi) 3782bc3d5698SJohn Baldwin leaq 112(%rsi),%rsi 3783bc3d5698SJohn Baldwin 3784bc3d5698SJohn Baldwin subq $0x80,%rdx 3785bc3d5698SJohn Baldwin ja .Lcbc_dec_loop8 3786bc3d5698SJohn Baldwin 3787bc3d5698SJohn Baldwin movaps %xmm9,%xmm2 3788bc3d5698SJohn Baldwin leaq -112(%rcx),%rcx 3789bc3d5698SJohn Baldwin addq $0x70,%rdx 3790bc3d5698SJohn Baldwin jle .Lcbc_dec_clear_tail_collected 3791bc3d5698SJohn Baldwin movups %xmm9,(%rsi) 3792bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 3793bc3d5698SJohn Baldwin cmpq $0x50,%rdx 3794bc3d5698SJohn Baldwin jbe .Lcbc_dec_tail 3795bc3d5698SJohn Baldwin 3796bc3d5698SJohn Baldwin movaps %xmm11,%xmm2 3797bc3d5698SJohn Baldwin.Lcbc_dec_six_or_seven: 3798bc3d5698SJohn Baldwin cmpq $0x60,%rdx 3799bc3d5698SJohn Baldwin ja .Lcbc_dec_seven 3800bc3d5698SJohn Baldwin 3801bc3d5698SJohn Baldwin movaps %xmm7,%xmm8 3802bc3d5698SJohn Baldwin call _aesni_decrypt6 3803bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3804bc3d5698SJohn Baldwin movaps %xmm8,%xmm10 3805bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 3806bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 3807bc3d5698SJohn 
Baldwin pxor %xmm12,%xmm4 3808bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 3809bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 3810bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 3811bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 3812bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 3813bc3d5698SJohn Baldwin pxor %xmm14,%xmm6 3814bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi) 3815bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 3816bc3d5698SJohn Baldwin pxor %xmm15,%xmm7 3817bc3d5698SJohn Baldwin movdqu %xmm6,64(%rsi) 3818bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 3819bc3d5698SJohn Baldwin leaq 80(%rsi),%rsi 3820bc3d5698SJohn Baldwin movdqa %xmm7,%xmm2 3821bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 3822bc3d5698SJohn Baldwin jmp .Lcbc_dec_tail_collected 3823bc3d5698SJohn Baldwin 3824bc3d5698SJohn Baldwin.align 16 3825bc3d5698SJohn Baldwin.Lcbc_dec_seven: 3826bc3d5698SJohn Baldwin movups 96(%rdi),%xmm8 3827bc3d5698SJohn Baldwin xorps %xmm9,%xmm9 3828bc3d5698SJohn Baldwin call _aesni_decrypt8 3829bc3d5698SJohn Baldwin movups 80(%rdi),%xmm9 3830bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3831bc3d5698SJohn Baldwin movups 96(%rdi),%xmm10 3832bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 3833bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 3834bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 3835bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 3836bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 3837bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 3838bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 3839bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 3840bc3d5698SJohn Baldwin pxor %xmm14,%xmm6 3841bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi) 3842bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 3843bc3d5698SJohn Baldwin pxor %xmm15,%xmm7 3844bc3d5698SJohn Baldwin movdqu %xmm6,64(%rsi) 3845bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 3846bc3d5698SJohn Baldwin pxor %xmm9,%xmm8 3847bc3d5698SJohn Baldwin movdqu %xmm7,80(%rsi) 3848bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 3849bc3d5698SJohn Baldwin leaq 96(%rsi),%rsi 3850bc3d5698SJohn Baldwin movdqa %xmm8,%xmm2 3851bc3d5698SJohn Baldwin pxor %xmm8,%xmm8 
3852bc3d5698SJohn Baldwin pxor %xmm9,%xmm9 3853bc3d5698SJohn Baldwin jmp .Lcbc_dec_tail_collected 3854bc3d5698SJohn Baldwin 3855bc3d5698SJohn Baldwin.align 16 3856bc3d5698SJohn Baldwin.Lcbc_dec_loop6: 3857bc3d5698SJohn Baldwin movups %xmm7,(%rsi) 3858bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 3859bc3d5698SJohn Baldwin movdqu 0(%rdi),%xmm2 3860bc3d5698SJohn Baldwin movdqu 16(%rdi),%xmm3 3861bc3d5698SJohn Baldwin movdqa %xmm2,%xmm11 3862bc3d5698SJohn Baldwin movdqu 32(%rdi),%xmm4 3863bc3d5698SJohn Baldwin movdqa %xmm3,%xmm12 3864bc3d5698SJohn Baldwin movdqu 48(%rdi),%xmm5 3865bc3d5698SJohn Baldwin movdqa %xmm4,%xmm13 3866bc3d5698SJohn Baldwin movdqu 64(%rdi),%xmm6 3867bc3d5698SJohn Baldwin movdqa %xmm5,%xmm14 3868bc3d5698SJohn Baldwin movdqu 80(%rdi),%xmm7 3869bc3d5698SJohn Baldwin movdqa %xmm6,%xmm15 3870bc3d5698SJohn Baldwin.Lcbc_dec_loop6_enter: 3871bc3d5698SJohn Baldwin leaq 96(%rdi),%rdi 3872bc3d5698SJohn Baldwin movdqa %xmm7,%xmm8 3873bc3d5698SJohn Baldwin 3874bc3d5698SJohn Baldwin call _aesni_decrypt6 3875bc3d5698SJohn Baldwin 3876bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3877bc3d5698SJohn Baldwin movdqa %xmm8,%xmm10 3878bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 3879bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 3880bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 3881bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 3882bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 3883bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 3884bc3d5698SJohn Baldwin pxor %xmm14,%xmm6 3885bc3d5698SJohn Baldwin movq %rbp,%rcx 3886bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi) 3887bc3d5698SJohn Baldwin pxor %xmm15,%xmm7 3888bc3d5698SJohn Baldwin movl %r10d,%eax 3889bc3d5698SJohn Baldwin movdqu %xmm6,64(%rsi) 3890bc3d5698SJohn Baldwin leaq 80(%rsi),%rsi 3891bc3d5698SJohn Baldwin subq $0x60,%rdx 3892bc3d5698SJohn Baldwin ja .Lcbc_dec_loop6 3893bc3d5698SJohn Baldwin 3894bc3d5698SJohn Baldwin movdqa %xmm7,%xmm2 3895bc3d5698SJohn Baldwin addq $0x50,%rdx 3896bc3d5698SJohn Baldwin jle .Lcbc_dec_clear_tail_collected 
3897bc3d5698SJohn Baldwin movups %xmm7,(%rsi) 3898bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 3899bc3d5698SJohn Baldwin 3900bc3d5698SJohn Baldwin.Lcbc_dec_tail: 3901bc3d5698SJohn Baldwin movups (%rdi),%xmm2 3902bc3d5698SJohn Baldwin subq $0x10,%rdx 3903bc3d5698SJohn Baldwin jbe .Lcbc_dec_one 3904bc3d5698SJohn Baldwin 3905bc3d5698SJohn Baldwin movups 16(%rdi),%xmm3 3906bc3d5698SJohn Baldwin movaps %xmm2,%xmm11 3907bc3d5698SJohn Baldwin subq $0x10,%rdx 3908bc3d5698SJohn Baldwin jbe .Lcbc_dec_two 3909bc3d5698SJohn Baldwin 3910bc3d5698SJohn Baldwin movups 32(%rdi),%xmm4 3911bc3d5698SJohn Baldwin movaps %xmm3,%xmm12 3912bc3d5698SJohn Baldwin subq $0x10,%rdx 3913bc3d5698SJohn Baldwin jbe .Lcbc_dec_three 3914bc3d5698SJohn Baldwin 3915bc3d5698SJohn Baldwin movups 48(%rdi),%xmm5 3916bc3d5698SJohn Baldwin movaps %xmm4,%xmm13 3917bc3d5698SJohn Baldwin subq $0x10,%rdx 3918bc3d5698SJohn Baldwin jbe .Lcbc_dec_four 3919bc3d5698SJohn Baldwin 3920bc3d5698SJohn Baldwin movups 64(%rdi),%xmm6 3921bc3d5698SJohn Baldwin movaps %xmm5,%xmm14 3922bc3d5698SJohn Baldwin movaps %xmm6,%xmm15 3923bc3d5698SJohn Baldwin xorps %xmm7,%xmm7 3924bc3d5698SJohn Baldwin call _aesni_decrypt6 3925bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3926bc3d5698SJohn Baldwin movaps %xmm15,%xmm10 3927bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 3928bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 3929bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 3930bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 3931bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 3932bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 3933bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 3934bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 3935bc3d5698SJohn Baldwin pxor %xmm14,%xmm6 3936bc3d5698SJohn Baldwin movdqu %xmm5,48(%rsi) 3937bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 3938bc3d5698SJohn Baldwin leaq 64(%rsi),%rsi 3939bc3d5698SJohn Baldwin movdqa %xmm6,%xmm2 3940bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 3941bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 3942bc3d5698SJohn Baldwin subq $0x10,%rdx 3943bc3d5698SJohn Baldwin 
jmp .Lcbc_dec_tail_collected 3944bc3d5698SJohn Baldwin 3945bc3d5698SJohn Baldwin.align 16 3946bc3d5698SJohn Baldwin.Lcbc_dec_one: 3947bc3d5698SJohn Baldwin movaps %xmm2,%xmm11 3948bc3d5698SJohn Baldwin movups (%rcx),%xmm0 3949bc3d5698SJohn Baldwin movups 16(%rcx),%xmm1 3950bc3d5698SJohn Baldwin leaq 32(%rcx),%rcx 3951bc3d5698SJohn Baldwin xorps %xmm0,%xmm2 3952bc3d5698SJohn Baldwin.Loop_dec1_17: 3953bc3d5698SJohn Baldwin.byte 102,15,56,222,209 3954bc3d5698SJohn Baldwin decl %eax 3955bc3d5698SJohn Baldwin movups (%rcx),%xmm1 3956bc3d5698SJohn Baldwin leaq 16(%rcx),%rcx 3957bc3d5698SJohn Baldwin jnz .Loop_dec1_17 3958bc3d5698SJohn Baldwin.byte 102,15,56,223,209 3959bc3d5698SJohn Baldwin xorps %xmm10,%xmm2 3960bc3d5698SJohn Baldwin movaps %xmm11,%xmm10 3961bc3d5698SJohn Baldwin jmp .Lcbc_dec_tail_collected 3962bc3d5698SJohn Baldwin.align 16 3963bc3d5698SJohn Baldwin.Lcbc_dec_two: 3964bc3d5698SJohn Baldwin movaps %xmm3,%xmm12 3965bc3d5698SJohn Baldwin call _aesni_decrypt2 3966bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3967bc3d5698SJohn Baldwin movaps %xmm12,%xmm10 3968bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 3969bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 3970bc3d5698SJohn Baldwin movdqa %xmm3,%xmm2 3971bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 3972bc3d5698SJohn Baldwin leaq 16(%rsi),%rsi 3973bc3d5698SJohn Baldwin jmp .Lcbc_dec_tail_collected 3974bc3d5698SJohn Baldwin.align 16 3975bc3d5698SJohn Baldwin.Lcbc_dec_three: 3976bc3d5698SJohn Baldwin movaps %xmm4,%xmm13 3977bc3d5698SJohn Baldwin call _aesni_decrypt3 3978bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3979bc3d5698SJohn Baldwin movaps %xmm13,%xmm10 3980bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 3981bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 3982bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 3983bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 3984bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 3985bc3d5698SJohn Baldwin movdqa %xmm4,%xmm2 3986bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 3987bc3d5698SJohn Baldwin leaq 32(%rsi),%rsi 3988bc3d5698SJohn Baldwin 
jmp .Lcbc_dec_tail_collected 3989bc3d5698SJohn Baldwin.align 16 3990bc3d5698SJohn Baldwin.Lcbc_dec_four: 3991bc3d5698SJohn Baldwin movaps %xmm5,%xmm14 3992bc3d5698SJohn Baldwin call _aesni_decrypt4 3993bc3d5698SJohn Baldwin pxor %xmm10,%xmm2 3994bc3d5698SJohn Baldwin movaps %xmm14,%xmm10 3995bc3d5698SJohn Baldwin pxor %xmm11,%xmm3 3996bc3d5698SJohn Baldwin movdqu %xmm2,(%rsi) 3997bc3d5698SJohn Baldwin pxor %xmm12,%xmm4 3998bc3d5698SJohn Baldwin movdqu %xmm3,16(%rsi) 3999bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 4000bc3d5698SJohn Baldwin pxor %xmm13,%xmm5 4001bc3d5698SJohn Baldwin movdqu %xmm4,32(%rsi) 4002bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 4003bc3d5698SJohn Baldwin movdqa %xmm5,%xmm2 4004bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 4005bc3d5698SJohn Baldwin leaq 48(%rsi),%rsi 4006bc3d5698SJohn Baldwin jmp .Lcbc_dec_tail_collected 4007bc3d5698SJohn Baldwin 4008bc3d5698SJohn Baldwin.align 16 4009bc3d5698SJohn Baldwin.Lcbc_dec_clear_tail_collected: 4010bc3d5698SJohn Baldwin pxor %xmm3,%xmm3 4011bc3d5698SJohn Baldwin pxor %xmm4,%xmm4 4012bc3d5698SJohn Baldwin pxor %xmm5,%xmm5 4013bc3d5698SJohn Baldwin pxor %xmm6,%xmm6 4014bc3d5698SJohn Baldwin pxor %xmm7,%xmm7 4015bc3d5698SJohn Baldwin pxor %xmm8,%xmm8 4016bc3d5698SJohn Baldwin pxor %xmm9,%xmm9 4017bc3d5698SJohn Baldwin.Lcbc_dec_tail_collected: 4018bc3d5698SJohn Baldwin movups %xmm10,(%r8) 4019bc3d5698SJohn Baldwin andq $15,%rdx 4020bc3d5698SJohn Baldwin jnz .Lcbc_dec_tail_partial 4021bc3d5698SJohn Baldwin movups %xmm2,(%rsi) 4022bc3d5698SJohn Baldwin pxor %xmm2,%xmm2 4023bc3d5698SJohn Baldwin jmp .Lcbc_dec_ret 4024bc3d5698SJohn Baldwin.align 16 4025bc3d5698SJohn Baldwin.Lcbc_dec_tail_partial: 4026bc3d5698SJohn Baldwin movaps %xmm2,(%rsp) 4027bc3d5698SJohn Baldwin pxor %xmm2,%xmm2 4028bc3d5698SJohn Baldwin movq $16,%rcx 4029bc3d5698SJohn Baldwin movq %rsi,%rdi 4030bc3d5698SJohn Baldwin subq %rdx,%rcx 4031bc3d5698SJohn Baldwin leaq (%rsp),%rsi 4032bc3d5698SJohn Baldwin.long 0x9066A4F3 4033bc3d5698SJohn Baldwin movdqa 
%xmm2,(%rsp)	# operand tail of the `movdqa` that starts at the end of the previous line (%xmm2 was zeroed above, so this wipes the stack scratch block)

# ---- tail of aesni_cbc_encrypt (its body begins earlier in the file) ----
.Lcbc_dec_ret:
	xorps	%xmm0,%xmm0		# scrub round-key registers
	pxor	%xmm1,%xmm1
	movq	-8(%r11),%rbp		# presumably %r11 = stack pointer saved by the prologue (not visible here) -- confirm
.cfi_restore	%rbp
	leaq	(%r11),%rsp
.cfi_def_cfa_register	%rsp
.Lcbc_ret:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt

#-----------------------------------------------------------------------
# aesni_set_decrypt_key
#
# In:    %rdi = user key, %esi = key length in bits, %rdx = key schedule
#        (all three are handed unchanged to __aesni_set_encrypt_key)
# Out:   %eax = 0 on success, otherwise the non-zero status returned by
#        __aesni_set_encrypt_key (the schedule is then left as that
#        routine produced it)
# Clobb: %rdx, %rsi, %rdi, %xmm0, %xmm1 (both zeroed on success), flags
#
# Builds the encryption schedule, then converts it in place: the round
# keys are reversed end-for-end and every key except the first and the
# last is passed through AESIMC.
#
# The leading endbr64 matches aesni_encrypt/aesni_decrypt at the top of
# the file and the IBT bit advertised in .note.gnu.property below; it is
# a NOP when CET is not active.  This file is generated, so the same
# change belongs in aesni-x86_64.pl.
#-----------------------------------------------------------------------
.globl	aesni_set_decrypt_key
.type	aesni_set_decrypt_key,@function
.align	16
aesni_set_decrypt_key:
.cfi_startproc
.byte	243,15,30,250			# endbr64
.byte	0x48,0x83,0xEC,0x08		# sub $8,%rsp
.cfi_adjust_cfa_offset	8
	call	__aesni_set_encrypt_key
	shll	$4,%esi			# %esi = 9/11/13 on success -> byte offset (x16)
	testl	%eax,%eax
	jnz	.Ldec_key_ret
	leaq	16(%rdx,%rsi,1),%rdi	# %rdi -> last round key, %rdx -> first

	movups	(%rdx),%xmm0		# swap first and last round keys as-is
	movups	(%rdi),%xmm1
	movups	%xmm0,(%rdi)
	movups	%xmm1,(%rdx)
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi

.Ldec_key_inverse:			# walk inwards from both ends
	movups	(%rdx),%xmm0
	movups	(%rdi),%xmm1
.byte	102,15,56,219,192		# aesimc %xmm0,%xmm0
.byte	102,15,56,219,201		# aesimc %xmm1,%xmm1
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi
	movups	%xmm0,16(%rdi)		# store each transformed key in the other's slot
	movups	%xmm1,-16(%rdx)
	cmpq	%rdx,%rdi
	ja	.Ldec_key_inverse

	movups	(%rdx),%xmm0		# middle key: transform in place
.byte	102,15,56,219,192		# aesimc %xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm0,(%rdi)
	pxor	%xmm0,%xmm0
.Ldec_key_ret:
	addq	$8,%rsp
.cfi_adjust_cfa_offset	-8
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.LSEH_end_set_decrypt_key:
.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key

#-----------------------------------------------------------------------
# aesni_set_encrypt_key / __aesni_set_encrypt_key
#
# In:    %rdi = user key, %esi = key length in bits (128, 192 or 256),
#        %rdx = key schedule to fill
# Out:   %eax =  0  success
#               -1  %rdi or %rdx is NULL
#               -2  unsupported bit count
#        On success %esi = 9, 11 or 13 and that value is also stored at
#        byte offset 240 of the schedule (every path below ends with a
#        store that resolves to 240(%rdx)).  %rdx is left untouched,
#        which aesni_set_decrypt_key relies on.
# Clobb: %rax, %esi, %r10, %xmm0-%xmm5 (all zeroed on exit), flags
#
# Two strategies per key size, selected from the dword at
# OPENSSL_ia32cap_P+4 masked with 0x10000800 (bits 28 and 11;
# presumably AVX and XOP per OpenSSL's capability vector -- confirm):
# when only bit 28 is set the "_alt" loops (pshufb + aesenclast) run,
# otherwise the aeskeygenassist sequences with the .Lkey_expansion_*
# helpers.
#
# The leading endbr64 is a NOP without CET; see the note on
# aesni_set_decrypt_key.  Mirror the change in aesni-x86_64.pl.
#-----------------------------------------------------------------------
.globl	aesni_set_encrypt_key
.type	aesni_set_encrypt_key,@function
.align	16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
.byte	243,15,30,250			# endbr64
.byte	0x48,0x83,0xEC,0x08		# sub $8,%rsp
.cfi_adjust_cfa_offset	8
	movq	$-1,%rax		# status for the NULL-pointer exits
	testq	%rdi,%rdi
	jz	.Lenc_key_ret
	testq	%rdx,%rdx
	jz	.Lenc_key_ret

	movl	$268437504,%r10d	# 0x10000800
	movups	(%rdi),%xmm0		# first 16 key bytes
	xorps	%xmm4,%xmm4		# helpers need the low dword of %xmm4 to be zero
	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
	leaq	16(%rdx),%rax		# %rax = write cursor, starts at round key 1
	cmpl	$256,%esi
	je	.L14rounds
	cmpl	$192,%esi
	je	.L12rounds
	cmpl	$128,%esi
	jne	.Lbad_keybits

.L10rounds:
	movl	$9,%esi
	cmpl	$268435456,%r10d	# 0x10000000: only bit 28 set?
	je	.L10rounds_alt

	movups	%xmm0,(%rdx)		# round key 0 = user key
.byte	102,15,58,223,200,1		# aeskeygenassist $0x01,%xmm0,%xmm1
	call	.Lkey_expansion_128_cold
.byte	102,15,58,223,200,2		# aeskeygenassist $0x02,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,4		# aeskeygenassist $0x04,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,8		# aeskeygenassist $0x08,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,16		# aeskeygenassist $0x10,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,32		# aeskeygenassist $0x20,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,64		# aeskeygenassist $0x40,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,128		# aeskeygenassist $0x80,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,27		# aeskeygenassist $0x1b,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,54		# aeskeygenassist $0x36,%xmm0,%xmm1
	call	.Lkey_expansion_128
	movups	%xmm0,(%rax)		# final round key (%rax = %rdx+160)
	movl	%esi,80(%rax)		# -> 240(%rdx)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L10rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movl	$8,%r10d		# eight looped rounds; the last two are unrolled below
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key128

.align	16
.Loop_key128:
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4		# next round constant
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3		# %xmm2 = k ^ k<<32 ^ k<<64 ^ k<<96
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	.Loop_key128

	movdqa	.Lkey_rcon1b(%rip),%xmm4	# round constants 0x1b, then 0x36

.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	movl	%esi,96(%rax)		# -> 240(%rdx)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L12rounds:
	movq	16(%rdi),%xmm2		# remaining 8 key bytes
	movl	$11,%esi
	cmpl	$268435456,%r10d
	je	.L12rounds_alt

	movups	%xmm0,(%rdx)
.byte	102,15,58,223,202,1		# aeskeygenassist $0x01,%xmm2,%xmm1
	call	.Lkey_expansion_192a_cold
.byte	102,15,58,223,202,2		# aeskeygenassist $0x02,%xmm2,%xmm1
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,4		# aeskeygenassist $0x04,%xmm2,%xmm1
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,8		# aeskeygenassist $0x08,%xmm2,%xmm1
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,16		# aeskeygenassist $0x10,%xmm2,%xmm1
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,32		# aeskeygenassist $0x20,%xmm2,%xmm1
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,64		# aeskeygenassist $0x40,%xmm2,%xmm1
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,128		# aeskeygenassist $0x80,%xmm2,%xmm1
	call	.Lkey_expansion_192b
	movups	%xmm0,(%rax)		# final round key (%rax = %rdx+192)
	movl	%esi,48(%rax)		# -> 240(%rdx)
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L12rounds_alt:
	movdqa	.Lkey_rotate192(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$8,%r10d
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key192

.align	16
.Loop_key192:				# emits 24 bytes of schedule per pass
	movq	%xmm2,0(%rax)
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
.byte	102,15,56,221,212		# aesenclast %xmm4,%xmm2
	pslld	$1,%xmm4
	leaq	24(%rax),%rax

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0

	pshufd	$0xff,%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3

	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%rax)

	decl	%r10d
	jnz	.Loop_key192

	movl	%esi,32(%rax)		# -> 240(%rdx)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L14rounds:
	movups	16(%rdi),%xmm2		# second 16 key bytes
	movl	$13,%esi
	leaq	16(%rax),%rax		# cursor starts at round key 2
	cmpl	$268435456,%r10d
	je	.L14rounds_alt

	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1		# aeskeygenassist $0x01,%xmm2,%xmm1
	call	.Lkey_expansion_256a_cold
.byte	102,15,58,223,200,1		# aeskeygenassist $0x01,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,2		# aeskeygenassist $0x02,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,2		# aeskeygenassist $0x02,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,4		# aeskeygenassist $0x04,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,4		# aeskeygenassist $0x04,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,8		# aeskeygenassist $0x08,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,8		# aeskeygenassist $0x08,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,16		# aeskeygenassist $0x10,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,16		# aeskeygenassist $0x10,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,32		# aeskeygenassist $0x20,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,32		# aeskeygenassist $0x20,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,64		# aeskeygenassist $0x40,%xmm2,%xmm1
	call	.Lkey_expansion_256a
	movups	%xmm0,(%rax)		# final round key (%rax = %rdx+224)
	movl	%esi,16(%rax)		# -> 240(%rdx)
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L14rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$7,%r10d		# seven even keys; the loop exits before the seventh odd key
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	.Loop_key256

.align	16
.Loop_key256:
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
.byte	102,15,56,221,212		# aesenclast %xmm4,%xmm2

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)		# even-numbered round key

	decl	%r10d
	jz	.Ldone_key256

	pshufd	$0xff,%xmm0,%xmm2
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211		# aesenclast %xmm3,%xmm2 (zero round key)

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)		# odd-numbered round key
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	.Loop_key256

.Ldone_key256:
	movl	%esi,16(%rax)		# -> 240(%rdx)
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.Lbad_keybits:
	movq	$-2,%rax
.Lenc_key_ret:				# common exit: scrub key material from registers
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp
.cfi_adjust_cfa_offset	-8
	.byte	0xf3,0xc3		# rep ret
.LSEH_end_set_encrypt_key:

# ---- local helpers, reached only by `call` from the code above ----
# Shared conventions: %rax = schedule write cursor, %xmm1 = result of the
# preceding aeskeygenassist, %xmm4 = scratch whose low dword stays zero.
# The "_cold" entry points skip the store of the previous key.

.align	16
.Lkey_expansion_128:
	movups	%xmm0,(%rax)		# emit previous round key
	leaq	16(%rax),%rax
.Lkey_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4		# two shufps/xorps steps leave each word of
	xorps	%xmm4,%xmm0		# %xmm0 XORed with all lower words
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	# broadcast dword 3 of the keygenassist result
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3		# rep ret

.align	16
.Lkey_expansion_192a:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps	%xmm2,%xmm5		# keep the current %xmm2 for the next 192b store
.Lkey_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1		# broadcast dword 1 of the keygenassist result
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3
	pxor	%xmm3,%xmm2
	.byte	0xf3,0xc3		# rep ret

.align	16
.Lkey_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5		# repack saved %xmm5 and %xmm0/%xmm2 into
	movups	%xmm5,(%rax)		# two 16-byte schedule entries
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	.Lkey_expansion_192b_warm

.align	16
.Lkey_expansion_256a:
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	# broadcast dword 3
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3		# rep ret

.align	16
.Lkey_expansion_256b:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1	# broadcast dword 2
	xorps	%xmm1,%xmm2
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key

# ---- constant pool ----
# Only the .Lkey_* entries are referenced from the code in this part of
# the file; the others are used by routines defined earlier.
.align	64
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lincrement32:
.long	6,6,6,0
.Lincrement64:
.long	1,0,0,0
.Lxts_magic:
.long	0x87,0,1,0
.Lincrement1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Lkey_rotate:				# pshufb mask for the 128/256-bit _alt loops
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.Lkey_rotate192:			# pshufb mask for the 192-bit _alt loop
.long	0x04070605,0x04070605,0x04070605,0x04070605
.Lkey_rcon1:				# initial round constant, doubled with pslld
.long	1,1,1,1
.Lkey_rcon1b:				# round constant 0x1b for the unrolled 128-bit tail
.long	0x1b,0x1b,0x1b,0x1b

# "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64

# GNU property note: name size, descriptor size, type 5, then the name
# and one property record of type 0xc0000002 carrying the 4-byte value 3
# (per the x86-64 psABI: X86_FEATURE_1_AND with IBT | SHSTK).
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4: