1#if defined(__x86_64__) 2 3#include "llvm_blake3_prefix.h" 4 5#if defined(__ELF__) && !(defined(__sun__) && defined(__svr4__)) 6.section .note.GNU-stack,"",%progbits 7#endif 8 9#if defined(__ELF__) && defined(__CET__) && defined(__has_include) 10#if __has_include(<cet.h>) 11#include <cet.h> 12#endif 13#endif 14 15#if !defined(_CET_ENDBR) 16#define _CET_ENDBR 17#endif 18 19#ifdef __APPLE__ 20#define HIDDEN .private_extern 21#else 22#define HIDDEN .hidden 23#endif 24 25.intel_syntax noprefix 26HIDDEN blake3_hash_many_sse41 27HIDDEN _blake3_hash_many_sse41 28HIDDEN blake3_compress_in_place_sse41 29HIDDEN _blake3_compress_in_place_sse41 30HIDDEN blake3_compress_xof_sse41 31HIDDEN _blake3_compress_xof_sse41 32.global blake3_hash_many_sse41 33.global _blake3_hash_many_sse41 34.global blake3_compress_in_place_sse41 35.global _blake3_compress_in_place_sse41 36.global blake3_compress_xof_sse41 37.global _blake3_compress_xof_sse41 38#ifdef __APPLE__ 39.text 40#else 41.section .text 42#endif 43 .p2align 6 44_blake3_hash_many_sse41: 45blake3_hash_many_sse41: 46 _CET_ENDBR 47 push r15 48 push r14 49 push r13 50 push r12 51 push rbx 52 push rbp 53 mov rbp, rsp 54 sub rsp, 360 55 and rsp, 0xFFFFFFFFFFFFFFC0 56 neg r9d 57 movd xmm0, r9d 58 pshufd xmm0, xmm0, 0x00 59 movdqa xmmword ptr [rsp+0x130], xmm0 60 movdqa xmm1, xmm0 61 pand xmm1, xmmword ptr [ADD0+rip] 62 pand xmm0, xmmword ptr [ADD1+rip] 63 movdqa xmmword ptr [rsp+0x150], xmm0 64 movd xmm0, r8d 65 pshufd xmm0, xmm0, 0x00 66 paddd xmm0, xmm1 67 movdqa xmmword ptr [rsp+0x110], xmm0 68 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 69 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 70 pcmpgtd xmm1, xmm0 71 shr r8, 32 72 movd xmm2, r8d 73 pshufd xmm2, xmm2, 0x00 74 psubd xmm2, xmm1 75 movdqa xmmword ptr [rsp+0x120], xmm2 76 mov rbx, qword ptr [rbp+0x50] 77 mov r15, rdx 78 shl r15, 6 79 movzx r13d, byte ptr [rbp+0x38] 80 movzx r12d, byte ptr [rbp+0x48] 81 cmp rsi, 4 82 jc 3f 832: 84 movdqu xmm3, xmmword ptr [rcx] 85 pshufd xmm0, xmm3, 
0x00 86 pshufd xmm1, xmm3, 0x55 87 pshufd xmm2, xmm3, 0xAA 88 pshufd xmm3, xmm3, 0xFF 89 movdqu xmm7, xmmword ptr [rcx+0x10] 90 pshufd xmm4, xmm7, 0x00 91 pshufd xmm5, xmm7, 0x55 92 pshufd xmm6, xmm7, 0xAA 93 pshufd xmm7, xmm7, 0xFF 94 mov r8, qword ptr [rdi] 95 mov r9, qword ptr [rdi+0x8] 96 mov r10, qword ptr [rdi+0x10] 97 mov r11, qword ptr [rdi+0x18] 98 movzx eax, byte ptr [rbp+0x40] 99 or eax, r13d 100 xor edx, edx 1019: 102 mov r14d, eax 103 or eax, r12d 104 add rdx, 64 105 cmp rdx, r15 106 cmovne eax, r14d 107 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 108 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 109 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 110 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 111 movdqa xmm12, xmm8 112 punpckldq xmm8, xmm9 113 punpckhdq xmm12, xmm9 114 movdqa xmm14, xmm10 115 punpckldq xmm10, xmm11 116 punpckhdq xmm14, xmm11 117 movdqa xmm9, xmm8 118 punpcklqdq xmm8, xmm10 119 punpckhqdq xmm9, xmm10 120 movdqa xmm13, xmm12 121 punpcklqdq xmm12, xmm14 122 punpckhqdq xmm13, xmm14 123 movdqa xmmword ptr [rsp], xmm8 124 movdqa xmmword ptr [rsp+0x10], xmm9 125 movdqa xmmword ptr [rsp+0x20], xmm12 126 movdqa xmmword ptr [rsp+0x30], xmm13 127 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 128 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 129 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 130 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 131 movdqa xmm12, xmm8 132 punpckldq xmm8, xmm9 133 punpckhdq xmm12, xmm9 134 movdqa xmm14, xmm10 135 punpckldq xmm10, xmm11 136 punpckhdq xmm14, xmm11 137 movdqa xmm9, xmm8 138 punpcklqdq xmm8, xmm10 139 punpckhqdq xmm9, xmm10 140 movdqa xmm13, xmm12 141 punpcklqdq xmm12, xmm14 142 punpckhqdq xmm13, xmm14 143 movdqa xmmword ptr [rsp+0x40], xmm8 144 movdqa xmmword ptr [rsp+0x50], xmm9 145 movdqa xmmword ptr [rsp+0x60], xmm12 146 movdqa xmmword ptr [rsp+0x70], xmm13 147 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 148 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 149 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 150 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 151 movdqa xmm12, 
xmm8 152 punpckldq xmm8, xmm9 153 punpckhdq xmm12, xmm9 154 movdqa xmm14, xmm10 155 punpckldq xmm10, xmm11 156 punpckhdq xmm14, xmm11 157 movdqa xmm9, xmm8 158 punpcklqdq xmm8, xmm10 159 punpckhqdq xmm9, xmm10 160 movdqa xmm13, xmm12 161 punpcklqdq xmm12, xmm14 162 punpckhqdq xmm13, xmm14 163 movdqa xmmword ptr [rsp+0x80], xmm8 164 movdqa xmmword ptr [rsp+0x90], xmm9 165 movdqa xmmword ptr [rsp+0xA0], xmm12 166 movdqa xmmword ptr [rsp+0xB0], xmm13 167 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 168 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 169 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 170 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 171 movdqa xmm12, xmm8 172 punpckldq xmm8, xmm9 173 punpckhdq xmm12, xmm9 174 movdqa xmm14, xmm10 175 punpckldq xmm10, xmm11 176 punpckhdq xmm14, xmm11 177 movdqa xmm9, xmm8 178 punpcklqdq xmm8, xmm10 179 punpckhqdq xmm9, xmm10 180 movdqa xmm13, xmm12 181 punpcklqdq xmm12, xmm14 182 punpckhqdq xmm13, xmm14 183 movdqa xmmword ptr [rsp+0xC0], xmm8 184 movdqa xmmword ptr [rsp+0xD0], xmm9 185 movdqa xmmword ptr [rsp+0xE0], xmm12 186 movdqa xmmword ptr [rsp+0xF0], xmm13 187 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 188 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 189 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 190 movdqa xmm12, xmmword ptr [rsp+0x110] 191 movdqa xmm13, xmmword ptr [rsp+0x120] 192 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 193 movd xmm15, eax 194 pshufd xmm15, xmm15, 0x00 195 prefetcht0 [r8+rdx+0x80] 196 prefetcht0 [r9+rdx+0x80] 197 prefetcht0 [r10+rdx+0x80] 198 prefetcht0 [r11+rdx+0x80] 199 paddd xmm0, xmmword ptr [rsp] 200 paddd xmm1, xmmword ptr [rsp+0x20] 201 paddd xmm2, xmmword ptr [rsp+0x40] 202 paddd xmm3, xmmword ptr [rsp+0x60] 203 paddd xmm0, xmm4 204 paddd xmm1, xmm5 205 paddd xmm2, xmm6 206 paddd xmm3, xmm7 207 pxor xmm12, xmm0 208 pxor xmm13, xmm1 209 pxor xmm14, xmm2 210 pxor xmm15, xmm3 211 movdqa xmm8, xmmword ptr [ROT16+rip] 212 pshufb xmm12, xmm8 213 pshufb xmm13, xmm8 214 pshufb xmm14, xmm8 215 pshufb xmm15, xmm8 
216 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 217 paddd xmm8, xmm12 218 paddd xmm9, xmm13 219 paddd xmm10, xmm14 220 paddd xmm11, xmm15 221 pxor xmm4, xmm8 222 pxor xmm5, xmm9 223 pxor xmm6, xmm10 224 pxor xmm7, xmm11 225 movdqa xmmword ptr [rsp+0x100], xmm8 226 movdqa xmm8, xmm4 227 psrld xmm8, 12 228 pslld xmm4, 20 229 por xmm4, xmm8 230 movdqa xmm8, xmm5 231 psrld xmm8, 12 232 pslld xmm5, 20 233 por xmm5, xmm8 234 movdqa xmm8, xmm6 235 psrld xmm8, 12 236 pslld xmm6, 20 237 por xmm6, xmm8 238 movdqa xmm8, xmm7 239 psrld xmm8, 12 240 pslld xmm7, 20 241 por xmm7, xmm8 242 paddd xmm0, xmmword ptr [rsp+0x10] 243 paddd xmm1, xmmword ptr [rsp+0x30] 244 paddd xmm2, xmmword ptr [rsp+0x50] 245 paddd xmm3, xmmword ptr [rsp+0x70] 246 paddd xmm0, xmm4 247 paddd xmm1, xmm5 248 paddd xmm2, xmm6 249 paddd xmm3, xmm7 250 pxor xmm12, xmm0 251 pxor xmm13, xmm1 252 pxor xmm14, xmm2 253 pxor xmm15, xmm3 254 movdqa xmm8, xmmword ptr [ROT8+rip] 255 pshufb xmm12, xmm8 256 pshufb xmm13, xmm8 257 pshufb xmm14, xmm8 258 pshufb xmm15, xmm8 259 movdqa xmm8, xmmword ptr [rsp+0x100] 260 paddd xmm8, xmm12 261 paddd xmm9, xmm13 262 paddd xmm10, xmm14 263 paddd xmm11, xmm15 264 pxor xmm4, xmm8 265 pxor xmm5, xmm9 266 pxor xmm6, xmm10 267 pxor xmm7, xmm11 268 movdqa xmmword ptr [rsp+0x100], xmm8 269 movdqa xmm8, xmm4 270 psrld xmm8, 7 271 pslld xmm4, 25 272 por xmm4, xmm8 273 movdqa xmm8, xmm5 274 psrld xmm8, 7 275 pslld xmm5, 25 276 por xmm5, xmm8 277 movdqa xmm8, xmm6 278 psrld xmm8, 7 279 pslld xmm6, 25 280 por xmm6, xmm8 281 movdqa xmm8, xmm7 282 psrld xmm8, 7 283 pslld xmm7, 25 284 por xmm7, xmm8 285 paddd xmm0, xmmword ptr [rsp+0x80] 286 paddd xmm1, xmmword ptr [rsp+0xA0] 287 paddd xmm2, xmmword ptr [rsp+0xC0] 288 paddd xmm3, xmmword ptr [rsp+0xE0] 289 paddd xmm0, xmm5 290 paddd xmm1, xmm6 291 paddd xmm2, xmm7 292 paddd xmm3, xmm4 293 pxor xmm15, xmm0 294 pxor xmm12, xmm1 295 pxor xmm13, xmm2 296 pxor xmm14, xmm3 297 movdqa xmm8, xmmword ptr [ROT16+rip] 298 pshufb xmm15, xmm8 299 pshufb 
xmm12, xmm8 300 pshufb xmm13, xmm8 301 pshufb xmm14, xmm8 302 paddd xmm10, xmm15 303 paddd xmm11, xmm12 304 movdqa xmm8, xmmword ptr [rsp+0x100] 305 paddd xmm8, xmm13 306 paddd xmm9, xmm14 307 pxor xmm5, xmm10 308 pxor xmm6, xmm11 309 pxor xmm7, xmm8 310 pxor xmm4, xmm9 311 movdqa xmmword ptr [rsp+0x100], xmm8 312 movdqa xmm8, xmm5 313 psrld xmm8, 12 314 pslld xmm5, 20 315 por xmm5, xmm8 316 movdqa xmm8, xmm6 317 psrld xmm8, 12 318 pslld xmm6, 20 319 por xmm6, xmm8 320 movdqa xmm8, xmm7 321 psrld xmm8, 12 322 pslld xmm7, 20 323 por xmm7, xmm8 324 movdqa xmm8, xmm4 325 psrld xmm8, 12 326 pslld xmm4, 20 327 por xmm4, xmm8 328 paddd xmm0, xmmword ptr [rsp+0x90] 329 paddd xmm1, xmmword ptr [rsp+0xB0] 330 paddd xmm2, xmmword ptr [rsp+0xD0] 331 paddd xmm3, xmmword ptr [rsp+0xF0] 332 paddd xmm0, xmm5 333 paddd xmm1, xmm6 334 paddd xmm2, xmm7 335 paddd xmm3, xmm4 336 pxor xmm15, xmm0 337 pxor xmm12, xmm1 338 pxor xmm13, xmm2 339 pxor xmm14, xmm3 340 movdqa xmm8, xmmword ptr [ROT8+rip] 341 pshufb xmm15, xmm8 342 pshufb xmm12, xmm8 343 pshufb xmm13, xmm8 344 pshufb xmm14, xmm8 345 paddd xmm10, xmm15 346 paddd xmm11, xmm12 347 movdqa xmm8, xmmword ptr [rsp+0x100] 348 paddd xmm8, xmm13 349 paddd xmm9, xmm14 350 pxor xmm5, xmm10 351 pxor xmm6, xmm11 352 pxor xmm7, xmm8 353 pxor xmm4, xmm9 354 movdqa xmmword ptr [rsp+0x100], xmm8 355 movdqa xmm8, xmm5 356 psrld xmm8, 7 357 pslld xmm5, 25 358 por xmm5, xmm8 359 movdqa xmm8, xmm6 360 psrld xmm8, 7 361 pslld xmm6, 25 362 por xmm6, xmm8 363 movdqa xmm8, xmm7 364 psrld xmm8, 7 365 pslld xmm7, 25 366 por xmm7, xmm8 367 movdqa xmm8, xmm4 368 psrld xmm8, 7 369 pslld xmm4, 25 370 por xmm4, xmm8 371 paddd xmm0, xmmword ptr [rsp+0x20] 372 paddd xmm1, xmmword ptr [rsp+0x30] 373 paddd xmm2, xmmword ptr [rsp+0x70] 374 paddd xmm3, xmmword ptr [rsp+0x40] 375 paddd xmm0, xmm4 376 paddd xmm1, xmm5 377 paddd xmm2, xmm6 378 paddd xmm3, xmm7 379 pxor xmm12, xmm0 380 pxor xmm13, xmm1 381 pxor xmm14, xmm2 382 pxor xmm15, xmm3 383 movdqa xmm8, xmmword 
ptr [ROT16+rip] 384 pshufb xmm12, xmm8 385 pshufb xmm13, xmm8 386 pshufb xmm14, xmm8 387 pshufb xmm15, xmm8 388 movdqa xmm8, xmmword ptr [rsp+0x100] 389 paddd xmm8, xmm12 390 paddd xmm9, xmm13 391 paddd xmm10, xmm14 392 paddd xmm11, xmm15 393 pxor xmm4, xmm8 394 pxor xmm5, xmm9 395 pxor xmm6, xmm10 396 pxor xmm7, xmm11 397 movdqa xmmword ptr [rsp+0x100], xmm8 398 movdqa xmm8, xmm4 399 psrld xmm8, 12 400 pslld xmm4, 20 401 por xmm4, xmm8 402 movdqa xmm8, xmm5 403 psrld xmm8, 12 404 pslld xmm5, 20 405 por xmm5, xmm8 406 movdqa xmm8, xmm6 407 psrld xmm8, 12 408 pslld xmm6, 20 409 por xmm6, xmm8 410 movdqa xmm8, xmm7 411 psrld xmm8, 12 412 pslld xmm7, 20 413 por xmm7, xmm8 414 paddd xmm0, xmmword ptr [rsp+0x60] 415 paddd xmm1, xmmword ptr [rsp+0xA0] 416 paddd xmm2, xmmword ptr [rsp] 417 paddd xmm3, xmmword ptr [rsp+0xD0] 418 paddd xmm0, xmm4 419 paddd xmm1, xmm5 420 paddd xmm2, xmm6 421 paddd xmm3, xmm7 422 pxor xmm12, xmm0 423 pxor xmm13, xmm1 424 pxor xmm14, xmm2 425 pxor xmm15, xmm3 426 movdqa xmm8, xmmword ptr [ROT8+rip] 427 pshufb xmm12, xmm8 428 pshufb xmm13, xmm8 429 pshufb xmm14, xmm8 430 pshufb xmm15, xmm8 431 movdqa xmm8, xmmword ptr [rsp+0x100] 432 paddd xmm8, xmm12 433 paddd xmm9, xmm13 434 paddd xmm10, xmm14 435 paddd xmm11, xmm15 436 pxor xmm4, xmm8 437 pxor xmm5, xmm9 438 pxor xmm6, xmm10 439 pxor xmm7, xmm11 440 movdqa xmmword ptr [rsp+0x100], xmm8 441 movdqa xmm8, xmm4 442 psrld xmm8, 7 443 pslld xmm4, 25 444 por xmm4, xmm8 445 movdqa xmm8, xmm5 446 psrld xmm8, 7 447 pslld xmm5, 25 448 por xmm5, xmm8 449 movdqa xmm8, xmm6 450 psrld xmm8, 7 451 pslld xmm6, 25 452 por xmm6, xmm8 453 movdqa xmm8, xmm7 454 psrld xmm8, 7 455 pslld xmm7, 25 456 por xmm7, xmm8 457 paddd xmm0, xmmword ptr [rsp+0x10] 458 paddd xmm1, xmmword ptr [rsp+0xC0] 459 paddd xmm2, xmmword ptr [rsp+0x90] 460 paddd xmm3, xmmword ptr [rsp+0xF0] 461 paddd xmm0, xmm5 462 paddd xmm1, xmm6 463 paddd xmm2, xmm7 464 paddd xmm3, xmm4 465 pxor xmm15, xmm0 466 pxor xmm12, xmm1 467 pxor xmm13, xmm2 
468 pxor xmm14, xmm3 469 movdqa xmm8, xmmword ptr [ROT16+rip] 470 pshufb xmm15, xmm8 471 pshufb xmm12, xmm8 472 pshufb xmm13, xmm8 473 pshufb xmm14, xmm8 474 paddd xmm10, xmm15 475 paddd xmm11, xmm12 476 movdqa xmm8, xmmword ptr [rsp+0x100] 477 paddd xmm8, xmm13 478 paddd xmm9, xmm14 479 pxor xmm5, xmm10 480 pxor xmm6, xmm11 481 pxor xmm7, xmm8 482 pxor xmm4, xmm9 483 movdqa xmmword ptr [rsp+0x100], xmm8 484 movdqa xmm8, xmm5 485 psrld xmm8, 12 486 pslld xmm5, 20 487 por xmm5, xmm8 488 movdqa xmm8, xmm6 489 psrld xmm8, 12 490 pslld xmm6, 20 491 por xmm6, xmm8 492 movdqa xmm8, xmm7 493 psrld xmm8, 12 494 pslld xmm7, 20 495 por xmm7, xmm8 496 movdqa xmm8, xmm4 497 psrld xmm8, 12 498 pslld xmm4, 20 499 por xmm4, xmm8 500 paddd xmm0, xmmword ptr [rsp+0xB0] 501 paddd xmm1, xmmword ptr [rsp+0x50] 502 paddd xmm2, xmmword ptr [rsp+0xE0] 503 paddd xmm3, xmmword ptr [rsp+0x80] 504 paddd xmm0, xmm5 505 paddd xmm1, xmm6 506 paddd xmm2, xmm7 507 paddd xmm3, xmm4 508 pxor xmm15, xmm0 509 pxor xmm12, xmm1 510 pxor xmm13, xmm2 511 pxor xmm14, xmm3 512 movdqa xmm8, xmmword ptr [ROT8+rip] 513 pshufb xmm15, xmm8 514 pshufb xmm12, xmm8 515 pshufb xmm13, xmm8 516 pshufb xmm14, xmm8 517 paddd xmm10, xmm15 518 paddd xmm11, xmm12 519 movdqa xmm8, xmmword ptr [rsp+0x100] 520 paddd xmm8, xmm13 521 paddd xmm9, xmm14 522 pxor xmm5, xmm10 523 pxor xmm6, xmm11 524 pxor xmm7, xmm8 525 pxor xmm4, xmm9 526 movdqa xmmword ptr [rsp+0x100], xmm8 527 movdqa xmm8, xmm5 528 psrld xmm8, 7 529 pslld xmm5, 25 530 por xmm5, xmm8 531 movdqa xmm8, xmm6 532 psrld xmm8, 7 533 pslld xmm6, 25 534 por xmm6, xmm8 535 movdqa xmm8, xmm7 536 psrld xmm8, 7 537 pslld xmm7, 25 538 por xmm7, xmm8 539 movdqa xmm8, xmm4 540 psrld xmm8, 7 541 pslld xmm4, 25 542 por xmm4, xmm8 543 paddd xmm0, xmmword ptr [rsp+0x30] 544 paddd xmm1, xmmword ptr [rsp+0xA0] 545 paddd xmm2, xmmword ptr [rsp+0xD0] 546 paddd xmm3, xmmword ptr [rsp+0x70] 547 paddd xmm0, xmm4 548 paddd xmm1, xmm5 549 paddd xmm2, xmm6 550 paddd xmm3, xmm7 551 pxor 
xmm12, xmm0 552 pxor xmm13, xmm1 553 pxor xmm14, xmm2 554 pxor xmm15, xmm3 555 movdqa xmm8, xmmword ptr [ROT16+rip] 556 pshufb xmm12, xmm8 557 pshufb xmm13, xmm8 558 pshufb xmm14, xmm8 559 pshufb xmm15, xmm8 560 movdqa xmm8, xmmword ptr [rsp+0x100] 561 paddd xmm8, xmm12 562 paddd xmm9, xmm13 563 paddd xmm10, xmm14 564 paddd xmm11, xmm15 565 pxor xmm4, xmm8 566 pxor xmm5, xmm9 567 pxor xmm6, xmm10 568 pxor xmm7, xmm11 569 movdqa xmmword ptr [rsp+0x100], xmm8 570 movdqa xmm8, xmm4 571 psrld xmm8, 12 572 pslld xmm4, 20 573 por xmm4, xmm8 574 movdqa xmm8, xmm5 575 psrld xmm8, 12 576 pslld xmm5, 20 577 por xmm5, xmm8 578 movdqa xmm8, xmm6 579 psrld xmm8, 12 580 pslld xmm6, 20 581 por xmm6, xmm8 582 movdqa xmm8, xmm7 583 psrld xmm8, 12 584 pslld xmm7, 20 585 por xmm7, xmm8 586 paddd xmm0, xmmword ptr [rsp+0x40] 587 paddd xmm1, xmmword ptr [rsp+0xC0] 588 paddd xmm2, xmmword ptr [rsp+0x20] 589 paddd xmm3, xmmword ptr [rsp+0xE0] 590 paddd xmm0, xmm4 591 paddd xmm1, xmm5 592 paddd xmm2, xmm6 593 paddd xmm3, xmm7 594 pxor xmm12, xmm0 595 pxor xmm13, xmm1 596 pxor xmm14, xmm2 597 pxor xmm15, xmm3 598 movdqa xmm8, xmmword ptr [ROT8+rip] 599 pshufb xmm12, xmm8 600 pshufb xmm13, xmm8 601 pshufb xmm14, xmm8 602 pshufb xmm15, xmm8 603 movdqa xmm8, xmmword ptr [rsp+0x100] 604 paddd xmm8, xmm12 605 paddd xmm9, xmm13 606 paddd xmm10, xmm14 607 paddd xmm11, xmm15 608 pxor xmm4, xmm8 609 pxor xmm5, xmm9 610 pxor xmm6, xmm10 611 pxor xmm7, xmm11 612 movdqa xmmword ptr [rsp+0x100], xmm8 613 movdqa xmm8, xmm4 614 psrld xmm8, 7 615 pslld xmm4, 25 616 por xmm4, xmm8 617 movdqa xmm8, xmm5 618 psrld xmm8, 7 619 pslld xmm5, 25 620 por xmm5, xmm8 621 movdqa xmm8, xmm6 622 psrld xmm8, 7 623 pslld xmm6, 25 624 por xmm6, xmm8 625 movdqa xmm8, xmm7 626 psrld xmm8, 7 627 pslld xmm7, 25 628 por xmm7, xmm8 629 paddd xmm0, xmmword ptr [rsp+0x60] 630 paddd xmm1, xmmword ptr [rsp+0x90] 631 paddd xmm2, xmmword ptr [rsp+0xB0] 632 paddd xmm3, xmmword ptr [rsp+0x80] 633 paddd xmm0, xmm5 634 paddd xmm1, xmm6 
635 paddd xmm2, xmm7 636 paddd xmm3, xmm4 637 pxor xmm15, xmm0 638 pxor xmm12, xmm1 639 pxor xmm13, xmm2 640 pxor xmm14, xmm3 641 movdqa xmm8, xmmword ptr [ROT16+rip] 642 pshufb xmm15, xmm8 643 pshufb xmm12, xmm8 644 pshufb xmm13, xmm8 645 pshufb xmm14, xmm8 646 paddd xmm10, xmm15 647 paddd xmm11, xmm12 648 movdqa xmm8, xmmword ptr [rsp+0x100] 649 paddd xmm8, xmm13 650 paddd xmm9, xmm14 651 pxor xmm5, xmm10 652 pxor xmm6, xmm11 653 pxor xmm7, xmm8 654 pxor xmm4, xmm9 655 movdqa xmmword ptr [rsp+0x100], xmm8 656 movdqa xmm8, xmm5 657 psrld xmm8, 12 658 pslld xmm5, 20 659 por xmm5, xmm8 660 movdqa xmm8, xmm6 661 psrld xmm8, 12 662 pslld xmm6, 20 663 por xmm6, xmm8 664 movdqa xmm8, xmm7 665 psrld xmm8, 12 666 pslld xmm7, 20 667 por xmm7, xmm8 668 movdqa xmm8, xmm4 669 psrld xmm8, 12 670 pslld xmm4, 20 671 por xmm4, xmm8 672 paddd xmm0, xmmword ptr [rsp+0x50] 673 paddd xmm1, xmmword ptr [rsp] 674 paddd xmm2, xmmword ptr [rsp+0xF0] 675 paddd xmm3, xmmword ptr [rsp+0x10] 676 paddd xmm0, xmm5 677 paddd xmm1, xmm6 678 paddd xmm2, xmm7 679 paddd xmm3, xmm4 680 pxor xmm15, xmm0 681 pxor xmm12, xmm1 682 pxor xmm13, xmm2 683 pxor xmm14, xmm3 684 movdqa xmm8, xmmword ptr [ROT8+rip] 685 pshufb xmm15, xmm8 686 pshufb xmm12, xmm8 687 pshufb xmm13, xmm8 688 pshufb xmm14, xmm8 689 paddd xmm10, xmm15 690 paddd xmm11, xmm12 691 movdqa xmm8, xmmword ptr [rsp+0x100] 692 paddd xmm8, xmm13 693 paddd xmm9, xmm14 694 pxor xmm5, xmm10 695 pxor xmm6, xmm11 696 pxor xmm7, xmm8 697 pxor xmm4, xmm9 698 movdqa xmmword ptr [rsp+0x100], xmm8 699 movdqa xmm8, xmm5 700 psrld xmm8, 7 701 pslld xmm5, 25 702 por xmm5, xmm8 703 movdqa xmm8, xmm6 704 psrld xmm8, 7 705 pslld xmm6, 25 706 por xmm6, xmm8 707 movdqa xmm8, xmm7 708 psrld xmm8, 7 709 pslld xmm7, 25 710 por xmm7, xmm8 711 movdqa xmm8, xmm4 712 psrld xmm8, 7 713 pslld xmm4, 25 714 por xmm4, xmm8 715 paddd xmm0, xmmword ptr [rsp+0xA0] 716 paddd xmm1, xmmword ptr [rsp+0xC0] 717 paddd xmm2, xmmword ptr [rsp+0xE0] 718 paddd xmm3, xmmword ptr 
[rsp+0xD0] 719 paddd xmm0, xmm4 720 paddd xmm1, xmm5 721 paddd xmm2, xmm6 722 paddd xmm3, xmm7 723 pxor xmm12, xmm0 724 pxor xmm13, xmm1 725 pxor xmm14, xmm2 726 pxor xmm15, xmm3 727 movdqa xmm8, xmmword ptr [ROT16+rip] 728 pshufb xmm12, xmm8 729 pshufb xmm13, xmm8 730 pshufb xmm14, xmm8 731 pshufb xmm15, xmm8 732 movdqa xmm8, xmmword ptr [rsp+0x100] 733 paddd xmm8, xmm12 734 paddd xmm9, xmm13 735 paddd xmm10, xmm14 736 paddd xmm11, xmm15 737 pxor xmm4, xmm8 738 pxor xmm5, xmm9 739 pxor xmm6, xmm10 740 pxor xmm7, xmm11 741 movdqa xmmword ptr [rsp+0x100], xmm8 742 movdqa xmm8, xmm4 743 psrld xmm8, 12 744 pslld xmm4, 20 745 por xmm4, xmm8 746 movdqa xmm8, xmm5 747 psrld xmm8, 12 748 pslld xmm5, 20 749 por xmm5, xmm8 750 movdqa xmm8, xmm6 751 psrld xmm8, 12 752 pslld xmm6, 20 753 por xmm6, xmm8 754 movdqa xmm8, xmm7 755 psrld xmm8, 12 756 pslld xmm7, 20 757 por xmm7, xmm8 758 paddd xmm0, xmmword ptr [rsp+0x70] 759 paddd xmm1, xmmword ptr [rsp+0x90] 760 paddd xmm2, xmmword ptr [rsp+0x30] 761 paddd xmm3, xmmword ptr [rsp+0xF0] 762 paddd xmm0, xmm4 763 paddd xmm1, xmm5 764 paddd xmm2, xmm6 765 paddd xmm3, xmm7 766 pxor xmm12, xmm0 767 pxor xmm13, xmm1 768 pxor xmm14, xmm2 769 pxor xmm15, xmm3 770 movdqa xmm8, xmmword ptr [ROT8+rip] 771 pshufb xmm12, xmm8 772 pshufb xmm13, xmm8 773 pshufb xmm14, xmm8 774 pshufb xmm15, xmm8 775 movdqa xmm8, xmmword ptr [rsp+0x100] 776 paddd xmm8, xmm12 777 paddd xmm9, xmm13 778 paddd xmm10, xmm14 779 paddd xmm11, xmm15 780 pxor xmm4, xmm8 781 pxor xmm5, xmm9 782 pxor xmm6, xmm10 783 pxor xmm7, xmm11 784 movdqa xmmword ptr [rsp+0x100], xmm8 785 movdqa xmm8, xmm4 786 psrld xmm8, 7 787 pslld xmm4, 25 788 por xmm4, xmm8 789 movdqa xmm8, xmm5 790 psrld xmm8, 7 791 pslld xmm5, 25 792 por xmm5, xmm8 793 movdqa xmm8, xmm6 794 psrld xmm8, 7 795 pslld xmm6, 25 796 por xmm6, xmm8 797 movdqa xmm8, xmm7 798 psrld xmm8, 7 799 pslld xmm7, 25 800 por xmm7, xmm8 801 paddd xmm0, xmmword ptr [rsp+0x40] 802 paddd xmm1, xmmword ptr [rsp+0xB0] 803 paddd xmm2, 
xmmword ptr [rsp+0x50] 804 paddd xmm3, xmmword ptr [rsp+0x10] 805 paddd xmm0, xmm5 806 paddd xmm1, xmm6 807 paddd xmm2, xmm7 808 paddd xmm3, xmm4 809 pxor xmm15, xmm0 810 pxor xmm12, xmm1 811 pxor xmm13, xmm2 812 pxor xmm14, xmm3 813 movdqa xmm8, xmmword ptr [ROT16+rip] 814 pshufb xmm15, xmm8 815 pshufb xmm12, xmm8 816 pshufb xmm13, xmm8 817 pshufb xmm14, xmm8 818 paddd xmm10, xmm15 819 paddd xmm11, xmm12 820 movdqa xmm8, xmmword ptr [rsp+0x100] 821 paddd xmm8, xmm13 822 paddd xmm9, xmm14 823 pxor xmm5, xmm10 824 pxor xmm6, xmm11 825 pxor xmm7, xmm8 826 pxor xmm4, xmm9 827 movdqa xmmword ptr [rsp+0x100], xmm8 828 movdqa xmm8, xmm5 829 psrld xmm8, 12 830 pslld xmm5, 20 831 por xmm5, xmm8 832 movdqa xmm8, xmm6 833 psrld xmm8, 12 834 pslld xmm6, 20 835 por xmm6, xmm8 836 movdqa xmm8, xmm7 837 psrld xmm8, 12 838 pslld xmm7, 20 839 por xmm7, xmm8 840 movdqa xmm8, xmm4 841 psrld xmm8, 12 842 pslld xmm4, 20 843 por xmm4, xmm8 844 paddd xmm0, xmmword ptr [rsp] 845 paddd xmm1, xmmword ptr [rsp+0x20] 846 paddd xmm2, xmmword ptr [rsp+0x80] 847 paddd xmm3, xmmword ptr [rsp+0x60] 848 paddd xmm0, xmm5 849 paddd xmm1, xmm6 850 paddd xmm2, xmm7 851 paddd xmm3, xmm4 852 pxor xmm15, xmm0 853 pxor xmm12, xmm1 854 pxor xmm13, xmm2 855 pxor xmm14, xmm3 856 movdqa xmm8, xmmword ptr [ROT8+rip] 857 pshufb xmm15, xmm8 858 pshufb xmm12, xmm8 859 pshufb xmm13, xmm8 860 pshufb xmm14, xmm8 861 paddd xmm10, xmm15 862 paddd xmm11, xmm12 863 movdqa xmm8, xmmword ptr [rsp+0x100] 864 paddd xmm8, xmm13 865 paddd xmm9, xmm14 866 pxor xmm5, xmm10 867 pxor xmm6, xmm11 868 pxor xmm7, xmm8 869 pxor xmm4, xmm9 870 movdqa xmmword ptr [rsp+0x100], xmm8 871 movdqa xmm8, xmm5 872 psrld xmm8, 7 873 pslld xmm5, 25 874 por xmm5, xmm8 875 movdqa xmm8, xmm6 876 psrld xmm8, 7 877 pslld xmm6, 25 878 por xmm6, xmm8 879 movdqa xmm8, xmm7 880 psrld xmm8, 7 881 pslld xmm7, 25 882 por xmm7, xmm8 883 movdqa xmm8, xmm4 884 psrld xmm8, 7 885 pslld xmm4, 25 886 por xmm4, xmm8 887 paddd xmm0, xmmword ptr [rsp+0xC0] 888 paddd 
xmm1, xmmword ptr [rsp+0x90] 889 paddd xmm2, xmmword ptr [rsp+0xF0] 890 paddd xmm3, xmmword ptr [rsp+0xE0] 891 paddd xmm0, xmm4 892 paddd xmm1, xmm5 893 paddd xmm2, xmm6 894 paddd xmm3, xmm7 895 pxor xmm12, xmm0 896 pxor xmm13, xmm1 897 pxor xmm14, xmm2 898 pxor xmm15, xmm3 899 movdqa xmm8, xmmword ptr [ROT16+rip] 900 pshufb xmm12, xmm8 901 pshufb xmm13, xmm8 902 pshufb xmm14, xmm8 903 pshufb xmm15, xmm8 904 movdqa xmm8, xmmword ptr [rsp+0x100] 905 paddd xmm8, xmm12 906 paddd xmm9, xmm13 907 paddd xmm10, xmm14 908 paddd xmm11, xmm15 909 pxor xmm4, xmm8 910 pxor xmm5, xmm9 911 pxor xmm6, xmm10 912 pxor xmm7, xmm11 913 movdqa xmmword ptr [rsp+0x100], xmm8 914 movdqa xmm8, xmm4 915 psrld xmm8, 12 916 pslld xmm4, 20 917 por xmm4, xmm8 918 movdqa xmm8, xmm5 919 psrld xmm8, 12 920 pslld xmm5, 20 921 por xmm5, xmm8 922 movdqa xmm8, xmm6 923 psrld xmm8, 12 924 pslld xmm6, 20 925 por xmm6, xmm8 926 movdqa xmm8, xmm7 927 psrld xmm8, 12 928 pslld xmm7, 20 929 por xmm7, xmm8 930 paddd xmm0, xmmword ptr [rsp+0xD0] 931 paddd xmm1, xmmword ptr [rsp+0xB0] 932 paddd xmm2, xmmword ptr [rsp+0xA0] 933 paddd xmm3, xmmword ptr [rsp+0x80] 934 paddd xmm0, xmm4 935 paddd xmm1, xmm5 936 paddd xmm2, xmm6 937 paddd xmm3, xmm7 938 pxor xmm12, xmm0 939 pxor xmm13, xmm1 940 pxor xmm14, xmm2 941 pxor xmm15, xmm3 942 movdqa xmm8, xmmword ptr [ROT8+rip] 943 pshufb xmm12, xmm8 944 pshufb xmm13, xmm8 945 pshufb xmm14, xmm8 946 pshufb xmm15, xmm8 947 movdqa xmm8, xmmword ptr [rsp+0x100] 948 paddd xmm8, xmm12 949 paddd xmm9, xmm13 950 paddd xmm10, xmm14 951 paddd xmm11, xmm15 952 pxor xmm4, xmm8 953 pxor xmm5, xmm9 954 pxor xmm6, xmm10 955 pxor xmm7, xmm11 956 movdqa xmmword ptr [rsp+0x100], xmm8 957 movdqa xmm8, xmm4 958 psrld xmm8, 7 959 pslld xmm4, 25 960 por xmm4, xmm8 961 movdqa xmm8, xmm5 962 psrld xmm8, 7 963 pslld xmm5, 25 964 por xmm5, xmm8 965 movdqa xmm8, xmm6 966 psrld xmm8, 7 967 pslld xmm6, 25 968 por xmm6, xmm8 969 movdqa xmm8, xmm7 970 psrld xmm8, 7 971 pslld xmm7, 25 972 por xmm7, xmm8 
973 paddd xmm0, xmmword ptr [rsp+0x70] 974 paddd xmm1, xmmword ptr [rsp+0x50] 975 paddd xmm2, xmmword ptr [rsp] 976 paddd xmm3, xmmword ptr [rsp+0x60] 977 paddd xmm0, xmm5 978 paddd xmm1, xmm6 979 paddd xmm2, xmm7 980 paddd xmm3, xmm4 981 pxor xmm15, xmm0 982 pxor xmm12, xmm1 983 pxor xmm13, xmm2 984 pxor xmm14, xmm3 985 movdqa xmm8, xmmword ptr [ROT16+rip] 986 pshufb xmm15, xmm8 987 pshufb xmm12, xmm8 988 pshufb xmm13, xmm8 989 pshufb xmm14, xmm8 990 paddd xmm10, xmm15 991 paddd xmm11, xmm12 992 movdqa xmm8, xmmword ptr [rsp+0x100] 993 paddd xmm8, xmm13 994 paddd xmm9, xmm14 995 pxor xmm5, xmm10 996 pxor xmm6, xmm11 997 pxor xmm7, xmm8 998 pxor xmm4, xmm9 999 movdqa xmmword ptr [rsp+0x100], xmm8 1000 movdqa xmm8, xmm5 1001 psrld xmm8, 12 1002 pslld xmm5, 20 1003 por xmm5, xmm8 1004 movdqa xmm8, xmm6 1005 psrld xmm8, 12 1006 pslld xmm6, 20 1007 por xmm6, xmm8 1008 movdqa xmm8, xmm7 1009 psrld xmm8, 12 1010 pslld xmm7, 20 1011 por xmm7, xmm8 1012 movdqa xmm8, xmm4 1013 psrld xmm8, 12 1014 pslld xmm4, 20 1015 por xmm4, xmm8 1016 paddd xmm0, xmmword ptr [rsp+0x20] 1017 paddd xmm1, xmmword ptr [rsp+0x30] 1018 paddd xmm2, xmmword ptr [rsp+0x10] 1019 paddd xmm3, xmmword ptr [rsp+0x40] 1020 paddd xmm0, xmm5 1021 paddd xmm1, xmm6 1022 paddd xmm2, xmm7 1023 paddd xmm3, xmm4 1024 pxor xmm15, xmm0 1025 pxor xmm12, xmm1 1026 pxor xmm13, xmm2 1027 pxor xmm14, xmm3 1028 movdqa xmm8, xmmword ptr [ROT8+rip] 1029 pshufb xmm15, xmm8 1030 pshufb xmm12, xmm8 1031 pshufb xmm13, xmm8 1032 pshufb xmm14, xmm8 1033 paddd xmm10, xmm15 1034 paddd xmm11, xmm12 1035 movdqa xmm8, xmmword ptr [rsp+0x100] 1036 paddd xmm8, xmm13 1037 paddd xmm9, xmm14 1038 pxor xmm5, xmm10 1039 pxor xmm6, xmm11 1040 pxor xmm7, xmm8 1041 pxor xmm4, xmm9 1042 movdqa xmmword ptr [rsp+0x100], xmm8 1043 movdqa xmm8, xmm5 1044 psrld xmm8, 7 1045 pslld xmm5, 25 1046 por xmm5, xmm8 1047 movdqa xmm8, xmm6 1048 psrld xmm8, 7 1049 pslld xmm6, 25 1050 por xmm6, xmm8 1051 movdqa xmm8, xmm7 1052 psrld xmm8, 7 1053 pslld xmm7, 
25 1054 por xmm7, xmm8 1055 movdqa xmm8, xmm4 1056 psrld xmm8, 7 1057 pslld xmm4, 25 1058 por xmm4, xmm8 1059 paddd xmm0, xmmword ptr [rsp+0x90] 1060 paddd xmm1, xmmword ptr [rsp+0xB0] 1061 paddd xmm2, xmmword ptr [rsp+0x80] 1062 paddd xmm3, xmmword ptr [rsp+0xF0] 1063 paddd xmm0, xmm4 1064 paddd xmm1, xmm5 1065 paddd xmm2, xmm6 1066 paddd xmm3, xmm7 1067 pxor xmm12, xmm0 1068 pxor xmm13, xmm1 1069 pxor xmm14, xmm2 1070 pxor xmm15, xmm3 1071 movdqa xmm8, xmmword ptr [ROT16+rip] 1072 pshufb xmm12, xmm8 1073 pshufb xmm13, xmm8 1074 pshufb xmm14, xmm8 1075 pshufb xmm15, xmm8 1076 movdqa xmm8, xmmword ptr [rsp+0x100] 1077 paddd xmm8, xmm12 1078 paddd xmm9, xmm13 1079 paddd xmm10, xmm14 1080 paddd xmm11, xmm15 1081 pxor xmm4, xmm8 1082 pxor xmm5, xmm9 1083 pxor xmm6, xmm10 1084 pxor xmm7, xmm11 1085 movdqa xmmword ptr [rsp+0x100], xmm8 1086 movdqa xmm8, xmm4 1087 psrld xmm8, 12 1088 pslld xmm4, 20 1089 por xmm4, xmm8 1090 movdqa xmm8, xmm5 1091 psrld xmm8, 12 1092 pslld xmm5, 20 1093 por xmm5, xmm8 1094 movdqa xmm8, xmm6 1095 psrld xmm8, 12 1096 pslld xmm6, 20 1097 por xmm6, xmm8 1098 movdqa xmm8, xmm7 1099 psrld xmm8, 12 1100 pslld xmm7, 20 1101 por xmm7, xmm8 1102 paddd xmm0, xmmword ptr [rsp+0xE0] 1103 paddd xmm1, xmmword ptr [rsp+0x50] 1104 paddd xmm2, xmmword ptr [rsp+0xC0] 1105 paddd xmm3, xmmword ptr [rsp+0x10] 1106 paddd xmm0, xmm4 1107 paddd xmm1, xmm5 1108 paddd xmm2, xmm6 1109 paddd xmm3, xmm7 1110 pxor xmm12, xmm0 1111 pxor xmm13, xmm1 1112 pxor xmm14, xmm2 1113 pxor xmm15, xmm3 1114 movdqa xmm8, xmmword ptr [ROT8+rip] 1115 pshufb xmm12, xmm8 1116 pshufb xmm13, xmm8 1117 pshufb xmm14, xmm8 1118 pshufb xmm15, xmm8 1119 movdqa xmm8, xmmword ptr [rsp+0x100] 1120 paddd xmm8, xmm12 1121 paddd xmm9, xmm13 1122 paddd xmm10, xmm14 1123 paddd xmm11, xmm15 1124 pxor xmm4, xmm8 1125 pxor xmm5, xmm9 1126 pxor xmm6, xmm10 1127 pxor xmm7, xmm11 1128 movdqa xmmword ptr [rsp+0x100], xmm8 1129 movdqa xmm8, xmm4 1130 psrld xmm8, 7 1131 pslld xmm4, 25 1132 por xmm4, xmm8 1133 
movdqa xmm8, xmm5 1134 psrld xmm8, 7 1135 pslld xmm5, 25 1136 por xmm5, xmm8 1137 movdqa xmm8, xmm6 1138 psrld xmm8, 7 1139 pslld xmm6, 25 1140 por xmm6, xmm8 1141 movdqa xmm8, xmm7 1142 psrld xmm8, 7 1143 pslld xmm7, 25 1144 por xmm7, xmm8 1145 paddd xmm0, xmmword ptr [rsp+0xD0] 1146 paddd xmm1, xmmword ptr [rsp] 1147 paddd xmm2, xmmword ptr [rsp+0x20] 1148 paddd xmm3, xmmword ptr [rsp+0x40] 1149 paddd xmm0, xmm5 1150 paddd xmm1, xmm6 1151 paddd xmm2, xmm7 1152 paddd xmm3, xmm4 1153 pxor xmm15, xmm0 1154 pxor xmm12, xmm1 1155 pxor xmm13, xmm2 1156 pxor xmm14, xmm3 1157 movdqa xmm8, xmmword ptr [ROT16+rip] 1158 pshufb xmm15, xmm8 1159 pshufb xmm12, xmm8 1160 pshufb xmm13, xmm8 1161 pshufb xmm14, xmm8 1162 paddd xmm10, xmm15 1163 paddd xmm11, xmm12 1164 movdqa xmm8, xmmword ptr [rsp+0x100] 1165 paddd xmm8, xmm13 1166 paddd xmm9, xmm14 1167 pxor xmm5, xmm10 1168 pxor xmm6, xmm11 1169 pxor xmm7, xmm8 1170 pxor xmm4, xmm9 1171 movdqa xmmword ptr [rsp+0x100], xmm8 1172 movdqa xmm8, xmm5 1173 psrld xmm8, 12 1174 pslld xmm5, 20 1175 por xmm5, xmm8 1176 movdqa xmm8, xmm6 1177 psrld xmm8, 12 1178 pslld xmm6, 20 1179 por xmm6, xmm8 1180 movdqa xmm8, xmm7 1181 psrld xmm8, 12 1182 pslld xmm7, 20 1183 por xmm7, xmm8 1184 movdqa xmm8, xmm4 1185 psrld xmm8, 12 1186 pslld xmm4, 20 1187 por xmm4, xmm8 1188 paddd xmm0, xmmword ptr [rsp+0x30] 1189 paddd xmm1, xmmword ptr [rsp+0xA0] 1190 paddd xmm2, xmmword ptr [rsp+0x60] 1191 paddd xmm3, xmmword ptr [rsp+0x70] 1192 paddd xmm0, xmm5 1193 paddd xmm1, xmm6 1194 paddd xmm2, xmm7 1195 paddd xmm3, xmm4 1196 pxor xmm15, xmm0 1197 pxor xmm12, xmm1 1198 pxor xmm13, xmm2 1199 pxor xmm14, xmm3 1200 movdqa xmm8, xmmword ptr [ROT8+rip] 1201 pshufb xmm15, xmm8 1202 pshufb xmm12, xmm8 1203 pshufb xmm13, xmm8 1204 pshufb xmm14, xmm8 1205 paddd xmm10, xmm15 1206 paddd xmm11, xmm12 1207 movdqa xmm8, xmmword ptr [rsp+0x100] 1208 paddd xmm8, xmm13 1209 paddd xmm9, xmm14 1210 pxor xmm5, xmm10 1211 pxor xmm6, xmm11 1212 pxor xmm7, xmm8 1213 pxor xmm4, 
xmm9 1214 movdqa xmmword ptr [rsp+0x100], xmm8 1215 movdqa xmm8, xmm5 1216 psrld xmm8, 7 1217 pslld xmm5, 25 1218 por xmm5, xmm8 1219 movdqa xmm8, xmm6 1220 psrld xmm8, 7 1221 pslld xmm6, 25 1222 por xmm6, xmm8 1223 movdqa xmm8, xmm7 1224 psrld xmm8, 7 1225 pslld xmm7, 25 1226 por xmm7, xmm8 1227 movdqa xmm8, xmm4 1228 psrld xmm8, 7 1229 pslld xmm4, 25 1230 por xmm4, xmm8 1231 paddd xmm0, xmmword ptr [rsp+0xB0] 1232 paddd xmm1, xmmword ptr [rsp+0x50] 1233 paddd xmm2, xmmword ptr [rsp+0x10] 1234 paddd xmm3, xmmword ptr [rsp+0x80] 1235 paddd xmm0, xmm4 1236 paddd xmm1, xmm5 1237 paddd xmm2, xmm6 1238 paddd xmm3, xmm7 1239 pxor xmm12, xmm0 1240 pxor xmm13, xmm1 1241 pxor xmm14, xmm2 1242 pxor xmm15, xmm3 1243 movdqa xmm8, xmmword ptr [ROT16+rip] 1244 pshufb xmm12, xmm8 1245 pshufb xmm13, xmm8 1246 pshufb xmm14, xmm8 1247 pshufb xmm15, xmm8 1248 movdqa xmm8, xmmword ptr [rsp+0x100] 1249 paddd xmm8, xmm12 1250 paddd xmm9, xmm13 1251 paddd xmm10, xmm14 1252 paddd xmm11, xmm15 1253 pxor xmm4, xmm8 1254 pxor xmm5, xmm9 1255 pxor xmm6, xmm10 1256 pxor xmm7, xmm11 1257 movdqa xmmword ptr [rsp+0x100], xmm8 1258 movdqa xmm8, xmm4 1259 psrld xmm8, 12 1260 pslld xmm4, 20 1261 por xmm4, xmm8 1262 movdqa xmm8, xmm5 1263 psrld xmm8, 12 1264 pslld xmm5, 20 1265 por xmm5, xmm8 1266 movdqa xmm8, xmm6 1267 psrld xmm8, 12 1268 pslld xmm6, 20 1269 por xmm6, xmm8 1270 movdqa xmm8, xmm7 1271 psrld xmm8, 12 1272 pslld xmm7, 20 1273 por xmm7, xmm8 1274 paddd xmm0, xmmword ptr [rsp+0xF0] 1275 paddd xmm1, xmmword ptr [rsp] 1276 paddd xmm2, xmmword ptr [rsp+0x90] 1277 paddd xmm3, xmmword ptr [rsp+0x60] 1278 paddd xmm0, xmm4 1279 paddd xmm1, xmm5 1280 paddd xmm2, xmm6 1281 paddd xmm3, xmm7 1282 pxor xmm12, xmm0 1283 pxor xmm13, xmm1 1284 pxor xmm14, xmm2 1285 pxor xmm15, xmm3 1286 movdqa xmm8, xmmword ptr [ROT8+rip] 1287 pshufb xmm12, xmm8 1288 pshufb xmm13, xmm8 1289 pshufb xmm14, xmm8 1290 pshufb xmm15, xmm8 1291 movdqa xmm8, xmmword ptr [rsp+0x100] 1292 paddd xmm8, xmm12 1293 paddd xmm9, 
xmm13 1294 paddd xmm10, xmm14 1295 paddd xmm11, xmm15 1296 pxor xmm4, xmm8 1297 pxor xmm5, xmm9 1298 pxor xmm6, xmm10 1299 pxor xmm7, xmm11 1300 movdqa xmmword ptr [rsp+0x100], xmm8 1301 movdqa xmm8, xmm4 1302 psrld xmm8, 7 1303 pslld xmm4, 25 1304 por xmm4, xmm8 1305 movdqa xmm8, xmm5 1306 psrld xmm8, 7 1307 pslld xmm5, 25 1308 por xmm5, xmm8 1309 movdqa xmm8, xmm6 1310 psrld xmm8, 7 1311 pslld xmm6, 25 1312 por xmm6, xmm8 1313 movdqa xmm8, xmm7 1314 psrld xmm8, 7 1315 pslld xmm7, 25 1316 por xmm7, xmm8 1317 paddd xmm0, xmmword ptr [rsp+0xE0] 1318 paddd xmm1, xmmword ptr [rsp+0x20] 1319 paddd xmm2, xmmword ptr [rsp+0x30] 1320 paddd xmm3, xmmword ptr [rsp+0x70] 1321 paddd xmm0, xmm5 1322 paddd xmm1, xmm6 1323 paddd xmm2, xmm7 1324 paddd xmm3, xmm4 1325 pxor xmm15, xmm0 1326 pxor xmm12, xmm1 1327 pxor xmm13, xmm2 1328 pxor xmm14, xmm3 1329 movdqa xmm8, xmmword ptr [ROT16+rip] 1330 pshufb xmm15, xmm8 1331 pshufb xmm12, xmm8 1332 pshufb xmm13, xmm8 1333 pshufb xmm14, xmm8 1334 paddd xmm10, xmm15 1335 paddd xmm11, xmm12 1336 movdqa xmm8, xmmword ptr [rsp+0x100] 1337 paddd xmm8, xmm13 1338 paddd xmm9, xmm14 1339 pxor xmm5, xmm10 1340 pxor xmm6, xmm11 1341 pxor xmm7, xmm8 1342 pxor xmm4, xmm9 1343 movdqa xmmword ptr [rsp+0x100], xmm8 1344 movdqa xmm8, xmm5 1345 psrld xmm8, 12 1346 pslld xmm5, 20 1347 por xmm5, xmm8 1348 movdqa xmm8, xmm6 1349 psrld xmm8, 12 1350 pslld xmm6, 20 1351 por xmm6, xmm8 1352 movdqa xmm8, xmm7 1353 psrld xmm8, 12 1354 pslld xmm7, 20 1355 por xmm7, xmm8 1356 movdqa xmm8, xmm4 1357 psrld xmm8, 12 1358 pslld xmm4, 20 1359 por xmm4, xmm8 1360 paddd xmm0, xmmword ptr [rsp+0xA0] 1361 paddd xmm1, xmmword ptr [rsp+0xC0] 1362 paddd xmm2, xmmword ptr [rsp+0x40] 1363 paddd xmm3, xmmword ptr [rsp+0xD0] 1364 paddd xmm0, xmm5 1365 paddd xmm1, xmm6 1366 paddd xmm2, xmm7 1367 paddd xmm3, xmm4 1368 pxor xmm15, xmm0 1369 pxor xmm12, xmm1 1370 pxor xmm13, xmm2 1371 pxor xmm14, xmm3 1372 movdqa xmm8, xmmword ptr [ROT8+rip] 1373 pshufb xmm15, xmm8 1374 pshufb xmm12, 
xmm8 1375 pshufb xmm13, xmm8 1376 pshufb xmm14, xmm8 1377 paddd xmm10, xmm15 1378 paddd xmm11, xmm12 1379 movdqa xmm8, xmmword ptr [rsp+0x100] 1380 paddd xmm8, xmm13 1381 paddd xmm9, xmm14 1382 pxor xmm5, xmm10 1383 pxor xmm6, xmm11 1384 pxor xmm7, xmm8 1385 pxor xmm4, xmm9 1386 pxor xmm0, xmm8 1387 pxor xmm1, xmm9 1388 pxor xmm2, xmm10 1389 pxor xmm3, xmm11 1390 movdqa xmm8, xmm5 1391 psrld xmm8, 7 1392 pslld xmm5, 25 1393 por xmm5, xmm8 1394 movdqa xmm8, xmm6 1395 psrld xmm8, 7 1396 pslld xmm6, 25 1397 por xmm6, xmm8 1398 movdqa xmm8, xmm7 1399 psrld xmm8, 7 1400 pslld xmm7, 25 1401 por xmm7, xmm8 1402 movdqa xmm8, xmm4 1403 psrld xmm8, 7 1404 pslld xmm4, 25 1405 por xmm4, xmm8 1406 pxor xmm4, xmm12 1407 pxor xmm5, xmm13 1408 pxor xmm6, xmm14 1409 pxor xmm7, xmm15 1410 mov eax, r13d 1411 jne 9b 1412 movdqa xmm9, xmm0 1413 punpckldq xmm0, xmm1 1414 punpckhdq xmm9, xmm1 1415 movdqa xmm11, xmm2 1416 punpckldq xmm2, xmm3 1417 punpckhdq xmm11, xmm3 1418 movdqa xmm1, xmm0 1419 punpcklqdq xmm0, xmm2 1420 punpckhqdq xmm1, xmm2 1421 movdqa xmm3, xmm9 1422 punpcklqdq xmm9, xmm11 1423 punpckhqdq xmm3, xmm11 1424 movdqu xmmword ptr [rbx], xmm0 1425 movdqu xmmword ptr [rbx+0x20], xmm1 1426 movdqu xmmword ptr [rbx+0x40], xmm9 1427 movdqu xmmword ptr [rbx+0x60], xmm3 1428 movdqa xmm9, xmm4 1429 punpckldq xmm4, xmm5 1430 punpckhdq xmm9, xmm5 1431 movdqa xmm11, xmm6 1432 punpckldq xmm6, xmm7 1433 punpckhdq xmm11, xmm7 1434 movdqa xmm5, xmm4 1435 punpcklqdq xmm4, xmm6 1436 punpckhqdq xmm5, xmm6 1437 movdqa xmm7, xmm9 1438 punpcklqdq xmm9, xmm11 1439 punpckhqdq xmm7, xmm11 1440 movdqu xmmword ptr [rbx+0x10], xmm4 1441 movdqu xmmword ptr [rbx+0x30], xmm5 1442 movdqu xmmword ptr [rbx+0x50], xmm9 1443 movdqu xmmword ptr [rbx+0x70], xmm7 1444 movdqa xmm1, xmmword ptr [rsp+0x110] 1445 movdqa xmm0, xmm1 1446 paddd xmm1, xmmword ptr [rsp+0x150] 1447 movdqa xmmword ptr [rsp+0x110], xmm1 1448 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1449 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1450 
pcmpgtd xmm0, xmm1 1451 movdqa xmm1, xmmword ptr [rsp+0x120] 1452 psubd xmm1, xmm0 1453 movdqa xmmword ptr [rsp+0x120], xmm1 1454 add rbx, 128 1455 add rdi, 32 1456 sub rsi, 4 1457 cmp rsi, 4 1458 jnc 2b 1459 test rsi, rsi 1460 jnz 3f 14614: 1462 mov rsp, rbp 1463 pop rbp 1464 pop rbx 1465 pop r12 1466 pop r13 1467 pop r14 1468 pop r15 1469 ret 1470.p2align 5 14713: 1472 test esi, 0x2 1473 je 3f 1474 movups xmm0, xmmword ptr [rcx] 1475 movups xmm1, xmmword ptr [rcx+0x10] 1476 movaps xmm8, xmm0 1477 movaps xmm9, xmm1 1478 movd xmm13, dword ptr [rsp+0x110] 1479 pinsrd xmm13, dword ptr [rsp+0x120], 1 1480 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1481 movaps xmmword ptr [rsp], xmm13 1482 movd xmm14, dword ptr [rsp+0x114] 1483 pinsrd xmm14, dword ptr [rsp+0x124], 1 1484 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1485 movaps xmmword ptr [rsp+0x10], xmm14 1486 mov r8, qword ptr [rdi] 1487 mov r9, qword ptr [rdi+0x8] 1488 movzx eax, byte ptr [rbp+0x40] 1489 or eax, r13d 1490 xor edx, edx 14912: 1492 mov r14d, eax 1493 or eax, r12d 1494 add rdx, 64 1495 cmp rdx, r15 1496 cmovne eax, r14d 1497 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1498 movaps xmm10, xmm2 1499 movups xmm4, xmmword ptr [r8+rdx-0x40] 1500 movups xmm5, xmmword ptr [r8+rdx-0x30] 1501 movaps xmm3, xmm4 1502 shufps xmm4, xmm5, 136 1503 shufps xmm3, xmm5, 221 1504 movaps xmm5, xmm3 1505 movups xmm6, xmmword ptr [r8+rdx-0x20] 1506 movups xmm7, xmmword ptr [r8+rdx-0x10] 1507 movaps xmm3, xmm6 1508 shufps xmm6, xmm7, 136 1509 pshufd xmm6, xmm6, 0x93 1510 shufps xmm3, xmm7, 221 1511 pshufd xmm7, xmm3, 0x93 1512 movups xmm12, xmmword ptr [r9+rdx-0x40] 1513 movups xmm13, xmmword ptr [r9+rdx-0x30] 1514 movaps xmm11, xmm12 1515 shufps xmm12, xmm13, 136 1516 shufps xmm11, xmm13, 221 1517 movaps xmm13, xmm11 1518 movups xmm14, xmmword ptr [r9+rdx-0x20] 1519 movups xmm15, xmmword ptr [r9+rdx-0x10] 1520 movaps xmm11, xmm14 1521 shufps xmm14, xmm15, 136 1522 pshufd xmm14, xmm14, 0x93 1523 shufps xmm11, xmm15, 
221 1524 pshufd xmm15, xmm11, 0x93 1525 movaps xmm3, xmmword ptr [rsp] 1526 movaps xmm11, xmmword ptr [rsp+0x10] 1527 pinsrd xmm3, eax, 3 1528 pinsrd xmm11, eax, 3 1529 mov al, 7 15309: 1531 paddd xmm0, xmm4 1532 paddd xmm8, xmm12 1533 movaps xmmword ptr [rsp+0x20], xmm4 1534 movaps xmmword ptr [rsp+0x30], xmm12 1535 paddd xmm0, xmm1 1536 paddd xmm8, xmm9 1537 pxor xmm3, xmm0 1538 pxor xmm11, xmm8 1539 movaps xmm12, xmmword ptr [ROT16+rip] 1540 pshufb xmm3, xmm12 1541 pshufb xmm11, xmm12 1542 paddd xmm2, xmm3 1543 paddd xmm10, xmm11 1544 pxor xmm1, xmm2 1545 pxor xmm9, xmm10 1546 movdqa xmm4, xmm1 1547 pslld xmm1, 20 1548 psrld xmm4, 12 1549 por xmm1, xmm4 1550 movdqa xmm4, xmm9 1551 pslld xmm9, 20 1552 psrld xmm4, 12 1553 por xmm9, xmm4 1554 paddd xmm0, xmm5 1555 paddd xmm8, xmm13 1556 movaps xmmword ptr [rsp+0x40], xmm5 1557 movaps xmmword ptr [rsp+0x50], xmm13 1558 paddd xmm0, xmm1 1559 paddd xmm8, xmm9 1560 pxor xmm3, xmm0 1561 pxor xmm11, xmm8 1562 movaps xmm13, xmmword ptr [ROT8+rip] 1563 pshufb xmm3, xmm13 1564 pshufb xmm11, xmm13 1565 paddd xmm2, xmm3 1566 paddd xmm10, xmm11 1567 pxor xmm1, xmm2 1568 pxor xmm9, xmm10 1569 movdqa xmm4, xmm1 1570 pslld xmm1, 25 1571 psrld xmm4, 7 1572 por xmm1, xmm4 1573 movdqa xmm4, xmm9 1574 pslld xmm9, 25 1575 psrld xmm4, 7 1576 por xmm9, xmm4 1577 pshufd xmm0, xmm0, 0x93 1578 pshufd xmm8, xmm8, 0x93 1579 pshufd xmm3, xmm3, 0x4E 1580 pshufd xmm11, xmm11, 0x4E 1581 pshufd xmm2, xmm2, 0x39 1582 pshufd xmm10, xmm10, 0x39 1583 paddd xmm0, xmm6 1584 paddd xmm8, xmm14 1585 paddd xmm0, xmm1 1586 paddd xmm8, xmm9 1587 pxor xmm3, xmm0 1588 pxor xmm11, xmm8 1589 pshufb xmm3, xmm12 1590 pshufb xmm11, xmm12 1591 paddd xmm2, xmm3 1592 paddd xmm10, xmm11 1593 pxor xmm1, xmm2 1594 pxor xmm9, xmm10 1595 movdqa xmm4, xmm1 1596 pslld xmm1, 20 1597 psrld xmm4, 12 1598 por xmm1, xmm4 1599 movdqa xmm4, xmm9 1600 pslld xmm9, 20 1601 psrld xmm4, 12 1602 por xmm9, xmm4 1603 paddd xmm0, xmm7 1604 paddd xmm8, xmm15 1605 paddd xmm0, xmm1 1606 paddd 
xmm8, xmm9 1607 pxor xmm3, xmm0 1608 pxor xmm11, xmm8 1609 pshufb xmm3, xmm13 1610 pshufb xmm11, xmm13 1611 paddd xmm2, xmm3 1612 paddd xmm10, xmm11 1613 pxor xmm1, xmm2 1614 pxor xmm9, xmm10 1615 movdqa xmm4, xmm1 1616 pslld xmm1, 25 1617 psrld xmm4, 7 1618 por xmm1, xmm4 1619 movdqa xmm4, xmm9 1620 pslld xmm9, 25 1621 psrld xmm4, 7 1622 por xmm9, xmm4 1623 pshufd xmm0, xmm0, 0x39 1624 pshufd xmm8, xmm8, 0x39 1625 pshufd xmm3, xmm3, 0x4E 1626 pshufd xmm11, xmm11, 0x4E 1627 pshufd xmm2, xmm2, 0x93 1628 pshufd xmm10, xmm10, 0x93 1629 dec al 1630 je 9f 1631 movdqa xmm12, xmmword ptr [rsp+0x20] 1632 movdqa xmm5, xmmword ptr [rsp+0x40] 1633 pshufd xmm13, xmm12, 0x0F 1634 shufps xmm12, xmm5, 214 1635 pshufd xmm4, xmm12, 0x39 1636 movdqa xmm12, xmm6 1637 shufps xmm12, xmm7, 250 1638 pblendw xmm13, xmm12, 0xCC 1639 movdqa xmm12, xmm7 1640 punpcklqdq xmm12, xmm5 1641 pblendw xmm12, xmm6, 0xC0 1642 pshufd xmm12, xmm12, 0x78 1643 punpckhdq xmm5, xmm7 1644 punpckldq xmm6, xmm5 1645 pshufd xmm7, xmm6, 0x1E 1646 movdqa xmmword ptr [rsp+0x20], xmm13 1647 movdqa xmmword ptr [rsp+0x40], xmm12 1648 movdqa xmm5, xmmword ptr [rsp+0x30] 1649 movdqa xmm13, xmmword ptr [rsp+0x50] 1650 pshufd xmm6, xmm5, 0x0F 1651 shufps xmm5, xmm13, 214 1652 pshufd xmm12, xmm5, 0x39 1653 movdqa xmm5, xmm14 1654 shufps xmm5, xmm15, 250 1655 pblendw xmm6, xmm5, 0xCC 1656 movdqa xmm5, xmm15 1657 punpcklqdq xmm5, xmm13 1658 pblendw xmm5, xmm14, 0xC0 1659 pshufd xmm5, xmm5, 0x78 1660 punpckhdq xmm13, xmm15 1661 punpckldq xmm14, xmm13 1662 pshufd xmm15, xmm14, 0x1E 1663 movdqa xmm13, xmm6 1664 movdqa xmm14, xmm5 1665 movdqa xmm5, xmmword ptr [rsp+0x20] 1666 movdqa xmm6, xmmword ptr [rsp+0x40] 1667 jmp 9b 16689: 1669 pxor xmm0, xmm2 1670 pxor xmm1, xmm3 1671 pxor xmm8, xmm10 1672 pxor xmm9, xmm11 1673 mov eax, r13d 1674 cmp rdx, r15 1675 jne 2b 1676 movups xmmword ptr [rbx], xmm0 1677 movups xmmword ptr [rbx+0x10], xmm1 1678 movups xmmword ptr [rbx+0x20], xmm8 1679 movups xmmword ptr [rbx+0x30], xmm9 1680 
movdqa xmm0, xmmword ptr [rsp+0x130] 1681 movdqa xmm1, xmmword ptr [rsp+0x110] 1682 movdqa xmm2, xmmword ptr [rsp+0x120] 1683 movdqu xmm3, xmmword ptr [rsp+0x118] 1684 movdqu xmm4, xmmword ptr [rsp+0x128] 1685 blendvps xmm1, xmm3, xmm0 1686 blendvps xmm2, xmm4, xmm0 1687 movdqa xmmword ptr [rsp+0x110], xmm1 1688 movdqa xmmword ptr [rsp+0x120], xmm2 1689 add rdi, 16 1690 add rbx, 64 1691 sub rsi, 2 16923: 1693 test esi, 0x1 1694 je 4b 1695 movups xmm0, xmmword ptr [rcx] 1696 movups xmm1, xmmword ptr [rcx+0x10] 1697 movd xmm13, dword ptr [rsp+0x110] 1698 pinsrd xmm13, dword ptr [rsp+0x120], 1 1699 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1700 movaps xmm14, xmmword ptr [ROT8+rip] 1701 movaps xmm15, xmmword ptr [ROT16+rip] 1702 mov r8, qword ptr [rdi] 1703 movzx eax, byte ptr [rbp+0x40] 1704 or eax, r13d 1705 xor edx, edx 17062: 1707 mov r14d, eax 1708 or eax, r12d 1709 add rdx, 64 1710 cmp rdx, r15 1711 cmovne eax, r14d 1712 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1713 movaps xmm3, xmm13 1714 pinsrd xmm3, eax, 3 1715 movups xmm4, xmmword ptr [r8+rdx-0x40] 1716 movups xmm5, xmmword ptr [r8+rdx-0x30] 1717 movaps xmm8, xmm4 1718 shufps xmm4, xmm5, 136 1719 shufps xmm8, xmm5, 221 1720 movaps xmm5, xmm8 1721 movups xmm6, xmmword ptr [r8+rdx-0x20] 1722 movups xmm7, xmmword ptr [r8+rdx-0x10] 1723 movaps xmm8, xmm6 1724 shufps xmm6, xmm7, 136 1725 pshufd xmm6, xmm6, 0x93 1726 shufps xmm8, xmm7, 221 1727 pshufd xmm7, xmm8, 0x93 1728 mov al, 7 17299: 1730 paddd xmm0, xmm4 1731 paddd xmm0, xmm1 1732 pxor xmm3, xmm0 1733 pshufb xmm3, xmm15 1734 paddd xmm2, xmm3 1735 pxor xmm1, xmm2 1736 movdqa xmm11, xmm1 1737 pslld xmm1, 20 1738 psrld xmm11, 12 1739 por xmm1, xmm11 1740 paddd xmm0, xmm5 1741 paddd xmm0, xmm1 1742 pxor xmm3, xmm0 1743 pshufb xmm3, xmm14 1744 paddd xmm2, xmm3 1745 pxor xmm1, xmm2 1746 movdqa xmm11, xmm1 1747 pslld xmm1, 25 1748 psrld xmm11, 7 1749 por xmm1, xmm11 1750 pshufd xmm0, xmm0, 0x93 1751 pshufd xmm3, xmm3, 0x4E 1752 pshufd xmm2, xmm2, 0x39 1753 
/* Remainder of the one-input path of blake3_hash_many_sse41 (the function
 * starts earlier in the file).  Instruction sequence unchanged. */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        mov     eax, r13d
        /* "dec al" above overwrote the flags, so the end-of-input test is redone */
        cmp     rdx, r15
        jne     2b
        movups  xmmword ptr [rbx], xmm0
        movups  xmmword ptr [rbx+0x10], xmm1
        jmp     4b

/*
 * blake3_compress_in_place_sse41
 *
 * Runs seven rounds over a 4x4 matrix of 32-bit words held in xmm0-xmm3 and
 * writes the folded 32-byte result back over the input state.
 *
 * Registers read on entry:
 *   rdi  pointer to 32 bytes of state; loaded at entry, overwritten at exit
 *   rsi  pointer to a 64-byte message block (unaligned loads are used)
 *   dl   byte that becomes lane 2 of the fourth row (xmm3)
 *   rcx  64-bit value that becomes lanes 0-1 of the fourth row
 *   r8b  byte that becomes lane 3 of the fourth row
 * NOTE(review): dl / rcx / r8b are presumably the block_len / counter / flags
 * arguments of the C prototype -- confirm against the declaring header.
 *
 * Clobbers: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags.
 */
.p2align 6
blake3_compress_in_place_sse41:
_blake3_compress_in_place_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        /* Zero-extend both byte-sized arguments before packing them, the same
         * way blake3_compress_xof_sse41 does, so bits above bit 7 of rdx or r8
         * can never reach the state words. */
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax                /* rdx = dl | (r8b << 32) */
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4           /* xmm3 = { rcx lo, rcx hi, dl, r8b } */
        /* Split the block into even-indexed words (shufps 136) and odd-indexed
         * words (shufps 221); the second half is also lane-rotated (0x93). */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7                   /* round counter */
9:
        /* first half of the round: message words in xmm4 then xmm5 */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15             /* 16-bit rotate of every dword */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11             /* rotate right by 12 */
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14             /* rotate right by 8 */
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11             /* rotate right by 7 */
        /* rotate the lanes of rows 0, 3 and 2 for the second half */
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* second half of the round: message words in xmm6 then xmm7 */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        /* undo the lane rotation */
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* reorder the message words held in xmm4-xmm7 for the next round */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* fold rows 2-3 into rows 0-1 and overwrite the caller state */
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        movups  xmmword ptr [rdi], xmm0
        movups  xmmword ptr [rdi+0x10], xmm1
        ret

/*
 * blake3_compress_xof_sse41
 *
 * Same seven-round computation as blake3_compress_in_place_sse41, but the
 * 32-byte state at rdi is only read, and 64 bytes are written to r9:
 *   [r9+0x00..0x1F] = rows 0-1 XOR rows 2-3
 *   [r9+0x20..0x3F] = rows 2-3 XOR the original 32 bytes at rdi
 *
 * Registers read on entry:
 *   rdi  pointer to 32 bytes of input state (read at entry and again at exit)
 *   rsi  pointer to a 64-byte message block
 *   dl   byte that becomes lane 2 of the fourth row (zero-extended here)
 *   rcx  64-bit value that becomes lanes 0-1 of the fourth row
 *   r8b  byte that becomes lane 3 of the fourth row (zero-extended here)
 *   r9   pointer to the 64-byte output
 * NOTE(review): dl / rcx / r8b are presumably block_len / counter / flags --
 * confirm against the declaring header.
 *
 * Clobbers: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags.
 */
.p2align 6
blake3_compress_xof_sse41:
_blake3_compress_xof_sse41:
        _CET_ENDBR
        movups  xmm0, xmmword ptr [rdi]
        movups  xmm1, xmmword ptr [rdi+0x10]
        movaps  xmm2, xmmword ptr [BLAKE3_IV+rip]
        movzx   eax, r8b
        movzx   edx, dl
        shl     rax, 32
        add     rdx, rax                /* rdx = dl | (r8b << 32) */
        movq    xmm3, rcx
        movq    xmm4, rdx
        punpcklqdq xmm3, xmm4           /* xmm3 = { rcx lo, rcx hi, dl, r8b } */
        /* even-indexed / odd-indexed message words, as in the in-place variant */
        movups  xmm4, xmmword ptr [rsi]
        movups  xmm5, xmmword ptr [rsi+0x10]
        movaps  xmm8, xmm4
        shufps  xmm4, xmm5, 136
        shufps  xmm8, xmm5, 221
        movaps  xmm5, xmm8
        movups  xmm6, xmmword ptr [rsi+0x20]
        movups  xmm7, xmmword ptr [rsi+0x30]
        movaps  xmm8, xmm6
        shufps  xmm6, xmm7, 136
        pshufd  xmm6, xmm6, 0x93
        shufps  xmm8, xmm7, 221
        pshufd  xmm7, xmm8, 0x93
        movaps  xmm14, xmmword ptr [ROT8+rip]
        movaps  xmm15, xmmword ptr [ROT16+rip]
        mov     al, 7                   /* round counter */
9:
        /* first half of the round */
        paddd   xmm0, xmm4
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm5
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x93
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x39
        /* second half of the round */
        paddd   xmm0, xmm6
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm15
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 20
        psrld   xmm11, 12
        por     xmm1, xmm11
        paddd   xmm0, xmm7
        paddd   xmm0, xmm1
        pxor    xmm3, xmm0
        pshufb  xmm3, xmm14
        paddd   xmm2, xmm3
        pxor    xmm1, xmm2
        movdqa  xmm11, xmm1
        pslld   xmm1, 25
        psrld   xmm11, 7
        por     xmm1, xmm11
        pshufd  xmm0, xmm0, 0x39
        pshufd  xmm3, xmm3, 0x4E
        pshufd  xmm2, xmm2, 0x93
        dec     al
        jz      9f
        /* reorder the message words held in xmm4-xmm7 for the next round */
        movdqa  xmm8, xmm4
        shufps  xmm8, xmm5, 214
        pshufd  xmm9, xmm4, 0x0F
        pshufd  xmm4, xmm8, 0x39
        movdqa  xmm8, xmm6
        shufps  xmm8, xmm7, 250
        pblendw xmm9, xmm8, 0xCC
        movdqa  xmm8, xmm7
        punpcklqdq xmm8, xmm5
        pblendw xmm8, xmm6, 0xC0
        pshufd  xmm8, xmm8, 0x78
        punpckhdq xmm5, xmm7
        punpckldq xmm6, xmm5
        pshufd  xmm7, xmm6, 0x1E
        movdqa  xmm5, xmm9
        movdqa  xmm6, xmm8
        jmp     9b
9:
        /* reload the untouched input state for the upper half of the output */
        movdqu  xmm4, xmmword ptr [rdi]
        movdqu  xmm5, xmmword ptr [rdi+0x10]
        pxor    xmm0, xmm2
        pxor    xmm1, xmm3
        pxor    xmm2, xmm4
        pxor    xmm3, xmm5
        movups  xmmword ptr [r9], xmm0
        movups  xmmword ptr [r9+0x10], xmm1
        movups  xmmword ptr [r9+0x20], xmm2
        movups  xmmword ptr [r9+0x30], xmm3
        ret


/* Read-only constant tables.  The section starts 64-byte aligned and every
 * table is exactly 16 bytes, so each label stays 16-byte aligned for the
 * aligned loads and SSE memory operands that reference it. */
#ifdef __APPLE__
.static_data
#else
.section .rodata
#endif
.p2align  6
BLAKE3_IV:
        .long  0x6A09E667, 0xBB67AE85
        .long  0x3C6EF372, 0xA54FF53A
/* pshufb masks: ROT16 swaps the 16-bit halves of every dword, ROT8 rotates
 * every dword right by one byte */
ROT16:
        .byte  2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
ROT8:
        .byte  1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
/* per-lane offsets 0..3 and a broadcast 4 */
ADD0:
        .long  0, 1, 2, 3
ADD1:
        .long  4, 4, 4, 4
/* each word of BLAKE3_IV broadcast to all four lanes */
BLAKE3_IV_0:
        .long  0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
        .long  0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
        .long  0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
        .long  0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
BLAKE3_BLOCK_LEN:
        .long  64, 64, 64, 64
/* sign-bit mask, XORed into both operands ahead of the signed pcmpgtd */
CMP_MSB_MASK:
        .long  0x80000000, 0x80000000, 0x80000000, 0x80000000

#endif /* __x86_64__ */