1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22/* 23 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 24 * Copyright (c) 2019-2020 Samuel Neves 25 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> 26 */ 27 28#if defined(HAVE_SSE4_1) 29 30#define _ASM 31#include <sys/asm_linkage.h> 32 33.intel_syntax noprefix 34 35.text 36 37ENTRY_ALIGN(zfs_blake3_hash_many_sse41, 64) 38 ENDBR 39 push r15 40 push r14 41 push r13 42 push r12 43 push rbx 44 push rbp 45 mov rbp, rsp 46 sub rsp, 360 47 and rsp, 0xFFFFFFFFFFFFFFC0 48 neg r9d 49 movd xmm0, r9d 50 pshufd xmm0, xmm0, 0x00 51 movdqa xmmword ptr [rsp+0x130], xmm0 52 movdqa xmm1, xmm0 53 pand xmm1, xmmword ptr [ADD0+rip] 54 pand xmm0, xmmword ptr [ADD1+rip] 55 movdqa xmmword ptr [rsp+0x150], xmm0 56 movd xmm0, r8d 57 pshufd xmm0, xmm0, 0x00 58 paddd xmm0, xmm1 59 movdqa xmmword ptr [rsp+0x110], xmm0 60 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 61 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 62 pcmpgtd xmm1, xmm0 63 shr r8, 32 64 movd xmm2, r8d 65 pshufd xmm2, xmm2, 0x00 66 psubd xmm2, xmm1 67 movdqa xmmword ptr [rsp+0x120], xmm2 68 mov rbx, qword ptr [rbp+0x50] 69 mov r15, rdx 70 shl r15, 6 71 movzx r13d, byte 
ptr [rbp+0x38] 72 movzx r12d, byte ptr [rbp+0x48] 73 cmp rsi, 4 74 jc 3f 752: 76 movdqu xmm3, xmmword ptr [rcx] 77 pshufd xmm0, xmm3, 0x00 78 pshufd xmm1, xmm3, 0x55 79 pshufd xmm2, xmm3, 0xAA 80 pshufd xmm3, xmm3, 0xFF 81 movdqu xmm7, xmmword ptr [rcx+0x10] 82 pshufd xmm4, xmm7, 0x00 83 pshufd xmm5, xmm7, 0x55 84 pshufd xmm6, xmm7, 0xAA 85 pshufd xmm7, xmm7, 0xFF 86 mov r8, qword ptr [rdi] 87 mov r9, qword ptr [rdi+0x8] 88 mov r10, qword ptr [rdi+0x10] 89 mov r11, qword ptr [rdi+0x18] 90 movzx eax, byte ptr [rbp+0x40] 91 or eax, r13d 92 xor edx, edx 939: 94 mov r14d, eax 95 or eax, r12d 96 add rdx, 64 97 cmp rdx, r15 98 cmovne eax, r14d 99 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 100 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 101 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 102 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 103 movdqa xmm12, xmm8 104 punpckldq xmm8, xmm9 105 punpckhdq xmm12, xmm9 106 movdqa xmm14, xmm10 107 punpckldq xmm10, xmm11 108 punpckhdq xmm14, xmm11 109 movdqa xmm9, xmm8 110 punpcklqdq xmm8, xmm10 111 punpckhqdq xmm9, xmm10 112 movdqa xmm13, xmm12 113 punpcklqdq xmm12, xmm14 114 punpckhqdq xmm13, xmm14 115 movdqa xmmword ptr [rsp], xmm8 116 movdqa xmmword ptr [rsp+0x10], xmm9 117 movdqa xmmword ptr [rsp+0x20], xmm12 118 movdqa xmmword ptr [rsp+0x30], xmm13 119 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 120 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 121 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 122 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 123 movdqa xmm12, xmm8 124 punpckldq xmm8, xmm9 125 punpckhdq xmm12, xmm9 126 movdqa xmm14, xmm10 127 punpckldq xmm10, xmm11 128 punpckhdq xmm14, xmm11 129 movdqa xmm9, xmm8 130 punpcklqdq xmm8, xmm10 131 punpckhqdq xmm9, xmm10 132 movdqa xmm13, xmm12 133 punpcklqdq xmm12, xmm14 134 punpckhqdq xmm13, xmm14 135 movdqa xmmword ptr [rsp+0x40], xmm8 136 movdqa xmmword ptr [rsp+0x50], xmm9 137 movdqa xmmword ptr [rsp+0x60], xmm12 138 movdqa xmmword ptr [rsp+0x70], xmm13 139 movdqu xmm8, xmmword ptr [r8+rdx-0x20] 140 movdqu xmm9, xmmword 
ptr [r9+rdx-0x20] 141 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 142 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 143 movdqa xmm12, xmm8 144 punpckldq xmm8, xmm9 145 punpckhdq xmm12, xmm9 146 movdqa xmm14, xmm10 147 punpckldq xmm10, xmm11 148 punpckhdq xmm14, xmm11 149 movdqa xmm9, xmm8 150 punpcklqdq xmm8, xmm10 151 punpckhqdq xmm9, xmm10 152 movdqa xmm13, xmm12 153 punpcklqdq xmm12, xmm14 154 punpckhqdq xmm13, xmm14 155 movdqa xmmword ptr [rsp+0x80], xmm8 156 movdqa xmmword ptr [rsp+0x90], xmm9 157 movdqa xmmword ptr [rsp+0xA0], xmm12 158 movdqa xmmword ptr [rsp+0xB0], xmm13 159 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 160 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 161 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 162 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 163 movdqa xmm12, xmm8 164 punpckldq xmm8, xmm9 165 punpckhdq xmm12, xmm9 166 movdqa xmm14, xmm10 167 punpckldq xmm10, xmm11 168 punpckhdq xmm14, xmm11 169 movdqa xmm9, xmm8 170 punpcklqdq xmm8, xmm10 171 punpckhqdq xmm9, xmm10 172 movdqa xmm13, xmm12 173 punpcklqdq xmm12, xmm14 174 punpckhqdq xmm13, xmm14 175 movdqa xmmword ptr [rsp+0xC0], xmm8 176 movdqa xmmword ptr [rsp+0xD0], xmm9 177 movdqa xmmword ptr [rsp+0xE0], xmm12 178 movdqa xmmword ptr [rsp+0xF0], xmm13 179 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 180 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 181 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 182 movdqa xmm12, xmmword ptr [rsp+0x110] 183 movdqa xmm13, xmmword ptr [rsp+0x120] 184 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 185 movd xmm15, eax 186 pshufd xmm15, xmm15, 0x00 187 prefetcht0 [r8+rdx+0x80] 188 prefetcht0 [r9+rdx+0x80] 189 prefetcht0 [r10+rdx+0x80] 190 prefetcht0 [r11+rdx+0x80] 191 paddd xmm0, xmmword ptr [rsp] 192 paddd xmm1, xmmword ptr [rsp+0x20] 193 paddd xmm2, xmmword ptr [rsp+0x40] 194 paddd xmm3, xmmword ptr [rsp+0x60] 195 paddd xmm0, xmm4 196 paddd xmm1, xmm5 197 paddd xmm2, xmm6 198 paddd xmm3, xmm7 199 pxor xmm12, xmm0 200 pxor xmm13, xmm1 201 pxor xmm14, xmm2 202 pxor xmm15, xmm3 203 
movdqa xmm8, xmmword ptr [ROT16+rip] 204 pshufb xmm12, xmm8 205 pshufb xmm13, xmm8 206 pshufb xmm14, xmm8 207 pshufb xmm15, xmm8 208 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 209 paddd xmm8, xmm12 210 paddd xmm9, xmm13 211 paddd xmm10, xmm14 212 paddd xmm11, xmm15 213 pxor xmm4, xmm8 214 pxor xmm5, xmm9 215 pxor xmm6, xmm10 216 pxor xmm7, xmm11 217 movdqa xmmword ptr [rsp+0x100], xmm8 218 movdqa xmm8, xmm4 219 psrld xmm8, 12 220 pslld xmm4, 20 221 por xmm4, xmm8 222 movdqa xmm8, xmm5 223 psrld xmm8, 12 224 pslld xmm5, 20 225 por xmm5, xmm8 226 movdqa xmm8, xmm6 227 psrld xmm8, 12 228 pslld xmm6, 20 229 por xmm6, xmm8 230 movdqa xmm8, xmm7 231 psrld xmm8, 12 232 pslld xmm7, 20 233 por xmm7, xmm8 234 paddd xmm0, xmmword ptr [rsp+0x10] 235 paddd xmm1, xmmword ptr [rsp+0x30] 236 paddd xmm2, xmmword ptr [rsp+0x50] 237 paddd xmm3, xmmword ptr [rsp+0x70] 238 paddd xmm0, xmm4 239 paddd xmm1, xmm5 240 paddd xmm2, xmm6 241 paddd xmm3, xmm7 242 pxor xmm12, xmm0 243 pxor xmm13, xmm1 244 pxor xmm14, xmm2 245 pxor xmm15, xmm3 246 movdqa xmm8, xmmword ptr [ROT8+rip] 247 pshufb xmm12, xmm8 248 pshufb xmm13, xmm8 249 pshufb xmm14, xmm8 250 pshufb xmm15, xmm8 251 movdqa xmm8, xmmword ptr [rsp+0x100] 252 paddd xmm8, xmm12 253 paddd xmm9, xmm13 254 paddd xmm10, xmm14 255 paddd xmm11, xmm15 256 pxor xmm4, xmm8 257 pxor xmm5, xmm9 258 pxor xmm6, xmm10 259 pxor xmm7, xmm11 260 movdqa xmmword ptr [rsp+0x100], xmm8 261 movdqa xmm8, xmm4 262 psrld xmm8, 7 263 pslld xmm4, 25 264 por xmm4, xmm8 265 movdqa xmm8, xmm5 266 psrld xmm8, 7 267 pslld xmm5, 25 268 por xmm5, xmm8 269 movdqa xmm8, xmm6 270 psrld xmm8, 7 271 pslld xmm6, 25 272 por xmm6, xmm8 273 movdqa xmm8, xmm7 274 psrld xmm8, 7 275 pslld xmm7, 25 276 por xmm7, xmm8 277 paddd xmm0, xmmword ptr [rsp+0x80] 278 paddd xmm1, xmmword ptr [rsp+0xA0] 279 paddd xmm2, xmmword ptr [rsp+0xC0] 280 paddd xmm3, xmmword ptr [rsp+0xE0] 281 paddd xmm0, xmm5 282 paddd xmm1, xmm6 283 paddd xmm2, xmm7 284 paddd xmm3, xmm4 285 pxor xmm15, xmm0 286 pxor 
xmm12, xmm1 287 pxor xmm13, xmm2 288 pxor xmm14, xmm3 289 movdqa xmm8, xmmword ptr [ROT16+rip] 290 pshufb xmm15, xmm8 291 pshufb xmm12, xmm8 292 pshufb xmm13, xmm8 293 pshufb xmm14, xmm8 294 paddd xmm10, xmm15 295 paddd xmm11, xmm12 296 movdqa xmm8, xmmword ptr [rsp+0x100] 297 paddd xmm8, xmm13 298 paddd xmm9, xmm14 299 pxor xmm5, xmm10 300 pxor xmm6, xmm11 301 pxor xmm7, xmm8 302 pxor xmm4, xmm9 303 movdqa xmmword ptr [rsp+0x100], xmm8 304 movdqa xmm8, xmm5 305 psrld xmm8, 12 306 pslld xmm5, 20 307 por xmm5, xmm8 308 movdqa xmm8, xmm6 309 psrld xmm8, 12 310 pslld xmm6, 20 311 por xmm6, xmm8 312 movdqa xmm8, xmm7 313 psrld xmm8, 12 314 pslld xmm7, 20 315 por xmm7, xmm8 316 movdqa xmm8, xmm4 317 psrld xmm8, 12 318 pslld xmm4, 20 319 por xmm4, xmm8 320 paddd xmm0, xmmword ptr [rsp+0x90] 321 paddd xmm1, xmmword ptr [rsp+0xB0] 322 paddd xmm2, xmmword ptr [rsp+0xD0] 323 paddd xmm3, xmmword ptr [rsp+0xF0] 324 paddd xmm0, xmm5 325 paddd xmm1, xmm6 326 paddd xmm2, xmm7 327 paddd xmm3, xmm4 328 pxor xmm15, xmm0 329 pxor xmm12, xmm1 330 pxor xmm13, xmm2 331 pxor xmm14, xmm3 332 movdqa xmm8, xmmword ptr [ROT8+rip] 333 pshufb xmm15, xmm8 334 pshufb xmm12, xmm8 335 pshufb xmm13, xmm8 336 pshufb xmm14, xmm8 337 paddd xmm10, xmm15 338 paddd xmm11, xmm12 339 movdqa xmm8, xmmword ptr [rsp+0x100] 340 paddd xmm8, xmm13 341 paddd xmm9, xmm14 342 pxor xmm5, xmm10 343 pxor xmm6, xmm11 344 pxor xmm7, xmm8 345 pxor xmm4, xmm9 346 movdqa xmmword ptr [rsp+0x100], xmm8 347 movdqa xmm8, xmm5 348 psrld xmm8, 7 349 pslld xmm5, 25 350 por xmm5, xmm8 351 movdqa xmm8, xmm6 352 psrld xmm8, 7 353 pslld xmm6, 25 354 por xmm6, xmm8 355 movdqa xmm8, xmm7 356 psrld xmm8, 7 357 pslld xmm7, 25 358 por xmm7, xmm8 359 movdqa xmm8, xmm4 360 psrld xmm8, 7 361 pslld xmm4, 25 362 por xmm4, xmm8 363 paddd xmm0, xmmword ptr [rsp+0x20] 364 paddd xmm1, xmmword ptr [rsp+0x30] 365 paddd xmm2, xmmword ptr [rsp+0x70] 366 paddd xmm3, xmmword ptr [rsp+0x40] 367 paddd xmm0, xmm4 368 paddd xmm1, xmm5 369 paddd xmm2, xmm6 
370 paddd xmm3, xmm7 371 pxor xmm12, xmm0 372 pxor xmm13, xmm1 373 pxor xmm14, xmm2 374 pxor xmm15, xmm3 375 movdqa xmm8, xmmword ptr [ROT16+rip] 376 pshufb xmm12, xmm8 377 pshufb xmm13, xmm8 378 pshufb xmm14, xmm8 379 pshufb xmm15, xmm8 380 movdqa xmm8, xmmword ptr [rsp+0x100] 381 paddd xmm8, xmm12 382 paddd xmm9, xmm13 383 paddd xmm10, xmm14 384 paddd xmm11, xmm15 385 pxor xmm4, xmm8 386 pxor xmm5, xmm9 387 pxor xmm6, xmm10 388 pxor xmm7, xmm11 389 movdqa xmmword ptr [rsp+0x100], xmm8 390 movdqa xmm8, xmm4 391 psrld xmm8, 12 392 pslld xmm4, 20 393 por xmm4, xmm8 394 movdqa xmm8, xmm5 395 psrld xmm8, 12 396 pslld xmm5, 20 397 por xmm5, xmm8 398 movdqa xmm8, xmm6 399 psrld xmm8, 12 400 pslld xmm6, 20 401 por xmm6, xmm8 402 movdqa xmm8, xmm7 403 psrld xmm8, 12 404 pslld xmm7, 20 405 por xmm7, xmm8 406 paddd xmm0, xmmword ptr [rsp+0x60] 407 paddd xmm1, xmmword ptr [rsp+0xA0] 408 paddd xmm2, xmmword ptr [rsp] 409 paddd xmm3, xmmword ptr [rsp+0xD0] 410 paddd xmm0, xmm4 411 paddd xmm1, xmm5 412 paddd xmm2, xmm6 413 paddd xmm3, xmm7 414 pxor xmm12, xmm0 415 pxor xmm13, xmm1 416 pxor xmm14, xmm2 417 pxor xmm15, xmm3 418 movdqa xmm8, xmmword ptr [ROT8+rip] 419 pshufb xmm12, xmm8 420 pshufb xmm13, xmm8 421 pshufb xmm14, xmm8 422 pshufb xmm15, xmm8 423 movdqa xmm8, xmmword ptr [rsp+0x100] 424 paddd xmm8, xmm12 425 paddd xmm9, xmm13 426 paddd xmm10, xmm14 427 paddd xmm11, xmm15 428 pxor xmm4, xmm8 429 pxor xmm5, xmm9 430 pxor xmm6, xmm10 431 pxor xmm7, xmm11 432 movdqa xmmword ptr [rsp+0x100], xmm8 433 movdqa xmm8, xmm4 434 psrld xmm8, 7 435 pslld xmm4, 25 436 por xmm4, xmm8 437 movdqa xmm8, xmm5 438 psrld xmm8, 7 439 pslld xmm5, 25 440 por xmm5, xmm8 441 movdqa xmm8, xmm6 442 psrld xmm8, 7 443 pslld xmm6, 25 444 por xmm6, xmm8 445 movdqa xmm8, xmm7 446 psrld xmm8, 7 447 pslld xmm7, 25 448 por xmm7, xmm8 449 paddd xmm0, xmmword ptr [rsp+0x10] 450 paddd xmm1, xmmword ptr [rsp+0xC0] 451 paddd xmm2, xmmword ptr [rsp+0x90] 452 paddd xmm3, xmmword ptr [rsp+0xF0] 453 paddd xmm0, 
xmm5 454 paddd xmm1, xmm6 455 paddd xmm2, xmm7 456 paddd xmm3, xmm4 457 pxor xmm15, xmm0 458 pxor xmm12, xmm1 459 pxor xmm13, xmm2 460 pxor xmm14, xmm3 461 movdqa xmm8, xmmword ptr [ROT16+rip] 462 pshufb xmm15, xmm8 463 pshufb xmm12, xmm8 464 pshufb xmm13, xmm8 465 pshufb xmm14, xmm8 466 paddd xmm10, xmm15 467 paddd xmm11, xmm12 468 movdqa xmm8, xmmword ptr [rsp+0x100] 469 paddd xmm8, xmm13 470 paddd xmm9, xmm14 471 pxor xmm5, xmm10 472 pxor xmm6, xmm11 473 pxor xmm7, xmm8 474 pxor xmm4, xmm9 475 movdqa xmmword ptr [rsp+0x100], xmm8 476 movdqa xmm8, xmm5 477 psrld xmm8, 12 478 pslld xmm5, 20 479 por xmm5, xmm8 480 movdqa xmm8, xmm6 481 psrld xmm8, 12 482 pslld xmm6, 20 483 por xmm6, xmm8 484 movdqa xmm8, xmm7 485 psrld xmm8, 12 486 pslld xmm7, 20 487 por xmm7, xmm8 488 movdqa xmm8, xmm4 489 psrld xmm8, 12 490 pslld xmm4, 20 491 por xmm4, xmm8 492 paddd xmm0, xmmword ptr [rsp+0xB0] 493 paddd xmm1, xmmword ptr [rsp+0x50] 494 paddd xmm2, xmmword ptr [rsp+0xE0] 495 paddd xmm3, xmmword ptr [rsp+0x80] 496 paddd xmm0, xmm5 497 paddd xmm1, xmm6 498 paddd xmm2, xmm7 499 paddd xmm3, xmm4 500 pxor xmm15, xmm0 501 pxor xmm12, xmm1 502 pxor xmm13, xmm2 503 pxor xmm14, xmm3 504 movdqa xmm8, xmmword ptr [ROT8+rip] 505 pshufb xmm15, xmm8 506 pshufb xmm12, xmm8 507 pshufb xmm13, xmm8 508 pshufb xmm14, xmm8 509 paddd xmm10, xmm15 510 paddd xmm11, xmm12 511 movdqa xmm8, xmmword ptr [rsp+0x100] 512 paddd xmm8, xmm13 513 paddd xmm9, xmm14 514 pxor xmm5, xmm10 515 pxor xmm6, xmm11 516 pxor xmm7, xmm8 517 pxor xmm4, xmm9 518 movdqa xmmword ptr [rsp+0x100], xmm8 519 movdqa xmm8, xmm5 520 psrld xmm8, 7 521 pslld xmm5, 25 522 por xmm5, xmm8 523 movdqa xmm8, xmm6 524 psrld xmm8, 7 525 pslld xmm6, 25 526 por xmm6, xmm8 527 movdqa xmm8, xmm7 528 psrld xmm8, 7 529 pslld xmm7, 25 530 por xmm7, xmm8 531 movdqa xmm8, xmm4 532 psrld xmm8, 7 533 pslld xmm4, 25 534 por xmm4, xmm8 535 paddd xmm0, xmmword ptr [rsp+0x30] 536 paddd xmm1, xmmword ptr [rsp+0xA0] 537 paddd xmm2, xmmword ptr [rsp+0xD0] 538 
paddd xmm3, xmmword ptr [rsp+0x70] 539 paddd xmm0, xmm4 540 paddd xmm1, xmm5 541 paddd xmm2, xmm6 542 paddd xmm3, xmm7 543 pxor xmm12, xmm0 544 pxor xmm13, xmm1 545 pxor xmm14, xmm2 546 pxor xmm15, xmm3 547 movdqa xmm8, xmmword ptr [ROT16+rip] 548 pshufb xmm12, xmm8 549 pshufb xmm13, xmm8 550 pshufb xmm14, xmm8 551 pshufb xmm15, xmm8 552 movdqa xmm8, xmmword ptr [rsp+0x100] 553 paddd xmm8, xmm12 554 paddd xmm9, xmm13 555 paddd xmm10, xmm14 556 paddd xmm11, xmm15 557 pxor xmm4, xmm8 558 pxor xmm5, xmm9 559 pxor xmm6, xmm10 560 pxor xmm7, xmm11 561 movdqa xmmword ptr [rsp+0x100], xmm8 562 movdqa xmm8, xmm4 563 psrld xmm8, 12 564 pslld xmm4, 20 565 por xmm4, xmm8 566 movdqa xmm8, xmm5 567 psrld xmm8, 12 568 pslld xmm5, 20 569 por xmm5, xmm8 570 movdqa xmm8, xmm6 571 psrld xmm8, 12 572 pslld xmm6, 20 573 por xmm6, xmm8 574 movdqa xmm8, xmm7 575 psrld xmm8, 12 576 pslld xmm7, 20 577 por xmm7, xmm8 578 paddd xmm0, xmmword ptr [rsp+0x40] 579 paddd xmm1, xmmword ptr [rsp+0xC0] 580 paddd xmm2, xmmword ptr [rsp+0x20] 581 paddd xmm3, xmmword ptr [rsp+0xE0] 582 paddd xmm0, xmm4 583 paddd xmm1, xmm5 584 paddd xmm2, xmm6 585 paddd xmm3, xmm7 586 pxor xmm12, xmm0 587 pxor xmm13, xmm1 588 pxor xmm14, xmm2 589 pxor xmm15, xmm3 590 movdqa xmm8, xmmword ptr [ROT8+rip] 591 pshufb xmm12, xmm8 592 pshufb xmm13, xmm8 593 pshufb xmm14, xmm8 594 pshufb xmm15, xmm8 595 movdqa xmm8, xmmword ptr [rsp+0x100] 596 paddd xmm8, xmm12 597 paddd xmm9, xmm13 598 paddd xmm10, xmm14 599 paddd xmm11, xmm15 600 pxor xmm4, xmm8 601 pxor xmm5, xmm9 602 pxor xmm6, xmm10 603 pxor xmm7, xmm11 604 movdqa xmmword ptr [rsp+0x100], xmm8 605 movdqa xmm8, xmm4 606 psrld xmm8, 7 607 pslld xmm4, 25 608 por xmm4, xmm8 609 movdqa xmm8, xmm5 610 psrld xmm8, 7 611 pslld xmm5, 25 612 por xmm5, xmm8 613 movdqa xmm8, xmm6 614 psrld xmm8, 7 615 pslld xmm6, 25 616 por xmm6, xmm8 617 movdqa xmm8, xmm7 618 psrld xmm8, 7 619 pslld xmm7, 25 620 por xmm7, xmm8 621 paddd xmm0, xmmword ptr [rsp+0x60] 622 paddd xmm1, xmmword ptr 
[rsp+0x90] 623 paddd xmm2, xmmword ptr [rsp+0xB0] 624 paddd xmm3, xmmword ptr [rsp+0x80] 625 paddd xmm0, xmm5 626 paddd xmm1, xmm6 627 paddd xmm2, xmm7 628 paddd xmm3, xmm4 629 pxor xmm15, xmm0 630 pxor xmm12, xmm1 631 pxor xmm13, xmm2 632 pxor xmm14, xmm3 633 movdqa xmm8, xmmword ptr [ROT16+rip] 634 pshufb xmm15, xmm8 635 pshufb xmm12, xmm8 636 pshufb xmm13, xmm8 637 pshufb xmm14, xmm8 638 paddd xmm10, xmm15 639 paddd xmm11, xmm12 640 movdqa xmm8, xmmword ptr [rsp+0x100] 641 paddd xmm8, xmm13 642 paddd xmm9, xmm14 643 pxor xmm5, xmm10 644 pxor xmm6, xmm11 645 pxor xmm7, xmm8 646 pxor xmm4, xmm9 647 movdqa xmmword ptr [rsp+0x100], xmm8 648 movdqa xmm8, xmm5 649 psrld xmm8, 12 650 pslld xmm5, 20 651 por xmm5, xmm8 652 movdqa xmm8, xmm6 653 psrld xmm8, 12 654 pslld xmm6, 20 655 por xmm6, xmm8 656 movdqa xmm8, xmm7 657 psrld xmm8, 12 658 pslld xmm7, 20 659 por xmm7, xmm8 660 movdqa xmm8, xmm4 661 psrld xmm8, 12 662 pslld xmm4, 20 663 por xmm4, xmm8 664 paddd xmm0, xmmword ptr [rsp+0x50] 665 paddd xmm1, xmmword ptr [rsp] 666 paddd xmm2, xmmword ptr [rsp+0xF0] 667 paddd xmm3, xmmword ptr [rsp+0x10] 668 paddd xmm0, xmm5 669 paddd xmm1, xmm6 670 paddd xmm2, xmm7 671 paddd xmm3, xmm4 672 pxor xmm15, xmm0 673 pxor xmm12, xmm1 674 pxor xmm13, xmm2 675 pxor xmm14, xmm3 676 movdqa xmm8, xmmword ptr [ROT8+rip] 677 pshufb xmm15, xmm8 678 pshufb xmm12, xmm8 679 pshufb xmm13, xmm8 680 pshufb xmm14, xmm8 681 paddd xmm10, xmm15 682 paddd xmm11, xmm12 683 movdqa xmm8, xmmword ptr [rsp+0x100] 684 paddd xmm8, xmm13 685 paddd xmm9, xmm14 686 pxor xmm5, xmm10 687 pxor xmm6, xmm11 688 pxor xmm7, xmm8 689 pxor xmm4, xmm9 690 movdqa xmmword ptr [rsp+0x100], xmm8 691 movdqa xmm8, xmm5 692 psrld xmm8, 7 693 pslld xmm5, 25 694 por xmm5, xmm8 695 movdqa xmm8, xmm6 696 psrld xmm8, 7 697 pslld xmm6, 25 698 por xmm6, xmm8 699 movdqa xmm8, xmm7 700 psrld xmm8, 7 701 pslld xmm7, 25 702 por xmm7, xmm8 703 movdqa xmm8, xmm4 704 psrld xmm8, 7 705 pslld xmm4, 25 706 por xmm4, xmm8 707 paddd xmm0, 
xmmword ptr [rsp+0xA0] 708 paddd xmm1, xmmword ptr [rsp+0xC0] 709 paddd xmm2, xmmword ptr [rsp+0xE0] 710 paddd xmm3, xmmword ptr [rsp+0xD0] 711 paddd xmm0, xmm4 712 paddd xmm1, xmm5 713 paddd xmm2, xmm6 714 paddd xmm3, xmm7 715 pxor xmm12, xmm0 716 pxor xmm13, xmm1 717 pxor xmm14, xmm2 718 pxor xmm15, xmm3 719 movdqa xmm8, xmmword ptr [ROT16+rip] 720 pshufb xmm12, xmm8 721 pshufb xmm13, xmm8 722 pshufb xmm14, xmm8 723 pshufb xmm15, xmm8 724 movdqa xmm8, xmmword ptr [rsp+0x100] 725 paddd xmm8, xmm12 726 paddd xmm9, xmm13 727 paddd xmm10, xmm14 728 paddd xmm11, xmm15 729 pxor xmm4, xmm8 730 pxor xmm5, xmm9 731 pxor xmm6, xmm10 732 pxor xmm7, xmm11 733 movdqa xmmword ptr [rsp+0x100], xmm8 734 movdqa xmm8, xmm4 735 psrld xmm8, 12 736 pslld xmm4, 20 737 por xmm4, xmm8 738 movdqa xmm8, xmm5 739 psrld xmm8, 12 740 pslld xmm5, 20 741 por xmm5, xmm8 742 movdqa xmm8, xmm6 743 psrld xmm8, 12 744 pslld xmm6, 20 745 por xmm6, xmm8 746 movdqa xmm8, xmm7 747 psrld xmm8, 12 748 pslld xmm7, 20 749 por xmm7, xmm8 750 paddd xmm0, xmmword ptr [rsp+0x70] 751 paddd xmm1, xmmword ptr [rsp+0x90] 752 paddd xmm2, xmmword ptr [rsp+0x30] 753 paddd xmm3, xmmword ptr [rsp+0xF0] 754 paddd xmm0, xmm4 755 paddd xmm1, xmm5 756 paddd xmm2, xmm6 757 paddd xmm3, xmm7 758 pxor xmm12, xmm0 759 pxor xmm13, xmm1 760 pxor xmm14, xmm2 761 pxor xmm15, xmm3 762 movdqa xmm8, xmmword ptr [ROT8+rip] 763 pshufb xmm12, xmm8 764 pshufb xmm13, xmm8 765 pshufb xmm14, xmm8 766 pshufb xmm15, xmm8 767 movdqa xmm8, xmmword ptr [rsp+0x100] 768 paddd xmm8, xmm12 769 paddd xmm9, xmm13 770 paddd xmm10, xmm14 771 paddd xmm11, xmm15 772 pxor xmm4, xmm8 773 pxor xmm5, xmm9 774 pxor xmm6, xmm10 775 pxor xmm7, xmm11 776 movdqa xmmword ptr [rsp+0x100], xmm8 777 movdqa xmm8, xmm4 778 psrld xmm8, 7 779 pslld xmm4, 25 780 por xmm4, xmm8 781 movdqa xmm8, xmm5 782 psrld xmm8, 7 783 pslld xmm5, 25 784 por xmm5, xmm8 785 movdqa xmm8, xmm6 786 psrld xmm8, 7 787 pslld xmm6, 25 788 por xmm6, xmm8 789 movdqa xmm8, xmm7 790 psrld xmm8, 7 791 
pslld xmm7, 25 792 por xmm7, xmm8 793 paddd xmm0, xmmword ptr [rsp+0x40] 794 paddd xmm1, xmmword ptr [rsp+0xB0] 795 paddd xmm2, xmmword ptr [rsp+0x50] 796 paddd xmm3, xmmword ptr [rsp+0x10] 797 paddd xmm0, xmm5 798 paddd xmm1, xmm6 799 paddd xmm2, xmm7 800 paddd xmm3, xmm4 801 pxor xmm15, xmm0 802 pxor xmm12, xmm1 803 pxor xmm13, xmm2 804 pxor xmm14, xmm3 805 movdqa xmm8, xmmword ptr [ROT16+rip] 806 pshufb xmm15, xmm8 807 pshufb xmm12, xmm8 808 pshufb xmm13, xmm8 809 pshufb xmm14, xmm8 810 paddd xmm10, xmm15 811 paddd xmm11, xmm12 812 movdqa xmm8, xmmword ptr [rsp+0x100] 813 paddd xmm8, xmm13 814 paddd xmm9, xmm14 815 pxor xmm5, xmm10 816 pxor xmm6, xmm11 817 pxor xmm7, xmm8 818 pxor xmm4, xmm9 819 movdqa xmmword ptr [rsp+0x100], xmm8 820 movdqa xmm8, xmm5 821 psrld xmm8, 12 822 pslld xmm5, 20 823 por xmm5, xmm8 824 movdqa xmm8, xmm6 825 psrld xmm8, 12 826 pslld xmm6, 20 827 por xmm6, xmm8 828 movdqa xmm8, xmm7 829 psrld xmm8, 12 830 pslld xmm7, 20 831 por xmm7, xmm8 832 movdqa xmm8, xmm4 833 psrld xmm8, 12 834 pslld xmm4, 20 835 por xmm4, xmm8 836 paddd xmm0, xmmword ptr [rsp] 837 paddd xmm1, xmmword ptr [rsp+0x20] 838 paddd xmm2, xmmword ptr [rsp+0x80] 839 paddd xmm3, xmmword ptr [rsp+0x60] 840 paddd xmm0, xmm5 841 paddd xmm1, xmm6 842 paddd xmm2, xmm7 843 paddd xmm3, xmm4 844 pxor xmm15, xmm0 845 pxor xmm12, xmm1 846 pxor xmm13, xmm2 847 pxor xmm14, xmm3 848 movdqa xmm8, xmmword ptr [ROT8+rip] 849 pshufb xmm15, xmm8 850 pshufb xmm12, xmm8 851 pshufb xmm13, xmm8 852 pshufb xmm14, xmm8 853 paddd xmm10, xmm15 854 paddd xmm11, xmm12 855 movdqa xmm8, xmmword ptr [rsp+0x100] 856 paddd xmm8, xmm13 857 paddd xmm9, xmm14 858 pxor xmm5, xmm10 859 pxor xmm6, xmm11 860 pxor xmm7, xmm8 861 pxor xmm4, xmm9 862 movdqa xmmword ptr [rsp+0x100], xmm8 863 movdqa xmm8, xmm5 864 psrld xmm8, 7 865 pslld xmm5, 25 866 por xmm5, xmm8 867 movdqa xmm8, xmm6 868 psrld xmm8, 7 869 pslld xmm6, 25 870 por xmm6, xmm8 871 movdqa xmm8, xmm7 872 psrld xmm8, 7 873 pslld xmm7, 25 874 por xmm7, xmm8 
875 movdqa xmm8, xmm4 876 psrld xmm8, 7 877 pslld xmm4, 25 878 por xmm4, xmm8 879 paddd xmm0, xmmword ptr [rsp+0xC0] 880 paddd xmm1, xmmword ptr [rsp+0x90] 881 paddd xmm2, xmmword ptr [rsp+0xF0] 882 paddd xmm3, xmmword ptr [rsp+0xE0] 883 paddd xmm0, xmm4 884 paddd xmm1, xmm5 885 paddd xmm2, xmm6 886 paddd xmm3, xmm7 887 pxor xmm12, xmm0 888 pxor xmm13, xmm1 889 pxor xmm14, xmm2 890 pxor xmm15, xmm3 891 movdqa xmm8, xmmword ptr [ROT16+rip] 892 pshufb xmm12, xmm8 893 pshufb xmm13, xmm8 894 pshufb xmm14, xmm8 895 pshufb xmm15, xmm8 896 movdqa xmm8, xmmword ptr [rsp+0x100] 897 paddd xmm8, xmm12 898 paddd xmm9, xmm13 899 paddd xmm10, xmm14 900 paddd xmm11, xmm15 901 pxor xmm4, xmm8 902 pxor xmm5, xmm9 903 pxor xmm6, xmm10 904 pxor xmm7, xmm11 905 movdqa xmmword ptr [rsp+0x100], xmm8 906 movdqa xmm8, xmm4 907 psrld xmm8, 12 908 pslld xmm4, 20 909 por xmm4, xmm8 910 movdqa xmm8, xmm5 911 psrld xmm8, 12 912 pslld xmm5, 20 913 por xmm5, xmm8 914 movdqa xmm8, xmm6 915 psrld xmm8, 12 916 pslld xmm6, 20 917 por xmm6, xmm8 918 movdqa xmm8, xmm7 919 psrld xmm8, 12 920 pslld xmm7, 20 921 por xmm7, xmm8 922 paddd xmm0, xmmword ptr [rsp+0xD0] 923 paddd xmm1, xmmword ptr [rsp+0xB0] 924 paddd xmm2, xmmword ptr [rsp+0xA0] 925 paddd xmm3, xmmword ptr [rsp+0x80] 926 paddd xmm0, xmm4 927 paddd xmm1, xmm5 928 paddd xmm2, xmm6 929 paddd xmm3, xmm7 930 pxor xmm12, xmm0 931 pxor xmm13, xmm1 932 pxor xmm14, xmm2 933 pxor xmm15, xmm3 934 movdqa xmm8, xmmword ptr [ROT8+rip] 935 pshufb xmm12, xmm8 936 pshufb xmm13, xmm8 937 pshufb xmm14, xmm8 938 pshufb xmm15, xmm8 939 movdqa xmm8, xmmword ptr [rsp+0x100] 940 paddd xmm8, xmm12 941 paddd xmm9, xmm13 942 paddd xmm10, xmm14 943 paddd xmm11, xmm15 944 pxor xmm4, xmm8 945 pxor xmm5, xmm9 946 pxor xmm6, xmm10 947 pxor xmm7, xmm11 948 movdqa xmmword ptr [rsp+0x100], xmm8 949 movdqa xmm8, xmm4 950 psrld xmm8, 7 951 pslld xmm4, 25 952 por xmm4, xmm8 953 movdqa xmm8, xmm5 954 psrld xmm8, 7 955 pslld xmm5, 25 956 por xmm5, xmm8 957 movdqa xmm8, xmm6 958 
psrld xmm8, 7 959 pslld xmm6, 25 960 por xmm6, xmm8 961 movdqa xmm8, xmm7 962 psrld xmm8, 7 963 pslld xmm7, 25 964 por xmm7, xmm8 965 paddd xmm0, xmmword ptr [rsp+0x70] 966 paddd xmm1, xmmword ptr [rsp+0x50] 967 paddd xmm2, xmmword ptr [rsp] 968 paddd xmm3, xmmword ptr [rsp+0x60] 969 paddd xmm0, xmm5 970 paddd xmm1, xmm6 971 paddd xmm2, xmm7 972 paddd xmm3, xmm4 973 pxor xmm15, xmm0 974 pxor xmm12, xmm1 975 pxor xmm13, xmm2 976 pxor xmm14, xmm3 977 movdqa xmm8, xmmword ptr [ROT16+rip] 978 pshufb xmm15, xmm8 979 pshufb xmm12, xmm8 980 pshufb xmm13, xmm8 981 pshufb xmm14, xmm8 982 paddd xmm10, xmm15 983 paddd xmm11, xmm12 984 movdqa xmm8, xmmword ptr [rsp+0x100] 985 paddd xmm8, xmm13 986 paddd xmm9, xmm14 987 pxor xmm5, xmm10 988 pxor xmm6, xmm11 989 pxor xmm7, xmm8 990 pxor xmm4, xmm9 991 movdqa xmmword ptr [rsp+0x100], xmm8 992 movdqa xmm8, xmm5 993 psrld xmm8, 12 994 pslld xmm5, 20 995 por xmm5, xmm8 996 movdqa xmm8, xmm6 997 psrld xmm8, 12 998 pslld xmm6, 20 999 por xmm6, xmm8 1000 movdqa xmm8, xmm7 1001 psrld xmm8, 12 1002 pslld xmm7, 20 1003 por xmm7, xmm8 1004 movdqa xmm8, xmm4 1005 psrld xmm8, 12 1006 pslld xmm4, 20 1007 por xmm4, xmm8 1008 paddd xmm0, xmmword ptr [rsp+0x20] 1009 paddd xmm1, xmmword ptr [rsp+0x30] 1010 paddd xmm2, xmmword ptr [rsp+0x10] 1011 paddd xmm3, xmmword ptr [rsp+0x40] 1012 paddd xmm0, xmm5 1013 paddd xmm1, xmm6 1014 paddd xmm2, xmm7 1015 paddd xmm3, xmm4 1016 pxor xmm15, xmm0 1017 pxor xmm12, xmm1 1018 pxor xmm13, xmm2 1019 pxor xmm14, xmm3 1020 movdqa xmm8, xmmword ptr [ROT8+rip] 1021 pshufb xmm15, xmm8 1022 pshufb xmm12, xmm8 1023 pshufb xmm13, xmm8 1024 pshufb xmm14, xmm8 1025 paddd xmm10, xmm15 1026 paddd xmm11, xmm12 1027 movdqa xmm8, xmmword ptr [rsp+0x100] 1028 paddd xmm8, xmm13 1029 paddd xmm9, xmm14 1030 pxor xmm5, xmm10 1031 pxor xmm6, xmm11 1032 pxor xmm7, xmm8 1033 pxor xmm4, xmm9 1034 movdqa xmmword ptr [rsp+0x100], xmm8 1035 movdqa xmm8, xmm5 1036 psrld xmm8, 7 1037 pslld xmm5, 25 1038 por xmm5, xmm8 1039 movdqa xmm8, 
xmm6 1040 psrld xmm8, 7 1041 pslld xmm6, 25 1042 por xmm6, xmm8 1043 movdqa xmm8, xmm7 1044 psrld xmm8, 7 1045 pslld xmm7, 25 1046 por xmm7, xmm8 1047 movdqa xmm8, xmm4 1048 psrld xmm8, 7 1049 pslld xmm4, 25 1050 por xmm4, xmm8 1051 paddd xmm0, xmmword ptr [rsp+0x90] 1052 paddd xmm1, xmmword ptr [rsp+0xB0] 1053 paddd xmm2, xmmword ptr [rsp+0x80] 1054 paddd xmm3, xmmword ptr [rsp+0xF0] 1055 paddd xmm0, xmm4 1056 paddd xmm1, xmm5 1057 paddd xmm2, xmm6 1058 paddd xmm3, xmm7 1059 pxor xmm12, xmm0 1060 pxor xmm13, xmm1 1061 pxor xmm14, xmm2 1062 pxor xmm15, xmm3 1063 movdqa xmm8, xmmword ptr [ROT16+rip] 1064 pshufb xmm12, xmm8 1065 pshufb xmm13, xmm8 1066 pshufb xmm14, xmm8 1067 pshufb xmm15, xmm8 1068 movdqa xmm8, xmmword ptr [rsp+0x100] 1069 paddd xmm8, xmm12 1070 paddd xmm9, xmm13 1071 paddd xmm10, xmm14 1072 paddd xmm11, xmm15 1073 pxor xmm4, xmm8 1074 pxor xmm5, xmm9 1075 pxor xmm6, xmm10 1076 pxor xmm7, xmm11 1077 movdqa xmmword ptr [rsp+0x100], xmm8 1078 movdqa xmm8, xmm4 1079 psrld xmm8, 12 1080 pslld xmm4, 20 1081 por xmm4, xmm8 1082 movdqa xmm8, xmm5 1083 psrld xmm8, 12 1084 pslld xmm5, 20 1085 por xmm5, xmm8 1086 movdqa xmm8, xmm6 1087 psrld xmm8, 12 1088 pslld xmm6, 20 1089 por xmm6, xmm8 1090 movdqa xmm8, xmm7 1091 psrld xmm8, 12 1092 pslld xmm7, 20 1093 por xmm7, xmm8 1094 paddd xmm0, xmmword ptr [rsp+0xE0] 1095 paddd xmm1, xmmword ptr [rsp+0x50] 1096 paddd xmm2, xmmword ptr [rsp+0xC0] 1097 paddd xmm3, xmmword ptr [rsp+0x10] 1098 paddd xmm0, xmm4 1099 paddd xmm1, xmm5 1100 paddd xmm2, xmm6 1101 paddd xmm3, xmm7 1102 pxor xmm12, xmm0 1103 pxor xmm13, xmm1 1104 pxor xmm14, xmm2 1105 pxor xmm15, xmm3 1106 movdqa xmm8, xmmword ptr [ROT8+rip] 1107 pshufb xmm12, xmm8 1108 pshufb xmm13, xmm8 1109 pshufb xmm14, xmm8 1110 pshufb xmm15, xmm8 1111 movdqa xmm8, xmmword ptr [rsp+0x100] 1112 paddd xmm8, xmm12 1113 paddd xmm9, xmm13 1114 paddd xmm10, xmm14 1115 paddd xmm11, xmm15 1116 pxor xmm4, xmm8 1117 pxor xmm5, xmm9 1118 pxor xmm6, xmm10 1119 pxor xmm7, xmm11 1120 
movdqa xmmword ptr [rsp+0x100], xmm8 1121 movdqa xmm8, xmm4 1122 psrld xmm8, 7 1123 pslld xmm4, 25 1124 por xmm4, xmm8 1125 movdqa xmm8, xmm5 1126 psrld xmm8, 7 1127 pslld xmm5, 25 1128 por xmm5, xmm8 1129 movdqa xmm8, xmm6 1130 psrld xmm8, 7 1131 pslld xmm6, 25 1132 por xmm6, xmm8 1133 movdqa xmm8, xmm7 1134 psrld xmm8, 7 1135 pslld xmm7, 25 1136 por xmm7, xmm8 1137 paddd xmm0, xmmword ptr [rsp+0xD0] 1138 paddd xmm1, xmmword ptr [rsp] 1139 paddd xmm2, xmmword ptr [rsp+0x20] 1140 paddd xmm3, xmmword ptr [rsp+0x40] 1141 paddd xmm0, xmm5 1142 paddd xmm1, xmm6 1143 paddd xmm2, xmm7 1144 paddd xmm3, xmm4 1145 pxor xmm15, xmm0 1146 pxor xmm12, xmm1 1147 pxor xmm13, xmm2 1148 pxor xmm14, xmm3 1149 movdqa xmm8, xmmword ptr [ROT16+rip] 1150 pshufb xmm15, xmm8 1151 pshufb xmm12, xmm8 1152 pshufb xmm13, xmm8 1153 pshufb xmm14, xmm8 1154 paddd xmm10, xmm15 1155 paddd xmm11, xmm12 1156 movdqa xmm8, xmmword ptr [rsp+0x100] 1157 paddd xmm8, xmm13 1158 paddd xmm9, xmm14 1159 pxor xmm5, xmm10 1160 pxor xmm6, xmm11 1161 pxor xmm7, xmm8 1162 pxor xmm4, xmm9 1163 movdqa xmmword ptr [rsp+0x100], xmm8 1164 movdqa xmm8, xmm5 1165 psrld xmm8, 12 1166 pslld xmm5, 20 1167 por xmm5, xmm8 1168 movdqa xmm8, xmm6 1169 psrld xmm8, 12 1170 pslld xmm6, 20 1171 por xmm6, xmm8 1172 movdqa xmm8, xmm7 1173 psrld xmm8, 12 1174 pslld xmm7, 20 1175 por xmm7, xmm8 1176 movdqa xmm8, xmm4 1177 psrld xmm8, 12 1178 pslld xmm4, 20 1179 por xmm4, xmm8 1180 paddd xmm0, xmmword ptr [rsp+0x30] 1181 paddd xmm1, xmmword ptr [rsp+0xA0] 1182 paddd xmm2, xmmword ptr [rsp+0x60] 1183 paddd xmm3, xmmword ptr [rsp+0x70] 1184 paddd xmm0, xmm5 1185 paddd xmm1, xmm6 1186 paddd xmm2, xmm7 1187 paddd xmm3, xmm4 1188 pxor xmm15, xmm0 1189 pxor xmm12, xmm1 1190 pxor xmm13, xmm2 1191 pxor xmm14, xmm3 1192 movdqa xmm8, xmmword ptr [ROT8+rip] 1193 pshufb xmm15, xmm8 1194 pshufb xmm12, xmm8 1195 pshufb xmm13, xmm8 1196 pshufb xmm14, xmm8 1197 paddd xmm10, xmm15 1198 paddd xmm11, xmm12 1199 movdqa xmm8, xmmword ptr [rsp+0x100] 1200 
paddd xmm8, xmm13 1201 paddd xmm9, xmm14 1202 pxor xmm5, xmm10 1203 pxor xmm6, xmm11 1204 pxor xmm7, xmm8 1205 pxor xmm4, xmm9 1206 movdqa xmmword ptr [rsp+0x100], xmm8 1207 movdqa xmm8, xmm5 1208 psrld xmm8, 7 1209 pslld xmm5, 25 1210 por xmm5, xmm8 1211 movdqa xmm8, xmm6 1212 psrld xmm8, 7 1213 pslld xmm6, 25 1214 por xmm6, xmm8 1215 movdqa xmm8, xmm7 1216 psrld xmm8, 7 1217 pslld xmm7, 25 1218 por xmm7, xmm8 1219 movdqa xmm8, xmm4 1220 psrld xmm8, 7 1221 pslld xmm4, 25 1222 por xmm4, xmm8 1223 paddd xmm0, xmmword ptr [rsp+0xB0] 1224 paddd xmm1, xmmword ptr [rsp+0x50] 1225 paddd xmm2, xmmword ptr [rsp+0x10] 1226 paddd xmm3, xmmword ptr [rsp+0x80] 1227 paddd xmm0, xmm4 1228 paddd xmm1, xmm5 1229 paddd xmm2, xmm6 1230 paddd xmm3, xmm7 1231 pxor xmm12, xmm0 1232 pxor xmm13, xmm1 1233 pxor xmm14, xmm2 1234 pxor xmm15, xmm3 1235 movdqa xmm8, xmmword ptr [ROT16+rip] 1236 pshufb xmm12, xmm8 1237 pshufb xmm13, xmm8 1238 pshufb xmm14, xmm8 1239 pshufb xmm15, xmm8 1240 movdqa xmm8, xmmword ptr [rsp+0x100] 1241 paddd xmm8, xmm12 1242 paddd xmm9, xmm13 1243 paddd xmm10, xmm14 1244 paddd xmm11, xmm15 1245 pxor xmm4, xmm8 1246 pxor xmm5, xmm9 1247 pxor xmm6, xmm10 1248 pxor xmm7, xmm11 1249 movdqa xmmword ptr [rsp+0x100], xmm8 1250 movdqa xmm8, xmm4 1251 psrld xmm8, 12 1252 pslld xmm4, 20 1253 por xmm4, xmm8 1254 movdqa xmm8, xmm5 1255 psrld xmm8, 12 1256 pslld xmm5, 20 1257 por xmm5, xmm8 1258 movdqa xmm8, xmm6 1259 psrld xmm8, 12 1260 pslld xmm6, 20 1261 por xmm6, xmm8 1262 movdqa xmm8, xmm7 1263 psrld xmm8, 12 1264 pslld xmm7, 20 1265 por xmm7, xmm8 1266 paddd xmm0, xmmword ptr [rsp+0xF0] 1267 paddd xmm1, xmmword ptr [rsp] 1268 paddd xmm2, xmmword ptr [rsp+0x90] 1269 paddd xmm3, xmmword ptr [rsp+0x60] 1270 paddd xmm0, xmm4 1271 paddd xmm1, xmm5 1272 paddd xmm2, xmm6 1273 paddd xmm3, xmm7 1274 pxor xmm12, xmm0 1275 pxor xmm13, xmm1 1276 pxor xmm14, xmm2 1277 pxor xmm15, xmm3 1278 movdqa xmm8, xmmword ptr [ROT8+rip] 1279 pshufb xmm12, xmm8 1280 pshufb xmm13, xmm8 1281 pshufb 
xmm14, xmm8 1282 pshufb xmm15, xmm8 1283 movdqa xmm8, xmmword ptr [rsp+0x100] 1284 paddd xmm8, xmm12 1285 paddd xmm9, xmm13 1286 paddd xmm10, xmm14 1287 paddd xmm11, xmm15 1288 pxor xmm4, xmm8 1289 pxor xmm5, xmm9 1290 pxor xmm6, xmm10 1291 pxor xmm7, xmm11 1292 movdqa xmmword ptr [rsp+0x100], xmm8 1293 movdqa xmm8, xmm4 1294 psrld xmm8, 7 1295 pslld xmm4, 25 1296 por xmm4, xmm8 1297 movdqa xmm8, xmm5 1298 psrld xmm8, 7 1299 pslld xmm5, 25 1300 por xmm5, xmm8 1301 movdqa xmm8, xmm6 1302 psrld xmm8, 7 1303 pslld xmm6, 25 1304 por xmm6, xmm8 1305 movdqa xmm8, xmm7 1306 psrld xmm8, 7 1307 pslld xmm7, 25 1308 por xmm7, xmm8 1309 paddd xmm0, xmmword ptr [rsp+0xE0] 1310 paddd xmm1, xmmword ptr [rsp+0x20] 1311 paddd xmm2, xmmword ptr [rsp+0x30] 1312 paddd xmm3, xmmword ptr [rsp+0x70] 1313 paddd xmm0, xmm5 1314 paddd xmm1, xmm6 1315 paddd xmm2, xmm7 1316 paddd xmm3, xmm4 1317 pxor xmm15, xmm0 1318 pxor xmm12, xmm1 1319 pxor xmm13, xmm2 1320 pxor xmm14, xmm3 1321 movdqa xmm8, xmmword ptr [ROT16+rip] 1322 pshufb xmm15, xmm8 1323 pshufb xmm12, xmm8 1324 pshufb xmm13, xmm8 1325 pshufb xmm14, xmm8 1326 paddd xmm10, xmm15 1327 paddd xmm11, xmm12 1328 movdqa xmm8, xmmword ptr [rsp+0x100] 1329 paddd xmm8, xmm13 1330 paddd xmm9, xmm14 1331 pxor xmm5, xmm10 1332 pxor xmm6, xmm11 1333 pxor xmm7, xmm8 1334 pxor xmm4, xmm9 1335 movdqa xmmword ptr [rsp+0x100], xmm8 1336 movdqa xmm8, xmm5 1337 psrld xmm8, 12 1338 pslld xmm5, 20 1339 por xmm5, xmm8 1340 movdqa xmm8, xmm6 1341 psrld xmm8, 12 1342 pslld xmm6, 20 1343 por xmm6, xmm8 1344 movdqa xmm8, xmm7 1345 psrld xmm8, 12 1346 pslld xmm7, 20 1347 por xmm7, xmm8 1348 movdqa xmm8, xmm4 1349 psrld xmm8, 12 1350 pslld xmm4, 20 1351 por xmm4, xmm8 1352 paddd xmm0, xmmword ptr [rsp+0xA0] 1353 paddd xmm1, xmmword ptr [rsp+0xC0] 1354 paddd xmm2, xmmword ptr [rsp+0x40] 1355 paddd xmm3, xmmword ptr [rsp+0xD0] 1356 paddd xmm0, xmm5 1357 paddd xmm1, xmm6 1358 paddd xmm2, xmm7 1359 paddd xmm3, xmm4 1360 pxor xmm15, xmm0 1361 pxor xmm12, xmm1 1362 pxor 
xmm13, xmm2 1363 pxor xmm14, xmm3 1364 movdqa xmm8, xmmword ptr [ROT8+rip] 1365 pshufb xmm15, xmm8 1366 pshufb xmm12, xmm8 1367 pshufb xmm13, xmm8 1368 pshufb xmm14, xmm8 1369 paddd xmm10, xmm15 1370 paddd xmm11, xmm12 1371 movdqa xmm8, xmmword ptr [rsp+0x100] 1372 paddd xmm8, xmm13 1373 paddd xmm9, xmm14 1374 pxor xmm5, xmm10 1375 pxor xmm6, xmm11 1376 pxor xmm7, xmm8 1377 pxor xmm4, xmm9 1378 pxor xmm0, xmm8 1379 pxor xmm1, xmm9 1380 pxor xmm2, xmm10 1381 pxor xmm3, xmm11 1382 movdqa xmm8, xmm5 1383 psrld xmm8, 7 1384 pslld xmm5, 25 1385 por xmm5, xmm8 1386 movdqa xmm8, xmm6 1387 psrld xmm8, 7 1388 pslld xmm6, 25 1389 por xmm6, xmm8 1390 movdqa xmm8, xmm7 1391 psrld xmm8, 7 1392 pslld xmm7, 25 1393 por xmm7, xmm8 1394 movdqa xmm8, xmm4 1395 psrld xmm8, 7 1396 pslld xmm4, 25 1397 por xmm4, xmm8 1398 pxor xmm4, xmm12 1399 pxor xmm5, xmm13 1400 pxor xmm6, xmm14 1401 pxor xmm7, xmm15 1402 mov eax, r13d 1403 jne 9b 1404 movdqa xmm9, xmm0 1405 punpckldq xmm0, xmm1 1406 punpckhdq xmm9, xmm1 1407 movdqa xmm11, xmm2 1408 punpckldq xmm2, xmm3 1409 punpckhdq xmm11, xmm3 1410 movdqa xmm1, xmm0 1411 punpcklqdq xmm0, xmm2 1412 punpckhqdq xmm1, xmm2 1413 movdqa xmm3, xmm9 1414 punpcklqdq xmm9, xmm11 1415 punpckhqdq xmm3, xmm11 1416 movdqu xmmword ptr [rbx], xmm0 1417 movdqu xmmword ptr [rbx+0x20], xmm1 1418 movdqu xmmword ptr [rbx+0x40], xmm9 1419 movdqu xmmword ptr [rbx+0x60], xmm3 1420 movdqa xmm9, xmm4 1421 punpckldq xmm4, xmm5 1422 punpckhdq xmm9, xmm5 1423 movdqa xmm11, xmm6 1424 punpckldq xmm6, xmm7 1425 punpckhdq xmm11, xmm7 1426 movdqa xmm5, xmm4 1427 punpcklqdq xmm4, xmm6 1428 punpckhqdq xmm5, xmm6 1429 movdqa xmm7, xmm9 1430 punpcklqdq xmm9, xmm11 1431 punpckhqdq xmm7, xmm11 1432 movdqu xmmword ptr [rbx+0x10], xmm4 1433 movdqu xmmword ptr [rbx+0x30], xmm5 1434 movdqu xmmword ptr [rbx+0x50], xmm9 1435 movdqu xmmword ptr [rbx+0x70], xmm7 1436 movdqa xmm1, xmmword ptr [rsp+0x110] 1437 movdqa xmm0, xmm1 1438 paddd xmm1, xmmword ptr [rsp+0x150] 1439 movdqa xmmword ptr 
[rsp+0x110], xmm1 1440 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1441 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1442 pcmpgtd xmm0, xmm1 1443 movdqa xmm1, xmmword ptr [rsp+0x120] 1444 psubd xmm1, xmm0 1445 movdqa xmmword ptr [rsp+0x120], xmm1 1446 add rbx, 128 1447 add rdi, 32 1448 sub rsi, 4 1449 cmp rsi, 4 1450 jnc 2b 1451 test rsi, rsi 1452 jnz 3f 14534: 1454 mov rsp, rbp 1455 pop rbp 1456 pop rbx 1457 pop r12 1458 pop r13 1459 pop r14 1460 pop r15 1461 RET 1462.p2align 5 14633: 1464 test esi, 0x2 1465 je 3f 1466 movups xmm0, xmmword ptr [rcx] 1467 movups xmm1, xmmword ptr [rcx+0x10] 1468 movaps xmm8, xmm0 1469 movaps xmm9, xmm1 1470 movd xmm13, dword ptr [rsp+0x110] 1471 pinsrd xmm13, dword ptr [rsp+0x120], 1 1472 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1473 movaps xmmword ptr [rsp], xmm13 1474 movd xmm14, dword ptr [rsp+0x114] 1475 pinsrd xmm14, dword ptr [rsp+0x124], 1 1476 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1477 movaps xmmword ptr [rsp+0x10], xmm14 1478 mov r8, qword ptr [rdi] 1479 mov r9, qword ptr [rdi+0x8] 1480 movzx eax, byte ptr [rbp+0x40] 1481 or eax, r13d 1482 xor edx, edx 14832: 1484 mov r14d, eax 1485 or eax, r12d 1486 add rdx, 64 1487 cmp rdx, r15 1488 cmovne eax, r14d 1489 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1490 movaps xmm10, xmm2 1491 movups xmm4, xmmword ptr [r8+rdx-0x40] 1492 movups xmm5, xmmword ptr [r8+rdx-0x30] 1493 movaps xmm3, xmm4 1494 shufps xmm4, xmm5, 136 1495 shufps xmm3, xmm5, 221 1496 movaps xmm5, xmm3 1497 movups xmm6, xmmword ptr [r8+rdx-0x20] 1498 movups xmm7, xmmword ptr [r8+rdx-0x10] 1499 movaps xmm3, xmm6 1500 shufps xmm6, xmm7, 136 1501 pshufd xmm6, xmm6, 0x93 1502 shufps xmm3, xmm7, 221 1503 pshufd xmm7, xmm3, 0x93 1504 movups xmm12, xmmword ptr [r9+rdx-0x40] 1505 movups xmm13, xmmword ptr [r9+rdx-0x30] 1506 movaps xmm11, xmm12 1507 shufps xmm12, xmm13, 136 1508 shufps xmm11, xmm13, 221 1509 movaps xmm13, xmm11 1510 movups xmm14, xmmword ptr [r9+rdx-0x20] 1511 movups xmm15, xmmword ptr 
[r9+rdx-0x10] 1512 movaps xmm11, xmm14 1513 shufps xmm14, xmm15, 136 1514 pshufd xmm14, xmm14, 0x93 1515 shufps xmm11, xmm15, 221 1516 pshufd xmm15, xmm11, 0x93 1517 movaps xmm3, xmmword ptr [rsp] 1518 movaps xmm11, xmmword ptr [rsp+0x10] 1519 pinsrd xmm3, eax, 3 1520 pinsrd xmm11, eax, 3 1521 mov al, 7 15229: 1523 paddd xmm0, xmm4 1524 paddd xmm8, xmm12 1525 movaps xmmword ptr [rsp+0x20], xmm4 1526 movaps xmmword ptr [rsp+0x30], xmm12 1527 paddd xmm0, xmm1 1528 paddd xmm8, xmm9 1529 pxor xmm3, xmm0 1530 pxor xmm11, xmm8 1531 movaps xmm12, xmmword ptr [ROT16+rip] 1532 pshufb xmm3, xmm12 1533 pshufb xmm11, xmm12 1534 paddd xmm2, xmm3 1535 paddd xmm10, xmm11 1536 pxor xmm1, xmm2 1537 pxor xmm9, xmm10 1538 movdqa xmm4, xmm1 1539 pslld xmm1, 20 1540 psrld xmm4, 12 1541 por xmm1, xmm4 1542 movdqa xmm4, xmm9 1543 pslld xmm9, 20 1544 psrld xmm4, 12 1545 por xmm9, xmm4 1546 paddd xmm0, xmm5 1547 paddd xmm8, xmm13 1548 movaps xmmword ptr [rsp+0x40], xmm5 1549 movaps xmmword ptr [rsp+0x50], xmm13 1550 paddd xmm0, xmm1 1551 paddd xmm8, xmm9 1552 pxor xmm3, xmm0 1553 pxor xmm11, xmm8 1554 movaps xmm13, xmmword ptr [ROT8+rip] 1555 pshufb xmm3, xmm13 1556 pshufb xmm11, xmm13 1557 paddd xmm2, xmm3 1558 paddd xmm10, xmm11 1559 pxor xmm1, xmm2 1560 pxor xmm9, xmm10 1561 movdqa xmm4, xmm1 1562 pslld xmm1, 25 1563 psrld xmm4, 7 1564 por xmm1, xmm4 1565 movdqa xmm4, xmm9 1566 pslld xmm9, 25 1567 psrld xmm4, 7 1568 por xmm9, xmm4 1569 pshufd xmm0, xmm0, 0x93 1570 pshufd xmm8, xmm8, 0x93 1571 pshufd xmm3, xmm3, 0x4E 1572 pshufd xmm11, xmm11, 0x4E 1573 pshufd xmm2, xmm2, 0x39 1574 pshufd xmm10, xmm10, 0x39 1575 paddd xmm0, xmm6 1576 paddd xmm8, xmm14 1577 paddd xmm0, xmm1 1578 paddd xmm8, xmm9 1579 pxor xmm3, xmm0 1580 pxor xmm11, xmm8 1581 pshufb xmm3, xmm12 1582 pshufb xmm11, xmm12 1583 paddd xmm2, xmm3 1584 paddd xmm10, xmm11 1585 pxor xmm1, xmm2 1586 pxor xmm9, xmm10 1587 movdqa xmm4, xmm1 1588 pslld xmm1, 20 1589 psrld xmm4, 12 1590 por xmm1, xmm4 1591 movdqa xmm4, xmm9 1592 pslld 
xmm9, 20 1593 psrld xmm4, 12 1594 por xmm9, xmm4 1595 paddd xmm0, xmm7 1596 paddd xmm8, xmm15 1597 paddd xmm0, xmm1 1598 paddd xmm8, xmm9 1599 pxor xmm3, xmm0 1600 pxor xmm11, xmm8 1601 pshufb xmm3, xmm13 1602 pshufb xmm11, xmm13 1603 paddd xmm2, xmm3 1604 paddd xmm10, xmm11 1605 pxor xmm1, xmm2 1606 pxor xmm9, xmm10 1607 movdqa xmm4, xmm1 1608 pslld xmm1, 25 1609 psrld xmm4, 7 1610 por xmm1, xmm4 1611 movdqa xmm4, xmm9 1612 pslld xmm9, 25 1613 psrld xmm4, 7 1614 por xmm9, xmm4 1615 pshufd xmm0, xmm0, 0x39 1616 pshufd xmm8, xmm8, 0x39 1617 pshufd xmm3, xmm3, 0x4E 1618 pshufd xmm11, xmm11, 0x4E 1619 pshufd xmm2, xmm2, 0x93 1620 pshufd xmm10, xmm10, 0x93 1621 dec al 1622 je 9f 1623 movdqa xmm12, xmmword ptr [rsp+0x20] 1624 movdqa xmm5, xmmword ptr [rsp+0x40] 1625 pshufd xmm13, xmm12, 0x0F 1626 shufps xmm12, xmm5, 214 1627 pshufd xmm4, xmm12, 0x39 1628 movdqa xmm12, xmm6 1629 shufps xmm12, xmm7, 250 1630 pblendw xmm13, xmm12, 0xCC 1631 movdqa xmm12, xmm7 1632 punpcklqdq xmm12, xmm5 1633 pblendw xmm12, xmm6, 0xC0 1634 pshufd xmm12, xmm12, 0x78 1635 punpckhdq xmm5, xmm7 1636 punpckldq xmm6, xmm5 1637 pshufd xmm7, xmm6, 0x1E 1638 movdqa xmmword ptr [rsp+0x20], xmm13 1639 movdqa xmmword ptr [rsp+0x40], xmm12 1640 movdqa xmm5, xmmword ptr [rsp+0x30] 1641 movdqa xmm13, xmmword ptr [rsp+0x50] 1642 pshufd xmm6, xmm5, 0x0F 1643 shufps xmm5, xmm13, 214 1644 pshufd xmm12, xmm5, 0x39 1645 movdqa xmm5, xmm14 1646 shufps xmm5, xmm15, 250 1647 pblendw xmm6, xmm5, 0xCC 1648 movdqa xmm5, xmm15 1649 punpcklqdq xmm5, xmm13 1650 pblendw xmm5, xmm14, 0xC0 1651 pshufd xmm5, xmm5, 0x78 1652 punpckhdq xmm13, xmm15 1653 punpckldq xmm14, xmm13 1654 pshufd xmm15, xmm14, 0x1E 1655 movdqa xmm13, xmm6 1656 movdqa xmm14, xmm5 1657 movdqa xmm5, xmmword ptr [rsp+0x20] 1658 movdqa xmm6, xmmword ptr [rsp+0x40] 1659 jmp 9b 16609: 1661 pxor xmm0, xmm2 1662 pxor xmm1, xmm3 1663 pxor xmm8, xmm10 1664 pxor xmm9, xmm11 1665 mov eax, r13d 1666 cmp rdx, r15 1667 jne 2b 1668 movups xmmword ptr [rbx], xmm0 1669 
movups xmmword ptr [rbx+0x10], xmm1 1670 movups xmmword ptr [rbx+0x20], xmm8 1671 movups xmmword ptr [rbx+0x30], xmm9 1672 movdqa xmm0, xmmword ptr [rsp+0x130] 1673 movdqa xmm1, xmmword ptr [rsp+0x110] 1674 movdqa xmm2, xmmword ptr [rsp+0x120] 1675 movdqu xmm3, xmmword ptr [rsp+0x118] 1676 movdqu xmm4, xmmword ptr [rsp+0x128] 1677 blendvps xmm1, xmm3, xmm0 1678 blendvps xmm2, xmm4, xmm0 1679 movdqa xmmword ptr [rsp+0x110], xmm1 1680 movdqa xmmword ptr [rsp+0x120], xmm2 1681 add rdi, 16 1682 add rbx, 64 1683 sub rsi, 2 16843: 1685 test esi, 0x1 1686 je 4b 1687 movups xmm0, xmmword ptr [rcx] 1688 movups xmm1, xmmword ptr [rcx+0x10] 1689 movd xmm13, dword ptr [rsp+0x110] 1690 pinsrd xmm13, dword ptr [rsp+0x120], 1 1691 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1692 movaps xmm14, xmmword ptr [ROT8+rip] 1693 movaps xmm15, xmmword ptr [ROT16+rip] 1694 mov r8, qword ptr [rdi] 1695 movzx eax, byte ptr [rbp+0x40] 1696 or eax, r13d 1697 xor edx, edx 16982: 1699 mov r14d, eax 1700 or eax, r12d 1701 add rdx, 64 1702 cmp rdx, r15 1703 cmovne eax, r14d 1704 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1705 movaps xmm3, xmm13 1706 pinsrd xmm3, eax, 3 1707 movups xmm4, xmmword ptr [r8+rdx-0x40] 1708 movups xmm5, xmmword ptr [r8+rdx-0x30] 1709 movaps xmm8, xmm4 1710 shufps xmm4, xmm5, 136 1711 shufps xmm8, xmm5, 221 1712 movaps xmm5, xmm8 1713 movups xmm6, xmmword ptr [r8+rdx-0x20] 1714 movups xmm7, xmmword ptr [r8+rdx-0x10] 1715 movaps xmm8, xmm6 1716 shufps xmm6, xmm7, 136 1717 pshufd xmm6, xmm6, 0x93 1718 shufps xmm8, xmm7, 221 1719 pshufd xmm7, xmm8, 0x93 1720 mov al, 7 17219: 1722 paddd xmm0, xmm4 1723 paddd xmm0, xmm1 1724 pxor xmm3, xmm0 1725 pshufb xmm3, xmm15 1726 paddd xmm2, xmm3 1727 pxor xmm1, xmm2 1728 movdqa xmm11, xmm1 1729 pslld xmm1, 20 1730 psrld xmm11, 12 1731 por xmm1, xmm11 1732 paddd xmm0, xmm5 1733 paddd xmm0, xmm1 1734 pxor xmm3, xmm0 1735 pshufb xmm3, xmm14 1736 paddd xmm2, xmm3 1737 pxor xmm1, xmm2 1738 movdqa xmm11, xmm1 1739 pslld xmm1, 25 1740 psrld 
xmm11, 7 1741 por xmm1, xmm11 1742 pshufd xmm0, xmm0, 0x93 1743 pshufd xmm3, xmm3, 0x4E 1744 pshufd xmm2, xmm2, 0x39 1745 paddd xmm0, xmm6 1746 paddd xmm0, xmm1 1747 pxor xmm3, xmm0 1748 pshufb xmm3, xmm15 1749 paddd xmm2, xmm3 1750 pxor xmm1, xmm2 1751 movdqa xmm11, xmm1 1752 pslld xmm1, 20 1753 psrld xmm11, 12 1754 por xmm1, xmm11 1755 paddd xmm0, xmm7 1756 paddd xmm0, xmm1 1757 pxor xmm3, xmm0 1758 pshufb xmm3, xmm14 1759 paddd xmm2, xmm3 1760 pxor xmm1, xmm2 1761 movdqa xmm11, xmm1 1762 pslld xmm1, 25 1763 psrld xmm11, 7 1764 por xmm1, xmm11 1765 pshufd xmm0, xmm0, 0x39 1766 pshufd xmm3, xmm3, 0x4E 1767 pshufd xmm2, xmm2, 0x93 1768 dec al 1769 jz 9f 1770 movdqa xmm8, xmm4 1771 shufps xmm8, xmm5, 214 1772 pshufd xmm9, xmm4, 0x0F 1773 pshufd xmm4, xmm8, 0x39 1774 movdqa xmm8, xmm6 1775 shufps xmm8, xmm7, 250 1776 pblendw xmm9, xmm8, 0xCC 1777 movdqa xmm8, xmm7 1778 punpcklqdq xmm8, xmm5 1779 pblendw xmm8, xmm6, 0xC0 1780 pshufd xmm8, xmm8, 0x78 1781 punpckhdq xmm5, xmm7 1782 punpckldq xmm6, xmm5 1783 pshufd xmm7, xmm6, 0x1E 1784 movdqa xmm5, xmm9 1785 movdqa xmm6, xmm8 1786 jmp 9b 17879: 1788 pxor xmm0, xmm2 1789 pxor xmm1, xmm3 1790 mov eax, r13d 1791 cmp rdx, r15 1792 jne 2b 1793 movups xmmword ptr [rbx], xmm0 1794 movups xmmword ptr [rbx+0x10], xmm1 1795 jmp 4b 1796SET_SIZE(zfs_blake3_hash_many_sse41)

/*
 * zfs_blake3_compress_in_place_sse41
 *
 * Runs seven rounds of the BLAKE3 compression over one 64-byte block and
 * writes the 32-byte result back over the input at [rdi].
 *
 * Inputs, as consumed below:
 *   rdi  pointer to 32 bytes: loaded as state rows 0-1, overwritten with
 *        the result on exit
 *   rsi  pointer to the 64-byte message block (unaligned loads)
 *   dl   byte placed in dword 2 of state row 3; this is the slot that
 *        zfs_blake3_hash_many_sse41 fills from BLAKE3_BLOCK_LEN
 *   rcx  64-bit value placed in dwords 0-1 of state row 3 (presumably the
 *        block counter -- confirm against the C prototype)
 *   r8b  byte placed in dword 3 of state row 3 (presumably the flags --
 *        confirm against the C prototype)
 *
 * Writes: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags.
 */
ENTRY_ALIGN(zfs_blake3_compress_in_place_sse41, 64)
	ENDBR
	movups	xmm0, xmmword ptr [rdi]			/* state row 0 */
	movups	xmm1, xmmword ptr [rdi+0x10]		/* state row 1 */
	movaps	xmm2, xmmword ptr [BLAKE3_IV+rip]	/* state row 2 */
	/*
	 * Pack dl into the low dword and r8b into the high dword of rdx.
	 * Both bytes are zero-extended first, exactly as
	 * zfs_blake3_compress_xof_sse41 does, so nothing above the low byte
	 * of either argument register can leak into state row 3.
	 */
	movzx	eax, r8b
	movzx	edx, dl
	shl	rax, 32
	add	rdx, rax
	movq	xmm3, rcx
	movq	xmm4, rdx
	punpcklqdq xmm3, xmm4				/* state row 3 = { rcx, rdx } */
	/*
	 * Load the block and split it: shufps 136 (0x88) keeps the
	 * even-numbered dwords of a register pair, 221 (0xDD) the odd ones.
	 */
	movups	xmm4, xmmword ptr [rsi]
	movups	xmm5, xmmword ptr [rsi+0x10]
	movaps	xmm8, xmm4
	shufps	xmm4, xmm5, 136
	shufps	xmm8, xmm5, 221
	movaps	xmm5, xmm8
	movups	xmm6, xmmword ptr [rsi+0x20]
	movups	xmm7, xmmword ptr [rsi+0x30]
	movaps	xmm8, xmm6
	shufps	xmm6, xmm7, 136
	pshufd	xmm6, xmm6, 0x93
	shufps	xmm8, xmm7, 221
	pshufd	xmm7, xmm8, 0x93
	movaps	xmm14, xmmword ptr [ROT8+rip]
	movaps	xmm15, xmmword ptr [ROT16+rip]
	mov	al, 7					/* rounds left */
9:
	/* First half-round: message words from xmm4, then xmm5. */
	paddd	xmm0, xmm4
	paddd	xmm0, xmm1
	pxor	xmm3, xmm0
	pshufb	xmm3, xmm15				/* rotate dwords right by 16 */
	paddd	xmm2, xmm3
	pxor	xmm1, xmm2
	movdqa	xmm11, xmm1
	pslld	xmm1, 20
	psrld	xmm11, 12
	por	xmm1, xmm11				/* rotate dwords right by 12 */
	paddd	xmm0, xmm5
	paddd	xmm0, xmm1
	pxor	xmm3, xmm0
	pshufb	xmm3, xmm14				/* rotate dwords right by 8 */
	paddd	xmm2, xmm3
	pxor	xmm1, xmm2
	movdqa	xmm11, xmm1
	pslld	xmm1, 25
	psrld	xmm11, 7
	por	xmm1, xmm11				/* rotate dwords right by 7 */
	/* Rotate the lanes of rows 0, 3 and 2; row 1 stays in place. */
	pshufd	xmm0, xmm0, 0x93
	pshufd	xmm3, xmm3, 0x4E
	pshufd	xmm2, xmm2, 0x39
	/* Second half-round: message words from xmm6, then xmm7. */
	paddd	xmm0, xmm6
	paddd	xmm0, xmm1
	pxor	xmm3, xmm0
	pshufb	xmm3, xmm15
	paddd	xmm2, xmm3
	pxor	xmm1, xmm2
	movdqa	xmm11, xmm1
	pslld	xmm1, 20
	psrld	xmm11, 12
	por	xmm1, xmm11
	paddd	xmm0, xmm7
	paddd	xmm0, xmm1
	pxor	xmm3, xmm0
	pshufb	xmm3, xmm14
	paddd	xmm2, xmm3
	pxor	xmm1, xmm2
	movdqa	xmm11, xmm1
	pslld	xmm1, 25
	psrld	xmm11, 7
	por	xmm1, xmm11
	/* Undo the lane rotation applied before the second half-round. */
	pshufd	xmm0, xmm0, 0x39
	pshufd	xmm3, xmm3, 0x4E
	pshufd	xmm2, xmm2, 0x93
	dec	al
	jz	9f
	/* Reorder the message words in xmm4-xmm7 for the next round. */
	movdqa	xmm8, xmm4
	shufps	xmm8, xmm5, 214
	pshufd	xmm9, xmm4, 0x0F
	pshufd	xmm4, xmm8, 0x39
	movdqa	xmm8, xmm6
	shufps	xmm8, xmm7, 250
	pblendw	xmm9, xmm8, 0xCC
	movdqa	xmm8, xmm7
	punpcklqdq xmm8, xmm5
	pblendw	xmm8, xmm6, 0xC0
	pshufd	xmm8, xmm8, 0x78
	punpckhdq xmm5, xmm7
	punpckldq xmm6, xmm5
	pshufd	xmm7, xmm6, 0x1E
	movdqa	xmm5, xmm9
	movdqa	xmm6, xmm8
	jmp	9b
9:
	/* Result = rows 0-1 xor rows 2-3, stored back over the input. */
	pxor	xmm0, xmm2
	pxor	xmm1, xmm3
	movups	xmmword ptr [rdi], xmm0
	movups	xmmword ptr [rdi+0x10], xmm1
	RET
SET_SIZE(zfs_blake3_compress_in_place_sse41)

/*
 * zfs_blake3_compress_xof_sse41
 *
 * Same seven-round compression as zfs_blake3_compress_in_place_sse41, but
 * the full 64-byte result is written to [r9] and the 32 bytes at [rdi]
 * are only read.
 *
 * Inputs, as consumed below:
 *   rdi  pointer to 32 bytes: loaded as state rows 0-1, and read again at
 *        the end to form the upper half of the output
 *   rsi  pointer to the 64-byte message block (unaligned loads)
 *   dl   byte placed in dword 2 of state row 3
 *   rcx  64-bit value placed in dwords 0-1 of state row 3 (presumably the
 *        block counter -- confirm against the C prototype)
 *   r8b  byte placed in dword 3 of state row 3 (presumably the flags --
 *        confirm against the C prototype)
 *   r9   pointer to the 64-byte output buffer (unaligned stores)
 *
 * Writes: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags.
 */
ENTRY_ALIGN(zfs_blake3_compress_xof_sse41, 64)
	ENDBR
	movups	xmm0, xmmword ptr [rdi]			/* state row 0 */
	movups	xmm1, xmmword ptr [rdi+0x10]		/* state row 1 */
	movaps	xmm2, xmmword ptr [BLAKE3_IV+rip]	/* state row 2 */
	/* rdx = zero-extended dl | (zero-extended r8b << 32) */
	movzx	eax, r8b
	movzx	edx, dl
	shl	rax, 32
	add	rdx, rax
	movq	xmm3, rcx
	movq	xmm4, rdx
	punpcklqdq xmm3, xmm4				/* state row 3 = { rcx, rdx } */
	/*
	 * Load the block and split it: shufps 136 (0x88) keeps the
	 * even-numbered dwords of a register pair, 221 (0xDD) the odd ones.
	 */
	movups	xmm4, xmmword ptr [rsi]
	movups	xmm5, xmmword ptr [rsi+0x10]
	movaps	xmm8, xmm4
	shufps	xmm4, xmm5, 136
	shufps	xmm8, xmm5, 221
	movaps	xmm5, xmm8
	movups	xmm6, xmmword ptr [rsi+0x20]
	movups	xmm7, xmmword ptr [rsi+0x30]
	movaps	xmm8, xmm6
	shufps	xmm6, xmm7, 136
	pshufd	xmm6, xmm6, 0x93
	shufps	xmm8, xmm7, 221
	pshufd	xmm7, xmm8, 0x93
	movaps	xmm14, xmmword ptr [ROT8+rip]
	movaps	xmm15, xmmword ptr [ROT16+rip]
	mov	al, 7					/* rounds left */
9:
	/* First half-round: message words from xmm4, then xmm5. */
	paddd	xmm0, xmm4
	paddd	xmm0, xmm1
	pxor	xmm3, xmm0
	pshufb	xmm3, xmm15				/* rotate dwords right by 16 */
	paddd	xmm2, xmm3
	pxor	xmm1, xmm2
	movdqa	xmm11, xmm1
	pslld	xmm1, 20
	psrld	xmm11, 12
	por	xmm1, xmm11				/* rotate dwords right by 12 */
	paddd	xmm0, xmm5
	paddd	xmm0, xmm1
	pxor	xmm3, xmm0
	pshufb	xmm3, xmm14				/* rotate dwords right by 8 */
	paddd	xmm2, xmm3
	pxor	xmm1, xmm2
	movdqa	xmm11, xmm1
	pslld	xmm1, 25
	psrld	xmm11, 7
	por	xmm1, xmm11				/* rotate dwords right by 7 */
	/* Rotate the lanes of rows 0, 3 and 2; row 1 stays in place. */
	pshufd	xmm0, xmm0, 0x93
	pshufd	xmm3, xmm3, 0x4E
	pshufd	xmm2, xmm2, 0x39
	/* Second half-round: message words from xmm6, then xmm7. */
	paddd	xmm0, xmm6
	paddd	xmm0, xmm1
	pxor	xmm3, xmm0
	pshufb	xmm3, xmm15
	paddd	xmm2, xmm3
	pxor	xmm1, xmm2
	movdqa	xmm11, xmm1
	pslld	xmm1, 20
	psrld	xmm11, 12
	por	xmm1, xmm11
	paddd	xmm0, xmm7
	paddd	xmm0, xmm1
	pxor	xmm3, xmm0
	pshufb	xmm3, xmm14
	paddd	xmm2, xmm3
	pxor	xmm1, xmm2
	movdqa	xmm11, xmm1
	pslld	xmm1, 25
	psrld	xmm11, 7
	por	xmm1, xmm11
	/* Undo the lane rotation applied before the second half-round. */
	pshufd	xmm0, xmm0, 0x39
	pshufd	xmm3, xmm3, 0x4E
	pshufd	xmm2, xmm2, 0x93
	dec	al
	jz	9f
	/* Reorder the message words in xmm4-xmm7 for the next round. */
	movdqa	xmm8, xmm4
	shufps	xmm8, xmm5, 214
	pshufd	xmm9, xmm4, 0x0F
	pshufd	xmm4, xmm8, 0x39
	movdqa	xmm8, xmm6
	shufps	xmm8, xmm7, 250
	pblendw	xmm9, xmm8, 0xCC
	movdqa	xmm8, xmm7
	punpcklqdq xmm8, xmm5
	pblendw	xmm8, xmm6, 0xC0
	pshufd	xmm8, xmm8, 0x78
	punpckhdq xmm5, xmm7
	punpckldq xmm6, xmm5
	pshufd	xmm7, xmm6, 0x1E
	movdqa	xmm5, xmm9
	movdqa	xmm6, xmm8
	jmp	9b
9:
	/*
	 * Output bytes 0-31  = rows 0-1 xor rows 2-3.
	 * Output bytes 32-63 = rows 2-3 xor the 32 input bytes at [rdi].
	 */
	movdqu	xmm4, xmmword ptr [rdi]
	movdqu	xmm5, xmmword ptr [rdi+0x10]
	pxor	xmm0, xmm2
	pxor	xmm1, xmm3
	pxor	xmm2, xmm4
	pxor	xmm3, xmm5
	movups	xmmword ptr [r9], xmm0
	movups	xmmword ptr [r9+0x10], xmm1
	movups	xmmword ptr [r9+0x20], xmm2
	movups	xmmword ptr [r9+0x30], xmm3
	RET
SET_SIZE(zfs_blake3_compress_xof_sse41)

SECTION_STATIC

/*
 * Constant tables. Each table is exactly 16 bytes and they are laid out
 * back to back after a single 64-byte alignment, so every label stays
 * 16-byte aligned. The code reads them with aligned loads (movaps) and as
 * SSE memory operands, so keep each table at 16 bytes when editing.
 */
.p2align 6
BLAKE3_IV:			/* loaded as state row 2 */
	.long	0x6A09E667, 0xBB67AE85
	.long	0x3C6EF372, 0xA54FF53A
ROT16:				/* pshufb mask: rotate each dword right by 16 */
	.byte	2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
ROT8:				/* pshufb mask: rotate each dword right by 8 */
	.byte	1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
ADD0:				/* per-lane offsets 0..3 */
	.long	0, 1, 2, 3
ADD1:				/* per-lane step of 4 */
	.long	4, 4, 4, 4
BLAKE3_IV_0:			/* the four BLAKE3_IV words, each in all lanes */
	.long	0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
	.long	0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
	.long	0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
	.long	0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
BLAKE3_BLOCK_LEN:		/* block length in bytes, in all lanes */
	.long	64, 64, 64, 64
CMP_MSB_MASK:			/* sign-bit flip applied to both pcmpgtd operands */
	.long	0x80000000, 0x80000000, 0x80000000, 0x80000000

#endif /* HAVE_SSE4_1 */

#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif