1// SPDX-License-Identifier: CDDL-1.0 2/* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23/* 24 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 25 * Copyright (c) 2019-2020 Samuel Neves 26 * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> 27 */ 28 29#if defined(HAVE_SSE4_1) 30 31#define _ASM 32#include <sys/asm_linkage.h> 33 34.intel_syntax noprefix 35 36.text 37 38ENTRY_ALIGN(zfs_blake3_hash_many_sse41, 64) 39 ENDBR 40 push r15 41 push r14 42 push r13 43 push r12 44 push rbx 45 push rbp 46 mov rbp, rsp 47 sub rsp, 360 48 and rsp, 0xFFFFFFFFFFFFFFC0 49 neg r9d 50 movd xmm0, r9d 51 pshufd xmm0, xmm0, 0x00 52 movdqa xmmword ptr [rsp+0x130], xmm0 53 movdqa xmm1, xmm0 54 pand xmm1, xmmword ptr [ADD0+rip] 55 pand xmm0, xmmword ptr [ADD1+rip] 56 movdqa xmmword ptr [rsp+0x150], xmm0 57 movd xmm0, r8d 58 pshufd xmm0, xmm0, 0x00 59 paddd xmm0, xmm1 60 movdqa xmmword ptr [rsp+0x110], xmm0 61 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 62 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 63 pcmpgtd xmm1, xmm0 64 shr r8, 32 65 movd xmm2, r8d 66 pshufd xmm2, xmm2, 0x00 67 psubd xmm2, xmm1 68 movdqa xmmword ptr [rsp+0x120], xmm2 69 mov rbx, qword ptr [rbp+0x50] 70 mov r15, 
rdx 71 shl r15, 6 72 movzx r13d, byte ptr [rbp+0x38] 73 movzx r12d, byte ptr [rbp+0x48] 74 cmp rsi, 4 75 jc 3f 762: 77 movdqu xmm3, xmmword ptr [rcx] 78 pshufd xmm0, xmm3, 0x00 79 pshufd xmm1, xmm3, 0x55 80 pshufd xmm2, xmm3, 0xAA 81 pshufd xmm3, xmm3, 0xFF 82 movdqu xmm7, xmmword ptr [rcx+0x10] 83 pshufd xmm4, xmm7, 0x00 84 pshufd xmm5, xmm7, 0x55 85 pshufd xmm6, xmm7, 0xAA 86 pshufd xmm7, xmm7, 0xFF 87 mov r8, qword ptr [rdi] 88 mov r9, qword ptr [rdi+0x8] 89 mov r10, qword ptr [rdi+0x10] 90 mov r11, qword ptr [rdi+0x18] 91 movzx eax, byte ptr [rbp+0x40] 92 or eax, r13d 93 xor edx, edx 949: 95 mov r14d, eax 96 or eax, r12d 97 add rdx, 64 98 cmp rdx, r15 99 cmovne eax, r14d 100 movdqu xmm8, xmmword ptr [r8+rdx-0x40] 101 movdqu xmm9, xmmword ptr [r9+rdx-0x40] 102 movdqu xmm10, xmmword ptr [r10+rdx-0x40] 103 movdqu xmm11, xmmword ptr [r11+rdx-0x40] 104 movdqa xmm12, xmm8 105 punpckldq xmm8, xmm9 106 punpckhdq xmm12, xmm9 107 movdqa xmm14, xmm10 108 punpckldq xmm10, xmm11 109 punpckhdq xmm14, xmm11 110 movdqa xmm9, xmm8 111 punpcklqdq xmm8, xmm10 112 punpckhqdq xmm9, xmm10 113 movdqa xmm13, xmm12 114 punpcklqdq xmm12, xmm14 115 punpckhqdq xmm13, xmm14 116 movdqa xmmword ptr [rsp], xmm8 117 movdqa xmmword ptr [rsp+0x10], xmm9 118 movdqa xmmword ptr [rsp+0x20], xmm12 119 movdqa xmmword ptr [rsp+0x30], xmm13 120 movdqu xmm8, xmmword ptr [r8+rdx-0x30] 121 movdqu xmm9, xmmword ptr [r9+rdx-0x30] 122 movdqu xmm10, xmmword ptr [r10+rdx-0x30] 123 movdqu xmm11, xmmword ptr [r11+rdx-0x30] 124 movdqa xmm12, xmm8 125 punpckldq xmm8, xmm9 126 punpckhdq xmm12, xmm9 127 movdqa xmm14, xmm10 128 punpckldq xmm10, xmm11 129 punpckhdq xmm14, xmm11 130 movdqa xmm9, xmm8 131 punpcklqdq xmm8, xmm10 132 punpckhqdq xmm9, xmm10 133 movdqa xmm13, xmm12 134 punpcklqdq xmm12, xmm14 135 punpckhqdq xmm13, xmm14 136 movdqa xmmword ptr [rsp+0x40], xmm8 137 movdqa xmmword ptr [rsp+0x50], xmm9 138 movdqa xmmword ptr [rsp+0x60], xmm12 139 movdqa xmmword ptr [rsp+0x70], xmm13 140 movdqu xmm8, xmmword ptr 
[r8+rdx-0x20] 141 movdqu xmm9, xmmword ptr [r9+rdx-0x20] 142 movdqu xmm10, xmmword ptr [r10+rdx-0x20] 143 movdqu xmm11, xmmword ptr [r11+rdx-0x20] 144 movdqa xmm12, xmm8 145 punpckldq xmm8, xmm9 146 punpckhdq xmm12, xmm9 147 movdqa xmm14, xmm10 148 punpckldq xmm10, xmm11 149 punpckhdq xmm14, xmm11 150 movdqa xmm9, xmm8 151 punpcklqdq xmm8, xmm10 152 punpckhqdq xmm9, xmm10 153 movdqa xmm13, xmm12 154 punpcklqdq xmm12, xmm14 155 punpckhqdq xmm13, xmm14 156 movdqa xmmword ptr [rsp+0x80], xmm8 157 movdqa xmmword ptr [rsp+0x90], xmm9 158 movdqa xmmword ptr [rsp+0xA0], xmm12 159 movdqa xmmword ptr [rsp+0xB0], xmm13 160 movdqu xmm8, xmmword ptr [r8+rdx-0x10] 161 movdqu xmm9, xmmword ptr [r9+rdx-0x10] 162 movdqu xmm10, xmmword ptr [r10+rdx-0x10] 163 movdqu xmm11, xmmword ptr [r11+rdx-0x10] 164 movdqa xmm12, xmm8 165 punpckldq xmm8, xmm9 166 punpckhdq xmm12, xmm9 167 movdqa xmm14, xmm10 168 punpckldq xmm10, xmm11 169 punpckhdq xmm14, xmm11 170 movdqa xmm9, xmm8 171 punpcklqdq xmm8, xmm10 172 punpckhqdq xmm9, xmm10 173 movdqa xmm13, xmm12 174 punpcklqdq xmm12, xmm14 175 punpckhqdq xmm13, xmm14 176 movdqa xmmword ptr [rsp+0xC0], xmm8 177 movdqa xmmword ptr [rsp+0xD0], xmm9 178 movdqa xmmword ptr [rsp+0xE0], xmm12 179 movdqa xmmword ptr [rsp+0xF0], xmm13 180 movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] 181 movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] 182 movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] 183 movdqa xmm12, xmmword ptr [rsp+0x110] 184 movdqa xmm13, xmmword ptr [rsp+0x120] 185 movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] 186 movd xmm15, eax 187 pshufd xmm15, xmm15, 0x00 188 prefetcht0 [r8+rdx+0x80] 189 prefetcht0 [r9+rdx+0x80] 190 prefetcht0 [r10+rdx+0x80] 191 prefetcht0 [r11+rdx+0x80] 192 paddd xmm0, xmmword ptr [rsp] 193 paddd xmm1, xmmword ptr [rsp+0x20] 194 paddd xmm2, xmmword ptr [rsp+0x40] 195 paddd xmm3, xmmword ptr [rsp+0x60] 196 paddd xmm0, xmm4 197 paddd xmm1, xmm5 198 paddd xmm2, xmm6 199 paddd xmm3, xmm7 200 pxor xmm12, xmm0 201 pxor xmm13, xmm1 202 pxor 
xmm14, xmm2 203 pxor xmm15, xmm3 204 movdqa xmm8, xmmword ptr [ROT16+rip] 205 pshufb xmm12, xmm8 206 pshufb xmm13, xmm8 207 pshufb xmm14, xmm8 208 pshufb xmm15, xmm8 209 movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] 210 paddd xmm8, xmm12 211 paddd xmm9, xmm13 212 paddd xmm10, xmm14 213 paddd xmm11, xmm15 214 pxor xmm4, xmm8 215 pxor xmm5, xmm9 216 pxor xmm6, xmm10 217 pxor xmm7, xmm11 218 movdqa xmmword ptr [rsp+0x100], xmm8 219 movdqa xmm8, xmm4 220 psrld xmm8, 12 221 pslld xmm4, 20 222 por xmm4, xmm8 223 movdqa xmm8, xmm5 224 psrld xmm8, 12 225 pslld xmm5, 20 226 por xmm5, xmm8 227 movdqa xmm8, xmm6 228 psrld xmm8, 12 229 pslld xmm6, 20 230 por xmm6, xmm8 231 movdqa xmm8, xmm7 232 psrld xmm8, 12 233 pslld xmm7, 20 234 por xmm7, xmm8 235 paddd xmm0, xmmword ptr [rsp+0x10] 236 paddd xmm1, xmmword ptr [rsp+0x30] 237 paddd xmm2, xmmword ptr [rsp+0x50] 238 paddd xmm3, xmmword ptr [rsp+0x70] 239 paddd xmm0, xmm4 240 paddd xmm1, xmm5 241 paddd xmm2, xmm6 242 paddd xmm3, xmm7 243 pxor xmm12, xmm0 244 pxor xmm13, xmm1 245 pxor xmm14, xmm2 246 pxor xmm15, xmm3 247 movdqa xmm8, xmmword ptr [ROT8+rip] 248 pshufb xmm12, xmm8 249 pshufb xmm13, xmm8 250 pshufb xmm14, xmm8 251 pshufb xmm15, xmm8 252 movdqa xmm8, xmmword ptr [rsp+0x100] 253 paddd xmm8, xmm12 254 paddd xmm9, xmm13 255 paddd xmm10, xmm14 256 paddd xmm11, xmm15 257 pxor xmm4, xmm8 258 pxor xmm5, xmm9 259 pxor xmm6, xmm10 260 pxor xmm7, xmm11 261 movdqa xmmword ptr [rsp+0x100], xmm8 262 movdqa xmm8, xmm4 263 psrld xmm8, 7 264 pslld xmm4, 25 265 por xmm4, xmm8 266 movdqa xmm8, xmm5 267 psrld xmm8, 7 268 pslld xmm5, 25 269 por xmm5, xmm8 270 movdqa xmm8, xmm6 271 psrld xmm8, 7 272 pslld xmm6, 25 273 por xmm6, xmm8 274 movdqa xmm8, xmm7 275 psrld xmm8, 7 276 pslld xmm7, 25 277 por xmm7, xmm8 278 paddd xmm0, xmmword ptr [rsp+0x80] 279 paddd xmm1, xmmword ptr [rsp+0xA0] 280 paddd xmm2, xmmword ptr [rsp+0xC0] 281 paddd xmm3, xmmword ptr [rsp+0xE0] 282 paddd xmm0, xmm5 283 paddd xmm1, xmm6 284 paddd xmm2, xmm7 285 paddd xmm3, 
xmm4 286 pxor xmm15, xmm0 287 pxor xmm12, xmm1 288 pxor xmm13, xmm2 289 pxor xmm14, xmm3 290 movdqa xmm8, xmmword ptr [ROT16+rip] 291 pshufb xmm15, xmm8 292 pshufb xmm12, xmm8 293 pshufb xmm13, xmm8 294 pshufb xmm14, xmm8 295 paddd xmm10, xmm15 296 paddd xmm11, xmm12 297 movdqa xmm8, xmmword ptr [rsp+0x100] 298 paddd xmm8, xmm13 299 paddd xmm9, xmm14 300 pxor xmm5, xmm10 301 pxor xmm6, xmm11 302 pxor xmm7, xmm8 303 pxor xmm4, xmm9 304 movdqa xmmword ptr [rsp+0x100], xmm8 305 movdqa xmm8, xmm5 306 psrld xmm8, 12 307 pslld xmm5, 20 308 por xmm5, xmm8 309 movdqa xmm8, xmm6 310 psrld xmm8, 12 311 pslld xmm6, 20 312 por xmm6, xmm8 313 movdqa xmm8, xmm7 314 psrld xmm8, 12 315 pslld xmm7, 20 316 por xmm7, xmm8 317 movdqa xmm8, xmm4 318 psrld xmm8, 12 319 pslld xmm4, 20 320 por xmm4, xmm8 321 paddd xmm0, xmmword ptr [rsp+0x90] 322 paddd xmm1, xmmword ptr [rsp+0xB0] 323 paddd xmm2, xmmword ptr [rsp+0xD0] 324 paddd xmm3, xmmword ptr [rsp+0xF0] 325 paddd xmm0, xmm5 326 paddd xmm1, xmm6 327 paddd xmm2, xmm7 328 paddd xmm3, xmm4 329 pxor xmm15, xmm0 330 pxor xmm12, xmm1 331 pxor xmm13, xmm2 332 pxor xmm14, xmm3 333 movdqa xmm8, xmmword ptr [ROT8+rip] 334 pshufb xmm15, xmm8 335 pshufb xmm12, xmm8 336 pshufb xmm13, xmm8 337 pshufb xmm14, xmm8 338 paddd xmm10, xmm15 339 paddd xmm11, xmm12 340 movdqa xmm8, xmmword ptr [rsp+0x100] 341 paddd xmm8, xmm13 342 paddd xmm9, xmm14 343 pxor xmm5, xmm10 344 pxor xmm6, xmm11 345 pxor xmm7, xmm8 346 pxor xmm4, xmm9 347 movdqa xmmword ptr [rsp+0x100], xmm8 348 movdqa xmm8, xmm5 349 psrld xmm8, 7 350 pslld xmm5, 25 351 por xmm5, xmm8 352 movdqa xmm8, xmm6 353 psrld xmm8, 7 354 pslld xmm6, 25 355 por xmm6, xmm8 356 movdqa xmm8, xmm7 357 psrld xmm8, 7 358 pslld xmm7, 25 359 por xmm7, xmm8 360 movdqa xmm8, xmm4 361 psrld xmm8, 7 362 pslld xmm4, 25 363 por xmm4, xmm8 364 paddd xmm0, xmmword ptr [rsp+0x20] 365 paddd xmm1, xmmword ptr [rsp+0x30] 366 paddd xmm2, xmmword ptr [rsp+0x70] 367 paddd xmm3, xmmword ptr [rsp+0x40] 368 paddd xmm0, xmm4 369 
paddd xmm1, xmm5 370 paddd xmm2, xmm6 371 paddd xmm3, xmm7 372 pxor xmm12, xmm0 373 pxor xmm13, xmm1 374 pxor xmm14, xmm2 375 pxor xmm15, xmm3 376 movdqa xmm8, xmmword ptr [ROT16+rip] 377 pshufb xmm12, xmm8 378 pshufb xmm13, xmm8 379 pshufb xmm14, xmm8 380 pshufb xmm15, xmm8 381 movdqa xmm8, xmmword ptr [rsp+0x100] 382 paddd xmm8, xmm12 383 paddd xmm9, xmm13 384 paddd xmm10, xmm14 385 paddd xmm11, xmm15 386 pxor xmm4, xmm8 387 pxor xmm5, xmm9 388 pxor xmm6, xmm10 389 pxor xmm7, xmm11 390 movdqa xmmword ptr [rsp+0x100], xmm8 391 movdqa xmm8, xmm4 392 psrld xmm8, 12 393 pslld xmm4, 20 394 por xmm4, xmm8 395 movdqa xmm8, xmm5 396 psrld xmm8, 12 397 pslld xmm5, 20 398 por xmm5, xmm8 399 movdqa xmm8, xmm6 400 psrld xmm8, 12 401 pslld xmm6, 20 402 por xmm6, xmm8 403 movdqa xmm8, xmm7 404 psrld xmm8, 12 405 pslld xmm7, 20 406 por xmm7, xmm8 407 paddd xmm0, xmmword ptr [rsp+0x60] 408 paddd xmm1, xmmword ptr [rsp+0xA0] 409 paddd xmm2, xmmword ptr [rsp] 410 paddd xmm3, xmmword ptr [rsp+0xD0] 411 paddd xmm0, xmm4 412 paddd xmm1, xmm5 413 paddd xmm2, xmm6 414 paddd xmm3, xmm7 415 pxor xmm12, xmm0 416 pxor xmm13, xmm1 417 pxor xmm14, xmm2 418 pxor xmm15, xmm3 419 movdqa xmm8, xmmword ptr [ROT8+rip] 420 pshufb xmm12, xmm8 421 pshufb xmm13, xmm8 422 pshufb xmm14, xmm8 423 pshufb xmm15, xmm8 424 movdqa xmm8, xmmword ptr [rsp+0x100] 425 paddd xmm8, xmm12 426 paddd xmm9, xmm13 427 paddd xmm10, xmm14 428 paddd xmm11, xmm15 429 pxor xmm4, xmm8 430 pxor xmm5, xmm9 431 pxor xmm6, xmm10 432 pxor xmm7, xmm11 433 movdqa xmmword ptr [rsp+0x100], xmm8 434 movdqa xmm8, xmm4 435 psrld xmm8, 7 436 pslld xmm4, 25 437 por xmm4, xmm8 438 movdqa xmm8, xmm5 439 psrld xmm8, 7 440 pslld xmm5, 25 441 por xmm5, xmm8 442 movdqa xmm8, xmm6 443 psrld xmm8, 7 444 pslld xmm6, 25 445 por xmm6, xmm8 446 movdqa xmm8, xmm7 447 psrld xmm8, 7 448 pslld xmm7, 25 449 por xmm7, xmm8 450 paddd xmm0, xmmword ptr [rsp+0x10] 451 paddd xmm1, xmmword ptr [rsp+0xC0] 452 paddd xmm2, xmmword ptr [rsp+0x90] 453 paddd xmm3, 
xmmword ptr [rsp+0xF0] 454 paddd xmm0, xmm5 455 paddd xmm1, xmm6 456 paddd xmm2, xmm7 457 paddd xmm3, xmm4 458 pxor xmm15, xmm0 459 pxor xmm12, xmm1 460 pxor xmm13, xmm2 461 pxor xmm14, xmm3 462 movdqa xmm8, xmmword ptr [ROT16+rip] 463 pshufb xmm15, xmm8 464 pshufb xmm12, xmm8 465 pshufb xmm13, xmm8 466 pshufb xmm14, xmm8 467 paddd xmm10, xmm15 468 paddd xmm11, xmm12 469 movdqa xmm8, xmmword ptr [rsp+0x100] 470 paddd xmm8, xmm13 471 paddd xmm9, xmm14 472 pxor xmm5, xmm10 473 pxor xmm6, xmm11 474 pxor xmm7, xmm8 475 pxor xmm4, xmm9 476 movdqa xmmword ptr [rsp+0x100], xmm8 477 movdqa xmm8, xmm5 478 psrld xmm8, 12 479 pslld xmm5, 20 480 por xmm5, xmm8 481 movdqa xmm8, xmm6 482 psrld xmm8, 12 483 pslld xmm6, 20 484 por xmm6, xmm8 485 movdqa xmm8, xmm7 486 psrld xmm8, 12 487 pslld xmm7, 20 488 por xmm7, xmm8 489 movdqa xmm8, xmm4 490 psrld xmm8, 12 491 pslld xmm4, 20 492 por xmm4, xmm8 493 paddd xmm0, xmmword ptr [rsp+0xB0] 494 paddd xmm1, xmmword ptr [rsp+0x50] 495 paddd xmm2, xmmword ptr [rsp+0xE0] 496 paddd xmm3, xmmword ptr [rsp+0x80] 497 paddd xmm0, xmm5 498 paddd xmm1, xmm6 499 paddd xmm2, xmm7 500 paddd xmm3, xmm4 501 pxor xmm15, xmm0 502 pxor xmm12, xmm1 503 pxor xmm13, xmm2 504 pxor xmm14, xmm3 505 movdqa xmm8, xmmword ptr [ROT8+rip] 506 pshufb xmm15, xmm8 507 pshufb xmm12, xmm8 508 pshufb xmm13, xmm8 509 pshufb xmm14, xmm8 510 paddd xmm10, xmm15 511 paddd xmm11, xmm12 512 movdqa xmm8, xmmword ptr [rsp+0x100] 513 paddd xmm8, xmm13 514 paddd xmm9, xmm14 515 pxor xmm5, xmm10 516 pxor xmm6, xmm11 517 pxor xmm7, xmm8 518 pxor xmm4, xmm9 519 movdqa xmmword ptr [rsp+0x100], xmm8 520 movdqa xmm8, xmm5 521 psrld xmm8, 7 522 pslld xmm5, 25 523 por xmm5, xmm8 524 movdqa xmm8, xmm6 525 psrld xmm8, 7 526 pslld xmm6, 25 527 por xmm6, xmm8 528 movdqa xmm8, xmm7 529 psrld xmm8, 7 530 pslld xmm7, 25 531 por xmm7, xmm8 532 movdqa xmm8, xmm4 533 psrld xmm8, 7 534 pslld xmm4, 25 535 por xmm4, xmm8 536 paddd xmm0, xmmword ptr [rsp+0x30] 537 paddd xmm1, xmmword ptr [rsp+0xA0] 538 
paddd xmm2, xmmword ptr [rsp+0xD0] 539 paddd xmm3, xmmword ptr [rsp+0x70] 540 paddd xmm0, xmm4 541 paddd xmm1, xmm5 542 paddd xmm2, xmm6 543 paddd xmm3, xmm7 544 pxor xmm12, xmm0 545 pxor xmm13, xmm1 546 pxor xmm14, xmm2 547 pxor xmm15, xmm3 548 movdqa xmm8, xmmword ptr [ROT16+rip] 549 pshufb xmm12, xmm8 550 pshufb xmm13, xmm8 551 pshufb xmm14, xmm8 552 pshufb xmm15, xmm8 553 movdqa xmm8, xmmword ptr [rsp+0x100] 554 paddd xmm8, xmm12 555 paddd xmm9, xmm13 556 paddd xmm10, xmm14 557 paddd xmm11, xmm15 558 pxor xmm4, xmm8 559 pxor xmm5, xmm9 560 pxor xmm6, xmm10 561 pxor xmm7, xmm11 562 movdqa xmmword ptr [rsp+0x100], xmm8 563 movdqa xmm8, xmm4 564 psrld xmm8, 12 565 pslld xmm4, 20 566 por xmm4, xmm8 567 movdqa xmm8, xmm5 568 psrld xmm8, 12 569 pslld xmm5, 20 570 por xmm5, xmm8 571 movdqa xmm8, xmm6 572 psrld xmm8, 12 573 pslld xmm6, 20 574 por xmm6, xmm8 575 movdqa xmm8, xmm7 576 psrld xmm8, 12 577 pslld xmm7, 20 578 por xmm7, xmm8 579 paddd xmm0, xmmword ptr [rsp+0x40] 580 paddd xmm1, xmmword ptr [rsp+0xC0] 581 paddd xmm2, xmmword ptr [rsp+0x20] 582 paddd xmm3, xmmword ptr [rsp+0xE0] 583 paddd xmm0, xmm4 584 paddd xmm1, xmm5 585 paddd xmm2, xmm6 586 paddd xmm3, xmm7 587 pxor xmm12, xmm0 588 pxor xmm13, xmm1 589 pxor xmm14, xmm2 590 pxor xmm15, xmm3 591 movdqa xmm8, xmmword ptr [ROT8+rip] 592 pshufb xmm12, xmm8 593 pshufb xmm13, xmm8 594 pshufb xmm14, xmm8 595 pshufb xmm15, xmm8 596 movdqa xmm8, xmmword ptr [rsp+0x100] 597 paddd xmm8, xmm12 598 paddd xmm9, xmm13 599 paddd xmm10, xmm14 600 paddd xmm11, xmm15 601 pxor xmm4, xmm8 602 pxor xmm5, xmm9 603 pxor xmm6, xmm10 604 pxor xmm7, xmm11 605 movdqa xmmword ptr [rsp+0x100], xmm8 606 movdqa xmm8, xmm4 607 psrld xmm8, 7 608 pslld xmm4, 25 609 por xmm4, xmm8 610 movdqa xmm8, xmm5 611 psrld xmm8, 7 612 pslld xmm5, 25 613 por xmm5, xmm8 614 movdqa xmm8, xmm6 615 psrld xmm8, 7 616 pslld xmm6, 25 617 por xmm6, xmm8 618 movdqa xmm8, xmm7 619 psrld xmm8, 7 620 pslld xmm7, 25 621 por xmm7, xmm8 622 paddd xmm0, xmmword ptr 
[rsp+0x60] 623 paddd xmm1, xmmword ptr [rsp+0x90] 624 paddd xmm2, xmmword ptr [rsp+0xB0] 625 paddd xmm3, xmmword ptr [rsp+0x80] 626 paddd xmm0, xmm5 627 paddd xmm1, xmm6 628 paddd xmm2, xmm7 629 paddd xmm3, xmm4 630 pxor xmm15, xmm0 631 pxor xmm12, xmm1 632 pxor xmm13, xmm2 633 pxor xmm14, xmm3 634 movdqa xmm8, xmmword ptr [ROT16+rip] 635 pshufb xmm15, xmm8 636 pshufb xmm12, xmm8 637 pshufb xmm13, xmm8 638 pshufb xmm14, xmm8 639 paddd xmm10, xmm15 640 paddd xmm11, xmm12 641 movdqa xmm8, xmmword ptr [rsp+0x100] 642 paddd xmm8, xmm13 643 paddd xmm9, xmm14 644 pxor xmm5, xmm10 645 pxor xmm6, xmm11 646 pxor xmm7, xmm8 647 pxor xmm4, xmm9 648 movdqa xmmword ptr [rsp+0x100], xmm8 649 movdqa xmm8, xmm5 650 psrld xmm8, 12 651 pslld xmm5, 20 652 por xmm5, xmm8 653 movdqa xmm8, xmm6 654 psrld xmm8, 12 655 pslld xmm6, 20 656 por xmm6, xmm8 657 movdqa xmm8, xmm7 658 psrld xmm8, 12 659 pslld xmm7, 20 660 por xmm7, xmm8 661 movdqa xmm8, xmm4 662 psrld xmm8, 12 663 pslld xmm4, 20 664 por xmm4, xmm8 665 paddd xmm0, xmmword ptr [rsp+0x50] 666 paddd xmm1, xmmword ptr [rsp] 667 paddd xmm2, xmmword ptr [rsp+0xF0] 668 paddd xmm3, xmmword ptr [rsp+0x10] 669 paddd xmm0, xmm5 670 paddd xmm1, xmm6 671 paddd xmm2, xmm7 672 paddd xmm3, xmm4 673 pxor xmm15, xmm0 674 pxor xmm12, xmm1 675 pxor xmm13, xmm2 676 pxor xmm14, xmm3 677 movdqa xmm8, xmmword ptr [ROT8+rip] 678 pshufb xmm15, xmm8 679 pshufb xmm12, xmm8 680 pshufb xmm13, xmm8 681 pshufb xmm14, xmm8 682 paddd xmm10, xmm15 683 paddd xmm11, xmm12 684 movdqa xmm8, xmmword ptr [rsp+0x100] 685 paddd xmm8, xmm13 686 paddd xmm9, xmm14 687 pxor xmm5, xmm10 688 pxor xmm6, xmm11 689 pxor xmm7, xmm8 690 pxor xmm4, xmm9 691 movdqa xmmword ptr [rsp+0x100], xmm8 692 movdqa xmm8, xmm5 693 psrld xmm8, 7 694 pslld xmm5, 25 695 por xmm5, xmm8 696 movdqa xmm8, xmm6 697 psrld xmm8, 7 698 pslld xmm6, 25 699 por xmm6, xmm8 700 movdqa xmm8, xmm7 701 psrld xmm8, 7 702 pslld xmm7, 25 703 por xmm7, xmm8 704 movdqa xmm8, xmm4 705 psrld xmm8, 7 706 pslld xmm4, 25 
707 por xmm4, xmm8 708 paddd xmm0, xmmword ptr [rsp+0xA0] 709 paddd xmm1, xmmword ptr [rsp+0xC0] 710 paddd xmm2, xmmword ptr [rsp+0xE0] 711 paddd xmm3, xmmword ptr [rsp+0xD0] 712 paddd xmm0, xmm4 713 paddd xmm1, xmm5 714 paddd xmm2, xmm6 715 paddd xmm3, xmm7 716 pxor xmm12, xmm0 717 pxor xmm13, xmm1 718 pxor xmm14, xmm2 719 pxor xmm15, xmm3 720 movdqa xmm8, xmmword ptr [ROT16+rip] 721 pshufb xmm12, xmm8 722 pshufb xmm13, xmm8 723 pshufb xmm14, xmm8 724 pshufb xmm15, xmm8 725 movdqa xmm8, xmmword ptr [rsp+0x100] 726 paddd xmm8, xmm12 727 paddd xmm9, xmm13 728 paddd xmm10, xmm14 729 paddd xmm11, xmm15 730 pxor xmm4, xmm8 731 pxor xmm5, xmm9 732 pxor xmm6, xmm10 733 pxor xmm7, xmm11 734 movdqa xmmword ptr [rsp+0x100], xmm8 735 movdqa xmm8, xmm4 736 psrld xmm8, 12 737 pslld xmm4, 20 738 por xmm4, xmm8 739 movdqa xmm8, xmm5 740 psrld xmm8, 12 741 pslld xmm5, 20 742 por xmm5, xmm8 743 movdqa xmm8, xmm6 744 psrld xmm8, 12 745 pslld xmm6, 20 746 por xmm6, xmm8 747 movdqa xmm8, xmm7 748 psrld xmm8, 12 749 pslld xmm7, 20 750 por xmm7, xmm8 751 paddd xmm0, xmmword ptr [rsp+0x70] 752 paddd xmm1, xmmword ptr [rsp+0x90] 753 paddd xmm2, xmmword ptr [rsp+0x30] 754 paddd xmm3, xmmword ptr [rsp+0xF0] 755 paddd xmm0, xmm4 756 paddd xmm1, xmm5 757 paddd xmm2, xmm6 758 paddd xmm3, xmm7 759 pxor xmm12, xmm0 760 pxor xmm13, xmm1 761 pxor xmm14, xmm2 762 pxor xmm15, xmm3 763 movdqa xmm8, xmmword ptr [ROT8+rip] 764 pshufb xmm12, xmm8 765 pshufb xmm13, xmm8 766 pshufb xmm14, xmm8 767 pshufb xmm15, xmm8 768 movdqa xmm8, xmmword ptr [rsp+0x100] 769 paddd xmm8, xmm12 770 paddd xmm9, xmm13 771 paddd xmm10, xmm14 772 paddd xmm11, xmm15 773 pxor xmm4, xmm8 774 pxor xmm5, xmm9 775 pxor xmm6, xmm10 776 pxor xmm7, xmm11 777 movdqa xmmword ptr [rsp+0x100], xmm8 778 movdqa xmm8, xmm4 779 psrld xmm8, 7 780 pslld xmm4, 25 781 por xmm4, xmm8 782 movdqa xmm8, xmm5 783 psrld xmm8, 7 784 pslld xmm5, 25 785 por xmm5, xmm8 786 movdqa xmm8, xmm6 787 psrld xmm8, 7 788 pslld xmm6, 25 789 por xmm6, xmm8 790 
movdqa xmm8, xmm7 791 psrld xmm8, 7 792 pslld xmm7, 25 793 por xmm7, xmm8 794 paddd xmm0, xmmword ptr [rsp+0x40] 795 paddd xmm1, xmmword ptr [rsp+0xB0] 796 paddd xmm2, xmmword ptr [rsp+0x50] 797 paddd xmm3, xmmword ptr [rsp+0x10] 798 paddd xmm0, xmm5 799 paddd xmm1, xmm6 800 paddd xmm2, xmm7 801 paddd xmm3, xmm4 802 pxor xmm15, xmm0 803 pxor xmm12, xmm1 804 pxor xmm13, xmm2 805 pxor xmm14, xmm3 806 movdqa xmm8, xmmword ptr [ROT16+rip] 807 pshufb xmm15, xmm8 808 pshufb xmm12, xmm8 809 pshufb xmm13, xmm8 810 pshufb xmm14, xmm8 811 paddd xmm10, xmm15 812 paddd xmm11, xmm12 813 movdqa xmm8, xmmword ptr [rsp+0x100] 814 paddd xmm8, xmm13 815 paddd xmm9, xmm14 816 pxor xmm5, xmm10 817 pxor xmm6, xmm11 818 pxor xmm7, xmm8 819 pxor xmm4, xmm9 820 movdqa xmmword ptr [rsp+0x100], xmm8 821 movdqa xmm8, xmm5 822 psrld xmm8, 12 823 pslld xmm5, 20 824 por xmm5, xmm8 825 movdqa xmm8, xmm6 826 psrld xmm8, 12 827 pslld xmm6, 20 828 por xmm6, xmm8 829 movdqa xmm8, xmm7 830 psrld xmm8, 12 831 pslld xmm7, 20 832 por xmm7, xmm8 833 movdqa xmm8, xmm4 834 psrld xmm8, 12 835 pslld xmm4, 20 836 por xmm4, xmm8 837 paddd xmm0, xmmword ptr [rsp] 838 paddd xmm1, xmmword ptr [rsp+0x20] 839 paddd xmm2, xmmword ptr [rsp+0x80] 840 paddd xmm3, xmmword ptr [rsp+0x60] 841 paddd xmm0, xmm5 842 paddd xmm1, xmm6 843 paddd xmm2, xmm7 844 paddd xmm3, xmm4 845 pxor xmm15, xmm0 846 pxor xmm12, xmm1 847 pxor xmm13, xmm2 848 pxor xmm14, xmm3 849 movdqa xmm8, xmmword ptr [ROT8+rip] 850 pshufb xmm15, xmm8 851 pshufb xmm12, xmm8 852 pshufb xmm13, xmm8 853 pshufb xmm14, xmm8 854 paddd xmm10, xmm15 855 paddd xmm11, xmm12 856 movdqa xmm8, xmmword ptr [rsp+0x100] 857 paddd xmm8, xmm13 858 paddd xmm9, xmm14 859 pxor xmm5, xmm10 860 pxor xmm6, xmm11 861 pxor xmm7, xmm8 862 pxor xmm4, xmm9 863 movdqa xmmword ptr [rsp+0x100], xmm8 864 movdqa xmm8, xmm5 865 psrld xmm8, 7 866 pslld xmm5, 25 867 por xmm5, xmm8 868 movdqa xmm8, xmm6 869 psrld xmm8, 7 870 pslld xmm6, 25 871 por xmm6, xmm8 872 movdqa xmm8, xmm7 873 psrld xmm8, 
7 874 pslld xmm7, 25 875 por xmm7, xmm8 876 movdqa xmm8, xmm4 877 psrld xmm8, 7 878 pslld xmm4, 25 879 por xmm4, xmm8 880 paddd xmm0, xmmword ptr [rsp+0xC0] 881 paddd xmm1, xmmword ptr [rsp+0x90] 882 paddd xmm2, xmmword ptr [rsp+0xF0] 883 paddd xmm3, xmmword ptr [rsp+0xE0] 884 paddd xmm0, xmm4 885 paddd xmm1, xmm5 886 paddd xmm2, xmm6 887 paddd xmm3, xmm7 888 pxor xmm12, xmm0 889 pxor xmm13, xmm1 890 pxor xmm14, xmm2 891 pxor xmm15, xmm3 892 movdqa xmm8, xmmword ptr [ROT16+rip] 893 pshufb xmm12, xmm8 894 pshufb xmm13, xmm8 895 pshufb xmm14, xmm8 896 pshufb xmm15, xmm8 897 movdqa xmm8, xmmword ptr [rsp+0x100] 898 paddd xmm8, xmm12 899 paddd xmm9, xmm13 900 paddd xmm10, xmm14 901 paddd xmm11, xmm15 902 pxor xmm4, xmm8 903 pxor xmm5, xmm9 904 pxor xmm6, xmm10 905 pxor xmm7, xmm11 906 movdqa xmmword ptr [rsp+0x100], xmm8 907 movdqa xmm8, xmm4 908 psrld xmm8, 12 909 pslld xmm4, 20 910 por xmm4, xmm8 911 movdqa xmm8, xmm5 912 psrld xmm8, 12 913 pslld xmm5, 20 914 por xmm5, xmm8 915 movdqa xmm8, xmm6 916 psrld xmm8, 12 917 pslld xmm6, 20 918 por xmm6, xmm8 919 movdqa xmm8, xmm7 920 psrld xmm8, 12 921 pslld xmm7, 20 922 por xmm7, xmm8 923 paddd xmm0, xmmword ptr [rsp+0xD0] 924 paddd xmm1, xmmword ptr [rsp+0xB0] 925 paddd xmm2, xmmword ptr [rsp+0xA0] 926 paddd xmm3, xmmword ptr [rsp+0x80] 927 paddd xmm0, xmm4 928 paddd xmm1, xmm5 929 paddd xmm2, xmm6 930 paddd xmm3, xmm7 931 pxor xmm12, xmm0 932 pxor xmm13, xmm1 933 pxor xmm14, xmm2 934 pxor xmm15, xmm3 935 movdqa xmm8, xmmword ptr [ROT8+rip] 936 pshufb xmm12, xmm8 937 pshufb xmm13, xmm8 938 pshufb xmm14, xmm8 939 pshufb xmm15, xmm8 940 movdqa xmm8, xmmword ptr [rsp+0x100] 941 paddd xmm8, xmm12 942 paddd xmm9, xmm13 943 paddd xmm10, xmm14 944 paddd xmm11, xmm15 945 pxor xmm4, xmm8 946 pxor xmm5, xmm9 947 pxor xmm6, xmm10 948 pxor xmm7, xmm11 949 movdqa xmmword ptr [rsp+0x100], xmm8 950 movdqa xmm8, xmm4 951 psrld xmm8, 7 952 pslld xmm4, 25 953 por xmm4, xmm8 954 movdqa xmm8, xmm5 955 psrld xmm8, 7 956 pslld xmm5, 25 957 por 
xmm5, xmm8 958 movdqa xmm8, xmm6 959 psrld xmm8, 7 960 pslld xmm6, 25 961 por xmm6, xmm8 962 movdqa xmm8, xmm7 963 psrld xmm8, 7 964 pslld xmm7, 25 965 por xmm7, xmm8 966 paddd xmm0, xmmword ptr [rsp+0x70] 967 paddd xmm1, xmmword ptr [rsp+0x50] 968 paddd xmm2, xmmword ptr [rsp] 969 paddd xmm3, xmmword ptr [rsp+0x60] 970 paddd xmm0, xmm5 971 paddd xmm1, xmm6 972 paddd xmm2, xmm7 973 paddd xmm3, xmm4 974 pxor xmm15, xmm0 975 pxor xmm12, xmm1 976 pxor xmm13, xmm2 977 pxor xmm14, xmm3 978 movdqa xmm8, xmmword ptr [ROT16+rip] 979 pshufb xmm15, xmm8 980 pshufb xmm12, xmm8 981 pshufb xmm13, xmm8 982 pshufb xmm14, xmm8 983 paddd xmm10, xmm15 984 paddd xmm11, xmm12 985 movdqa xmm8, xmmword ptr [rsp+0x100] 986 paddd xmm8, xmm13 987 paddd xmm9, xmm14 988 pxor xmm5, xmm10 989 pxor xmm6, xmm11 990 pxor xmm7, xmm8 991 pxor xmm4, xmm9 992 movdqa xmmword ptr [rsp+0x100], xmm8 993 movdqa xmm8, xmm5 994 psrld xmm8, 12 995 pslld xmm5, 20 996 por xmm5, xmm8 997 movdqa xmm8, xmm6 998 psrld xmm8, 12 999 pslld xmm6, 20 1000 por xmm6, xmm8 1001 movdqa xmm8, xmm7 1002 psrld xmm8, 12 1003 pslld xmm7, 20 1004 por xmm7, xmm8 1005 movdqa xmm8, xmm4 1006 psrld xmm8, 12 1007 pslld xmm4, 20 1008 por xmm4, xmm8 1009 paddd xmm0, xmmword ptr [rsp+0x20] 1010 paddd xmm1, xmmword ptr [rsp+0x30] 1011 paddd xmm2, xmmword ptr [rsp+0x10] 1012 paddd xmm3, xmmword ptr [rsp+0x40] 1013 paddd xmm0, xmm5 1014 paddd xmm1, xmm6 1015 paddd xmm2, xmm7 1016 paddd xmm3, xmm4 1017 pxor xmm15, xmm0 1018 pxor xmm12, xmm1 1019 pxor xmm13, xmm2 1020 pxor xmm14, xmm3 1021 movdqa xmm8, xmmword ptr [ROT8+rip] 1022 pshufb xmm15, xmm8 1023 pshufb xmm12, xmm8 1024 pshufb xmm13, xmm8 1025 pshufb xmm14, xmm8 1026 paddd xmm10, xmm15 1027 paddd xmm11, xmm12 1028 movdqa xmm8, xmmword ptr [rsp+0x100] 1029 paddd xmm8, xmm13 1030 paddd xmm9, xmm14 1031 pxor xmm5, xmm10 1032 pxor xmm6, xmm11 1033 pxor xmm7, xmm8 1034 pxor xmm4, xmm9 1035 movdqa xmmword ptr [rsp+0x100], xmm8 1036 movdqa xmm8, xmm5 1037 psrld xmm8, 7 1038 pslld xmm5, 25 
1039 por xmm5, xmm8 1040 movdqa xmm8, xmm6 1041 psrld xmm8, 7 1042 pslld xmm6, 25 1043 por xmm6, xmm8 1044 movdqa xmm8, xmm7 1045 psrld xmm8, 7 1046 pslld xmm7, 25 1047 por xmm7, xmm8 1048 movdqa xmm8, xmm4 1049 psrld xmm8, 7 1050 pslld xmm4, 25 1051 por xmm4, xmm8 1052 paddd xmm0, xmmword ptr [rsp+0x90] 1053 paddd xmm1, xmmword ptr [rsp+0xB0] 1054 paddd xmm2, xmmword ptr [rsp+0x80] 1055 paddd xmm3, xmmword ptr [rsp+0xF0] 1056 paddd xmm0, xmm4 1057 paddd xmm1, xmm5 1058 paddd xmm2, xmm6 1059 paddd xmm3, xmm7 1060 pxor xmm12, xmm0 1061 pxor xmm13, xmm1 1062 pxor xmm14, xmm2 1063 pxor xmm15, xmm3 1064 movdqa xmm8, xmmword ptr [ROT16+rip] 1065 pshufb xmm12, xmm8 1066 pshufb xmm13, xmm8 1067 pshufb xmm14, xmm8 1068 pshufb xmm15, xmm8 1069 movdqa xmm8, xmmword ptr [rsp+0x100] 1070 paddd xmm8, xmm12 1071 paddd xmm9, xmm13 1072 paddd xmm10, xmm14 1073 paddd xmm11, xmm15 1074 pxor xmm4, xmm8 1075 pxor xmm5, xmm9 1076 pxor xmm6, xmm10 1077 pxor xmm7, xmm11 1078 movdqa xmmword ptr [rsp+0x100], xmm8 1079 movdqa xmm8, xmm4 1080 psrld xmm8, 12 1081 pslld xmm4, 20 1082 por xmm4, xmm8 1083 movdqa xmm8, xmm5 1084 psrld xmm8, 12 1085 pslld xmm5, 20 1086 por xmm5, xmm8 1087 movdqa xmm8, xmm6 1088 psrld xmm8, 12 1089 pslld xmm6, 20 1090 por xmm6, xmm8 1091 movdqa xmm8, xmm7 1092 psrld xmm8, 12 1093 pslld xmm7, 20 1094 por xmm7, xmm8 1095 paddd xmm0, xmmword ptr [rsp+0xE0] 1096 paddd xmm1, xmmword ptr [rsp+0x50] 1097 paddd xmm2, xmmword ptr [rsp+0xC0] 1098 paddd xmm3, xmmword ptr [rsp+0x10] 1099 paddd xmm0, xmm4 1100 paddd xmm1, xmm5 1101 paddd xmm2, xmm6 1102 paddd xmm3, xmm7 1103 pxor xmm12, xmm0 1104 pxor xmm13, xmm1 1105 pxor xmm14, xmm2 1106 pxor xmm15, xmm3 1107 movdqa xmm8, xmmword ptr [ROT8+rip] 1108 pshufb xmm12, xmm8 1109 pshufb xmm13, xmm8 1110 pshufb xmm14, xmm8 1111 pshufb xmm15, xmm8 1112 movdqa xmm8, xmmword ptr [rsp+0x100] 1113 paddd xmm8, xmm12 1114 paddd xmm9, xmm13 1115 paddd xmm10, xmm14 1116 paddd xmm11, xmm15 1117 pxor xmm4, xmm8 1118 pxor xmm5, xmm9 1119 pxor 
xmm6, xmm10 1120 pxor xmm7, xmm11 1121 movdqa xmmword ptr [rsp+0x100], xmm8 1122 movdqa xmm8, xmm4 1123 psrld xmm8, 7 1124 pslld xmm4, 25 1125 por xmm4, xmm8 1126 movdqa xmm8, xmm5 1127 psrld xmm8, 7 1128 pslld xmm5, 25 1129 por xmm5, xmm8 1130 movdqa xmm8, xmm6 1131 psrld xmm8, 7 1132 pslld xmm6, 25 1133 por xmm6, xmm8 1134 movdqa xmm8, xmm7 1135 psrld xmm8, 7 1136 pslld xmm7, 25 1137 por xmm7, xmm8 1138 paddd xmm0, xmmword ptr [rsp+0xD0] 1139 paddd xmm1, xmmword ptr [rsp] 1140 paddd xmm2, xmmword ptr [rsp+0x20] 1141 paddd xmm3, xmmword ptr [rsp+0x40] 1142 paddd xmm0, xmm5 1143 paddd xmm1, xmm6 1144 paddd xmm2, xmm7 1145 paddd xmm3, xmm4 1146 pxor xmm15, xmm0 1147 pxor xmm12, xmm1 1148 pxor xmm13, xmm2 1149 pxor xmm14, xmm3 1150 movdqa xmm8, xmmword ptr [ROT16+rip] 1151 pshufb xmm15, xmm8 1152 pshufb xmm12, xmm8 1153 pshufb xmm13, xmm8 1154 pshufb xmm14, xmm8 1155 paddd xmm10, xmm15 1156 paddd xmm11, xmm12 1157 movdqa xmm8, xmmword ptr [rsp+0x100] 1158 paddd xmm8, xmm13 1159 paddd xmm9, xmm14 1160 pxor xmm5, xmm10 1161 pxor xmm6, xmm11 1162 pxor xmm7, xmm8 1163 pxor xmm4, xmm9 1164 movdqa xmmword ptr [rsp+0x100], xmm8 1165 movdqa xmm8, xmm5 1166 psrld xmm8, 12 1167 pslld xmm5, 20 1168 por xmm5, xmm8 1169 movdqa xmm8, xmm6 1170 psrld xmm8, 12 1171 pslld xmm6, 20 1172 por xmm6, xmm8 1173 movdqa xmm8, xmm7 1174 psrld xmm8, 12 1175 pslld xmm7, 20 1176 por xmm7, xmm8 1177 movdqa xmm8, xmm4 1178 psrld xmm8, 12 1179 pslld xmm4, 20 1180 por xmm4, xmm8 1181 paddd xmm0, xmmword ptr [rsp+0x30] 1182 paddd xmm1, xmmword ptr [rsp+0xA0] 1183 paddd xmm2, xmmword ptr [rsp+0x60] 1184 paddd xmm3, xmmword ptr [rsp+0x70] 1185 paddd xmm0, xmm5 1186 paddd xmm1, xmm6 1187 paddd xmm2, xmm7 1188 paddd xmm3, xmm4 1189 pxor xmm15, xmm0 1190 pxor xmm12, xmm1 1191 pxor xmm13, xmm2 1192 pxor xmm14, xmm3 1193 movdqa xmm8, xmmword ptr [ROT8+rip] 1194 pshufb xmm15, xmm8 1195 pshufb xmm12, xmm8 1196 pshufb xmm13, xmm8 1197 pshufb xmm14, xmm8 1198 paddd xmm10, xmm15 1199 paddd xmm11, xmm12 1200 
movdqa xmm8, xmmword ptr [rsp+0x100] 1201 paddd xmm8, xmm13 1202 paddd xmm9, xmm14 1203 pxor xmm5, xmm10 1204 pxor xmm6, xmm11 1205 pxor xmm7, xmm8 1206 pxor xmm4, xmm9 1207 movdqa xmmword ptr [rsp+0x100], xmm8 1208 movdqa xmm8, xmm5 1209 psrld xmm8, 7 1210 pslld xmm5, 25 1211 por xmm5, xmm8 1212 movdqa xmm8, xmm6 1213 psrld xmm8, 7 1214 pslld xmm6, 25 1215 por xmm6, xmm8 1216 movdqa xmm8, xmm7 1217 psrld xmm8, 7 1218 pslld xmm7, 25 1219 por xmm7, xmm8 1220 movdqa xmm8, xmm4 1221 psrld xmm8, 7 1222 pslld xmm4, 25 1223 por xmm4, xmm8 1224 paddd xmm0, xmmword ptr [rsp+0xB0] 1225 paddd xmm1, xmmword ptr [rsp+0x50] 1226 paddd xmm2, xmmword ptr [rsp+0x10] 1227 paddd xmm3, xmmword ptr [rsp+0x80] 1228 paddd xmm0, xmm4 1229 paddd xmm1, xmm5 1230 paddd xmm2, xmm6 1231 paddd xmm3, xmm7 1232 pxor xmm12, xmm0 1233 pxor xmm13, xmm1 1234 pxor xmm14, xmm2 1235 pxor xmm15, xmm3 1236 movdqa xmm8, xmmword ptr [ROT16+rip] 1237 pshufb xmm12, xmm8 1238 pshufb xmm13, xmm8 1239 pshufb xmm14, xmm8 1240 pshufb xmm15, xmm8 1241 movdqa xmm8, xmmword ptr [rsp+0x100] 1242 paddd xmm8, xmm12 1243 paddd xmm9, xmm13 1244 paddd xmm10, xmm14 1245 paddd xmm11, xmm15 1246 pxor xmm4, xmm8 1247 pxor xmm5, xmm9 1248 pxor xmm6, xmm10 1249 pxor xmm7, xmm11 1250 movdqa xmmword ptr [rsp+0x100], xmm8 1251 movdqa xmm8, xmm4 1252 psrld xmm8, 12 1253 pslld xmm4, 20 1254 por xmm4, xmm8 1255 movdqa xmm8, xmm5 1256 psrld xmm8, 12 1257 pslld xmm5, 20 1258 por xmm5, xmm8 1259 movdqa xmm8, xmm6 1260 psrld xmm8, 12 1261 pslld xmm6, 20 1262 por xmm6, xmm8 1263 movdqa xmm8, xmm7 1264 psrld xmm8, 12 1265 pslld xmm7, 20 1266 por xmm7, xmm8 1267 paddd xmm0, xmmword ptr [rsp+0xF0] 1268 paddd xmm1, xmmword ptr [rsp] 1269 paddd xmm2, xmmword ptr [rsp+0x90] 1270 paddd xmm3, xmmword ptr [rsp+0x60] 1271 paddd xmm0, xmm4 1272 paddd xmm1, xmm5 1273 paddd xmm2, xmm6 1274 paddd xmm3, xmm7 1275 pxor xmm12, xmm0 1276 pxor xmm13, xmm1 1277 pxor xmm14, xmm2 1278 pxor xmm15, xmm3 1279 movdqa xmm8, xmmword ptr [ROT8+rip] 1280 pshufb xmm12, 
xmm8 1281 pshufb xmm13, xmm8 1282 pshufb xmm14, xmm8 1283 pshufb xmm15, xmm8 1284 movdqa xmm8, xmmword ptr [rsp+0x100] 1285 paddd xmm8, xmm12 1286 paddd xmm9, xmm13 1287 paddd xmm10, xmm14 1288 paddd xmm11, xmm15 1289 pxor xmm4, xmm8 1290 pxor xmm5, xmm9 1291 pxor xmm6, xmm10 1292 pxor xmm7, xmm11 1293 movdqa xmmword ptr [rsp+0x100], xmm8 1294 movdqa xmm8, xmm4 1295 psrld xmm8, 7 1296 pslld xmm4, 25 1297 por xmm4, xmm8 1298 movdqa xmm8, xmm5 1299 psrld xmm8, 7 1300 pslld xmm5, 25 1301 por xmm5, xmm8 1302 movdqa xmm8, xmm6 1303 psrld xmm8, 7 1304 pslld xmm6, 25 1305 por xmm6, xmm8 1306 movdqa xmm8, xmm7 1307 psrld xmm8, 7 1308 pslld xmm7, 25 1309 por xmm7, xmm8 1310 paddd xmm0, xmmword ptr [rsp+0xE0] 1311 paddd xmm1, xmmword ptr [rsp+0x20] 1312 paddd xmm2, xmmword ptr [rsp+0x30] 1313 paddd xmm3, xmmword ptr [rsp+0x70] 1314 paddd xmm0, xmm5 1315 paddd xmm1, xmm6 1316 paddd xmm2, xmm7 1317 paddd xmm3, xmm4 1318 pxor xmm15, xmm0 1319 pxor xmm12, xmm1 1320 pxor xmm13, xmm2 1321 pxor xmm14, xmm3 1322 movdqa xmm8, xmmword ptr [ROT16+rip] 1323 pshufb xmm15, xmm8 1324 pshufb xmm12, xmm8 1325 pshufb xmm13, xmm8 1326 pshufb xmm14, xmm8 1327 paddd xmm10, xmm15 1328 paddd xmm11, xmm12 1329 movdqa xmm8, xmmword ptr [rsp+0x100] 1330 paddd xmm8, xmm13 1331 paddd xmm9, xmm14 1332 pxor xmm5, xmm10 1333 pxor xmm6, xmm11 1334 pxor xmm7, xmm8 1335 pxor xmm4, xmm9 1336 movdqa xmmword ptr [rsp+0x100], xmm8 1337 movdqa xmm8, xmm5 1338 psrld xmm8, 12 1339 pslld xmm5, 20 1340 por xmm5, xmm8 1341 movdqa xmm8, xmm6 1342 psrld xmm8, 12 1343 pslld xmm6, 20 1344 por xmm6, xmm8 1345 movdqa xmm8, xmm7 1346 psrld xmm8, 12 1347 pslld xmm7, 20 1348 por xmm7, xmm8 1349 movdqa xmm8, xmm4 1350 psrld xmm8, 12 1351 pslld xmm4, 20 1352 por xmm4, xmm8 1353 paddd xmm0, xmmword ptr [rsp+0xA0] 1354 paddd xmm1, xmmword ptr [rsp+0xC0] 1355 paddd xmm2, xmmword ptr [rsp+0x40] 1356 paddd xmm3, xmmword ptr [rsp+0xD0] 1357 paddd xmm0, xmm5 1358 paddd xmm1, xmm6 1359 paddd xmm2, xmm7 1360 paddd xmm3, xmm4 1361 pxor 
xmm15, xmm0 1362 pxor xmm12, xmm1 1363 pxor xmm13, xmm2 1364 pxor xmm14, xmm3 1365 movdqa xmm8, xmmword ptr [ROT8+rip] 1366 pshufb xmm15, xmm8 1367 pshufb xmm12, xmm8 1368 pshufb xmm13, xmm8 1369 pshufb xmm14, xmm8 1370 paddd xmm10, xmm15 1371 paddd xmm11, xmm12 1372 movdqa xmm8, xmmword ptr [rsp+0x100] 1373 paddd xmm8, xmm13 1374 paddd xmm9, xmm14 1375 pxor xmm5, xmm10 1376 pxor xmm6, xmm11 1377 pxor xmm7, xmm8 1378 pxor xmm4, xmm9 1379 pxor xmm0, xmm8 1380 pxor xmm1, xmm9 1381 pxor xmm2, xmm10 1382 pxor xmm3, xmm11 1383 movdqa xmm8, xmm5 1384 psrld xmm8, 7 1385 pslld xmm5, 25 1386 por xmm5, xmm8 1387 movdqa xmm8, xmm6 1388 psrld xmm8, 7 1389 pslld xmm6, 25 1390 por xmm6, xmm8 1391 movdqa xmm8, xmm7 1392 psrld xmm8, 7 1393 pslld xmm7, 25 1394 por xmm7, xmm8 1395 movdqa xmm8, xmm4 1396 psrld xmm8, 7 1397 pslld xmm4, 25 1398 por xmm4, xmm8 1399 pxor xmm4, xmm12 1400 pxor xmm5, xmm13 1401 pxor xmm6, xmm14 1402 pxor xmm7, xmm15 1403 mov eax, r13d 1404 jne 9b 1405 movdqa xmm9, xmm0 1406 punpckldq xmm0, xmm1 1407 punpckhdq xmm9, xmm1 1408 movdqa xmm11, xmm2 1409 punpckldq xmm2, xmm3 1410 punpckhdq xmm11, xmm3 1411 movdqa xmm1, xmm0 1412 punpcklqdq xmm0, xmm2 1413 punpckhqdq xmm1, xmm2 1414 movdqa xmm3, xmm9 1415 punpcklqdq xmm9, xmm11 1416 punpckhqdq xmm3, xmm11 1417 movdqu xmmword ptr [rbx], xmm0 1418 movdqu xmmword ptr [rbx+0x20], xmm1 1419 movdqu xmmword ptr [rbx+0x40], xmm9 1420 movdqu xmmword ptr [rbx+0x60], xmm3 1421 movdqa xmm9, xmm4 1422 punpckldq xmm4, xmm5 1423 punpckhdq xmm9, xmm5 1424 movdqa xmm11, xmm6 1425 punpckldq xmm6, xmm7 1426 punpckhdq xmm11, xmm7 1427 movdqa xmm5, xmm4 1428 punpcklqdq xmm4, xmm6 1429 punpckhqdq xmm5, xmm6 1430 movdqa xmm7, xmm9 1431 punpcklqdq xmm9, xmm11 1432 punpckhqdq xmm7, xmm11 1433 movdqu xmmword ptr [rbx+0x10], xmm4 1434 movdqu xmmword ptr [rbx+0x30], xmm5 1435 movdqu xmmword ptr [rbx+0x50], xmm9 1436 movdqu xmmword ptr [rbx+0x70], xmm7 1437 movdqa xmm1, xmmword ptr [rsp+0x110] 1438 movdqa xmm0, xmm1 1439 paddd xmm1, xmmword 
ptr [rsp+0x150] 1440 movdqa xmmword ptr [rsp+0x110], xmm1 1441 pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] 1442 pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] 1443 pcmpgtd xmm0, xmm1 1444 movdqa xmm1, xmmword ptr [rsp+0x120] 1445 psubd xmm1, xmm0 1446 movdqa xmmword ptr [rsp+0x120], xmm1 1447 add rbx, 128 1448 add rdi, 32 1449 sub rsi, 4 1450 cmp rsi, 4 1451 jnc 2b 1452 test rsi, rsi 1453 jnz 3f 14544: 1455 mov rsp, rbp 1456 pop rbp 1457 pop rbx 1458 pop r12 1459 pop r13 1460 pop r14 1461 pop r15 1462 RET 1463.p2align 5 14643: 1465 test esi, 0x2 1466 je 3f 1467 movups xmm0, xmmword ptr [rcx] 1468 movups xmm1, xmmword ptr [rcx+0x10] 1469 movaps xmm8, xmm0 1470 movaps xmm9, xmm1 1471 movd xmm13, dword ptr [rsp+0x110] 1472 pinsrd xmm13, dword ptr [rsp+0x120], 1 1473 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1474 movaps xmmword ptr [rsp], xmm13 1475 movd xmm14, dword ptr [rsp+0x114] 1476 pinsrd xmm14, dword ptr [rsp+0x124], 1 1477 pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1478 movaps xmmword ptr [rsp+0x10], xmm14 1479 mov r8, qword ptr [rdi] 1480 mov r9, qword ptr [rdi+0x8] 1481 movzx eax, byte ptr [rbp+0x40] 1482 or eax, r13d 1483 xor edx, edx 14842: 1485 mov r14d, eax 1486 or eax, r12d 1487 add rdx, 64 1488 cmp rdx, r15 1489 cmovne eax, r14d 1490 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1491 movaps xmm10, xmm2 1492 movups xmm4, xmmword ptr [r8+rdx-0x40] 1493 movups xmm5, xmmword ptr [r8+rdx-0x30] 1494 movaps xmm3, xmm4 1495 shufps xmm4, xmm5, 136 1496 shufps xmm3, xmm5, 221 1497 movaps xmm5, xmm3 1498 movups xmm6, xmmword ptr [r8+rdx-0x20] 1499 movups xmm7, xmmword ptr [r8+rdx-0x10] 1500 movaps xmm3, xmm6 1501 shufps xmm6, xmm7, 136 1502 pshufd xmm6, xmm6, 0x93 1503 shufps xmm3, xmm7, 221 1504 pshufd xmm7, xmm3, 0x93 1505 movups xmm12, xmmword ptr [r9+rdx-0x40] 1506 movups xmm13, xmmword ptr [r9+rdx-0x30] 1507 movaps xmm11, xmm12 1508 shufps xmm12, xmm13, 136 1509 shufps xmm11, xmm13, 221 1510 movaps xmm13, xmm11 1511 movups xmm14, xmmword ptr [r9+rdx-0x20] 
1512 movups xmm15, xmmword ptr [r9+rdx-0x10] 1513 movaps xmm11, xmm14 1514 shufps xmm14, xmm15, 136 1515 pshufd xmm14, xmm14, 0x93 1516 shufps xmm11, xmm15, 221 1517 pshufd xmm15, xmm11, 0x93 1518 movaps xmm3, xmmword ptr [rsp] 1519 movaps xmm11, xmmword ptr [rsp+0x10] 1520 pinsrd xmm3, eax, 3 1521 pinsrd xmm11, eax, 3 1522 mov al, 7 15239: 1524 paddd xmm0, xmm4 1525 paddd xmm8, xmm12 1526 movaps xmmword ptr [rsp+0x20], xmm4 1527 movaps xmmword ptr [rsp+0x30], xmm12 1528 paddd xmm0, xmm1 1529 paddd xmm8, xmm9 1530 pxor xmm3, xmm0 1531 pxor xmm11, xmm8 1532 movaps xmm12, xmmword ptr [ROT16+rip] 1533 pshufb xmm3, xmm12 1534 pshufb xmm11, xmm12 1535 paddd xmm2, xmm3 1536 paddd xmm10, xmm11 1537 pxor xmm1, xmm2 1538 pxor xmm9, xmm10 1539 movdqa xmm4, xmm1 1540 pslld xmm1, 20 1541 psrld xmm4, 12 1542 por xmm1, xmm4 1543 movdqa xmm4, xmm9 1544 pslld xmm9, 20 1545 psrld xmm4, 12 1546 por xmm9, xmm4 1547 paddd xmm0, xmm5 1548 paddd xmm8, xmm13 1549 movaps xmmword ptr [rsp+0x40], xmm5 1550 movaps xmmword ptr [rsp+0x50], xmm13 1551 paddd xmm0, xmm1 1552 paddd xmm8, xmm9 1553 pxor xmm3, xmm0 1554 pxor xmm11, xmm8 1555 movaps xmm13, xmmword ptr [ROT8+rip] 1556 pshufb xmm3, xmm13 1557 pshufb xmm11, xmm13 1558 paddd xmm2, xmm3 1559 paddd xmm10, xmm11 1560 pxor xmm1, xmm2 1561 pxor xmm9, xmm10 1562 movdqa xmm4, xmm1 1563 pslld xmm1, 25 1564 psrld xmm4, 7 1565 por xmm1, xmm4 1566 movdqa xmm4, xmm9 1567 pslld xmm9, 25 1568 psrld xmm4, 7 1569 por xmm9, xmm4 1570 pshufd xmm0, xmm0, 0x93 1571 pshufd xmm8, xmm8, 0x93 1572 pshufd xmm3, xmm3, 0x4E 1573 pshufd xmm11, xmm11, 0x4E 1574 pshufd xmm2, xmm2, 0x39 1575 pshufd xmm10, xmm10, 0x39 1576 paddd xmm0, xmm6 1577 paddd xmm8, xmm14 1578 paddd xmm0, xmm1 1579 paddd xmm8, xmm9 1580 pxor xmm3, xmm0 1581 pxor xmm11, xmm8 1582 pshufb xmm3, xmm12 1583 pshufb xmm11, xmm12 1584 paddd xmm2, xmm3 1585 paddd xmm10, xmm11 1586 pxor xmm1, xmm2 1587 pxor xmm9, xmm10 1588 movdqa xmm4, xmm1 1589 pslld xmm1, 20 1590 psrld xmm4, 12 1591 por xmm1, xmm4 1592 
movdqa xmm4, xmm9 1593 pslld xmm9, 20 1594 psrld xmm4, 12 1595 por xmm9, xmm4 1596 paddd xmm0, xmm7 1597 paddd xmm8, xmm15 1598 paddd xmm0, xmm1 1599 paddd xmm8, xmm9 1600 pxor xmm3, xmm0 1601 pxor xmm11, xmm8 1602 pshufb xmm3, xmm13 1603 pshufb xmm11, xmm13 1604 paddd xmm2, xmm3 1605 paddd xmm10, xmm11 1606 pxor xmm1, xmm2 1607 pxor xmm9, xmm10 1608 movdqa xmm4, xmm1 1609 pslld xmm1, 25 1610 psrld xmm4, 7 1611 por xmm1, xmm4 1612 movdqa xmm4, xmm9 1613 pslld xmm9, 25 1614 psrld xmm4, 7 1615 por xmm9, xmm4 1616 pshufd xmm0, xmm0, 0x39 1617 pshufd xmm8, xmm8, 0x39 1618 pshufd xmm3, xmm3, 0x4E 1619 pshufd xmm11, xmm11, 0x4E 1620 pshufd xmm2, xmm2, 0x93 1621 pshufd xmm10, xmm10, 0x93 1622 dec al 1623 je 9f 1624 movdqa xmm12, xmmword ptr [rsp+0x20] 1625 movdqa xmm5, xmmword ptr [rsp+0x40] 1626 pshufd xmm13, xmm12, 0x0F 1627 shufps xmm12, xmm5, 214 1628 pshufd xmm4, xmm12, 0x39 1629 movdqa xmm12, xmm6 1630 shufps xmm12, xmm7, 250 1631 pblendw xmm13, xmm12, 0xCC 1632 movdqa xmm12, xmm7 1633 punpcklqdq xmm12, xmm5 1634 pblendw xmm12, xmm6, 0xC0 1635 pshufd xmm12, xmm12, 0x78 1636 punpckhdq xmm5, xmm7 1637 punpckldq xmm6, xmm5 1638 pshufd xmm7, xmm6, 0x1E 1639 movdqa xmmword ptr [rsp+0x20], xmm13 1640 movdqa xmmword ptr [rsp+0x40], xmm12 1641 movdqa xmm5, xmmword ptr [rsp+0x30] 1642 movdqa xmm13, xmmword ptr [rsp+0x50] 1643 pshufd xmm6, xmm5, 0x0F 1644 shufps xmm5, xmm13, 214 1645 pshufd xmm12, xmm5, 0x39 1646 movdqa xmm5, xmm14 1647 shufps xmm5, xmm15, 250 1648 pblendw xmm6, xmm5, 0xCC 1649 movdqa xmm5, xmm15 1650 punpcklqdq xmm5, xmm13 1651 pblendw xmm5, xmm14, 0xC0 1652 pshufd xmm5, xmm5, 0x78 1653 punpckhdq xmm13, xmm15 1654 punpckldq xmm14, xmm13 1655 pshufd xmm15, xmm14, 0x1E 1656 movdqa xmm13, xmm6 1657 movdqa xmm14, xmm5 1658 movdqa xmm5, xmmword ptr [rsp+0x20] 1659 movdqa xmm6, xmmword ptr [rsp+0x40] 1660 jmp 9b 16619: 1662 pxor xmm0, xmm2 1663 pxor xmm1, xmm3 1664 pxor xmm8, xmm10 1665 pxor xmm9, xmm11 1666 mov eax, r13d 1667 cmp rdx, r15 1668 jne 2b 1669 movups 
xmmword ptr [rbx], xmm0 1670 movups xmmword ptr [rbx+0x10], xmm1 1671 movups xmmword ptr [rbx+0x20], xmm8 1672 movups xmmword ptr [rbx+0x30], xmm9 1673 movdqa xmm0, xmmword ptr [rsp+0x130] 1674 movdqa xmm1, xmmword ptr [rsp+0x110] 1675 movdqa xmm2, xmmword ptr [rsp+0x120] 1676 movdqu xmm3, xmmword ptr [rsp+0x118] 1677 movdqu xmm4, xmmword ptr [rsp+0x128] 1678 blendvps xmm1, xmm3, xmm0 1679 blendvps xmm2, xmm4, xmm0 1680 movdqa xmmword ptr [rsp+0x110], xmm1 1681 movdqa xmmword ptr [rsp+0x120], xmm2 1682 add rdi, 16 1683 add rbx, 64 1684 sub rsi, 2 16853: 1686 test esi, 0x1 1687 je 4b 1688 movups xmm0, xmmword ptr [rcx] 1689 movups xmm1, xmmword ptr [rcx+0x10] 1690 movd xmm13, dword ptr [rsp+0x110] 1691 pinsrd xmm13, dword ptr [rsp+0x120], 1 1692 pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 1693 movaps xmm14, xmmword ptr [ROT8+rip] 1694 movaps xmm15, xmmword ptr [ROT16+rip] 1695 mov r8, qword ptr [rdi] 1696 movzx eax, byte ptr [rbp+0x40] 1697 or eax, r13d 1698 xor edx, edx 16992: 1700 mov r14d, eax 1701 or eax, r12d 1702 add rdx, 64 1703 cmp rdx, r15 1704 cmovne eax, r14d 1705 movaps xmm2, xmmword ptr [BLAKE3_IV+rip] 1706 movaps xmm3, xmm13 1707 pinsrd xmm3, eax, 3 1708 movups xmm4, xmmword ptr [r8+rdx-0x40] 1709 movups xmm5, xmmword ptr [r8+rdx-0x30] 1710 movaps xmm8, xmm4 1711 shufps xmm4, xmm5, 136 1712 shufps xmm8, xmm5, 221 1713 movaps xmm5, xmm8 1714 movups xmm6, xmmword ptr [r8+rdx-0x20] 1715 movups xmm7, xmmword ptr [r8+rdx-0x10] 1716 movaps xmm8, xmm6 1717 shufps xmm6, xmm7, 136 1718 pshufd xmm6, xmm6, 0x93 1719 shufps xmm8, xmm7, 221 1720 pshufd xmm7, xmm8, 0x93 1721 mov al, 7 17229: 1723 paddd xmm0, xmm4 1724 paddd xmm0, xmm1 1725 pxor xmm3, xmm0 1726 pshufb xmm3, xmm15 1727 paddd xmm2, xmm3 1728 pxor xmm1, xmm2 1729 movdqa xmm11, xmm1 1730 pslld xmm1, 20 1731 psrld xmm11, 12 1732 por xmm1, xmm11 1733 paddd xmm0, xmm5 1734 paddd xmm0, xmm1 1735 pxor xmm3, xmm0 1736 pshufb xmm3, xmm14 1737 paddd xmm2, xmm3 1738 pxor xmm1, xmm2 1739 movdqa xmm11, xmm1 
/*
 * zfs_blake3_hash_many_sse41, continued: single-input round loop.
 * Execution resumes in the middle of a rotate; xmm11 holds the copy of
 * xmm1 taken by the preceding instruction.
 */
	pslld xmm1, 25
	psrld xmm11, 7
	por xmm1, xmm11
	pshufd xmm0, xmm0, 0x93
	pshufd xmm3, xmm3, 0x4E
	pshufd xmm2, xmm2, 0x39
	paddd xmm0, xmm6
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm15
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1
	pslld xmm1, 20
	psrld xmm11, 12
	por xmm1, xmm11
	paddd xmm0, xmm7
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm14
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1
	pslld xmm1, 25
	psrld xmm11, 7
	por xmm1, xmm11
	pshufd xmm0, xmm0, 0x39
	pshufd xmm3, xmm3, 0x4E
	pshufd xmm2, xmm2, 0x93
	dec al
	jz 9f
	/* Not the last pass: reshuffle the message words in xmm4-xmm7. */
	movdqa xmm8, xmm4
	shufps xmm8, xmm5, 214
	pshufd xmm9, xmm4, 0x0F
	pshufd xmm4, xmm8, 0x39
	movdqa xmm8, xmm6
	shufps xmm8, xmm7, 250
	pblendw xmm9, xmm8, 0xCC
	movdqa xmm8, xmm7
	punpcklqdq xmm8, xmm5
	pblendw xmm8, xmm6, 0xC0
	pshufd xmm8, xmm8, 0x78
	punpckhdq xmm5, xmm7
	punpckldq xmm6, xmm5
	pshufd xmm7, xmm6, 0x1E
	movdqa xmm5, xmm9
	movdqa xmm6, xmm8
	jmp 9b
9:
	/* Fold rows 2/3 into rows 0/1, then continue with the next block. */
	pxor xmm0, xmm2
	pxor xmm1, xmm3
	mov eax, r13d			/* flag byte loaded in the prologue */
	cmp rdx, r15			/* r15 = rdx << 6, set in the prologue */
	jne 2b
	/* Last block done: store the 32-byte result and go to the epilogue. */
	movups xmmword ptr [rbx], xmm0
	movups xmmword ptr [rbx+0x10], xmm1
	jmp 4b
SET_SIZE(zfs_blake3_hash_many_sse41)

/*
 * zfs_blake3_compress_in_place_sse41
 *
 * Runs seven rounds over one 64-byte block and writes the 32-byte result
 * back over the input state.
 *
 * Register inputs as used below (the C prototype is not part of this file;
 * the parameter names are presumed -- confirm against the C header):
 *   rdi  32-byte state, read at entry and overwritten with the result
 *   rsi  64-byte message block, read with unaligned loads
 *   dl   presumably block_len (only the low byte is used)
 *   rcx  presumably the 64-bit counter
 *   r8b  presumably flags (only the low byte is used)
 *
 * Clobbers: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags.
 */
ENTRY_ALIGN(zfs_blake3_compress_in_place_sse41, 64)
	ENDBR
	movups xmm0, xmmword ptr [rdi]		/* row 0 = state[0..3] */
	movups xmm1, xmmword ptr [rdi+0x10]	/* row 1 = state[4..7] */
	movaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = IV[0..3] */
	/*
	 * Build rdx = dl | (r8b << 32).  Both byte arguments are explicitly
	 * zero-extended, exactly as zfs_blake3_compress_xof_sse41 does, so
	 * that stale upper register bits left by the caller can never leak
	 * into row 3.
	 */
	movzx eax, r8b
	movzx edx, dl
	shl rax, 32
	add rdx, rax
	movq xmm3, rcx
	movq xmm4, rdx
	punpcklqdq xmm3, xmm4			/* row 3 = { rcx lo, rcx hi, dl, r8b } */
	/* Split the block: even-indexed words to xmm4/xmm6, odd to xmm5/xmm7. */
	movups xmm4, xmmword ptr [rsi]
	movups xmm5, xmmword ptr [rsi+0x10]
	movaps xmm8, xmm4
	shufps xmm4, xmm5, 136
	shufps xmm8, xmm5, 221
	movaps xmm5, xmm8
	movups xmm6, xmmword ptr [rsi+0x20]
	movups xmm7, xmmword ptr [rsi+0x30]
	movaps xmm8, xmm6
	shufps xmm6, xmm7, 136
	pshufd xmm6, xmm6, 0x93
	shufps xmm8, xmm7, 221
	pshufd xmm7, xmm8, 0x93
	movaps xmm14, xmmword ptr [ROT8+rip]	/* pshufb mask: rotate by 8 */
	movaps xmm15, xmmword ptr [ROT16+rip]	/* pshufb mask: rotate by 16 */
	mov al, 7				/* pass counter: 7 rounds */
9:
	/* First half of the round (rows as loaded). */
	paddd xmm0, xmm4
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm15
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1			/* rotate right by 12 */
	pslld xmm1, 20
	psrld xmm11, 12
	por xmm1, xmm11
	paddd xmm0, xmm5
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm14
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1			/* rotate right by 7 */
	pslld xmm1, 25
	psrld xmm11, 7
	por xmm1, xmm11
	/* Rotate the lanes of rows 0, 3 and 2 for the second half. */
	pshufd xmm0, xmm0, 0x93
	pshufd xmm3, xmm3, 0x4E
	pshufd xmm2, xmm2, 0x39
	paddd xmm0, xmm6
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm15
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1			/* rotate right by 12 */
	pslld xmm1, 20
	psrld xmm11, 12
	por xmm1, xmm11
	paddd xmm0, xmm7
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm14
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1			/* rotate right by 7 */
	pslld xmm1, 25
	psrld xmm11, 7
	por xmm1, xmm11
	/* Undo the lane rotation. */
	pshufd xmm0, xmm0, 0x39
	pshufd xmm3, xmm3, 0x4E
	pshufd xmm2, xmm2, 0x93
	dec al
	jz 9f
	/* Not the last pass: reshuffle the message words in xmm4-xmm7. */
	movdqa xmm8, xmm4
	shufps xmm8, xmm5, 214
	pshufd xmm9, xmm4, 0x0F
	pshufd xmm4, xmm8, 0x39
	movdqa xmm8, xmm6
	shufps xmm8, xmm7, 250
	pblendw xmm9, xmm8, 0xCC
	movdqa xmm8, xmm7
	punpcklqdq xmm8, xmm5
	pblendw xmm8, xmm6, 0xC0
	pshufd xmm8, xmm8, 0x78
	punpckhdq xmm5, xmm7
	punpckldq xmm6, xmm5
	pshufd xmm7, xmm6, 0x1E
	movdqa xmm5, xmm9
	movdqa xmm6, xmm8
	jmp 9b
9:
	/* Fold rows 2/3 into rows 0/1 and overwrite the state at [rdi]. */
	pxor xmm0, xmm2
	pxor xmm1, xmm3
	movups xmmword ptr [rdi], xmm0
	movups xmmword ptr [rdi+0x10], xmm1
	RET
SET_SIZE(zfs_blake3_compress_in_place_sse41)

/*
 * zfs_blake3_compress_xof_sse41
 *
 * Same seven-round compression as zfs_blake3_compress_in_place_sse41, but
 * the state at [rdi] is only read, and a 64-byte result is written to [r9].
 *
 * Register inputs as used below (the C prototype is not part of this file;
 * the parameter names are presumed -- confirm against the C header):
 *   rdi  32-byte state (read only)
 *   rsi  64-byte message block, read with unaligned loads
 *   dl   presumably block_len (zero-extended here)
 *   rcx  presumably the 64-bit counter
 *   r8b  presumably flags (zero-extended here)
 *   r9   64-byte output buffer, written with unaligned stores
 *
 * Clobbers: rax, rdx, xmm0-xmm9, xmm11, xmm14, xmm15, flags.
 */
ENTRY_ALIGN(zfs_blake3_compress_xof_sse41, 64)
	ENDBR
	movups xmm0, xmmword ptr [rdi]		/* row 0 = state[0..3] */
	movups xmm1, xmmword ptr [rdi+0x10]	/* row 1 = state[4..7] */
	movaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = IV[0..3] */
	/* rdx = dl | (r8b << 32), both bytes zero-extended first. */
	movzx eax, r8b
	movzx edx, dl
	shl rax, 32
	add rdx, rax
	movq xmm3, rcx
	movq xmm4, rdx
	punpcklqdq xmm3, xmm4			/* row 3 = { rcx lo, rcx hi, dl, r8b } */
	/* Split the block: even-indexed words to xmm4/xmm6, odd to xmm5/xmm7. */
	movups xmm4, xmmword ptr [rsi]
	movups xmm5, xmmword ptr [rsi+0x10]
	movaps xmm8, xmm4
	shufps xmm4, xmm5, 136
	shufps xmm8, xmm5, 221
	movaps xmm5, xmm8
	movups xmm6, xmmword ptr [rsi+0x20]
	movups xmm7, xmmword ptr [rsi+0x30]
	movaps xmm8, xmm6
	shufps xmm6, xmm7, 136
	pshufd xmm6, xmm6, 0x93
	shufps xmm8, xmm7, 221
	pshufd xmm7, xmm8, 0x93
	movaps xmm14, xmmword ptr [ROT8+rip]	/* pshufb mask: rotate by 8 */
	movaps xmm15, xmmword ptr [ROT16+rip]	/* pshufb mask: rotate by 16 */
	mov al, 7				/* pass counter: 7 rounds */
9:
	/* First half of the round (rows as loaded). */
	paddd xmm0, xmm4
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm15
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1			/* rotate right by 12 */
	pslld xmm1, 20
	psrld xmm11, 12
	por xmm1, xmm11
	paddd xmm0, xmm5
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm14
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1			/* rotate right by 7 */
	pslld xmm1, 25
	psrld xmm11, 7
	por xmm1, xmm11
	/* Rotate the lanes of rows 0, 3 and 2 for the second half. */
	pshufd xmm0, xmm0, 0x93
	pshufd xmm3, xmm3, 0x4E
	pshufd xmm2, xmm2, 0x39
	paddd xmm0, xmm6
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm15
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1			/* rotate right by 12 */
	pslld xmm1, 20
	psrld xmm11, 12
	por xmm1, xmm11
	paddd xmm0, xmm7
	paddd xmm0, xmm1
	pxor xmm3, xmm0
	pshufb xmm3, xmm14
	paddd xmm2, xmm3
	pxor xmm1, xmm2
	movdqa xmm11, xmm1			/* rotate right by 7 */
	pslld xmm1, 25
	psrld xmm11, 7
	por xmm1, xmm11
	/* Undo the lane rotation. */
	pshufd xmm0, xmm0, 0x39
	pshufd xmm3, xmm3, 0x4E
	pshufd xmm2, xmm2, 0x93
	dec al
	jz 9f
	/* Not the last pass: reshuffle the message words in xmm4-xmm7. */
	movdqa xmm8, xmm4
	shufps xmm8, xmm5, 214
	pshufd xmm9, xmm4, 0x0F
	pshufd xmm4, xmm8, 0x39
	movdqa xmm8, xmm6
	shufps xmm8, xmm7, 250
	pblendw xmm9, xmm8, 0xCC
	movdqa xmm8, xmm7
	punpcklqdq xmm8, xmm5
	pblendw xmm8, xmm6, 0xC0
	pshufd xmm8, xmm8, 0x78
	punpckhdq xmm5, xmm7
	punpckldq xmm6, xmm5
	pshufd xmm7, xmm6, 0x1E
	movdqa xmm5, xmm9
	movdqa xmm6, xmm8
	jmp 9b
9:
	/*
	 * Output bytes 0..31:  rows 0/1 xor rows 2/3.
	 * Output bytes 32..63: rows 2/3 xor the original state at [rdi].
	 */
	movdqu xmm4, xmmword ptr [rdi]
	movdqu xmm5, xmmword ptr [rdi+0x10]
	pxor xmm0, xmm2
	pxor xmm1, xmm3
	pxor xmm2, xmm4
	pxor xmm3, xmm5
	movups xmmword ptr [r9], xmm0
	movups xmmword ptr [r9+0x10], xmm1
	movups xmmword ptr [r9+0x20], xmm2
	movups xmmword ptr [r9+0x30], xmm3
	RET
SET_SIZE(zfs_blake3_compress_xof_sse41)

SECTION_STATIC

/*
 * Constant tables.  The first one is 64-byte aligned and every table is
 * exactly 16 bytes long, so each label stays 16-byte aligned as required
 * by the movaps/movdqa loads and memory operands that reference them.
 */
.p2align 6
BLAKE3_IV:
	.long 0x6A09E667, 0xBB67AE85
	.long 0x3C6EF372, 0xA54FF53A
/* pshufb mask: rotate every 32-bit lane by 16 bits. */
ROT16:
	.byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
/* pshufb mask: rotate every 32-bit lane right by 8 bits. */
ROT8:
	.byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
/* Per-lane counter offsets and stride, masked in the hash_many prologue. */
ADD0:
	.long 0, 1, 2, 3
ADD1:
	.long 4, 4, 4, 4
/* Each IV word broadcast to all four lanes. */
BLAKE3_IV_0:
	.long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
BLAKE3_IV_1:
	.long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
BLAKE3_IV_2:
	.long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
BLAKE3_IV_3:
	.long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
BLAKE3_BLOCK_LEN:
	.long 64, 64, 64, 64
/* Sign-bit mask: xor-ed into both operands before pcmpgtd in hash_many. */
CMP_MSB_MASK:
	.long 0x80000000, 0x80000000, 0x80000000, 0x80000000

#endif /* HAVE_SSE4_1 */

#ifdef __ELF__
.section .note.GNU-stack,"",%progbits
#endif