/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
 * Copyright (c) 2019-2022 Samuel Neves
 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
 *
 * This is converted assembly: SSE4.1 -> ARMv8-A
 * Used tools: SIMDe https://github.com/simd-everywhere/simde
 *
 * Should work on FreeBSD, Linux and macOS
 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
 */

/*
 * Conventions used in this file:
 *   - GNU as syntax for AArch64, run through the C preprocessor first.
 *   - Pointer-authentication and BTI instructions are written as raw
 *     HINT encodings: hint #25 = PACIASP, hint #29 = AUTIASP,
 *     hint #34 = BTI C.
 *   - Only caller-saved vector registers (v0-v7, v16-v22) are touched by
 *     the functions in this part of the file.
 */

#if defined(__aarch64__)

/* make gcc <= 9 happy */
#if !defined(LD_VERSION) || LD_VERSION >= 233010000
#define CFI_NEGATE_RA_STATE .cfi_negate_ra_state
#else
#define CFI_NEGATE_RA_STATE
#endif

	.text
	/*
	 * ELF GNU property note. Layout: namesz, descsz, type, name, then one
	 * property record (type, datasz, data, padding).
	 */
	.section .note.gnu.property,"a",@note
	.p2align 3
	.word 4				/* namesz: "GNU" plus NUL */
	.word 16			/* descsz */
	.word 5				/* note type 5 (NT_GNU_PROPERTY_TYPE_0) */
	.asciz "GNU"
	.word 3221225472		/* 0xc0000000: AArch64 FEATURE_1_AND property */
	.word 4				/* property data size */
	.word 3				/* feature bits 0 and 1 set (BTI and PAC) */
	.word 0				/* padding to 8-byte alignment */
.Lsec_end0:
	.text

/*
 * zfs_blake3_compress_in_place_sse41
 *
 * Runs compress_pre on a stack scratch state and folds the result back
 * into the chaining value in place.
 *
 * In (as forwarded to compress_pre; names follow the BLAKE3 reference
 * prototype, which is not visible in this file):
 *   x0 = chaining value, 32 bytes, read and then overwritten
 *   x1 = message block, 64 bytes
 *   w2 = block length
 *   x3 = counter
 *   w4 = flags
 * Out:
 *   [x0 + 0 .. 15]  = row0 ^ row2
 *   [x0 + 16 .. 31] = row1 ^ row3
 *
 * Stack frame (96 bytes):
 *   [sp + 0 .. 63]  state written by compress_pre
 *   [sp + 64]       x29
 *   [sp + 72]       x30
 *   [sp + 80]       x19
 */
	.globl zfs_blake3_compress_in_place_sse41
	.p2align 2
	.type zfs_blake3_compress_in_place_sse41,@function
zfs_blake3_compress_in_place_sse41:
	.cfi_startproc
	hint #25			/* PACIASP */
	CFI_NEGATE_RA_STATE
	sub sp, sp, #96
	stp x29, x30, [sp, #64]
	add x29, sp, #64
	str x19, [sp, #80]
	.cfi_def_cfa w29, 32
	.cfi_offset w19, -16
	.cfi_offset w30, -24
	.cfi_offset w29, -32
	mov x19, x0			/* keep the cv pointer across the call */
	/* shift the arguments up by one slot to make room for the state pointer */
	mov w5, w4
	mov x4, x3
	mov w3, w2
	mov x2, x1
	mov x0, sp			/* scratch state lives at the bottom of the frame */
	mov x1, x19
	bl compress_pre
	ldp q0, q1, [sp]		/* rows 0 and 1 */
	ldp q2, q3, [sp, #32]		/* rows 2 and 3 */
	eor v0.16b, v2.16b, v0.16b
	eor v1.16b, v3.16b, v1.16b
	ldp x29, x30, [sp, #64]
	stp q0, q1, [x19]
	ldr x19, [sp, #80]
	add sp, sp, #96
	hint #29			/* AUTIASP */
	ret
.Lfunc_end0:
	.size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
	.cfi_endproc

	.section .rodata.cst16,"aM",@progbits,16
	.p2align 4
/*
 * Row 2 of the initial state, stored as two 64-bit little-endian values:
 * 0xbb67ae856a09e667 and 0xa54ff53a3c6ef372, i.e. the 32-bit words
 * 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a.
 */
.LCPI1_0:
	.xword -4942790177982912921
	.xword -6534734903820487822
/* TBL indices: rotate every 32-bit lane right by 16 bits */
.LCPI1_1:
	.byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
/* TBL indices: rotate every 32-bit lane right by 8 bits */
.LCPI1_2:
	.byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
	.text

/*
 * compress_pre (file-local leaf function)
 *
 * Builds the 4x4 word state and runs the compiler-scheduled sequence of
 * add / xor / rotate steps over it, with the message word shuffles
 * interleaved. Rotations by 16 and 8 use TBL with .LCPI1_1 / .LCPI1_2;
 * rotations by 12 and 7 use ushr + shl + orr pairs.
 *
 * In:
 *   x0 = output state buffer, 64 bytes
 *   x1 = chaining value, 32 bytes (becomes rows 0 and 1)
 *   x2 = message block, 64 bytes
 *   w3 = block length, only the low 8 bits are used
 *   x4 = 64-bit counter
 *   w5 = flags, only the low 8 bits are used
 * Out:
 *   [x0 + 0 .. 63] = four 16-byte state rows
 * Clobbers:
 *   x8, v0-v7, v16-v22
 */
	.p2align 2
	.type compress_pre,@function
compress_pre:
	.cfi_startproc
	hint #34			/* BTI C */
	/* row 3 = { counter (64 bit), block_len & 0xff, flags & 0xff } */
	fmov s1, w3
	movi d0, #0x0000ff000000ff
	ldr q2, [x1]
	adrp x8, .LCPI1_0
	mov v1.s[1], w5
	str q2, [x0]
	ldr q4, [x8, :lo12:.LCPI1_0]
	ldr q5, [x1, #16]
	adrp x8, .LCPI1_1
	and v0.8b, v1.8b, v0.8b
	fmov d1, x4
	stp q5, q4, [x0, #16]
	mov v1.d[1], v0.d[0]
	str q1, [x0, #48]
	/* first 32 message bytes, split into even and odd words */
	ldp q6, q7, [x2]
	uzp1 v3.4s, v6.4s, v7.4s
	add v0.4s, v2.4s, v3.4s
	uzp2 v2.4s, v6.4s, v7.4s
	add v16.4s, v0.4s, v5.4s
	ldr q0, [x8, :lo12:.LCPI1_1]	/* v0 = rotate-by-16 shuffle for the rest of the function */
	adrp x8, .LCPI1_2
	eor v1.16b, v16.16b, v1.16b
	add v7.4s, v16.4s, v2.4s
	tbl v1.16b, { v1.16b }, v0.16b
	add v4.4s, v1.4s, v4.4s
	eor v5.16b, v4.16b, v5.16b
	ushr v6.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	orr v5.16b, v5.16b, v6.16b
	add v6.4s, v7.4s, v5.4s
	eor v7.16b, v1.16b, v6.16b
	ldr q1, [x8, :lo12:.LCPI1_2]	/* v1 = rotate-by-8 shuffle for the rest of the function */
	add x8, x2, #32
	tbl v7.16b, { v7.16b }, v1.16b
	/* last 32 message bytes, de-interleaved by the load itself */
	ld2 { v16.4s, v17.4s }, [x8]
	add v4.4s, v4.4s, v7.4s
	ext v7.16b, v7.16b, v7.16b, #8
	add v6.4s, v6.4s, v16.4s
	eor v5.16b, v4.16b, v5.16b
	ext v4.16b, v4.16b, v4.16b, #4
	ext v16.16b, v16.16b, v16.16b, #12
	ext v6.16b, v6.16b, v6.16b, #12
	ushr v18.4s, v5.4s, #7
	shl v5.4s, v5.4s, #25
	orr v5.16b, v5.16b, v18.16b
	ext v18.16b, v17.16b, v17.16b, #12
	add v6.4s, v6.4s, v5.4s
	mov v17.16b, v18.16b
	eor v7.16b, v7.16b, v6.16b
	add v6.4s, v6.4s, v18.4s
	mov v17.s[1], v16.s[2]
	tbl v7.16b, { v7.16b }, v0.16b
	add v4.4s, v4.4s, v7.4s
	eor v5.16b, v4.16b, v5.16b
	ushr v19.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	orr v5.16b, v5.16b, v19.16b
	uzp1 v19.4s, v3.4s, v3.4s
	add v6.4s, v6.4s, v5.4s
	ext v19.16b, v19.16b, v3.16b, #8
	eor v7.16b, v7.16b, v6.16b
	uzp2 v19.4s, v19.4s, v2.4s
	tbl v7.16b, { v7.16b }, v1.16b
	add v6.4s, v6.4s, v19.4s
	add v4.4s, v4.4s, v7.4s
	ext v6.16b, v6.16b, v6.16b, #4
	ext v7.16b, v7.16b, v7.16b, #8
	eor v5.16b, v4.16b, v5.16b
	ext v4.16b, v4.16b, v4.16b, #12
	ushr v20.4s, v5.4s, #7
	shl v5.4s, v5.4s, #25
	orr v5.16b, v5.16b, v20.16b
	ext v20.16b, v3.16b, v3.16b, #12
	add v6.4s, v6.4s, v5.4s
	ext v3.16b, v3.16b, v20.16b, #12
	eor v7.16b, v7.16b, v6.16b
	rev64 v3.4s, v3.4s
	tbl v7.16b, { v7.16b }, v0.16b
	trn2 v3.4s, v3.4s, v17.4s
	add v4.4s, v4.4s, v7.4s
	add v6.4s, v6.4s, v3.4s
	eor v5.16b, v4.16b, v5.16b
	ushr v17.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	orr v5.16b, v5.16b, v17.16b
	zip1 v17.2d, v18.2d, v2.2d
	zip2 v2.4s, v2.4s, v18.4s
	add v6.4s, v6.4s, v5.4s
	mov v17.s[3], v16.s[3]
	zip1 v18.4s, v2.4s, v16.4s
	zip1 v2.4s, v16.4s, v2.4s
	eor v7.16b, v7.16b, v6.16b
	ext v6.16b, v6.16b, v6.16b, #12
	ext v16.16b, v2.16b, v18.16b, #8
	tbl v7.16b, { v7.16b }, v1.16b
	add v20.4s, v4.4s, v7.4s
	ext v4.16b, v17.16b, v17.16b, #12
	ext v7.16b, v7.16b, v7.16b, #8
	eor v5.16b, v20.16b, v5.16b
	uzp1 v4.4s, v17.4s, v4.4s
	ushr v17.4s, v5.4s, #7
	shl v5.4s, v5.4s, #25
	add v6.4s, v6.4s, v4.4s
	orr v5.16b, v5.16b, v17.16b
	ext v17.16b, v20.16b, v20.16b, #4
	add v6.4s, v6.4s, v5.4s
	eor v7.16b, v7.16b, v6.16b
	add v6.4s, v6.4s, v16.4s
	tbl v7.16b, { v7.16b }, v0.16b
	add v17.4s, v17.4s, v7.4s
	eor v5.16b, v17.16b, v5.16b
	ushr v2.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	orr v2.16b, v5.16b, v2.16b
	add v5.4s, v6.4s, v2.4s
	ext v6.16b, v19.16b, v19.16b, #4
	eor v7.16b, v7.16b, v5.16b
	uzp1 v18.4s, v6.4s, v6.4s
	tbl v7.16b, { v7.16b }, v1.16b
	ext v18.16b, v18.16b, v6.16b, #8
	add v17.4s, v17.4s, v7.4s
	uzp2 v18.4s, v18.4s, v3.4s
	ext v7.16b, v7.16b, v7.16b, #8
	eor v2.16b, v17.16b, v2.16b
	add v5.4s, v5.4s, v18.4s
	ext v17.16b, v17.16b, v17.16b, #12
	ushr v19.4s, v2.4s, #7
	shl v2.4s, v2.4s, #25
	ext v5.16b, v5.16b, v5.16b, #4
	orr v2.16b, v2.16b, v19.16b
	ext v19.16b, v6.16b, v6.16b, #12
	add v5.4s, v5.4s, v2.4s
	ext v6.16b, v6.16b, v19.16b, #12
	mov v19.16b, v16.16b
	eor v7.16b, v7.16b, v5.16b
	rev64 v6.4s, v6.4s
	mov v19.s[1], v4.s[2]
	tbl v7.16b, { v7.16b }, v0.16b
	add v17.4s, v17.4s, v7.4s
	eor v20.16b, v17.16b, v2.16b
	trn2 v2.4s, v6.4s, v19.4s
	ushr v6.4s, v20.4s, #12
	shl v19.4s, v20.4s, #20
	add v5.4s, v5.4s, v2.4s
	orr v6.16b, v19.16b, v6.16b
	add v19.4s, v5.4s, v6.4s
	eor v5.16b, v7.16b, v19.16b
	zip1 v7.2d, v16.2d, v3.2d
	zip2 v3.4s, v3.4s, v16.4s
	tbl v20.16b, { v5.16b }, v1.16b
	mov v7.s[3], v4.s[3]
	add v17.4s, v17.4s, v20.4s
	ext v5.16b, v7.16b, v7.16b, #12
	eor v6.16b, v17.16b, v6.16b
	uzp1 v5.4s, v7.4s, v5.4s
	ext v7.16b, v19.16b, v19.16b, #12
	ext v17.16b, v17.16b, v17.16b, #4
	ushr v19.4s, v6.4s, #7
	shl v6.4s, v6.4s, #25
	add v7.4s, v7.4s, v5.4s
	orr v6.16b, v6.16b, v19.16b
	ext v19.16b, v20.16b, v20.16b, #8
	add v7.4s, v7.4s, v6.4s
	eor v19.16b, v19.16b, v7.16b
	tbl v19.16b, { v19.16b }, v0.16b
	add v16.4s, v17.4s, v19.4s
	zip1 v17.4s, v3.4s, v4.4s
	zip1 v3.4s, v4.4s, v3.4s
	eor v4.16b, v16.16b, v6.16b
	ext v17.16b, v3.16b, v17.16b, #8
	ushr v3.4s, v4.4s, #12
	shl v4.4s, v4.4s, #20
	add v6.4s, v7.4s, v17.4s
	orr v3.16b, v4.16b, v3.16b
	add v4.4s, v6.4s, v3.4s
	ext v6.16b, v18.16b, v18.16b, #4
	eor v7.16b, v19.16b, v4.16b
	uzp1 v18.4s, v6.4s, v6.4s
	tbl v7.16b, { v7.16b }, v1.16b
	ext v18.16b, v18.16b, v6.16b, #8
	add v16.4s, v16.4s, v7.4s
	uzp2 v18.4s, v18.4s, v2.4s
	ext v7.16b, v7.16b, v7.16b, #8
	eor v3.16b, v16.16b, v3.16b
	add v4.4s, v4.4s, v18.4s
	ext v16.16b, v16.16b, v16.16b, #12
	ushr v19.4s, v3.4s, #7
	shl v3.4s, v3.4s, #25
	ext v4.16b, v4.16b, v4.16b, #4
	orr v3.16b, v3.16b, v19.16b
	ext v19.16b, v6.16b, v6.16b, #12
	add v4.4s, v4.4s, v3.4s
	ext v6.16b, v6.16b, v19.16b, #12
	mov v19.16b, v17.16b
	eor v7.16b, v7.16b, v4.16b
	rev64 v6.4s, v6.4s
	mov v19.s[1], v5.s[2]
	tbl v7.16b, { v7.16b }, v0.16b
	add v16.4s, v16.4s, v7.4s
	eor v20.16b, v16.16b, v3.16b
	trn2 v3.4s, v6.4s, v19.4s
	ushr v6.4s, v20.4s, #12
	shl v19.4s, v20.4s, #20
	add v4.4s, v4.4s, v3.4s
	orr v6.16b, v19.16b, v6.16b
	zip1 v19.2d, v17.2d, v2.2d
	zip2 v2.4s, v2.4s, v17.4s
	add v4.4s, v4.4s, v6.4s
	mov v19.s[3], v5.s[3]
	zip1 v17.4s, v2.4s, v5.4s
	zip1 v2.4s, v5.4s, v2.4s
	eor v7.16b, v7.16b, v4.16b
	ext v20.16b, v19.16b, v19.16b, #12
	ext v4.16b, v4.16b, v4.16b, #12
	ext v2.16b, v2.16b, v17.16b, #8
	tbl v7.16b, { v7.16b }, v1.16b
	add v16.4s, v16.4s, v7.4s
	ext v7.16b, v7.16b, v7.16b, #8
	eor v21.16b, v16.16b, v6.16b
	uzp1 v6.4s, v19.4s, v20.4s
	ext v16.16b, v16.16b, v16.16b, #4
	ushr v19.4s, v21.4s, #7
	shl v20.4s, v21.4s, #25
	add v4.4s, v4.4s, v6.4s
	orr v19.16b, v20.16b, v19.16b
	add v4.4s, v4.4s, v19.4s
	eor v7.16b, v7.16b, v4.16b
	add v4.4s, v4.4s, v2.4s
	tbl v7.16b, { v7.16b }, v0.16b
	add v16.4s, v16.4s, v7.4s
	eor v5.16b, v16.16b, v19.16b
	ushr v17.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	orr v5.16b, v5.16b, v17.16b
	ext v17.16b, v18.16b, v18.16b, #4
	add v4.4s, v4.4s, v5.4s
	uzp1 v18.4s, v17.4s, v17.4s
	eor v7.16b, v7.16b, v4.16b
	ext v18.16b, v18.16b, v17.16b, #8
	tbl v7.16b, { v7.16b }, v1.16b
	uzp2 v18.4s, v18.4s, v3.4s
	add v16.4s, v16.4s, v7.4s
	add v4.4s, v4.4s, v18.4s
	ext v7.16b, v7.16b, v7.16b, #8
	eor v5.16b, v16.16b, v5.16b
	ext v4.16b, v4.16b, v4.16b, #4
	ext v16.16b, v16.16b, v16.16b, #12
	ushr v19.4s, v5.4s, #7
	shl v5.4s, v5.4s, #25
	orr v5.16b, v5.16b, v19.16b
	add v19.4s, v4.4s, v5.4s
	eor v4.16b, v7.16b, v19.16b
	ext v7.16b, v17.16b, v17.16b, #12
	tbl v20.16b, { v4.16b }, v0.16b
	ext v4.16b, v17.16b, v7.16b, #12
	mov v7.16b, v2.16b
	add v16.4s, v16.4s, v20.4s
	rev64 v4.4s, v4.4s
	mov v7.s[1], v6.s[2]
	eor v5.16b, v16.16b, v5.16b
	trn2 v4.4s, v4.4s, v7.4s
	ushr v7.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	add v17.4s, v19.4s, v4.4s
	zip1 v19.2d, v2.2d, v3.2d
	zip2 v2.4s, v3.4s, v2.4s
	orr v5.16b, v5.16b, v7.16b
	mov v19.s[3], v6.s[3]
	add v7.4s, v17.4s, v5.4s
	eor v17.16b, v20.16b, v7.16b
	ext v20.16b, v19.16b, v19.16b, #12
	ext v7.16b, v7.16b, v7.16b, #12
	tbl v17.16b, { v17.16b }, v1.16b
	add v16.4s, v16.4s, v17.4s
	ext v17.16b, v17.16b, v17.16b, #8
	eor v21.16b, v16.16b, v5.16b
	uzp1 v5.4s, v19.4s, v20.4s
	ext v16.16b, v16.16b, v16.16b, #4
	ushr v19.4s, v21.4s, #7
	shl v20.4s, v21.4s, #25
	add v7.4s, v7.4s, v5.4s
	orr v19.16b, v20.16b, v19.16b
	add v7.4s, v7.4s, v19.4s
	eor v17.16b, v17.16b, v7.16b
	tbl v17.16b, { v17.16b }, v0.16b
	add v3.4s, v16.4s, v17.4s
	zip1 v16.4s, v2.4s, v6.4s
	zip1 v2.4s, v6.4s, v2.4s
	eor v6.16b, v3.16b, v19.16b
	ext v16.16b, v2.16b, v16.16b, #8
	ushr v2.4s, v6.4s, #12
	shl v6.4s, v6.4s, #20
	add v7.4s, v7.4s, v16.4s
	orr v2.16b, v6.16b, v2.16b
	add v6.4s, v7.4s, v2.4s
	ext v7.16b, v18.16b, v18.16b, #4
	eor v17.16b, v17.16b, v6.16b
	uzp1 v18.4s, v7.4s, v7.4s
	tbl v17.16b, { v17.16b }, v1.16b
	ext v18.16b, v18.16b, v7.16b, #8
	add v3.4s, v3.4s, v17.4s
	uzp2 v18.4s, v18.4s, v4.4s
	eor v2.16b, v3.16b, v2.16b
	add v6.4s, v6.4s, v18.4s
	ext v3.16b, v3.16b, v3.16b, #12
	ext v18.16b, v18.16b, v18.16b, #4
	ushr v19.4s, v2.4s, #7
	shl v2.4s, v2.4s, #25
	ext v6.16b, v6.16b, v6.16b, #4
	orr v19.16b, v2.16b, v19.16b
	ext v2.16b, v17.16b, v17.16b, #8
	ext v17.16b, v7.16b, v7.16b, #12
	add v6.4s, v6.4s, v19.4s
	eor v2.16b, v2.16b, v6.16b
	tbl v20.16b, { v2.16b }, v0.16b
	ext v2.16b, v7.16b, v17.16b, #12
	mov v7.16b, v16.16b
	add v17.4s, v3.4s, v20.4s
	rev64 v3.4s, v2.4s
	mov v7.s[1], v5.s[2]
	eor v19.16b, v17.16b, v19.16b
	trn2 v3.4s, v3.4s, v7.4s
	ushr v21.4s, v19.4s, #12
	shl v19.4s, v19.4s, #20
	add v6.4s, v6.4s, v3.4s
	orr v19.16b, v19.16b, v21.16b
	add v21.4s, v6.4s, v19.4s
	eor v6.16b, v20.16b, v21.16b
	zip1 v20.2d, v16.2d, v4.2d
	zip2 v4.4s, v4.4s, v16.4s
	tbl v22.16b, { v6.16b }, v1.16b
	mov v20.s[3], v5.s[3]
	add v17.4s, v17.4s, v22.4s
	ext v6.16b, v20.16b, v20.16b, #12
	eor v19.16b, v17.16b, v19.16b
	uzp1 v6.4s, v20.4s, v6.4s
	ext v20.16b, v21.16b, v21.16b, #12
	ext v17.16b, v17.16b, v17.16b, #4
	ushr v21.4s, v19.4s, #7
	shl v19.4s, v19.4s, #25
	add v20.4s, v20.4s, v6.4s
	orr v19.16b, v19.16b, v21.16b
	ext v21.16b, v22.16b, v22.16b, #8
	add v20.4s, v20.4s, v19.4s
	eor v21.16b, v21.16b, v20.16b
	tbl v21.16b, { v21.16b }, v0.16b
	add v16.4s, v17.4s, v21.4s
	zip1 v17.4s, v4.4s, v5.4s
	zip1 v4.4s, v5.4s, v4.4s
	eor v5.16b, v16.16b, v19.16b
	ext v4.16b, v4.16b, v17.16b, #8
	ushr v17.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	add v19.4s, v20.4s, v4.4s
	ext v20.16b, v18.16b, v18.16b, #8
	zip1 v3.2d, v4.2d, v3.2d
	orr v5.16b, v5.16b, v17.16b
	zip2 v2.4s, v2.4s, v4.4s
	uzp2 v7.4s, v20.4s, v7.4s
	mov v3.s[3], v6.s[3]
	add v17.4s, v19.4s, v5.4s
	ext v7.16b, v7.16b, v20.16b, #4
	eor v19.16b, v21.16b, v17.16b
	ext v17.16b, v17.16b, v17.16b, #4
	tbl v19.16b, { v19.16b }, v1.16b
	add v7.4s, v17.4s, v7.4s
	add v16.4s, v16.4s, v19.4s
	ext v17.16b, v19.16b, v19.16b, #8
	ext v19.16b, v18.16b, v18.16b, #12
	eor v5.16b, v16.16b, v5.16b
	ext v16.16b, v16.16b, v16.16b, #12
	ext v18.16b, v18.16b, v19.16b, #12
	mov v19.16b, v4.16b
	ushr v20.4s, v5.4s, #7
	shl v5.4s, v5.4s, #25
	rev64 v18.4s, v18.4s
	mov v19.s[1], v6.s[2]
	orr v5.16b, v5.16b, v20.16b
	trn2 v18.4s, v18.4s, v19.4s
	add v7.4s, v5.4s, v7.4s
	eor v17.16b, v17.16b, v7.16b
	add v7.4s, v7.4s, v18.4s
	ext v18.16b, v3.16b, v3.16b, #12
	tbl v17.16b, { v17.16b }, v0.16b
	uzp1 v3.4s, v3.4s, v18.4s
	add v16.4s, v16.4s, v17.4s
	eor v5.16b, v16.16b, v5.16b
	ushr v19.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	orr v5.16b, v5.16b, v19.16b
	add v7.4s, v7.4s, v5.4s
	eor v17.16b, v17.16b, v7.16b
	ext v7.16b, v7.16b, v7.16b, #12
	tbl v17.16b, { v17.16b }, v1.16b
	add v3.4s, v7.4s, v3.4s
	add v16.4s, v16.4s, v17.4s
	ext v7.16b, v17.16b, v17.16b, #8
	eor v5.16b, v16.16b, v5.16b
	ext v16.16b, v16.16b, v16.16b, #4
	ushr v18.4s, v5.4s, #7
	shl v5.4s, v5.4s, #25
	orr v5.16b, v5.16b, v18.16b
	add v3.4s, v3.4s, v5.4s
	eor v7.16b, v7.16b, v3.16b
	tbl v0.16b, { v7.16b }, v0.16b
	zip1 v7.4s, v2.4s, v6.4s
	zip1 v2.4s, v6.4s, v2.4s
	add v4.4s, v16.4s, v0.4s
	ext v2.16b, v2.16b, v7.16b, #8
	eor v5.16b, v4.16b, v5.16b
	add v2.4s, v3.4s, v2.4s
	ushr v6.4s, v5.4s, #12
	shl v5.4s, v5.4s, #20
	orr v3.16b, v5.16b, v6.16b
	add v2.4s, v2.4s, v3.4s
	eor v0.16b, v0.16b, v2.16b
	ext v2.16b, v2.16b, v2.16b, #4
	tbl v0.16b, { v0.16b }, v1.16b
	add v1.4s, v4.4s, v0.4s
	ext v0.16b, v0.16b, v0.16b, #8
	eor v3.16b, v1.16b, v3.16b
	ext v1.16b, v1.16b, v1.16b, #12
	ushr v4.4s, v3.4s, #7
	shl v3.4s, v3.4s, #25
	/* final state: rows 2 and 3 at +32, rows 0 and 1 at +0 */
	stp q1, q0, [x0, #32]
	orr v3.16b, v3.16b, v4.16b
	stp q2, q3, [x0]
	ret
.Lfunc_end1:
	.size compress_pre, .Lfunc_end1-compress_pre
	.cfi_endproc

/*
 * zfs_blake3_compress_xof_sse41
 *
 * Same compression as the in-place variant, but writes a 64-byte result
 * to a separate buffer and leaves the chaining value untouched.
 *
 * In (as forwarded to compress_pre; names follow the BLAKE3 reference
 * prototype, which is not visible in this file):
 *   x0 = chaining value, 32 bytes, read only
 *   x1 = message block, 64 bytes
 *   w2 = block length
 *   x3 = counter
 *   w4 = flags
 *   x5 = output buffer, 64 bytes
 * Out:
 *   [x5 + 0 .. 15]  = row0 ^ row2
 *   [x5 + 16 .. 31] = row1 ^ row3
 *   [x5 + 32 .. 47] = cv[0 .. 15]  ^ row2
 *   [x5 + 48 .. 63] = cv[16 .. 31] ^ row3
 *
 * Stack frame (96 bytes):
 *   [sp + 0 .. 63]  state written by compress_pre
 *   [sp + 64]       x29
 *   [sp + 72]       x30
 *   [sp + 80]       x20
 *   [sp + 88]       x19
 */
	.globl zfs_blake3_compress_xof_sse41
	.p2align 2
	.type zfs_blake3_compress_xof_sse41,@function
zfs_blake3_compress_xof_sse41:
	.cfi_startproc
	hint #25			/* PACIASP */
	CFI_NEGATE_RA_STATE
	sub sp, sp, #96
	stp x29, x30, [sp, #64]
	add x29, sp, #64
	stp x20, x19, [sp, #80]
	.cfi_def_cfa w29, 32
	.cfi_offset w19, -8
	.cfi_offset w20, -16
	.cfi_offset w30, -24
	.cfi_offset w29, -32
	mov x20, x0			/* cv pointer, needed again after the call */
	mov x19, x5			/* output pointer, saved before w5 is reused */
	mov w5, w4
	mov x4, x3
	mov w3, w2
	mov x2, x1
	mov x0, sp			/* scratch state lives at the bottom of the frame */
	mov x1, x20
	bl compress_pre
	ldp q0, q1, [sp]		/* rows 0 and 1 */
	ldp q2, q3, [sp, #32]		/* rows 2 and 3 */
	eor v0.16b, v2.16b, v0.16b
	eor v1.16b, v3.16b, v1.16b
	ldp x29, x30, [sp, #64]
	stp q0, q1, [x19]
	ldr q0, [x20]
	eor v0.16b, v0.16b, v2.16b
	str q0, [x19, #32]
	ldr q0, [x20, #16]
	eor v0.16b, v0.16b, v3.16b
	str q0, [x19, #48]
	ldp x20, x19, [sp, #80]
	add sp, sp, #96
	hint #29			/* AUTIASP */
	ret
.Lfunc_end2:
	.size zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
	.cfi_endproc

	.section .rodata.cst16,"aM",@progbits,16
	.p2align 4
.LCPI3_0:
	.word 0, 1, 2, 3
/* TBL indices: rotate every 32-bit lane right by 16 bits */
.LCPI3_1:
	.byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13
.LCPI3_2:
	.byte 1, 2, 3, 0, 5, 6, 7
.byte 4 648 .byte 9 649 .byte 10 650 .byte 11 651 .byte 8 652 .byte 13 653 .byte 14 654 .byte 15 655 .byte 12 656.LCPI3_3: 657 .word 1779033703 658 .word 3144134277 659 .word 1013904242 660 .word 2773480762 661 .text 662 .globl zfs_blake3_hash_many_sse41 663 .p2align 2 664 .type zfs_blake3_hash_many_sse41,@function 665zfs_blake3_hash_many_sse41: 666 .cfi_startproc 667 hint #34 668 stp d15, d14, [sp, #-144]! 669 stp d13, d12, [sp, #16] 670 stp d11, d10, [sp, #32] 671 stp d9, d8, [sp, #48] 672 stp x29, x27, [sp, #64] 673 stp x26, x25, [sp, #80] 674 stp x24, x23, [sp, #96] 675 stp x22, x21, [sp, #112] 676 stp x20, x19, [sp, #128] 677 sub sp, sp, #368 678 .cfi_def_cfa_offset 512 679 .cfi_offset w19, -8 680 .cfi_offset w20, -16 681 .cfi_offset w21, -24 682 .cfi_offset w22, -32 683 .cfi_offset w23, -40 684 .cfi_offset w24, -48 685 .cfi_offset w25, -56 686 .cfi_offset w26, -64 687 .cfi_offset w27, -72 688 .cfi_offset w29, -80 689 .cfi_offset b8, -88 690 .cfi_offset b9, -96 691 .cfi_offset b10, -104 692 .cfi_offset b11, -112 693 .cfi_offset b12, -120 694 .cfi_offset b13, -128 695 .cfi_offset b14, -136 696 .cfi_offset b15, -144 697 ldr x8, [sp, #520] 698 adrp x11, .LCPI3_1 699 ldrb w9, [sp, #512] 700 adrp x10, .LCPI3_2 701 cmp x1, #4 702 b.lo .LBB3_6 703 adrp x12, .LCPI3_0 704 sbfx w13, w5, #0, #1 705 mov w15, #58983 706 mov w16, #44677 707 movk w15, #27145, lsl #16 708 movk w16, #47975, lsl #16 709 ldr q0, [x12, :lo12:.LCPI3_0] 710 dup v1.4s, w13 711 movi v13.4s, #64 712 mov w13, #62322 713 mov w14, #62778 714 orr w12, w7, w6 715 and v0.16b, v1.16b, v0.16b 716 ldr q1, [x11, :lo12:.LCPI3_1] 717 movk w13, #15470, lsl #16 718 movk w14, #42319, lsl #16 719 dup v14.4s, w15 720 stp q0, q1, [sp, #16] 721 orr v0.4s, #128, lsl #24 722 str q0, [sp] 723 dup v0.4s, w16 724 stp q0, q14, [sp, #48] 725 b .LBB3_3 726.LBB3_2: 727 zip1 v0.4s, v29.4s, v8.4s 728 add x15, x4, #4 729 zip1 v1.4s, v30.4s, v31.4s 730 tst w5, #0x1 731 zip1 v2.4s, v24.4s, v18.4s 732 csel x4, x15, x4, ne 733 zip1 
v3.4s, v25.4s, v26.4s 734 add x0, x0, #32 735 zip2 v6.4s, v29.4s, v8.4s 736 sub x1, x1, #4 737 zip1 v4.2d, v0.2d, v1.2d 738 cmp x1, #3 739 zip2 v7.4s, v30.4s, v31.4s 740 zip1 v5.2d, v2.2d, v3.2d 741 zip2 v0.2d, v0.2d, v1.2d 742 zip2 v1.2d, v2.2d, v3.2d 743 zip2 v2.4s, v24.4s, v18.4s 744 zip2 v3.4s, v25.4s, v26.4s 745 stp q4, q5, [x8] 746 zip2 v4.2d, v6.2d, v7.2d 747 stp q0, q1, [x8, #32] 748 zip1 v0.2d, v6.2d, v7.2d 749 zip1 v1.2d, v2.2d, v3.2d 750 zip2 v2.2d, v2.2d, v3.2d 751 stp q0, q1, [x8, #64] 752 stp q4, q2, [x8, #96] 753 add x8, x8, #128 754 b.ls .LBB3_6 755.LBB3_3: 756 mov x15, x3 757 add x16, x3, #8 758 add x17, x3, #12 759 add x19, x3, #16 760 add x20, x3, #20 761 ld1r { v29.4s }, [x15], #4 762 ld1r { v30.4s }, [x16] 763 add x16, x3, #24 764 ld1r { v31.4s }, [x17] 765 add x17, x3, #28 766 ld1r { v24.4s }, [x19] 767 ld1r { v18.4s }, [x20] 768 ld1r { v25.4s }, [x16] 769 ld1r { v8.4s }, [x15] 770 ld1r { v26.4s }, [x17] 771 cbz x2, .LBB3_2 772 ldr q1, [sp, #16] 773 dup v0.4s, w4 774 lsr x17, x4, #32 775 mov x15, xzr 776 ldp x19, x20, [x0, #16] 777 add v1.4s, v0.4s, v1.4s 778 mov x21, x2 779 movi v0.4s, #128, lsl #24 780 mov w26, w12 781 str q1, [sp, #96] 782 eor v0.16b, v1.16b, v0.16b 783 ldr q1, [sp] 784 cmgt v0.4s, v1.4s, v0.4s 785 dup v1.4s, w17 786 ldp x16, x17, [x0] 787 sub v0.4s, v1.4s, v0.4s 788 str q0, [sp, #80] 789.LBB3_5: 790 add x23, x16, x15 791 add x24, x17, x15 792 add x22, x19, x15 793 add x25, x20, x15 794 subs x21, x21, #1 795 add x15, x15, #64 796 ldp q1, q2, [x23] 797 csel w27, w9, wzr, eq 798 orr w26, w27, w26 799 and w26, w26, #0xff 800 ldp q4, q5, [x24] 801 dup v0.4s, w26 802 mov w26, w6 803 zip1 v22.4s, v1.4s, v4.4s 804 zip2 v20.4s, v1.4s, v4.4s 805 ldp q6, q7, [x22] 806 zip1 v17.4s, v2.4s, v5.4s 807 zip2 v23.4s, v2.4s, v5.4s 808 ldp q16, q21, [x25] 809 zip1 v19.4s, v6.4s, v16.4s 810 zip2 v1.4s, v6.4s, v16.4s 811 ldp q27, q28, [x23, #32] 812 zip1 v4.4s, v7.4s, v21.4s 813 zip2 v5.4s, v7.4s, v21.4s 814 zip2 v15.2d, v17.2d, v4.2d 815 ldp 
q9, q10, [x24, #32] 816 mov v17.d[1], v4.d[0] 817 add v4.4s, v30.4s, v25.4s 818 zip2 v11.2d, v23.2d, v5.2d 819 zip2 v3.4s, v27.4s, v9.4s 820 zip1 v7.4s, v27.4s, v9.4s 821 ldp q12, q6, [x22, #32] 822 mov v23.d[1], v5.d[0] 823 stp q11, q3, [sp, #256] 824 add v5.4s, v31.4s, v26.4s 825 add v4.4s, v4.4s, v17.4s 826 str q23, [sp, #352] 827 ldp q16, q2, [x25, #32] 828 add v5.4s, v5.4s, v23.4s 829 zip1 v3.4s, v12.4s, v16.4s 830 eor v0.16b, v5.16b, v0.16b 831 zip1 v9.4s, v6.4s, v2.4s 832 zip2 v2.4s, v6.4s, v2.4s 833 stp q7, q3, [sp, #208] 834 zip2 v3.4s, v12.4s, v16.4s 835 zip1 v12.4s, v28.4s, v10.4s 836 zip2 v10.4s, v28.4s, v10.4s 837 stp q17, q2, [sp, #160] 838 zip2 v28.2d, v22.2d, v19.2d 839 mov v22.d[1], v19.d[0] 840 str q3, [sp, #240] 841 add v2.4s, v8.4s, v18.4s 842 eor v16.16b, v4.16b, v13.16b 843 dup v17.4s, w13 844 mov v3.16b, v22.16b 845 stp q22, q28, [sp, #320] 846 zip2 v22.2d, v20.2d, v1.2d 847 mov v20.d[1], v1.d[0] 848 add v1.4s, v29.4s, v24.4s 849 add v4.4s, v4.4s, v15.4s 850 add v5.4s, v5.4s, v11.4s 851 add v2.4s, v2.4s, v20.4s 852 stp q15, q20, [sp, #288] 853 add v1.4s, v1.4s, v3.4s 854 ldr q3, [sp, #96] 855 dup v20.4s, w14 856 mov v23.16b, v22.16b 857 mov v15.16b, v10.16b 858 eor v6.16b, v1.16b, v3.16b 859 ldr q3, [sp, #80] 860 add v1.4s, v1.4s, v28.4s 861 ldr q28, [sp, #272] 862 str q23, [sp, #128] 863 eor v7.16b, v2.16b, v3.16b 864 ldp q27, q3, [sp, #32] 865 add v2.4s, v2.4s, v22.4s 866 tbl v6.16b, { v6.16b }, v27.16b 867 tbl v7.16b, { v7.16b }, v27.16b 868 tbl v16.16b, { v16.16b }, v27.16b 869 tbl v0.16b, { v0.16b }, v27.16b 870 add v19.4s, v6.4s, v14.4s 871 add v21.4s, v7.4s, v3.4s 872 add v30.4s, v16.4s, v17.4s 873 add v31.4s, v0.4s, v20.4s 874 eor v24.16b, v19.16b, v24.16b 875 eor v17.16b, v21.16b, v18.16b 876 ushr v18.4s, v24.4s, #12 877 shl v20.4s, v24.4s, #20 878 eor v24.16b, v30.16b, v25.16b 879 eor v25.16b, v31.16b, v26.16b 880 ushr v26.4s, v17.4s, #12 881 shl v17.4s, v17.4s, #20 882 ushr v29.4s, v24.4s, #12 883 shl v24.4s, v24.4s, #20 884 ushr 
v8.4s, v25.4s, #12 885 shl v25.4s, v25.4s, #20 886 orr v3.16b, v20.16b, v18.16b 887 ldr q18, [x10, :lo12:.LCPI3_2] 888 orr v13.16b, v17.16b, v26.16b 889 orr v24.16b, v24.16b, v29.16b 890 orr v14.16b, v25.16b, v8.16b 891 add v8.4s, v1.4s, v3.4s 892 add v29.4s, v2.4s, v13.4s 893 add v17.4s, v4.4s, v24.4s 894 add v20.4s, v5.4s, v14.4s 895 eor v1.16b, v6.16b, v8.16b 896 eor v2.16b, v7.16b, v29.16b 897 eor v4.16b, v16.16b, v17.16b 898 eor v0.16b, v0.16b, v20.16b 899 tbl v25.16b, { v1.16b }, v18.16b 900 tbl v16.16b, { v2.16b }, v18.16b 901 tbl v6.16b, { v4.16b }, v18.16b 902 tbl v4.16b, { v0.16b }, v18.16b 903 add v19.4s, v19.4s, v25.4s 904 add v21.4s, v21.4s, v16.4s 905 add v26.4s, v30.4s, v6.4s 906 add v7.4s, v31.4s, v4.4s 907 eor v0.16b, v19.16b, v3.16b 908 eor v1.16b, v21.16b, v13.16b 909 eor v2.16b, v26.16b, v24.16b 910 eor v3.16b, v7.16b, v14.16b 911 ushr v5.4s, v0.4s, #7 912 shl v0.4s, v0.4s, #25 913 ushr v24.4s, v1.4s, #7 914 shl v1.4s, v1.4s, #25 915 ushr v30.4s, v2.4s, #7 916 shl v2.4s, v2.4s, #25 917 orr v5.16b, v0.16b, v5.16b 918 orr v0.16b, v1.16b, v24.16b 919 ushr v31.4s, v3.4s, #7 920 orr v2.16b, v2.16b, v30.16b 921 ldp q24, q30, [sp, #208] 922 shl v3.4s, v3.4s, #25 923 zip2 v14.2d, v12.2d, v9.2d 924 mov v22.16b, v24.16b 925 orr v1.16b, v3.16b, v31.16b 926 zip2 v3.2d, v24.2d, v30.2d 927 mov v24.16b, v28.16b 928 mov v22.d[1], v30.d[0] 929 ldr q30, [sp, #240] 930 mov v31.16b, v12.16b 931 stp q22, q14, [sp, #224] 932 mov v24.d[1], v30.d[0] 933 add v12.4s, v8.4s, v22.4s 934 mov v31.d[1], v9.d[0] 935 add v22.4s, v29.4s, v24.4s 936 ldr q29, [sp, #176] 937 zip2 v28.2d, v28.2d, v30.2d 938 mov v9.16b, v24.16b 939 mov v15.d[1], v29.d[0] 940 zip2 v8.2d, v10.2d, v29.2d 941 add v10.4s, v12.4s, v0.4s 942 add v22.4s, v22.4s, v2.4s 943 str q9, [sp, #144] 944 add v20.4s, v20.4s, v15.4s 945 add v17.4s, v17.4s, v31.4s 946 stp q3, q8, [sp, #192] 947 eor v4.16b, v4.16b, v10.16b 948 eor v25.16b, v25.16b, v22.16b 949 add v20.4s, v20.4s, v5.4s 950 add v17.4s, v17.4s, v1.4s 951 
tbl v4.16b, { v4.16b }, v27.16b 952 tbl v25.16b, { v25.16b }, v27.16b 953 eor v6.16b, v6.16b, v20.16b 954 eor v16.16b, v16.16b, v17.16b 955 add v26.4s, v26.4s, v4.4s 956 add v7.4s, v7.4s, v25.4s 957 tbl v6.16b, { v6.16b }, v27.16b 958 tbl v16.16b, { v16.16b }, v27.16b 959 eor v0.16b, v26.16b, v0.16b 960 eor v2.16b, v7.16b, v2.16b 961 add v21.4s, v21.4s, v6.4s 962 add v19.4s, v19.4s, v16.4s 963 ushr v12.4s, v0.4s, #12 964 shl v0.4s, v0.4s, #20 965 ushr v13.4s, v2.4s, #12 966 shl v2.4s, v2.4s, #20 967 eor v5.16b, v21.16b, v5.16b 968 eor v1.16b, v19.16b, v1.16b 969 orr v0.16b, v0.16b, v12.16b 970 add v10.4s, v10.4s, v3.4s 971 orr v2.16b, v2.16b, v13.16b 972 ushr v13.4s, v5.4s, #12 973 shl v5.4s, v5.4s, #20 974 add v22.4s, v22.4s, v28.4s 975 ushr v12.4s, v1.4s, #12 976 shl v1.4s, v1.4s, #20 977 add v10.4s, v10.4s, v0.4s 978 orr v5.16b, v5.16b, v13.16b 979 add v22.4s, v22.4s, v2.4s 980 add v20.4s, v20.4s, v8.4s 981 orr v1.16b, v1.16b, v12.16b 982 add v17.4s, v17.4s, v14.4s 983 eor v4.16b, v4.16b, v10.16b 984 eor v25.16b, v25.16b, v22.16b 985 add v20.4s, v20.4s, v5.4s 986 add v17.4s, v17.4s, v1.4s 987 tbl v4.16b, { v4.16b }, v18.16b 988 tbl v25.16b, { v25.16b }, v18.16b 989 eor v6.16b, v6.16b, v20.16b 990 eor v16.16b, v16.16b, v17.16b 991 add v26.4s, v26.4s, v4.4s 992 add v7.4s, v7.4s, v25.4s 993 tbl v6.16b, { v6.16b }, v18.16b 994 tbl v16.16b, { v16.16b }, v18.16b 995 eor v0.16b, v26.16b, v0.16b 996 eor v2.16b, v7.16b, v2.16b 997 add v21.4s, v21.4s, v6.4s 998 add v19.4s, v19.4s, v16.4s 999 ushr v12.4s, v0.4s, #7 1000 shl v0.4s, v0.4s, #25 1001 ushr v13.4s, v2.4s, #7 1002 shl v2.4s, v2.4s, #25 1003 eor v5.16b, v21.16b, v5.16b 1004 eor v1.16b, v19.16b, v1.16b 1005 orr v0.16b, v0.16b, v12.16b 1006 add v22.4s, v22.4s, v23.4s 1007 orr v2.16b, v2.16b, v13.16b 1008 ushr v13.4s, v5.4s, #7 1009 shl v5.4s, v5.4s, #25 1010 add v17.4s, v17.4s, v11.4s 1011 mov v30.16b, v28.16b 1012 mov v28.16b, v23.16b 1013 ldr q23, [sp, #304] 1014 ushr v12.4s, v1.4s, #7 1015 shl v1.4s, v1.4s, #25 
1016 add v22.4s, v22.4s, v0.4s 1017 mov v29.16b, v31.16b 1018 ldr q31, [sp, #160] 1019 orr v5.16b, v5.16b, v13.16b 1020 add v17.4s, v17.4s, v2.4s 1021 add v10.4s, v10.4s, v23.4s 1022 orr v1.16b, v1.16b, v12.16b 1023 str q29, [sp, #272] 1024 eor v16.16b, v16.16b, v22.16b 1025 add v20.4s, v20.4s, v31.4s 1026 eor v6.16b, v6.16b, v17.16b 1027 add v10.4s, v10.4s, v5.4s 1028 tbl v16.16b, { v16.16b }, v27.16b 1029 add v20.4s, v20.4s, v1.4s 1030 tbl v6.16b, { v6.16b }, v27.16b 1031 eor v25.16b, v25.16b, v10.16b 1032 add v21.4s, v21.4s, v16.4s 1033 eor v4.16b, v4.16b, v20.16b 1034 add v26.4s, v26.4s, v6.4s 1035 tbl v25.16b, { v25.16b }, v27.16b 1036 eor v0.16b, v21.16b, v0.16b 1037 tbl v4.16b, { v4.16b }, v27.16b 1038 eor v2.16b, v26.16b, v2.16b 1039 add v19.4s, v19.4s, v25.4s 1040 ushr v12.4s, v0.4s, #12 1041 shl v0.4s, v0.4s, #20 1042 add v7.4s, v7.4s, v4.4s 1043 ushr v13.4s, v2.4s, #12 1044 shl v2.4s, v2.4s, #20 1045 eor v5.16b, v5.16b, v19.16b 1046 add v22.4s, v22.4s, v24.4s 1047 ldr q24, [sp, #320] 1048 orr v0.16b, v0.16b, v12.16b 1049 eor v1.16b, v7.16b, v1.16b 1050 orr v2.16b, v2.16b, v13.16b 1051 ushr v12.4s, v5.4s, #12 1052 shl v5.4s, v5.4s, #20 1053 add v17.4s, v17.4s, v24.4s 1054 ldr q24, [sp, #352] 1055 ushr v13.4s, v1.4s, #12 1056 shl v1.4s, v1.4s, #20 1057 add v22.4s, v22.4s, v0.4s 1058 orr v5.16b, v5.16b, v12.16b 1059 add v17.4s, v17.4s, v2.4s 1060 add v10.4s, v10.4s, v24.4s 1061 ldr q24, [sp, #336] 1062 orr v1.16b, v1.16b, v13.16b 1063 eor v16.16b, v16.16b, v22.16b 1064 add v20.4s, v20.4s, v14.4s 1065 eor v6.16b, v6.16b, v17.16b 1066 add v10.4s, v10.4s, v5.4s 1067 tbl v16.16b, { v16.16b }, v18.16b 1068 add v20.4s, v20.4s, v1.4s 1069 tbl v6.16b, { v6.16b }, v18.16b 1070 eor v25.16b, v25.16b, v10.16b 1071 add v21.4s, v21.4s, v16.4s 1072 eor v4.16b, v4.16b, v20.16b 1073 add v26.4s, v26.4s, v6.4s 1074 tbl v25.16b, { v25.16b }, v18.16b 1075 eor v0.16b, v21.16b, v0.16b 1076 tbl v4.16b, { v4.16b }, v18.16b 1077 eor v2.16b, v26.16b, v2.16b 1078 add v19.4s, v19.4s, 
v25.4s 1079 ushr v12.4s, v0.4s, #7 1080 shl v0.4s, v0.4s, #25 1081 add v7.4s, v7.4s, v4.4s 1082 ushr v13.4s, v2.4s, #7 1083 shl v2.4s, v2.4s, #25 1084 eor v5.16b, v19.16b, v5.16b 1085 orr v0.16b, v0.16b, v12.16b 1086 eor v1.16b, v7.16b, v1.16b 1087 add v10.4s, v10.4s, v24.4s 1088 orr v2.16b, v2.16b, v13.16b 1089 ushr v12.4s, v5.4s, #7 1090 shl v5.4s, v5.4s, #25 1091 add v22.4s, v22.4s, v29.4s 1092 ushr v13.4s, v1.4s, #7 1093 shl v1.4s, v1.4s, #25 1094 add v10.4s, v10.4s, v0.4s 1095 orr v5.16b, v5.16b, v12.16b 1096 add v22.4s, v22.4s, v2.4s 1097 add v20.4s, v20.4s, v8.4s 1098 ldr q8, [sp, #288] 1099 orr v1.16b, v1.16b, v13.16b 1100 add v17.4s, v17.4s, v3.4s 1101 ldr q3, [sp, #352] 1102 eor v4.16b, v4.16b, v10.16b 1103 eor v25.16b, v25.16b, v22.16b 1104 add v20.4s, v20.4s, v5.4s 1105 add v17.4s, v17.4s, v1.4s 1106 tbl v4.16b, { v4.16b }, v27.16b 1107 tbl v25.16b, { v25.16b }, v27.16b 1108 eor v6.16b, v6.16b, v20.16b 1109 eor v16.16b, v16.16b, v17.16b 1110 add v26.4s, v26.4s, v4.4s 1111 add v7.4s, v7.4s, v25.4s 1112 tbl v6.16b, { v6.16b }, v27.16b 1113 tbl v16.16b, { v16.16b }, v27.16b 1114 eor v0.16b, v26.16b, v0.16b 1115 eor v2.16b, v7.16b, v2.16b 1116 add v21.4s, v21.4s, v6.4s 1117 add v19.4s, v19.4s, v16.4s 1118 ushr v12.4s, v0.4s, #12 1119 shl v0.4s, v0.4s, #20 1120 ushr v13.4s, v2.4s, #12 1121 shl v2.4s, v2.4s, #20 1122 eor v5.16b, v21.16b, v5.16b 1123 eor v1.16b, v19.16b, v1.16b 1124 orr v0.16b, v0.16b, v12.16b 1125 add v10.4s, v10.4s, v30.4s 1126 orr v2.16b, v2.16b, v13.16b 1127 ushr v13.4s, v5.4s, #12 1128 shl v5.4s, v5.4s, #20 1129 add v22.4s, v22.4s, v8.4s 1130 mov v24.16b, v30.16b 1131 mov v30.16b, v15.16b 1132 add v17.4s, v17.4s, v15.4s 1133 ldr q15, [sp, #224] 1134 ushr v12.4s, v1.4s, #12 1135 shl v1.4s, v1.4s, #20 1136 add v10.4s, v10.4s, v0.4s 1137 str q30, [sp, #176] 1138 orr v5.16b, v5.16b, v13.16b 1139 add v22.4s, v22.4s, v2.4s 1140 add v20.4s, v20.4s, v15.4s 1141 orr v1.16b, v1.16b, v12.16b 1142 eor v4.16b, v4.16b, v10.16b 1143 eor v25.16b, 
v25.16b, v22.16b 1144 add v20.4s, v20.4s, v5.4s 1145 add v17.4s, v17.4s, v1.4s 1146 tbl v4.16b, { v4.16b }, v18.16b 1147 tbl v25.16b, { v25.16b }, v18.16b 1148 eor v6.16b, v6.16b, v20.16b 1149 eor v16.16b, v16.16b, v17.16b 1150 add v26.4s, v26.4s, v4.4s 1151 add v7.4s, v7.4s, v25.4s 1152 tbl v6.16b, { v6.16b }, v18.16b 1153 tbl v16.16b, { v16.16b }, v18.16b 1154 eor v0.16b, v26.16b, v0.16b 1155 eor v2.16b, v7.16b, v2.16b 1156 add v21.4s, v21.4s, v6.4s 1157 add v19.4s, v19.4s, v16.4s 1158 ushr v12.4s, v0.4s, #7 1159 shl v0.4s, v0.4s, #25 1160 ushr v13.4s, v2.4s, #7 1161 shl v2.4s, v2.4s, #25 1162 eor v5.16b, v21.16b, v5.16b 1163 eor v1.16b, v19.16b, v1.16b 1164 orr v0.16b, v0.16b, v12.16b 1165 add v22.4s, v22.4s, v9.4s 1166 orr v2.16b, v2.16b, v13.16b 1167 ushr v13.4s, v5.4s, #7 1168 shl v5.4s, v5.4s, #25 1169 add v17.4s, v17.4s, v14.4s 1170 ushr v12.4s, v1.4s, #7 1171 shl v1.4s, v1.4s, #25 1172 add v22.4s, v22.4s, v0.4s 1173 orr v5.16b, v5.16b, v13.16b 1174 add v17.4s, v17.4s, v2.4s 1175 add v10.4s, v10.4s, v28.4s 1176 orr v1.16b, v1.16b, v12.16b 1177 eor v16.16b, v16.16b, v22.16b 1178 add v20.4s, v20.4s, v11.4s 1179 eor v6.16b, v6.16b, v17.16b 1180 add v10.4s, v10.4s, v5.4s 1181 tbl v16.16b, { v16.16b }, v27.16b 1182 add v20.4s, v20.4s, v1.4s 1183 tbl v6.16b, { v6.16b }, v27.16b 1184 eor v25.16b, v25.16b, v10.16b 1185 add v21.4s, v21.4s, v16.4s 1186 eor v4.16b, v4.16b, v20.16b 1187 add v26.4s, v26.4s, v6.4s 1188 tbl v25.16b, { v25.16b }, v27.16b 1189 eor v0.16b, v21.16b, v0.16b 1190 tbl v4.16b, { v4.16b }, v27.16b 1191 eor v2.16b, v26.16b, v2.16b 1192 add v19.4s, v19.4s, v25.4s 1193 ushr v12.4s, v0.4s, #12 1194 shl v0.4s, v0.4s, #20 1195 add v7.4s, v7.4s, v4.4s 1196 ushr v13.4s, v2.4s, #12 1197 shl v2.4s, v2.4s, #20 1198 eor v5.16b, v5.16b, v19.16b 1199 orr v0.16b, v0.16b, v12.16b 1200 eor v1.16b, v7.16b, v1.16b 1201 add v22.4s, v22.4s, v29.4s 1202 orr v2.16b, v2.16b, v13.16b 1203 ushr v12.4s, v5.4s, #12 1204 shl v5.4s, v5.4s, #20 1205 add v17.4s, v17.4s, v23.4s 
1206 ushr v13.4s, v1.4s, #12 1207 shl v1.4s, v1.4s, #20 1208 add v22.4s, v22.4s, v0.4s 1209 orr v5.16b, v5.16b, v12.16b 1210 add v17.4s, v17.4s, v2.4s 1211 add v10.4s, v10.4s, v31.4s 1212 orr v1.16b, v1.16b, v13.16b 1213 eor v16.16b, v16.16b, v22.16b 1214 add v20.4s, v20.4s, v30.4s 1215 eor v6.16b, v6.16b, v17.16b 1216 add v10.4s, v10.4s, v5.4s 1217 tbl v16.16b, { v16.16b }, v18.16b 1218 add v20.4s, v20.4s, v1.4s 1219 tbl v6.16b, { v6.16b }, v18.16b 1220 eor v25.16b, v25.16b, v10.16b 1221 add v21.4s, v21.4s, v16.4s 1222 eor v4.16b, v4.16b, v20.16b 1223 add v26.4s, v26.4s, v6.4s 1224 tbl v25.16b, { v25.16b }, v18.16b 1225 eor v0.16b, v21.16b, v0.16b 1226 tbl v4.16b, { v4.16b }, v18.16b 1227 eor v2.16b, v26.16b, v2.16b 1228 add v19.4s, v19.4s, v25.4s 1229 ushr v12.4s, v0.4s, #7 1230 shl v0.4s, v0.4s, #25 1231 add v7.4s, v7.4s, v4.4s 1232 ushr v13.4s, v2.4s, #7 1233 shl v2.4s, v2.4s, #25 1234 eor v5.16b, v19.16b, v5.16b 1235 add v10.4s, v10.4s, v3.4s 1236 ldr q3, [sp, #192] 1237 orr v0.16b, v0.16b, v12.16b 1238 eor v1.16b, v7.16b, v1.16b 1239 orr v2.16b, v2.16b, v13.16b 1240 ushr v12.4s, v5.4s, #7 1241 shl v5.4s, v5.4s, #25 1242 add v22.4s, v22.4s, v3.4s 1243 ushr v13.4s, v1.4s, #7 1244 shl v1.4s, v1.4s, #25 1245 add v10.4s, v10.4s, v0.4s 1246 orr v5.16b, v5.16b, v12.16b 1247 add v22.4s, v22.4s, v2.4s 1248 add v20.4s, v20.4s, v15.4s 1249 ldr q15, [sp, #128] 1250 orr v1.16b, v1.16b, v13.16b 1251 add v17.4s, v17.4s, v24.4s 1252 eor v4.16b, v4.16b, v10.16b 1253 eor v25.16b, v25.16b, v22.16b 1254 add v20.4s, v20.4s, v5.4s 1255 add v17.4s, v17.4s, v1.4s 1256 tbl v4.16b, { v4.16b }, v27.16b 1257 tbl v25.16b, { v25.16b }, v27.16b 1258 eor v6.16b, v6.16b, v20.16b 1259 eor v16.16b, v16.16b, v17.16b 1260 add v26.4s, v26.4s, v4.4s 1261 add v7.4s, v7.4s, v25.4s 1262 tbl v6.16b, { v6.16b }, v27.16b 1263 tbl v16.16b, { v16.16b }, v27.16b 1264 eor v0.16b, v26.16b, v0.16b 1265 eor v2.16b, v7.16b, v2.16b 1266 add v21.4s, v21.4s, v6.4s 1267 add v19.4s, v19.4s, v16.4s 1268 ushr v12.4s, 
v0.4s, #12 1269 shl v0.4s, v0.4s, #20 1270 ushr v13.4s, v2.4s, #12 1271 shl v2.4s, v2.4s, #20 1272 eor v5.16b, v21.16b, v5.16b 1273 ldp q23, q11, [sp, #320] 1274 eor v1.16b, v19.16b, v1.16b 1275 orr v0.16b, v0.16b, v12.16b 1276 add v10.4s, v10.4s, v8.4s 1277 orr v2.16b, v2.16b, v13.16b 1278 ushr v13.4s, v5.4s, #12 1279 shl v5.4s, v5.4s, #20 1280 add v22.4s, v22.4s, v23.4s 1281 ushr v12.4s, v1.4s, #12 1282 shl v1.4s, v1.4s, #20 1283 add v10.4s, v10.4s, v0.4s 1284 mov v28.16b, v31.16b 1285 mov v31.16b, v8.16b 1286 ldr q8, [sp, #208] 1287 orr v5.16b, v5.16b, v13.16b 1288 add v22.4s, v22.4s, v2.4s 1289 add v20.4s, v20.4s, v11.4s 1290 orr v1.16b, v1.16b, v12.16b 1291 add v17.4s, v17.4s, v8.4s 1292 eor v4.16b, v4.16b, v10.16b 1293 eor v25.16b, v25.16b, v22.16b 1294 add v20.4s, v20.4s, v5.4s 1295 add v17.4s, v17.4s, v1.4s 1296 tbl v4.16b, { v4.16b }, v18.16b 1297 tbl v25.16b, { v25.16b }, v18.16b 1298 eor v6.16b, v6.16b, v20.16b 1299 eor v16.16b, v16.16b, v17.16b 1300 add v26.4s, v26.4s, v4.4s 1301 add v7.4s, v7.4s, v25.4s 1302 tbl v6.16b, { v6.16b }, v18.16b 1303 tbl v16.16b, { v16.16b }, v18.16b 1304 eor v0.16b, v26.16b, v0.16b 1305 eor v2.16b, v7.16b, v2.16b 1306 add v21.4s, v21.4s, v6.4s 1307 add v19.4s, v19.4s, v16.4s 1308 ushr v12.4s, v0.4s, #7 1309 shl v0.4s, v0.4s, #25 1310 ushr v13.4s, v2.4s, #7 1311 shl v2.4s, v2.4s, #25 1312 eor v5.16b, v21.16b, v5.16b 1313 eor v1.16b, v19.16b, v1.16b 1314 orr v0.16b, v0.16b, v12.16b 1315 add v22.4s, v22.4s, v29.4s 1316 orr v2.16b, v2.16b, v13.16b 1317 ushr v13.4s, v5.4s, #7 1318 shl v5.4s, v5.4s, #25 1319 add v17.4s, v17.4s, v30.4s 1320 ushr v12.4s, v1.4s, #7 1321 shl v1.4s, v1.4s, #25 1322 add v22.4s, v22.4s, v0.4s 1323 orr v5.16b, v5.16b, v13.16b 1324 add v17.4s, v17.4s, v2.4s 1325 add v10.4s, v10.4s, v9.4s 1326 orr v1.16b, v1.16b, v12.16b 1327 eor v16.16b, v16.16b, v22.16b 1328 add v20.4s, v20.4s, v14.4s 1329 ldr q14, [sp, #256] 1330 eor v6.16b, v6.16b, v17.16b 1331 add v10.4s, v10.4s, v5.4s 1332 tbl v16.16b, { v16.16b }, 
v27.16b 1333 add v20.4s, v20.4s, v1.4s 1334 tbl v6.16b, { v6.16b }, v27.16b 1335 eor v25.16b, v25.16b, v10.16b 1336 add v21.4s, v21.4s, v16.4s 1337 eor v4.16b, v4.16b, v20.16b 1338 add v26.4s, v26.4s, v6.4s 1339 tbl v25.16b, { v25.16b }, v27.16b 1340 eor v0.16b, v21.16b, v0.16b 1341 tbl v4.16b, { v4.16b }, v27.16b 1342 eor v2.16b, v26.16b, v2.16b 1343 add v19.4s, v19.4s, v25.4s 1344 ushr v12.4s, v0.4s, #12 1345 shl v0.4s, v0.4s, #20 1346 add v7.4s, v7.4s, v4.4s 1347 ushr v13.4s, v2.4s, #12 1348 shl v2.4s, v2.4s, #20 1349 eor v5.16b, v5.16b, v19.16b 1350 orr v0.16b, v0.16b, v12.16b 1351 eor v1.16b, v7.16b, v1.16b 1352 add v22.4s, v22.4s, v3.4s 1353 orr v2.16b, v2.16b, v13.16b 1354 ushr v12.4s, v5.4s, #12 1355 shl v5.4s, v5.4s, #20 1356 add v17.4s, v17.4s, v15.4s 1357 ushr v13.4s, v1.4s, #12 1358 shl v1.4s, v1.4s, #20 1359 add v22.4s, v22.4s, v0.4s 1360 orr v5.16b, v5.16b, v12.16b 1361 add v17.4s, v17.4s, v2.4s 1362 add v10.4s, v10.4s, v14.4s 1363 orr v1.16b, v1.16b, v13.16b 1364 eor v16.16b, v16.16b, v22.16b 1365 add v20.4s, v20.4s, v8.4s 1366 eor v6.16b, v6.16b, v17.16b 1367 add v10.4s, v10.4s, v5.4s 1368 tbl v16.16b, { v16.16b }, v18.16b 1369 add v20.4s, v20.4s, v1.4s 1370 tbl v6.16b, { v6.16b }, v18.16b 1371 eor v25.16b, v25.16b, v10.16b 1372 add v21.4s, v21.4s, v16.4s 1373 eor v4.16b, v4.16b, v20.16b 1374 add v26.4s, v26.4s, v6.4s 1375 tbl v25.16b, { v25.16b }, v18.16b 1376 eor v0.16b, v21.16b, v0.16b 1377 tbl v4.16b, { v4.16b }, v18.16b 1378 eor v2.16b, v26.16b, v2.16b 1379 add v19.4s, v19.4s, v25.4s 1380 ushr v12.4s, v0.4s, #7 1381 shl v0.4s, v0.4s, #25 1382 add v7.4s, v7.4s, v4.4s 1383 ushr v13.4s, v2.4s, #7 1384 shl v2.4s, v2.4s, #25 1385 eor v5.16b, v19.16b, v5.16b 1386 orr v0.16b, v0.16b, v12.16b 1387 eor v1.16b, v7.16b, v1.16b 1388 add v10.4s, v10.4s, v28.4s 1389 orr v2.16b, v2.16b, v13.16b 1390 ushr v12.4s, v5.4s, #7 1391 shl v5.4s, v5.4s, #25 1392 add v22.4s, v22.4s, v24.4s 1393 ushr v13.4s, v1.4s, #7 1394 shl v1.4s, v1.4s, #25 1395 add v10.4s, v10.4s, 
v0.4s 1396 orr v5.16b, v5.16b, v12.16b 1397 add v22.4s, v22.4s, v2.4s 1398 add v20.4s, v20.4s, v11.4s 1399 ldr q11, [sp, #304] 1400 orr v1.16b, v1.16b, v13.16b 1401 add v17.4s, v17.4s, v31.4s 1402 ldr q31, [sp, #224] 1403 eor v4.16b, v4.16b, v10.16b 1404 eor v25.16b, v25.16b, v22.16b 1405 add v20.4s, v20.4s, v5.4s 1406 add v17.4s, v17.4s, v1.4s 1407 tbl v4.16b, { v4.16b }, v27.16b 1408 tbl v25.16b, { v25.16b }, v27.16b 1409 eor v6.16b, v6.16b, v20.16b 1410 eor v16.16b, v16.16b, v17.16b 1411 add v26.4s, v26.4s, v4.4s 1412 add v7.4s, v7.4s, v25.4s 1413 tbl v6.16b, { v6.16b }, v27.16b 1414 tbl v16.16b, { v16.16b }, v27.16b 1415 eor v0.16b, v26.16b, v0.16b 1416 eor v2.16b, v7.16b, v2.16b 1417 add v21.4s, v21.4s, v6.4s 1418 add v19.4s, v19.4s, v16.4s 1419 ushr v12.4s, v0.4s, #12 1420 shl v0.4s, v0.4s, #20 1421 ushr v13.4s, v2.4s, #12 1422 shl v2.4s, v2.4s, #20 1423 eor v5.16b, v21.16b, v5.16b 1424 eor v1.16b, v19.16b, v1.16b 1425 orr v0.16b, v0.16b, v12.16b 1426 add v10.4s, v10.4s, v23.4s 1427 ldr q23, [sp, #240] 1428 orr v2.16b, v2.16b, v13.16b 1429 ushr v13.4s, v5.4s, #12 1430 shl v5.4s, v5.4s, #20 1431 add v22.4s, v22.4s, v11.4s 1432 mov v30.16b, v8.16b 1433 mov v8.16b, v24.16b 1434 ldr q24, [sp, #352] 1435 ushr v12.4s, v1.4s, #12 1436 shl v1.4s, v1.4s, #20 1437 add v10.4s, v10.4s, v0.4s 1438 orr v5.16b, v5.16b, v13.16b 1439 str q8, [sp, #112] 1440 add v22.4s, v22.4s, v2.4s 1441 add v20.4s, v20.4s, v24.4s 1442 orr v1.16b, v1.16b, v12.16b 1443 add v17.4s, v17.4s, v31.4s 1444 eor v4.16b, v4.16b, v10.16b 1445 eor v25.16b, v25.16b, v22.16b 1446 add v20.4s, v20.4s, v5.4s 1447 add v17.4s, v17.4s, v1.4s 1448 tbl v4.16b, { v4.16b }, v18.16b 1449 tbl v25.16b, { v25.16b }, v18.16b 1450 eor v6.16b, v6.16b, v20.16b 1451 eor v16.16b, v16.16b, v17.16b 1452 add v26.4s, v26.4s, v4.4s 1453 add v7.4s, v7.4s, v25.4s 1454 tbl v6.16b, { v6.16b }, v18.16b 1455 tbl v16.16b, { v16.16b }, v18.16b 1456 eor v0.16b, v26.16b, v0.16b 1457 eor v2.16b, v7.16b, v2.16b 1458 add v21.4s, v21.4s, v6.4s 
1459 mov v29.16b, v3.16b 1460 add v19.4s, v19.4s, v16.4s 1461 ushr v12.4s, v0.4s, #7 1462 shl v0.4s, v0.4s, #25 1463 ushr v13.4s, v2.4s, #7 1464 shl v2.4s, v2.4s, #25 1465 eor v5.16b, v21.16b, v5.16b 1466 eor v1.16b, v19.16b, v1.16b 1467 orr v0.16b, v0.16b, v12.16b 1468 add v22.4s, v22.4s, v29.4s 1469 orr v2.16b, v2.16b, v13.16b 1470 ushr v13.4s, v5.4s, #7 1471 shl v5.4s, v5.4s, #25 1472 add v17.4s, v17.4s, v30.4s 1473 ldr q30, [sp, #272] 1474 ushr v12.4s, v1.4s, #7 1475 shl v1.4s, v1.4s, #25 1476 add v22.4s, v22.4s, v0.4s 1477 mov v3.16b, v28.16b 1478 ldr q28, [sp, #176] 1479 orr v5.16b, v5.16b, v13.16b 1480 add v17.4s, v17.4s, v2.4s 1481 add v10.4s, v10.4s, v30.4s 1482 orr v1.16b, v1.16b, v12.16b 1483 eor v16.16b, v16.16b, v22.16b 1484 add v20.4s, v20.4s, v28.4s 1485 eor v6.16b, v6.16b, v17.16b 1486 add v10.4s, v10.4s, v5.4s 1487 tbl v16.16b, { v16.16b }, v27.16b 1488 add v20.4s, v20.4s, v1.4s 1489 tbl v6.16b, { v6.16b }, v27.16b 1490 eor v25.16b, v25.16b, v10.16b 1491 add v21.4s, v21.4s, v16.4s 1492 eor v4.16b, v4.16b, v20.16b 1493 add v26.4s, v26.4s, v6.4s 1494 tbl v25.16b, { v25.16b }, v27.16b 1495 eor v0.16b, v21.16b, v0.16b 1496 tbl v4.16b, { v4.16b }, v27.16b 1497 eor v2.16b, v26.16b, v2.16b 1498 add v19.4s, v19.4s, v25.4s 1499 ushr v12.4s, v0.4s, #12 1500 shl v0.4s, v0.4s, #20 1501 add v7.4s, v7.4s, v4.4s 1502 ushr v13.4s, v2.4s, #12 1503 shl v2.4s, v2.4s, #20 1504 eor v5.16b, v5.16b, v19.16b 1505 orr v0.16b, v0.16b, v12.16b 1506 eor v1.16b, v7.16b, v1.16b 1507 add v22.4s, v22.4s, v8.4s 1508 orr v2.16b, v2.16b, v13.16b 1509 ushr v12.4s, v5.4s, #12 1510 shl v5.4s, v5.4s, #20 1511 add v17.4s, v17.4s, v9.4s 1512 ldr q9, [sp, #320] 1513 ushr v13.4s, v1.4s, #12 1514 shl v1.4s, v1.4s, #20 1515 add v22.4s, v22.4s, v0.4s 1516 orr v5.16b, v5.16b, v12.16b 1517 add v17.4s, v17.4s, v2.4s 1518 add v10.4s, v10.4s, v23.4s 1519 orr v1.16b, v1.16b, v13.16b 1520 eor v16.16b, v16.16b, v22.16b 1521 add v20.4s, v20.4s, v31.4s 1522 eor v6.16b, v6.16b, v17.16b 1523 add v10.4s, 
v10.4s, v5.4s 1524 tbl v16.16b, { v16.16b }, v18.16b 1525 add v20.4s, v20.4s, v1.4s 1526 tbl v6.16b, { v6.16b }, v18.16b 1527 eor v25.16b, v25.16b, v10.16b 1528 add v21.4s, v21.4s, v16.4s 1529 eor v4.16b, v4.16b, v20.16b 1530 add v26.4s, v26.4s, v6.4s 1531 tbl v25.16b, { v25.16b }, v18.16b 1532 eor v0.16b, v21.16b, v0.16b 1533 tbl v4.16b, { v4.16b }, v18.16b 1534 eor v2.16b, v26.16b, v2.16b 1535 add v19.4s, v19.4s, v25.4s 1536 ushr v12.4s, v0.4s, #7 1537 shl v0.4s, v0.4s, #25 1538 add v7.4s, v7.4s, v4.4s 1539 ushr v13.4s, v2.4s, #7 1540 shl v2.4s, v2.4s, #25 1541 eor v5.16b, v19.16b, v5.16b 1542 add v10.4s, v10.4s, v14.4s 1543 ldr q14, [sp, #288] 1544 orr v0.16b, v0.16b, v12.16b 1545 eor v1.16b, v7.16b, v1.16b 1546 orr v2.16b, v2.16b, v13.16b 1547 ushr v12.4s, v5.4s, #7 1548 shl v5.4s, v5.4s, #25 1549 add v22.4s, v22.4s, v14.4s 1550 ushr v13.4s, v1.4s, #7 1551 shl v1.4s, v1.4s, #25 1552 add v10.4s, v10.4s, v0.4s 1553 orr v5.16b, v5.16b, v12.16b 1554 add v22.4s, v22.4s, v2.4s 1555 add v20.4s, v20.4s, v24.4s 1556 orr v1.16b, v1.16b, v13.16b 1557 eor v4.16b, v4.16b, v10.16b 1558 add v17.4s, v17.4s, v9.4s 1559 eor v25.16b, v25.16b, v22.16b 1560 add v20.4s, v20.4s, v5.4s 1561 tbl v4.16b, { v4.16b }, v27.16b 1562 add v17.4s, v17.4s, v1.4s 1563 tbl v25.16b, { v25.16b }, v27.16b 1564 eor v6.16b, v6.16b, v20.16b 1565 add v26.4s, v26.4s, v4.4s 1566 eor v16.16b, v16.16b, v17.16b 1567 add v7.4s, v7.4s, v25.4s 1568 tbl v6.16b, { v6.16b }, v27.16b 1569 eor v0.16b, v26.16b, v0.16b 1570 tbl v16.16b, { v16.16b }, v27.16b 1571 eor v2.16b, v7.16b, v2.16b 1572 add v21.4s, v21.4s, v6.4s 1573 ushr v12.4s, v0.4s, #12 1574 shl v0.4s, v0.4s, #20 1575 add v19.4s, v19.4s, v16.4s 1576 ushr v13.4s, v2.4s, #12 1577 shl v2.4s, v2.4s, #20 1578 eor v5.16b, v21.16b, v5.16b 1579 orr v0.16b, v0.16b, v12.16b 1580 eor v1.16b, v19.16b, v1.16b 1581 add v10.4s, v10.4s, v11.4s 1582 orr v2.16b, v2.16b, v13.16b 1583 ushr v13.4s, v5.4s, #12 1584 shl v5.4s, v5.4s, #20 1585 ushr v12.4s, v1.4s, #12 1586 shl 
v1.4s, v1.4s, #20 1587 add v10.4s, v10.4s, v0.4s 1588 add v22.4s, v22.4s, v15.4s 1589 orr v5.16b, v5.16b, v13.16b 1590 add v20.4s, v20.4s, v3.4s 1591 mov v24.16b, v3.16b 1592 ldr q3, [sp, #336] 1593 orr v1.16b, v1.16b, v12.16b 1594 eor v4.16b, v4.16b, v10.16b 1595 add v22.4s, v22.4s, v2.4s 1596 add v17.4s, v17.4s, v3.4s 1597 add v20.4s, v20.4s, v5.4s 1598 tbl v4.16b, { v4.16b }, v18.16b 1599 eor v25.16b, v25.16b, v22.16b 1600 add v17.4s, v17.4s, v1.4s 1601 eor v6.16b, v6.16b, v20.16b 1602 add v26.4s, v26.4s, v4.4s 1603 tbl v25.16b, { v25.16b }, v18.16b 1604 eor v16.16b, v16.16b, v17.16b 1605 tbl v6.16b, { v6.16b }, v18.16b 1606 eor v0.16b, v26.16b, v0.16b 1607 add v7.4s, v7.4s, v25.4s 1608 tbl v16.16b, { v16.16b }, v18.16b 1609 add v21.4s, v21.4s, v6.4s 1610 ushr v12.4s, v0.4s, #7 1611 shl v0.4s, v0.4s, #25 1612 eor v2.16b, v7.16b, v2.16b 1613 add v19.4s, v19.4s, v16.4s 1614 eor v5.16b, v21.16b, v5.16b 1615 orr v0.16b, v0.16b, v12.16b 1616 ushr v12.4s, v2.4s, #7 1617 shl v2.4s, v2.4s, #25 1618 eor v1.16b, v19.16b, v1.16b 1619 ushr v13.4s, v5.4s, #7 1620 shl v5.4s, v5.4s, #25 1621 add v22.4s, v22.4s, v8.4s 1622 orr v2.16b, v2.16b, v12.16b 1623 ushr v12.4s, v1.4s, #7 1624 shl v1.4s, v1.4s, #25 1625 orr v5.16b, v5.16b, v13.16b 1626 add v22.4s, v22.4s, v0.4s 1627 add v10.4s, v10.4s, v29.4s 1628 ldr q29, [sp, #208] 1629 add v17.4s, v17.4s, v31.4s 1630 orr v1.16b, v1.16b, v12.16b 1631 add v20.4s, v20.4s, v29.4s 1632 eor v16.16b, v16.16b, v22.16b 1633 add v10.4s, v10.4s, v5.4s 1634 add v17.4s, v17.4s, v2.4s 1635 add v20.4s, v20.4s, v1.4s 1636 tbl v16.16b, { v16.16b }, v27.16b 1637 eor v25.16b, v25.16b, v10.16b 1638 eor v6.16b, v6.16b, v17.16b 1639 eor v4.16b, v4.16b, v20.16b 1640 add v21.4s, v21.4s, v16.4s 1641 tbl v25.16b, { v25.16b }, v27.16b 1642 tbl v6.16b, { v6.16b }, v27.16b 1643 tbl v4.16b, { v4.16b }, v27.16b 1644 eor v0.16b, v21.16b, v0.16b 1645 add v19.4s, v19.4s, v25.4s 1646 add v26.4s, v26.4s, v6.4s 1647 add v7.4s, v7.4s, v4.4s 1648 ushr v12.4s, v0.4s, #12 
1649 shl v0.4s, v0.4s, #20 1650 eor v5.16b, v5.16b, v19.16b 1651 eor v2.16b, v26.16b, v2.16b 1652 eor v1.16b, v7.16b, v1.16b 1653 orr v0.16b, v0.16b, v12.16b 1654 ushr v12.4s, v5.4s, #12 1655 shl v5.4s, v5.4s, #20 1656 add v22.4s, v22.4s, v14.4s 1657 mov v8.16b, v31.16b 1658 ushr v13.4s, v2.4s, #12 1659 shl v2.4s, v2.4s, #20 1660 mov v31.16b, v14.16b 1661 ushr v14.4s, v1.4s, #12 1662 shl v1.4s, v1.4s, #20 1663 orr v5.16b, v5.16b, v12.16b 1664 add v22.4s, v22.4s, v0.4s 1665 add v10.4s, v10.4s, v28.4s 1666 ldr q28, [sp, #352] 1667 orr v2.16b, v2.16b, v13.16b 1668 orr v1.16b, v1.16b, v14.16b 1669 add v17.4s, v17.4s, v30.4s 1670 add v20.4s, v20.4s, v3.4s 1671 eor v16.16b, v16.16b, v22.16b 1672 add v10.4s, v10.4s, v5.4s 1673 add v17.4s, v17.4s, v2.4s 1674 add v20.4s, v20.4s, v1.4s 1675 tbl v16.16b, { v16.16b }, v18.16b 1676 eor v25.16b, v25.16b, v10.16b 1677 eor v6.16b, v6.16b, v17.16b 1678 eor v4.16b, v4.16b, v20.16b 1679 add v21.4s, v21.4s, v16.4s 1680 tbl v25.16b, { v25.16b }, v18.16b 1681 tbl v6.16b, { v6.16b }, v18.16b 1682 tbl v4.16b, { v4.16b }, v18.16b 1683 eor v0.16b, v21.16b, v0.16b 1684 add v19.4s, v19.4s, v25.4s 1685 add v26.4s, v26.4s, v6.4s 1686 add v7.4s, v7.4s, v4.4s 1687 ushr v12.4s, v0.4s, #7 1688 shl v0.4s, v0.4s, #25 1689 eor v5.16b, v19.16b, v5.16b 1690 eor v2.16b, v26.16b, v2.16b 1691 eor v1.16b, v7.16b, v1.16b 1692 orr v0.16b, v0.16b, v12.16b 1693 ushr v12.4s, v5.4s, #7 1694 shl v5.4s, v5.4s, #25 1695 add v10.4s, v10.4s, v23.4s 1696 ushr v13.4s, v2.4s, #7 1697 shl v2.4s, v2.4s, #25 1698 ushr v14.4s, v1.4s, #7 1699 shl v1.4s, v1.4s, #25 1700 orr v5.16b, v5.16b, v12.16b 1701 add v10.4s, v10.4s, v0.4s 1702 add v20.4s, v20.4s, v24.4s 1703 ldr q24, [sp, #144] 1704 orr v2.16b, v2.16b, v13.16b 1705 orr v1.16b, v1.16b, v14.16b 1706 add v22.4s, v22.4s, v9.4s 1707 add v17.4s, v17.4s, v11.4s 1708 eor v4.16b, v4.16b, v10.16b 1709 add v20.4s, v20.4s, v5.4s 1710 add v22.4s, v22.4s, v2.4s 1711 add v17.4s, v17.4s, v1.4s 1712 tbl v4.16b, { v4.16b }, v27.16b 1713 
eor v6.16b, v6.16b, v20.16b 1714 eor v25.16b, v25.16b, v22.16b 1715 eor v16.16b, v16.16b, v17.16b 1716 add v26.4s, v26.4s, v4.4s 1717 tbl v6.16b, { v6.16b }, v27.16b 1718 tbl v25.16b, { v25.16b }, v27.16b 1719 tbl v16.16b, { v16.16b }, v27.16b 1720 eor v0.16b, v26.16b, v0.16b 1721 add v21.4s, v21.4s, v6.4s 1722 add v7.4s, v7.4s, v25.4s 1723 add v19.4s, v19.4s, v16.4s 1724 ushr v12.4s, v0.4s, #12 1725 shl v0.4s, v0.4s, #20 1726 eor v5.16b, v21.16b, v5.16b 1727 eor v2.16b, v7.16b, v2.16b 1728 eor v1.16b, v19.16b, v1.16b 1729 orr v0.16b, v0.16b, v12.16b 1730 add v10.4s, v10.4s, v15.4s 1731 ushr v14.4s, v5.4s, #12 1732 shl v5.4s, v5.4s, #20 1733 mov v30.16b, v3.16b 1734 ldr q3, [sp, #256] 1735 ushr v12.4s, v2.4s, #12 1736 shl v2.4s, v2.4s, #20 1737 ushr v13.4s, v1.4s, #12 1738 shl v1.4s, v1.4s, #20 1739 add v10.4s, v10.4s, v0.4s 1740 orr v5.16b, v5.16b, v14.16b 1741 add v20.4s, v20.4s, v3.4s 1742 orr v2.16b, v2.16b, v12.16b 1743 orr v1.16b, v1.16b, v13.16b 1744 add v22.4s, v22.4s, v24.4s 1745 add v17.4s, v17.4s, v28.4s 1746 eor v4.16b, v4.16b, v10.16b 1747 add v20.4s, v20.4s, v5.4s 1748 add v22.4s, v22.4s, v2.4s 1749 add v17.4s, v17.4s, v1.4s 1750 tbl v4.16b, { v4.16b }, v18.16b 1751 eor v6.16b, v6.16b, v20.16b 1752 eor v25.16b, v25.16b, v22.16b 1753 eor v16.16b, v16.16b, v17.16b 1754 add v26.4s, v26.4s, v4.4s 1755 tbl v6.16b, { v6.16b }, v18.16b 1756 tbl v25.16b, { v25.16b }, v18.16b 1757 tbl v16.16b, { v16.16b }, v18.16b 1758 eor v0.16b, v26.16b, v0.16b 1759 add v21.4s, v21.4s, v6.4s 1760 add v7.4s, v7.4s, v25.4s 1761 add v19.4s, v19.4s, v16.4s 1762 ushr v12.4s, v0.4s, #7 1763 shl v0.4s, v0.4s, #25 1764 eor v5.16b, v21.16b, v5.16b 1765 eor v2.16b, v7.16b, v2.16b 1766 eor v1.16b, v19.16b, v1.16b 1767 orr v0.16b, v0.16b, v12.16b 1768 ushr v12.4s, v5.4s, #7 1769 shl v5.4s, v5.4s, #25 1770 mov v23.16b, v9.16b 1771 ldr q9, [sp, #112] 1772 ushr v13.4s, v2.4s, #7 1773 shl v2.4s, v2.4s, #25 1774 ushr v14.4s, v1.4s, #7 1775 shl v1.4s, v1.4s, #25 1776 orr v5.16b, v5.16b, 
v12.16b 1777 add v9.4s, v10.4s, v9.4s 1778 orr v2.16b, v2.16b, v13.16b 1779 orr v1.16b, v1.16b, v14.16b 1780 ldr q14, [sp, #64] 1781 add v22.4s, v22.4s, v31.4s 1782 add v17.4s, v17.4s, v30.4s 1783 add v20.4s, v20.4s, v8.4s 1784 add v9.4s, v9.4s, v5.4s 1785 add v22.4s, v22.4s, v0.4s 1786 add v17.4s, v17.4s, v2.4s 1787 add v20.4s, v20.4s, v1.4s 1788 eor v25.16b, v25.16b, v9.16b 1789 eor v16.16b, v16.16b, v22.16b 1790 eor v6.16b, v6.16b, v17.16b 1791 eor v4.16b, v4.16b, v20.16b 1792 tbl v25.16b, { v25.16b }, v27.16b 1793 tbl v16.16b, { v16.16b }, v27.16b 1794 tbl v6.16b, { v6.16b }, v27.16b 1795 tbl v4.16b, { v4.16b }, v27.16b 1796 add v19.4s, v19.4s, v25.4s 1797 add v21.4s, v21.4s, v16.4s 1798 add v26.4s, v26.4s, v6.4s 1799 add v7.4s, v7.4s, v4.4s 1800 eor v5.16b, v5.16b, v19.16b 1801 eor v0.16b, v21.16b, v0.16b 1802 eor v2.16b, v26.16b, v2.16b 1803 eor v1.16b, v7.16b, v1.16b 1804 ushr v30.4s, v5.4s, #12 1805 shl v5.4s, v5.4s, #20 1806 ushr v10.4s, v0.4s, #12 1807 shl v0.4s, v0.4s, #20 1808 ushr v12.4s, v2.4s, #12 1809 shl v2.4s, v2.4s, #20 1810 ushr v13.4s, v1.4s, #12 1811 shl v1.4s, v1.4s, #20 1812 orr v5.16b, v5.16b, v30.16b 1813 add v30.4s, v9.4s, v29.4s 1814 add v22.4s, v22.4s, v23.4s 1815 ldr q23, [sp, #192] 1816 orr v0.16b, v0.16b, v10.16b 1817 orr v2.16b, v2.16b, v12.16b 1818 orr v1.16b, v1.16b, v13.16b 1819 add v17.4s, v17.4s, v23.4s 1820 add v20.4s, v20.4s, v28.4s 1821 add v23.4s, v30.4s, v5.4s 1822 add v22.4s, v22.4s, v0.4s 1823 add v17.4s, v17.4s, v2.4s 1824 add v20.4s, v20.4s, v1.4s 1825 eor v25.16b, v25.16b, v23.16b 1826 eor v16.16b, v16.16b, v22.16b 1827 eor v6.16b, v6.16b, v17.16b 1828 eor v4.16b, v4.16b, v20.16b 1829 tbl v25.16b, { v25.16b }, v18.16b 1830 tbl v16.16b, { v16.16b }, v18.16b 1831 tbl v6.16b, { v6.16b }, v18.16b 1832 tbl v4.16b, { v4.16b }, v18.16b 1833 add v19.4s, v19.4s, v25.4s 1834 add v21.4s, v21.4s, v16.4s 1835 add v26.4s, v26.4s, v6.4s 1836 add v7.4s, v7.4s, v4.4s 1837 eor v5.16b, v19.16b, v5.16b 1838 eor v0.16b, v21.16b, v0.16b 
1839 eor v2.16b, v26.16b, v2.16b 1840 eor v1.16b, v7.16b, v1.16b 1841 ushr v28.4s, v5.4s, #7 1842 shl v5.4s, v5.4s, #25 1843 ushr v30.4s, v0.4s, #7 1844 shl v0.4s, v0.4s, #25 1845 ushr v31.4s, v2.4s, #7 1846 shl v2.4s, v2.4s, #25 1847 ushr v8.4s, v1.4s, #7 1848 shl v1.4s, v1.4s, #25 1849 orr v5.16b, v5.16b, v28.16b 1850 ldr q28, [sp, #176] 1851 orr v0.16b, v0.16b, v30.16b 1852 orr v2.16b, v2.16b, v31.16b 1853 orr v1.16b, v1.16b, v8.16b 1854 add v23.4s, v23.4s, v28.4s 1855 add v22.4s, v22.4s, v11.4s 1856 add v17.4s, v17.4s, v15.4s 1857 add v20.4s, v20.4s, v3.4s 1858 ldr q3, [sp, #272] 1859 add v23.4s, v23.4s, v0.4s 1860 add v22.4s, v22.4s, v2.4s 1861 add v17.4s, v17.4s, v1.4s 1862 add v20.4s, v20.4s, v5.4s 1863 eor v4.16b, v4.16b, v23.16b 1864 eor v25.16b, v25.16b, v22.16b 1865 eor v16.16b, v16.16b, v17.16b 1866 eor v6.16b, v6.16b, v20.16b 1867 tbl v4.16b, { v4.16b }, v27.16b 1868 tbl v25.16b, { v25.16b }, v27.16b 1869 tbl v16.16b, { v16.16b }, v27.16b 1870 tbl v6.16b, { v6.16b }, v27.16b 1871 add v26.4s, v26.4s, v4.4s 1872 add v7.4s, v7.4s, v25.4s 1873 add v19.4s, v19.4s, v16.4s 1874 add v21.4s, v21.4s, v6.4s 1875 eor v0.16b, v26.16b, v0.16b 1876 eor v2.16b, v7.16b, v2.16b 1877 eor v1.16b, v19.16b, v1.16b 1878 eor v5.16b, v21.16b, v5.16b 1879 add v3.4s, v22.4s, v3.4s 1880 ldr q22, [sp, #160] 1881 ushr v28.4s, v0.4s, #12 1882 shl v0.4s, v0.4s, #20 1883 ushr v29.4s, v2.4s, #12 1884 shl v2.4s, v2.4s, #20 1885 ushr v30.4s, v1.4s, #12 1886 shl v1.4s, v1.4s, #20 1887 ushr v31.4s, v5.4s, #12 1888 shl v5.4s, v5.4s, #20 1889 add v17.4s, v17.4s, v22.4s 1890 ldr q22, [sp, #240] 1891 orr v0.16b, v0.16b, v28.16b 1892 prfm pldl1keep, [x23, #256] 1893 orr v2.16b, v2.16b, v29.16b 1894 prfm pldl1keep, [x24, #256] 1895 orr v1.16b, v1.16b, v30.16b 1896 prfm pldl1keep, [x22, #256] 1897 orr v5.16b, v5.16b, v31.16b 1898 prfm pldl1keep, [x25, #256] 1899 add v23.4s, v23.4s, v24.4s 1900 add v20.4s, v20.4s, v22.4s 1901 add v3.4s, v3.4s, v2.4s 1902 add v17.4s, v17.4s, v1.4s 1903 add v22.4s, 
v23.4s, v0.4s 1904 add v20.4s, v20.4s, v5.4s 1905 eor v23.16b, v25.16b, v3.16b 1906 eor v16.16b, v16.16b, v17.16b 1907 eor v4.16b, v4.16b, v22.16b 1908 eor v6.16b, v6.16b, v20.16b 1909 tbl v23.16b, { v23.16b }, v18.16b 1910 tbl v16.16b, { v16.16b }, v18.16b 1911 tbl v4.16b, { v4.16b }, v18.16b 1912 tbl v6.16b, { v6.16b }, v18.16b 1913 add v7.4s, v7.4s, v23.4s 1914 add v19.4s, v19.4s, v16.4s 1915 add v18.4s, v26.4s, v4.4s 1916 add v21.4s, v21.4s, v6.4s 1917 eor v2.16b, v7.16b, v2.16b 1918 eor v1.16b, v19.16b, v1.16b 1919 eor v0.16b, v18.16b, v0.16b 1920 eor v5.16b, v21.16b, v5.16b 1921 ushr v25.4s, v2.4s, #7 1922 shl v2.4s, v2.4s, #25 1923 ushr v24.4s, v0.4s, #7 1924 shl v0.4s, v0.4s, #25 1925 ushr v26.4s, v1.4s, #7 1926 shl v1.4s, v1.4s, #25 1927 ushr v27.4s, v5.4s, #7 1928 shl v5.4s, v5.4s, #25 1929 orr v0.16b, v0.16b, v24.16b 1930 orr v2.16b, v2.16b, v25.16b 1931 orr v1.16b, v1.16b, v26.16b 1932 orr v5.16b, v5.16b, v27.16b 1933 movi v13.4s, #64 1934 eor v29.16b, v19.16b, v22.16b 1935 eor v8.16b, v21.16b, v3.16b 1936 eor v30.16b, v17.16b, v18.16b 1937 eor v31.16b, v20.16b, v7.16b 1938 eor v24.16b, v5.16b, v23.16b 1939 eor v18.16b, v0.16b, v16.16b 1940 eor v25.16b, v2.16b, v6.16b 1941 eor v26.16b, v1.16b, v4.16b 1942 cbnz x21, .LBB3_5 1943 b .LBB3_2 1944.LBB3_6: 1945 cbz x1, .LBB3_14 1946 adrp x12, .LCPI3_3 1947 ldr q0, [x11, :lo12:.LCPI3_1] 1948 orr w11, w7, w6 1949 ldr q2, [x10, :lo12:.LCPI3_2] 1950 ldr q1, [x12, :lo12:.LCPI3_3] 1951 and x12, x5, #0x1 1952.LBB3_8: 1953 movi v3.4s, #64 1954 lsr x13, x4, #32 1955 ldp q5, q4, [x3] 1956 mov x15, x2 1957 mov w14, w11 1958 mov v3.s[0], w4 1959 ldr x10, [x0] 1960 mov v3.s[1], w13 1961 b .LBB3_11 1962.LBB3_9: 1963 orr w14, w14, w9 1964.LBB3_10: 1965 ldp q6, q7, [x10] 1966 mov v16.16b, v3.16b 1967 and w14, w14, #0xff 1968 add v5.4s, v5.4s, v4.4s 1969 mov x15, x13 1970 mov v16.s[3], w14 1971 add x14, x10, #32 1972 uzp1 v17.4s, v6.4s, v7.4s 1973 add x10, x10, #64 1974 add v5.4s, v5.4s, v17.4s 1975 eor v16.16b, v5.16b, 
v16.16b 1976 tbl v16.16b, { v16.16b }, v0.16b 1977 add v18.4s, v16.4s, v1.4s 1978 eor v19.16b, v18.16b, v4.16b 1979 uzp2 v4.4s, v6.4s, v7.4s 1980 ushr v6.4s, v19.4s, #12 1981 shl v7.4s, v19.4s, #20 1982 ld2 { v19.4s, v20.4s }, [x14] 1983 add v5.4s, v5.4s, v4.4s 1984 mov w14, w6 1985 orr v6.16b, v7.16b, v6.16b 1986 add v5.4s, v5.4s, v6.4s 1987 eor v7.16b, v16.16b, v5.16b 1988 add v5.4s, v5.4s, v19.4s 1989 tbl v7.16b, { v7.16b }, v2.16b 1990 ext v5.16b, v5.16b, v5.16b, #12 1991 add v16.4s, v18.4s, v7.4s 1992 ext v7.16b, v7.16b, v7.16b, #8 1993 eor v6.16b, v6.16b, v16.16b 1994 ext v16.16b, v16.16b, v16.16b, #4 1995 ushr v18.4s, v6.4s, #7 1996 shl v6.4s, v6.4s, #25 1997 orr v6.16b, v6.16b, v18.16b 1998 ext v18.16b, v20.16b, v20.16b, #12 1999 add v5.4s, v5.4s, v6.4s 2000 eor v7.16b, v5.16b, v7.16b 2001 add v5.4s, v5.4s, v18.4s 2002 tbl v7.16b, { v7.16b }, v0.16b 2003 add v16.4s, v16.4s, v7.4s 2004 eor v6.16b, v6.16b, v16.16b 2005 ushr v21.4s, v6.4s, #12 2006 shl v6.4s, v6.4s, #20 2007 orr v6.16b, v6.16b, v21.16b 2008 uzp1 v21.4s, v17.4s, v17.4s 2009 add v5.4s, v5.4s, v6.4s 2010 ext v21.16b, v21.16b, v17.16b, #8 2011 eor v7.16b, v7.16b, v5.16b 2012 uzp2 v21.4s, v21.4s, v4.4s 2013 tbl v7.16b, { v7.16b }, v2.16b 2014 add v5.4s, v5.4s, v21.4s 2015 add v16.4s, v16.4s, v7.4s 2016 ext v5.16b, v5.16b, v5.16b, #4 2017 ext v7.16b, v7.16b, v7.16b, #8 2018 eor v6.16b, v6.16b, v16.16b 2019 ushr v22.4s, v6.4s, #7 2020 shl v6.4s, v6.4s, #25 2021 orr v6.16b, v6.16b, v22.16b 2022 add v22.4s, v5.4s, v6.4s 2023 eor v5.16b, v22.16b, v7.16b 2024 ext v7.16b, v16.16b, v16.16b, #12 2025 tbl v16.16b, { v5.16b }, v0.16b 2026 ext v5.16b, v17.16b, v17.16b, #12 2027 add v7.4s, v7.4s, v16.4s 2028 ext v5.16b, v17.16b, v5.16b, #12 2029 ext v17.16b, v19.16b, v19.16b, #12 2030 mov v19.16b, v18.16b 2031 eor v6.16b, v6.16b, v7.16b 2032 rev64 v5.4s, v5.4s 2033 mov v19.s[1], v17.s[2] 2034 ushr v20.4s, v6.4s, #12 2035 shl v6.4s, v6.4s, #20 2036 trn2 v5.4s, v5.4s, v19.4s 2037 orr v6.16b, v6.16b, v20.16b 2038 
zip1 v20.2d, v18.2d, v4.2d 2039 zip2 v4.4s, v4.4s, v18.4s 2040 add v19.4s, v6.4s, v5.4s 2041 mov v20.s[3], v17.s[3] 2042 add v19.4s, v19.4s, v22.4s 2043 ext v22.16b, v20.16b, v20.16b, #12 2044 eor v16.16b, v16.16b, v19.16b 2045 ext v19.16b, v19.16b, v19.16b, #12 2046 tbl v16.16b, { v16.16b }, v2.16b 2047 add v7.4s, v7.4s, v16.4s 2048 ext v16.16b, v16.16b, v16.16b, #8 2049 eor v6.16b, v6.16b, v7.16b 2050 ext v7.16b, v7.16b, v7.16b, #4 2051 ushr v23.4s, v6.4s, #7 2052 shl v24.4s, v6.4s, #25 2053 uzp1 v6.4s, v20.4s, v22.4s 2054 orr v20.16b, v24.16b, v23.16b 2055 add v22.4s, v20.4s, v6.4s 2056 add v19.4s, v22.4s, v19.4s 2057 eor v16.16b, v19.16b, v16.16b 2058 tbl v16.16b, { v16.16b }, v0.16b 2059 add v7.4s, v7.4s, v16.4s 2060 eor v18.16b, v20.16b, v7.16b 2061 zip1 v20.4s, v4.4s, v17.4s 2062 zip1 v4.4s, v17.4s, v4.4s 2063 ushr v17.4s, v18.4s, #12 2064 shl v18.4s, v18.4s, #20 2065 ext v20.16b, v4.16b, v20.16b, #8 2066 orr v4.16b, v18.16b, v17.16b 2067 ext v18.16b, v21.16b, v21.16b, #4 2068 add v17.4s, v4.4s, v20.4s 2069 add v17.4s, v17.4s, v19.4s 2070 uzp1 v19.4s, v18.4s, v18.4s 2071 eor v16.16b, v16.16b, v17.16b 2072 ext v19.16b, v19.16b, v18.16b, #8 2073 tbl v16.16b, { v16.16b }, v2.16b 2074 uzp2 v19.4s, v19.4s, v5.4s 2075 add v7.4s, v7.4s, v16.4s 2076 add v17.4s, v17.4s, v19.4s 2077 ext v16.16b, v16.16b, v16.16b, #8 2078 eor v4.16b, v4.16b, v7.16b 2079 ext v17.16b, v17.16b, v17.16b, #4 2080 ext v7.16b, v7.16b, v7.16b, #12 2081 ushr v21.4s, v4.4s, #7 2082 shl v4.4s, v4.4s, #25 2083 orr v4.16b, v4.16b, v21.16b 2084 ext v21.16b, v18.16b, v18.16b, #12 2085 add v17.4s, v17.4s, v4.4s 2086 ext v18.16b, v18.16b, v21.16b, #12 2087 mov v21.16b, v20.16b 2088 eor v16.16b, v17.16b, v16.16b 2089 rev64 v18.4s, v18.4s 2090 mov v21.s[1], v6.s[2] 2091 tbl v16.16b, { v16.16b }, v0.16b 2092 add v7.4s, v7.4s, v16.4s 2093 eor v4.16b, v4.16b, v7.16b 2094 ushr v22.4s, v4.4s, #12 2095 shl v23.4s, v4.4s, #20 2096 trn2 v4.4s, v18.4s, v21.4s 2097 orr v18.16b, v23.16b, v22.16b 2098 add v21.4s, 
v18.4s, v4.4s 2099 add v17.4s, v21.4s, v17.4s 2100 zip1 v21.2d, v20.2d, v5.2d 2101 zip2 v5.4s, v5.4s, v20.4s 2102 eor v16.16b, v16.16b, v17.16b 2103 mov v21.s[3], v6.s[3] 2104 ext v17.16b, v17.16b, v17.16b, #12 2105 zip1 v20.4s, v5.4s, v6.4s 2106 tbl v16.16b, { v16.16b }, v2.16b 2107 zip1 v5.4s, v6.4s, v5.4s 2108 add v22.4s, v7.4s, v16.4s 2109 ext v16.16b, v16.16b, v16.16b, #8 2110 ext v20.16b, v5.16b, v20.16b, #8 2111 eor v7.16b, v18.16b, v22.16b 2112 ext v18.16b, v21.16b, v21.16b, #12 2113 ushr v23.4s, v7.4s, #7 2114 shl v24.4s, v7.4s, #25 2115 uzp1 v7.4s, v21.4s, v18.4s 2116 orr v18.16b, v24.16b, v23.16b 2117 add v21.4s, v18.4s, v7.4s 2118 add v17.4s, v21.4s, v17.4s 2119 ext v21.16b, v22.16b, v22.16b, #4 2120 eor v16.16b, v17.16b, v16.16b 2121 tbl v16.16b, { v16.16b }, v0.16b 2122 add v21.4s, v21.4s, v16.4s 2123 eor v18.16b, v18.16b, v21.16b 2124 ushr v6.4s, v18.4s, #12 2125 shl v18.4s, v18.4s, #20 2126 orr v5.16b, v18.16b, v6.16b 2127 add v6.4s, v5.4s, v20.4s 2128 add v6.4s, v6.4s, v17.4s 2129 ext v17.16b, v19.16b, v19.16b, #4 2130 eor v16.16b, v16.16b, v6.16b 2131 uzp1 v18.4s, v17.4s, v17.4s 2132 tbl v16.16b, { v16.16b }, v2.16b 2133 ext v18.16b, v18.16b, v17.16b, #8 2134 add v19.4s, v21.4s, v16.4s 2135 uzp2 v18.4s, v18.4s, v4.4s 2136 ext v16.16b, v16.16b, v16.16b, #8 2137 eor v5.16b, v5.16b, v19.16b 2138 add v6.4s, v6.4s, v18.4s 2139 ext v19.16b, v19.16b, v19.16b, #12 2140 ushr v21.4s, v5.4s, #7 2141 shl v5.4s, v5.4s, #25 2142 ext v6.16b, v6.16b, v6.16b, #4 2143 orr v5.16b, v5.16b, v21.16b 2144 ext v21.16b, v17.16b, v17.16b, #12 2145 add v6.4s, v6.4s, v5.4s 2146 ext v17.16b, v17.16b, v21.16b, #12 2147 mov v21.16b, v20.16b 2148 eor v16.16b, v6.16b, v16.16b 2149 rev64 v17.4s, v17.4s 2150 mov v21.s[1], v7.s[2] 2151 tbl v16.16b, { v16.16b }, v0.16b 2152 add v19.4s, v19.4s, v16.4s 2153 eor v5.16b, v5.16b, v19.16b 2154 ushr v22.4s, v5.4s, #12 2155 shl v23.4s, v5.4s, #20 2156 trn2 v5.4s, v17.4s, v21.4s 2157 orr v17.16b, v23.16b, v22.16b 2158 add v21.4s, v17.4s, 
v5.4s 2159 add v6.4s, v21.4s, v6.4s 2160 eor v16.16b, v16.16b, v6.16b 2161 ext v6.16b, v6.16b, v6.16b, #12 2162 tbl v21.16b, { v16.16b }, v2.16b 2163 zip1 v16.2d, v20.2d, v4.2d 2164 zip2 v4.4s, v4.4s, v20.4s 2165 add v19.4s, v19.4s, v21.4s 2166 mov v16.s[3], v7.s[3] 2167 ext v21.16b, v21.16b, v21.16b, #8 2168 zip1 v20.4s, v4.4s, v7.4s 2169 eor v17.16b, v17.16b, v19.16b 2170 ext v22.16b, v16.16b, v16.16b, #12 2171 ext v19.16b, v19.16b, v19.16b, #4 2172 zip1 v4.4s, v7.4s, v4.4s 2173 ushr v23.4s, v17.4s, #7 2174 shl v17.4s, v17.4s, #25 2175 uzp1 v16.4s, v16.4s, v22.4s 2176 ext v4.16b, v4.16b, v20.16b, #8 2177 orr v17.16b, v17.16b, v23.16b 2178 add v22.4s, v17.4s, v16.4s 2179 add v6.4s, v22.4s, v6.4s 2180 eor v21.16b, v6.16b, v21.16b 2181 tbl v21.16b, { v21.16b }, v0.16b 2182 add v19.4s, v19.4s, v21.4s 2183 eor v17.16b, v17.16b, v19.16b 2184 ushr v7.4s, v17.4s, #12 2185 shl v17.4s, v17.4s, #20 2186 orr v7.16b, v17.16b, v7.16b 2187 add v17.4s, v7.4s, v4.4s 2188 add v6.4s, v17.4s, v6.4s 2189 ext v17.16b, v18.16b, v18.16b, #4 2190 eor v18.16b, v21.16b, v6.16b 2191 uzp1 v20.4s, v17.4s, v17.4s 2192 tbl v18.16b, { v18.16b }, v2.16b 2193 ext v20.16b, v20.16b, v17.16b, #8 2194 add v19.4s, v19.4s, v18.4s 2195 uzp2 v20.4s, v20.4s, v5.4s 2196 ext v18.16b, v18.16b, v18.16b, #8 2197 eor v7.16b, v7.16b, v19.16b 2198 add v6.4s, v6.4s, v20.4s 2199 ushr v21.4s, v7.4s, #7 2200 shl v7.4s, v7.4s, #25 2201 ext v6.16b, v6.16b, v6.16b, #4 2202 orr v7.16b, v7.16b, v21.16b 2203 add v21.4s, v6.4s, v7.4s 2204 eor v6.16b, v21.16b, v18.16b 2205 ext v18.16b, v19.16b, v19.16b, #12 2206 tbl v19.16b, { v6.16b }, v0.16b 2207 ext v6.16b, v17.16b, v17.16b, #12 2208 add v18.4s, v18.4s, v19.4s 2209 ext v6.16b, v17.16b, v6.16b, #12 2210 mov v17.16b, v4.16b 2211 eor v7.16b, v7.16b, v18.16b 2212 rev64 v6.4s, v6.4s 2213 mov v17.s[1], v16.s[2] 2214 ushr v22.4s, v7.4s, #12 2215 shl v7.4s, v7.4s, #20 2216 trn2 v6.4s, v6.4s, v17.4s 2217 orr v7.16b, v7.16b, v22.16b 2218 add v17.4s, v7.4s, v6.4s 2219 add v17.4s, 
v17.4s, v21.4s 2220 zip1 v21.2d, v4.2d, v5.2d 2221 zip2 v4.4s, v5.4s, v4.4s 2222 eor v19.16b, v19.16b, v17.16b 2223 mov v21.s[3], v16.s[3] 2224 ext v17.16b, v17.16b, v17.16b, #12 2225 tbl v19.16b, { v19.16b }, v2.16b 2226 ext v22.16b, v21.16b, v21.16b, #12 2227 add v18.4s, v18.4s, v19.4s 2228 ext v19.16b, v19.16b, v19.16b, #8 2229 eor v7.16b, v7.16b, v18.16b 2230 ext v18.16b, v18.16b, v18.16b, #4 2231 ushr v23.4s, v7.4s, #7 2232 shl v24.4s, v7.4s, #25 2233 uzp1 v7.4s, v21.4s, v22.4s 2234 orr v21.16b, v24.16b, v23.16b 2235 add v22.4s, v21.4s, v7.4s 2236 add v17.4s, v22.4s, v17.4s 2237 eor v19.16b, v17.16b, v19.16b 2238 tbl v19.16b, { v19.16b }, v0.16b 2239 add v18.4s, v18.4s, v19.4s 2240 eor v5.16b, v21.16b, v18.16b 2241 zip1 v21.4s, v4.4s, v16.4s 2242 zip1 v4.4s, v16.4s, v4.4s 2243 ushr v16.4s, v5.4s, #12 2244 shl v5.4s, v5.4s, #20 2245 ext v21.16b, v4.16b, v21.16b, #8 2246 orr v4.16b, v5.16b, v16.16b 2247 ext v16.16b, v20.16b, v20.16b, #4 2248 mov v23.16b, v21.16b 2249 add v5.4s, v4.4s, v21.4s 2250 mov v23.s[1], v7.s[2] 2251 add v5.4s, v5.4s, v17.4s 2252 eor v17.16b, v19.16b, v5.16b 2253 uzp1 v19.4s, v16.4s, v16.4s 2254 tbl v17.16b, { v17.16b }, v2.16b 2255 ext v19.16b, v19.16b, v16.16b, #8 2256 add v18.4s, v18.4s, v17.4s 2257 uzp2 v19.4s, v19.4s, v6.4s 2258 eor v4.16b, v4.16b, v18.16b 2259 add v5.4s, v5.4s, v19.4s 2260 ext v19.16b, v19.16b, v19.16b, #4 2261 ushr v20.4s, v4.4s, #7 2262 shl v4.4s, v4.4s, #25 2263 ext v5.16b, v5.16b, v5.16b, #4 2264 orr v20.16b, v4.16b, v20.16b 2265 ext v4.16b, v17.16b, v17.16b, #8 2266 add v17.4s, v5.4s, v20.4s 2267 ext v5.16b, v18.16b, v18.16b, #12 2268 eor v4.16b, v17.16b, v4.16b 2269 tbl v18.16b, { v4.16b }, v0.16b 2270 ext v4.16b, v16.16b, v16.16b, #12 2271 add v22.4s, v5.4s, v18.4s 2272 ext v4.16b, v16.16b, v4.16b, #12 2273 eor v5.16b, v20.16b, v22.16b 2274 rev64 v16.4s, v4.4s 2275 ushr v20.4s, v5.4s, #12 2276 shl v24.4s, v5.4s, #20 2277 trn2 v5.4s, v16.4s, v23.4s 2278 orr v16.16b, v24.16b, v20.16b 2279 add v20.4s, v16.4s, 
v5.4s 2280 add v17.4s, v20.4s, v17.4s 2281 zip1 v20.2d, v21.2d, v6.2d 2282 zip2 v6.4s, v6.4s, v21.4s 2283 eor v18.16b, v18.16b, v17.16b 2284 mov v20.s[3], v7.s[3] 2285 ext v17.16b, v17.16b, v17.16b, #12 2286 zip1 v21.4s, v6.4s, v7.4s 2287 tbl v18.16b, { v18.16b }, v2.16b 2288 ext v24.16b, v20.16b, v20.16b, #12 2289 zip1 v6.4s, v7.4s, v6.4s 2290 add v22.4s, v22.4s, v18.4s 2291 ext v18.16b, v18.16b, v18.16b, #8 2292 ext v6.16b, v6.16b, v21.16b, #8 2293 eor v16.16b, v16.16b, v22.16b 2294 ext v22.16b, v22.16b, v22.16b, #4 2295 zip1 v5.2d, v6.2d, v5.2d 2296 zip2 v4.4s, v4.4s, v6.4s 2297 ushr v25.4s, v16.4s, #7 2298 shl v26.4s, v16.4s, #25 2299 uzp1 v16.4s, v20.4s, v24.4s 2300 orr v20.16b, v26.16b, v25.16b 2301 mov v5.s[3], v16.s[3] 2302 add v24.4s, v20.4s, v16.4s 2303 add v17.4s, v24.4s, v17.4s 2304 eor v18.16b, v17.16b, v18.16b 2305 tbl v18.16b, { v18.16b }, v0.16b 2306 add v22.4s, v22.4s, v18.4s 2307 eor v20.16b, v20.16b, v22.16b 2308 ushr v7.4s, v20.4s, #12 2309 shl v20.4s, v20.4s, #20 2310 orr v7.16b, v20.16b, v7.16b 2311 add v20.4s, v7.4s, v6.4s 2312 add v17.4s, v20.4s, v17.4s 2313 ext v20.16b, v19.16b, v19.16b, #8 2314 eor v18.16b, v18.16b, v17.16b 2315 ext v17.16b, v17.16b, v17.16b, #4 2316 tbl v18.16b, { v18.16b }, v2.16b 2317 add v21.4s, v22.4s, v18.4s 2318 uzp2 v22.4s, v20.4s, v23.4s 2319 ext v18.16b, v18.16b, v18.16b, #8 2320 eor v7.16b, v7.16b, v21.16b 2321 ext v20.16b, v22.16b, v20.16b, #4 2322 ushr v22.4s, v7.4s, #7 2323 shl v7.4s, v7.4s, #25 2324 add v17.4s, v17.4s, v20.4s 2325 ext v20.16b, v21.16b, v21.16b, #12 2326 ext v21.16b, v19.16b, v19.16b, #12 2327 orr v7.16b, v7.16b, v22.16b 2328 ext v19.16b, v19.16b, v21.16b, #12 2329 add v17.4s, v17.4s, v7.4s 2330 mov v21.16b, v6.16b 2331 rev64 v19.4s, v19.4s 2332 eor v18.16b, v17.16b, v18.16b 2333 mov v21.s[1], v16.s[2] 2334 tbl v18.16b, { v18.16b }, v0.16b 2335 trn2 v19.4s, v19.4s, v21.4s 2336 add v20.4s, v20.4s, v18.4s 2337 eor v7.16b, v7.16b, v20.16b 2338 ushr v22.4s, v7.4s, #12 2339 shl v7.4s, v7.4s, #20 
2340 orr v7.16b, v7.16b, v22.16b 2341 add v19.4s, v7.4s, v19.4s 2342 add v17.4s, v19.4s, v17.4s 2343 eor v18.16b, v18.16b, v17.16b 2344 ext v17.16b, v17.16b, v17.16b, #12 2345 tbl v18.16b, { v18.16b }, v2.16b 2346 add v19.4s, v20.4s, v18.4s 2347 ext v20.16b, v5.16b, v5.16b, #12 2348 ext v18.16b, v18.16b, v18.16b, #8 2349 eor v7.16b, v7.16b, v19.16b 2350 uzp1 v5.4s, v5.4s, v20.4s 2351 ushr v21.4s, v7.4s, #7 2352 shl v7.4s, v7.4s, #25 2353 orr v7.16b, v7.16b, v21.16b 2354 add v5.4s, v7.4s, v5.4s 2355 add v5.4s, v5.4s, v17.4s 2356 eor v17.16b, v5.16b, v18.16b 2357 ext v18.16b, v19.16b, v19.16b, #4 2358 tbl v17.16b, { v17.16b }, v0.16b 2359 add v18.4s, v18.4s, v17.4s 2360 eor v6.16b, v7.16b, v18.16b 2361 zip1 v7.4s, v4.4s, v16.4s 2362 zip1 v4.4s, v16.4s, v4.4s 2363 ushr v16.4s, v6.4s, #12 2364 shl v6.4s, v6.4s, #20 2365 ext v4.16b, v4.16b, v7.16b, #8 2366 orr v6.16b, v6.16b, v16.16b 2367 add v4.4s, v6.4s, v4.4s 2368 add v4.4s, v4.4s, v5.4s 2369 eor v5.16b, v17.16b, v4.16b 2370 ext v4.16b, v4.16b, v4.16b, #4 2371 tbl v5.16b, { v5.16b }, v2.16b 2372 add v7.4s, v18.4s, v5.4s 2373 eor v6.16b, v6.16b, v7.16b 2374 ext v7.16b, v7.16b, v7.16b, #12 2375 ushr v16.4s, v6.4s, #7 2376 shl v6.4s, v6.4s, #25 2377 orr v6.16b, v6.16b, v16.16b 2378 ext v16.16b, v5.16b, v5.16b, #8 2379 eor v5.16b, v4.16b, v7.16b 2380 eor v4.16b, v6.16b, v16.16b 2381.LBB3_11: 2382 subs x13, x15, #1 2383 b.eq .LBB3_9 2384 cbnz x15, .LBB3_10 2385 add x4, x4, x12 2386 add x0, x0, #8 2387 subs x1, x1, #1 2388 stp q5, q4, [x8], #32 2389 b.ne .LBB3_8 2390.LBB3_14: 2391 add sp, sp, #368 2392 ldp x20, x19, [sp, #128] 2393 ldp x22, x21, [sp, #112] 2394 ldp x24, x23, [sp, #96] 2395 ldp x26, x25, [sp, #80] 2396 ldp x29, x27, [sp, #64] 2397 ldp d9, d8, [sp, #48] 2398 ldp d11, d10, [sp, #32] 2399 ldp d13, d12, [sp, #16] 2400 ldp d15, d14, [sp], #144 2401 ret 2402.Lfunc_end3: 2403 .size zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41 2404 .cfi_endproc 2405 .section ".note.GNU-stack","",@progbits 
2406#endif 2407