// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
 * Copyright (c) 2019-2022 Samuel Neves
 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
 *
 * This is converted assembly: SSE4.1 -> ARMv8-A
 * Used tools: SIMDe https://github.com/simd-everywhere/simde
 *
 * Should work on FreeBSD, Linux and macOS
 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
 */

#if defined(__aarch64__)

/* make gcc <= 9 happy */
#if !defined(LD_VERSION) || LD_VERSION >= 233010000
#define	CFI_NEGATE_RA_STATE	.cfi_negate_ra_state
#else
#define	CFI_NEGATE_RA_STATE
#endif

	.text

/*
 * GNU property note: one 16-byte descriptor of type 0xc0000000 carrying
 * the 4-byte value 3 (the AArch64 feature-AND property with the BTI and
 * PAC bits set), padded to 8 bytes.
 */
	.section .note.gnu.property,"a",@note
	.p2align 3
	.word	4			/* name size ("GNU\0") */
	.word	16			/* descriptor size */
	.word	5			/* note type */
	.asciz	"GNU"
	.word	0xc0000000		/* property type */
	.word	4			/* property data size */
	.word	3			/* property data */
	.word	0			/* padding */
.Lsec_end0:

/*
 * zfs_blake3_compress_in_place_sse41
 *
 * In:   x0 = pointer to a 32-byte buffer that is read and then
 *            overwritten with the result
 *       x1, w2, x3, w4 = forwarded unchanged to compress_pre as its
 *            x2, w3, x4, w5 (presumably block pointer, block length,
 *            counter and flags of the BLAKE3 compress API - confirm
 *            against the C prototype)
 * Out:  [x0 .. x0+31] = (state row 0 ^ row 2), (state row 1 ^ row 3),
 *       where the four 16-byte rows are what compress_pre left in the
 *       64-byte scratch area on the stack.
 *
 * Frame (96 bytes): [sp, #0..63] scratch state, [sp, #64] x29/x30,
 * [sp, #80] x19.  The return address is signed on entry (hint #25,
 * PACIASP) and authenticated before return (hint #29, AUTIASP).
 */
	.text
	.globl	zfs_blake3_compress_in_place_sse41
	.p2align 2
	.type	zfs_blake3_compress_in_place_sse41,@function
zfs_blake3_compress_in_place_sse41:
	.cfi_startproc
	hint	#25
	CFI_NEGATE_RA_STATE
	sub	sp, sp, #96
	stp	x29, x30, [sp, #64]
	add	x29, sp, #64
	str	x19, [sp, #80]
	.cfi_def_cfa w29, 32
	.cfi_offset w19, -16
	.cfi_offset w30, -24
	.cfi_offset w29, -32
	mov	x19, x0			/* keep the in/out pointer across the call */
	mov	w5, w4			/* shift every argument up one slot ... */
	mov	x4, x3
	mov	w3, w2
	mov	x2, x1
	mov	x0, sp			/* ... so x0 can be the scratch state */
	mov	x1, x19
	bl	compress_pre
	ldp	q0, q1, [sp]		/* rows 0 and 1 */
	ldp	q2, q3, [sp, #32]	/* rows 2 and 3 */
	eor	v0.16b, v2.16b, v0.16b
	eor	v1.16b, v3.16b, v1.16b
	ldp	x29, x30, [sp, #64]
	stp	q0, q1, [x19]
	ldr	x19, [sp, #80]
	add	sp, sp, #96
	hint	#29
	ret
.Lfunc_end0:
	.size	zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
	.cfi_endproc

/*
 * Constant pool for compress_pre.
 *   .LCPI1_0  the 16 bytes loaded into state row 2 (these are the words
 *             0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a)
 *   .LCPI1_1  tbl index vector: rotate every 32-bit lane right by 16
 *   .LCPI1_2  tbl index vector: rotate every 32-bit lane right by 8
 */
	.section .rodata.cst16,"aM",@progbits,16
	.p2align 4
.LCPI1_0:
	.xword	0xbb67ae856a09e667
	.xword	0xa54ff53a3c6ef372
.LCPI1_1:
	.byte	2, 3, 0, 1
	.byte	6, 7, 4, 5
	.byte	10, 11, 8, 9
	.byte	14, 15, 12, 13
.LCPI1_2:
	.byte	1, 2, 3, 0
	.byte	5, 6, 7, 4
	.byte	9, 10, 11, 8
	.byte	13, 14, 15, 12

/*
 * compress_pre (file-local, leaf)
 *
 * In:   x0 = 64-byte output state (four 16-byte rows)
 *       x1 = 32 bytes of input that become rows 0 and 1
 *       x2 = 64 bytes of message words
 *       w3, w5 = values of which only the low 8 bits are used; they end
 *            up in lanes 2 and 3 of row 3
 *       x4 = 64-bit value placed in lanes 0-1 of row 3
 * Out:  [x0] = row 0, [x0, #16] = row 1, [x0, #32] = row 2,
 *       [x0, #48] = row 3 after seven rounds.
 * Uses: x8, v0-v7, v16-v22 only (all caller-saved); no stack.
 *
 * Register roles that stay fixed for the whole body: v0 holds the
 * rotate-by-16 tbl indices and v1 the rotate-by-8 tbl indices (v0 is
 * reused as data from the final quarter-round on).  Rotates by 12 and 7
 * are built from ushr/shl/orr pairs.
 *
 * The rounds are fully unrolled and the message-word permutation for
 * the next round is interleaved with the current one, so the
 * instruction order below is significant and must not be changed.
 */
	.text
	.p2align 2
	.type	compress_pre,@function
compress_pre:
	.cfi_startproc
	hint	#34			/* BTI c landing pad */
	fmov	s1, w3
	movi	d0, #0x0000ff000000ff	/* keep only the low byte of w3 / w5 */
	ldr	q2, [x1]
	adrp	x8, .LCPI1_0
	mov	v1.s[1], w5
	str	q2, [x0]
	ldr	q4, [x8, :lo12:.LCPI1_0]
	ldr	q5, [x1, #16]
	adrp	x8, .LCPI1_1
	and	v0.8b, v1.8b, v0.8b
	fmov	d1, x4
	stp	q5, q4, [x0, #16]
	mov	v1.d[1], v0.d[0]	/* row 3 = { x4 (64 bit), w3 & 0xff, w5 & 0xff } */
	str	q1, [x0, #48]
	ldp	q6, q7, [x2]
	uzp1	v3.4s, v6.4s, v7.4s	/* even message words 0,2,4,6 */
	add	v0.4s, v2.4s, v3.4s
	uzp2	v2.4s, v6.4s, v7.4s	/* odd message words 1,3,5,7 */
	add	v16.4s, v0.4s, v5.4s
	ldr	q0, [x8, :lo12:.LCPI1_1]
	adrp	x8, .LCPI1_2
	eor	v1.16b, v16.16b, v1.16b
	add	v7.4s, v16.4s, v2.4s
	tbl	v1.16b, { v1.16b }, v0.16b
	add	v4.4s, v1.4s, v4.4s
	eor	v5.16b, v4.16b, v5.16b
	ushr	v6.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v6.16b
	add	v6.4s, v7.4s, v5.4s
	eor	v7.16b, v1.16b, v6.16b
	ldr	q1, [x8, :lo12:.LCPI1_2]
	add	x8, x2, #32
	tbl	v7.16b, { v7.16b }, v1.16b
	ld2	{ v16.4s, v17.4s }, [x8]	/* de-interleave message words 8..15 */
	add	v4.4s, v4.4s, v7.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	add	v6.4s, v6.4s, v16.4s
	eor	v5.16b, v4.16b, v5.16b
	ext	v4.16b, v4.16b, v4.16b, #4
	ext	v16.16b, v16.16b, v16.16b, #12
	ext	v6.16b, v6.16b, v6.16b, #12
	ushr	v18.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	orr	v5.16b, v5.16b, v18.16b
	ext	v18.16b, v17.16b, v17.16b, #12
	add	v6.4s, v6.4s, v5.4s
	mov	v17.16b, v18.16b
	eor	v7.16b, v7.16b, v6.16b
	add	v6.4s, v6.4s, v18.4s
	mov	v17.s[1], v16.s[2]
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v4.4s, v4.4s, v7.4s
	eor	v5.16b, v4.16b, v5.16b
	ushr	v19.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v19.16b
	uzp1	v19.4s, v3.4s, v3.4s
	add	v6.4s, v6.4s, v5.4s
	ext	v19.16b, v19.16b, v3.16b, #8
	eor	v7.16b, v7.16b, v6.16b
	uzp2	v19.4s, v19.4s, v2.4s
	tbl	v7.16b, { v7.16b }, v1.16b
	add	v6.4s, v6.4s, v19.4s
	add	v4.4s, v4.4s, v7.4s
	ext	v6.16b, v6.16b, v6.16b, #4
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v5.16b, v4.16b, v5.16b
	ext	v4.16b, v4.16b, v4.16b, #12
	ushr	v20.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	orr	v5.16b, v5.16b, v20.16b
	ext	v20.16b, v3.16b, v3.16b, #12
	add	v6.4s, v6.4s, v5.4s
	ext	v3.16b, v3.16b, v20.16b, #12
	eor	v7.16b, v7.16b, v6.16b
	rev64	v3.4s, v3.4s
	tbl	v7.16b, { v7.16b }, v0.16b
	trn2	v3.4s, v3.4s, v17.4s
	add	v4.4s, v4.4s, v7.4s
	add	v6.4s, v6.4s, v3.4s
	eor	v5.16b, v4.16b, v5.16b
	ushr	v17.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v17.16b
	zip1	v17.2d, v18.2d, v2.2d
	zip2	v2.4s, v2.4s, v18.4s
	add	v6.4s, v6.4s, v5.4s
	mov	v17.s[3], v16.s[3]
	zip1	v18.4s, v2.4s, v16.4s
	zip1	v2.4s, v16.4s, v2.4s
	eor	v7.16b, v7.16b, v6.16b
	ext	v6.16b, v6.16b, v6.16b, #12
	ext	v16.16b, v2.16b, v18.16b, #8
	tbl	v7.16b, { v7.16b }, v1.16b
	add	v20.4s, v4.4s, v7.4s
	ext	v4.16b, v17.16b, v17.16b, #12
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v5.16b, v20.16b, v5.16b
	uzp1	v4.4s, v17.4s, v4.4s
	ushr	v17.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	add	v6.4s, v6.4s, v4.4s
	orr	v5.16b, v5.16b, v17.16b
	ext	v17.16b, v20.16b, v20.16b, #4
	add	v6.4s, v6.4s, v5.4s
	eor	v7.16b, v7.16b, v6.16b
	add	v6.4s, v6.4s, v16.4s
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v17.4s, v17.4s, v7.4s
	eor	v5.16b, v17.16b, v5.16b
	ushr	v2.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v2.16b, v5.16b, v2.16b
	add	v5.4s, v6.4s, v2.4s
	ext	v6.16b, v19.16b, v19.16b, #4
	eor	v7.16b, v7.16b, v5.16b
	uzp1	v18.4s, v6.4s, v6.4s
	tbl	v7.16b, { v7.16b }, v1.16b
	ext	v18.16b, v18.16b, v6.16b, #8
	add	v17.4s, v17.4s, v7.4s
	uzp2	v18.4s, v18.4s, v3.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v2.16b, v17.16b, v2.16b
	add	v5.4s, v5.4s, v18.4s
	ext	v17.16b, v17.16b, v17.16b, #12
	ushr	v19.4s, v2.4s, #7
	shl	v2.4s, v2.4s, #25
	ext	v5.16b, v5.16b, v5.16b, #4
	orr	v2.16b, v2.16b, v19.16b
	ext	v19.16b, v6.16b, v6.16b, #12
	add	v5.4s, v5.4s, v2.4s
	ext	v6.16b, v6.16b, v19.16b, #12
	mov	v19.16b, v16.16b
	eor	v7.16b, v7.16b, v5.16b
	rev64	v6.4s, v6.4s
	mov	v19.s[1], v4.s[2]
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v17.4s, v17.4s, v7.4s
	eor	v20.16b, v17.16b, v2.16b
	trn2	v2.4s, v6.4s, v19.4s
	ushr	v6.4s, v20.4s, #12
	shl	v19.4s, v20.4s, #20
	add	v5.4s, v5.4s, v2.4s
	orr	v6.16b, v19.16b, v6.16b
	add	v19.4s, v5.4s, v6.4s
	eor	v5.16b, v7.16b, v19.16b
	zip1	v7.2d, v16.2d, v3.2d
	zip2	v3.4s, v3.4s, v16.4s
	tbl	v20.16b, { v5.16b }, v1.16b
	mov	v7.s[3], v4.s[3]
	add	v17.4s, v17.4s, v20.4s
	ext	v5.16b, v7.16b, v7.16b, #12
	eor	v6.16b, v17.16b, v6.16b
	uzp1	v5.4s, v7.4s, v5.4s
	ext	v7.16b, v19.16b, v19.16b, #12
	ext	v17.16b, v17.16b, v17.16b, #4
	ushr	v19.4s, v6.4s, #7
	shl	v6.4s, v6.4s, #25
	add	v7.4s, v7.4s, v5.4s
	orr	v6.16b, v6.16b, v19.16b
	ext	v19.16b, v20.16b, v20.16b, #8
	add	v7.4s, v7.4s, v6.4s
	eor	v19.16b, v19.16b, v7.16b
	tbl	v19.16b, { v19.16b }, v0.16b
	add	v16.4s, v17.4s, v19.4s
	zip1	v17.4s, v3.4s, v4.4s
	zip1	v3.4s, v4.4s, v3.4s
	eor	v4.16b, v16.16b, v6.16b
	ext	v17.16b, v3.16b, v17.16b, #8
	ushr	v3.4s, v4.4s, #12
	shl	v4.4s, v4.4s, #20
	add	v6.4s, v7.4s, v17.4s
	orr	v3.16b, v4.16b, v3.16b
	add	v4.4s, v6.4s, v3.4s
	ext	v6.16b, v18.16b, v18.16b, #4
	eor	v7.16b, v19.16b, v4.16b
	uzp1	v18.4s, v6.4s, v6.4s
	tbl	v7.16b, { v7.16b }, v1.16b
	ext	v18.16b, v18.16b, v6.16b, #8
	add	v16.4s, v16.4s, v7.4s
	uzp2	v18.4s, v18.4s, v2.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v3.16b, v16.16b, v3.16b
	add	v4.4s, v4.4s, v18.4s
	ext	v16.16b, v16.16b, v16.16b, #12
	ushr	v19.4s, v3.4s, #7
	shl	v3.4s, v3.4s, #25
	ext	v4.16b, v4.16b, v4.16b, #4
	orr	v3.16b, v3.16b, v19.16b
	ext	v19.16b, v6.16b, v6.16b, #12
	add	v4.4s, v4.4s, v3.4s
	ext	v6.16b, v6.16b, v19.16b, #12
	mov	v19.16b, v17.16b
	eor	v7.16b, v7.16b, v4.16b
	rev64	v6.4s, v6.4s
	mov	v19.s[1], v5.s[2]
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v16.4s, v16.4s, v7.4s
	eor	v20.16b, v16.16b, v3.16b
	trn2	v3.4s, v6.4s, v19.4s
	ushr	v6.4s, v20.4s, #12
	shl	v19.4s, v20.4s, #20
	add	v4.4s, v4.4s, v3.4s
	orr	v6.16b, v19.16b, v6.16b
	zip1	v19.2d, v17.2d, v2.2d
	zip2	v2.4s, v2.4s, v17.4s
	add	v4.4s, v4.4s, v6.4s
	mov	v19.s[3], v5.s[3]
	zip1	v17.4s, v2.4s, v5.4s
	zip1	v2.4s, v5.4s, v2.4s
	eor	v7.16b, v7.16b, v4.16b
	ext	v20.16b, v19.16b, v19.16b, #12
	ext	v4.16b, v4.16b, v4.16b, #12
	ext	v2.16b, v2.16b, v17.16b, #8
	tbl	v7.16b, { v7.16b }, v1.16b
	add	v16.4s, v16.4s, v7.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v21.16b, v16.16b, v6.16b
	uzp1	v6.4s, v19.4s, v20.4s
	ext	v16.16b, v16.16b, v16.16b, #4
	ushr	v19.4s, v21.4s, #7
	shl	v20.4s, v21.4s, #25
	add	v4.4s, v4.4s, v6.4s
	orr	v19.16b, v20.16b, v19.16b
	add	v4.4s, v4.4s, v19.4s
	eor	v7.16b, v7.16b, v4.16b
	add	v4.4s, v4.4s, v2.4s
	tbl	v7.16b, { v7.16b }, v0.16b
	add	v16.4s, v16.4s, v7.4s
	eor	v5.16b, v16.16b, v19.16b
	ushr	v17.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v17.16b
	ext	v17.16b, v18.16b, v18.16b, #4
	add	v4.4s, v4.4s, v5.4s
	uzp1	v18.4s, v17.4s, v17.4s
	eor	v7.16b, v7.16b, v4.16b
	ext	v18.16b, v18.16b, v17.16b, #8
	tbl	v7.16b, { v7.16b }, v1.16b
	uzp2	v18.4s, v18.4s, v3.4s
	add	v16.4s, v16.4s, v7.4s
	add	v4.4s, v4.4s, v18.4s
	ext	v7.16b, v7.16b, v7.16b, #8
	eor	v5.16b, v16.16b, v5.16b
	ext	v4.16b, v4.16b, v4.16b, #4
	ext	v16.16b, v16.16b, v16.16b, #12
	ushr	v19.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	orr	v5.16b, v5.16b, v19.16b
	add	v19.4s, v4.4s, v5.4s
	eor	v4.16b, v7.16b, v19.16b
	ext	v7.16b, v17.16b, v17.16b, #12
	tbl	v20.16b, { v4.16b }, v0.16b
	ext	v4.16b, v17.16b, v7.16b, #12
	mov	v7.16b, v2.16b
	add	v16.4s, v16.4s, v20.4s
	rev64	v4.4s, v4.4s
	mov	v7.s[1], v6.s[2]
	eor	v5.16b, v16.16b, v5.16b
	trn2	v4.4s, v4.4s, v7.4s
	ushr	v7.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	add	v17.4s, v19.4s, v4.4s
	zip1	v19.2d, v2.2d, v3.2d
	zip2	v2.4s, v3.4s, v2.4s
	orr	v5.16b, v5.16b, v7.16b
	mov	v19.s[3], v6.s[3]
	add	v7.4s, v17.4s, v5.4s
	eor	v17.16b, v20.16b, v7.16b
	ext	v20.16b, v19.16b, v19.16b, #12
	ext	v7.16b, v7.16b, v7.16b, #12
	tbl	v17.16b, { v17.16b }, v1.16b
	add	v16.4s, v16.4s, v17.4s
	ext	v17.16b, v17.16b, v17.16b, #8
	eor	v21.16b, v16.16b, v5.16b
	uzp1	v5.4s, v19.4s, v20.4s
	ext	v16.16b, v16.16b, v16.16b, #4
	ushr	v19.4s, v21.4s, #7
	shl	v20.4s, v21.4s, #25
	add	v7.4s, v7.4s, v5.4s
	orr	v19.16b, v20.16b, v19.16b
	add	v7.4s, v7.4s, v19.4s
	eor	v17.16b, v17.16b, v7.16b
	tbl	v17.16b, { v17.16b }, v0.16b
	add	v3.4s, v16.4s, v17.4s
	zip1	v16.4s, v2.4s, v6.4s
	zip1	v2.4s, v6.4s, v2.4s
	eor	v6.16b, v3.16b, v19.16b
	ext	v16.16b, v2.16b, v16.16b, #8
	ushr	v2.4s, v6.4s, #12
	shl	v6.4s, v6.4s, #20
	add	v7.4s, v7.4s, v16.4s
	orr	v2.16b, v6.16b, v2.16b
	add	v6.4s, v7.4s, v2.4s
	ext	v7.16b, v18.16b, v18.16b, #4
	eor	v17.16b, v17.16b, v6.16b
	uzp1	v18.4s, v7.4s, v7.4s
	tbl	v17.16b, { v17.16b }, v1.16b
	ext	v18.16b, v18.16b, v7.16b, #8
	add	v3.4s, v3.4s, v17.4s
	uzp2	v18.4s, v18.4s, v4.4s
	eor	v2.16b, v3.16b, v2.16b
	add	v6.4s, v6.4s, v18.4s
	ext	v3.16b, v3.16b, v3.16b, #12
	ext	v18.16b, v18.16b, v18.16b, #4
	ushr	v19.4s, v2.4s, #7
	shl	v2.4s, v2.4s, #25
	ext	v6.16b, v6.16b, v6.16b, #4
	orr	v19.16b, v2.16b, v19.16b
	ext	v2.16b, v17.16b, v17.16b, #8
	ext	v17.16b, v7.16b, v7.16b, #12
	add	v6.4s, v6.4s, v19.4s
	eor	v2.16b, v2.16b, v6.16b
	tbl	v20.16b, { v2.16b }, v0.16b
	ext	v2.16b, v7.16b, v17.16b, #12
	mov	v7.16b, v16.16b
	add	v17.4s, v3.4s, v20.4s
	rev64	v3.4s, v2.4s
	mov	v7.s[1], v5.s[2]
	eor	v19.16b, v17.16b, v19.16b
	trn2	v3.4s, v3.4s, v7.4s
	ushr	v21.4s, v19.4s, #12
	shl	v19.4s, v19.4s, #20
	add	v6.4s, v6.4s, v3.4s
	orr	v19.16b, v19.16b, v21.16b
	add	v21.4s, v6.4s, v19.4s
	eor	v6.16b, v20.16b, v21.16b
	zip1	v20.2d, v16.2d, v4.2d
	zip2	v4.4s, v4.4s, v16.4s
	tbl	v22.16b, { v6.16b }, v1.16b
	mov	v20.s[3], v5.s[3]
	add	v17.4s, v17.4s, v22.4s
	ext	v6.16b, v20.16b, v20.16b, #12
	eor	v19.16b, v17.16b, v19.16b
	uzp1	v6.4s, v20.4s, v6.4s
	ext	v20.16b, v21.16b, v21.16b, #12
	ext	v17.16b, v17.16b, v17.16b, #4
	ushr	v21.4s, v19.4s, #7
	shl	v19.4s, v19.4s, #25
	add	v20.4s, v20.4s, v6.4s
	orr	v19.16b, v19.16b, v21.16b
	ext	v21.16b, v22.16b, v22.16b, #8
	add	v20.4s, v20.4s, v19.4s
	eor	v21.16b, v21.16b, v20.16b
	tbl	v21.16b, { v21.16b }, v0.16b
	add	v16.4s, v17.4s, v21.4s
	zip1	v17.4s, v4.4s, v5.4s
	zip1	v4.4s, v5.4s, v4.4s
	eor	v5.16b, v16.16b, v19.16b
	ext	v4.16b, v4.16b, v17.16b, #8
	ushr	v17.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	add	v19.4s, v20.4s, v4.4s
	ext	v20.16b, v18.16b, v18.16b, #8
	zip1	v3.2d, v4.2d, v3.2d
	orr	v5.16b, v5.16b, v17.16b
	zip2	v2.4s, v2.4s, v4.4s
	uzp2	v7.4s, v20.4s, v7.4s
	mov	v3.s[3], v6.s[3]
	add	v17.4s, v19.4s, v5.4s
	ext	v7.16b, v7.16b, v20.16b, #4
	eor	v19.16b, v21.16b, v17.16b
	ext	v17.16b, v17.16b, v17.16b, #4
	tbl	v19.16b, { v19.16b }, v1.16b
	add	v7.4s, v17.4s, v7.4s
	add	v16.4s, v16.4s, v19.4s
	ext	v17.16b, v19.16b, v19.16b, #8
	ext	v19.16b, v18.16b, v18.16b, #12
	eor	v5.16b, v16.16b, v5.16b
	ext	v16.16b, v16.16b, v16.16b, #12
	ext	v18.16b, v18.16b, v19.16b, #12
	mov	v19.16b, v4.16b
	ushr	v20.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	rev64	v18.4s, v18.4s
	mov	v19.s[1], v6.s[2]
	orr	v5.16b, v5.16b, v20.16b
	trn2	v18.4s, v18.4s, v19.4s
	add	v7.4s, v5.4s, v7.4s
	eor	v17.16b, v17.16b, v7.16b
	add	v7.4s, v7.4s, v18.4s
	ext	v18.16b, v3.16b, v3.16b, #12
	tbl	v17.16b, { v17.16b }, v0.16b
	uzp1	v3.4s, v3.4s, v18.4s
	add	v16.4s, v16.4s, v17.4s
	eor	v5.16b, v16.16b, v5.16b
	ushr	v19.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v5.16b, v5.16b, v19.16b
	add	v7.4s, v7.4s, v5.4s
	eor	v17.16b, v17.16b, v7.16b
	ext	v7.16b, v7.16b, v7.16b, #12
	tbl	v17.16b, { v17.16b }, v1.16b
	add	v3.4s, v7.4s, v3.4s
	add	v16.4s, v16.4s, v17.4s
	ext	v7.16b, v17.16b, v17.16b, #8
	eor	v5.16b, v16.16b, v5.16b
	ext	v16.16b, v16.16b, v16.16b, #4
	ushr	v18.4s, v5.4s, #7
	shl	v5.4s, v5.4s, #25
	orr	v5.16b, v5.16b, v18.16b
	add	v3.4s, v3.4s, v5.4s
	eor	v7.16b, v7.16b, v3.16b
	tbl	v0.16b, { v7.16b }, v0.16b	/* last use of the rot16 indices */
	zip1	v7.4s, v2.4s, v6.4s
	zip1	v2.4s, v6.4s, v2.4s
	add	v4.4s, v16.4s, v0.4s
	ext	v2.16b, v2.16b, v7.16b, #8
	eor	v5.16b, v4.16b, v5.16b
	add	v2.4s, v3.4s, v2.4s
	ushr	v6.4s, v5.4s, #12
	shl	v5.4s, v5.4s, #20
	orr	v3.16b, v5.16b, v6.16b
	add	v2.4s, v2.4s, v3.4s
	eor	v0.16b, v0.16b, v2.16b
	ext	v2.16b, v2.16b, v2.16b, #4
	tbl	v0.16b, { v0.16b }, v1.16b	/* last use of the rot8 indices */
	add	v1.4s, v4.4s, v0.4s
	ext	v0.16b, v0.16b, v0.16b, #8
	eor	v3.16b, v1.16b, v3.16b
	ext	v1.16b, v1.16b, v1.16b, #12
	ushr	v4.4s, v3.4s, #7
	shl	v3.4s, v3.4s, #25
	stp	q1, q0, [x0, #32]	/* rows 2 and 3 */
	orr	v3.16b, v3.16b, v4.16b
	stp	q2, q3, [x0]		/* rows 0 and 1 */
	ret
.Lfunc_end1:
	.size	compress_pre, .Lfunc_end1-compress_pre
	.cfi_endproc

/*
 * zfs_blake3_compress_xof_sse41
 *
 * In:   x0 = pointer to 32 bytes of input (read only here)
 *       x1, w2, x3, w4 = forwarded unchanged to compress_pre as its
 *            x2, w3, x4, w5
 *       x5 = pointer to a 64-byte output buffer
 * Out:  [x5 +  0] = row 0 ^ row 2
 *       [x5 + 16] = row 1 ^ row 3
 *       [x5 + 32] = row 2 ^ first 16 bytes at x0
 *       [x5 + 48] = row 3 ^ second 16 bytes at x0
 *       where rows 0-3 are the state compress_pre wrote to the stack.
 *
 * Frame (96 bytes): [sp, #0..63] scratch state, [sp, #64] x29/x30,
 * [sp, #80] x20/x19.
 */
	.globl	zfs_blake3_compress_xof_sse41
	.p2align 2
	.type	zfs_blake3_compress_xof_sse41,@function
zfs_blake3_compress_xof_sse41:
	.cfi_startproc
	hint	#25
	CFI_NEGATE_RA_STATE
	sub	sp, sp, #96
	stp	x29, x30, [sp, #64]
	add	x29, sp, #64
	stp	x20, x19, [sp, #80]
	.cfi_def_cfa w29, 32
	.cfi_offset w19, -8
	.cfi_offset w20, -16
	.cfi_offset w30, -24
	.cfi_offset w29, -32
	mov	x20, x0			/* input pointer, needed again after the call */
	mov	x19, x5			/* output pointer */
	mov	w5, w4
	mov	x4, x3
	mov	w3, w2
	mov	x2, x1
	mov	x0, sp
	mov	x1, x20
	bl	compress_pre
	ldp	q0, q1, [sp]
	ldp	q2, q3, [sp, #32]
	eor	v0.16b, v2.16b, v0.16b
	eor	v1.16b, v3.16b, v1.16b
	ldp	x29, x30, [sp, #64]
	stp	q0, q1, [x19]
	ldr	q0, [x20]
	eor	v0.16b, v0.16b, v2.16b
	str	q0, [x19, #32]
	ldr	q0, [x20, #16]
	eor	v0.16b, v0.16b, v3.16b
	str	q0, [x19, #48]
	ldp	x20, x19, [sp, #80]
	add	sp, sp, #96
	hint	#29
	ret
.Lfunc_end2:
	.size	zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
	.cfi_endproc

/*
 * Constant pool for zfs_blake3_hash_many_sse41.
 *   .LCPI3_0  the lane indices 0..3
 *   .LCPI3_1  tbl index vector: rotate every 32-bit lane right by 16
 *   .LCPI3_2  tbl index vector (remaining entries follow)
 */
	.section .rodata.cst16,"aM",@progbits,16
	.p2align 4
.LCPI3_0:
	.word	0, 1, 2, 3
.LCPI3_1:
	.byte	2, 3, 0, 1
	.byte	6, 7, 4, 5
	.byte	10, 11, 8, 9
	.byte	14, 15, 12, 13
.LCPI3_2:
	.byte	1
	.byte	2
	.byte	3
	.byte	0
	.byte	5
646 .byte 6 647 .byte 7 648 .byte 4 649 .byte 9 650 .byte 10 651 .byte 11 652 .byte 8 653 .byte 13 654 .byte 14 655 .byte 15 656 .byte 12 657.LCPI3_3: 658 .word 1779033703 659 .word 3144134277 660 .word 1013904242 661 .word 2773480762 662 .text 663 .globl zfs_blake3_hash_many_sse41 664 .p2align 2 665 .type zfs_blake3_hash_many_sse41,@function 666zfs_blake3_hash_many_sse41: 667 .cfi_startproc 668 hint #34 669 stp d15, d14, [sp, #-144]! 670 stp d13, d12, [sp, #16] 671 stp d11, d10, [sp, #32] 672 stp d9, d8, [sp, #48] 673 stp x29, x27, [sp, #64] 674 stp x26, x25, [sp, #80] 675 stp x24, x23, [sp, #96] 676 stp x22, x21, [sp, #112] 677 stp x20, x19, [sp, #128] 678 sub sp, sp, #368 679 .cfi_def_cfa_offset 512 680 .cfi_offset w19, -8 681 .cfi_offset w20, -16 682 .cfi_offset w21, -24 683 .cfi_offset w22, -32 684 .cfi_offset w23, -40 685 .cfi_offset w24, -48 686 .cfi_offset w25, -56 687 .cfi_offset w26, -64 688 .cfi_offset w27, -72 689 .cfi_offset w29, -80 690 .cfi_offset b8, -88 691 .cfi_offset b9, -96 692 .cfi_offset b10, -104 693 .cfi_offset b11, -112 694 .cfi_offset b12, -120 695 .cfi_offset b13, -128 696 .cfi_offset b14, -136 697 .cfi_offset b15, -144 698 ldr x8, [sp, #520] 699 adrp x11, .LCPI3_1 700 ldrb w9, [sp, #512] 701 adrp x10, .LCPI3_2 702 cmp x1, #4 703 b.lo .LBB3_6 704 adrp x12, .LCPI3_0 705 sbfx w13, w5, #0, #1 706 mov w15, #58983 707 mov w16, #44677 708 movk w15, #27145, lsl #16 709 movk w16, #47975, lsl #16 710 ldr q0, [x12, :lo12:.LCPI3_0] 711 dup v1.4s, w13 712 movi v13.4s, #64 713 mov w13, #62322 714 mov w14, #62778 715 orr w12, w7, w6 716 and v0.16b, v1.16b, v0.16b 717 ldr q1, [x11, :lo12:.LCPI3_1] 718 movk w13, #15470, lsl #16 719 movk w14, #42319, lsl #16 720 dup v14.4s, w15 721 stp q0, q1, [sp, #16] 722 orr v0.4s, #128, lsl #24 723 str q0, [sp] 724 dup v0.4s, w16 725 stp q0, q14, [sp, #48] 726 b .LBB3_3 727.LBB3_2: 728 zip1 v0.4s, v29.4s, v8.4s 729 add x15, x4, #4 730 zip1 v1.4s, v30.4s, v31.4s 731 tst w5, #0x1 732 zip1 v2.4s, v24.4s, v18.4s 733 csel 
x4, x15, x4, ne 734 zip1 v3.4s, v25.4s, v26.4s 735 add x0, x0, #32 736 zip2 v6.4s, v29.4s, v8.4s 737 sub x1, x1, #4 738 zip1 v4.2d, v0.2d, v1.2d 739 cmp x1, #3 740 zip2 v7.4s, v30.4s, v31.4s 741 zip1 v5.2d, v2.2d, v3.2d 742 zip2 v0.2d, v0.2d, v1.2d 743 zip2 v1.2d, v2.2d, v3.2d 744 zip2 v2.4s, v24.4s, v18.4s 745 zip2 v3.4s, v25.4s, v26.4s 746 stp q4, q5, [x8] 747 zip2 v4.2d, v6.2d, v7.2d 748 stp q0, q1, [x8, #32] 749 zip1 v0.2d, v6.2d, v7.2d 750 zip1 v1.2d, v2.2d, v3.2d 751 zip2 v2.2d, v2.2d, v3.2d 752 stp q0, q1, [x8, #64] 753 stp q4, q2, [x8, #96] 754 add x8, x8, #128 755 b.ls .LBB3_6 756.LBB3_3: 757 mov x15, x3 758 add x16, x3, #8 759 add x17, x3, #12 760 add x19, x3, #16 761 add x20, x3, #20 762 ld1r { v29.4s }, [x15], #4 763 ld1r { v30.4s }, [x16] 764 add x16, x3, #24 765 ld1r { v31.4s }, [x17] 766 add x17, x3, #28 767 ld1r { v24.4s }, [x19] 768 ld1r { v18.4s }, [x20] 769 ld1r { v25.4s }, [x16] 770 ld1r { v8.4s }, [x15] 771 ld1r { v26.4s }, [x17] 772 cbz x2, .LBB3_2 773 ldr q1, [sp, #16] 774 dup v0.4s, w4 775 lsr x17, x4, #32 776 mov x15, xzr 777 ldp x19, x20, [x0, #16] 778 add v1.4s, v0.4s, v1.4s 779 mov x21, x2 780 movi v0.4s, #128, lsl #24 781 mov w26, w12 782 str q1, [sp, #96] 783 eor v0.16b, v1.16b, v0.16b 784 ldr q1, [sp] 785 cmgt v0.4s, v1.4s, v0.4s 786 dup v1.4s, w17 787 ldp x16, x17, [x0] 788 sub v0.4s, v1.4s, v0.4s 789 str q0, [sp, #80] 790.LBB3_5: 791 add x23, x16, x15 792 add x24, x17, x15 793 add x22, x19, x15 794 add x25, x20, x15 795 subs x21, x21, #1 796 add x15, x15, #64 797 ldp q1, q2, [x23] 798 csel w27, w9, wzr, eq 799 orr w26, w27, w26 800 and w26, w26, #0xff 801 ldp q4, q5, [x24] 802 dup v0.4s, w26 803 mov w26, w6 804 zip1 v22.4s, v1.4s, v4.4s 805 zip2 v20.4s, v1.4s, v4.4s 806 ldp q6, q7, [x22] 807 zip1 v17.4s, v2.4s, v5.4s 808 zip2 v23.4s, v2.4s, v5.4s 809 ldp q16, q21, [x25] 810 zip1 v19.4s, v6.4s, v16.4s 811 zip2 v1.4s, v6.4s, v16.4s 812 ldp q27, q28, [x23, #32] 813 zip1 v4.4s, v7.4s, v21.4s 814 zip2 v5.4s, v7.4s, v21.4s 815 zip2 
v15.2d, v17.2d, v4.2d 816 ldp q9, q10, [x24, #32] 817 mov v17.d[1], v4.d[0] 818 add v4.4s, v30.4s, v25.4s 819 zip2 v11.2d, v23.2d, v5.2d 820 zip2 v3.4s, v27.4s, v9.4s 821 zip1 v7.4s, v27.4s, v9.4s 822 ldp q12, q6, [x22, #32] 823 mov v23.d[1], v5.d[0] 824 stp q11, q3, [sp, #256] 825 add v5.4s, v31.4s, v26.4s 826 add v4.4s, v4.4s, v17.4s 827 str q23, [sp, #352] 828 ldp q16, q2, [x25, #32] 829 add v5.4s, v5.4s, v23.4s 830 zip1 v3.4s, v12.4s, v16.4s 831 eor v0.16b, v5.16b, v0.16b 832 zip1 v9.4s, v6.4s, v2.4s 833 zip2 v2.4s, v6.4s, v2.4s 834 stp q7, q3, [sp, #208] 835 zip2 v3.4s, v12.4s, v16.4s 836 zip1 v12.4s, v28.4s, v10.4s 837 zip2 v10.4s, v28.4s, v10.4s 838 stp q17, q2, [sp, #160] 839 zip2 v28.2d, v22.2d, v19.2d 840 mov v22.d[1], v19.d[0] 841 str q3, [sp, #240] 842 add v2.4s, v8.4s, v18.4s 843 eor v16.16b, v4.16b, v13.16b 844 dup v17.4s, w13 845 mov v3.16b, v22.16b 846 stp q22, q28, [sp, #320] 847 zip2 v22.2d, v20.2d, v1.2d 848 mov v20.d[1], v1.d[0] 849 add v1.4s, v29.4s, v24.4s 850 add v4.4s, v4.4s, v15.4s 851 add v5.4s, v5.4s, v11.4s 852 add v2.4s, v2.4s, v20.4s 853 stp q15, q20, [sp, #288] 854 add v1.4s, v1.4s, v3.4s 855 ldr q3, [sp, #96] 856 dup v20.4s, w14 857 mov v23.16b, v22.16b 858 mov v15.16b, v10.16b 859 eor v6.16b, v1.16b, v3.16b 860 ldr q3, [sp, #80] 861 add v1.4s, v1.4s, v28.4s 862 ldr q28, [sp, #272] 863 str q23, [sp, #128] 864 eor v7.16b, v2.16b, v3.16b 865 ldp q27, q3, [sp, #32] 866 add v2.4s, v2.4s, v22.4s 867 tbl v6.16b, { v6.16b }, v27.16b 868 tbl v7.16b, { v7.16b }, v27.16b 869 tbl v16.16b, { v16.16b }, v27.16b 870 tbl v0.16b, { v0.16b }, v27.16b 871 add v19.4s, v6.4s, v14.4s 872 add v21.4s, v7.4s, v3.4s 873 add v30.4s, v16.4s, v17.4s 874 add v31.4s, v0.4s, v20.4s 875 eor v24.16b, v19.16b, v24.16b 876 eor v17.16b, v21.16b, v18.16b 877 ushr v18.4s, v24.4s, #12 878 shl v20.4s, v24.4s, #20 879 eor v24.16b, v30.16b, v25.16b 880 eor v25.16b, v31.16b, v26.16b 881 ushr v26.4s, v17.4s, #12 882 shl v17.4s, v17.4s, #20 883 ushr v29.4s, v24.4s, #12 884 shl 
v24.4s, v24.4s, #20 885 ushr v8.4s, v25.4s, #12 886 shl v25.4s, v25.4s, #20 887 orr v3.16b, v20.16b, v18.16b 888 ldr q18, [x10, :lo12:.LCPI3_2] 889 orr v13.16b, v17.16b, v26.16b 890 orr v24.16b, v24.16b, v29.16b 891 orr v14.16b, v25.16b, v8.16b 892 add v8.4s, v1.4s, v3.4s 893 add v29.4s, v2.4s, v13.4s 894 add v17.4s, v4.4s, v24.4s 895 add v20.4s, v5.4s, v14.4s 896 eor v1.16b, v6.16b, v8.16b 897 eor v2.16b, v7.16b, v29.16b 898 eor v4.16b, v16.16b, v17.16b 899 eor v0.16b, v0.16b, v20.16b 900 tbl v25.16b, { v1.16b }, v18.16b 901 tbl v16.16b, { v2.16b }, v18.16b 902 tbl v6.16b, { v4.16b }, v18.16b 903 tbl v4.16b, { v0.16b }, v18.16b 904 add v19.4s, v19.4s, v25.4s 905 add v21.4s, v21.4s, v16.4s 906 add v26.4s, v30.4s, v6.4s 907 add v7.4s, v31.4s, v4.4s 908 eor v0.16b, v19.16b, v3.16b 909 eor v1.16b, v21.16b, v13.16b 910 eor v2.16b, v26.16b, v24.16b 911 eor v3.16b, v7.16b, v14.16b 912 ushr v5.4s, v0.4s, #7 913 shl v0.4s, v0.4s, #25 914 ushr v24.4s, v1.4s, #7 915 shl v1.4s, v1.4s, #25 916 ushr v30.4s, v2.4s, #7 917 shl v2.4s, v2.4s, #25 918 orr v5.16b, v0.16b, v5.16b 919 orr v0.16b, v1.16b, v24.16b 920 ushr v31.4s, v3.4s, #7 921 orr v2.16b, v2.16b, v30.16b 922 ldp q24, q30, [sp, #208] 923 shl v3.4s, v3.4s, #25 924 zip2 v14.2d, v12.2d, v9.2d 925 mov v22.16b, v24.16b 926 orr v1.16b, v3.16b, v31.16b 927 zip2 v3.2d, v24.2d, v30.2d 928 mov v24.16b, v28.16b 929 mov v22.d[1], v30.d[0] 930 ldr q30, [sp, #240] 931 mov v31.16b, v12.16b 932 stp q22, q14, [sp, #224] 933 mov v24.d[1], v30.d[0] 934 add v12.4s, v8.4s, v22.4s 935 mov v31.d[1], v9.d[0] 936 add v22.4s, v29.4s, v24.4s 937 ldr q29, [sp, #176] 938 zip2 v28.2d, v28.2d, v30.2d 939 mov v9.16b, v24.16b 940 mov v15.d[1], v29.d[0] 941 zip2 v8.2d, v10.2d, v29.2d 942 add v10.4s, v12.4s, v0.4s 943 add v22.4s, v22.4s, v2.4s 944 str q9, [sp, #144] 945 add v20.4s, v20.4s, v15.4s 946 add v17.4s, v17.4s, v31.4s 947 stp q3, q8, [sp, #192] 948 eor v4.16b, v4.16b, v10.16b 949 eor v25.16b, v25.16b, v22.16b 950 add v20.4s, v20.4s, v5.4s 951 add 
v17.4s, v17.4s, v1.4s 952 tbl v4.16b, { v4.16b }, v27.16b 953 tbl v25.16b, { v25.16b }, v27.16b 954 eor v6.16b, v6.16b, v20.16b 955 eor v16.16b, v16.16b, v17.16b 956 add v26.4s, v26.4s, v4.4s 957 add v7.4s, v7.4s, v25.4s 958 tbl v6.16b, { v6.16b }, v27.16b 959 tbl v16.16b, { v16.16b }, v27.16b 960 eor v0.16b, v26.16b, v0.16b 961 eor v2.16b, v7.16b, v2.16b 962 add v21.4s, v21.4s, v6.4s 963 add v19.4s, v19.4s, v16.4s 964 ushr v12.4s, v0.4s, #12 965 shl v0.4s, v0.4s, #20 966 ushr v13.4s, v2.4s, #12 967 shl v2.4s, v2.4s, #20 968 eor v5.16b, v21.16b, v5.16b 969 eor v1.16b, v19.16b, v1.16b 970 orr v0.16b, v0.16b, v12.16b 971 add v10.4s, v10.4s, v3.4s 972 orr v2.16b, v2.16b, v13.16b 973 ushr v13.4s, v5.4s, #12 974 shl v5.4s, v5.4s, #20 975 add v22.4s, v22.4s, v28.4s 976 ushr v12.4s, v1.4s, #12 977 shl v1.4s, v1.4s, #20 978 add v10.4s, v10.4s, v0.4s 979 orr v5.16b, v5.16b, v13.16b 980 add v22.4s, v22.4s, v2.4s 981 add v20.4s, v20.4s, v8.4s 982 orr v1.16b, v1.16b, v12.16b 983 add v17.4s, v17.4s, v14.4s 984 eor v4.16b, v4.16b, v10.16b 985 eor v25.16b, v25.16b, v22.16b 986 add v20.4s, v20.4s, v5.4s 987 add v17.4s, v17.4s, v1.4s 988 tbl v4.16b, { v4.16b }, v18.16b 989 tbl v25.16b, { v25.16b }, v18.16b 990 eor v6.16b, v6.16b, v20.16b 991 eor v16.16b, v16.16b, v17.16b 992 add v26.4s, v26.4s, v4.4s 993 add v7.4s, v7.4s, v25.4s 994 tbl v6.16b, { v6.16b }, v18.16b 995 tbl v16.16b, { v16.16b }, v18.16b 996 eor v0.16b, v26.16b, v0.16b 997 eor v2.16b, v7.16b, v2.16b 998 add v21.4s, v21.4s, v6.4s 999 add v19.4s, v19.4s, v16.4s 1000 ushr v12.4s, v0.4s, #7 1001 shl v0.4s, v0.4s, #25 1002 ushr v13.4s, v2.4s, #7 1003 shl v2.4s, v2.4s, #25 1004 eor v5.16b, v21.16b, v5.16b 1005 eor v1.16b, v19.16b, v1.16b 1006 orr v0.16b, v0.16b, v12.16b 1007 add v22.4s, v22.4s, v23.4s 1008 orr v2.16b, v2.16b, v13.16b 1009 ushr v13.4s, v5.4s, #7 1010 shl v5.4s, v5.4s, #25 1011 add v17.4s, v17.4s, v11.4s 1012 mov v30.16b, v28.16b 1013 mov v28.16b, v23.16b 1014 ldr q23, [sp, #304] 1015 ushr v12.4s, v1.4s, #7 
1016 shl v1.4s, v1.4s, #25 1017 add v22.4s, v22.4s, v0.4s 1018 mov v29.16b, v31.16b 1019 ldr q31, [sp, #160] 1020 orr v5.16b, v5.16b, v13.16b 1021 add v17.4s, v17.4s, v2.4s 1022 add v10.4s, v10.4s, v23.4s 1023 orr v1.16b, v1.16b, v12.16b 1024 str q29, [sp, #272] 1025 eor v16.16b, v16.16b, v22.16b 1026 add v20.4s, v20.4s, v31.4s 1027 eor v6.16b, v6.16b, v17.16b 1028 add v10.4s, v10.4s, v5.4s 1029 tbl v16.16b, { v16.16b }, v27.16b 1030 add v20.4s, v20.4s, v1.4s 1031 tbl v6.16b, { v6.16b }, v27.16b 1032 eor v25.16b, v25.16b, v10.16b 1033 add v21.4s, v21.4s, v16.4s 1034 eor v4.16b, v4.16b, v20.16b 1035 add v26.4s, v26.4s, v6.4s 1036 tbl v25.16b, { v25.16b }, v27.16b 1037 eor v0.16b, v21.16b, v0.16b 1038 tbl v4.16b, { v4.16b }, v27.16b 1039 eor v2.16b, v26.16b, v2.16b 1040 add v19.4s, v19.4s, v25.4s 1041 ushr v12.4s, v0.4s, #12 1042 shl v0.4s, v0.4s, #20 1043 add v7.4s, v7.4s, v4.4s 1044 ushr v13.4s, v2.4s, #12 1045 shl v2.4s, v2.4s, #20 1046 eor v5.16b, v5.16b, v19.16b 1047 add v22.4s, v22.4s, v24.4s 1048 ldr q24, [sp, #320] 1049 orr v0.16b, v0.16b, v12.16b 1050 eor v1.16b, v7.16b, v1.16b 1051 orr v2.16b, v2.16b, v13.16b 1052 ushr v12.4s, v5.4s, #12 1053 shl v5.4s, v5.4s, #20 1054 add v17.4s, v17.4s, v24.4s 1055 ldr q24, [sp, #352] 1056 ushr v13.4s, v1.4s, #12 1057 shl v1.4s, v1.4s, #20 1058 add v22.4s, v22.4s, v0.4s 1059 orr v5.16b, v5.16b, v12.16b 1060 add v17.4s, v17.4s, v2.4s 1061 add v10.4s, v10.4s, v24.4s 1062 ldr q24, [sp, #336] 1063 orr v1.16b, v1.16b, v13.16b 1064 eor v16.16b, v16.16b, v22.16b 1065 add v20.4s, v20.4s, v14.4s 1066 eor v6.16b, v6.16b, v17.16b 1067 add v10.4s, v10.4s, v5.4s 1068 tbl v16.16b, { v16.16b }, v18.16b 1069 add v20.4s, v20.4s, v1.4s 1070 tbl v6.16b, { v6.16b }, v18.16b 1071 eor v25.16b, v25.16b, v10.16b 1072 add v21.4s, v21.4s, v16.4s 1073 eor v4.16b, v4.16b, v20.16b 1074 add v26.4s, v26.4s, v6.4s 1075 tbl v25.16b, { v25.16b }, v18.16b 1076 eor v0.16b, v21.16b, v0.16b 1077 tbl v4.16b, { v4.16b }, v18.16b 1078 eor v2.16b, v26.16b, v2.16b 
1079 add v19.4s, v19.4s, v25.4s 1080 ushr v12.4s, v0.4s, #7 1081 shl v0.4s, v0.4s, #25 1082 add v7.4s, v7.4s, v4.4s 1083 ushr v13.4s, v2.4s, #7 1084 shl v2.4s, v2.4s, #25 1085 eor v5.16b, v19.16b, v5.16b 1086 orr v0.16b, v0.16b, v12.16b 1087 eor v1.16b, v7.16b, v1.16b 1088 add v10.4s, v10.4s, v24.4s 1089 orr v2.16b, v2.16b, v13.16b 1090 ushr v12.4s, v5.4s, #7 1091 shl v5.4s, v5.4s, #25 1092 add v22.4s, v22.4s, v29.4s 1093 ushr v13.4s, v1.4s, #7 1094 shl v1.4s, v1.4s, #25 1095 add v10.4s, v10.4s, v0.4s 1096 orr v5.16b, v5.16b, v12.16b 1097 add v22.4s, v22.4s, v2.4s 1098 add v20.4s, v20.4s, v8.4s 1099 ldr q8, [sp, #288] 1100 orr v1.16b, v1.16b, v13.16b 1101 add v17.4s, v17.4s, v3.4s 1102 ldr q3, [sp, #352] 1103 eor v4.16b, v4.16b, v10.16b 1104 eor v25.16b, v25.16b, v22.16b 1105 add v20.4s, v20.4s, v5.4s 1106 add v17.4s, v17.4s, v1.4s 1107 tbl v4.16b, { v4.16b }, v27.16b 1108 tbl v25.16b, { v25.16b }, v27.16b 1109 eor v6.16b, v6.16b, v20.16b 1110 eor v16.16b, v16.16b, v17.16b 1111 add v26.4s, v26.4s, v4.4s 1112 add v7.4s, v7.4s, v25.4s 1113 tbl v6.16b, { v6.16b }, v27.16b 1114 tbl v16.16b, { v16.16b }, v27.16b 1115 eor v0.16b, v26.16b, v0.16b 1116 eor v2.16b, v7.16b, v2.16b 1117 add v21.4s, v21.4s, v6.4s 1118 add v19.4s, v19.4s, v16.4s 1119 ushr v12.4s, v0.4s, #12 1120 shl v0.4s, v0.4s, #20 1121 ushr v13.4s, v2.4s, #12 1122 shl v2.4s, v2.4s, #20 1123 eor v5.16b, v21.16b, v5.16b 1124 eor v1.16b, v19.16b, v1.16b 1125 orr v0.16b, v0.16b, v12.16b 1126 add v10.4s, v10.4s, v30.4s 1127 orr v2.16b, v2.16b, v13.16b 1128 ushr v13.4s, v5.4s, #12 1129 shl v5.4s, v5.4s, #20 1130 add v22.4s, v22.4s, v8.4s 1131 mov v24.16b, v30.16b 1132 mov v30.16b, v15.16b 1133 add v17.4s, v17.4s, v15.4s 1134 ldr q15, [sp, #224] 1135 ushr v12.4s, v1.4s, #12 1136 shl v1.4s, v1.4s, #20 1137 add v10.4s, v10.4s, v0.4s 1138 str q30, [sp, #176] 1139 orr v5.16b, v5.16b, v13.16b 1140 add v22.4s, v22.4s, v2.4s 1141 add v20.4s, v20.4s, v15.4s 1142 orr v1.16b, v1.16b, v12.16b 1143 eor v4.16b, v4.16b, v10.16b 
1144 eor v25.16b, v25.16b, v22.16b 1145 add v20.4s, v20.4s, v5.4s 1146 add v17.4s, v17.4s, v1.4s 1147 tbl v4.16b, { v4.16b }, v18.16b 1148 tbl v25.16b, { v25.16b }, v18.16b 1149 eor v6.16b, v6.16b, v20.16b 1150 eor v16.16b, v16.16b, v17.16b 1151 add v26.4s, v26.4s, v4.4s 1152 add v7.4s, v7.4s, v25.4s 1153 tbl v6.16b, { v6.16b }, v18.16b 1154 tbl v16.16b, { v16.16b }, v18.16b 1155 eor v0.16b, v26.16b, v0.16b 1156 eor v2.16b, v7.16b, v2.16b 1157 add v21.4s, v21.4s, v6.4s 1158 add v19.4s, v19.4s, v16.4s 1159 ushr v12.4s, v0.4s, #7 1160 shl v0.4s, v0.4s, #25 1161 ushr v13.4s, v2.4s, #7 1162 shl v2.4s, v2.4s, #25 1163 eor v5.16b, v21.16b, v5.16b 1164 eor v1.16b, v19.16b, v1.16b 1165 orr v0.16b, v0.16b, v12.16b 1166 add v22.4s, v22.4s, v9.4s 1167 orr v2.16b, v2.16b, v13.16b 1168 ushr v13.4s, v5.4s, #7 1169 shl v5.4s, v5.4s, #25 1170 add v17.4s, v17.4s, v14.4s 1171 ushr v12.4s, v1.4s, #7 1172 shl v1.4s, v1.4s, #25 1173 add v22.4s, v22.4s, v0.4s 1174 orr v5.16b, v5.16b, v13.16b 1175 add v17.4s, v17.4s, v2.4s 1176 add v10.4s, v10.4s, v28.4s 1177 orr v1.16b, v1.16b, v12.16b 1178 eor v16.16b, v16.16b, v22.16b 1179 add v20.4s, v20.4s, v11.4s 1180 eor v6.16b, v6.16b, v17.16b 1181 add v10.4s, v10.4s, v5.4s 1182 tbl v16.16b, { v16.16b }, v27.16b 1183 add v20.4s, v20.4s, v1.4s 1184 tbl v6.16b, { v6.16b }, v27.16b 1185 eor v25.16b, v25.16b, v10.16b 1186 add v21.4s, v21.4s, v16.4s 1187 eor v4.16b, v4.16b, v20.16b 1188 add v26.4s, v26.4s, v6.4s 1189 tbl v25.16b, { v25.16b }, v27.16b 1190 eor v0.16b, v21.16b, v0.16b 1191 tbl v4.16b, { v4.16b }, v27.16b 1192 eor v2.16b, v26.16b, v2.16b 1193 add v19.4s, v19.4s, v25.4s 1194 ushr v12.4s, v0.4s, #12 1195 shl v0.4s, v0.4s, #20 1196 add v7.4s, v7.4s, v4.4s 1197 ushr v13.4s, v2.4s, #12 1198 shl v2.4s, v2.4s, #20 1199 eor v5.16b, v5.16b, v19.16b 1200 orr v0.16b, v0.16b, v12.16b 1201 eor v1.16b, v7.16b, v1.16b 1202 add v22.4s, v22.4s, v29.4s 1203 orr v2.16b, v2.16b, v13.16b 1204 ushr v12.4s, v5.4s, #12 1205 shl v5.4s, v5.4s, #20 1206 add 
v17.4s, v17.4s, v23.4s 1207 ushr v13.4s, v1.4s, #12 1208 shl v1.4s, v1.4s, #20 1209 add v22.4s, v22.4s, v0.4s 1210 orr v5.16b, v5.16b, v12.16b 1211 add v17.4s, v17.4s, v2.4s 1212 add v10.4s, v10.4s, v31.4s 1213 orr v1.16b, v1.16b, v13.16b 1214 eor v16.16b, v16.16b, v22.16b 1215 add v20.4s, v20.4s, v30.4s 1216 eor v6.16b, v6.16b, v17.16b 1217 add v10.4s, v10.4s, v5.4s 1218 tbl v16.16b, { v16.16b }, v18.16b 1219 add v20.4s, v20.4s, v1.4s 1220 tbl v6.16b, { v6.16b }, v18.16b 1221 eor v25.16b, v25.16b, v10.16b 1222 add v21.4s, v21.4s, v16.4s 1223 eor v4.16b, v4.16b, v20.16b 1224 add v26.4s, v26.4s, v6.4s 1225 tbl v25.16b, { v25.16b }, v18.16b 1226 eor v0.16b, v21.16b, v0.16b 1227 tbl v4.16b, { v4.16b }, v18.16b 1228 eor v2.16b, v26.16b, v2.16b 1229 add v19.4s, v19.4s, v25.4s 1230 ushr v12.4s, v0.4s, #7 1231 shl v0.4s, v0.4s, #25 1232 add v7.4s, v7.4s, v4.4s 1233 ushr v13.4s, v2.4s, #7 1234 shl v2.4s, v2.4s, #25 1235 eor v5.16b, v19.16b, v5.16b 1236 add v10.4s, v10.4s, v3.4s 1237 ldr q3, [sp, #192] 1238 orr v0.16b, v0.16b, v12.16b 1239 eor v1.16b, v7.16b, v1.16b 1240 orr v2.16b, v2.16b, v13.16b 1241 ushr v12.4s, v5.4s, #7 1242 shl v5.4s, v5.4s, #25 1243 add v22.4s, v22.4s, v3.4s 1244 ushr v13.4s, v1.4s, #7 1245 shl v1.4s, v1.4s, #25 1246 add v10.4s, v10.4s, v0.4s 1247 orr v5.16b, v5.16b, v12.16b 1248 add v22.4s, v22.4s, v2.4s 1249 add v20.4s, v20.4s, v15.4s 1250 ldr q15, [sp, #128] 1251 orr v1.16b, v1.16b, v13.16b 1252 add v17.4s, v17.4s, v24.4s 1253 eor v4.16b, v4.16b, v10.16b 1254 eor v25.16b, v25.16b, v22.16b 1255 add v20.4s, v20.4s, v5.4s 1256 add v17.4s, v17.4s, v1.4s 1257 tbl v4.16b, { v4.16b }, v27.16b 1258 tbl v25.16b, { v25.16b }, v27.16b 1259 eor v6.16b, v6.16b, v20.16b 1260 eor v16.16b, v16.16b, v17.16b 1261 add v26.4s, v26.4s, v4.4s 1262 add v7.4s, v7.4s, v25.4s 1263 tbl v6.16b, { v6.16b }, v27.16b 1264 tbl v16.16b, { v16.16b }, v27.16b 1265 eor v0.16b, v26.16b, v0.16b 1266 eor v2.16b, v7.16b, v2.16b 1267 add v21.4s, v21.4s, v6.4s 1268 add v19.4s, v19.4s, 
v16.4s 1269 ushr v12.4s, v0.4s, #12 1270 shl v0.4s, v0.4s, #20 1271 ushr v13.4s, v2.4s, #12 1272 shl v2.4s, v2.4s, #20 1273 eor v5.16b, v21.16b, v5.16b 1274 ldp q23, q11, [sp, #320] 1275 eor v1.16b, v19.16b, v1.16b 1276 orr v0.16b, v0.16b, v12.16b 1277 add v10.4s, v10.4s, v8.4s 1278 orr v2.16b, v2.16b, v13.16b 1279 ushr v13.4s, v5.4s, #12 1280 shl v5.4s, v5.4s, #20 1281 add v22.4s, v22.4s, v23.4s 1282 ushr v12.4s, v1.4s, #12 1283 shl v1.4s, v1.4s, #20 1284 add v10.4s, v10.4s, v0.4s 1285 mov v28.16b, v31.16b 1286 mov v31.16b, v8.16b 1287 ldr q8, [sp, #208] 1288 orr v5.16b, v5.16b, v13.16b 1289 add v22.4s, v22.4s, v2.4s 1290 add v20.4s, v20.4s, v11.4s 1291 orr v1.16b, v1.16b, v12.16b 1292 add v17.4s, v17.4s, v8.4s 1293 eor v4.16b, v4.16b, v10.16b 1294 eor v25.16b, v25.16b, v22.16b 1295 add v20.4s, v20.4s, v5.4s 1296 add v17.4s, v17.4s, v1.4s 1297 tbl v4.16b, { v4.16b }, v18.16b 1298 tbl v25.16b, { v25.16b }, v18.16b 1299 eor v6.16b, v6.16b, v20.16b 1300 eor v16.16b, v16.16b, v17.16b 1301 add v26.4s, v26.4s, v4.4s 1302 add v7.4s, v7.4s, v25.4s 1303 tbl v6.16b, { v6.16b }, v18.16b 1304 tbl v16.16b, { v16.16b }, v18.16b 1305 eor v0.16b, v26.16b, v0.16b 1306 eor v2.16b, v7.16b, v2.16b 1307 add v21.4s, v21.4s, v6.4s 1308 add v19.4s, v19.4s, v16.4s 1309 ushr v12.4s, v0.4s, #7 1310 shl v0.4s, v0.4s, #25 1311 ushr v13.4s, v2.4s, #7 1312 shl v2.4s, v2.4s, #25 1313 eor v5.16b, v21.16b, v5.16b 1314 eor v1.16b, v19.16b, v1.16b 1315 orr v0.16b, v0.16b, v12.16b 1316 add v22.4s, v22.4s, v29.4s 1317 orr v2.16b, v2.16b, v13.16b 1318 ushr v13.4s, v5.4s, #7 1319 shl v5.4s, v5.4s, #25 1320 add v17.4s, v17.4s, v30.4s 1321 ushr v12.4s, v1.4s, #7 1322 shl v1.4s, v1.4s, #25 1323 add v22.4s, v22.4s, v0.4s 1324 orr v5.16b, v5.16b, v13.16b 1325 add v17.4s, v17.4s, v2.4s 1326 add v10.4s, v10.4s, v9.4s 1327 orr v1.16b, v1.16b, v12.16b 1328 eor v16.16b, v16.16b, v22.16b 1329 add v20.4s, v20.4s, v14.4s 1330 ldr q14, [sp, #256] 1331 eor v6.16b, v6.16b, v17.16b 1332 add v10.4s, v10.4s, v5.4s 1333 
tbl v16.16b, { v16.16b }, v27.16b 1334 add v20.4s, v20.4s, v1.4s 1335 tbl v6.16b, { v6.16b }, v27.16b 1336 eor v25.16b, v25.16b, v10.16b 1337 add v21.4s, v21.4s, v16.4s 1338 eor v4.16b, v4.16b, v20.16b 1339 add v26.4s, v26.4s, v6.4s 1340 tbl v25.16b, { v25.16b }, v27.16b 1341 eor v0.16b, v21.16b, v0.16b 1342 tbl v4.16b, { v4.16b }, v27.16b 1343 eor v2.16b, v26.16b, v2.16b 1344 add v19.4s, v19.4s, v25.4s 1345 ushr v12.4s, v0.4s, #12 1346 shl v0.4s, v0.4s, #20 1347 add v7.4s, v7.4s, v4.4s 1348 ushr v13.4s, v2.4s, #12 1349 shl v2.4s, v2.4s, #20 1350 eor v5.16b, v5.16b, v19.16b 1351 orr v0.16b, v0.16b, v12.16b 1352 eor v1.16b, v7.16b, v1.16b 1353 add v22.4s, v22.4s, v3.4s 1354 orr v2.16b, v2.16b, v13.16b 1355 ushr v12.4s, v5.4s, #12 1356 shl v5.4s, v5.4s, #20 1357 add v17.4s, v17.4s, v15.4s 1358 ushr v13.4s, v1.4s, #12 1359 shl v1.4s, v1.4s, #20 1360 add v22.4s, v22.4s, v0.4s 1361 orr v5.16b, v5.16b, v12.16b 1362 add v17.4s, v17.4s, v2.4s 1363 add v10.4s, v10.4s, v14.4s 1364 orr v1.16b, v1.16b, v13.16b 1365 eor v16.16b, v16.16b, v22.16b 1366 add v20.4s, v20.4s, v8.4s 1367 eor v6.16b, v6.16b, v17.16b 1368 add v10.4s, v10.4s, v5.4s 1369 tbl v16.16b, { v16.16b }, v18.16b 1370 add v20.4s, v20.4s, v1.4s 1371 tbl v6.16b, { v6.16b }, v18.16b 1372 eor v25.16b, v25.16b, v10.16b 1373 add v21.4s, v21.4s, v16.4s 1374 eor v4.16b, v4.16b, v20.16b 1375 add v26.4s, v26.4s, v6.4s 1376 tbl v25.16b, { v25.16b }, v18.16b 1377 eor v0.16b, v21.16b, v0.16b 1378 tbl v4.16b, { v4.16b }, v18.16b 1379 eor v2.16b, v26.16b, v2.16b 1380 add v19.4s, v19.4s, v25.4s 1381 ushr v12.4s, v0.4s, #7 1382 shl v0.4s, v0.4s, #25 1383 add v7.4s, v7.4s, v4.4s 1384 ushr v13.4s, v2.4s, #7 1385 shl v2.4s, v2.4s, #25 1386 eor v5.16b, v19.16b, v5.16b 1387 orr v0.16b, v0.16b, v12.16b 1388 eor v1.16b, v7.16b, v1.16b 1389 add v10.4s, v10.4s, v28.4s 1390 orr v2.16b, v2.16b, v13.16b 1391 ushr v12.4s, v5.4s, #7 1392 shl v5.4s, v5.4s, #25 1393 add v22.4s, v22.4s, v24.4s 1394 ushr v13.4s, v1.4s, #7 1395 shl v1.4s, v1.4s, #25 
1396 add v10.4s, v10.4s, v0.4s 1397 orr v5.16b, v5.16b, v12.16b 1398 add v22.4s, v22.4s, v2.4s 1399 add v20.4s, v20.4s, v11.4s 1400 ldr q11, [sp, #304] 1401 orr v1.16b, v1.16b, v13.16b 1402 add v17.4s, v17.4s, v31.4s 1403 ldr q31, [sp, #224] 1404 eor v4.16b, v4.16b, v10.16b 1405 eor v25.16b, v25.16b, v22.16b 1406 add v20.4s, v20.4s, v5.4s 1407 add v17.4s, v17.4s, v1.4s 1408 tbl v4.16b, { v4.16b }, v27.16b 1409 tbl v25.16b, { v25.16b }, v27.16b 1410 eor v6.16b, v6.16b, v20.16b 1411 eor v16.16b, v16.16b, v17.16b 1412 add v26.4s, v26.4s, v4.4s 1413 add v7.4s, v7.4s, v25.4s 1414 tbl v6.16b, { v6.16b }, v27.16b 1415 tbl v16.16b, { v16.16b }, v27.16b 1416 eor v0.16b, v26.16b, v0.16b 1417 eor v2.16b, v7.16b, v2.16b 1418 add v21.4s, v21.4s, v6.4s 1419 add v19.4s, v19.4s, v16.4s 1420 ushr v12.4s, v0.4s, #12 1421 shl v0.4s, v0.4s, #20 1422 ushr v13.4s, v2.4s, #12 1423 shl v2.4s, v2.4s, #20 1424 eor v5.16b, v21.16b, v5.16b 1425 eor v1.16b, v19.16b, v1.16b 1426 orr v0.16b, v0.16b, v12.16b 1427 add v10.4s, v10.4s, v23.4s 1428 ldr q23, [sp, #240] 1429 orr v2.16b, v2.16b, v13.16b 1430 ushr v13.4s, v5.4s, #12 1431 shl v5.4s, v5.4s, #20 1432 add v22.4s, v22.4s, v11.4s 1433 mov v30.16b, v8.16b 1434 mov v8.16b, v24.16b 1435 ldr q24, [sp, #352] 1436 ushr v12.4s, v1.4s, #12 1437 shl v1.4s, v1.4s, #20 1438 add v10.4s, v10.4s, v0.4s 1439 orr v5.16b, v5.16b, v13.16b 1440 str q8, [sp, #112] 1441 add v22.4s, v22.4s, v2.4s 1442 add v20.4s, v20.4s, v24.4s 1443 orr v1.16b, v1.16b, v12.16b 1444 add v17.4s, v17.4s, v31.4s 1445 eor v4.16b, v4.16b, v10.16b 1446 eor v25.16b, v25.16b, v22.16b 1447 add v20.4s, v20.4s, v5.4s 1448 add v17.4s, v17.4s, v1.4s 1449 tbl v4.16b, { v4.16b }, v18.16b 1450 tbl v25.16b, { v25.16b }, v18.16b 1451 eor v6.16b, v6.16b, v20.16b 1452 eor v16.16b, v16.16b, v17.16b 1453 add v26.4s, v26.4s, v4.4s 1454 add v7.4s, v7.4s, v25.4s 1455 tbl v6.16b, { v6.16b }, v18.16b 1456 tbl v16.16b, { v16.16b }, v18.16b 1457 eor v0.16b, v26.16b, v0.16b 1458 eor v2.16b, v7.16b, v2.16b 1459 
add v21.4s, v21.4s, v6.4s 1460 mov v29.16b, v3.16b 1461 add v19.4s, v19.4s, v16.4s 1462 ushr v12.4s, v0.4s, #7 1463 shl v0.4s, v0.4s, #25 1464 ushr v13.4s, v2.4s, #7 1465 shl v2.4s, v2.4s, #25 1466 eor v5.16b, v21.16b, v5.16b 1467 eor v1.16b, v19.16b, v1.16b 1468 orr v0.16b, v0.16b, v12.16b 1469 add v22.4s, v22.4s, v29.4s 1470 orr v2.16b, v2.16b, v13.16b 1471 ushr v13.4s, v5.4s, #7 1472 shl v5.4s, v5.4s, #25 1473 add v17.4s, v17.4s, v30.4s 1474 ldr q30, [sp, #272] 1475 ushr v12.4s, v1.4s, #7 1476 shl v1.4s, v1.4s, #25 1477 add v22.4s, v22.4s, v0.4s 1478 mov v3.16b, v28.16b 1479 ldr q28, [sp, #176] 1480 orr v5.16b, v5.16b, v13.16b 1481 add v17.4s, v17.4s, v2.4s 1482 add v10.4s, v10.4s, v30.4s 1483 orr v1.16b, v1.16b, v12.16b 1484 eor v16.16b, v16.16b, v22.16b 1485 add v20.4s, v20.4s, v28.4s 1486 eor v6.16b, v6.16b, v17.16b 1487 add v10.4s, v10.4s, v5.4s 1488 tbl v16.16b, { v16.16b }, v27.16b 1489 add v20.4s, v20.4s, v1.4s 1490 tbl v6.16b, { v6.16b }, v27.16b 1491 eor v25.16b, v25.16b, v10.16b 1492 add v21.4s, v21.4s, v16.4s 1493 eor v4.16b, v4.16b, v20.16b 1494 add v26.4s, v26.4s, v6.4s 1495 tbl v25.16b, { v25.16b }, v27.16b 1496 eor v0.16b, v21.16b, v0.16b 1497 tbl v4.16b, { v4.16b }, v27.16b 1498 eor v2.16b, v26.16b, v2.16b 1499 add v19.4s, v19.4s, v25.4s 1500 ushr v12.4s, v0.4s, #12 1501 shl v0.4s, v0.4s, #20 1502 add v7.4s, v7.4s, v4.4s 1503 ushr v13.4s, v2.4s, #12 1504 shl v2.4s, v2.4s, #20 1505 eor v5.16b, v5.16b, v19.16b 1506 orr v0.16b, v0.16b, v12.16b 1507 eor v1.16b, v7.16b, v1.16b 1508 add v22.4s, v22.4s, v8.4s 1509 orr v2.16b, v2.16b, v13.16b 1510 ushr v12.4s, v5.4s, #12 1511 shl v5.4s, v5.4s, #20 1512 add v17.4s, v17.4s, v9.4s 1513 ldr q9, [sp, #320] 1514 ushr v13.4s, v1.4s, #12 1515 shl v1.4s, v1.4s, #20 1516 add v22.4s, v22.4s, v0.4s 1517 orr v5.16b, v5.16b, v12.16b 1518 add v17.4s, v17.4s, v2.4s 1519 add v10.4s, v10.4s, v23.4s 1520 orr v1.16b, v1.16b, v13.16b 1521 eor v16.16b, v16.16b, v22.16b 1522 add v20.4s, v20.4s, v31.4s 1523 eor v6.16b, v6.16b, 
v17.16b 1524 add v10.4s, v10.4s, v5.4s 1525 tbl v16.16b, { v16.16b }, v18.16b 1526 add v20.4s, v20.4s, v1.4s 1527 tbl v6.16b, { v6.16b }, v18.16b 1528 eor v25.16b, v25.16b, v10.16b 1529 add v21.4s, v21.4s, v16.4s 1530 eor v4.16b, v4.16b, v20.16b 1531 add v26.4s, v26.4s, v6.4s 1532 tbl v25.16b, { v25.16b }, v18.16b 1533 eor v0.16b, v21.16b, v0.16b 1534 tbl v4.16b, { v4.16b }, v18.16b 1535 eor v2.16b, v26.16b, v2.16b 1536 add v19.4s, v19.4s, v25.4s 1537 ushr v12.4s, v0.4s, #7 1538 shl v0.4s, v0.4s, #25 1539 add v7.4s, v7.4s, v4.4s 1540 ushr v13.4s, v2.4s, #7 1541 shl v2.4s, v2.4s, #25 1542 eor v5.16b, v19.16b, v5.16b 1543 add v10.4s, v10.4s, v14.4s 1544 ldr q14, [sp, #288] 1545 orr v0.16b, v0.16b, v12.16b 1546 eor v1.16b, v7.16b, v1.16b 1547 orr v2.16b, v2.16b, v13.16b 1548 ushr v12.4s, v5.4s, #7 1549 shl v5.4s, v5.4s, #25 1550 add v22.4s, v22.4s, v14.4s 1551 ushr v13.4s, v1.4s, #7 1552 shl v1.4s, v1.4s, #25 1553 add v10.4s, v10.4s, v0.4s 1554 orr v5.16b, v5.16b, v12.16b 1555 add v22.4s, v22.4s, v2.4s 1556 add v20.4s, v20.4s, v24.4s 1557 orr v1.16b, v1.16b, v13.16b 1558 eor v4.16b, v4.16b, v10.16b 1559 add v17.4s, v17.4s, v9.4s 1560 eor v25.16b, v25.16b, v22.16b 1561 add v20.4s, v20.4s, v5.4s 1562 tbl v4.16b, { v4.16b }, v27.16b 1563 add v17.4s, v17.4s, v1.4s 1564 tbl v25.16b, { v25.16b }, v27.16b 1565 eor v6.16b, v6.16b, v20.16b 1566 add v26.4s, v26.4s, v4.4s 1567 eor v16.16b, v16.16b, v17.16b 1568 add v7.4s, v7.4s, v25.4s 1569 tbl v6.16b, { v6.16b }, v27.16b 1570 eor v0.16b, v26.16b, v0.16b 1571 tbl v16.16b, { v16.16b }, v27.16b 1572 eor v2.16b, v7.16b, v2.16b 1573 add v21.4s, v21.4s, v6.4s 1574 ushr v12.4s, v0.4s, #12 1575 shl v0.4s, v0.4s, #20 1576 add v19.4s, v19.4s, v16.4s 1577 ushr v13.4s, v2.4s, #12 1578 shl v2.4s, v2.4s, #20 1579 eor v5.16b, v21.16b, v5.16b 1580 orr v0.16b, v0.16b, v12.16b 1581 eor v1.16b, v19.16b, v1.16b 1582 add v10.4s, v10.4s, v11.4s 1583 orr v2.16b, v2.16b, v13.16b 1584 ushr v13.4s, v5.4s, #12 1585 shl v5.4s, v5.4s, #20 1586 ushr v12.4s, 
v1.4s, #12 1587 shl v1.4s, v1.4s, #20 1588 add v10.4s, v10.4s, v0.4s 1589 add v22.4s, v22.4s, v15.4s 1590 orr v5.16b, v5.16b, v13.16b 1591 add v20.4s, v20.4s, v3.4s 1592 mov v24.16b, v3.16b 1593 ldr q3, [sp, #336] 1594 orr v1.16b, v1.16b, v12.16b 1595 eor v4.16b, v4.16b, v10.16b 1596 add v22.4s, v22.4s, v2.4s 1597 add v17.4s, v17.4s, v3.4s 1598 add v20.4s, v20.4s, v5.4s 1599 tbl v4.16b, { v4.16b }, v18.16b 1600 eor v25.16b, v25.16b, v22.16b 1601 add v17.4s, v17.4s, v1.4s 1602 eor v6.16b, v6.16b, v20.16b 1603 add v26.4s, v26.4s, v4.4s 1604 tbl v25.16b, { v25.16b }, v18.16b 1605 eor v16.16b, v16.16b, v17.16b 1606 tbl v6.16b, { v6.16b }, v18.16b 1607 eor v0.16b, v26.16b, v0.16b 1608 add v7.4s, v7.4s, v25.4s 1609 tbl v16.16b, { v16.16b }, v18.16b 1610 add v21.4s, v21.4s, v6.4s 1611 ushr v12.4s, v0.4s, #7 1612 shl v0.4s, v0.4s, #25 1613 eor v2.16b, v7.16b, v2.16b 1614 add v19.4s, v19.4s, v16.4s 1615 eor v5.16b, v21.16b, v5.16b 1616 orr v0.16b, v0.16b, v12.16b 1617 ushr v12.4s, v2.4s, #7 1618 shl v2.4s, v2.4s, #25 1619 eor v1.16b, v19.16b, v1.16b 1620 ushr v13.4s, v5.4s, #7 1621 shl v5.4s, v5.4s, #25 1622 add v22.4s, v22.4s, v8.4s 1623 orr v2.16b, v2.16b, v12.16b 1624 ushr v12.4s, v1.4s, #7 1625 shl v1.4s, v1.4s, #25 1626 orr v5.16b, v5.16b, v13.16b 1627 add v22.4s, v22.4s, v0.4s 1628 add v10.4s, v10.4s, v29.4s 1629 ldr q29, [sp, #208] 1630 add v17.4s, v17.4s, v31.4s 1631 orr v1.16b, v1.16b, v12.16b 1632 add v20.4s, v20.4s, v29.4s 1633 eor v16.16b, v16.16b, v22.16b 1634 add v10.4s, v10.4s, v5.4s 1635 add v17.4s, v17.4s, v2.4s 1636 add v20.4s, v20.4s, v1.4s 1637 tbl v16.16b, { v16.16b }, v27.16b 1638 eor v25.16b, v25.16b, v10.16b 1639 eor v6.16b, v6.16b, v17.16b 1640 eor v4.16b, v4.16b, v20.16b 1641 add v21.4s, v21.4s, v16.4s 1642 tbl v25.16b, { v25.16b }, v27.16b 1643 tbl v6.16b, { v6.16b }, v27.16b 1644 tbl v4.16b, { v4.16b }, v27.16b 1645 eor v0.16b, v21.16b, v0.16b 1646 add v19.4s, v19.4s, v25.4s 1647 add v26.4s, v26.4s, v6.4s 1648 add v7.4s, v7.4s, v4.4s 1649 ushr 
v12.4s, v0.4s, #12 1650 shl v0.4s, v0.4s, #20 1651 eor v5.16b, v5.16b, v19.16b 1652 eor v2.16b, v26.16b, v2.16b 1653 eor v1.16b, v7.16b, v1.16b 1654 orr v0.16b, v0.16b, v12.16b 1655 ushr v12.4s, v5.4s, #12 1656 shl v5.4s, v5.4s, #20 1657 add v22.4s, v22.4s, v14.4s 1658 mov v8.16b, v31.16b 1659 ushr v13.4s, v2.4s, #12 1660 shl v2.4s, v2.4s, #20 1661 mov v31.16b, v14.16b 1662 ushr v14.4s, v1.4s, #12 1663 shl v1.4s, v1.4s, #20 1664 orr v5.16b, v5.16b, v12.16b 1665 add v22.4s, v22.4s, v0.4s 1666 add v10.4s, v10.4s, v28.4s 1667 ldr q28, [sp, #352] 1668 orr v2.16b, v2.16b, v13.16b 1669 orr v1.16b, v1.16b, v14.16b 1670 add v17.4s, v17.4s, v30.4s 1671 add v20.4s, v20.4s, v3.4s 1672 eor v16.16b, v16.16b, v22.16b 1673 add v10.4s, v10.4s, v5.4s 1674 add v17.4s, v17.4s, v2.4s 1675 add v20.4s, v20.4s, v1.4s 1676 tbl v16.16b, { v16.16b }, v18.16b 1677 eor v25.16b, v25.16b, v10.16b 1678 eor v6.16b, v6.16b, v17.16b 1679 eor v4.16b, v4.16b, v20.16b 1680 add v21.4s, v21.4s, v16.4s 1681 tbl v25.16b, { v25.16b }, v18.16b 1682 tbl v6.16b, { v6.16b }, v18.16b 1683 tbl v4.16b, { v4.16b }, v18.16b 1684 eor v0.16b, v21.16b, v0.16b 1685 add v19.4s, v19.4s, v25.4s 1686 add v26.4s, v26.4s, v6.4s 1687 add v7.4s, v7.4s, v4.4s 1688 ushr v12.4s, v0.4s, #7 1689 shl v0.4s, v0.4s, #25 1690 eor v5.16b, v19.16b, v5.16b 1691 eor v2.16b, v26.16b, v2.16b 1692 eor v1.16b, v7.16b, v1.16b 1693 orr v0.16b, v0.16b, v12.16b 1694 ushr v12.4s, v5.4s, #7 1695 shl v5.4s, v5.4s, #25 1696 add v10.4s, v10.4s, v23.4s 1697 ushr v13.4s, v2.4s, #7 1698 shl v2.4s, v2.4s, #25 1699 ushr v14.4s, v1.4s, #7 1700 shl v1.4s, v1.4s, #25 1701 orr v5.16b, v5.16b, v12.16b 1702 add v10.4s, v10.4s, v0.4s 1703 add v20.4s, v20.4s, v24.4s 1704 ldr q24, [sp, #144] 1705 orr v2.16b, v2.16b, v13.16b 1706 orr v1.16b, v1.16b, v14.16b 1707 add v22.4s, v22.4s, v9.4s 1708 add v17.4s, v17.4s, v11.4s 1709 eor v4.16b, v4.16b, v10.16b 1710 add v20.4s, v20.4s, v5.4s 1711 add v22.4s, v22.4s, v2.4s 1712 add v17.4s, v17.4s, v1.4s 1713 tbl v4.16b, { 
v4.16b }, v27.16b 1714 eor v6.16b, v6.16b, v20.16b 1715 eor v25.16b, v25.16b, v22.16b 1716 eor v16.16b, v16.16b, v17.16b 1717 add v26.4s, v26.4s, v4.4s 1718 tbl v6.16b, { v6.16b }, v27.16b 1719 tbl v25.16b, { v25.16b }, v27.16b 1720 tbl v16.16b, { v16.16b }, v27.16b 1721 eor v0.16b, v26.16b, v0.16b 1722 add v21.4s, v21.4s, v6.4s 1723 add v7.4s, v7.4s, v25.4s 1724 add v19.4s, v19.4s, v16.4s 1725 ushr v12.4s, v0.4s, #12 1726 shl v0.4s, v0.4s, #20 1727 eor v5.16b, v21.16b, v5.16b 1728 eor v2.16b, v7.16b, v2.16b 1729 eor v1.16b, v19.16b, v1.16b 1730 orr v0.16b, v0.16b, v12.16b 1731 add v10.4s, v10.4s, v15.4s 1732 ushr v14.4s, v5.4s, #12 1733 shl v5.4s, v5.4s, #20 1734 mov v30.16b, v3.16b 1735 ldr q3, [sp, #256] 1736 ushr v12.4s, v2.4s, #12 1737 shl v2.4s, v2.4s, #20 1738 ushr v13.4s, v1.4s, #12 1739 shl v1.4s, v1.4s, #20 1740 add v10.4s, v10.4s, v0.4s 1741 orr v5.16b, v5.16b, v14.16b 1742 add v20.4s, v20.4s, v3.4s 1743 orr v2.16b, v2.16b, v12.16b 1744 orr v1.16b, v1.16b, v13.16b 1745 add v22.4s, v22.4s, v24.4s 1746 add v17.4s, v17.4s, v28.4s 1747 eor v4.16b, v4.16b, v10.16b 1748 add v20.4s, v20.4s, v5.4s 1749 add v22.4s, v22.4s, v2.4s 1750 add v17.4s, v17.4s, v1.4s 1751 tbl v4.16b, { v4.16b }, v18.16b 1752 eor v6.16b, v6.16b, v20.16b 1753 eor v25.16b, v25.16b, v22.16b 1754 eor v16.16b, v16.16b, v17.16b 1755 add v26.4s, v26.4s, v4.4s 1756 tbl v6.16b, { v6.16b }, v18.16b 1757 tbl v25.16b, { v25.16b }, v18.16b 1758 tbl v16.16b, { v16.16b }, v18.16b 1759 eor v0.16b, v26.16b, v0.16b 1760 add v21.4s, v21.4s, v6.4s 1761 add v7.4s, v7.4s, v25.4s 1762 add v19.4s, v19.4s, v16.4s 1763 ushr v12.4s, v0.4s, #7 1764 shl v0.4s, v0.4s, #25 1765 eor v5.16b, v21.16b, v5.16b 1766 eor v2.16b, v7.16b, v2.16b 1767 eor v1.16b, v19.16b, v1.16b 1768 orr v0.16b, v0.16b, v12.16b 1769 ushr v12.4s, v5.4s, #7 1770 shl v5.4s, v5.4s, #25 1771 mov v23.16b, v9.16b 1772 ldr q9, [sp, #112] 1773 ushr v13.4s, v2.4s, #7 1774 shl v2.4s, v2.4s, #25 1775 ushr v14.4s, v1.4s, #7 1776 shl v1.4s, v1.4s, #25 1777 
orr v5.16b, v5.16b, v12.16b 1778 add v9.4s, v10.4s, v9.4s 1779 orr v2.16b, v2.16b, v13.16b 1780 orr v1.16b, v1.16b, v14.16b 1781 ldr q14, [sp, #64] 1782 add v22.4s, v22.4s, v31.4s 1783 add v17.4s, v17.4s, v30.4s 1784 add v20.4s, v20.4s, v8.4s 1785 add v9.4s, v9.4s, v5.4s 1786 add v22.4s, v22.4s, v0.4s 1787 add v17.4s, v17.4s, v2.4s 1788 add v20.4s, v20.4s, v1.4s 1789 eor v25.16b, v25.16b, v9.16b 1790 eor v16.16b, v16.16b, v22.16b 1791 eor v6.16b, v6.16b, v17.16b 1792 eor v4.16b, v4.16b, v20.16b 1793 tbl v25.16b, { v25.16b }, v27.16b 1794 tbl v16.16b, { v16.16b }, v27.16b 1795 tbl v6.16b, { v6.16b }, v27.16b 1796 tbl v4.16b, { v4.16b }, v27.16b 1797 add v19.4s, v19.4s, v25.4s 1798 add v21.4s, v21.4s, v16.4s 1799 add v26.4s, v26.4s, v6.4s 1800 add v7.4s, v7.4s, v4.4s 1801 eor v5.16b, v5.16b, v19.16b 1802 eor v0.16b, v21.16b, v0.16b 1803 eor v2.16b, v26.16b, v2.16b 1804 eor v1.16b, v7.16b, v1.16b 1805 ushr v30.4s, v5.4s, #12 1806 shl v5.4s, v5.4s, #20 1807 ushr v10.4s, v0.4s, #12 1808 shl v0.4s, v0.4s, #20 1809 ushr v12.4s, v2.4s, #12 1810 shl v2.4s, v2.4s, #20 1811 ushr v13.4s, v1.4s, #12 1812 shl v1.4s, v1.4s, #20 1813 orr v5.16b, v5.16b, v30.16b 1814 add v30.4s, v9.4s, v29.4s 1815 add v22.4s, v22.4s, v23.4s 1816 ldr q23, [sp, #192] 1817 orr v0.16b, v0.16b, v10.16b 1818 orr v2.16b, v2.16b, v12.16b 1819 orr v1.16b, v1.16b, v13.16b 1820 add v17.4s, v17.4s, v23.4s 1821 add v20.4s, v20.4s, v28.4s 1822 add v23.4s, v30.4s, v5.4s 1823 add v22.4s, v22.4s, v0.4s 1824 add v17.4s, v17.4s, v2.4s 1825 add v20.4s, v20.4s, v1.4s 1826 eor v25.16b, v25.16b, v23.16b 1827 eor v16.16b, v16.16b, v22.16b 1828 eor v6.16b, v6.16b, v17.16b 1829 eor v4.16b, v4.16b, v20.16b 1830 tbl v25.16b, { v25.16b }, v18.16b 1831 tbl v16.16b, { v16.16b }, v18.16b 1832 tbl v6.16b, { v6.16b }, v18.16b 1833 tbl v4.16b, { v4.16b }, v18.16b 1834 add v19.4s, v19.4s, v25.4s 1835 add v21.4s, v21.4s, v16.4s 1836 add v26.4s, v26.4s, v6.4s 1837 add v7.4s, v7.4s, v4.4s 1838 eor v5.16b, v19.16b, v5.16b 1839 eor 
v0.16b, v21.16b, v0.16b 1840 eor v2.16b, v26.16b, v2.16b 1841 eor v1.16b, v7.16b, v1.16b 1842 ushr v28.4s, v5.4s, #7 1843 shl v5.4s, v5.4s, #25 1844 ushr v30.4s, v0.4s, #7 1845 shl v0.4s, v0.4s, #25 1846 ushr v31.4s, v2.4s, #7 1847 shl v2.4s, v2.4s, #25 1848 ushr v8.4s, v1.4s, #7 1849 shl v1.4s, v1.4s, #25 1850 orr v5.16b, v5.16b, v28.16b 1851 ldr q28, [sp, #176] 1852 orr v0.16b, v0.16b, v30.16b 1853 orr v2.16b, v2.16b, v31.16b 1854 orr v1.16b, v1.16b, v8.16b 1855 add v23.4s, v23.4s, v28.4s 1856 add v22.4s, v22.4s, v11.4s 1857 add v17.4s, v17.4s, v15.4s 1858 add v20.4s, v20.4s, v3.4s 1859 ldr q3, [sp, #272] 1860 add v23.4s, v23.4s, v0.4s 1861 add v22.4s, v22.4s, v2.4s 1862 add v17.4s, v17.4s, v1.4s 1863 add v20.4s, v20.4s, v5.4s 1864 eor v4.16b, v4.16b, v23.16b 1865 eor v25.16b, v25.16b, v22.16b 1866 eor v16.16b, v16.16b, v17.16b 1867 eor v6.16b, v6.16b, v20.16b 1868 tbl v4.16b, { v4.16b }, v27.16b 1869 tbl v25.16b, { v25.16b }, v27.16b 1870 tbl v16.16b, { v16.16b }, v27.16b 1871 tbl v6.16b, { v6.16b }, v27.16b 1872 add v26.4s, v26.4s, v4.4s 1873 add v7.4s, v7.4s, v25.4s 1874 add v19.4s, v19.4s, v16.4s 1875 add v21.4s, v21.4s, v6.4s 1876 eor v0.16b, v26.16b, v0.16b 1877 eor v2.16b, v7.16b, v2.16b 1878 eor v1.16b, v19.16b, v1.16b 1879 eor v5.16b, v21.16b, v5.16b 1880 add v3.4s, v22.4s, v3.4s 1881 ldr q22, [sp, #160] 1882 ushr v28.4s, v0.4s, #12 1883 shl v0.4s, v0.4s, #20 1884 ushr v29.4s, v2.4s, #12 1885 shl v2.4s, v2.4s, #20 1886 ushr v30.4s, v1.4s, #12 1887 shl v1.4s, v1.4s, #20 1888 ushr v31.4s, v5.4s, #12 1889 shl v5.4s, v5.4s, #20 1890 add v17.4s, v17.4s, v22.4s 1891 ldr q22, [sp, #240] 1892 orr v0.16b, v0.16b, v28.16b 1893 prfm pldl1keep, [x23, #256] 1894 orr v2.16b, v2.16b, v29.16b 1895 prfm pldl1keep, [x24, #256] 1896 orr v1.16b, v1.16b, v30.16b 1897 prfm pldl1keep, [x22, #256] 1898 orr v5.16b, v5.16b, v31.16b 1899 prfm pldl1keep, [x25, #256] 1900 add v23.4s, v23.4s, v24.4s 1901 add v20.4s, v20.4s, v22.4s 1902 add v3.4s, v3.4s, v2.4s 1903 add v17.4s, v17.4s, 
v1.4s 1904 add v22.4s, v23.4s, v0.4s 1905 add v20.4s, v20.4s, v5.4s 1906 eor v23.16b, v25.16b, v3.16b 1907 eor v16.16b, v16.16b, v17.16b 1908 eor v4.16b, v4.16b, v22.16b 1909 eor v6.16b, v6.16b, v20.16b 1910 tbl v23.16b, { v23.16b }, v18.16b 1911 tbl v16.16b, { v16.16b }, v18.16b 1912 tbl v4.16b, { v4.16b }, v18.16b 1913 tbl v6.16b, { v6.16b }, v18.16b 1914 add v7.4s, v7.4s, v23.4s 1915 add v19.4s, v19.4s, v16.4s 1916 add v18.4s, v26.4s, v4.4s 1917 add v21.4s, v21.4s, v6.4s 1918 eor v2.16b, v7.16b, v2.16b 1919 eor v1.16b, v19.16b, v1.16b 1920 eor v0.16b, v18.16b, v0.16b 1921 eor v5.16b, v21.16b, v5.16b 1922 ushr v25.4s, v2.4s, #7 1923 shl v2.4s, v2.4s, #25 1924 ushr v24.4s, v0.4s, #7 1925 shl v0.4s, v0.4s, #25 1926 ushr v26.4s, v1.4s, #7 1927 shl v1.4s, v1.4s, #25 1928 ushr v27.4s, v5.4s, #7 1929 shl v5.4s, v5.4s, #25 1930 orr v0.16b, v0.16b, v24.16b 1931 orr v2.16b, v2.16b, v25.16b 1932 orr v1.16b, v1.16b, v26.16b 1933 orr v5.16b, v5.16b, v27.16b 1934 movi v13.4s, #64 1935 eor v29.16b, v19.16b, v22.16b 1936 eor v8.16b, v21.16b, v3.16b 1937 eor v30.16b, v17.16b, v18.16b 1938 eor v31.16b, v20.16b, v7.16b 1939 eor v24.16b, v5.16b, v23.16b 1940 eor v18.16b, v0.16b, v16.16b 1941 eor v25.16b, v2.16b, v6.16b 1942 eor v26.16b, v1.16b, v4.16b 1943 cbnz x21, .LBB3_5 1944 b .LBB3_2 1945.LBB3_6: 1946 cbz x1, .LBB3_14 1947 adrp x12, .LCPI3_3 1948 ldr q0, [x11, :lo12:.LCPI3_1] 1949 orr w11, w7, w6 1950 ldr q2, [x10, :lo12:.LCPI3_2] 1951 ldr q1, [x12, :lo12:.LCPI3_3] 1952 and x12, x5, #0x1 1953.LBB3_8: 1954 movi v3.4s, #64 1955 lsr x13, x4, #32 1956 ldp q5, q4, [x3] 1957 mov x15, x2 1958 mov w14, w11 1959 mov v3.s[0], w4 1960 ldr x10, [x0] 1961 mov v3.s[1], w13 1962 b .LBB3_11 1963.LBB3_9: 1964 orr w14, w14, w9 1965.LBB3_10: 1966 ldp q6, q7, [x10] 1967 mov v16.16b, v3.16b 1968 and w14, w14, #0xff 1969 add v5.4s, v5.4s, v4.4s 1970 mov x15, x13 1971 mov v16.s[3], w14 1972 add x14, x10, #32 1973 uzp1 v17.4s, v6.4s, v7.4s 1974 add x10, x10, #64 1975 add v5.4s, v5.4s, v17.4s 1976 eor 
v16.16b, v5.16b, v16.16b 1977 tbl v16.16b, { v16.16b }, v0.16b 1978 add v18.4s, v16.4s, v1.4s 1979 eor v19.16b, v18.16b, v4.16b 1980 uzp2 v4.4s, v6.4s, v7.4s 1981 ushr v6.4s, v19.4s, #12 1982 shl v7.4s, v19.4s, #20 1983 ld2 { v19.4s, v20.4s }, [x14] 1984 add v5.4s, v5.4s, v4.4s 1985 mov w14, w6 1986 orr v6.16b, v7.16b, v6.16b 1987 add v5.4s, v5.4s, v6.4s 1988 eor v7.16b, v16.16b, v5.16b 1989 add v5.4s, v5.4s, v19.4s 1990 tbl v7.16b, { v7.16b }, v2.16b 1991 ext v5.16b, v5.16b, v5.16b, #12 1992 add v16.4s, v18.4s, v7.4s 1993 ext v7.16b, v7.16b, v7.16b, #8 1994 eor v6.16b, v6.16b, v16.16b 1995 ext v16.16b, v16.16b, v16.16b, #4 1996 ushr v18.4s, v6.4s, #7 1997 shl v6.4s, v6.4s, #25 1998 orr v6.16b, v6.16b, v18.16b 1999 ext v18.16b, v20.16b, v20.16b, #12 2000 add v5.4s, v5.4s, v6.4s 2001 eor v7.16b, v5.16b, v7.16b 2002 add v5.4s, v5.4s, v18.4s 2003 tbl v7.16b, { v7.16b }, v0.16b 2004 add v16.4s, v16.4s, v7.4s 2005 eor v6.16b, v6.16b, v16.16b 2006 ushr v21.4s, v6.4s, #12 2007 shl v6.4s, v6.4s, #20 2008 orr v6.16b, v6.16b, v21.16b 2009 uzp1 v21.4s, v17.4s, v17.4s 2010 add v5.4s, v5.4s, v6.4s 2011 ext v21.16b, v21.16b, v17.16b, #8 2012 eor v7.16b, v7.16b, v5.16b 2013 uzp2 v21.4s, v21.4s, v4.4s 2014 tbl v7.16b, { v7.16b }, v2.16b 2015 add v5.4s, v5.4s, v21.4s 2016 add v16.4s, v16.4s, v7.4s 2017 ext v5.16b, v5.16b, v5.16b, #4 2018 ext v7.16b, v7.16b, v7.16b, #8 2019 eor v6.16b, v6.16b, v16.16b 2020 ushr v22.4s, v6.4s, #7 2021 shl v6.4s, v6.4s, #25 2022 orr v6.16b, v6.16b, v22.16b 2023 add v22.4s, v5.4s, v6.4s 2024 eor v5.16b, v22.16b, v7.16b 2025 ext v7.16b, v16.16b, v16.16b, #12 2026 tbl v16.16b, { v5.16b }, v0.16b 2027 ext v5.16b, v17.16b, v17.16b, #12 2028 add v7.4s, v7.4s, v16.4s 2029 ext v5.16b, v17.16b, v5.16b, #12 2030 ext v17.16b, v19.16b, v19.16b, #12 2031 mov v19.16b, v18.16b 2032 eor v6.16b, v6.16b, v7.16b 2033 rev64 v5.4s, v5.4s 2034 mov v19.s[1], v17.s[2] 2035 ushr v20.4s, v6.4s, #12 2036 shl v6.4s, v6.4s, #20 2037 trn2 v5.4s, v5.4s, v19.4s 2038 orr v6.16b, 
v6.16b, v20.16b 2039 zip1 v20.2d, v18.2d, v4.2d 2040 zip2 v4.4s, v4.4s, v18.4s 2041 add v19.4s, v6.4s, v5.4s 2042 mov v20.s[3], v17.s[3] 2043 add v19.4s, v19.4s, v22.4s 2044 ext v22.16b, v20.16b, v20.16b, #12 2045 eor v16.16b, v16.16b, v19.16b 2046 ext v19.16b, v19.16b, v19.16b, #12 2047 tbl v16.16b, { v16.16b }, v2.16b 2048 add v7.4s, v7.4s, v16.4s 2049 ext v16.16b, v16.16b, v16.16b, #8 2050 eor v6.16b, v6.16b, v7.16b 2051 ext v7.16b, v7.16b, v7.16b, #4 2052 ushr v23.4s, v6.4s, #7 2053 shl v24.4s, v6.4s, #25 2054 uzp1 v6.4s, v20.4s, v22.4s 2055 orr v20.16b, v24.16b, v23.16b 2056 add v22.4s, v20.4s, v6.4s 2057 add v19.4s, v22.4s, v19.4s 2058 eor v16.16b, v19.16b, v16.16b 2059 tbl v16.16b, { v16.16b }, v0.16b 2060 add v7.4s, v7.4s, v16.4s 2061 eor v18.16b, v20.16b, v7.16b 2062 zip1 v20.4s, v4.4s, v17.4s 2063 zip1 v4.4s, v17.4s, v4.4s 2064 ushr v17.4s, v18.4s, #12 2065 shl v18.4s, v18.4s, #20 2066 ext v20.16b, v4.16b, v20.16b, #8 2067 orr v4.16b, v18.16b, v17.16b 2068 ext v18.16b, v21.16b, v21.16b, #4 2069 add v17.4s, v4.4s, v20.4s 2070 add v17.4s, v17.4s, v19.4s 2071 uzp1 v19.4s, v18.4s, v18.4s 2072 eor v16.16b, v16.16b, v17.16b 2073 ext v19.16b, v19.16b, v18.16b, #8 2074 tbl v16.16b, { v16.16b }, v2.16b 2075 uzp2 v19.4s, v19.4s, v5.4s 2076 add v7.4s, v7.4s, v16.4s 2077 add v17.4s, v17.4s, v19.4s 2078 ext v16.16b, v16.16b, v16.16b, #8 2079 eor v4.16b, v4.16b, v7.16b 2080 ext v17.16b, v17.16b, v17.16b, #4 2081 ext v7.16b, v7.16b, v7.16b, #12 2082 ushr v21.4s, v4.4s, #7 2083 shl v4.4s, v4.4s, #25 2084 orr v4.16b, v4.16b, v21.16b 2085 ext v21.16b, v18.16b, v18.16b, #12 2086 add v17.4s, v17.4s, v4.4s 2087 ext v18.16b, v18.16b, v21.16b, #12 2088 mov v21.16b, v20.16b 2089 eor v16.16b, v17.16b, v16.16b 2090 rev64 v18.4s, v18.4s 2091 mov v21.s[1], v6.s[2] 2092 tbl v16.16b, { v16.16b }, v0.16b 2093 add v7.4s, v7.4s, v16.4s 2094 eor v4.16b, v4.16b, v7.16b 2095 ushr v22.4s, v4.4s, #12 2096 shl v23.4s, v4.4s, #20 2097 trn2 v4.4s, v18.4s, v21.4s 2098 orr v18.16b, v23.16b, 
v22.16b 2099 add v21.4s, v18.4s, v4.4s 2100 add v17.4s, v21.4s, v17.4s 2101 zip1 v21.2d, v20.2d, v5.2d 2102 zip2 v5.4s, v5.4s, v20.4s 2103 eor v16.16b, v16.16b, v17.16b 2104 mov v21.s[3], v6.s[3] 2105 ext v17.16b, v17.16b, v17.16b, #12 2106 zip1 v20.4s, v5.4s, v6.4s 2107 tbl v16.16b, { v16.16b }, v2.16b 2108 zip1 v5.4s, v6.4s, v5.4s 2109 add v22.4s, v7.4s, v16.4s 2110 ext v16.16b, v16.16b, v16.16b, #8 2111 ext v20.16b, v5.16b, v20.16b, #8 2112 eor v7.16b, v18.16b, v22.16b 2113 ext v18.16b, v21.16b, v21.16b, #12 2114 ushr v23.4s, v7.4s, #7 2115 shl v24.4s, v7.4s, #25 2116 uzp1 v7.4s, v21.4s, v18.4s 2117 orr v18.16b, v24.16b, v23.16b 2118 add v21.4s, v18.4s, v7.4s 2119 add v17.4s, v21.4s, v17.4s 2120 ext v21.16b, v22.16b, v22.16b, #4 2121 eor v16.16b, v17.16b, v16.16b 2122 tbl v16.16b, { v16.16b }, v0.16b 2123 add v21.4s, v21.4s, v16.4s 2124 eor v18.16b, v18.16b, v21.16b 2125 ushr v6.4s, v18.4s, #12 2126 shl v18.4s, v18.4s, #20 2127 orr v5.16b, v18.16b, v6.16b 2128 add v6.4s, v5.4s, v20.4s 2129 add v6.4s, v6.4s, v17.4s 2130 ext v17.16b, v19.16b, v19.16b, #4 2131 eor v16.16b, v16.16b, v6.16b 2132 uzp1 v18.4s, v17.4s, v17.4s 2133 tbl v16.16b, { v16.16b }, v2.16b 2134 ext v18.16b, v18.16b, v17.16b, #8 2135 add v19.4s, v21.4s, v16.4s 2136 uzp2 v18.4s, v18.4s, v4.4s 2137 ext v16.16b, v16.16b, v16.16b, #8 2138 eor v5.16b, v5.16b, v19.16b 2139 add v6.4s, v6.4s, v18.4s 2140 ext v19.16b, v19.16b, v19.16b, #12 2141 ushr v21.4s, v5.4s, #7 2142 shl v5.4s, v5.4s, #25 2143 ext v6.16b, v6.16b, v6.16b, #4 2144 orr v5.16b, v5.16b, v21.16b 2145 ext v21.16b, v17.16b, v17.16b, #12 2146 add v6.4s, v6.4s, v5.4s 2147 ext v17.16b, v17.16b, v21.16b, #12 2148 mov v21.16b, v20.16b 2149 eor v16.16b, v6.16b, v16.16b 2150 rev64 v17.4s, v17.4s 2151 mov v21.s[1], v7.s[2] 2152 tbl v16.16b, { v16.16b }, v0.16b 2153 add v19.4s, v19.4s, v16.4s 2154 eor v5.16b, v5.16b, v19.16b 2155 ushr v22.4s, v5.4s, #12 2156 shl v23.4s, v5.4s, #20 2157 trn2 v5.4s, v17.4s, v21.4s 2158 orr v17.16b, v23.16b, v22.16b 2159 
add v21.4s, v17.4s, v5.4s 2160 add v6.4s, v21.4s, v6.4s 2161 eor v16.16b, v16.16b, v6.16b 2162 ext v6.16b, v6.16b, v6.16b, #12 2163 tbl v21.16b, { v16.16b }, v2.16b 2164 zip1 v16.2d, v20.2d, v4.2d 2165 zip2 v4.4s, v4.4s, v20.4s 2166 add v19.4s, v19.4s, v21.4s 2167 mov v16.s[3], v7.s[3] 2168 ext v21.16b, v21.16b, v21.16b, #8 2169 zip1 v20.4s, v4.4s, v7.4s 2170 eor v17.16b, v17.16b, v19.16b 2171 ext v22.16b, v16.16b, v16.16b, #12 2172 ext v19.16b, v19.16b, v19.16b, #4 2173 zip1 v4.4s, v7.4s, v4.4s 2174 ushr v23.4s, v17.4s, #7 2175 shl v17.4s, v17.4s, #25 2176 uzp1 v16.4s, v16.4s, v22.4s 2177 ext v4.16b, v4.16b, v20.16b, #8 2178 orr v17.16b, v17.16b, v23.16b 2179 add v22.4s, v17.4s, v16.4s 2180 add v6.4s, v22.4s, v6.4s 2181 eor v21.16b, v6.16b, v21.16b 2182 tbl v21.16b, { v21.16b }, v0.16b 2183 add v19.4s, v19.4s, v21.4s 2184 eor v17.16b, v17.16b, v19.16b 2185 ushr v7.4s, v17.4s, #12 2186 shl v17.4s, v17.4s, #20 2187 orr v7.16b, v17.16b, v7.16b 2188 add v17.4s, v7.4s, v4.4s 2189 add v6.4s, v17.4s, v6.4s 2190 ext v17.16b, v18.16b, v18.16b, #4 2191 eor v18.16b, v21.16b, v6.16b 2192 uzp1 v20.4s, v17.4s, v17.4s 2193 tbl v18.16b, { v18.16b }, v2.16b 2194 ext v20.16b, v20.16b, v17.16b, #8 2195 add v19.4s, v19.4s, v18.4s 2196 uzp2 v20.4s, v20.4s, v5.4s 2197 ext v18.16b, v18.16b, v18.16b, #8 2198 eor v7.16b, v7.16b, v19.16b 2199 add v6.4s, v6.4s, v20.4s 2200 ushr v21.4s, v7.4s, #7 2201 shl v7.4s, v7.4s, #25 2202 ext v6.16b, v6.16b, v6.16b, #4 2203 orr v7.16b, v7.16b, v21.16b 2204 add v21.4s, v6.4s, v7.4s 2205 eor v6.16b, v21.16b, v18.16b 2206 ext v18.16b, v19.16b, v19.16b, #12 2207 tbl v19.16b, { v6.16b }, v0.16b 2208 ext v6.16b, v17.16b, v17.16b, #12 2209 add v18.4s, v18.4s, v19.4s 2210 ext v6.16b, v17.16b, v6.16b, #12 2211 mov v17.16b, v4.16b 2212 eor v7.16b, v7.16b, v18.16b 2213 rev64 v6.4s, v6.4s 2214 mov v17.s[1], v16.s[2] 2215 ushr v22.4s, v7.4s, #12 2216 shl v7.4s, v7.4s, #20 2217 trn2 v6.4s, v6.4s, v17.4s 2218 orr v7.16b, v7.16b, v22.16b 2219 add v17.4s, v7.4s, v6.4s 
2220 add v17.4s, v17.4s, v21.4s 2221 zip1 v21.2d, v4.2d, v5.2d 2222 zip2 v4.4s, v5.4s, v4.4s 2223 eor v19.16b, v19.16b, v17.16b 2224 mov v21.s[3], v16.s[3] 2225 ext v17.16b, v17.16b, v17.16b, #12 2226 tbl v19.16b, { v19.16b }, v2.16b 2227 ext v22.16b, v21.16b, v21.16b, #12 2228 add v18.4s, v18.4s, v19.4s 2229 ext v19.16b, v19.16b, v19.16b, #8 2230 eor v7.16b, v7.16b, v18.16b 2231 ext v18.16b, v18.16b, v18.16b, #4 2232 ushr v23.4s, v7.4s, #7 2233 shl v24.4s, v7.4s, #25 2234 uzp1 v7.4s, v21.4s, v22.4s 2235 orr v21.16b, v24.16b, v23.16b 2236 add v22.4s, v21.4s, v7.4s 2237 add v17.4s, v22.4s, v17.4s 2238 eor v19.16b, v17.16b, v19.16b 2239 tbl v19.16b, { v19.16b }, v0.16b 2240 add v18.4s, v18.4s, v19.4s 2241 eor v5.16b, v21.16b, v18.16b 2242 zip1 v21.4s, v4.4s, v16.4s 2243 zip1 v4.4s, v16.4s, v4.4s 2244 ushr v16.4s, v5.4s, #12 2245 shl v5.4s, v5.4s, #20 2246 ext v21.16b, v4.16b, v21.16b, #8 2247 orr v4.16b, v5.16b, v16.16b 2248 ext v16.16b, v20.16b, v20.16b, #4 2249 mov v23.16b, v21.16b 2250 add v5.4s, v4.4s, v21.4s 2251 mov v23.s[1], v7.s[2] 2252 add v5.4s, v5.4s, v17.4s 2253 eor v17.16b, v19.16b, v5.16b 2254 uzp1 v19.4s, v16.4s, v16.4s 2255 tbl v17.16b, { v17.16b }, v2.16b 2256 ext v19.16b, v19.16b, v16.16b, #8 2257 add v18.4s, v18.4s, v17.4s 2258 uzp2 v19.4s, v19.4s, v6.4s 2259 eor v4.16b, v4.16b, v18.16b 2260 add v5.4s, v5.4s, v19.4s 2261 ext v19.16b, v19.16b, v19.16b, #4 2262 ushr v20.4s, v4.4s, #7 2263 shl v4.4s, v4.4s, #25 2264 ext v5.16b, v5.16b, v5.16b, #4 2265 orr v20.16b, v4.16b, v20.16b 2266 ext v4.16b, v17.16b, v17.16b, #8 2267 add v17.4s, v5.4s, v20.4s 2268 ext v5.16b, v18.16b, v18.16b, #12 2269 eor v4.16b, v17.16b, v4.16b 2270 tbl v18.16b, { v4.16b }, v0.16b 2271 ext v4.16b, v16.16b, v16.16b, #12 2272 add v22.4s, v5.4s, v18.4s 2273 ext v4.16b, v16.16b, v4.16b, #12 2274 eor v5.16b, v20.16b, v22.16b 2275 rev64 v16.4s, v4.4s 2276 ushr v20.4s, v5.4s, #12 2277 shl v24.4s, v5.4s, #20 2278 trn2 v5.4s, v16.4s, v23.4s 2279 orr v16.16b, v24.16b, v20.16b 2280 add 
v20.4s, v16.4s, v5.4s 2281 add v17.4s, v20.4s, v17.4s 2282 zip1 v20.2d, v21.2d, v6.2d 2283 zip2 v6.4s, v6.4s, v21.4s 2284 eor v18.16b, v18.16b, v17.16b 2285 mov v20.s[3], v7.s[3] 2286 ext v17.16b, v17.16b, v17.16b, #12 2287 zip1 v21.4s, v6.4s, v7.4s 2288 tbl v18.16b, { v18.16b }, v2.16b 2289 ext v24.16b, v20.16b, v20.16b, #12 2290 zip1 v6.4s, v7.4s, v6.4s 2291 add v22.4s, v22.4s, v18.4s 2292 ext v18.16b, v18.16b, v18.16b, #8 2293 ext v6.16b, v6.16b, v21.16b, #8 2294 eor v16.16b, v16.16b, v22.16b 2295 ext v22.16b, v22.16b, v22.16b, #4 2296 zip1 v5.2d, v6.2d, v5.2d 2297 zip2 v4.4s, v4.4s, v6.4s 2298 ushr v25.4s, v16.4s, #7 2299 shl v26.4s, v16.4s, #25 2300 uzp1 v16.4s, v20.4s, v24.4s 2301 orr v20.16b, v26.16b, v25.16b 2302 mov v5.s[3], v16.s[3] 2303 add v24.4s, v20.4s, v16.4s 2304 add v17.4s, v24.4s, v17.4s 2305 eor v18.16b, v17.16b, v18.16b 2306 tbl v18.16b, { v18.16b }, v0.16b 2307 add v22.4s, v22.4s, v18.4s 2308 eor v20.16b, v20.16b, v22.16b 2309 ushr v7.4s, v20.4s, #12 2310 shl v20.4s, v20.4s, #20 2311 orr v7.16b, v20.16b, v7.16b 2312 add v20.4s, v7.4s, v6.4s 2313 add v17.4s, v20.4s, v17.4s 2314 ext v20.16b, v19.16b, v19.16b, #8 2315 eor v18.16b, v18.16b, v17.16b 2316 ext v17.16b, v17.16b, v17.16b, #4 2317 tbl v18.16b, { v18.16b }, v2.16b 2318 add v21.4s, v22.4s, v18.4s 2319 uzp2 v22.4s, v20.4s, v23.4s 2320 ext v18.16b, v18.16b, v18.16b, #8 2321 eor v7.16b, v7.16b, v21.16b 2322 ext v20.16b, v22.16b, v20.16b, #4 2323 ushr v22.4s, v7.4s, #7 2324 shl v7.4s, v7.4s, #25 2325 add v17.4s, v17.4s, v20.4s 2326 ext v20.16b, v21.16b, v21.16b, #12 2327 ext v21.16b, v19.16b, v19.16b, #12 2328 orr v7.16b, v7.16b, v22.16b 2329 ext v19.16b, v19.16b, v21.16b, #12 2330 add v17.4s, v17.4s, v7.4s 2331 mov v21.16b, v6.16b 2332 rev64 v19.4s, v19.4s 2333 eor v18.16b, v17.16b, v18.16b 2334 mov v21.s[1], v16.s[2] 2335 tbl v18.16b, { v18.16b }, v0.16b 2336 trn2 v19.4s, v19.4s, v21.4s 2337 add v20.4s, v20.4s, v18.4s 2338 eor v7.16b, v7.16b, v20.16b 2339 ushr v22.4s, v7.4s, #12 2340 shl 
v7.4s, v7.4s, #20 2341 orr v7.16b, v7.16b, v22.16b 2342 add v19.4s, v7.4s, v19.4s 2343 add v17.4s, v19.4s, v17.4s 2344 eor v18.16b, v18.16b, v17.16b 2345 ext v17.16b, v17.16b, v17.16b, #12 2346 tbl v18.16b, { v18.16b }, v2.16b 2347 add v19.4s, v20.4s, v18.4s 2348 ext v20.16b, v5.16b, v5.16b, #12 2349 ext v18.16b, v18.16b, v18.16b, #8 2350 eor v7.16b, v7.16b, v19.16b 2351 uzp1 v5.4s, v5.4s, v20.4s 2352 ushr v21.4s, v7.4s, #7 2353 shl v7.4s, v7.4s, #25 2354 orr v7.16b, v7.16b, v21.16b 2355 add v5.4s, v7.4s, v5.4s 2356 add v5.4s, v5.4s, v17.4s 2357 eor v17.16b, v5.16b, v18.16b 2358 ext v18.16b, v19.16b, v19.16b, #4 2359 tbl v17.16b, { v17.16b }, v0.16b 2360 add v18.4s, v18.4s, v17.4s 2361 eor v6.16b, v7.16b, v18.16b 2362 zip1 v7.4s, v4.4s, v16.4s 2363 zip1 v4.4s, v16.4s, v4.4s 2364 ushr v16.4s, v6.4s, #12 2365 shl v6.4s, v6.4s, #20 2366 ext v4.16b, v4.16b, v7.16b, #8 2367 orr v6.16b, v6.16b, v16.16b 2368 add v4.4s, v6.4s, v4.4s 2369 add v4.4s, v4.4s, v5.4s 2370 eor v5.16b, v17.16b, v4.16b 2371 ext v4.16b, v4.16b, v4.16b, #4 2372 tbl v5.16b, { v5.16b }, v2.16b 2373 add v7.4s, v18.4s, v5.4s 2374 eor v6.16b, v6.16b, v7.16b 2375 ext v7.16b, v7.16b, v7.16b, #12 2376 ushr v16.4s, v6.4s, #7 2377 shl v6.4s, v6.4s, #25 2378 orr v6.16b, v6.16b, v16.16b 2379 ext v16.16b, v5.16b, v5.16b, #8 2380 eor v5.16b, v4.16b, v7.16b 2381 eor v4.16b, v6.16b, v16.16b 2382.LBB3_11: 2383 subs x13, x15, #1 2384 b.eq .LBB3_9 2385 cbnz x15, .LBB3_10 2386 add x4, x4, x12 2387 add x0, x0, #8 2388 subs x1, x1, #1 2389 stp q5, q4, [x8], #32 2390 b.ne .LBB3_8 2391.LBB3_14: 2392 add sp, sp, #368 2393 ldp x20, x19, [sp, #128] 2394 ldp x22, x21, [sp, #112] 2395 ldp x24, x23, [sp, #96] 2396 ldp x26, x25, [sp, #80] 2397 ldp x29, x27, [sp, #64] 2398 ldp d9, d8, [sp, #48] 2399 ldp d11, d10, [sp, #32] 2400 ldp d13, d12, [sp, #16] 2401 ldp d15, d14, [sp], #144 2402 ret 2403.Lfunc_end3: 2404 .size zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41 2405 .cfi_endproc 2406 .section 
".note.GNU-stack","",@progbits 2407#endif 2408