xref: /freebsd/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1// SPDX-License-Identifier: CDDL-1.0
2/*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23/*
24 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
25 * Copyright (c) 2019-2022 Samuel Neves
26 * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de>
27 *
28 * This is converted assembly: SSE4.1 -> ARMv8-A
29 * Used tools: SIMDe https://github.com/simd-everywhere/simde
30 *
31 * Should work on FreeBSD, Linux and macOS
32 * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh
33 */
34
35#if defined(__aarch64__)
36
/*
 * CFI_NEGATE_RA_STATE expands to the .cfi_negate_ra_state directive when
 * LD_VERSION is undefined or is at least 233010000, and to nothing
 * otherwise, so that older toolchains are not handed the directive.
 * It is placed right after the "hint #25" at the entry of the two
 * exported compress functions below.
 */
37/* make gcc <= 9 happy */
38#if !defined(LD_VERSION) || LD_VERSION >= 233010000
39#define CFI_NEGATE_RA_STATE .cfi_negate_ra_state
40#else
41#define CFI_NEGATE_RA_STATE
42#endif
43
/*
 * ELF note in .note.gnu.property, emitted as raw words.
 * Layout: namesz, descsz, type, name, then one property record
 * (pr_type, pr_datasz, pr_data, padding to 8 bytes).
 * NOTE(review): the values match a GNU property note that flags the
 * object as BTI + PAC enabled - confirm against the AArch64 ELF ABI.
 */
44	.text
45	.section	.note.gnu.property,"a",@note
46	.p2align	3
47	.word	4			// namesz: "GNU" plus NUL terminator
48	.word	16			// descsz: 16 bytes of descriptor follow the name
49	.word	5			// note type
50	.asciz	"GNU"			// note name
51	.word	3221225472		// pr_type = 0xc0000000
52	.word	4			// pr_datasz: 4 bytes of property data
53	.word	3			// pr_data: bits 0 and 1 set
54	.word	0			// padding up to the 8-byte alignment
55.Lsec_end0:
56	.text
/*
 * zfs_blake3_compress_in_place_sse41
 *
 * Runs compress_pre on the caller's arguments and folds the resulting
 * 64-byte state into 32 bytes that are written back over the buffer
 * x0 points to.
 *
 * In:   x0 = pointer to 32 bytes, read by compress_pre and overwritten
 *            here with the result
 *       x1 = pointer to the 64-byte input block
 *       w2, x3, w4 = forwarded unchanged to compress_pre as w3, x4, w5
 *            (presumably block length, counter and flags - the names
 *            are not visible in this file; verify against the C caller)
 * Out:  32 bytes at [x0] = (state[0..31] XOR state[32..63])
 * Stack: 96 bytes - 64-byte scratch state at sp, x29/x30 at sp+64,
 *        x19 at sp+80.
 */
57	.globl	zfs_blake3_compress_in_place_sse41
58	.p2align	2
59	.type	zfs_blake3_compress_in_place_sse41,@function
60zfs_blake3_compress_in_place_sse41:
61	.cfi_startproc
62	hint	#25			// PACIASP: sign the return address in x30
63	CFI_NEGATE_RA_STATE
64	sub	sp, sp, #96
65	stp	x29, x30, [sp, #64]
66	add	x29, sp, #64
67	str	x19, [sp, #80]
68	.cfi_def_cfa w29, 32
69	.cfi_offset w19, -16
70	.cfi_offset w30, -24
71	.cfi_offset w29, -32
72	mov	x19, x0			// keep the in/out pointer across the call
73	mov	w5, w4			// shift arguments 1..4 up by one register ...
74	mov	x4, x3
75	mov	w3, w2
76	mov	x2, x1
77	mov	x0, sp			// ... so x0 can carry the scratch state buffer
78	mov	x1, x19			// and x1 the 32-byte input at the original x0
79	bl	compress_pre
80	ldp	q0, q1, [sp]		// state bytes 0..31
81	ldp	q2, q3, [sp, #32]	// state bytes 32..63
82	eor	v0.16b, v2.16b, v0.16b	// low half XOR high half
83	eor	v1.16b, v3.16b, v1.16b
84	ldp	x29, x30, [sp, #64]
85	stp	q0, q1, [x19]		// write the 32-byte result in place
86	ldr	x19, [sp, #80]
87	add	sp, sp, #96
88	hint	#29			// AUTIASP: authenticate x30 before returning
89	ret
90.Lfunc_end0:
91	.size	zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41
92	.cfi_endproc
93
/*
 * 16-byte constants loaded by compress_pre.
 */
94	.section	.rodata.cst16,"aM",@progbits,16
95	.p2align	4
/*
 * .LCPI1_0: two 64-bit words, 0xBB67AE856A09E667 and 0xA54FF53A3C6EF372.
 * Read as four little-endian 32-bit lanes these are 0x6A09E667,
 * 0xBB67AE85, 0x3C6EF372, 0xA54FF53A - the same values as .LCPI3_3.
 * compress_pre uses them as the initial third state row.
 */
96.LCPI1_0:
97	.xword	-4942790177982912921
98	.xword	-6534734903820487822
/*
 * .LCPI1_1: tbl byte indices that swap the two 16-bit halves of every
 * 32-bit lane, i.e. rotate each lane by 16 bits.
 */
99.LCPI1_1:
100	.byte	2
101	.byte	3
102	.byte	0
103	.byte	1
104	.byte	6
105	.byte	7
106	.byte	4
107	.byte	5
108	.byte	10
109	.byte	11
110	.byte	8
111	.byte	9
112	.byte	14
113	.byte	15
114	.byte	12
115	.byte	13
/*
 * .LCPI1_2: tbl byte indices that rotate every 32-bit lane right by
 * 8 bits (result byte 0 comes from source byte 1, byte 3 from byte 0).
 */
116.LCPI1_2:
117	.byte	1
118	.byte	2
119	.byte	3
120	.byte	0
121	.byte	5
122	.byte	6
123	.byte	7
124	.byte	4
125	.byte	9
126	.byte	10
127	.byte	11
128	.byte	8
129	.byte	13
130	.byte	14
131	.byte	15
132	.byte	12
/*
 * compress_pre (file-local, called by the two compress wrappers)
 *
 * Builds a 16-word state from the inputs, mixes it with the 64-byte
 * block using repeated add / xor / rotate steps, and stores the final
 * four 128-bit rows at [x0].
 *
 * In:   x0 = pointer to a 64-byte output buffer
 *       x1 = pointer to 32 bytes forming state rows 0 and 1
 *       x2 = pointer to the 64-byte input block
 *       w3, w5 = each masked to its low 8 bits, then placed in lanes 2
 *            and 3 of state row 3 (presumably block length and flags)
 *       x4 = 64-bit value placed in lanes 0 and 1 of state row 3
 *            (presumably the counter - TODO confirm against the caller)
 * Out:  64 bytes at [x0]
 * Uses: x8, v0-v7, v16-v22. No stack, no callee-saved registers, and
 *       no calls, so x30 is left untouched.
 */
133	.text
134	.p2align	2
135	.type	compress_pre,@function
136compress_pre:
137	.cfi_startproc
138	hint	#34			// BTI c landing pad
139	fmov	s1, w3			// v1.s[0] = w3
140	movi	d0, #0x0000ff000000ff	// mask: low byte of each 32-bit lane
141	ldr	q2, [x1]		// state row 0 = bytes 0..15 at x1
142	adrp	x8, .LCPI1_0
143	mov	v1.s[1], w5		// v1.s[1] = w5
144	str	q2, [x0]
145	ldr	q4, [x8, :lo12:.LCPI1_0]	// state row 2 = constant .LCPI1_0
146	ldr	q5, [x1, #16]		// state row 1 = bytes 16..31 at x1
147	adrp	x8, .LCPI1_1
148	and	v0.8b, v1.8b, v0.8b	// d0 = { w3 & 0xff, w5 & 0xff }
149	fmov	d1, x4			// low 64 bits of row 3 = x4
150	stp	q5, q4, [x0, #16]
151	mov	v1.d[1], v0.d[0]	// row 3 = { x4 lo, x4 hi, w3 & 0xff, w5 & 0xff }
152	str	q1, [x0, #48]
153	ldp	q6, q7, [x2]		// block words 0..7
154	uzp1	v3.4s, v6.4s, v7.4s	// v3 = block words 0, 2, 4, 6
155	add	v0.4s, v2.4s, v3.4s
156	uzp2	v2.4s, v6.4s, v7.4s	// v2 = block words 1, 3, 5, 7
157	add	v16.4s, v0.4s, v5.4s	// row0 += message + row1
158	ldr	q0, [x8, :lo12:.LCPI1_1]	// v0 = rotate-by-16 tbl indices (kept until the last round)
159	adrp	x8, .LCPI1_2
160	eor	v1.16b, v16.16b, v1.16b	// row3 ^= row0
161	add	v7.4s, v16.4s, v2.4s
162	tbl	v1.16b, { v1.16b }, v0.16b	// row3 rotated by 16
163	add	v4.4s, v1.4s, v4.4s	// row2 += row3
164	eor	v5.16b, v4.16b, v5.16b	// row1 ^= row2
165	ushr	v6.4s, v5.4s, #12	// ushr 12 / shl 20 / orr = rotate right by 12
166	shl	v5.4s, v5.4s, #20
167	orr	v5.16b, v5.16b, v6.16b
168	add	v6.4s, v7.4s, v5.4s
169	eor	v7.16b, v1.16b, v6.16b
170	ldr	q1, [x8, :lo12:.LCPI1_2]	// v1 = rotate-right-by-8 tbl indices (kept until the last round)
171	add	x8, x2, #32
172	tbl	v7.16b, { v7.16b }, v1.16b	// row3 rotated right by 8
173	ld2	{ v16.4s, v17.4s }, [x8]	// de-interleave block words 8..15: v16 = even, v17 = odd
174	add	v4.4s, v4.4s, v7.4s
175	ext	v7.16b, v7.16b, v7.16b, #8	// ext with identical sources rotates the lanes of a row
176	add	v6.4s, v6.4s, v16.4s
177	eor	v5.16b, v4.16b, v5.16b
178	ext	v4.16b, v4.16b, v4.16b, #4
179	ext	v16.16b, v16.16b, v16.16b, #12
180	ext	v6.16b, v6.16b, v6.16b, #12
181	ushr	v18.4s, v5.4s, #7	// ushr 7 / shl 25 / orr = rotate right by 7
182	shl	v5.4s, v5.4s, #25
183	orr	v5.16b, v5.16b, v18.16b
/*
 * From here to the final stores the same add / xor / rotate pattern
 * (16 and 8 via tbl with v0 / v1, 12 and 7 via ushr + shl + orr)
 * repeats. The interleaved ext / uzp / zip / trn / rev64 / lane-mov
 * instructions rotate the state rows between steps and build the
 * reordered message vectors consumed by later steps.
 * NOTE(review): the instruction scheduler has interleaved adjacent
 * steps, so individual round boundaries are not marked here.
 */
184	ext	v18.16b, v17.16b, v17.16b, #12
185	add	v6.4s, v6.4s, v5.4s
186	mov	v17.16b, v18.16b
187	eor	v7.16b, v7.16b, v6.16b
188	add	v6.4s, v6.4s, v18.4s
189	mov	v17.s[1], v16.s[2]
190	tbl	v7.16b, { v7.16b }, v0.16b
191	add	v4.4s, v4.4s, v7.4s
192	eor	v5.16b, v4.16b, v5.16b
193	ushr	v19.4s, v5.4s, #12
194	shl	v5.4s, v5.4s, #20
195	orr	v5.16b, v5.16b, v19.16b
196	uzp1	v19.4s, v3.4s, v3.4s
197	add	v6.4s, v6.4s, v5.4s
198	ext	v19.16b, v19.16b, v3.16b, #8
199	eor	v7.16b, v7.16b, v6.16b
200	uzp2	v19.4s, v19.4s, v2.4s
201	tbl	v7.16b, { v7.16b }, v1.16b
202	add	v6.4s, v6.4s, v19.4s
203	add	v4.4s, v4.4s, v7.4s
204	ext	v6.16b, v6.16b, v6.16b, #4
205	ext	v7.16b, v7.16b, v7.16b, #8
206	eor	v5.16b, v4.16b, v5.16b
207	ext	v4.16b, v4.16b, v4.16b, #12
208	ushr	v20.4s, v5.4s, #7
209	shl	v5.4s, v5.4s, #25
210	orr	v5.16b, v5.16b, v20.16b
211	ext	v20.16b, v3.16b, v3.16b, #12
212	add	v6.4s, v6.4s, v5.4s
213	ext	v3.16b, v3.16b, v20.16b, #12
214	eor	v7.16b, v7.16b, v6.16b
215	rev64	v3.4s, v3.4s
216	tbl	v7.16b, { v7.16b }, v0.16b
217	trn2	v3.4s, v3.4s, v17.4s
218	add	v4.4s, v4.4s, v7.4s
219	add	v6.4s, v6.4s, v3.4s
220	eor	v5.16b, v4.16b, v5.16b
221	ushr	v17.4s, v5.4s, #12
222	shl	v5.4s, v5.4s, #20
223	orr	v5.16b, v5.16b, v17.16b
224	zip1	v17.2d, v18.2d, v2.2d
225	zip2	v2.4s, v2.4s, v18.4s
226	add	v6.4s, v6.4s, v5.4s
227	mov	v17.s[3], v16.s[3]
228	zip1	v18.4s, v2.4s, v16.4s
229	zip1	v2.4s, v16.4s, v2.4s
230	eor	v7.16b, v7.16b, v6.16b
231	ext	v6.16b, v6.16b, v6.16b, #12
232	ext	v16.16b, v2.16b, v18.16b, #8
233	tbl	v7.16b, { v7.16b }, v1.16b
234	add	v20.4s, v4.4s, v7.4s
235	ext	v4.16b, v17.16b, v17.16b, #12
236	ext	v7.16b, v7.16b, v7.16b, #8
237	eor	v5.16b, v20.16b, v5.16b
238	uzp1	v4.4s, v17.4s, v4.4s
239	ushr	v17.4s, v5.4s, #7
240	shl	v5.4s, v5.4s, #25
241	add	v6.4s, v6.4s, v4.4s
242	orr	v5.16b, v5.16b, v17.16b
243	ext	v17.16b, v20.16b, v20.16b, #4
244	add	v6.4s, v6.4s, v5.4s
245	eor	v7.16b, v7.16b, v6.16b
246	add	v6.4s, v6.4s, v16.4s
247	tbl	v7.16b, { v7.16b }, v0.16b
248	add	v17.4s, v17.4s, v7.4s
249	eor	v5.16b, v17.16b, v5.16b
250	ushr	v2.4s, v5.4s, #12
251	shl	v5.4s, v5.4s, #20
252	orr	v2.16b, v5.16b, v2.16b
253	add	v5.4s, v6.4s, v2.4s
254	ext	v6.16b, v19.16b, v19.16b, #4
255	eor	v7.16b, v7.16b, v5.16b
256	uzp1	v18.4s, v6.4s, v6.4s
257	tbl	v7.16b, { v7.16b }, v1.16b
258	ext	v18.16b, v18.16b, v6.16b, #8
259	add	v17.4s, v17.4s, v7.4s
260	uzp2	v18.4s, v18.4s, v3.4s
261	ext	v7.16b, v7.16b, v7.16b, #8
262	eor	v2.16b, v17.16b, v2.16b
263	add	v5.4s, v5.4s, v18.4s
264	ext	v17.16b, v17.16b, v17.16b, #12
265	ushr	v19.4s, v2.4s, #7
266	shl	v2.4s, v2.4s, #25
267	ext	v5.16b, v5.16b, v5.16b, #4
268	orr	v2.16b, v2.16b, v19.16b
269	ext	v19.16b, v6.16b, v6.16b, #12
270	add	v5.4s, v5.4s, v2.4s
271	ext	v6.16b, v6.16b, v19.16b, #12
272	mov	v19.16b, v16.16b
273	eor	v7.16b, v7.16b, v5.16b
274	rev64	v6.4s, v6.4s
275	mov	v19.s[1], v4.s[2]
276	tbl	v7.16b, { v7.16b }, v0.16b
277	add	v17.4s, v17.4s, v7.4s
278	eor	v20.16b, v17.16b, v2.16b
279	trn2	v2.4s, v6.4s, v19.4s
280	ushr	v6.4s, v20.4s, #12
281	shl	v19.4s, v20.4s, #20
282	add	v5.4s, v5.4s, v2.4s
283	orr	v6.16b, v19.16b, v6.16b
284	add	v19.4s, v5.4s, v6.4s
285	eor	v5.16b, v7.16b, v19.16b
286	zip1	v7.2d, v16.2d, v3.2d
287	zip2	v3.4s, v3.4s, v16.4s
288	tbl	v20.16b, { v5.16b }, v1.16b
289	mov	v7.s[3], v4.s[3]
290	add	v17.4s, v17.4s, v20.4s
291	ext	v5.16b, v7.16b, v7.16b, #12
292	eor	v6.16b, v17.16b, v6.16b
293	uzp1	v5.4s, v7.4s, v5.4s
294	ext	v7.16b, v19.16b, v19.16b, #12
295	ext	v17.16b, v17.16b, v17.16b, #4
296	ushr	v19.4s, v6.4s, #7
297	shl	v6.4s, v6.4s, #25
298	add	v7.4s, v7.4s, v5.4s
299	orr	v6.16b, v6.16b, v19.16b
300	ext	v19.16b, v20.16b, v20.16b, #8
301	add	v7.4s, v7.4s, v6.4s
302	eor	v19.16b, v19.16b, v7.16b
303	tbl	v19.16b, { v19.16b }, v0.16b
304	add	v16.4s, v17.4s, v19.4s
305	zip1	v17.4s, v3.4s, v4.4s
306	zip1	v3.4s, v4.4s, v3.4s
307	eor	v4.16b, v16.16b, v6.16b
308	ext	v17.16b, v3.16b, v17.16b, #8
309	ushr	v3.4s, v4.4s, #12
310	shl	v4.4s, v4.4s, #20
311	add	v6.4s, v7.4s, v17.4s
312	orr	v3.16b, v4.16b, v3.16b
313	add	v4.4s, v6.4s, v3.4s
314	ext	v6.16b, v18.16b, v18.16b, #4
315	eor	v7.16b, v19.16b, v4.16b
316	uzp1	v18.4s, v6.4s, v6.4s
317	tbl	v7.16b, { v7.16b }, v1.16b
318	ext	v18.16b, v18.16b, v6.16b, #8
319	add	v16.4s, v16.4s, v7.4s
320	uzp2	v18.4s, v18.4s, v2.4s
321	ext	v7.16b, v7.16b, v7.16b, #8
322	eor	v3.16b, v16.16b, v3.16b
323	add	v4.4s, v4.4s, v18.4s
324	ext	v16.16b, v16.16b, v16.16b, #12
325	ushr	v19.4s, v3.4s, #7
326	shl	v3.4s, v3.4s, #25
327	ext	v4.16b, v4.16b, v4.16b, #4
328	orr	v3.16b, v3.16b, v19.16b
329	ext	v19.16b, v6.16b, v6.16b, #12
330	add	v4.4s, v4.4s, v3.4s
331	ext	v6.16b, v6.16b, v19.16b, #12
332	mov	v19.16b, v17.16b
333	eor	v7.16b, v7.16b, v4.16b
334	rev64	v6.4s, v6.4s
335	mov	v19.s[1], v5.s[2]
336	tbl	v7.16b, { v7.16b }, v0.16b
337	add	v16.4s, v16.4s, v7.4s
338	eor	v20.16b, v16.16b, v3.16b
339	trn2	v3.4s, v6.4s, v19.4s
340	ushr	v6.4s, v20.4s, #12
341	shl	v19.4s, v20.4s, #20
342	add	v4.4s, v4.4s, v3.4s
343	orr	v6.16b, v19.16b, v6.16b
344	zip1	v19.2d, v17.2d, v2.2d
345	zip2	v2.4s, v2.4s, v17.4s
346	add	v4.4s, v4.4s, v6.4s
347	mov	v19.s[3], v5.s[3]
348	zip1	v17.4s, v2.4s, v5.4s
349	zip1	v2.4s, v5.4s, v2.4s
350	eor	v7.16b, v7.16b, v4.16b
351	ext	v20.16b, v19.16b, v19.16b, #12
352	ext	v4.16b, v4.16b, v4.16b, #12
353	ext	v2.16b, v2.16b, v17.16b, #8
354	tbl	v7.16b, { v7.16b }, v1.16b
355	add	v16.4s, v16.4s, v7.4s
356	ext	v7.16b, v7.16b, v7.16b, #8
357	eor	v21.16b, v16.16b, v6.16b
358	uzp1	v6.4s, v19.4s, v20.4s
359	ext	v16.16b, v16.16b, v16.16b, #4
360	ushr	v19.4s, v21.4s, #7
361	shl	v20.4s, v21.4s, #25
362	add	v4.4s, v4.4s, v6.4s
363	orr	v19.16b, v20.16b, v19.16b
364	add	v4.4s, v4.4s, v19.4s
365	eor	v7.16b, v7.16b, v4.16b
366	add	v4.4s, v4.4s, v2.4s
367	tbl	v7.16b, { v7.16b }, v0.16b
368	add	v16.4s, v16.4s, v7.4s
369	eor	v5.16b, v16.16b, v19.16b
370	ushr	v17.4s, v5.4s, #12
371	shl	v5.4s, v5.4s, #20
372	orr	v5.16b, v5.16b, v17.16b
373	ext	v17.16b, v18.16b, v18.16b, #4
374	add	v4.4s, v4.4s, v5.4s
375	uzp1	v18.4s, v17.4s, v17.4s
376	eor	v7.16b, v7.16b, v4.16b
377	ext	v18.16b, v18.16b, v17.16b, #8
378	tbl	v7.16b, { v7.16b }, v1.16b
379	uzp2	v18.4s, v18.4s, v3.4s
380	add	v16.4s, v16.4s, v7.4s
381	add	v4.4s, v4.4s, v18.4s
382	ext	v7.16b, v7.16b, v7.16b, #8
383	eor	v5.16b, v16.16b, v5.16b
384	ext	v4.16b, v4.16b, v4.16b, #4
385	ext	v16.16b, v16.16b, v16.16b, #12
386	ushr	v19.4s, v5.4s, #7
387	shl	v5.4s, v5.4s, #25
388	orr	v5.16b, v5.16b, v19.16b
389	add	v19.4s, v4.4s, v5.4s
390	eor	v4.16b, v7.16b, v19.16b
391	ext	v7.16b, v17.16b, v17.16b, #12
392	tbl	v20.16b, { v4.16b }, v0.16b
393	ext	v4.16b, v17.16b, v7.16b, #12
394	mov	v7.16b, v2.16b
395	add	v16.4s, v16.4s, v20.4s
396	rev64	v4.4s, v4.4s
397	mov	v7.s[1], v6.s[2]
398	eor	v5.16b, v16.16b, v5.16b
399	trn2	v4.4s, v4.4s, v7.4s
400	ushr	v7.4s, v5.4s, #12
401	shl	v5.4s, v5.4s, #20
402	add	v17.4s, v19.4s, v4.4s
403	zip1	v19.2d, v2.2d, v3.2d
404	zip2	v2.4s, v3.4s, v2.4s
405	orr	v5.16b, v5.16b, v7.16b
406	mov	v19.s[3], v6.s[3]
407	add	v7.4s, v17.4s, v5.4s
408	eor	v17.16b, v20.16b, v7.16b
409	ext	v20.16b, v19.16b, v19.16b, #12
410	ext	v7.16b, v7.16b, v7.16b, #12
411	tbl	v17.16b, { v17.16b }, v1.16b
412	add	v16.4s, v16.4s, v17.4s
413	ext	v17.16b, v17.16b, v17.16b, #8
414	eor	v21.16b, v16.16b, v5.16b
415	uzp1	v5.4s, v19.4s, v20.4s
416	ext	v16.16b, v16.16b, v16.16b, #4
417	ushr	v19.4s, v21.4s, #7
418	shl	v20.4s, v21.4s, #25
419	add	v7.4s, v7.4s, v5.4s
420	orr	v19.16b, v20.16b, v19.16b
421	add	v7.4s, v7.4s, v19.4s
422	eor	v17.16b, v17.16b, v7.16b
423	tbl	v17.16b, { v17.16b }, v0.16b
424	add	v3.4s, v16.4s, v17.4s
425	zip1	v16.4s, v2.4s, v6.4s
426	zip1	v2.4s, v6.4s, v2.4s
427	eor	v6.16b, v3.16b, v19.16b
428	ext	v16.16b, v2.16b, v16.16b, #8
429	ushr	v2.4s, v6.4s, #12
430	shl	v6.4s, v6.4s, #20
431	add	v7.4s, v7.4s, v16.4s
432	orr	v2.16b, v6.16b, v2.16b
433	add	v6.4s, v7.4s, v2.4s
434	ext	v7.16b, v18.16b, v18.16b, #4
435	eor	v17.16b, v17.16b, v6.16b
436	uzp1	v18.4s, v7.4s, v7.4s
437	tbl	v17.16b, { v17.16b }, v1.16b
438	ext	v18.16b, v18.16b, v7.16b, #8
439	add	v3.4s, v3.4s, v17.4s
440	uzp2	v18.4s, v18.4s, v4.4s
441	eor	v2.16b, v3.16b, v2.16b
442	add	v6.4s, v6.4s, v18.4s
443	ext	v3.16b, v3.16b, v3.16b, #12
444	ext	v18.16b, v18.16b, v18.16b, #4
445	ushr	v19.4s, v2.4s, #7
446	shl	v2.4s, v2.4s, #25
447	ext	v6.16b, v6.16b, v6.16b, #4
448	orr	v19.16b, v2.16b, v19.16b
449	ext	v2.16b, v17.16b, v17.16b, #8
450	ext	v17.16b, v7.16b, v7.16b, #12
451	add	v6.4s, v6.4s, v19.4s
452	eor	v2.16b, v2.16b, v6.16b
453	tbl	v20.16b, { v2.16b }, v0.16b
454	ext	v2.16b, v7.16b, v17.16b, #12
455	mov	v7.16b, v16.16b
456	add	v17.4s, v3.4s, v20.4s
457	rev64	v3.4s, v2.4s
458	mov	v7.s[1], v5.s[2]
459	eor	v19.16b, v17.16b, v19.16b
460	trn2	v3.4s, v3.4s, v7.4s
461	ushr	v21.4s, v19.4s, #12
462	shl	v19.4s, v19.4s, #20
463	add	v6.4s, v6.4s, v3.4s
464	orr	v19.16b, v19.16b, v21.16b
465	add	v21.4s, v6.4s, v19.4s
466	eor	v6.16b, v20.16b, v21.16b
467	zip1	v20.2d, v16.2d, v4.2d
468	zip2	v4.4s, v4.4s, v16.4s
469	tbl	v22.16b, { v6.16b }, v1.16b
470	mov	v20.s[3], v5.s[3]
471	add	v17.4s, v17.4s, v22.4s
472	ext	v6.16b, v20.16b, v20.16b, #12
473	eor	v19.16b, v17.16b, v19.16b
474	uzp1	v6.4s, v20.4s, v6.4s
475	ext	v20.16b, v21.16b, v21.16b, #12
476	ext	v17.16b, v17.16b, v17.16b, #4
477	ushr	v21.4s, v19.4s, #7
478	shl	v19.4s, v19.4s, #25
479	add	v20.4s, v20.4s, v6.4s
480	orr	v19.16b, v19.16b, v21.16b
481	ext	v21.16b, v22.16b, v22.16b, #8
482	add	v20.4s, v20.4s, v19.4s
483	eor	v21.16b, v21.16b, v20.16b
484	tbl	v21.16b, { v21.16b }, v0.16b
485	add	v16.4s, v17.4s, v21.4s
486	zip1	v17.4s, v4.4s, v5.4s
487	zip1	v4.4s, v5.4s, v4.4s
488	eor	v5.16b, v16.16b, v19.16b
489	ext	v4.16b, v4.16b, v17.16b, #8
490	ushr	v17.4s, v5.4s, #12
491	shl	v5.4s, v5.4s, #20
492	add	v19.4s, v20.4s, v4.4s
493	ext	v20.16b, v18.16b, v18.16b, #8
494	zip1	v3.2d, v4.2d, v3.2d
495	orr	v5.16b, v5.16b, v17.16b
496	zip2	v2.4s, v2.4s, v4.4s
497	uzp2	v7.4s, v20.4s, v7.4s
498	mov	v3.s[3], v6.s[3]
499	add	v17.4s, v19.4s, v5.4s
500	ext	v7.16b, v7.16b, v20.16b, #4
501	eor	v19.16b, v21.16b, v17.16b
502	ext	v17.16b, v17.16b, v17.16b, #4
503	tbl	v19.16b, { v19.16b }, v1.16b
504	add	v7.4s, v17.4s, v7.4s
505	add	v16.4s, v16.4s, v19.4s
506	ext	v17.16b, v19.16b, v19.16b, #8
507	ext	v19.16b, v18.16b, v18.16b, #12
508	eor	v5.16b, v16.16b, v5.16b
509	ext	v16.16b, v16.16b, v16.16b, #12
510	ext	v18.16b, v18.16b, v19.16b, #12
511	mov	v19.16b, v4.16b
512	ushr	v20.4s, v5.4s, #7
513	shl	v5.4s, v5.4s, #25
514	rev64	v18.4s, v18.4s
515	mov	v19.s[1], v6.s[2]
516	orr	v5.16b, v5.16b, v20.16b
517	trn2	v18.4s, v18.4s, v19.4s
518	add	v7.4s, v5.4s, v7.4s
519	eor	v17.16b, v17.16b, v7.16b
520	add	v7.4s, v7.4s, v18.4s
521	ext	v18.16b, v3.16b, v3.16b, #12
522	tbl	v17.16b, { v17.16b }, v0.16b
523	uzp1	v3.4s, v3.4s, v18.4s
524	add	v16.4s, v16.4s, v17.4s
525	eor	v5.16b, v16.16b, v5.16b
526	ushr	v19.4s, v5.4s, #12
527	shl	v5.4s, v5.4s, #20
528	orr	v5.16b, v5.16b, v19.16b
529	add	v7.4s, v7.4s, v5.4s
530	eor	v17.16b, v17.16b, v7.16b
531	ext	v7.16b, v7.16b, v7.16b, #12
532	tbl	v17.16b, { v17.16b }, v1.16b
533	add	v3.4s, v7.4s, v3.4s
534	add	v16.4s, v16.4s, v17.4s
535	ext	v7.16b, v17.16b, v17.16b, #8
536	eor	v5.16b, v16.16b, v5.16b
537	ext	v16.16b, v16.16b, v16.16b, #4
538	ushr	v18.4s, v5.4s, #7
539	shl	v5.4s, v5.4s, #25
540	orr	v5.16b, v5.16b, v18.16b
541	add	v3.4s, v3.4s, v5.4s
542	eor	v7.16b, v7.16b, v3.16b
543	tbl	v0.16b, { v7.16b }, v0.16b	// last use of the rotate-16 table: v0 now holds state
544	zip1	v7.4s, v2.4s, v6.4s
545	zip1	v2.4s, v6.4s, v2.4s
546	add	v4.4s, v16.4s, v0.4s
547	ext	v2.16b, v2.16b, v7.16b, #8
548	eor	v5.16b, v4.16b, v5.16b
549	add	v2.4s, v3.4s, v2.4s
550	ushr	v6.4s, v5.4s, #12
551	shl	v5.4s, v5.4s, #20
552	orr	v3.16b, v5.16b, v6.16b
553	add	v2.4s, v2.4s, v3.4s
554	eor	v0.16b, v0.16b, v2.16b
555	ext	v2.16b, v2.16b, v2.16b, #4
556	tbl	v0.16b, { v0.16b }, v1.16b	// last use of the rotate-8 table: v1 is reused below
557	add	v1.4s, v4.4s, v0.4s
558	ext	v0.16b, v0.16b, v0.16b, #8
559	eor	v3.16b, v1.16b, v3.16b
560	ext	v1.16b, v1.16b, v1.16b, #12
561	ushr	v4.4s, v3.4s, #7
562	shl	v3.4s, v3.4s, #25
563	stp	q1, q0, [x0, #32]	// output bytes 32..63 = final rows 2 and 3
564	orr	v3.16b, v3.16b, v4.16b
565	stp	q2, q3, [x0]		// output bytes 0..31 = final rows 0 and 1
566	ret
567.Lfunc_end1:
568	.size	compress_pre, .Lfunc_end1-compress_pre
569	.cfi_endproc
570
/*
 * zfs_blake3_compress_xof_sse41
 *
 * Runs compress_pre on the caller's arguments and writes a 64-byte
 * result to the buffer passed in x5. Unlike the in-place variant, the
 * 32 bytes at x0 are only read.
 *
 * In:   x0 = pointer to 32 input bytes (read by compress_pre and again
 *            here for the upper half of the output)
 *       x1 = pointer to the 64-byte input block
 *       w2, x3, w4 = forwarded unchanged to compress_pre as w3, x4, w5
 *            (presumably block length, counter and flags - the names
 *            are not visible in this file; verify against the C caller)
 *       x5 = pointer to the 64-byte output buffer
 * Out:  out[ 0..31] = state[0..31] XOR state[32..63]
 *       out[32..63] = in[0..31]    XOR state[32..63]
 * Stack: 96 bytes - 64-byte scratch state at sp, x29/x30 at sp+64,
 *        x20/x19 at sp+80.
 */
571	.globl	zfs_blake3_compress_xof_sse41
572	.p2align	2
573	.type	zfs_blake3_compress_xof_sse41,@function
574zfs_blake3_compress_xof_sse41:
575	.cfi_startproc
576	hint	#25			// PACIASP: sign the return address in x30
577	CFI_NEGATE_RA_STATE
578	sub	sp, sp, #96
579	stp	x29, x30, [sp, #64]
580	add	x29, sp, #64
581	stp	x20, x19, [sp, #80]
582	.cfi_def_cfa w29, 32
583	.cfi_offset w19, -8
584	.cfi_offset w20, -16
585	.cfi_offset w30, -24
586	.cfi_offset w29, -32
587	mov	x20, x0			// keep the 32-byte input pointer across the call
588	mov	x19, x5			// keep the output pointer across the call
589	mov	w5, w4			// shift arguments 1..4 up by one register ...
590	mov	x4, x3
591	mov	w3, w2
592	mov	x2, x1
593	mov	x0, sp			// ... so x0 can carry the scratch state buffer
594	mov	x1, x20
595	bl	compress_pre
596	ldp	q0, q1, [sp]		// state bytes 0..31
597	ldp	q2, q3, [sp, #32]	// state bytes 32..63
598	eor	v0.16b, v2.16b, v0.16b
599	eor	v1.16b, v3.16b, v1.16b
600	ldp	x29, x30, [sp, #64]
601	stp	q0, q1, [x19]		// out[0..31] = low half XOR high half
602	ldr	q0, [x20]
603	eor	v0.16b, v0.16b, v2.16b
604	str	q0, [x19, #32]		// out[32..47] = in[0..15] XOR state[32..47]
605	ldr	q0, [x20, #16]
606	eor	v0.16b, v0.16b, v3.16b
607	str	q0, [x19, #48]		// out[48..63] = in[16..31] XOR state[48..63]
608	ldp	x20, x19, [sp, #80]
609	add	sp, sp, #96
610	hint	#29			// AUTIASP: authenticate x30 before returning
611	ret
612.Lfunc_end2:
613	.size	zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41
614	.cfi_endproc
615
/*
 * 16-byte constants for zfs_blake3_hash_many_sse41.
 */
616	.section	.rodata.cst16,"aM",@progbits,16
617	.p2align	4
/*
 * .LCPI3_0: lane indices 0..3. hash_many ANDs them with a mask derived
 * from bit 0 of w5 and later adds the result to the broadcast low word
 * of x4, giving each of the four lanes its own counter offset.
 */
618.LCPI3_0:
619	.word	0
620	.word	1
621	.word	2
622	.word	3
/*
 * .LCPI3_1: tbl byte indices that rotate every 32-bit lane by 16 bits
 * (same table as .LCPI1_1).
 */
623.LCPI3_1:
624	.byte	2
625	.byte	3
626	.byte	0
627	.byte	1
628	.byte	6
629	.byte	7
630	.byte	4
631	.byte	5
632	.byte	10
633	.byte	11
634	.byte	8
635	.byte	9
636	.byte	14
637	.byte	15
638	.byte	12
639	.byte	13
/*
 * .LCPI3_2: tbl byte indices that rotate every 32-bit lane right by
 * 8 bits (same table as .LCPI1_2).
 */
640.LCPI3_2:
641	.byte	1
642	.byte	2
643	.byte	3
644	.byte	0
645	.byte	5
646	.byte	6
647	.byte	7
648	.byte	4
649	.byte	9
650	.byte	10
651	.byte	11
652	.byte	8
653	.byte	13
654	.byte	14
655	.byte	15
656	.byte	12
/*
 * .LCPI3_3: the four 32-bit words that .LCPI1_0 packs into two xwords.
 * NOTE(review): no reference to this label appears in the part of the
 * file reviewed here; presumably it is used further down.
 */
657.LCPI3_3:
658	.word	1779033703		// 0x6A09E667
659	.word	3144134277		// 0xBB67AE85
660	.word	1013904242		// 0x3C6EF372
661	.word	2773480762		// 0xA54FF53A
662	.text
663	.globl	zfs_blake3_hash_many_sse41
664	.p2align	2
665	.type	zfs_blake3_hash_many_sse41,@function
666zfs_blake3_hash_many_sse41:
667	.cfi_startproc
668	hint	#34
669	stp	d15, d14, [sp, #-144]!
670	stp	d13, d12, [sp, #16]
671	stp	d11, d10, [sp, #32]
672	stp	d9, d8, [sp, #48]
673	stp	x29, x27, [sp, #64]
674	stp	x26, x25, [sp, #80]
675	stp	x24, x23, [sp, #96]
676	stp	x22, x21, [sp, #112]
677	stp	x20, x19, [sp, #128]
678	sub	sp, sp, #368
679	.cfi_def_cfa_offset 512
680	.cfi_offset w19, -8
681	.cfi_offset w20, -16
682	.cfi_offset w21, -24
683	.cfi_offset w22, -32
684	.cfi_offset w23, -40
685	.cfi_offset w24, -48
686	.cfi_offset w25, -56
687	.cfi_offset w26, -64
688	.cfi_offset w27, -72
689	.cfi_offset w29, -80
690	.cfi_offset b8, -88
691	.cfi_offset b9, -96
692	.cfi_offset b10, -104
693	.cfi_offset b11, -112
694	.cfi_offset b12, -120
695	.cfi_offset b13, -128
696	.cfi_offset b14, -136
697	.cfi_offset b15, -144
698	ldr	x8, [sp, #520]
699	adrp	x11, .LCPI3_1
700	ldrb	w9, [sp, #512]
701	adrp	x10, .LCPI3_2
702	cmp	x1, #4
703	b.lo	.LBB3_6
704	adrp	x12, .LCPI3_0
705	sbfx	w13, w5, #0, #1
706	mov	w15, #58983
707	mov	w16, #44677
708	movk	w15, #27145, lsl #16
709	movk	w16, #47975, lsl #16
710	ldr	q0, [x12, :lo12:.LCPI3_0]
711	dup	v1.4s, w13
712	movi	v13.4s, #64
713	mov	w13, #62322
714	mov	w14, #62778
715	orr	w12, w7, w6
716	and	v0.16b, v1.16b, v0.16b
717	ldr	q1, [x11, :lo12:.LCPI3_1]
718	movk	w13, #15470, lsl #16
719	movk	w14, #42319, lsl #16
720	dup	v14.4s, w15
721	stp	q0, q1, [sp, #16]
722	orr	v0.4s, #128, lsl #24
723	str	q0, [sp]
724	dup	v0.4s, w16
725	stp	q0, q14, [sp, #48]
726	b	.LBB3_3
727.LBB3_2:
728	zip1	v0.4s, v29.4s, v8.4s
729	add	x15, x4, #4
730	zip1	v1.4s, v30.4s, v31.4s
731	tst	w5, #0x1
732	zip1	v2.4s, v24.4s, v18.4s
733	csel	x4, x15, x4, ne
734	zip1	v3.4s, v25.4s, v26.4s
735	add	x0, x0, #32
736	zip2	v6.4s, v29.4s, v8.4s
737	sub	x1, x1, #4
738	zip1	v4.2d, v0.2d, v1.2d
739	cmp	x1, #3
740	zip2	v7.4s, v30.4s, v31.4s
741	zip1	v5.2d, v2.2d, v3.2d
742	zip2	v0.2d, v0.2d, v1.2d
743	zip2	v1.2d, v2.2d, v3.2d
744	zip2	v2.4s, v24.4s, v18.4s
745	zip2	v3.4s, v25.4s, v26.4s
746	stp	q4, q5, [x8]
747	zip2	v4.2d, v6.2d, v7.2d
748	stp	q0, q1, [x8, #32]
749	zip1	v0.2d, v6.2d, v7.2d
750	zip1	v1.2d, v2.2d, v3.2d
751	zip2	v2.2d, v2.2d, v3.2d
752	stp	q0, q1, [x8, #64]
753	stp	q4, q2, [x8, #96]
754	add	x8, x8, #128
755	b.ls	.LBB3_6
756.LBB3_3:
757	mov	x15, x3
758	add	x16, x3, #8
759	add	x17, x3, #12
760	add	x19, x3, #16
761	add	x20, x3, #20
762	ld1r	{ v29.4s }, [x15], #4
763	ld1r	{ v30.4s }, [x16]
764	add	x16, x3, #24
765	ld1r	{ v31.4s }, [x17]
766	add	x17, x3, #28
767	ld1r	{ v24.4s }, [x19]
768	ld1r	{ v18.4s }, [x20]
769	ld1r	{ v25.4s }, [x16]
770	ld1r	{ v8.4s }, [x15]
771	ld1r	{ v26.4s }, [x17]
772	cbz	x2, .LBB3_2
773	ldr	q1, [sp, #16]
774	dup	v0.4s, w4
775	lsr	x17, x4, #32
776	mov	x15, xzr
777	ldp	x19, x20, [x0, #16]
778	add	v1.4s, v0.4s, v1.4s
779	mov	x21, x2
780	movi	v0.4s, #128, lsl #24
781	mov	w26, w12
782	str	q1, [sp, #96]
783	eor	v0.16b, v1.16b, v0.16b
784	ldr	q1, [sp]
785	cmgt	v0.4s, v1.4s, v0.4s
786	dup	v1.4s, w17
787	ldp	x16, x17, [x0]
788	sub	v0.4s, v1.4s, v0.4s
789	str	q0, [sp, #80]
790.LBB3_5:
791	add	x23, x16, x15
792	add	x24, x17, x15
793	add	x22, x19, x15
794	add	x25, x20, x15
795	subs	x21, x21, #1
796	add	x15, x15, #64
797	ldp	q1, q2, [x23]
798	csel	w27, w9, wzr, eq
799	orr	w26, w27, w26
800	and	w26, w26, #0xff
801	ldp	q4, q5, [x24]
802	dup	v0.4s, w26
803	mov	w26, w6
804	zip1	v22.4s, v1.4s, v4.4s
805	zip2	v20.4s, v1.4s, v4.4s
806	ldp	q6, q7, [x22]
807	zip1	v17.4s, v2.4s, v5.4s
808	zip2	v23.4s, v2.4s, v5.4s
809	ldp	q16, q21, [x25]
810	zip1	v19.4s, v6.4s, v16.4s
811	zip2	v1.4s, v6.4s, v16.4s
812	ldp	q27, q28, [x23, #32]
813	zip1	v4.4s, v7.4s, v21.4s
814	zip2	v5.4s, v7.4s, v21.4s
815	zip2	v15.2d, v17.2d, v4.2d
816	ldp	q9, q10, [x24, #32]
817	mov	v17.d[1], v4.d[0]
818	add	v4.4s, v30.4s, v25.4s
819	zip2	v11.2d, v23.2d, v5.2d
820	zip2	v3.4s, v27.4s, v9.4s
821	zip1	v7.4s, v27.4s, v9.4s
822	ldp	q12, q6, [x22, #32]
823	mov	v23.d[1], v5.d[0]
824	stp	q11, q3, [sp, #256]
825	add	v5.4s, v31.4s, v26.4s
826	add	v4.4s, v4.4s, v17.4s
827	str	q23, [sp, #352]
828	ldp	q16, q2, [x25, #32]
829	add	v5.4s, v5.4s, v23.4s
830	zip1	v3.4s, v12.4s, v16.4s
831	eor	v0.16b, v5.16b, v0.16b
832	zip1	v9.4s, v6.4s, v2.4s
833	zip2	v2.4s, v6.4s, v2.4s
834	stp	q7, q3, [sp, #208]
835	zip2	v3.4s, v12.4s, v16.4s
836	zip1	v12.4s, v28.4s, v10.4s
837	zip2	v10.4s, v28.4s, v10.4s
838	stp	q17, q2, [sp, #160]
839	zip2	v28.2d, v22.2d, v19.2d
840	mov	v22.d[1], v19.d[0]
841	str	q3, [sp, #240]
842	add	v2.4s, v8.4s, v18.4s
843	eor	v16.16b, v4.16b, v13.16b
844	dup	v17.4s, w13
845	mov	v3.16b, v22.16b
846	stp	q22, q28, [sp, #320]
847	zip2	v22.2d, v20.2d, v1.2d
848	mov	v20.d[1], v1.d[0]
849	add	v1.4s, v29.4s, v24.4s
850	add	v4.4s, v4.4s, v15.4s
851	add	v5.4s, v5.4s, v11.4s
852	add	v2.4s, v2.4s, v20.4s
853	stp	q15, q20, [sp, #288]
854	add	v1.4s, v1.4s, v3.4s
855	ldr	q3, [sp, #96]
856	dup	v20.4s, w14
857	mov	v23.16b, v22.16b
858	mov	v15.16b, v10.16b
859	eor	v6.16b, v1.16b, v3.16b
860	ldr	q3, [sp, #80]
861	add	v1.4s, v1.4s, v28.4s
862	ldr	q28, [sp, #272]
863	str	q23, [sp, #128]
864	eor	v7.16b, v2.16b, v3.16b
865	ldp	q27, q3, [sp, #32]
866	add	v2.4s, v2.4s, v22.4s
867	tbl	v6.16b, { v6.16b }, v27.16b
868	tbl	v7.16b, { v7.16b }, v27.16b
869	tbl	v16.16b, { v16.16b }, v27.16b
870	tbl	v0.16b, { v0.16b }, v27.16b
871	add	v19.4s, v6.4s, v14.4s
872	add	v21.4s, v7.4s, v3.4s
873	add	v30.4s, v16.4s, v17.4s
874	add	v31.4s, v0.4s, v20.4s
875	eor	v24.16b, v19.16b, v24.16b
876	eor	v17.16b, v21.16b, v18.16b
877	ushr	v18.4s, v24.4s, #12
878	shl	v20.4s, v24.4s, #20
879	eor	v24.16b, v30.16b, v25.16b
880	eor	v25.16b, v31.16b, v26.16b
881	ushr	v26.4s, v17.4s, #12
882	shl	v17.4s, v17.4s, #20
883	ushr	v29.4s, v24.4s, #12
884	shl	v24.4s, v24.4s, #20
885	ushr	v8.4s, v25.4s, #12
886	shl	v25.4s, v25.4s, #20
887	orr	v3.16b, v20.16b, v18.16b
888	ldr	q18, [x10, :lo12:.LCPI3_2]
889	orr	v13.16b, v17.16b, v26.16b
890	orr	v24.16b, v24.16b, v29.16b
891	orr	v14.16b, v25.16b, v8.16b
892	add	v8.4s, v1.4s, v3.4s
893	add	v29.4s, v2.4s, v13.4s
894	add	v17.4s, v4.4s, v24.4s
895	add	v20.4s, v5.4s, v14.4s
896	eor	v1.16b, v6.16b, v8.16b
897	eor	v2.16b, v7.16b, v29.16b
898	eor	v4.16b, v16.16b, v17.16b
899	eor	v0.16b, v0.16b, v20.16b
900	tbl	v25.16b, { v1.16b }, v18.16b
901	tbl	v16.16b, { v2.16b }, v18.16b
902	tbl	v6.16b, { v4.16b }, v18.16b
903	tbl	v4.16b, { v0.16b }, v18.16b
904	add	v19.4s, v19.4s, v25.4s
905	add	v21.4s, v21.4s, v16.4s
906	add	v26.4s, v30.4s, v6.4s
907	add	v7.4s, v31.4s, v4.4s
908	eor	v0.16b, v19.16b, v3.16b
909	eor	v1.16b, v21.16b, v13.16b
910	eor	v2.16b, v26.16b, v24.16b
911	eor	v3.16b, v7.16b, v14.16b
912	ushr	v5.4s, v0.4s, #7
913	shl	v0.4s, v0.4s, #25
914	ushr	v24.4s, v1.4s, #7
915	shl	v1.4s, v1.4s, #25
916	ushr	v30.4s, v2.4s, #7
917	shl	v2.4s, v2.4s, #25
918	orr	v5.16b, v0.16b, v5.16b
919	orr	v0.16b, v1.16b, v24.16b
920	ushr	v31.4s, v3.4s, #7
921	orr	v2.16b, v2.16b, v30.16b
922	ldp	q24, q30, [sp, #208]
923	shl	v3.4s, v3.4s, #25
924	zip2	v14.2d, v12.2d, v9.2d
925	mov	v22.16b, v24.16b
926	orr	v1.16b, v3.16b, v31.16b
927	zip2	v3.2d, v24.2d, v30.2d
928	mov	v24.16b, v28.16b
929	mov	v22.d[1], v30.d[0]
930	ldr	q30, [sp, #240]
931	mov	v31.16b, v12.16b
932	stp	q22, q14, [sp, #224]
933	mov	v24.d[1], v30.d[0]
934	add	v12.4s, v8.4s, v22.4s
935	mov	v31.d[1], v9.d[0]
936	add	v22.4s, v29.4s, v24.4s
937	ldr	q29, [sp, #176]
938	zip2	v28.2d, v28.2d, v30.2d
939	mov	v9.16b, v24.16b
940	mov	v15.d[1], v29.d[0]
941	zip2	v8.2d, v10.2d, v29.2d
942	add	v10.4s, v12.4s, v0.4s
943	add	v22.4s, v22.4s, v2.4s
944	str	q9, [sp, #144]
945	add	v20.4s, v20.4s, v15.4s
946	add	v17.4s, v17.4s, v31.4s
947	stp	q3, q8, [sp, #192]
948	eor	v4.16b, v4.16b, v10.16b
949	eor	v25.16b, v25.16b, v22.16b
950	add	v20.4s, v20.4s, v5.4s
951	add	v17.4s, v17.4s, v1.4s
952	tbl	v4.16b, { v4.16b }, v27.16b
953	tbl	v25.16b, { v25.16b }, v27.16b
954	eor	v6.16b, v6.16b, v20.16b
955	eor	v16.16b, v16.16b, v17.16b
956	add	v26.4s, v26.4s, v4.4s
957	add	v7.4s, v7.4s, v25.4s
958	tbl	v6.16b, { v6.16b }, v27.16b
959	tbl	v16.16b, { v16.16b }, v27.16b
960	eor	v0.16b, v26.16b, v0.16b
961	eor	v2.16b, v7.16b, v2.16b
962	add	v21.4s, v21.4s, v6.4s
963	add	v19.4s, v19.4s, v16.4s
964	ushr	v12.4s, v0.4s, #12
965	shl	v0.4s, v0.4s, #20
966	ushr	v13.4s, v2.4s, #12
967	shl	v2.4s, v2.4s, #20
968	eor	v5.16b, v21.16b, v5.16b
969	eor	v1.16b, v19.16b, v1.16b
970	orr	v0.16b, v0.16b, v12.16b
971	add	v10.4s, v10.4s, v3.4s
972	orr	v2.16b, v2.16b, v13.16b
973	ushr	v13.4s, v5.4s, #12
974	shl	v5.4s, v5.4s, #20
975	add	v22.4s, v22.4s, v28.4s
976	ushr	v12.4s, v1.4s, #12
977	shl	v1.4s, v1.4s, #20
978	add	v10.4s, v10.4s, v0.4s
979	orr	v5.16b, v5.16b, v13.16b
980	add	v22.4s, v22.4s, v2.4s
981	add	v20.4s, v20.4s, v8.4s
982	orr	v1.16b, v1.16b, v12.16b
983	add	v17.4s, v17.4s, v14.4s
984	eor	v4.16b, v4.16b, v10.16b
985	eor	v25.16b, v25.16b, v22.16b
986	add	v20.4s, v20.4s, v5.4s
987	add	v17.4s, v17.4s, v1.4s
988	tbl	v4.16b, { v4.16b }, v18.16b
989	tbl	v25.16b, { v25.16b }, v18.16b
990	eor	v6.16b, v6.16b, v20.16b
991	eor	v16.16b, v16.16b, v17.16b
992	add	v26.4s, v26.4s, v4.4s
993	add	v7.4s, v7.4s, v25.4s
994	tbl	v6.16b, { v6.16b }, v18.16b
995	tbl	v16.16b, { v16.16b }, v18.16b
996	eor	v0.16b, v26.16b, v0.16b
997	eor	v2.16b, v7.16b, v2.16b
998	add	v21.4s, v21.4s, v6.4s
999	add	v19.4s, v19.4s, v16.4s
1000	ushr	v12.4s, v0.4s, #7
1001	shl	v0.4s, v0.4s, #25
1002	ushr	v13.4s, v2.4s, #7
1003	shl	v2.4s, v2.4s, #25
1004	eor	v5.16b, v21.16b, v5.16b
1005	eor	v1.16b, v19.16b, v1.16b
1006	orr	v0.16b, v0.16b, v12.16b
1007	add	v22.4s, v22.4s, v23.4s
1008	orr	v2.16b, v2.16b, v13.16b
1009	ushr	v13.4s, v5.4s, #7
1010	shl	v5.4s, v5.4s, #25
1011	add	v17.4s, v17.4s, v11.4s
1012	mov	v30.16b, v28.16b
1013	mov	v28.16b, v23.16b
1014	ldr	q23, [sp, #304]
1015	ushr	v12.4s, v1.4s, #7
1016	shl	v1.4s, v1.4s, #25
1017	add	v22.4s, v22.4s, v0.4s
1018	mov	v29.16b, v31.16b
1019	ldr	q31, [sp, #160]
1020	orr	v5.16b, v5.16b, v13.16b
1021	add	v17.4s, v17.4s, v2.4s
1022	add	v10.4s, v10.4s, v23.4s
1023	orr	v1.16b, v1.16b, v12.16b
1024	str	q29, [sp, #272]
1025	eor	v16.16b, v16.16b, v22.16b
1026	add	v20.4s, v20.4s, v31.4s
1027	eor	v6.16b, v6.16b, v17.16b
1028	add	v10.4s, v10.4s, v5.4s
1029	tbl	v16.16b, { v16.16b }, v27.16b
1030	add	v20.4s, v20.4s, v1.4s
1031	tbl	v6.16b, { v6.16b }, v27.16b
1032	eor	v25.16b, v25.16b, v10.16b
1033	add	v21.4s, v21.4s, v16.4s
1034	eor	v4.16b, v4.16b, v20.16b
1035	add	v26.4s, v26.4s, v6.4s
1036	tbl	v25.16b, { v25.16b }, v27.16b
1037	eor	v0.16b, v21.16b, v0.16b
1038	tbl	v4.16b, { v4.16b }, v27.16b
1039	eor	v2.16b, v26.16b, v2.16b
1040	add	v19.4s, v19.4s, v25.4s
1041	ushr	v12.4s, v0.4s, #12
1042	shl	v0.4s, v0.4s, #20
1043	add	v7.4s, v7.4s, v4.4s
1044	ushr	v13.4s, v2.4s, #12
1045	shl	v2.4s, v2.4s, #20
1046	eor	v5.16b, v5.16b, v19.16b
1047	add	v22.4s, v22.4s, v24.4s
1048	ldr	q24, [sp, #320]
1049	orr	v0.16b, v0.16b, v12.16b
1050	eor	v1.16b, v7.16b, v1.16b
1051	orr	v2.16b, v2.16b, v13.16b
1052	ushr	v12.4s, v5.4s, #12
1053	shl	v5.4s, v5.4s, #20
1054	add	v17.4s, v17.4s, v24.4s
1055	ldr	q24, [sp, #352]
1056	ushr	v13.4s, v1.4s, #12
1057	shl	v1.4s, v1.4s, #20
1058	add	v22.4s, v22.4s, v0.4s
1059	orr	v5.16b, v5.16b, v12.16b
1060	add	v17.4s, v17.4s, v2.4s
1061	add	v10.4s, v10.4s, v24.4s
1062	ldr	q24, [sp, #336]
1063	orr	v1.16b, v1.16b, v13.16b
1064	eor	v16.16b, v16.16b, v22.16b
1065	add	v20.4s, v20.4s, v14.4s
1066	eor	v6.16b, v6.16b, v17.16b
1067	add	v10.4s, v10.4s, v5.4s
1068	tbl	v16.16b, { v16.16b }, v18.16b
1069	add	v20.4s, v20.4s, v1.4s
1070	tbl	v6.16b, { v6.16b }, v18.16b
1071	eor	v25.16b, v25.16b, v10.16b
1072	add	v21.4s, v21.4s, v16.4s
1073	eor	v4.16b, v4.16b, v20.16b
1074	add	v26.4s, v26.4s, v6.4s
1075	tbl	v25.16b, { v25.16b }, v18.16b
1076	eor	v0.16b, v21.16b, v0.16b
1077	tbl	v4.16b, { v4.16b }, v18.16b
1078	eor	v2.16b, v26.16b, v2.16b
1079	add	v19.4s, v19.4s, v25.4s
1080	ushr	v12.4s, v0.4s, #7
1081	shl	v0.4s, v0.4s, #25
1082	add	v7.4s, v7.4s, v4.4s
1083	ushr	v13.4s, v2.4s, #7
1084	shl	v2.4s, v2.4s, #25
1085	eor	v5.16b, v19.16b, v5.16b
1086	orr	v0.16b, v0.16b, v12.16b
1087	eor	v1.16b, v7.16b, v1.16b
1088	add	v10.4s, v10.4s, v24.4s
1089	orr	v2.16b, v2.16b, v13.16b
1090	ushr	v12.4s, v5.4s, #7
1091	shl	v5.4s, v5.4s, #25
1092	add	v22.4s, v22.4s, v29.4s
1093	ushr	v13.4s, v1.4s, #7
1094	shl	v1.4s, v1.4s, #25
1095	add	v10.4s, v10.4s, v0.4s
1096	orr	v5.16b, v5.16b, v12.16b
1097	add	v22.4s, v22.4s, v2.4s
1098	add	v20.4s, v20.4s, v8.4s
1099	ldr	q8, [sp, #288]
1100	orr	v1.16b, v1.16b, v13.16b
1101	add	v17.4s, v17.4s, v3.4s
1102	ldr	q3, [sp, #352]
1103	eor	v4.16b, v4.16b, v10.16b
1104	eor	v25.16b, v25.16b, v22.16b
1105	add	v20.4s, v20.4s, v5.4s
1106	add	v17.4s, v17.4s, v1.4s
1107	tbl	v4.16b, { v4.16b }, v27.16b
1108	tbl	v25.16b, { v25.16b }, v27.16b
1109	eor	v6.16b, v6.16b, v20.16b
1110	eor	v16.16b, v16.16b, v17.16b
1111	add	v26.4s, v26.4s, v4.4s
1112	add	v7.4s, v7.4s, v25.4s
1113	tbl	v6.16b, { v6.16b }, v27.16b
1114	tbl	v16.16b, { v16.16b }, v27.16b
1115	eor	v0.16b, v26.16b, v0.16b
1116	eor	v2.16b, v7.16b, v2.16b
1117	add	v21.4s, v21.4s, v6.4s
1118	add	v19.4s, v19.4s, v16.4s
1119	ushr	v12.4s, v0.4s, #12
1120	shl	v0.4s, v0.4s, #20
1121	ushr	v13.4s, v2.4s, #12
1122	shl	v2.4s, v2.4s, #20
1123	eor	v5.16b, v21.16b, v5.16b
1124	eor	v1.16b, v19.16b, v1.16b
1125	orr	v0.16b, v0.16b, v12.16b
1126	add	v10.4s, v10.4s, v30.4s
1127	orr	v2.16b, v2.16b, v13.16b
1128	ushr	v13.4s, v5.4s, #12
1129	shl	v5.4s, v5.4s, #20
1130	add	v22.4s, v22.4s, v8.4s
1131	mov	v24.16b, v30.16b
1132	mov	v30.16b, v15.16b
1133	add	v17.4s, v17.4s, v15.4s
1134	ldr	q15, [sp, #224]
1135	ushr	v12.4s, v1.4s, #12
1136	shl	v1.4s, v1.4s, #20
1137	add	v10.4s, v10.4s, v0.4s
1138	str	q30, [sp, #176]
1139	orr	v5.16b, v5.16b, v13.16b
1140	add	v22.4s, v22.4s, v2.4s
1141	add	v20.4s, v20.4s, v15.4s
1142	orr	v1.16b, v1.16b, v12.16b
1143	eor	v4.16b, v4.16b, v10.16b
1144	eor	v25.16b, v25.16b, v22.16b
1145	add	v20.4s, v20.4s, v5.4s
1146	add	v17.4s, v17.4s, v1.4s
1147	tbl	v4.16b, { v4.16b }, v18.16b
1148	tbl	v25.16b, { v25.16b }, v18.16b
1149	eor	v6.16b, v6.16b, v20.16b
1150	eor	v16.16b, v16.16b, v17.16b
1151	add	v26.4s, v26.4s, v4.4s
1152	add	v7.4s, v7.4s, v25.4s
1153	tbl	v6.16b, { v6.16b }, v18.16b
1154	tbl	v16.16b, { v16.16b }, v18.16b
1155	eor	v0.16b, v26.16b, v0.16b
1156	eor	v2.16b, v7.16b, v2.16b
1157	add	v21.4s, v21.4s, v6.4s
1158	add	v19.4s, v19.4s, v16.4s
1159	ushr	v12.4s, v0.4s, #7
1160	shl	v0.4s, v0.4s, #25
1161	ushr	v13.4s, v2.4s, #7
1162	shl	v2.4s, v2.4s, #25
1163	eor	v5.16b, v21.16b, v5.16b
1164	eor	v1.16b, v19.16b, v1.16b
1165	orr	v0.16b, v0.16b, v12.16b
1166	add	v22.4s, v22.4s, v9.4s
1167	orr	v2.16b, v2.16b, v13.16b
1168	ushr	v13.4s, v5.4s, #7
1169	shl	v5.4s, v5.4s, #25
1170	add	v17.4s, v17.4s, v14.4s
1171	ushr	v12.4s, v1.4s, #7
1172	shl	v1.4s, v1.4s, #25
1173	add	v22.4s, v22.4s, v0.4s
1174	orr	v5.16b, v5.16b, v13.16b
1175	add	v17.4s, v17.4s, v2.4s
1176	add	v10.4s, v10.4s, v28.4s
1177	orr	v1.16b, v1.16b, v12.16b
1178	eor	v16.16b, v16.16b, v22.16b
1179	add	v20.4s, v20.4s, v11.4s
1180	eor	v6.16b, v6.16b, v17.16b
1181	add	v10.4s, v10.4s, v5.4s
1182	tbl	v16.16b, { v16.16b }, v27.16b
1183	add	v20.4s, v20.4s, v1.4s
1184	tbl	v6.16b, { v6.16b }, v27.16b
1185	eor	v25.16b, v25.16b, v10.16b
1186	add	v21.4s, v21.4s, v16.4s
1187	eor	v4.16b, v4.16b, v20.16b
1188	add	v26.4s, v26.4s, v6.4s
1189	tbl	v25.16b, { v25.16b }, v27.16b
1190	eor	v0.16b, v21.16b, v0.16b
1191	tbl	v4.16b, { v4.16b }, v27.16b
1192	eor	v2.16b, v26.16b, v2.16b
1193	add	v19.4s, v19.4s, v25.4s
1194	ushr	v12.4s, v0.4s, #12
1195	shl	v0.4s, v0.4s, #20
1196	add	v7.4s, v7.4s, v4.4s
1197	ushr	v13.4s, v2.4s, #12
1198	shl	v2.4s, v2.4s, #20
1199	eor	v5.16b, v5.16b, v19.16b
1200	orr	v0.16b, v0.16b, v12.16b
1201	eor	v1.16b, v7.16b, v1.16b
1202	add	v22.4s, v22.4s, v29.4s
1203	orr	v2.16b, v2.16b, v13.16b
1204	ushr	v12.4s, v5.4s, #12
1205	shl	v5.4s, v5.4s, #20
1206	add	v17.4s, v17.4s, v23.4s
1207	ushr	v13.4s, v1.4s, #12
1208	shl	v1.4s, v1.4s, #20
1209	add	v22.4s, v22.4s, v0.4s
1210	orr	v5.16b, v5.16b, v12.16b
1211	add	v17.4s, v17.4s, v2.4s
1212	add	v10.4s, v10.4s, v31.4s
1213	orr	v1.16b, v1.16b, v13.16b
1214	eor	v16.16b, v16.16b, v22.16b
1215	add	v20.4s, v20.4s, v30.4s
1216	eor	v6.16b, v6.16b, v17.16b
1217	add	v10.4s, v10.4s, v5.4s
1218	tbl	v16.16b, { v16.16b }, v18.16b
1219	add	v20.4s, v20.4s, v1.4s
1220	tbl	v6.16b, { v6.16b }, v18.16b
1221	eor	v25.16b, v25.16b, v10.16b
1222	add	v21.4s, v21.4s, v16.4s
1223	eor	v4.16b, v4.16b, v20.16b
1224	add	v26.4s, v26.4s, v6.4s
1225	tbl	v25.16b, { v25.16b }, v18.16b
1226	eor	v0.16b, v21.16b, v0.16b
1227	tbl	v4.16b, { v4.16b }, v18.16b
1228	eor	v2.16b, v26.16b, v2.16b
1229	add	v19.4s, v19.4s, v25.4s
1230	ushr	v12.4s, v0.4s, #7
1231	shl	v0.4s, v0.4s, #25
1232	add	v7.4s, v7.4s, v4.4s
1233	ushr	v13.4s, v2.4s, #7
1234	shl	v2.4s, v2.4s, #25
1235	eor	v5.16b, v19.16b, v5.16b
1236	add	v10.4s, v10.4s, v3.4s
1237	ldr	q3, [sp, #192]
1238	orr	v0.16b, v0.16b, v12.16b
1239	eor	v1.16b, v7.16b, v1.16b
1240	orr	v2.16b, v2.16b, v13.16b
1241	ushr	v12.4s, v5.4s, #7
1242	shl	v5.4s, v5.4s, #25
1243	add	v22.4s, v22.4s, v3.4s
1244	ushr	v13.4s, v1.4s, #7
1245	shl	v1.4s, v1.4s, #25
1246	add	v10.4s, v10.4s, v0.4s
1247	orr	v5.16b, v5.16b, v12.16b
1248	add	v22.4s, v22.4s, v2.4s
1249	add	v20.4s, v20.4s, v15.4s
1250	ldr	q15, [sp, #128]
1251	orr	v1.16b, v1.16b, v13.16b
1252	add	v17.4s, v17.4s, v24.4s
1253	eor	v4.16b, v4.16b, v10.16b
1254	eor	v25.16b, v25.16b, v22.16b
1255	add	v20.4s, v20.4s, v5.4s
1256	add	v17.4s, v17.4s, v1.4s
1257	tbl	v4.16b, { v4.16b }, v27.16b
1258	tbl	v25.16b, { v25.16b }, v27.16b
1259	eor	v6.16b, v6.16b, v20.16b
1260	eor	v16.16b, v16.16b, v17.16b
1261	add	v26.4s, v26.4s, v4.4s
1262	add	v7.4s, v7.4s, v25.4s
1263	tbl	v6.16b, { v6.16b }, v27.16b
1264	tbl	v16.16b, { v16.16b }, v27.16b
1265	eor	v0.16b, v26.16b, v0.16b
1266	eor	v2.16b, v7.16b, v2.16b
1267	add	v21.4s, v21.4s, v6.4s
1268	add	v19.4s, v19.4s, v16.4s
1269	ushr	v12.4s, v0.4s, #12
1270	shl	v0.4s, v0.4s, #20
1271	ushr	v13.4s, v2.4s, #12
1272	shl	v2.4s, v2.4s, #20
1273	eor	v5.16b, v21.16b, v5.16b
1274	ldp	q23, q11, [sp, #320]
1275	eor	v1.16b, v19.16b, v1.16b
1276	orr	v0.16b, v0.16b, v12.16b
1277	add	v10.4s, v10.4s, v8.4s
1278	orr	v2.16b, v2.16b, v13.16b
1279	ushr	v13.4s, v5.4s, #12
1280	shl	v5.4s, v5.4s, #20
1281	add	v22.4s, v22.4s, v23.4s
1282	ushr	v12.4s, v1.4s, #12
1283	shl	v1.4s, v1.4s, #20
1284	add	v10.4s, v10.4s, v0.4s
1285	mov	v28.16b, v31.16b
1286	mov	v31.16b, v8.16b
1287	ldr	q8, [sp, #208]
1288	orr	v5.16b, v5.16b, v13.16b
1289	add	v22.4s, v22.4s, v2.4s
1290	add	v20.4s, v20.4s, v11.4s
1291	orr	v1.16b, v1.16b, v12.16b
1292	add	v17.4s, v17.4s, v8.4s
1293	eor	v4.16b, v4.16b, v10.16b
1294	eor	v25.16b, v25.16b, v22.16b
1295	add	v20.4s, v20.4s, v5.4s
1296	add	v17.4s, v17.4s, v1.4s
1297	tbl	v4.16b, { v4.16b }, v18.16b
1298	tbl	v25.16b, { v25.16b }, v18.16b
1299	eor	v6.16b, v6.16b, v20.16b
1300	eor	v16.16b, v16.16b, v17.16b
1301	add	v26.4s, v26.4s, v4.4s
1302	add	v7.4s, v7.4s, v25.4s
1303	tbl	v6.16b, { v6.16b }, v18.16b
1304	tbl	v16.16b, { v16.16b }, v18.16b
1305	eor	v0.16b, v26.16b, v0.16b
1306	eor	v2.16b, v7.16b, v2.16b
1307	add	v21.4s, v21.4s, v6.4s
1308	add	v19.4s, v19.4s, v16.4s
1309	ushr	v12.4s, v0.4s, #7
1310	shl	v0.4s, v0.4s, #25
1311	ushr	v13.4s, v2.4s, #7
1312	shl	v2.4s, v2.4s, #25
1313	eor	v5.16b, v21.16b, v5.16b
1314	eor	v1.16b, v19.16b, v1.16b
1315	orr	v0.16b, v0.16b, v12.16b
1316	add	v22.4s, v22.4s, v29.4s
1317	orr	v2.16b, v2.16b, v13.16b
1318	ushr	v13.4s, v5.4s, #7
1319	shl	v5.4s, v5.4s, #25
1320	add	v17.4s, v17.4s, v30.4s
1321	ushr	v12.4s, v1.4s, #7
1322	shl	v1.4s, v1.4s, #25
1323	add	v22.4s, v22.4s, v0.4s
1324	orr	v5.16b, v5.16b, v13.16b
1325	add	v17.4s, v17.4s, v2.4s
1326	add	v10.4s, v10.4s, v9.4s
1327	orr	v1.16b, v1.16b, v12.16b
1328	eor	v16.16b, v16.16b, v22.16b
1329	add	v20.4s, v20.4s, v14.4s
1330	ldr	q14, [sp, #256]
1331	eor	v6.16b, v6.16b, v17.16b
1332	add	v10.4s, v10.4s, v5.4s
1333	tbl	v16.16b, { v16.16b }, v27.16b
1334	add	v20.4s, v20.4s, v1.4s
1335	tbl	v6.16b, { v6.16b }, v27.16b
1336	eor	v25.16b, v25.16b, v10.16b
1337	add	v21.4s, v21.4s, v16.4s
1338	eor	v4.16b, v4.16b, v20.16b
1339	add	v26.4s, v26.4s, v6.4s
1340	tbl	v25.16b, { v25.16b }, v27.16b
1341	eor	v0.16b, v21.16b, v0.16b
1342	tbl	v4.16b, { v4.16b }, v27.16b
1343	eor	v2.16b, v26.16b, v2.16b
1344	add	v19.4s, v19.4s, v25.4s
1345	ushr	v12.4s, v0.4s, #12
1346	shl	v0.4s, v0.4s, #20
1347	add	v7.4s, v7.4s, v4.4s
1348	ushr	v13.4s, v2.4s, #12
1349	shl	v2.4s, v2.4s, #20
1350	eor	v5.16b, v5.16b, v19.16b
1351	orr	v0.16b, v0.16b, v12.16b
1352	eor	v1.16b, v7.16b, v1.16b
1353	add	v22.4s, v22.4s, v3.4s
1354	orr	v2.16b, v2.16b, v13.16b
1355	ushr	v12.4s, v5.4s, #12
1356	shl	v5.4s, v5.4s, #20
1357	add	v17.4s, v17.4s, v15.4s
1358	ushr	v13.4s, v1.4s, #12
1359	shl	v1.4s, v1.4s, #20
1360	add	v22.4s, v22.4s, v0.4s
1361	orr	v5.16b, v5.16b, v12.16b
1362	add	v17.4s, v17.4s, v2.4s
1363	add	v10.4s, v10.4s, v14.4s
1364	orr	v1.16b, v1.16b, v13.16b
1365	eor	v16.16b, v16.16b, v22.16b
1366	add	v20.4s, v20.4s, v8.4s
1367	eor	v6.16b, v6.16b, v17.16b
1368	add	v10.4s, v10.4s, v5.4s
1369	tbl	v16.16b, { v16.16b }, v18.16b
1370	add	v20.4s, v20.4s, v1.4s
1371	tbl	v6.16b, { v6.16b }, v18.16b
1372	eor	v25.16b, v25.16b, v10.16b
1373	add	v21.4s, v21.4s, v16.4s
1374	eor	v4.16b, v4.16b, v20.16b
1375	add	v26.4s, v26.4s, v6.4s
1376	tbl	v25.16b, { v25.16b }, v18.16b
1377	eor	v0.16b, v21.16b, v0.16b
1378	tbl	v4.16b, { v4.16b }, v18.16b
1379	eor	v2.16b, v26.16b, v2.16b
1380	add	v19.4s, v19.4s, v25.4s
1381	ushr	v12.4s, v0.4s, #7
1382	shl	v0.4s, v0.4s, #25
1383	add	v7.4s, v7.4s, v4.4s
1384	ushr	v13.4s, v2.4s, #7
1385	shl	v2.4s, v2.4s, #25
1386	eor	v5.16b, v19.16b, v5.16b
1387	orr	v0.16b, v0.16b, v12.16b
1388	eor	v1.16b, v7.16b, v1.16b
1389	add	v10.4s, v10.4s, v28.4s
1390	orr	v2.16b, v2.16b, v13.16b
1391	ushr	v12.4s, v5.4s, #7
1392	shl	v5.4s, v5.4s, #25
1393	add	v22.4s, v22.4s, v24.4s
1394	ushr	v13.4s, v1.4s, #7
1395	shl	v1.4s, v1.4s, #25
1396	add	v10.4s, v10.4s, v0.4s
1397	orr	v5.16b, v5.16b, v12.16b
1398	add	v22.4s, v22.4s, v2.4s
1399	add	v20.4s, v20.4s, v11.4s
1400	ldr	q11, [sp, #304]
1401	orr	v1.16b, v1.16b, v13.16b
1402	add	v17.4s, v17.4s, v31.4s
1403	ldr	q31, [sp, #224]
1404	eor	v4.16b, v4.16b, v10.16b
1405	eor	v25.16b, v25.16b, v22.16b
1406	add	v20.4s, v20.4s, v5.4s
1407	add	v17.4s, v17.4s, v1.4s
1408	tbl	v4.16b, { v4.16b }, v27.16b
1409	tbl	v25.16b, { v25.16b }, v27.16b
1410	eor	v6.16b, v6.16b, v20.16b
1411	eor	v16.16b, v16.16b, v17.16b
1412	add	v26.4s, v26.4s, v4.4s
1413	add	v7.4s, v7.4s, v25.4s
1414	tbl	v6.16b, { v6.16b }, v27.16b
1415	tbl	v16.16b, { v16.16b }, v27.16b
1416	eor	v0.16b, v26.16b, v0.16b
1417	eor	v2.16b, v7.16b, v2.16b
1418	add	v21.4s, v21.4s, v6.4s
1419	add	v19.4s, v19.4s, v16.4s
1420	ushr	v12.4s, v0.4s, #12
1421	shl	v0.4s, v0.4s, #20
1422	ushr	v13.4s, v2.4s, #12
1423	shl	v2.4s, v2.4s, #20
1424	eor	v5.16b, v21.16b, v5.16b
1425	eor	v1.16b, v19.16b, v1.16b
1426	orr	v0.16b, v0.16b, v12.16b
1427	add	v10.4s, v10.4s, v23.4s
1428	ldr	q23, [sp, #240]
1429	orr	v2.16b, v2.16b, v13.16b
1430	ushr	v13.4s, v5.4s, #12
1431	shl	v5.4s, v5.4s, #20
1432	add	v22.4s, v22.4s, v11.4s
1433	mov	v30.16b, v8.16b
1434	mov	v8.16b, v24.16b
1435	ldr	q24, [sp, #352]
1436	ushr	v12.4s, v1.4s, #12
1437	shl	v1.4s, v1.4s, #20
1438	add	v10.4s, v10.4s, v0.4s
1439	orr	v5.16b, v5.16b, v13.16b
1440	str	q8, [sp, #112]
1441	add	v22.4s, v22.4s, v2.4s
1442	add	v20.4s, v20.4s, v24.4s
1443	orr	v1.16b, v1.16b, v12.16b
1444	add	v17.4s, v17.4s, v31.4s
1445	eor	v4.16b, v4.16b, v10.16b
1446	eor	v25.16b, v25.16b, v22.16b
1447	add	v20.4s, v20.4s, v5.4s
1448	add	v17.4s, v17.4s, v1.4s
1449	tbl	v4.16b, { v4.16b }, v18.16b
1450	tbl	v25.16b, { v25.16b }, v18.16b
1451	eor	v6.16b, v6.16b, v20.16b
1452	eor	v16.16b, v16.16b, v17.16b
1453	add	v26.4s, v26.4s, v4.4s
1454	add	v7.4s, v7.4s, v25.4s
1455	tbl	v6.16b, { v6.16b }, v18.16b
1456	tbl	v16.16b, { v16.16b }, v18.16b
1457	eor	v0.16b, v26.16b, v0.16b
1458	eor	v2.16b, v7.16b, v2.16b
1459	add	v21.4s, v21.4s, v6.4s
1460	mov	v29.16b, v3.16b
1461	add	v19.4s, v19.4s, v16.4s
1462	ushr	v12.4s, v0.4s, #7
1463	shl	v0.4s, v0.4s, #25
1464	ushr	v13.4s, v2.4s, #7
1465	shl	v2.4s, v2.4s, #25
1466	eor	v5.16b, v21.16b, v5.16b
1467	eor	v1.16b, v19.16b, v1.16b
1468	orr	v0.16b, v0.16b, v12.16b
1469	add	v22.4s, v22.4s, v29.4s
1470	orr	v2.16b, v2.16b, v13.16b
1471	ushr	v13.4s, v5.4s, #7
1472	shl	v5.4s, v5.4s, #25
1473	add	v17.4s, v17.4s, v30.4s
1474	ldr	q30, [sp, #272]
1475	ushr	v12.4s, v1.4s, #7
1476	shl	v1.4s, v1.4s, #25
1477	add	v22.4s, v22.4s, v0.4s
1478	mov	v3.16b, v28.16b
1479	ldr	q28, [sp, #176]
1480	orr	v5.16b, v5.16b, v13.16b
1481	add	v17.4s, v17.4s, v2.4s
1482	add	v10.4s, v10.4s, v30.4s
1483	orr	v1.16b, v1.16b, v12.16b
1484	eor	v16.16b, v16.16b, v22.16b
1485	add	v20.4s, v20.4s, v28.4s
1486	eor	v6.16b, v6.16b, v17.16b
1487	add	v10.4s, v10.4s, v5.4s
1488	tbl	v16.16b, { v16.16b }, v27.16b
1489	add	v20.4s, v20.4s, v1.4s
1490	tbl	v6.16b, { v6.16b }, v27.16b
1491	eor	v25.16b, v25.16b, v10.16b
1492	add	v21.4s, v21.4s, v16.4s
1493	eor	v4.16b, v4.16b, v20.16b
1494	add	v26.4s, v26.4s, v6.4s
1495	tbl	v25.16b, { v25.16b }, v27.16b
1496	eor	v0.16b, v21.16b, v0.16b
1497	tbl	v4.16b, { v4.16b }, v27.16b
1498	eor	v2.16b, v26.16b, v2.16b
1499	add	v19.4s, v19.4s, v25.4s
1500	ushr	v12.4s, v0.4s, #12
1501	shl	v0.4s, v0.4s, #20
1502	add	v7.4s, v7.4s, v4.4s
1503	ushr	v13.4s, v2.4s, #12
1504	shl	v2.4s, v2.4s, #20
1505	eor	v5.16b, v5.16b, v19.16b
1506	orr	v0.16b, v0.16b, v12.16b
1507	eor	v1.16b, v7.16b, v1.16b
1508	add	v22.4s, v22.4s, v8.4s
1509	orr	v2.16b, v2.16b, v13.16b
1510	ushr	v12.4s, v5.4s, #12
1511	shl	v5.4s, v5.4s, #20
1512	add	v17.4s, v17.4s, v9.4s
1513	ldr	q9, [sp, #320]
1514	ushr	v13.4s, v1.4s, #12
1515	shl	v1.4s, v1.4s, #20
1516	add	v22.4s, v22.4s, v0.4s
1517	orr	v5.16b, v5.16b, v12.16b
1518	add	v17.4s, v17.4s, v2.4s
1519	add	v10.4s, v10.4s, v23.4s
1520	orr	v1.16b, v1.16b, v13.16b
1521	eor	v16.16b, v16.16b, v22.16b
1522	add	v20.4s, v20.4s, v31.4s
1523	eor	v6.16b, v6.16b, v17.16b
1524	add	v10.4s, v10.4s, v5.4s
1525	tbl	v16.16b, { v16.16b }, v18.16b
1526	add	v20.4s, v20.4s, v1.4s
1527	tbl	v6.16b, { v6.16b }, v18.16b
1528	eor	v25.16b, v25.16b, v10.16b
1529	add	v21.4s, v21.4s, v16.4s
1530	eor	v4.16b, v4.16b, v20.16b
1531	add	v26.4s, v26.4s, v6.4s
1532	tbl	v25.16b, { v25.16b }, v18.16b
1533	eor	v0.16b, v21.16b, v0.16b
1534	tbl	v4.16b, { v4.16b }, v18.16b
1535	eor	v2.16b, v26.16b, v2.16b
1536	add	v19.4s, v19.4s, v25.4s
1537	ushr	v12.4s, v0.4s, #7
1538	shl	v0.4s, v0.4s, #25
1539	add	v7.4s, v7.4s, v4.4s
1540	ushr	v13.4s, v2.4s, #7
1541	shl	v2.4s, v2.4s, #25
1542	eor	v5.16b, v19.16b, v5.16b
1543	add	v10.4s, v10.4s, v14.4s
1544	ldr	q14, [sp, #288]
1545	orr	v0.16b, v0.16b, v12.16b
1546	eor	v1.16b, v7.16b, v1.16b
1547	orr	v2.16b, v2.16b, v13.16b
1548	ushr	v12.4s, v5.4s, #7
1549	shl	v5.4s, v5.4s, #25
1550	add	v22.4s, v22.4s, v14.4s
1551	ushr	v13.4s, v1.4s, #7
1552	shl	v1.4s, v1.4s, #25
1553	add	v10.4s, v10.4s, v0.4s
1554	orr	v5.16b, v5.16b, v12.16b
1555	add	v22.4s, v22.4s, v2.4s
1556	add	v20.4s, v20.4s, v24.4s
1557	orr	v1.16b, v1.16b, v13.16b
1558	eor	v4.16b, v4.16b, v10.16b
1559	add	v17.4s, v17.4s, v9.4s
1560	eor	v25.16b, v25.16b, v22.16b
1561	add	v20.4s, v20.4s, v5.4s
1562	tbl	v4.16b, { v4.16b }, v27.16b
1563	add	v17.4s, v17.4s, v1.4s
1564	tbl	v25.16b, { v25.16b }, v27.16b
1565	eor	v6.16b, v6.16b, v20.16b
1566	add	v26.4s, v26.4s, v4.4s
1567	eor	v16.16b, v16.16b, v17.16b
1568	add	v7.4s, v7.4s, v25.4s
1569	tbl	v6.16b, { v6.16b }, v27.16b
1570	eor	v0.16b, v26.16b, v0.16b
1571	tbl	v16.16b, { v16.16b }, v27.16b
1572	eor	v2.16b, v7.16b, v2.16b
1573	add	v21.4s, v21.4s, v6.4s
1574	ushr	v12.4s, v0.4s, #12
1575	shl	v0.4s, v0.4s, #20
1576	add	v19.4s, v19.4s, v16.4s
1577	ushr	v13.4s, v2.4s, #12
1578	shl	v2.4s, v2.4s, #20
1579	eor	v5.16b, v21.16b, v5.16b
1580	orr	v0.16b, v0.16b, v12.16b
1581	eor	v1.16b, v19.16b, v1.16b
1582	add	v10.4s, v10.4s, v11.4s
1583	orr	v2.16b, v2.16b, v13.16b
1584	ushr	v13.4s, v5.4s, #12
1585	shl	v5.4s, v5.4s, #20
1586	ushr	v12.4s, v1.4s, #12
1587	shl	v1.4s, v1.4s, #20
1588	add	v10.4s, v10.4s, v0.4s
1589	add	v22.4s, v22.4s, v15.4s
1590	orr	v5.16b, v5.16b, v13.16b
1591	add	v20.4s, v20.4s, v3.4s
1592	mov	v24.16b, v3.16b
1593	ldr	q3, [sp, #336]
1594	orr	v1.16b, v1.16b, v12.16b
1595	eor	v4.16b, v4.16b, v10.16b
1596	add	v22.4s, v22.4s, v2.4s
1597	add	v17.4s, v17.4s, v3.4s
1598	add	v20.4s, v20.4s, v5.4s
1599	tbl	v4.16b, { v4.16b }, v18.16b
1600	eor	v25.16b, v25.16b, v22.16b
1601	add	v17.4s, v17.4s, v1.4s
1602	eor	v6.16b, v6.16b, v20.16b
1603	add	v26.4s, v26.4s, v4.4s
1604	tbl	v25.16b, { v25.16b }, v18.16b
1605	eor	v16.16b, v16.16b, v17.16b
1606	tbl	v6.16b, { v6.16b }, v18.16b
1607	eor	v0.16b, v26.16b, v0.16b
1608	add	v7.4s, v7.4s, v25.4s
1609	tbl	v16.16b, { v16.16b }, v18.16b
1610	add	v21.4s, v21.4s, v6.4s
1611	ushr	v12.4s, v0.4s, #7
1612	shl	v0.4s, v0.4s, #25
1613	eor	v2.16b, v7.16b, v2.16b
1614	add	v19.4s, v19.4s, v16.4s
1615	eor	v5.16b, v21.16b, v5.16b
1616	orr	v0.16b, v0.16b, v12.16b
1617	ushr	v12.4s, v2.4s, #7
1618	shl	v2.4s, v2.4s, #25
1619	eor	v1.16b, v19.16b, v1.16b
1620	ushr	v13.4s, v5.4s, #7
1621	shl	v5.4s, v5.4s, #25
1622	add	v22.4s, v22.4s, v8.4s
1623	orr	v2.16b, v2.16b, v12.16b
1624	ushr	v12.4s, v1.4s, #7
1625	shl	v1.4s, v1.4s, #25
1626	orr	v5.16b, v5.16b, v13.16b
1627	add	v22.4s, v22.4s, v0.4s
1628	add	v10.4s, v10.4s, v29.4s
1629	ldr	q29, [sp, #208]
1630	add	v17.4s, v17.4s, v31.4s
1631	orr	v1.16b, v1.16b, v12.16b
1632	add	v20.4s, v20.4s, v29.4s
1633	eor	v16.16b, v16.16b, v22.16b
1634	add	v10.4s, v10.4s, v5.4s
1635	add	v17.4s, v17.4s, v2.4s
1636	add	v20.4s, v20.4s, v1.4s
1637	tbl	v16.16b, { v16.16b }, v27.16b
1638	eor	v25.16b, v25.16b, v10.16b
1639	eor	v6.16b, v6.16b, v17.16b
1640	eor	v4.16b, v4.16b, v20.16b
1641	add	v21.4s, v21.4s, v16.4s
1642	tbl	v25.16b, { v25.16b }, v27.16b
1643	tbl	v6.16b, { v6.16b }, v27.16b
1644	tbl	v4.16b, { v4.16b }, v27.16b
1645	eor	v0.16b, v21.16b, v0.16b
1646	add	v19.4s, v19.4s, v25.4s
1647	add	v26.4s, v26.4s, v6.4s
1648	add	v7.4s, v7.4s, v4.4s
1649	ushr	v12.4s, v0.4s, #12
1650	shl	v0.4s, v0.4s, #20
1651	eor	v5.16b, v5.16b, v19.16b
1652	eor	v2.16b, v26.16b, v2.16b
1653	eor	v1.16b, v7.16b, v1.16b
1654	orr	v0.16b, v0.16b, v12.16b
1655	ushr	v12.4s, v5.4s, #12
1656	shl	v5.4s, v5.4s, #20
1657	add	v22.4s, v22.4s, v14.4s
1658	mov	v8.16b, v31.16b
1659	ushr	v13.4s, v2.4s, #12
1660	shl	v2.4s, v2.4s, #20
1661	mov	v31.16b, v14.16b
1662	ushr	v14.4s, v1.4s, #12
1663	shl	v1.4s, v1.4s, #20
1664	orr	v5.16b, v5.16b, v12.16b
1665	add	v22.4s, v22.4s, v0.4s
1666	add	v10.4s, v10.4s, v28.4s
1667	ldr	q28, [sp, #352]
1668	orr	v2.16b, v2.16b, v13.16b
1669	orr	v1.16b, v1.16b, v14.16b
1670	add	v17.4s, v17.4s, v30.4s
1671	add	v20.4s, v20.4s, v3.4s
1672	eor	v16.16b, v16.16b, v22.16b
1673	add	v10.4s, v10.4s, v5.4s
1674	add	v17.4s, v17.4s, v2.4s
1675	add	v20.4s, v20.4s, v1.4s
1676	tbl	v16.16b, { v16.16b }, v18.16b
1677	eor	v25.16b, v25.16b, v10.16b
1678	eor	v6.16b, v6.16b, v17.16b
1679	eor	v4.16b, v4.16b, v20.16b
1680	add	v21.4s, v21.4s, v16.4s
1681	tbl	v25.16b, { v25.16b }, v18.16b
1682	tbl	v6.16b, { v6.16b }, v18.16b
1683	tbl	v4.16b, { v4.16b }, v18.16b
1684	eor	v0.16b, v21.16b, v0.16b
1685	add	v19.4s, v19.4s, v25.4s
1686	add	v26.4s, v26.4s, v6.4s
1687	add	v7.4s, v7.4s, v4.4s
1688	ushr	v12.4s, v0.4s, #7
1689	shl	v0.4s, v0.4s, #25
1690	eor	v5.16b, v19.16b, v5.16b
1691	eor	v2.16b, v26.16b, v2.16b
1692	eor	v1.16b, v7.16b, v1.16b
1693	orr	v0.16b, v0.16b, v12.16b
1694	ushr	v12.4s, v5.4s, #7
1695	shl	v5.4s, v5.4s, #25
1696	add	v10.4s, v10.4s, v23.4s
1697	ushr	v13.4s, v2.4s, #7
1698	shl	v2.4s, v2.4s, #25
1699	ushr	v14.4s, v1.4s, #7
1700	shl	v1.4s, v1.4s, #25
1701	orr	v5.16b, v5.16b, v12.16b
1702	add	v10.4s, v10.4s, v0.4s
1703	add	v20.4s, v20.4s, v24.4s
1704	ldr	q24, [sp, #144]
1705	orr	v2.16b, v2.16b, v13.16b
1706	orr	v1.16b, v1.16b, v14.16b
1707	add	v22.4s, v22.4s, v9.4s
1708	add	v17.4s, v17.4s, v11.4s
1709	eor	v4.16b, v4.16b, v10.16b
1710	add	v20.4s, v20.4s, v5.4s
1711	add	v22.4s, v22.4s, v2.4s
1712	add	v17.4s, v17.4s, v1.4s
1713	tbl	v4.16b, { v4.16b }, v27.16b
1714	eor	v6.16b, v6.16b, v20.16b
1715	eor	v25.16b, v25.16b, v22.16b
1716	eor	v16.16b, v16.16b, v17.16b
1717	add	v26.4s, v26.4s, v4.4s
1718	tbl	v6.16b, { v6.16b }, v27.16b
1719	tbl	v25.16b, { v25.16b }, v27.16b
1720	tbl	v16.16b, { v16.16b }, v27.16b
1721	eor	v0.16b, v26.16b, v0.16b
1722	add	v21.4s, v21.4s, v6.4s
1723	add	v7.4s, v7.4s, v25.4s
1724	add	v19.4s, v19.4s, v16.4s
1725	ushr	v12.4s, v0.4s, #12
1726	shl	v0.4s, v0.4s, #20
1727	eor	v5.16b, v21.16b, v5.16b
1728	eor	v2.16b, v7.16b, v2.16b
1729	eor	v1.16b, v19.16b, v1.16b
1730	orr	v0.16b, v0.16b, v12.16b
1731	add	v10.4s, v10.4s, v15.4s
1732	ushr	v14.4s, v5.4s, #12
1733	shl	v5.4s, v5.4s, #20
1734	mov	v30.16b, v3.16b
1735	ldr	q3, [sp, #256]
1736	ushr	v12.4s, v2.4s, #12
1737	shl	v2.4s, v2.4s, #20
1738	ushr	v13.4s, v1.4s, #12
1739	shl	v1.4s, v1.4s, #20
1740	add	v10.4s, v10.4s, v0.4s
1741	orr	v5.16b, v5.16b, v14.16b
1742	add	v20.4s, v20.4s, v3.4s
1743	orr	v2.16b, v2.16b, v12.16b
1744	orr	v1.16b, v1.16b, v13.16b
1745	add	v22.4s, v22.4s, v24.4s
1746	add	v17.4s, v17.4s, v28.4s
1747	eor	v4.16b, v4.16b, v10.16b
1748	add	v20.4s, v20.4s, v5.4s
1749	add	v22.4s, v22.4s, v2.4s
1750	add	v17.4s, v17.4s, v1.4s
1751	tbl	v4.16b, { v4.16b }, v18.16b
1752	eor	v6.16b, v6.16b, v20.16b
1753	eor	v25.16b, v25.16b, v22.16b
1754	eor	v16.16b, v16.16b, v17.16b
1755	add	v26.4s, v26.4s, v4.4s
1756	tbl	v6.16b, { v6.16b }, v18.16b
1757	tbl	v25.16b, { v25.16b }, v18.16b
1758	tbl	v16.16b, { v16.16b }, v18.16b
1759	eor	v0.16b, v26.16b, v0.16b
1760	add	v21.4s, v21.4s, v6.4s
1761	add	v7.4s, v7.4s, v25.4s
1762	add	v19.4s, v19.4s, v16.4s
1763	ushr	v12.4s, v0.4s, #7
1764	shl	v0.4s, v0.4s, #25
1765	eor	v5.16b, v21.16b, v5.16b
1766	eor	v2.16b, v7.16b, v2.16b
1767	eor	v1.16b, v19.16b, v1.16b
1768	orr	v0.16b, v0.16b, v12.16b
1769	ushr	v12.4s, v5.4s, #7
1770	shl	v5.4s, v5.4s, #25
1771	mov	v23.16b, v9.16b
1772	ldr	q9, [sp, #112]
1773	ushr	v13.4s, v2.4s, #7
1774	shl	v2.4s, v2.4s, #25
1775	ushr	v14.4s, v1.4s, #7
1776	shl	v1.4s, v1.4s, #25
1777	orr	v5.16b, v5.16b, v12.16b
1778	add	v9.4s, v10.4s, v9.4s
1779	orr	v2.16b, v2.16b, v13.16b
1780	orr	v1.16b, v1.16b, v14.16b
1781	ldr	q14, [sp, #64]
1782	add	v22.4s, v22.4s, v31.4s
1783	add	v17.4s, v17.4s, v30.4s
1784	add	v20.4s, v20.4s, v8.4s
1785	add	v9.4s, v9.4s, v5.4s
1786	add	v22.4s, v22.4s, v0.4s
1787	add	v17.4s, v17.4s, v2.4s
1788	add	v20.4s, v20.4s, v1.4s
1789	eor	v25.16b, v25.16b, v9.16b
1790	eor	v16.16b, v16.16b, v22.16b
1791	eor	v6.16b, v6.16b, v17.16b
1792	eor	v4.16b, v4.16b, v20.16b
1793	tbl	v25.16b, { v25.16b }, v27.16b
1794	tbl	v16.16b, { v16.16b }, v27.16b
1795	tbl	v6.16b, { v6.16b }, v27.16b
1796	tbl	v4.16b, { v4.16b }, v27.16b
1797	add	v19.4s, v19.4s, v25.4s
1798	add	v21.4s, v21.4s, v16.4s
1799	add	v26.4s, v26.4s, v6.4s
1800	add	v7.4s, v7.4s, v4.4s
1801	eor	v5.16b, v5.16b, v19.16b
1802	eor	v0.16b, v21.16b, v0.16b
1803	eor	v2.16b, v26.16b, v2.16b
1804	eor	v1.16b, v7.16b, v1.16b
1805	ushr	v30.4s, v5.4s, #12
1806	shl	v5.4s, v5.4s, #20
1807	ushr	v10.4s, v0.4s, #12
1808	shl	v0.4s, v0.4s, #20
1809	ushr	v12.4s, v2.4s, #12
1810	shl	v2.4s, v2.4s, #20
1811	ushr	v13.4s, v1.4s, #12
1812	shl	v1.4s, v1.4s, #20
1813	orr	v5.16b, v5.16b, v30.16b
1814	add	v30.4s, v9.4s, v29.4s
1815	add	v22.4s, v22.4s, v23.4s
1816	ldr	q23, [sp, #192]
1817	orr	v0.16b, v0.16b, v10.16b
1818	orr	v2.16b, v2.16b, v12.16b
1819	orr	v1.16b, v1.16b, v13.16b
1820	add	v17.4s, v17.4s, v23.4s
1821	add	v20.4s, v20.4s, v28.4s
1822	add	v23.4s, v30.4s, v5.4s
1823	add	v22.4s, v22.4s, v0.4s
1824	add	v17.4s, v17.4s, v2.4s
1825	add	v20.4s, v20.4s, v1.4s
1826	eor	v25.16b, v25.16b, v23.16b
1827	eor	v16.16b, v16.16b, v22.16b
1828	eor	v6.16b, v6.16b, v17.16b
1829	eor	v4.16b, v4.16b, v20.16b
1830	tbl	v25.16b, { v25.16b }, v18.16b
1831	tbl	v16.16b, { v16.16b }, v18.16b
1832	tbl	v6.16b, { v6.16b }, v18.16b
1833	tbl	v4.16b, { v4.16b }, v18.16b
1834	add	v19.4s, v19.4s, v25.4s
1835	add	v21.4s, v21.4s, v16.4s
1836	add	v26.4s, v26.4s, v6.4s
1837	add	v7.4s, v7.4s, v4.4s
1838	eor	v5.16b, v19.16b, v5.16b
1839	eor	v0.16b, v21.16b, v0.16b
1840	eor	v2.16b, v26.16b, v2.16b
1841	eor	v1.16b, v7.16b, v1.16b
1842	ushr	v28.4s, v5.4s, #7
1843	shl	v5.4s, v5.4s, #25
1844	ushr	v30.4s, v0.4s, #7
1845	shl	v0.4s, v0.4s, #25
1846	ushr	v31.4s, v2.4s, #7
1847	shl	v2.4s, v2.4s, #25
1848	ushr	v8.4s, v1.4s, #7
1849	shl	v1.4s, v1.4s, #25
1850	orr	v5.16b, v5.16b, v28.16b
1851	ldr	q28, [sp, #176]
1852	orr	v0.16b, v0.16b, v30.16b
1853	orr	v2.16b, v2.16b, v31.16b
1854	orr	v1.16b, v1.16b, v8.16b
1855	add	v23.4s, v23.4s, v28.4s
1856	add	v22.4s, v22.4s, v11.4s
1857	add	v17.4s, v17.4s, v15.4s
1858	add	v20.4s, v20.4s, v3.4s
1859	ldr	q3, [sp, #272]
1860	add	v23.4s, v23.4s, v0.4s
1861	add	v22.4s, v22.4s, v2.4s
1862	add	v17.4s, v17.4s, v1.4s
1863	add	v20.4s, v20.4s, v5.4s
1864	eor	v4.16b, v4.16b, v23.16b
1865	eor	v25.16b, v25.16b, v22.16b
1866	eor	v16.16b, v16.16b, v17.16b
1867	eor	v6.16b, v6.16b, v20.16b
1868	tbl	v4.16b, { v4.16b }, v27.16b
1869	tbl	v25.16b, { v25.16b }, v27.16b
1870	tbl	v16.16b, { v16.16b }, v27.16b
1871	tbl	v6.16b, { v6.16b }, v27.16b
1872	add	v26.4s, v26.4s, v4.4s
1873	add	v7.4s, v7.4s, v25.4s
1874	add	v19.4s, v19.4s, v16.4s
1875	add	v21.4s, v21.4s, v6.4s
1876	eor	v0.16b, v26.16b, v0.16b
1877	eor	v2.16b, v7.16b, v2.16b
1878	eor	v1.16b, v19.16b, v1.16b
1879	eor	v5.16b, v21.16b, v5.16b
1880	add	v3.4s, v22.4s, v3.4s
1881	ldr	q22, [sp, #160]
1882	ushr	v28.4s, v0.4s, #12
1883	shl	v0.4s, v0.4s, #20
1884	ushr	v29.4s, v2.4s, #12
1885	shl	v2.4s, v2.4s, #20
1886	ushr	v30.4s, v1.4s, #12
1887	shl	v1.4s, v1.4s, #20
1888	ushr	v31.4s, v5.4s, #12
1889	shl	v5.4s, v5.4s, #20
1890	add	v17.4s, v17.4s, v22.4s
1891	ldr	q22, [sp, #240]
1892	orr	v0.16b, v0.16b, v28.16b
1893	prfm	pldl1keep, [x23, #256]
1894	orr	v2.16b, v2.16b, v29.16b
1895	prfm	pldl1keep, [x24, #256]
1896	orr	v1.16b, v1.16b, v30.16b
1897	prfm	pldl1keep, [x22, #256]
1898	orr	v5.16b, v5.16b, v31.16b
1899	prfm	pldl1keep, [x25, #256]
1900	add	v23.4s, v23.4s, v24.4s
1901	add	v20.4s, v20.4s, v22.4s
1902	add	v3.4s, v3.4s, v2.4s
1903	add	v17.4s, v17.4s, v1.4s
1904	add	v22.4s, v23.4s, v0.4s
1905	add	v20.4s, v20.4s, v5.4s
1906	eor	v23.16b, v25.16b, v3.16b
1907	eor	v16.16b, v16.16b, v17.16b
1908	eor	v4.16b, v4.16b, v22.16b
1909	eor	v6.16b, v6.16b, v20.16b
1910	tbl	v23.16b, { v23.16b }, v18.16b
1911	tbl	v16.16b, { v16.16b }, v18.16b
1912	tbl	v4.16b, { v4.16b }, v18.16b
1913	tbl	v6.16b, { v6.16b }, v18.16b
1914	add	v7.4s, v7.4s, v23.4s
1915	add	v19.4s, v19.4s, v16.4s
1916	add	v18.4s, v26.4s, v4.4s
1917	add	v21.4s, v21.4s, v6.4s
1918	eor	v2.16b, v7.16b, v2.16b
1919	eor	v1.16b, v19.16b, v1.16b
1920	eor	v0.16b, v18.16b, v0.16b
1921	eor	v5.16b, v21.16b, v5.16b
1922	ushr	v25.4s, v2.4s, #7
1923	shl	v2.4s, v2.4s, #25
1924	ushr	v24.4s, v0.4s, #7
1925	shl	v0.4s, v0.4s, #25
1926	ushr	v26.4s, v1.4s, #7
1927	shl	v1.4s, v1.4s, #25
1928	ushr	v27.4s, v5.4s, #7
1929	shl	v5.4s, v5.4s, #25
1930	orr	v0.16b, v0.16b, v24.16b
1931	orr	v2.16b, v2.16b, v25.16b
1932	orr	v1.16b, v1.16b, v26.16b
1933	orr	v5.16b, v5.16b, v27.16b
1934	movi	v13.4s, #64
1935	eor	v29.16b, v19.16b, v22.16b
1936	eor	v8.16b, v21.16b, v3.16b
1937	eor	v30.16b, v17.16b, v18.16b
1938	eor	v31.16b, v20.16b, v7.16b
1939	eor	v24.16b, v5.16b, v23.16b
1940	eor	v18.16b, v0.16b, v16.16b
1941	eor	v25.16b, v2.16b, v6.16b
1942	eor	v26.16b, v1.16b, v4.16b
1943	cbnz	x21, .LBB3_5
1944	b	.LBB3_2
/*
 * .LBB3_6: branch to .LBB3_14 when x1 is zero; otherwise load three
 * 16-byte constants (.LCPI3_1/2/3) into q0/q2/q1, precompute two scalar
 * values, and fall through into .LBB3_8.
 * NOTE(review): x11 and x10 serve as the page bases for .LCPI3_1 and
 * .LCPI3_2 but are set outside this view (presumably by earlier adrp
 * instructions) -- confirm against the code above.
 */
1945.LBB3_6:
1946	cbz	x1, .LBB3_14	/* x1 == 0: go to .LBB3_14 (presumably no inputs left -- confirm) */
1947	adrp	x12, .LCPI3_3	/* x12 = page address of .LCPI3_3 */
1948	ldr	q0, [x11, :lo12:.LCPI3_1]	/* q0 = .LCPI3_1; used as a tbl index vector in .LBB3_10 */
1949	orr	w11, w7, w6	/* w11 = w7 | w6 (presumably flags_start | flags -- confirm); x11 base is dead after the load above */
1950	ldr	q2, [x10, :lo12:.LCPI3_2]	/* q2 = .LCPI3_2; also used as a tbl index vector in .LBB3_10 */
1951	ldr	q1, [x12, :lo12:.LCPI3_3]	/* q1 = .LCPI3_3; added to v16 in .LBB3_10 (presumably the IV words -- confirm) */
1952	and	x12, x5, #0x1	/* x12 = bit 0 of x5 (presumably the increment_counter flag -- confirm) */
/*
 * .LBB3_8: build v3 = { low32(x4), high32(x4), 64, 64 }, load 32 bytes
 * from [x3] into q5/q4, fetch the 64-bit value at [x0] into x10, copy x2
 * and w11 into x15 and w14, then jump to .LBB3_11.
 * NOTE(review): presumably x4 is the 64-bit block counter, 64 the BLAKE3
 * block length, [x3] the 8-word key/chaining value and [x0] the current
 * input pointer -- confirm against the function prologue.
 */
1953.LBB3_8:
1954	movi	v3.4s, #64	/* all four 32-bit lanes = 64 */
1955	lsr	x13, x4, #32	/* x13 = upper 32 bits of x4 */
1956	ldp	q5, q4, [x3]	/* q5 = bytes 0..15, q4 = bytes 16..31 at [x3] */
1957	mov	x15, x2	/* x15 = x2 (presumably the blocks-remaining count -- confirm) */
1958	mov	w14, w11	/* w14 = w11; its low byte is inserted into lane 3 of a copy of v3 in .LBB3_10 */
1959	mov	v3.s[0], w4	/* lane 0 = low 32 bits of x4 */
1960	ldr	x10, [x0]	/* x10 = 64-bit value at [x0]; used as the load base in .LBB3_10 */
1961	mov	v3.s[1], w13	/* lane 1 = upper 32 bits of x4 */
1962	b	.LBB3_11
/*
 * .LBB3_9: OR w9 into w14, then fall through into .LBB3_10, where the low
 * byte of w14 is placed in lane 3 of a copy of v3.
 * NOTE(review): w9 is set outside this view; presumably it holds the
 * flags_end bits applied to the last block -- confirm.
 */
1963.LBB3_9:
1964	orr	w14, w14, w9	/* w14 |= w9 */
1965.LBB3_10:
1966	ldp	q6, q7, [x10]
1967	mov	v16.16b, v3.16b
1968	and	w14, w14, #0xff
1969	add	v5.4s, v5.4s, v4.4s
1970	mov	x15, x13
1971	mov	v16.s[3], w14
1972	add	x14, x10, #32
1973	uzp1	v17.4s, v6.4s, v7.4s
1974	add	x10, x10, #64
1975	add	v5.4s, v5.4s, v17.4s
1976	eor	v16.16b, v5.16b, v16.16b
1977	tbl	v16.16b, { v16.16b }, v0.16b
1978	add	v18.4s, v16.4s, v1.4s
1979	eor	v19.16b, v18.16b, v4.16b
1980	uzp2	v4.4s, v6.4s, v7.4s
1981	ushr	v6.4s, v19.4s, #12
1982	shl	v7.4s, v19.4s, #20
1983	ld2	{ v19.4s, v20.4s }, [x14]
1984	add	v5.4s, v5.4s, v4.4s
1985	mov	w14, w6
1986	orr	v6.16b, v7.16b, v6.16b
1987	add	v5.4s, v5.4s, v6.4s
1988	eor	v7.16b, v16.16b, v5.16b
1989	add	v5.4s, v5.4s, v19.4s
1990	tbl	v7.16b, { v7.16b }, v2.16b
1991	ext	v5.16b, v5.16b, v5.16b, #12
1992	add	v16.4s, v18.4s, v7.4s
1993	ext	v7.16b, v7.16b, v7.16b, #8
1994	eor	v6.16b, v6.16b, v16.16b
1995	ext	v16.16b, v16.16b, v16.16b, #4
1996	ushr	v18.4s, v6.4s, #7
1997	shl	v6.4s, v6.4s, #25
1998	orr	v6.16b, v6.16b, v18.16b
1999	ext	v18.16b, v20.16b, v20.16b, #12
2000	add	v5.4s, v5.4s, v6.4s
2001	eor	v7.16b, v5.16b, v7.16b
2002	add	v5.4s, v5.4s, v18.4s
2003	tbl	v7.16b, { v7.16b }, v0.16b
2004	add	v16.4s, v16.4s, v7.4s
2005	eor	v6.16b, v6.16b, v16.16b
2006	ushr	v21.4s, v6.4s, #12
2007	shl	v6.4s, v6.4s, #20
2008	orr	v6.16b, v6.16b, v21.16b
2009	uzp1	v21.4s, v17.4s, v17.4s
2010	add	v5.4s, v5.4s, v6.4s
2011	ext	v21.16b, v21.16b, v17.16b, #8
2012	eor	v7.16b, v7.16b, v5.16b
2013	uzp2	v21.4s, v21.4s, v4.4s
2014	tbl	v7.16b, { v7.16b }, v2.16b
2015	add	v5.4s, v5.4s, v21.4s
2016	add	v16.4s, v16.4s, v7.4s
2017	ext	v5.16b, v5.16b, v5.16b, #4
2018	ext	v7.16b, v7.16b, v7.16b, #8
2019	eor	v6.16b, v6.16b, v16.16b
2020	ushr	v22.4s, v6.4s, #7
2021	shl	v6.4s, v6.4s, #25
2022	orr	v6.16b, v6.16b, v22.16b
2023	add	v22.4s, v5.4s, v6.4s
2024	eor	v5.16b, v22.16b, v7.16b
2025	ext	v7.16b, v16.16b, v16.16b, #12
2026	tbl	v16.16b, { v5.16b }, v0.16b
2027	ext	v5.16b, v17.16b, v17.16b, #12
2028	add	v7.4s, v7.4s, v16.4s
2029	ext	v5.16b, v17.16b, v5.16b, #12
2030	ext	v17.16b, v19.16b, v19.16b, #12
2031	mov	v19.16b, v18.16b
2032	eor	v6.16b, v6.16b, v7.16b
2033	rev64	v5.4s, v5.4s
2034	mov	v19.s[1], v17.s[2]
2035	ushr	v20.4s, v6.4s, #12
2036	shl	v6.4s, v6.4s, #20
2037	trn2	v5.4s, v5.4s, v19.4s
2038	orr	v6.16b, v6.16b, v20.16b
2039	zip1	v20.2d, v18.2d, v4.2d
2040	zip2	v4.4s, v4.4s, v18.4s
2041	add	v19.4s, v6.4s, v5.4s
2042	mov	v20.s[3], v17.s[3]
2043	add	v19.4s, v19.4s, v22.4s
2044	ext	v22.16b, v20.16b, v20.16b, #12
2045	eor	v16.16b, v16.16b, v19.16b
2046	ext	v19.16b, v19.16b, v19.16b, #12
2047	tbl	v16.16b, { v16.16b }, v2.16b
2048	add	v7.4s, v7.4s, v16.4s
2049	ext	v16.16b, v16.16b, v16.16b, #8
2050	eor	v6.16b, v6.16b, v7.16b
2051	ext	v7.16b, v7.16b, v7.16b, #4
2052	ushr	v23.4s, v6.4s, #7
2053	shl	v24.4s, v6.4s, #25
2054	uzp1	v6.4s, v20.4s, v22.4s
2055	orr	v20.16b, v24.16b, v23.16b
2056	add	v22.4s, v20.4s, v6.4s
2057	add	v19.4s, v22.4s, v19.4s
2058	eor	v16.16b, v19.16b, v16.16b
2059	tbl	v16.16b, { v16.16b }, v0.16b
2060	add	v7.4s, v7.4s, v16.4s
2061	eor	v18.16b, v20.16b, v7.16b
2062	zip1	v20.4s, v4.4s, v17.4s
2063	zip1	v4.4s, v17.4s, v4.4s
2064	ushr	v17.4s, v18.4s, #12
2065	shl	v18.4s, v18.4s, #20
2066	ext	v20.16b, v4.16b, v20.16b, #8
2067	orr	v4.16b, v18.16b, v17.16b
2068	ext	v18.16b, v21.16b, v21.16b, #4
2069	add	v17.4s, v4.4s, v20.4s
2070	add	v17.4s, v17.4s, v19.4s
2071	uzp1	v19.4s, v18.4s, v18.4s
2072	eor	v16.16b, v16.16b, v17.16b
2073	ext	v19.16b, v19.16b, v18.16b, #8
2074	tbl	v16.16b, { v16.16b }, v2.16b
2075	uzp2	v19.4s, v19.4s, v5.4s
2076	add	v7.4s, v7.4s, v16.4s
2077	add	v17.4s, v17.4s, v19.4s
2078	ext	v16.16b, v16.16b, v16.16b, #8
2079	eor	v4.16b, v4.16b, v7.16b
2080	ext	v17.16b, v17.16b, v17.16b, #4
2081	ext	v7.16b, v7.16b, v7.16b, #12
2082	ushr	v21.4s, v4.4s, #7
2083	shl	v4.4s, v4.4s, #25
2084	orr	v4.16b, v4.16b, v21.16b
2085	ext	v21.16b, v18.16b, v18.16b, #12
2086	add	v17.4s, v17.4s, v4.4s
2087	ext	v18.16b, v18.16b, v21.16b, #12
2088	mov	v21.16b, v20.16b
2089	eor	v16.16b, v17.16b, v16.16b
2090	rev64	v18.4s, v18.4s
2091	mov	v21.s[1], v6.s[2]
2092	tbl	v16.16b, { v16.16b }, v0.16b
2093	add	v7.4s, v7.4s, v16.4s
2094	eor	v4.16b, v4.16b, v7.16b
2095	ushr	v22.4s, v4.4s, #12
2096	shl	v23.4s, v4.4s, #20
2097	trn2	v4.4s, v18.4s, v21.4s
2098	orr	v18.16b, v23.16b, v22.16b
2099	add	v21.4s, v18.4s, v4.4s
2100	add	v17.4s, v21.4s, v17.4s
2101	zip1	v21.2d, v20.2d, v5.2d
2102	zip2	v5.4s, v5.4s, v20.4s
2103	eor	v16.16b, v16.16b, v17.16b
2104	mov	v21.s[3], v6.s[3]
2105	ext	v17.16b, v17.16b, v17.16b, #12
2106	zip1	v20.4s, v5.4s, v6.4s
2107	tbl	v16.16b, { v16.16b }, v2.16b
2108	zip1	v5.4s, v6.4s, v5.4s
2109	add	v22.4s, v7.4s, v16.4s
2110	ext	v16.16b, v16.16b, v16.16b, #8
2111	ext	v20.16b, v5.16b, v20.16b, #8
2112	eor	v7.16b, v18.16b, v22.16b
2113	ext	v18.16b, v21.16b, v21.16b, #12
2114	ushr	v23.4s, v7.4s, #7
2115	shl	v24.4s, v7.4s, #25
2116	uzp1	v7.4s, v21.4s, v18.4s
2117	orr	v18.16b, v24.16b, v23.16b
2118	add	v21.4s, v18.4s, v7.4s
2119	add	v17.4s, v21.4s, v17.4s
2120	ext	v21.16b, v22.16b, v22.16b, #4
2121	eor	v16.16b, v17.16b, v16.16b
2122	tbl	v16.16b, { v16.16b }, v0.16b
2123	add	v21.4s, v21.4s, v16.4s
2124	eor	v18.16b, v18.16b, v21.16b
2125	ushr	v6.4s, v18.4s, #12
2126	shl	v18.4s, v18.4s, #20
2127	orr	v5.16b, v18.16b, v6.16b
2128	add	v6.4s, v5.4s, v20.4s
2129	add	v6.4s, v6.4s, v17.4s
2130	ext	v17.16b, v19.16b, v19.16b, #4
2131	eor	v16.16b, v16.16b, v6.16b
2132	uzp1	v18.4s, v17.4s, v17.4s
2133	tbl	v16.16b, { v16.16b }, v2.16b
2134	ext	v18.16b, v18.16b, v17.16b, #8
2135	add	v19.4s, v21.4s, v16.4s
2136	uzp2	v18.4s, v18.4s, v4.4s
2137	ext	v16.16b, v16.16b, v16.16b, #8
2138	eor	v5.16b, v5.16b, v19.16b
2139	add	v6.4s, v6.4s, v18.4s
2140	ext	v19.16b, v19.16b, v19.16b, #12
2141	ushr	v21.4s, v5.4s, #7
2142	shl	v5.4s, v5.4s, #25
2143	ext	v6.16b, v6.16b, v6.16b, #4
2144	orr	v5.16b, v5.16b, v21.16b
2145	ext	v21.16b, v17.16b, v17.16b, #12
2146	add	v6.4s, v6.4s, v5.4s
2147	ext	v17.16b, v17.16b, v21.16b, #12
2148	mov	v21.16b, v20.16b
2149	eor	v16.16b, v6.16b, v16.16b
2150	rev64	v17.4s, v17.4s
2151	mov	v21.s[1], v7.s[2]
2152	tbl	v16.16b, { v16.16b }, v0.16b
2153	add	v19.4s, v19.4s, v16.4s
2154	eor	v5.16b, v5.16b, v19.16b
2155	ushr	v22.4s, v5.4s, #12
2156	shl	v23.4s, v5.4s, #20
2157	trn2	v5.4s, v17.4s, v21.4s
2158	orr	v17.16b, v23.16b, v22.16b
2159	add	v21.4s, v17.4s, v5.4s
2160	add	v6.4s, v21.4s, v6.4s
2161	eor	v16.16b, v16.16b, v6.16b
2162	ext	v6.16b, v6.16b, v6.16b, #12
2163	tbl	v21.16b, { v16.16b }, v2.16b
2164	zip1	v16.2d, v20.2d, v4.2d
2165	zip2	v4.4s, v4.4s, v20.4s
2166	add	v19.4s, v19.4s, v21.4s
2167	mov	v16.s[3], v7.s[3]
2168	ext	v21.16b, v21.16b, v21.16b, #8
2169	zip1	v20.4s, v4.4s, v7.4s
2170	eor	v17.16b, v17.16b, v19.16b
2171	ext	v22.16b, v16.16b, v16.16b, #12
2172	ext	v19.16b, v19.16b, v19.16b, #4
2173	zip1	v4.4s, v7.4s, v4.4s
2174	ushr	v23.4s, v17.4s, #7
2175	shl	v17.4s, v17.4s, #25
2176	uzp1	v16.4s, v16.4s, v22.4s
2177	ext	v4.16b, v4.16b, v20.16b, #8
2178	orr	v17.16b, v17.16b, v23.16b
2179	add	v22.4s, v17.4s, v16.4s
2180	add	v6.4s, v22.4s, v6.4s
2181	eor	v21.16b, v6.16b, v21.16b
2182	tbl	v21.16b, { v21.16b }, v0.16b
2183	add	v19.4s, v19.4s, v21.4s
2184	eor	v17.16b, v17.16b, v19.16b
2185	ushr	v7.4s, v17.4s, #12
2186	shl	v17.4s, v17.4s, #20
2187	orr	v7.16b, v17.16b, v7.16b
2188	add	v17.4s, v7.4s, v4.4s
2189	add	v6.4s, v17.4s, v6.4s
2190	ext	v17.16b, v18.16b, v18.16b, #4
2191	eor	v18.16b, v21.16b, v6.16b
2192	uzp1	v20.4s, v17.4s, v17.4s
2193	tbl	v18.16b, { v18.16b }, v2.16b
2194	ext	v20.16b, v20.16b, v17.16b, #8
2195	add	v19.4s, v19.4s, v18.4s
2196	uzp2	v20.4s, v20.4s, v5.4s
2197	ext	v18.16b, v18.16b, v18.16b, #8
2198	eor	v7.16b, v7.16b, v19.16b
2199	add	v6.4s, v6.4s, v20.4s
2200	ushr	v21.4s, v7.4s, #7
2201	shl	v7.4s, v7.4s, #25
2202	ext	v6.16b, v6.16b, v6.16b, #4
2203	orr	v7.16b, v7.16b, v21.16b
2204	add	v21.4s, v6.4s, v7.4s
2205	eor	v6.16b, v21.16b, v18.16b
2206	ext	v18.16b, v19.16b, v19.16b, #12
2207	tbl	v19.16b, { v6.16b }, v0.16b
2208	ext	v6.16b, v17.16b, v17.16b, #12
2209	add	v18.4s, v18.4s, v19.4s
2210	ext	v6.16b, v17.16b, v6.16b, #12
2211	mov	v17.16b, v4.16b
2212	eor	v7.16b, v7.16b, v18.16b
2213	rev64	v6.4s, v6.4s
2214	mov	v17.s[1], v16.s[2]
2215	ushr	v22.4s, v7.4s, #12
2216	shl	v7.4s, v7.4s, #20
2217	trn2	v6.4s, v6.4s, v17.4s
2218	orr	v7.16b, v7.16b, v22.16b
2219	add	v17.4s, v7.4s, v6.4s
2220	add	v17.4s, v17.4s, v21.4s
2221	zip1	v21.2d, v4.2d, v5.2d
2222	zip2	v4.4s, v5.4s, v4.4s
2223	eor	v19.16b, v19.16b, v17.16b
2224	mov	v21.s[3], v16.s[3]
2225	ext	v17.16b, v17.16b, v17.16b, #12
2226	tbl	v19.16b, { v19.16b }, v2.16b
2227	ext	v22.16b, v21.16b, v21.16b, #12
2228	add	v18.4s, v18.4s, v19.4s
2229	ext	v19.16b, v19.16b, v19.16b, #8
2230	eor	v7.16b, v7.16b, v18.16b
2231	ext	v18.16b, v18.16b, v18.16b, #4
2232	ushr	v23.4s, v7.4s, #7
2233	shl	v24.4s, v7.4s, #25
2234	uzp1	v7.4s, v21.4s, v22.4s
2235	orr	v21.16b, v24.16b, v23.16b
2236	add	v22.4s, v21.4s, v7.4s
2237	add	v17.4s, v22.4s, v17.4s
2238	eor	v19.16b, v17.16b, v19.16b
2239	tbl	v19.16b, { v19.16b }, v0.16b
2240	add	v18.4s, v18.4s, v19.4s
2241	eor	v5.16b, v21.16b, v18.16b
2242	zip1	v21.4s, v4.4s, v16.4s
2243	zip1	v4.4s, v16.4s, v4.4s
2244	ushr	v16.4s, v5.4s, #12
2245	shl	v5.4s, v5.4s, #20
2246	ext	v21.16b, v4.16b, v21.16b, #8
2247	orr	v4.16b, v5.16b, v16.16b
2248	ext	v16.16b, v20.16b, v20.16b, #4
2249	mov	v23.16b, v21.16b
2250	add	v5.4s, v4.4s, v21.4s
2251	mov	v23.s[1], v7.s[2]
2252	add	v5.4s, v5.4s, v17.4s
2253	eor	v17.16b, v19.16b, v5.16b
2254	uzp1	v19.4s, v16.4s, v16.4s
2255	tbl	v17.16b, { v17.16b }, v2.16b
2256	ext	v19.16b, v19.16b, v16.16b, #8
2257	add	v18.4s, v18.4s, v17.4s
2258	uzp2	v19.4s, v19.4s, v6.4s
2259	eor	v4.16b, v4.16b, v18.16b
2260	add	v5.4s, v5.4s, v19.4s
2261	ext	v19.16b, v19.16b, v19.16b, #4
2262	ushr	v20.4s, v4.4s, #7
2263	shl	v4.4s, v4.4s, #25
2264	ext	v5.16b, v5.16b, v5.16b, #4
2265	orr	v20.16b, v4.16b, v20.16b
2266	ext	v4.16b, v17.16b, v17.16b, #8
2267	add	v17.4s, v5.4s, v20.4s
2268	ext	v5.16b, v18.16b, v18.16b, #12
2269	eor	v4.16b, v17.16b, v4.16b
2270	tbl	v18.16b, { v4.16b }, v0.16b
2271	ext	v4.16b, v16.16b, v16.16b, #12
2272	add	v22.4s, v5.4s, v18.4s
2273	ext	v4.16b, v16.16b, v4.16b, #12
2274	eor	v5.16b, v20.16b, v22.16b
2275	rev64	v16.4s, v4.4s
2276	ushr	v20.4s, v5.4s, #12
2277	shl	v24.4s, v5.4s, #20
2278	trn2	v5.4s, v16.4s, v23.4s
2279	orr	v16.16b, v24.16b, v20.16b
2280	add	v20.4s, v16.4s, v5.4s
2281	add	v17.4s, v20.4s, v17.4s
2282	zip1	v20.2d, v21.2d, v6.2d
2283	zip2	v6.4s, v6.4s, v21.4s
2284	eor	v18.16b, v18.16b, v17.16b
2285	mov	v20.s[3], v7.s[3]
2286	ext	v17.16b, v17.16b, v17.16b, #12
2287	zip1	v21.4s, v6.4s, v7.4s
2288	tbl	v18.16b, { v18.16b }, v2.16b
2289	ext	v24.16b, v20.16b, v20.16b, #12
2290	zip1	v6.4s, v7.4s, v6.4s
2291	add	v22.4s, v22.4s, v18.4s
2292	ext	v18.16b, v18.16b, v18.16b, #8
2293	ext	v6.16b, v6.16b, v21.16b, #8
2294	eor	v16.16b, v16.16b, v22.16b
2295	ext	v22.16b, v22.16b, v22.16b, #4
2296	zip1	v5.2d, v6.2d, v5.2d
2297	zip2	v4.4s, v4.4s, v6.4s
2298	ushr	v25.4s, v16.4s, #7
2299	shl	v26.4s, v16.4s, #25
2300	uzp1	v16.4s, v20.4s, v24.4s
2301	orr	v20.16b, v26.16b, v25.16b
2302	mov	v5.s[3], v16.s[3]
2303	add	v24.4s, v20.4s, v16.4s
2304	add	v17.4s, v24.4s, v17.4s
2305	eor	v18.16b, v17.16b, v18.16b
2306	tbl	v18.16b, { v18.16b }, v0.16b
2307	add	v22.4s, v22.4s, v18.4s
2308	eor	v20.16b, v20.16b, v22.16b
2309	ushr	v7.4s, v20.4s, #12
2310	shl	v20.4s, v20.4s, #20
2311	orr	v7.16b, v20.16b, v7.16b
2312	add	v20.4s, v7.4s, v6.4s
2313	add	v17.4s, v20.4s, v17.4s
2314	ext	v20.16b, v19.16b, v19.16b, #8
2315	eor	v18.16b, v18.16b, v17.16b
2316	ext	v17.16b, v17.16b, v17.16b, #4
2317	tbl	v18.16b, { v18.16b }, v2.16b
2318	add	v21.4s, v22.4s, v18.4s
2319	uzp2	v22.4s, v20.4s, v23.4s
2320	ext	v18.16b, v18.16b, v18.16b, #8
2321	eor	v7.16b, v7.16b, v21.16b
2322	ext	v20.16b, v22.16b, v20.16b, #4
2323	ushr	v22.4s, v7.4s, #7
2324	shl	v7.4s, v7.4s, #25
2325	add	v17.4s, v17.4s, v20.4s
2326	ext	v20.16b, v21.16b, v21.16b, #12
2327	ext	v21.16b, v19.16b, v19.16b, #12
2328	orr	v7.16b, v7.16b, v22.16b
2329	ext	v19.16b, v19.16b, v21.16b, #12
2330	add	v17.4s, v17.4s, v7.4s
2331	mov	v21.16b, v6.16b
2332	rev64	v19.4s, v19.4s
2333	eor	v18.16b, v17.16b, v18.16b
2334	mov	v21.s[1], v16.s[2]
2335	tbl	v18.16b, { v18.16b }, v0.16b
2336	trn2	v19.4s, v19.4s, v21.4s
2337	add	v20.4s, v20.4s, v18.4s
2338	eor	v7.16b, v7.16b, v20.16b
2339	ushr	v22.4s, v7.4s, #12
2340	shl	v7.4s, v7.4s, #20
2341	orr	v7.16b, v7.16b, v22.16b
2342	add	v19.4s, v7.4s, v19.4s
2343	add	v17.4s, v19.4s, v17.4s
2344	eor	v18.16b, v18.16b, v17.16b
2345	ext	v17.16b, v17.16b, v17.16b, #12
2346	tbl	v18.16b, { v18.16b }, v2.16b
2347	add	v19.4s, v20.4s, v18.4s
2348	ext	v20.16b, v5.16b, v5.16b, #12
2349	ext	v18.16b, v18.16b, v18.16b, #8
2350	eor	v7.16b, v7.16b, v19.16b
2351	uzp1	v5.4s, v5.4s, v20.4s
2352	ushr	v21.4s, v7.4s, #7
2353	shl	v7.4s, v7.4s, #25
2354	orr	v7.16b, v7.16b, v21.16b
2355	add	v5.4s, v7.4s, v5.4s
2356	add	v5.4s, v5.4s, v17.4s
2357	eor	v17.16b, v5.16b, v18.16b
2358	ext	v18.16b, v19.16b, v19.16b, #4
2359	tbl	v17.16b, { v17.16b }, v0.16b
2360	add	v18.4s, v18.4s, v17.4s
2361	eor	v6.16b, v7.16b, v18.16b
2362	zip1	v7.4s, v4.4s, v16.4s
2363	zip1	v4.4s, v16.4s, v4.4s
2364	ushr	v16.4s, v6.4s, #12
2365	shl	v6.4s, v6.4s, #20
2366	ext	v4.16b, v4.16b, v7.16b, #8
2367	orr	v6.16b, v6.16b, v16.16b
2368	add	v4.4s, v6.4s, v4.4s
2369	add	v4.4s, v4.4s, v5.4s
2370	eor	v5.16b, v17.16b, v4.16b
2371	ext	v4.16b, v4.16b, v4.16b, #4
2372	tbl	v5.16b, { v5.16b }, v2.16b
2373	add	v7.4s, v18.4s, v5.4s
2374	eor	v6.16b, v6.16b, v7.16b
2375	ext	v7.16b, v7.16b, v7.16b, #12
2376	ushr	v16.4s, v6.4s, #7
2377	shl	v6.4s, v6.4s, #25
2378	orr	v6.16b, v6.16b, v16.16b
2379	ext	v16.16b, v5.16b, v5.16b, #8
2380	eor	v5.16b, v4.16b, v7.16b
2381	eor	v4.16b, v6.16b, v16.16b
/*
 * .LBB3_11: loop tail -- dispatch on x15, then finish the current iteration.
 *
 *   x15 == 1          -> branch to .LBB3_9
 *   x15 != 0 (other)  -> branch to .LBB3_10
 *   x15 == 0          -> fall through: store the q5/q4 pair and advance
 *
 * Both branch targets, and the loop head .LBB3_8, lie outside this block.
 * x13 is set to x15 - 1 as a side effect of the compare.
 *
 * NOTE(review): x0, x1, x4 and x8 look like the input-pointer array, the
 * remaining input count, the block counter and the output pointer of
 * hash_many -- confirm against the function entry, which is not visible
 * from here.
 */
2382.LBB3_11:
2383	subs	x13, x15, #1	/* x13 = x15 - 1; Z flag set iff x15 == 1 */
2384	b.eq	.LBB3_9
2385	cbnz	x15, .LBB3_10	/* cbnz does not touch the flags */
2386	add	x4, x4, x12	/* x4 += x12 */
2387	add	x0, x0, #8	/* advance x0 by 8 bytes */
2388	subs	x1, x1, #1	/* x1 -= 1; sets the flags tested by b.ne below */
2389	stp	q5, q4, [x8], #32	/* write 32 bytes at [x8], then x8 += 32; flags unchanged */
2390	b.ne	.LBB3_8	/* loop again while x1 != 0 */
/*
 * .LBB3_14: function epilogue.
 *
 * First releases 368 bytes of stack (presumably the local scratch area
 * reserved by the prologue, which is not visible here), then reloads the
 * saved registers from a 144-byte save area laid out as:
 *
 *   [sp, #0]   d15, d14      [sp, #64]  x29, x27
 *   [sp, #16]  d13, d12      [sp, #80]  x26, x25
 *   [sp, #32]  d11, d10      [sp, #96]  x24, x23
 *   [sp, #48]  d9,  d8       [sp, #112] x22, x21
 *                            [sp, #128] x20, x19
 *
 * Only the low 64 bits (d8-d15) of v8-v15 are restored, which is the part
 * AAPCS64 requires a callee to preserve.  x30 is not reloaded in this
 * epilogue; ret returns through its current value.
 */
2391.LBB3_14:
2392	add	sp, sp, #368	/* drop the 368-byte area above the save area */
2393	ldp	x20, x19, [sp, #128]
2394	ldp	x22, x21, [sp, #112]
2395	ldp	x24, x23, [sp, #96]
2396	ldp	x26, x25, [sp, #80]
2397	ldp	x29, x27, [sp, #64]
2398	ldp	d9, d8, [sp, #48]
2399	ldp	d11, d10, [sp, #32]
2400	ldp	d13, d12, [sp, #16]
2401	ldp	d15, d14, [sp], #144	/* post-index: load from [sp], then sp += 144 */
2402	ret
/* End marker used only to compute the symbol size below. */
2403.Lfunc_end3:
2404	.size	zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41
2405	.cfi_endproc	/* closes the CFI region; the matching .cfi_startproc is outside this block */
2406	.section	".note.GNU-stack","",@progbits
2407#endif
2408