xref: /freebsd/sys/crypto/openssl/aarch64/chacha-armv8-sve.S (revision 4757b351ea9d59d71d4a38b82506d2d16fcd560d)
1*4757b351SPierre Pronchery/* Do not modify. This file is auto-generated from chacha-armv8-sve.pl. */
2*4757b351SPierre Pronchery// Copyright 2022-2025  The OpenSSL Project Authors. All Rights Reserved.
3*4757b351SPierre Pronchery//
4*4757b351SPierre Pronchery// Licensed under the Apache License 2.0 (the "License").  You may not use
5*4757b351SPierre Pronchery// this file except in compliance with the License.  You can obtain a copy
6*4757b351SPierre Pronchery// in the file LICENSE in the source distribution or at
7*4757b351SPierre Pronchery// https://www.openssl.org/source/license.html
8*4757b351SPierre Pronchery//
9*4757b351SPierre Pronchery//
10*4757b351SPierre Pronchery// ChaCha20 for ARMv8 via SVE
11*4757b351SPierre Pronchery//
12*4757b351SPierre Pronchery// $output is the last argument if it looks like a file (it has an extension)
13*4757b351SPierre Pronchery// $flavour is the first argument if it doesn't look like a file
14*4757b351SPierre Pronchery#include "arm_arch.h"
15*4757b351SPierre Pronchery
16*4757b351SPierre Pronchery.arch	armv8-a
17*4757b351SPierre Pronchery
18*4757b351SPierre Pronchery
19*4757b351SPierre Pronchery.hidden	OPENSSL_armcap_P
20*4757b351SPierre Pronchery
21*4757b351SPierre Pronchery.text
22*4757b351SPierre Pronchery
// Read-only constant pool used by ChaCha20_ctr32_sve.
// It lives in .rodata rather than .text; the .previous directive at the end
// switches back to the section that was active before (.text).
23*4757b351SPierre Pronchery.section	.rodata
// .align 5 requests 2^5 = 32-byte alignment for the pool.
24*4757b351SPierre Pronchery.align	5
25*4757b351SPierre Pronchery.type	_chacha_sve_consts,%object
26*4757b351SPierre Pronchery_chacha_sve_consts:
// The four 32-bit ChaCha constant words 0x61707865, 0x3320646e, 0x79622d32,
// 0x6b206574 ("expand 32-byte k" when read as little-endian ASCII), packed
// two per 64-bit value. ChaCha20_ctr32_sve loads them with `ldp x23,x24`
// and takes the low word through the w-register view and the high word via
// `lsr #32`, so emitting them as .quad keeps the word order independent of
// the target byte order.
27*4757b351SPierre Pronchery.Lchacha20_consts:
28*4757b351SPierre Pronchery.quad	0x3320646e61707865,0x6b20657479622d32		// endian-neutral
// On the path taken when ARMV8_SVE2 is not set in OPENSSL_armcap_P, the
// function reads the first two words with `ldp w9,w10` and passes them to
// `index z31.s,w9,w10` as start value 0x02010003 and increment 0x04040404.
// NOTE(review): judging by the label, z31 presumably becomes a byte-permute
// index vector implementing a 32-bit rotate by 8; the consumer of z31 and any
// reader of the 3rd/4th words are outside the visible excerpt -- confirm.
29*4757b351SPierre Pronchery.Lrot8:
30*4757b351SPierre Pronchery.word	0x02010003,0x04040404,0x02010003,0x04040404
// Record the object size: everything from _chacha_sve_consts up to here.
31*4757b351SPierre Pronchery.size	_chacha_sve_consts,.-_chacha_sve_consts
32*4757b351SPierre Pronchery
33*4757b351SPierre Pronchery.previous
34*4757b351SPierre Pronchery
35*4757b351SPierre Pronchery.globl	ChaCha20_ctr32_sve
36*4757b351SPierre Pronchery.type	ChaCha20_ctr32_sve,%function
37*4757b351SPierre Pronchery.align	5
38*4757b351SPierre ProncheryChaCha20_ctr32_sve:
39*4757b351SPierre Pronchery	AARCH64_VALID_CALL_TARGET
40*4757b351SPierre Pronchery.inst	0x04a0e3e5	//cntw x5, ALL, MUL #1
41*4757b351SPierre Pronchery	cmp	x2,x5,lsl #6
42*4757b351SPierre Pronchery	b.lt	.Lreturn
43*4757b351SPierre Pronchery	mov	x7,0
44*4757b351SPierre Pronchery	adrp	x6,OPENSSL_armcap_P
45*4757b351SPierre Pronchery	ldr	w6,[x6,#:lo12:OPENSSL_armcap_P]
46*4757b351SPierre Pronchery	tst	w6,#ARMV8_SVE2
47*4757b351SPierre Pronchery	b.eq	1f
48*4757b351SPierre Pronchery	mov	x7,1
49*4757b351SPierre Pronchery	b	2f
50*4757b351SPierre Pronchery1:
51*4757b351SPierre Pronchery	cmp	x5,4
52*4757b351SPierre Pronchery	b.le	.Lreturn
53*4757b351SPierre Pronchery	adrp	x6,.Lrot8
54*4757b351SPierre Pronchery	add	x6,x6,#:lo12:.Lrot8
55*4757b351SPierre Pronchery	ldp	w9,w10,[x6]
56*4757b351SPierre Pronchery.inst	0x04aa4d3f	//index z31.s,w9,w10
57*4757b351SPierre Pronchery2:
58*4757b351SPierre Pronchery	AARCH64_SIGN_LINK_REGISTER
59*4757b351SPierre Pronchery	stp	d8,d9,[sp,-192]!
60*4757b351SPierre Pronchery	stp	d10,d11,[sp,16]
61*4757b351SPierre Pronchery	stp	d12,d13,[sp,32]
62*4757b351SPierre Pronchery	stp	d14,d15,[sp,48]
63*4757b351SPierre Pronchery	stp	x16,x17,[sp,64]
64*4757b351SPierre Pronchery	stp	x18,x19,[sp,80]
65*4757b351SPierre Pronchery	stp	x20,x21,[sp,96]
66*4757b351SPierre Pronchery	stp	x22,x23,[sp,112]
67*4757b351SPierre Pronchery	stp	x24,x25,[sp,128]
68*4757b351SPierre Pronchery	stp	x26,x27,[sp,144]
69*4757b351SPierre Pronchery	stp	x28,x29,[sp,160]
70*4757b351SPierre Pronchery	str	x30,[sp,176]
71*4757b351SPierre Pronchery
72*4757b351SPierre Pronchery	adrp	x6,.Lchacha20_consts
73*4757b351SPierre Pronchery	add	x6,x6,#:lo12:.Lchacha20_consts
74*4757b351SPierre Pronchery	ldp	x23,x24,[x6]
75*4757b351SPierre Pronchery	ldp	x25,x26,[x3]
76*4757b351SPierre Pronchery	ldp	x27,x28,[x3, 16]
77*4757b351SPierre Pronchery	ldp	x29,x30,[x4]
78*4757b351SPierre Pronchery.inst	0x2599e3e0	//ptrues p0.s,ALL
79*4757b351SPierre Pronchery#ifdef	__AARCH64EB__
80*4757b351SPierre Pronchery	ror	x25,x25,#32
81*4757b351SPierre Pronchery	ror	x26,x26,#32
82*4757b351SPierre Pronchery	ror	x27,x27,#32
83*4757b351SPierre Pronchery	ror	x28,x28,#32
84*4757b351SPierre Pronchery	ror	x29,x29,#32
85*4757b351SPierre Pronchery	ror	x30,x30,#32
86*4757b351SPierre Pronchery#endif
87*4757b351SPierre Pronchery	cbz	x7, 1f
88*4757b351SPierre Pronchery.align	5
89*4757b351SPierre Pronchery100:
90*4757b351SPierre Pronchery	subs	x7,x2,x5,lsl #6
91*4757b351SPierre Pronchery	b.lt	110f
92*4757b351SPierre Pronchery	mov	x2,x7
93*4757b351SPierre Pronchery	b.eq	101f
94*4757b351SPierre Pronchery	cmp	x2,64
95*4757b351SPierre Pronchery	b.lt	101f
96*4757b351SPierre Pronchery	mixin=1
97*4757b351SPierre Pronchery	lsr	x8,x23,#32
98*4757b351SPierre Pronchery.inst	0x05a03ae0	//dup z0.s,w23
99*4757b351SPierre Pronchery.inst	0x05a03af9	//dup z25.s,w23
100*4757b351SPierre Pronchery.if	mixin == 1
101*4757b351SPierre Pronchery	mov	w7,w23
102*4757b351SPierre Pronchery.endif
103*4757b351SPierre Pronchery.inst	0x05a03904	//dup z4.s,w8
104*4757b351SPierre Pronchery.inst	0x05a0391a	//dup z26.s,w8
105*4757b351SPierre Pronchery	lsr	x10,x24,#32
106*4757b351SPierre Pronchery.inst	0x05a03b08	//dup z8.s,w24
107*4757b351SPierre Pronchery.inst	0x05a03b1b	//dup z27.s,w24
108*4757b351SPierre Pronchery.if	mixin == 1
109*4757b351SPierre Pronchery	mov	w9,w24
110*4757b351SPierre Pronchery.endif
111*4757b351SPierre Pronchery.inst	0x05a0394c	//dup z12.s,w10
112*4757b351SPierre Pronchery.inst	0x05a0395c	//dup z28.s,w10
113*4757b351SPierre Pronchery	lsr	x12,x25,#32
114*4757b351SPierre Pronchery.inst	0x05a03b21	//dup z1.s,w25
115*4757b351SPierre Pronchery.inst	0x05a03b3d	//dup z29.s,w25
116*4757b351SPierre Pronchery.if	mixin == 1
117*4757b351SPierre Pronchery	mov	w11,w25
118*4757b351SPierre Pronchery.endif
119*4757b351SPierre Pronchery.inst	0x05a03985	//dup z5.s,w12
120*4757b351SPierre Pronchery.inst	0x05a0399e	//dup z30.s,w12
121*4757b351SPierre Pronchery	lsr	x14,x26,#32
122*4757b351SPierre Pronchery.inst	0x05a03b49	//dup z9.s,w26
123*4757b351SPierre Pronchery.inst	0x05a03b55	//dup z21.s,w26
124*4757b351SPierre Pronchery.if	mixin == 1
125*4757b351SPierre Pronchery	mov	w13,w26
126*4757b351SPierre Pronchery.endif
127*4757b351SPierre Pronchery.inst	0x05a039cd	//dup z13.s,w14
128*4757b351SPierre Pronchery.inst	0x05a039d6	//dup z22.s,w14
129*4757b351SPierre Pronchery	lsr	x16,x27,#32
130*4757b351SPierre Pronchery.inst	0x05a03b62	//dup z2.s,w27
131*4757b351SPierre Pronchery.inst	0x05a03b77	//dup z23.s,w27
132*4757b351SPierre Pronchery.if	mixin == 1
133*4757b351SPierre Pronchery	mov	w15,w27
134*4757b351SPierre Pronchery.endif
135*4757b351SPierre Pronchery.inst	0x05a03a06	//dup z6.s,w16
136*4757b351SPierre Pronchery.inst	0x05a03a18	//dup z24.s,w16
137*4757b351SPierre Pronchery	lsr	x18,x28,#32
138*4757b351SPierre Pronchery.inst	0x05a03b8a	//dup z10.s,w28
139*4757b351SPierre Pronchery.inst	0x05a03b91	//dup z17.s,w28
140*4757b351SPierre Pronchery.if	mixin == 1
141*4757b351SPierre Pronchery	mov	w17,w28
142*4757b351SPierre Pronchery.endif
143*4757b351SPierre Pronchery.inst	0x05a03a4e	//dup z14.s,w18
144*4757b351SPierre Pronchery.inst	0x05a03a52	//dup z18.s,w18
145*4757b351SPierre Pronchery	lsr	x22,x30,#32
146*4757b351SPierre Pronchery.inst	0x05a03bcb	//dup z11.s,w30
147*4757b351SPierre Pronchery.inst	0x05a03bd4	//dup z20.s,w30
148*4757b351SPierre Pronchery.if	mixin == 1
149*4757b351SPierre Pronchery	mov	w21,w30
150*4757b351SPierre Pronchery.endif
151*4757b351SPierre Pronchery.inst	0x05a03acf	//dup z15.s,w22
152*4757b351SPierre Pronchery.inst	0x05a03adf	//dup z31.s,w22
153*4757b351SPierre Pronchery.if	mixin == 1
154*4757b351SPierre Pronchery	add	w20,w29,#1
155*4757b351SPierre Pronchery	mov	w19,w29
156*4757b351SPierre Pronchery.inst	0x04a14690	//index z16.s,w20,1
157*4757b351SPierre Pronchery.inst	0x04a14683	//index z3.s,w20,1
158*4757b351SPierre Pronchery.else
159*4757b351SPierre Pronchery.inst	0x04a147b0	//index z16.s,w29,1
160*4757b351SPierre Pronchery.inst	0x04a147a3	//index z3.s,w29,1
161*4757b351SPierre Pronchery.endif
162*4757b351SPierre Pronchery	lsr	x20,x29,#32
163*4757b351SPierre Pronchery.inst	0x05a03a87	//dup z7.s,w20
164*4757b351SPierre Pronchery.inst	0x05a03a93	//dup z19.s,w20
165*4757b351SPierre Pronchery	mov	x6,#10
166*4757b351SPierre Pronchery10:
167*4757b351SPierre Pronchery.align	5
168*4757b351SPierre Pronchery.inst	0x04a10000	//add z0.s,z0.s,z1.s
169*4757b351SPierre Pronchery.if	mixin == 1
170*4757b351SPierre Pronchery	add	w7,w7,w11
171*4757b351SPierre Pronchery.endif
172*4757b351SPierre Pronchery.inst	0x04a50084	//add z4.s,z4.s,z5.s
173*4757b351SPierre Pronchery.if	mixin == 1
174*4757b351SPierre Pronchery	add	w8,w8,w12
175*4757b351SPierre Pronchery.endif
176*4757b351SPierre Pronchery.inst	0x04a90108	//add z8.s,z8.s,z9.s
177*4757b351SPierre Pronchery.if	mixin == 1
178*4757b351SPierre Pronchery	add	w9,w9,w13
179*4757b351SPierre Pronchery.endif
180*4757b351SPierre Pronchery.inst	0x04ad018c	//add z12.s,z12.s,z13.s
181*4757b351SPierre Pronchery.if	mixin == 1
182*4757b351SPierre Pronchery	add	w10,w10,w14
183*4757b351SPierre Pronchery.endif
184*4757b351SPierre Pronchery.if	mixin == 1
185*4757b351SPierre Pronchery	eor	w19,w19,w7
186*4757b351SPierre Pronchery.endif
187*4757b351SPierre Pronchery.inst	0x04703403	//xar z3.s,z3.s,z0.s,16
188*4757b351SPierre Pronchery.if	mixin == 1
189*4757b351SPierre Pronchery	ror	w19,w19,16
190*4757b351SPierre Pronchery.endif
191*4757b351SPierre Pronchery.if	mixin == 1
192*4757b351SPierre Pronchery	eor	w20,w20,w8
193*4757b351SPierre Pronchery.endif
194*4757b351SPierre Pronchery.inst	0x04703487	//xar z7.s,z7.s,z4.s,16
195*4757b351SPierre Pronchery.if	mixin == 1
196*4757b351SPierre Pronchery	ror	w20,w20,16
197*4757b351SPierre Pronchery.endif
198*4757b351SPierre Pronchery.if	mixin == 1
199*4757b351SPierre Pronchery	eor	w21,w21,w9
200*4757b351SPierre Pronchery.endif
201*4757b351SPierre Pronchery.inst	0x0470350b	//xar z11.s,z11.s,z8.s,16
202*4757b351SPierre Pronchery.if	mixin == 1
203*4757b351SPierre Pronchery	ror	w21,w21,16
204*4757b351SPierre Pronchery.endif
205*4757b351SPierre Pronchery.if	mixin == 1
206*4757b351SPierre Pronchery	eor	w22,w22,w10
207*4757b351SPierre Pronchery.endif
208*4757b351SPierre Pronchery.inst	0x0470358f	//xar z15.s,z15.s,z12.s,16
209*4757b351SPierre Pronchery.if	mixin == 1
210*4757b351SPierre Pronchery	ror	w22,w22,16
211*4757b351SPierre Pronchery.endif
212*4757b351SPierre Pronchery.inst	0x04a30042	//add z2.s,z2.s,z3.s
213*4757b351SPierre Pronchery.if	mixin == 1
214*4757b351SPierre Pronchery	add	w15,w15,w19
215*4757b351SPierre Pronchery.endif
216*4757b351SPierre Pronchery.inst	0x04a700c6	//add z6.s,z6.s,z7.s
217*4757b351SPierre Pronchery.if	mixin == 1
218*4757b351SPierre Pronchery	add	w16,w16,w20
219*4757b351SPierre Pronchery.endif
220*4757b351SPierre Pronchery.inst	0x04ab014a	//add z10.s,z10.s,z11.s
221*4757b351SPierre Pronchery.if	mixin == 1
222*4757b351SPierre Pronchery	add	w17,w17,w21
223*4757b351SPierre Pronchery.endif
224*4757b351SPierre Pronchery.inst	0x04af01ce	//add z14.s,z14.s,z15.s
225*4757b351SPierre Pronchery.if	mixin == 1
226*4757b351SPierre Pronchery	add	w18,w18,w22
227*4757b351SPierre Pronchery.endif
228*4757b351SPierre Pronchery.if	mixin == 1
229*4757b351SPierre Pronchery	eor	w11,w11,w15
230*4757b351SPierre Pronchery.endif
231*4757b351SPierre Pronchery.inst	0x046c3441	//xar z1.s,z1.s,z2.s,20
232*4757b351SPierre Pronchery.if	mixin == 1
233*4757b351SPierre Pronchery	ror	w11,w11,20
234*4757b351SPierre Pronchery.endif
235*4757b351SPierre Pronchery.if	mixin == 1
236*4757b351SPierre Pronchery	eor	w12,w12,w16
237*4757b351SPierre Pronchery.endif
238*4757b351SPierre Pronchery.inst	0x046c34c5	//xar z5.s,z5.s,z6.s,20
239*4757b351SPierre Pronchery.if	mixin == 1
240*4757b351SPierre Pronchery	ror	w12,w12,20
241*4757b351SPierre Pronchery.endif
242*4757b351SPierre Pronchery.if	mixin == 1
243*4757b351SPierre Pronchery	eor	w13,w13,w17
244*4757b351SPierre Pronchery.endif
245*4757b351SPierre Pronchery.inst	0x046c3549	//xar z9.s,z9.s,z10.s,20
246*4757b351SPierre Pronchery.if	mixin == 1
247*4757b351SPierre Pronchery	ror	w13,w13,20
248*4757b351SPierre Pronchery.endif
249*4757b351SPierre Pronchery.if	mixin == 1
250*4757b351SPierre Pronchery	eor	w14,w14,w18
251*4757b351SPierre Pronchery.endif
252*4757b351SPierre Pronchery.inst	0x046c35cd	//xar z13.s,z13.s,z14.s,20
253*4757b351SPierre Pronchery.if	mixin == 1
254*4757b351SPierre Pronchery	ror	w14,w14,20
255*4757b351SPierre Pronchery.endif
256*4757b351SPierre Pronchery.inst	0x04a10000	//add z0.s,z0.s,z1.s
257*4757b351SPierre Pronchery.if	mixin == 1
258*4757b351SPierre Pronchery	add	w7,w7,w11
259*4757b351SPierre Pronchery.endif
260*4757b351SPierre Pronchery.inst	0x04a50084	//add z4.s,z4.s,z5.s
261*4757b351SPierre Pronchery.if	mixin == 1
262*4757b351SPierre Pronchery	add	w8,w8,w12
263*4757b351SPierre Pronchery.endif
264*4757b351SPierre Pronchery.inst	0x04a90108	//add z8.s,z8.s,z9.s
265*4757b351SPierre Pronchery.if	mixin == 1
266*4757b351SPierre Pronchery	add	w9,w9,w13
267*4757b351SPierre Pronchery.endif
268*4757b351SPierre Pronchery.inst	0x04ad018c	//add z12.s,z12.s,z13.s
269*4757b351SPierre Pronchery.if	mixin == 1
270*4757b351SPierre Pronchery	add	w10,w10,w14
271*4757b351SPierre Pronchery.endif
272*4757b351SPierre Pronchery.if	mixin == 1
273*4757b351SPierre Pronchery	eor	w19,w19,w7
274*4757b351SPierre Pronchery.endif
275*4757b351SPierre Pronchery.inst	0x04683403	//xar z3.s,z3.s,z0.s,24
276*4757b351SPierre Pronchery.if	mixin == 1
277*4757b351SPierre Pronchery	ror	w19,w19,24
278*4757b351SPierre Pronchery.endif
279*4757b351SPierre Pronchery.if	mixin == 1
280*4757b351SPierre Pronchery	eor	w20,w20,w8
281*4757b351SPierre Pronchery.endif
282*4757b351SPierre Pronchery.inst	0x04683487	//xar z7.s,z7.s,z4.s,24
283*4757b351SPierre Pronchery.if	mixin == 1
284*4757b351SPierre Pronchery	ror	w20,w20,24
285*4757b351SPierre Pronchery.endif
286*4757b351SPierre Pronchery.if	mixin == 1
287*4757b351SPierre Pronchery	eor	w21,w21,w9
288*4757b351SPierre Pronchery.endif
289*4757b351SPierre Pronchery.inst	0x0468350b	//xar z11.s,z11.s,z8.s,24
290*4757b351SPierre Pronchery.if	mixin == 1
291*4757b351SPierre Pronchery	ror	w21,w21,24
292*4757b351SPierre Pronchery.endif
293*4757b351SPierre Pronchery.if	mixin == 1
294*4757b351SPierre Pronchery	eor	w22,w22,w10
295*4757b351SPierre Pronchery.endif
296*4757b351SPierre Pronchery.inst	0x0468358f	//xar z15.s,z15.s,z12.s,24
297*4757b351SPierre Pronchery.if	mixin == 1
298*4757b351SPierre Pronchery	ror	w22,w22,24
299*4757b351SPierre Pronchery.endif
300*4757b351SPierre Pronchery.inst	0x04a30042	//add z2.s,z2.s,z3.s
301*4757b351SPierre Pronchery.if	mixin == 1
302*4757b351SPierre Pronchery	add	w15,w15,w19
303*4757b351SPierre Pronchery.endif
304*4757b351SPierre Pronchery.inst	0x04a700c6	//add z6.s,z6.s,z7.s
305*4757b351SPierre Pronchery.if	mixin == 1
306*4757b351SPierre Pronchery	add	w16,w16,w20
307*4757b351SPierre Pronchery.endif
308*4757b351SPierre Pronchery.inst	0x04ab014a	//add z10.s,z10.s,z11.s
309*4757b351SPierre Pronchery.if	mixin == 1
310*4757b351SPierre Pronchery	add	w17,w17,w21
311*4757b351SPierre Pronchery.endif
312*4757b351SPierre Pronchery.inst	0x04af01ce	//add z14.s,z14.s,z15.s
313*4757b351SPierre Pronchery.if	mixin == 1
314*4757b351SPierre Pronchery	add	w18,w18,w22
315*4757b351SPierre Pronchery.endif
316*4757b351SPierre Pronchery.if	mixin == 1
317*4757b351SPierre Pronchery	eor	w11,w11,w15
318*4757b351SPierre Pronchery.endif
319*4757b351SPierre Pronchery.inst	0x04673441	//xar z1.s,z1.s,z2.s,25
320*4757b351SPierre Pronchery.if	mixin == 1
321*4757b351SPierre Pronchery	ror	w11,w11,25
322*4757b351SPierre Pronchery.endif
323*4757b351SPierre Pronchery.if	mixin == 1
324*4757b351SPierre Pronchery	eor	w12,w12,w16
325*4757b351SPierre Pronchery.endif
326*4757b351SPierre Pronchery.inst	0x046734c5	//xar z5.s,z5.s,z6.s,25
327*4757b351SPierre Pronchery.if	mixin == 1
328*4757b351SPierre Pronchery	ror	w12,w12,25
329*4757b351SPierre Pronchery.endif
330*4757b351SPierre Pronchery.if	mixin == 1
331*4757b351SPierre Pronchery	eor	w13,w13,w17
332*4757b351SPierre Pronchery.endif
333*4757b351SPierre Pronchery.inst	0x04673549	//xar z9.s,z9.s,z10.s,25
334*4757b351SPierre Pronchery.if	mixin == 1
335*4757b351SPierre Pronchery	ror	w13,w13,25
336*4757b351SPierre Pronchery.endif
337*4757b351SPierre Pronchery.if	mixin == 1
338*4757b351SPierre Pronchery	eor	w14,w14,w18
339*4757b351SPierre Pronchery.endif
340*4757b351SPierre Pronchery.inst	0x046735cd	//xar z13.s,z13.s,z14.s,25
341*4757b351SPierre Pronchery.if	mixin == 1
342*4757b351SPierre Pronchery	ror	w14,w14,25
343*4757b351SPierre Pronchery.endif
344*4757b351SPierre Pronchery.inst	0x04a50000	//add z0.s,z0.s,z5.s
345*4757b351SPierre Pronchery.if	mixin == 1
346*4757b351SPierre Pronchery	add	w7,w7,w12
347*4757b351SPierre Pronchery.endif
348*4757b351SPierre Pronchery.inst	0x04a90084	//add z4.s,z4.s,z9.s
349*4757b351SPierre Pronchery.if	mixin == 1
350*4757b351SPierre Pronchery	add	w8,w8,w13
351*4757b351SPierre Pronchery.endif
352*4757b351SPierre Pronchery.inst	0x04ad0108	//add z8.s,z8.s,z13.s
353*4757b351SPierre Pronchery.if	mixin == 1
354*4757b351SPierre Pronchery	add	w9,w9,w14
355*4757b351SPierre Pronchery.endif
356*4757b351SPierre Pronchery.inst	0x04a1018c	//add z12.s,z12.s,z1.s
357*4757b351SPierre Pronchery.if	mixin == 1
358*4757b351SPierre Pronchery	add	w10,w10,w11
359*4757b351SPierre Pronchery.endif
360*4757b351SPierre Pronchery.if	mixin == 1
361*4757b351SPierre Pronchery	eor	w22,w22,w7
362*4757b351SPierre Pronchery.endif
363*4757b351SPierre Pronchery.inst	0x0470340f	//xar z15.s,z15.s,z0.s,16
364*4757b351SPierre Pronchery.if	mixin == 1
365*4757b351SPierre Pronchery	ror	w22,w22,16
366*4757b351SPierre Pronchery.endif
367*4757b351SPierre Pronchery.if	mixin == 1
368*4757b351SPierre Pronchery	eor	w19,w19,w8
369*4757b351SPierre Pronchery.endif
370*4757b351SPierre Pronchery.inst	0x04703483	//xar z3.s,z3.s,z4.s,16
371*4757b351SPierre Pronchery.if	mixin == 1
372*4757b351SPierre Pronchery	ror	w19,w19,16
373*4757b351SPierre Pronchery.endif
374*4757b351SPierre Pronchery.if	mixin == 1
375*4757b351SPierre Pronchery	eor	w20,w20,w9
376*4757b351SPierre Pronchery.endif
377*4757b351SPierre Pronchery.inst	0x04703507	//xar z7.s,z7.s,z8.s,16
378*4757b351SPierre Pronchery.if	mixin == 1
379*4757b351SPierre Pronchery	ror	w20,w20,16
380*4757b351SPierre Pronchery.endif
381*4757b351SPierre Pronchery.if	mixin == 1
382*4757b351SPierre Pronchery	eor	w21,w21,w10
383*4757b351SPierre Pronchery.endif
384*4757b351SPierre Pronchery.inst	0x0470358b	//xar z11.s,z11.s,z12.s,16
385*4757b351SPierre Pronchery.if	mixin == 1
386*4757b351SPierre Pronchery	ror	w21,w21,16
387*4757b351SPierre Pronchery.endif
388*4757b351SPierre Pronchery.inst	0x04af014a	//add z10.s,z10.s,z15.s
389*4757b351SPierre Pronchery.if	mixin == 1
390*4757b351SPierre Pronchery	add	w17,w17,w22
391*4757b351SPierre Pronchery.endif
392*4757b351SPierre Pronchery.inst	0x04a301ce	//add z14.s,z14.s,z3.s
393*4757b351SPierre Pronchery.if	mixin == 1
394*4757b351SPierre Pronchery	add	w18,w18,w19
395*4757b351SPierre Pronchery.endif
396*4757b351SPierre Pronchery.inst	0x04a70042	//add z2.s,z2.s,z7.s
397*4757b351SPierre Pronchery.if	mixin == 1
398*4757b351SPierre Pronchery	add	w15,w15,w20
399*4757b351SPierre Pronchery.endif
400*4757b351SPierre Pronchery.inst	0x04ab00c6	//add z6.s,z6.s,z11.s
401*4757b351SPierre Pronchery.if	mixin == 1
402*4757b351SPierre Pronchery	add	w16,w16,w21
403*4757b351SPierre Pronchery.endif
404*4757b351SPierre Pronchery.if	mixin == 1
405*4757b351SPierre Pronchery	eor	w12,w12,w17
406*4757b351SPierre Pronchery.endif
407*4757b351SPierre Pronchery.inst	0x046c3545	//xar z5.s,z5.s,z10.s,20
408*4757b351SPierre Pronchery.if	mixin == 1
409*4757b351SPierre Pronchery	ror	w12,w12,20
410*4757b351SPierre Pronchery.endif
411*4757b351SPierre Pronchery.if	mixin == 1
412*4757b351SPierre Pronchery	eor	w13,w13,w18
413*4757b351SPierre Pronchery.endif
414*4757b351SPierre Pronchery.inst	0x046c35c9	//xar z9.s,z9.s,z14.s,20
415*4757b351SPierre Pronchery.if	mixin == 1
416*4757b351SPierre Pronchery	ror	w13,w13,20
417*4757b351SPierre Pronchery.endif
418*4757b351SPierre Pronchery.if	mixin == 1
419*4757b351SPierre Pronchery	eor	w14,w14,w15
420*4757b351SPierre Pronchery.endif
421*4757b351SPierre Pronchery.inst	0x046c344d	//xar z13.s,z13.s,z2.s,20
422*4757b351SPierre Pronchery.if	mixin == 1
423*4757b351SPierre Pronchery	ror	w14,w14,20
424*4757b351SPierre Pronchery.endif
425*4757b351SPierre Pronchery.if	mixin == 1
426*4757b351SPierre Pronchery	eor	w11,w11,w16
427*4757b351SPierre Pronchery.endif
428*4757b351SPierre Pronchery.inst	0x046c34c1	//xar z1.s,z1.s,z6.s,20
429*4757b351SPierre Pronchery.if	mixin == 1
430*4757b351SPierre Pronchery	ror	w11,w11,20
431*4757b351SPierre Pronchery.endif
432*4757b351SPierre Pronchery.inst	0x04a50000	//add z0.s,z0.s,z5.s
433*4757b351SPierre Pronchery.if	mixin == 1
434*4757b351SPierre Pronchery	add	w7,w7,w12
435*4757b351SPierre Pronchery.endif
436*4757b351SPierre Pronchery.inst	0x04a90084	//add z4.s,z4.s,z9.s
437*4757b351SPierre Pronchery.if	mixin == 1
438*4757b351SPierre Pronchery	add	w8,w8,w13
439*4757b351SPierre Pronchery.endif
440*4757b351SPierre Pronchery.inst	0x04ad0108	//add z8.s,z8.s,z13.s
441*4757b351SPierre Pronchery.if	mixin == 1
442*4757b351SPierre Pronchery	add	w9,w9,w14
443*4757b351SPierre Pronchery.endif
444*4757b351SPierre Pronchery.inst	0x04a1018c	//add z12.s,z12.s,z1.s
445*4757b351SPierre Pronchery.if	mixin == 1
446*4757b351SPierre Pronchery	add	w10,w10,w11
447*4757b351SPierre Pronchery.endif
448*4757b351SPierre Pronchery.if	mixin == 1
449*4757b351SPierre Pronchery	eor	w22,w22,w7
450*4757b351SPierre Pronchery.endif
451*4757b351SPierre Pronchery.inst	0x0468340f	//xar z15.s,z15.s,z0.s,24
452*4757b351SPierre Pronchery.if	mixin == 1
453*4757b351SPierre Pronchery	ror	w22,w22,24
454*4757b351SPierre Pronchery.endif
455*4757b351SPierre Pronchery.if	mixin == 1
456*4757b351SPierre Pronchery	eor	w19,w19,w8
457*4757b351SPierre Pronchery.endif
458*4757b351SPierre Pronchery.inst	0x04683483	//xar z3.s,z3.s,z4.s,24
459*4757b351SPierre Pronchery.if	mixin == 1
460*4757b351SPierre Pronchery	ror	w19,w19,24
461*4757b351SPierre Pronchery.endif
462*4757b351SPierre Pronchery.if	mixin == 1
463*4757b351SPierre Pronchery	eor	w20,w20,w9
464*4757b351SPierre Pronchery.endif
465*4757b351SPierre Pronchery.inst	0x04683507	//xar z7.s,z7.s,z8.s,24
466*4757b351SPierre Pronchery.if	mixin == 1
467*4757b351SPierre Pronchery	ror	w20,w20,24
468*4757b351SPierre Pronchery.endif
469*4757b351SPierre Pronchery.if	mixin == 1
470*4757b351SPierre Pronchery	eor	w21,w21,w10
471*4757b351SPierre Pronchery.endif
472*4757b351SPierre Pronchery.inst	0x0468358b	//xar z11.s,z11.s,z12.s,24
473*4757b351SPierre Pronchery.if	mixin == 1
474*4757b351SPierre Pronchery	ror	w21,w21,24
475*4757b351SPierre Pronchery.endif
476*4757b351SPierre Pronchery.inst	0x04af014a	//add z10.s,z10.s,z15.s
477*4757b351SPierre Pronchery.if	mixin == 1
478*4757b351SPierre Pronchery	add	w17,w17,w22
479*4757b351SPierre Pronchery.endif
480*4757b351SPierre Pronchery.inst	0x04a301ce	//add z14.s,z14.s,z3.s
481*4757b351SPierre Pronchery.if	mixin == 1
482*4757b351SPierre Pronchery	add	w18,w18,w19
483*4757b351SPierre Pronchery.endif
484*4757b351SPierre Pronchery.inst	0x04a70042	//add z2.s,z2.s,z7.s
485*4757b351SPierre Pronchery.if	mixin == 1
486*4757b351SPierre Pronchery	add	w15,w15,w20
487*4757b351SPierre Pronchery.endif
488*4757b351SPierre Pronchery.inst	0x04ab00c6	//add z6.s,z6.s,z11.s
489*4757b351SPierre Pronchery.if	mixin == 1
490*4757b351SPierre Pronchery	add	w16,w16,w21
491*4757b351SPierre Pronchery.endif
492*4757b351SPierre Pronchery.if	mixin == 1
493*4757b351SPierre Pronchery	eor	w12,w12,w17
494*4757b351SPierre Pronchery.endif
495*4757b351SPierre Pronchery.inst	0x04673545	//xar z5.s,z5.s,z10.s,25
496*4757b351SPierre Pronchery.if	mixin == 1
497*4757b351SPierre Pronchery	ror	w12,w12,25
498*4757b351SPierre Pronchery.endif
499*4757b351SPierre Pronchery.if	mixin == 1
500*4757b351SPierre Pronchery	eor	w13,w13,w18
501*4757b351SPierre Pronchery.endif
502*4757b351SPierre Pronchery.inst	0x046735c9	//xar z9.s,z9.s,z14.s,25
503*4757b351SPierre Pronchery.if	mixin == 1
504*4757b351SPierre Pronchery	ror	w13,w13,25
505*4757b351SPierre Pronchery.endif
506*4757b351SPierre Pronchery.if	mixin == 1
507*4757b351SPierre Pronchery	eor	w14,w14,w15
508*4757b351SPierre Pronchery.endif
509*4757b351SPierre Pronchery.inst	0x0467344d	//xar z13.s,z13.s,z2.s,25
510*4757b351SPierre Pronchery.if	mixin == 1
511*4757b351SPierre Pronchery	ror	w14,w14,25
512*4757b351SPierre Pronchery.endif
513*4757b351SPierre Pronchery.if	mixin == 1
514*4757b351SPierre Pronchery	eor	w11,w11,w16
515*4757b351SPierre Pronchery.endif
516*4757b351SPierre Pronchery.inst	0x046734c1	//xar z1.s,z1.s,z6.s,25
517*4757b351SPierre Pronchery.if	mixin == 1
518*4757b351SPierre Pronchery	ror	w11,w11,25
519*4757b351SPierre Pronchery.endif
520*4757b351SPierre Pronchery	sub	x6,x6,1
521*4757b351SPierre Pronchery	cbnz	x6,10b
522*4757b351SPierre Pronchery.if	mixin == 1
523*4757b351SPierre Pronchery	add	w7,w7,w23
524*4757b351SPierre Pronchery.endif
525*4757b351SPierre Pronchery.inst	0x04b90000	//add z0.s,z0.s,z25.s
526*4757b351SPierre Pronchery.if	mixin == 1
527*4757b351SPierre Pronchery	add	x8,x8,x23,lsr #32
528*4757b351SPierre Pronchery.endif
529*4757b351SPierre Pronchery.inst	0x04ba0084	//add z4.s,z4.s,z26.s
530*4757b351SPierre Pronchery.if	mixin == 1
531*4757b351SPierre Pronchery	add	x7,x7,x8,lsl #32  // pack
532*4757b351SPierre Pronchery.endif
533*4757b351SPierre Pronchery.if	mixin == 1
534*4757b351SPierre Pronchery	add	w9,w9,w24
535*4757b351SPierre Pronchery.endif
536*4757b351SPierre Pronchery.inst	0x04bb0108	//add z8.s,z8.s,z27.s
537*4757b351SPierre Pronchery.if	mixin == 1
538*4757b351SPierre Pronchery	add	x10,x10,x24,lsr #32
539*4757b351SPierre Pronchery.endif
540*4757b351SPierre Pronchery.inst	0x04bc018c	//add z12.s,z12.s,z28.s
541*4757b351SPierre Pronchery.if	mixin == 1
542*4757b351SPierre Pronchery	add	x9,x9,x10,lsl #32  // pack
543*4757b351SPierre Pronchery.endif
544*4757b351SPierre Pronchery.if	mixin == 1
545*4757b351SPierre Pronchery	ldp	x8,x10,[x1],#16
546*4757b351SPierre Pronchery.endif
547*4757b351SPierre Pronchery.if	mixin == 1
548*4757b351SPierre Pronchery	add	w11,w11,w25
549*4757b351SPierre Pronchery.endif
550*4757b351SPierre Pronchery.inst	0x04bd0021	//add z1.s,z1.s,z29.s
551*4757b351SPierre Pronchery.if	mixin == 1
552*4757b351SPierre Pronchery	add	x12,x12,x25,lsr #32
553*4757b351SPierre Pronchery.endif
554*4757b351SPierre Pronchery.inst	0x04be00a5	//add z5.s,z5.s,z30.s
555*4757b351SPierre Pronchery.if	mixin == 1
556*4757b351SPierre Pronchery	add	x11,x11,x12,lsl #32  // pack
557*4757b351SPierre Pronchery.endif
558*4757b351SPierre Pronchery.if	mixin == 1
559*4757b351SPierre Pronchery	add	w13,w13,w26
560*4757b351SPierre Pronchery.endif
561*4757b351SPierre Pronchery.inst	0x04b50129	//add z9.s,z9.s,z21.s
562*4757b351SPierre Pronchery.if	mixin == 1
563*4757b351SPierre Pronchery	add	x14,x14,x26,lsr #32
564*4757b351SPierre Pronchery.endif
565*4757b351SPierre Pronchery.inst	0x04b601ad	//add z13.s,z13.s,z22.s
566*4757b351SPierre Pronchery.if	mixin == 1
567*4757b351SPierre Pronchery	add	x13,x13,x14,lsl #32  // pack
568*4757b351SPierre Pronchery.endif
569*4757b351SPierre Pronchery.if	mixin == 1
570*4757b351SPierre Pronchery	ldp	x12,x14,[x1],#16
571*4757b351SPierre Pronchery.endif
572*4757b351SPierre Pronchery.if	mixin == 1
573*4757b351SPierre Pronchery	add	w15,w15,w27
574*4757b351SPierre Pronchery.endif
575*4757b351SPierre Pronchery.inst	0x04b70042	//add z2.s,z2.s,z23.s
576*4757b351SPierre Pronchery.if	mixin == 1
577*4757b351SPierre Pronchery	add	x16,x16,x27,lsr #32
578*4757b351SPierre Pronchery.endif
579*4757b351SPierre Pronchery.inst	0x04b800c6	//add z6.s,z6.s,z24.s
580*4757b351SPierre Pronchery.if	mixin == 1
581*4757b351SPierre Pronchery	add	x15,x15,x16,lsl #32  // pack
582*4757b351SPierre Pronchery.endif
583*4757b351SPierre Pronchery.if	mixin == 1
584*4757b351SPierre Pronchery	add	w17,w17,w28
585*4757b351SPierre Pronchery.endif
586*4757b351SPierre Pronchery.inst	0x04b1014a	//add z10.s,z10.s,z17.s
587*4757b351SPierre Pronchery.if	mixin == 1
588*4757b351SPierre Pronchery	add	x18,x18,x28,lsr #32
589*4757b351SPierre Pronchery.endif
590*4757b351SPierre Pronchery.inst	0x04b201ce	//add z14.s,z14.s,z18.s
591*4757b351SPierre Pronchery.if	mixin == 1
592*4757b351SPierre Pronchery	add	x17,x17,x18,lsl #32  // pack
593*4757b351SPierre Pronchery.endif
594*4757b351SPierre Pronchery.if	mixin == 1
595*4757b351SPierre Pronchery	ldp	x16,x18,[x1],#16
596*4757b351SPierre Pronchery.endif
597*4757b351SPierre Pronchery.if	mixin == 1
598*4757b351SPierre Pronchery	add	w19,w19,w29
599*4757b351SPierre Pronchery.endif
600*4757b351SPierre Pronchery.inst	0x04b00063	//add z3.s,z3.s,z16.s
601*4757b351SPierre Pronchery.if	mixin == 1
602*4757b351SPierre Pronchery	add	x20,x20,x29,lsr #32
603*4757b351SPierre Pronchery.endif
604*4757b351SPierre Pronchery.inst	0x04b300e7	//add z7.s,z7.s,z19.s
605*4757b351SPierre Pronchery.if	mixin == 1
606*4757b351SPierre Pronchery	add	x19,x19,x20,lsl #32  // pack
607*4757b351SPierre Pronchery.endif
608*4757b351SPierre Pronchery.if	mixin == 1
609*4757b351SPierre Pronchery	add	w21,w21,w30
610*4757b351SPierre Pronchery.endif
611*4757b351SPierre Pronchery.inst	0x04b4016b	//add z11.s,z11.s,z20.s
612*4757b351SPierre Pronchery.if	mixin == 1
613*4757b351SPierre Pronchery	add	x22,x22,x30,lsr #32
614*4757b351SPierre Pronchery.endif
615*4757b351SPierre Pronchery.inst	0x04bf01ef	//add z15.s,z15.s,z31.s
616*4757b351SPierre Pronchery.if	mixin == 1
617*4757b351SPierre Pronchery	add	x21,x21,x22,lsl #32  // pack
618*4757b351SPierre Pronchery.endif
619*4757b351SPierre Pronchery.if	mixin == 1
620*4757b351SPierre Pronchery	ldp	x20,x22,[x1],#16
621*4757b351SPierre Pronchery.endif
622*4757b351SPierre Pronchery#ifdef	__AARCH64EB__
623*4757b351SPierre Pronchery	rev	x7,x7
624*4757b351SPierre Pronchery.inst	0x05a48000	//revb z0.s,p0/m,z0.s
625*4757b351SPierre Pronchery.inst	0x05a48084	//revb z4.s,p0/m,z4.s
626*4757b351SPierre Pronchery	rev	x9,x9
627*4757b351SPierre Pronchery.inst	0x05a48108	//revb z8.s,p0/m,z8.s
628*4757b351SPierre Pronchery.inst	0x05a4818c	//revb z12.s,p0/m,z12.s
629*4757b351SPierre Pronchery	rev	x11,x11
630*4757b351SPierre Pronchery.inst	0x05a48021	//revb z1.s,p0/m,z1.s
631*4757b351SPierre Pronchery.inst	0x05a480a5	//revb z5.s,p0/m,z5.s
632*4757b351SPierre Pronchery	rev	x13,x13
633*4757b351SPierre Pronchery.inst	0x05a48129	//revb z9.s,p0/m,z9.s
634*4757b351SPierre Pronchery.inst	0x05a481ad	//revb z13.s,p0/m,z13.s
635*4757b351SPierre Pronchery	rev	x15,x15
636*4757b351SPierre Pronchery.inst	0x05a48042	//revb z2.s,p0/m,z2.s
637*4757b351SPierre Pronchery.inst	0x05a480c6	//revb z6.s,p0/m,z6.s
638*4757b351SPierre Pronchery	rev	x17,x17
639*4757b351SPierre Pronchery.inst	0x05a4814a	//revb z10.s,p0/m,z10.s
640*4757b351SPierre Pronchery.inst	0x05a481ce	//revb z14.s,p0/m,z14.s
641*4757b351SPierre Pronchery	rev	x19,x19
642*4757b351SPierre Pronchery.inst	0x05a48063	//revb z3.s,p0/m,z3.s
643*4757b351SPierre Pronchery.inst	0x05a480e7	//revb z7.s,p0/m,z7.s
644*4757b351SPierre Pronchery	rev	x21,x21
645*4757b351SPierre Pronchery.inst	0x05a4816b	//revb z11.s,p0/m,z11.s
646*4757b351SPierre Pronchery.inst	0x05a481ef	//revb z15.s,p0/m,z15.s
647*4757b351SPierre Pronchery#endif
648*4757b351SPierre Pronchery.if	mixin == 1
649*4757b351SPierre Pronchery	add	x29,x29,#1
650*4757b351SPierre Pronchery.endif
651*4757b351SPierre Pronchery	cmp	x5,4
652*4757b351SPierre Pronchery	b.ne	200f
653*4757b351SPierre Pronchery.if	mixin == 1
654*4757b351SPierre Pronchery	eor	x7,x7,x8
655*4757b351SPierre Pronchery.endif
656*4757b351SPierre Pronchery.if	mixin == 1
657*4757b351SPierre Pronchery	eor	x9,x9,x10
658*4757b351SPierre Pronchery.endif
659*4757b351SPierre Pronchery.if	mixin == 1
660*4757b351SPierre Pronchery	eor	x11,x11,x12
661*4757b351SPierre Pronchery.endif
662*4757b351SPierre Pronchery.inst	0x05a46011	//zip1 z17.s,z0.s,z4.s
663*4757b351SPierre Pronchery.inst	0x05a46412	//zip2 z18.s,z0.s,z4.s
664*4757b351SPierre Pronchery.inst	0x05ac6113	//zip1 z19.s,z8.s,z12.s
665*4757b351SPierre Pronchery.inst	0x05ac6514	//zip2 z20.s,z8.s,z12.s
666*4757b351SPierre Pronchery
667*4757b351SPierre Pronchery.inst	0x05a56035	//zip1 z21.s,z1.s,z5.s
668*4757b351SPierre Pronchery.inst	0x05a56436	//zip2 z22.s,z1.s,z5.s
669*4757b351SPierre Pronchery.inst	0x05ad6137	//zip1 z23.s,z9.s,z13.s
670*4757b351SPierre Pronchery.inst	0x05ad6538	//zip2 z24.s,z9.s,z13.s
671*4757b351SPierre Pronchery
672*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
673*4757b351SPierre Pronchery.inst	0x05f36624	//zip2 z4.d,z17.d,z19.d
674*4757b351SPierre Pronchery.inst	0x05f46248	//zip1 z8.d,z18.d,z20.d
675*4757b351SPierre Pronchery.inst	0x05f4664c	//zip2 z12.d,z18.d,z20.d
676*4757b351SPierre Pronchery
677*4757b351SPierre Pronchery.inst	0x05f762a1	//zip1 z1.d,z21.d,z23.d
678*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
679*4757b351SPierre Pronchery.inst	0x05f862c9	//zip1 z9.d,z22.d,z24.d
680*4757b351SPierre Pronchery.inst	0x05f866cd	//zip2 z13.d,z22.d,z24.d
681*4757b351SPierre Pronchery.if	mixin == 1
682*4757b351SPierre Pronchery	eor	x13,x13,x14
683*4757b351SPierre Pronchery.endif
684*4757b351SPierre Pronchery.if	mixin == 1
685*4757b351SPierre Pronchery	eor	x15,x15,x16
686*4757b351SPierre Pronchery.endif
687*4757b351SPierre Pronchery.if	mixin == 1
688*4757b351SPierre Pronchery	eor	x17,x17,x18
689*4757b351SPierre Pronchery.endif
690*4757b351SPierre Pronchery.inst	0x05a66051	//zip1 z17.s,z2.s,z6.s
691*4757b351SPierre Pronchery.inst	0x05a66452	//zip2 z18.s,z2.s,z6.s
692*4757b351SPierre Pronchery.inst	0x05ae6153	//zip1 z19.s,z10.s,z14.s
693*4757b351SPierre Pronchery.inst	0x05ae6554	//zip2 z20.s,z10.s,z14.s
694*4757b351SPierre Pronchery
695*4757b351SPierre Pronchery.inst	0x05a76075	//zip1 z21.s,z3.s,z7.s
696*4757b351SPierre Pronchery.inst	0x05a76476	//zip2 z22.s,z3.s,z7.s
697*4757b351SPierre Pronchery.inst	0x05af6177	//zip1 z23.s,z11.s,z15.s
698*4757b351SPierre Pronchery.inst	0x05af6578	//zip2 z24.s,z11.s,z15.s
699*4757b351SPierre Pronchery
700*4757b351SPierre Pronchery.inst	0x05f36222	//zip1 z2.d,z17.d,z19.d
701*4757b351SPierre Pronchery.inst	0x05f36626	//zip2 z6.d,z17.d,z19.d
702*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
703*4757b351SPierre Pronchery.inst	0x05f4664e	//zip2 z14.d,z18.d,z20.d
704*4757b351SPierre Pronchery
705*4757b351SPierre Pronchery.inst	0x05f762a3	//zip1 z3.d,z21.d,z23.d
706*4757b351SPierre Pronchery.inst	0x05f766a7	//zip2 z7.d,z21.d,z23.d
707*4757b351SPierre Pronchery.inst	0x05f862cb	//zip1 z11.d,z22.d,z24.d
708*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
709*4757b351SPierre Pronchery.if	mixin == 1
710*4757b351SPierre Pronchery	eor	x19,x19,x20
711*4757b351SPierre Pronchery.endif
712*4757b351SPierre Pronchery.if	mixin == 1
713*4757b351SPierre Pronchery	eor	x21,x21,x22
714*4757b351SPierre Pronchery.endif
715*4757b351SPierre Pronchery	ld1	{v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64
716*4757b351SPierre Pronchery	ld1	{v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64
717*4757b351SPierre Pronchery.inst	0x04b13000	//eor z0.d,z0.d,z17.d
718*4757b351SPierre Pronchery.inst	0x04b23021	//eor z1.d,z1.d,z18.d
719*4757b351SPierre Pronchery.inst	0x04b33042	//eor z2.d,z2.d,z19.d
720*4757b351SPierre Pronchery.inst	0x04b43063	//eor z3.d,z3.d,z20.d
721*4757b351SPierre Pronchery.inst	0x04b53084	//eor z4.d,z4.d,z21.d
722*4757b351SPierre Pronchery.inst	0x04b630a5	//eor z5.d,z5.d,z22.d
723*4757b351SPierre Pronchery.inst	0x04b730c6	//eor z6.d,z6.d,z23.d
724*4757b351SPierre Pronchery.inst	0x04b830e7	//eor z7.d,z7.d,z24.d
725*4757b351SPierre Pronchery	ld1	{v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64
726*4757b351SPierre Pronchery	ld1	{v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64
727*4757b351SPierre Pronchery.if	mixin == 1
728*4757b351SPierre Pronchery	stp	x7,x9,[x0],#16
729*4757b351SPierre Pronchery.endif
730*4757b351SPierre Pronchery.inst	0x04b13108	//eor z8.d,z8.d,z17.d
731*4757b351SPierre Pronchery.inst	0x04b23129	//eor z9.d,z9.d,z18.d
732*4757b351SPierre Pronchery.if	mixin == 1
733*4757b351SPierre Pronchery	stp	x11,x13,[x0],#16
734*4757b351SPierre Pronchery.endif
735*4757b351SPierre Pronchery.inst	0x04b3314a	//eor z10.d,z10.d,z19.d
736*4757b351SPierre Pronchery.inst	0x04b4316b	//eor z11.d,z11.d,z20.d
737*4757b351SPierre Pronchery.if	mixin == 1
738*4757b351SPierre Pronchery	stp	x15,x17,[x0],#16
739*4757b351SPierre Pronchery.endif
740*4757b351SPierre Pronchery.inst	0x04b5318c	//eor z12.d,z12.d,z21.d
741*4757b351SPierre Pronchery.inst	0x04b631ad	//eor z13.d,z13.d,z22.d
742*4757b351SPierre Pronchery.if	mixin == 1
743*4757b351SPierre Pronchery	stp	x19,x21,[x0],#16
744*4757b351SPierre Pronchery.endif
745*4757b351SPierre Pronchery.inst	0x04b731ce	//eor z14.d,z14.d,z23.d
746*4757b351SPierre Pronchery.inst	0x04b831ef	//eor z15.d,z15.d,z24.d
747*4757b351SPierre Pronchery	st1	{v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64
748*4757b351SPierre Pronchery	st1	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
749*4757b351SPierre Pronchery	st1	{v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64
750*4757b351SPierre Pronchery	st1	{v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64
751*4757b351SPierre Pronchery	b	210f
752*4757b351SPierre Pronchery200:
// Store path reached from the "cmp x5,4 / b.ne 200f" test above, i.e. when
// x5 != 4.  Unlike the x5 == 4 path (fixed 64-byte NEON ld1/st1), every
// load/store here is an SVE ld1w/st1w with a "MUL VL" offset, so the amount
// of data moved scales with the hardware vector length.
//
// Layout of this block:
//   1. two stages of zip1/zip2 interleaves (.s then .d) over z0..z15, using
//      z17..z24 as scratch.  NOTE(review): this looks like a transpose of the
//      word-sliced state into per-block byte order -- confirm against the
//      generator script.
//   2. load 16 vectors from [x1], XOR them into the state, store 16 vectors
//      to [x0]; x1 and x0 are each advanced by 2 x (8 x VL).
//   3. when mixin == 1, eight 64-bit scalar words (x7,x9,...,x21) are XORed
//      with x8,x10,...,x22 and written with four post-indexed stp (64 bytes)
//      *before* the vector stores, so the scalar data precedes the vector
//      data in the output stream.
// p0 governs every ld1w/st1w; it is set up outside this view (presumably
// all-true -- TODO confirm).
//
// Stage 1a: interleave within z0..z7 (pairs z0/z1, z2/z3, z4/z5, z6/z7).
753*4757b351SPierre Pronchery.inst	0x05a16011	//zip1 z17.s,z0.s,z1.s
754*4757b351SPierre Pronchery.inst	0x05a16412	//zip2 z18.s,z0.s,z1.s
755*4757b351SPierre Pronchery.inst	0x05a36053	//zip1 z19.s,z2.s,z3.s
756*4757b351SPierre Pronchery.inst	0x05a36454	//zip2 z20.s,z2.s,z3.s
757*4757b351SPierre Pronchery
758*4757b351SPierre Pronchery.inst	0x05a56095	//zip1 z21.s,z4.s,z5.s
759*4757b351SPierre Pronchery.inst	0x05a56496	//zip2 z22.s,z4.s,z5.s
760*4757b351SPierre Pronchery.inst	0x05a760d7	//zip1 z23.s,z6.s,z7.s
761*4757b351SPierre Pronchery.inst	0x05a764d8	//zip2 z24.s,z6.s,z7.s
762*4757b351SPierre Pronchery
763*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
764*4757b351SPierre Pronchery.inst	0x05f36621	//zip2 z1.d,z17.d,z19.d
765*4757b351SPierre Pronchery.inst	0x05f46242	//zip1 z2.d,z18.d,z20.d
766*4757b351SPierre Pronchery.inst	0x05f46643	//zip2 z3.d,z18.d,z20.d
767*4757b351SPierre Pronchery
768*4757b351SPierre Pronchery.inst	0x05f762a4	//zip1 z4.d,z21.d,z23.d
769*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
770*4757b351SPierre Pronchery.inst	0x05f862c6	//zip1 z6.d,z22.d,z24.d
771*4757b351SPierre Pronchery.inst	0x05f866c7	//zip2 z7.d,z22.d,z24.d
// Scalar lane (only assembled when mixin == 1): XOR is interleaved with the
// vector work; x8/x10 are the second operands (loaded outside this view).
772*4757b351SPierre Pronchery.if	mixin == 1
773*4757b351SPierre Pronchery	eor	x7,x7,x8
774*4757b351SPierre Pronchery.endif
775*4757b351SPierre Pronchery.if	mixin == 1
776*4757b351SPierre Pronchery	eor	x9,x9,x10
777*4757b351SPierre Pronchery.endif
// Stage 1b: same interleave pattern applied to z8..z15.
778*4757b351SPierre Pronchery.inst	0x05a96111	//zip1 z17.s,z8.s,z9.s
779*4757b351SPierre Pronchery.inst	0x05a96512	//zip2 z18.s,z8.s,z9.s
780*4757b351SPierre Pronchery.inst	0x05ab6153	//zip1 z19.s,z10.s,z11.s
781*4757b351SPierre Pronchery.inst	0x05ab6554	//zip2 z20.s,z10.s,z11.s
782*4757b351SPierre Pronchery
783*4757b351SPierre Pronchery.inst	0x05ad6195	//zip1 z21.s,z12.s,z13.s
784*4757b351SPierre Pronchery.inst	0x05ad6596	//zip2 z22.s,z12.s,z13.s
785*4757b351SPierre Pronchery.inst	0x05af61d7	//zip1 z23.s,z14.s,z15.s
786*4757b351SPierre Pronchery.inst	0x05af65d8	//zip2 z24.s,z14.s,z15.s
787*4757b351SPierre Pronchery
788*4757b351SPierre Pronchery.inst	0x05f36228	//zip1 z8.d,z17.d,z19.d
789*4757b351SPierre Pronchery.inst	0x05f36629	//zip2 z9.d,z17.d,z19.d
790*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
791*4757b351SPierre Pronchery.inst	0x05f4664b	//zip2 z11.d,z18.d,z20.d
792*4757b351SPierre Pronchery
793*4757b351SPierre Pronchery.inst	0x05f762ac	//zip1 z12.d,z21.d,z23.d
794*4757b351SPierre Pronchery.inst	0x05f766ad	//zip2 z13.d,z21.d,z23.d
795*4757b351SPierre Pronchery.inst	0x05f862ce	//zip1 z14.d,z22.d,z24.d
796*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
797*4757b351SPierre Pronchery.if	mixin == 1
798*4757b351SPierre Pronchery	eor	x11,x11,x12
799*4757b351SPierre Pronchery.endif
800*4757b351SPierre Pronchery.if	mixin == 1
801*4757b351SPierre Pronchery	eor	x13,x13,x14
802*4757b351SPierre Pronchery.endif
// Stage 2a: second interleave, now across registers four apart
// (z0/z4, z8/z12, z1/z5, z9/z13); results land in z0,z4,z8,z12,z1,z5,z9,z13.
803*4757b351SPierre Pronchery.inst	0x05a46011	//zip1 z17.s,z0.s,z4.s
804*4757b351SPierre Pronchery.inst	0x05a46412	//zip2 z18.s,z0.s,z4.s
805*4757b351SPierre Pronchery.inst	0x05ac6113	//zip1 z19.s,z8.s,z12.s
806*4757b351SPierre Pronchery.inst	0x05ac6514	//zip2 z20.s,z8.s,z12.s
807*4757b351SPierre Pronchery
808*4757b351SPierre Pronchery.inst	0x05a56035	//zip1 z21.s,z1.s,z5.s
809*4757b351SPierre Pronchery.inst	0x05a56436	//zip2 z22.s,z1.s,z5.s
810*4757b351SPierre Pronchery.inst	0x05ad6137	//zip1 z23.s,z9.s,z13.s
811*4757b351SPierre Pronchery.inst	0x05ad6538	//zip2 z24.s,z9.s,z13.s
812*4757b351SPierre Pronchery
813*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
814*4757b351SPierre Pronchery.inst	0x05f36624	//zip2 z4.d,z17.d,z19.d
815*4757b351SPierre Pronchery.inst	0x05f46248	//zip1 z8.d,z18.d,z20.d
816*4757b351SPierre Pronchery.inst	0x05f4664c	//zip2 z12.d,z18.d,z20.d
817*4757b351SPierre Pronchery
818*4757b351SPierre Pronchery.inst	0x05f762a1	//zip1 z1.d,z21.d,z23.d
819*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
820*4757b351SPierre Pronchery.inst	0x05f862c9	//zip1 z9.d,z22.d,z24.d
821*4757b351SPierre Pronchery.inst	0x05f866cd	//zip2 z13.d,z22.d,z24.d
822*4757b351SPierre Pronchery.if	mixin == 1
823*4757b351SPierre Pronchery	eor	x15,x15,x16
824*4757b351SPierre Pronchery.endif
825*4757b351SPierre Pronchery.if	mixin == 1
826*4757b351SPierre Pronchery	eor	x17,x17,x18
827*4757b351SPierre Pronchery.endif
// Stage 2b: same as 2a for z2/z6, z10/z14, z3/z7, z11/z15.
828*4757b351SPierre Pronchery.inst	0x05a66051	//zip1 z17.s,z2.s,z6.s
829*4757b351SPierre Pronchery.inst	0x05a66452	//zip2 z18.s,z2.s,z6.s
830*4757b351SPierre Pronchery.inst	0x05ae6153	//zip1 z19.s,z10.s,z14.s
831*4757b351SPierre Pronchery.inst	0x05ae6554	//zip2 z20.s,z10.s,z14.s
832*4757b351SPierre Pronchery
833*4757b351SPierre Pronchery.inst	0x05a76075	//zip1 z21.s,z3.s,z7.s
834*4757b351SPierre Pronchery.inst	0x05a76476	//zip2 z22.s,z3.s,z7.s
835*4757b351SPierre Pronchery.inst	0x05af6177	//zip1 z23.s,z11.s,z15.s
836*4757b351SPierre Pronchery.inst	0x05af6578	//zip2 z24.s,z11.s,z15.s
837*4757b351SPierre Pronchery
838*4757b351SPierre Pronchery.inst	0x05f36222	//zip1 z2.d,z17.d,z19.d
839*4757b351SPierre Pronchery.inst	0x05f36626	//zip2 z6.d,z17.d,z19.d
840*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
841*4757b351SPierre Pronchery.inst	0x05f4664e	//zip2 z14.d,z18.d,z20.d
842*4757b351SPierre Pronchery
843*4757b351SPierre Pronchery.inst	0x05f762a3	//zip1 z3.d,z21.d,z23.d
844*4757b351SPierre Pronchery.inst	0x05f766a7	//zip2 z7.d,z21.d,z23.d
845*4757b351SPierre Pronchery.inst	0x05f862cb	//zip1 z11.d,z22.d,z24.d
846*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
847*4757b351SPierre Pronchery.if	mixin == 1
848*4757b351SPierre Pronchery	eor	x19,x19,x20
849*4757b351SPierre Pronchery.endif
850*4757b351SPierre Pronchery.if	mixin == 1
851*4757b351SPierre Pronchery	eor	x21,x21,x22
852*4757b351SPierre Pronchery.endif
// First half of the input: load 8 vectors from [x1] into the scratch
// registers z17..z24, then step x1 forward by 8 vector lengths.
853*4757b351SPierre Pronchery.inst	0xa540a031	//ld1w {z17.s},p0/z,[x1,#0,MUL VL]
854*4757b351SPierre Pronchery.inst	0xa541a032	//ld1w {z18.s},p0/z,[x1,#1,MUL VL]
855*4757b351SPierre Pronchery.inst	0xa542a033	//ld1w {z19.s},p0/z,[x1,#2,MUL VL]
856*4757b351SPierre Pronchery.inst	0xa543a034	//ld1w {z20.s},p0/z,[x1,#3,MUL VL]
857*4757b351SPierre Pronchery.inst	0xa544a035	//ld1w {z21.s},p0/z,[x1,#4,MUL VL]
858*4757b351SPierre Pronchery.inst	0xa545a036	//ld1w {z22.s},p0/z,[x1,#5,MUL VL]
859*4757b351SPierre Pronchery.inst	0xa546a037	//ld1w {z23.s},p0/z,[x1,#6,MUL VL]
860*4757b351SPierre Pronchery.inst	0xa547a038	//ld1w {z24.s},p0/z,[x1,#7,MUL VL]
861*4757b351SPierre Pronchery.inst	0x04215101	//addvl x1,x1,8
// XOR the loaded data into the state.  The destination order
// (z0,z4,z8,z12,z1,z5,z9,z13) matches the st1w order further down.
862*4757b351SPierre Pronchery.inst	0x04b13000	//eor z0.d,z0.d,z17.d
863*4757b351SPierre Pronchery.inst	0x04b23084	//eor z4.d,z4.d,z18.d
864*4757b351SPierre Pronchery.inst	0x04b33108	//eor z8.d,z8.d,z19.d
865*4757b351SPierre Pronchery.inst	0x04b4318c	//eor z12.d,z12.d,z20.d
866*4757b351SPierre Pronchery.inst	0x04b53021	//eor z1.d,z1.d,z21.d
867*4757b351SPierre Pronchery.inst	0x04b630a5	//eor z5.d,z5.d,z22.d
868*4757b351SPierre Pronchery.inst	0x04b73129	//eor z9.d,z9.d,z23.d
869*4757b351SPierre Pronchery.inst	0x04b831ad	//eor z13.d,z13.d,z24.d
// Second half of the input: the scratch registers are reused for the next
// 8 vectors, and x1 advances by another 8 vector lengths.
870*4757b351SPierre Pronchery.inst	0xa540a031	//ld1w {z17.s},p0/z,[x1,#0,MUL VL]
871*4757b351SPierre Pronchery.inst	0xa541a032	//ld1w {z18.s},p0/z,[x1,#1,MUL VL]
872*4757b351SPierre Pronchery.inst	0xa542a033	//ld1w {z19.s},p0/z,[x1,#2,MUL VL]
873*4757b351SPierre Pronchery.inst	0xa543a034	//ld1w {z20.s},p0/z,[x1,#3,MUL VL]
874*4757b351SPierre Pronchery.inst	0xa544a035	//ld1w {z21.s},p0/z,[x1,#4,MUL VL]
875*4757b351SPierre Pronchery.inst	0xa545a036	//ld1w {z22.s},p0/z,[x1,#5,MUL VL]
876*4757b351SPierre Pronchery.inst	0xa546a037	//ld1w {z23.s},p0/z,[x1,#6,MUL VL]
877*4757b351SPierre Pronchery.inst	0xa547a038	//ld1w {z24.s},p0/z,[x1,#7,MUL VL]
878*4757b351SPierre Pronchery.inst	0x04215101	//addvl x1,x1,8
// Scalar lane output (mixin == 1 only): four post-indexed stp write
// 4 x 16 = 64 bytes and bump x0 before any vector store is issued.
879*4757b351SPierre Pronchery.if	mixin == 1
880*4757b351SPierre Pronchery	stp	x7,x9,[x0],#16
881*4757b351SPierre Pronchery.endif
882*4757b351SPierre Pronchery.inst	0x04b13042	//eor z2.d,z2.d,z17.d
883*4757b351SPierre Pronchery.inst	0x04b230c6	//eor z6.d,z6.d,z18.d
884*4757b351SPierre Pronchery.if	mixin == 1
885*4757b351SPierre Pronchery	stp	x11,x13,[x0],#16
886*4757b351SPierre Pronchery.endif
887*4757b351SPierre Pronchery.inst	0x04b3314a	//eor z10.d,z10.d,z19.d
888*4757b351SPierre Pronchery.inst	0x04b431ce	//eor z14.d,z14.d,z20.d
889*4757b351SPierre Pronchery.if	mixin == 1
890*4757b351SPierre Pronchery	stp	x15,x17,[x0],#16
891*4757b351SPierre Pronchery.endif
892*4757b351SPierre Pronchery.inst	0x04b53063	//eor z3.d,z3.d,z21.d
893*4757b351SPierre Pronchery.inst	0x04b630e7	//eor z7.d,z7.d,z22.d
894*4757b351SPierre Pronchery.if	mixin == 1
895*4757b351SPierre Pronchery	stp	x19,x21,[x0],#16
896*4757b351SPierre Pronchery.endif
897*4757b351SPierre Pronchery.inst	0x04b7316b	//eor z11.d,z11.d,z23.d
898*4757b351SPierre Pronchery.inst	0x04b831ef	//eor z15.d,z15.d,z24.d
// Vector output: 8 vectors, advance x0 by 8 x VL, 8 more vectors, advance
// again.  Execution then falls through into label 210.
899*4757b351SPierre Pronchery.inst	0xe540e000	//st1w {z0.s},p0,[x0,#0,MUL VL]
900*4757b351SPierre Pronchery.inst	0xe541e004	//st1w {z4.s},p0,[x0,#1,MUL VL]
901*4757b351SPierre Pronchery.inst	0xe542e008	//st1w {z8.s},p0,[x0,#2,MUL VL]
902*4757b351SPierre Pronchery.inst	0xe543e00c	//st1w {z12.s},p0,[x0,#3,MUL VL]
903*4757b351SPierre Pronchery.inst	0xe544e001	//st1w {z1.s},p0,[x0,#4,MUL VL]
904*4757b351SPierre Pronchery.inst	0xe545e005	//st1w {z5.s},p0,[x0,#5,MUL VL]
905*4757b351SPierre Pronchery.inst	0xe546e009	//st1w {z9.s},p0,[x0,#6,MUL VL]
906*4757b351SPierre Pronchery.inst	0xe547e00d	//st1w {z13.s},p0,[x0,#7,MUL VL]
907*4757b351SPierre Pronchery.inst	0x04205100	//addvl x0,x0,8
908*4757b351SPierre Pronchery.inst	0xe540e002	//st1w {z2.s},p0,[x0,#0,MUL VL]
909*4757b351SPierre Pronchery.inst	0xe541e006	//st1w {z6.s},p0,[x0,#1,MUL VL]
910*4757b351SPierre Pronchery.inst	0xe542e00a	//st1w {z10.s},p0,[x0,#2,MUL VL]
911*4757b351SPierre Pronchery.inst	0xe543e00e	//st1w {z14.s},p0,[x0,#3,MUL VL]
912*4757b351SPierre Pronchery.inst	0xe544e003	//st1w {z3.s},p0,[x0,#4,MUL VL]
913*4757b351SPierre Pronchery.inst	0xe545e007	//st1w {z7.s},p0,[x0,#5,MUL VL]
914*4757b351SPierre Pronchery.inst	0xe546e00b	//st1w {z11.s},p0,[x0,#6,MUL VL]
915*4757b351SPierre Pronchery.inst	0xe547e00f	//st1w {z15.s},p0,[x0,#7,MUL VL]
916*4757b351SPierre Pronchery.inst	0x04205100	//addvl x0,x0,8
917*4757b351SPierre Pronchery210:
// Common loop tail for both store paths (the x5 == 4 path jumps here with
// "b 210f"; the 200: path falls through).
//
// incw adds the number of 32-bit elements in one SVE vector to x29.  x29 is
// the register whose low word seeds the per-lane "index" vectors at 101:
// below -- presumably the block counter; confirm against the generator.
918*4757b351SPierre Pronchery.inst	0x04b0e3fd	//incw x29, ALL, MUL #1
// Deduct 64 from x2 and loop back to 100: (outside this view) while the
// result is still positive (signed b.gt); otherwise leave via 110:.
// NOTE(review): x2 looks like the remaining byte count and 64 matches the
// 4 x 16 bytes written by the scalar stp lane -- TODO confirm.
919*4757b351SPierre Pronchery	subs	x2,x2,64
920*4757b351SPierre Pronchery	b.gt	100b
921*4757b351SPierre Pronchery	b	110f
922*4757b351SPierre Pronchery101:
// Vector-only set-up.  The assembler symbol "mixin" is cleared here, so every
// ".if mixin == 1" section from this point on assembles to nothing (until the
// symbol is reassigned elsewhere); the scalar w7..w22 lane is therefore
// absent from this copy of the code.
//
// Each of x23..x28, x30 and x29 is treated as two packed 32-bit words: the
// low word is used directly (wN) and the high word is extracted with
// "lsr xM,xN,#32".  Every word is broadcast with dup into TWO vector
// registers: one of z0..z15, which the round loop at 10: below modifies, and
// one of z25..z31 / z17..z24, which that loop (as far as visible) does not
// touch -- presumably the saved input state for the final add; TODO confirm.
//
// NOTE(review): x18 is written below.  AAPCS64 lets platforms reserve x18;
// confirm this routine is only used where clobbering it is permitted.
923*4757b351SPierre Pronchery	mixin=0
// x23 -> low word in z0/z25, high word (via x8) in z4/z26.
924*4757b351SPierre Pronchery	lsr	x8,x23,#32
925*4757b351SPierre Pronchery.inst	0x05a03ae0	//dup z0.s,w23
926*4757b351SPierre Pronchery.inst	0x05a03af9	//dup z25.s,w23
927*4757b351SPierre Pronchery.if	mixin == 1
928*4757b351SPierre Pronchery	mov	w7,w23
929*4757b351SPierre Pronchery.endif
930*4757b351SPierre Pronchery.inst	0x05a03904	//dup z4.s,w8
931*4757b351SPierre Pronchery.inst	0x05a0391a	//dup z26.s,w8
// x24 -> z8/z27 (low), z12/z28 (high via x10).
932*4757b351SPierre Pronchery	lsr	x10,x24,#32
933*4757b351SPierre Pronchery.inst	0x05a03b08	//dup z8.s,w24
934*4757b351SPierre Pronchery.inst	0x05a03b1b	//dup z27.s,w24
935*4757b351SPierre Pronchery.if	mixin == 1
936*4757b351SPierre Pronchery	mov	w9,w24
937*4757b351SPierre Pronchery.endif
938*4757b351SPierre Pronchery.inst	0x05a0394c	//dup z12.s,w10
939*4757b351SPierre Pronchery.inst	0x05a0395c	//dup z28.s,w10
// x25 -> z1/z29 (low), z5/z30 (high via x12).
940*4757b351SPierre Pronchery	lsr	x12,x25,#32
941*4757b351SPierre Pronchery.inst	0x05a03b21	//dup z1.s,w25
942*4757b351SPierre Pronchery.inst	0x05a03b3d	//dup z29.s,w25
943*4757b351SPierre Pronchery.if	mixin == 1
944*4757b351SPierre Pronchery	mov	w11,w25
945*4757b351SPierre Pronchery.endif
946*4757b351SPierre Pronchery.inst	0x05a03985	//dup z5.s,w12
947*4757b351SPierre Pronchery.inst	0x05a0399e	//dup z30.s,w12
// x26 -> z9/z21 (low), z13/z22 (high via x14).
948*4757b351SPierre Pronchery	lsr	x14,x26,#32
949*4757b351SPierre Pronchery.inst	0x05a03b49	//dup z9.s,w26
950*4757b351SPierre Pronchery.inst	0x05a03b55	//dup z21.s,w26
951*4757b351SPierre Pronchery.if	mixin == 1
952*4757b351SPierre Pronchery	mov	w13,w26
953*4757b351SPierre Pronchery.endif
954*4757b351SPierre Pronchery.inst	0x05a039cd	//dup z13.s,w14
955*4757b351SPierre Pronchery.inst	0x05a039d6	//dup z22.s,w14
// x27 -> z2/z23 (low), z6/z24 (high via x16).
956*4757b351SPierre Pronchery	lsr	x16,x27,#32
957*4757b351SPierre Pronchery.inst	0x05a03b62	//dup z2.s,w27
958*4757b351SPierre Pronchery.inst	0x05a03b77	//dup z23.s,w27
959*4757b351SPierre Pronchery.if	mixin == 1
960*4757b351SPierre Pronchery	mov	w15,w27
961*4757b351SPierre Pronchery.endif
962*4757b351SPierre Pronchery.inst	0x05a03a06	//dup z6.s,w16
963*4757b351SPierre Pronchery.inst	0x05a03a18	//dup z24.s,w16
// x28 -> z10/z17 (low), z14/z18 (high via x18).
964*4757b351SPierre Pronchery	lsr	x18,x28,#32
965*4757b351SPierre Pronchery.inst	0x05a03b8a	//dup z10.s,w28
966*4757b351SPierre Pronchery.inst	0x05a03b91	//dup z17.s,w28
967*4757b351SPierre Pronchery.if	mixin == 1
968*4757b351SPierre Pronchery	mov	w17,w28
969*4757b351SPierre Pronchery.endif
970*4757b351SPierre Pronchery.inst	0x05a03a4e	//dup z14.s,w18
971*4757b351SPierre Pronchery.inst	0x05a03a52	//dup z18.s,w18
// x30 -> z11/z20 (low), z15/z31 (high via x22).
972*4757b351SPierre Pronchery	lsr	x22,x30,#32
973*4757b351SPierre Pronchery.inst	0x05a03bcb	//dup z11.s,w30
974*4757b351SPierre Pronchery.inst	0x05a03bd4	//dup z20.s,w30
975*4757b351SPierre Pronchery.if	mixin == 1
976*4757b351SPierre Pronchery	mov	w21,w30
977*4757b351SPierre Pronchery.endif
978*4757b351SPierre Pronchery.inst	0x05a03acf	//dup z15.s,w22
979*4757b351SPierre Pronchery.inst	0x05a03adf	//dup z31.s,w22
// Low word of x29 is not broadcast but expanded with "index": lane i of
// z16/z3 receives w29 + i.  With mixin == 0 only the .else arm is assembled,
// so the sequence starts at w29 itself (the mixin arm would start at w29+1
// and give w29 to the scalar lane in w19).
980*4757b351SPierre Pronchery.if	mixin == 1
981*4757b351SPierre Pronchery	add	w20,w29,#1
982*4757b351SPierre Pronchery	mov	w19,w29
983*4757b351SPierre Pronchery.inst	0x04a14690	//index z16.s,w20,1
984*4757b351SPierre Pronchery.inst	0x04a14683	//index z3.s,w20,1
985*4757b351SPierre Pronchery.else
986*4757b351SPierre Pronchery.inst	0x04a147b0	//index z16.s,w29,1
987*4757b351SPierre Pronchery.inst	0x04a147a3	//index z3.s,w29,1
988*4757b351SPierre Pronchery.endif
// High word of x29 -> z7/z19 (broadcast, identical in every lane).
989*4757b351SPierre Pronchery	lsr	x20,x29,#32
990*4757b351SPierre Pronchery.inst	0x05a03a87	//dup z7.s,w20
991*4757b351SPierre Pronchery.inst	0x05a03a93	//dup z19.s,w20
// x6 = 10: presumably the iteration count for the loop at 10: that follows
// (its decrement/branch is outside this view -- TODO confirm).
992*4757b351SPierre Pronchery	mov	x6,#10
993*4757b351SPierre Pronchery10:
994*4757b351SPierre Pronchery.align	5
995*4757b351SPierre Pronchery.inst	0x04a10000	//add z0.s,z0.s,z1.s
996*4757b351SPierre Pronchery.if	mixin == 1
997*4757b351SPierre Pronchery	add	w7,w7,w11
998*4757b351SPierre Pronchery.endif
999*4757b351SPierre Pronchery.inst	0x04a50084	//add z4.s,z4.s,z5.s
1000*4757b351SPierre Pronchery.if	mixin == 1
1001*4757b351SPierre Pronchery	add	w8,w8,w12
1002*4757b351SPierre Pronchery.endif
1003*4757b351SPierre Pronchery.inst	0x04a90108	//add z8.s,z8.s,z9.s
1004*4757b351SPierre Pronchery.if	mixin == 1
1005*4757b351SPierre Pronchery	add	w9,w9,w13
1006*4757b351SPierre Pronchery.endif
1007*4757b351SPierre Pronchery.inst	0x04ad018c	//add z12.s,z12.s,z13.s
1008*4757b351SPierre Pronchery.if	mixin == 1
1009*4757b351SPierre Pronchery	add	w10,w10,w14
1010*4757b351SPierre Pronchery.endif
1011*4757b351SPierre Pronchery.if	mixin == 1
1012*4757b351SPierre Pronchery	eor	w19,w19,w7
1013*4757b351SPierre Pronchery.endif
1014*4757b351SPierre Pronchery.inst	0x04703403	//xar z3.s,z3.s,z0.s,16
1015*4757b351SPierre Pronchery.if	mixin == 1
1016*4757b351SPierre Pronchery	ror	w19,w19,16
1017*4757b351SPierre Pronchery.endif
1018*4757b351SPierre Pronchery.if	mixin == 1
1019*4757b351SPierre Pronchery	eor	w20,w20,w8
1020*4757b351SPierre Pronchery.endif
1021*4757b351SPierre Pronchery.inst	0x04703487	//xar z7.s,z7.s,z4.s,16
1022*4757b351SPierre Pronchery.if	mixin == 1
1023*4757b351SPierre Pronchery	ror	w20,w20,16
1024*4757b351SPierre Pronchery.endif
1025*4757b351SPierre Pronchery.if	mixin == 1
1026*4757b351SPierre Pronchery	eor	w21,w21,w9
1027*4757b351SPierre Pronchery.endif
1028*4757b351SPierre Pronchery.inst	0x0470350b	//xar z11.s,z11.s,z8.s,16
1029*4757b351SPierre Pronchery.if	mixin == 1
1030*4757b351SPierre Pronchery	ror	w21,w21,16
1031*4757b351SPierre Pronchery.endif
1032*4757b351SPierre Pronchery.if	mixin == 1
1033*4757b351SPierre Pronchery	eor	w22,w22,w10
1034*4757b351SPierre Pronchery.endif
1035*4757b351SPierre Pronchery.inst	0x0470358f	//xar z15.s,z15.s,z12.s,16
1036*4757b351SPierre Pronchery.if	mixin == 1
1037*4757b351SPierre Pronchery	ror	w22,w22,16
1038*4757b351SPierre Pronchery.endif
1039*4757b351SPierre Pronchery.inst	0x04a30042	//add z2.s,z2.s,z3.s
1040*4757b351SPierre Pronchery.if	mixin == 1
1041*4757b351SPierre Pronchery	add	w15,w15,w19
1042*4757b351SPierre Pronchery.endif
1043*4757b351SPierre Pronchery.inst	0x04a700c6	//add z6.s,z6.s,z7.s
1044*4757b351SPierre Pronchery.if	mixin == 1
1045*4757b351SPierre Pronchery	add	w16,w16,w20
1046*4757b351SPierre Pronchery.endif
1047*4757b351SPierre Pronchery.inst	0x04ab014a	//add z10.s,z10.s,z11.s
1048*4757b351SPierre Pronchery.if	mixin == 1
1049*4757b351SPierre Pronchery	add	w17,w17,w21
1050*4757b351SPierre Pronchery.endif
1051*4757b351SPierre Pronchery.inst	0x04af01ce	//add z14.s,z14.s,z15.s
1052*4757b351SPierre Pronchery.if	mixin == 1
1053*4757b351SPierre Pronchery	add	w18,w18,w22
1054*4757b351SPierre Pronchery.endif
1055*4757b351SPierre Pronchery.if	mixin == 1
1056*4757b351SPierre Pronchery	eor	w11,w11,w15
1057*4757b351SPierre Pronchery.endif
1058*4757b351SPierre Pronchery.inst	0x046c3441	//xar z1.s,z1.s,z2.s,20
1059*4757b351SPierre Pronchery.if	mixin == 1
1060*4757b351SPierre Pronchery	ror	w11,w11,20
1061*4757b351SPierre Pronchery.endif
1062*4757b351SPierre Pronchery.if	mixin == 1
1063*4757b351SPierre Pronchery	eor	w12,w12,w16
1064*4757b351SPierre Pronchery.endif
1065*4757b351SPierre Pronchery.inst	0x046c34c5	//xar z5.s,z5.s,z6.s,20
1066*4757b351SPierre Pronchery.if	mixin == 1
1067*4757b351SPierre Pronchery	ror	w12,w12,20
1068*4757b351SPierre Pronchery.endif
1069*4757b351SPierre Pronchery.if	mixin == 1
1070*4757b351SPierre Pronchery	eor	w13,w13,w17
1071*4757b351SPierre Pronchery.endif
1072*4757b351SPierre Pronchery.inst	0x046c3549	//xar z9.s,z9.s,z10.s,20
1073*4757b351SPierre Pronchery.if	mixin == 1
1074*4757b351SPierre Pronchery	ror	w13,w13,20
1075*4757b351SPierre Pronchery.endif
1076*4757b351SPierre Pronchery.if	mixin == 1
1077*4757b351SPierre Pronchery	eor	w14,w14,w18
1078*4757b351SPierre Pronchery.endif
1079*4757b351SPierre Pronchery.inst	0x046c35cd	//xar z13.s,z13.s,z14.s,20
1080*4757b351SPierre Pronchery.if	mixin == 1
1081*4757b351SPierre Pronchery	ror	w14,w14,20
1082*4757b351SPierre Pronchery.endif
1083*4757b351SPierre Pronchery.inst	0x04a10000	//add z0.s,z0.s,z1.s
1084*4757b351SPierre Pronchery.if	mixin == 1
1085*4757b351SPierre Pronchery	add	w7,w7,w11
1086*4757b351SPierre Pronchery.endif
1087*4757b351SPierre Pronchery.inst	0x04a50084	//add z4.s,z4.s,z5.s
1088*4757b351SPierre Pronchery.if	mixin == 1
1089*4757b351SPierre Pronchery	add	w8,w8,w12
1090*4757b351SPierre Pronchery.endif
1091*4757b351SPierre Pronchery.inst	0x04a90108	//add z8.s,z8.s,z9.s
1092*4757b351SPierre Pronchery.if	mixin == 1
1093*4757b351SPierre Pronchery	add	w9,w9,w13
1094*4757b351SPierre Pronchery.endif
1095*4757b351SPierre Pronchery.inst	0x04ad018c	//add z12.s,z12.s,z13.s
1096*4757b351SPierre Pronchery.if	mixin == 1
1097*4757b351SPierre Pronchery	add	w10,w10,w14
1098*4757b351SPierre Pronchery.endif
1099*4757b351SPierre Pronchery.if	mixin == 1
1100*4757b351SPierre Pronchery	eor	w19,w19,w7
1101*4757b351SPierre Pronchery.endif
1102*4757b351SPierre Pronchery.inst	0x04683403	//xar z3.s,z3.s,z0.s,24
1103*4757b351SPierre Pronchery.if	mixin == 1
1104*4757b351SPierre Pronchery	ror	w19,w19,24
1105*4757b351SPierre Pronchery.endif
1106*4757b351SPierre Pronchery.if	mixin == 1
1107*4757b351SPierre Pronchery	eor	w20,w20,w8
1108*4757b351SPierre Pronchery.endif
1109*4757b351SPierre Pronchery.inst	0x04683487	//xar z7.s,z7.s,z4.s,24
1110*4757b351SPierre Pronchery.if	mixin == 1
1111*4757b351SPierre Pronchery	ror	w20,w20,24
1112*4757b351SPierre Pronchery.endif
1113*4757b351SPierre Pronchery.if	mixin == 1
1114*4757b351SPierre Pronchery	eor	w21,w21,w9
1115*4757b351SPierre Pronchery.endif
1116*4757b351SPierre Pronchery.inst	0x0468350b	//xar z11.s,z11.s,z8.s,24
1117*4757b351SPierre Pronchery.if	mixin == 1
1118*4757b351SPierre Pronchery	ror	w21,w21,24
1119*4757b351SPierre Pronchery.endif
1120*4757b351SPierre Pronchery.if	mixin == 1
1121*4757b351SPierre Pronchery	eor	w22,w22,w10
1122*4757b351SPierre Pronchery.endif
1123*4757b351SPierre Pronchery.inst	0x0468358f	//xar z15.s,z15.s,z12.s,24
1124*4757b351SPierre Pronchery.if	mixin == 1
1125*4757b351SPierre Pronchery	ror	w22,w22,24
1126*4757b351SPierre Pronchery.endif
1127*4757b351SPierre Pronchery.inst	0x04a30042	//add z2.s,z2.s,z3.s
1128*4757b351SPierre Pronchery.if	mixin == 1
1129*4757b351SPierre Pronchery	add	w15,w15,w19
1130*4757b351SPierre Pronchery.endif
1131*4757b351SPierre Pronchery.inst	0x04a700c6	//add z6.s,z6.s,z7.s
1132*4757b351SPierre Pronchery.if	mixin == 1
1133*4757b351SPierre Pronchery	add	w16,w16,w20
1134*4757b351SPierre Pronchery.endif
1135*4757b351SPierre Pronchery.inst	0x04ab014a	//add z10.s,z10.s,z11.s
1136*4757b351SPierre Pronchery.if	mixin == 1
1137*4757b351SPierre Pronchery	add	w17,w17,w21
1138*4757b351SPierre Pronchery.endif
1139*4757b351SPierre Pronchery.inst	0x04af01ce	//add z14.s,z14.s,z15.s
1140*4757b351SPierre Pronchery.if	mixin == 1
1141*4757b351SPierre Pronchery	add	w18,w18,w22
1142*4757b351SPierre Pronchery.endif
1143*4757b351SPierre Pronchery.if	mixin == 1
1144*4757b351SPierre Pronchery	eor	w11,w11,w15
1145*4757b351SPierre Pronchery.endif
1146*4757b351SPierre Pronchery.inst	0x04673441	//xar z1.s,z1.s,z2.s,25
1147*4757b351SPierre Pronchery.if	mixin == 1
1148*4757b351SPierre Pronchery	ror	w11,w11,25
1149*4757b351SPierre Pronchery.endif
1150*4757b351SPierre Pronchery.if	mixin == 1
1151*4757b351SPierre Pronchery	eor	w12,w12,w16
1152*4757b351SPierre Pronchery.endif
1153*4757b351SPierre Pronchery.inst	0x046734c5	//xar z5.s,z5.s,z6.s,25
1154*4757b351SPierre Pronchery.if	mixin == 1
1155*4757b351SPierre Pronchery	ror	w12,w12,25
1156*4757b351SPierre Pronchery.endif
1157*4757b351SPierre Pronchery.if	mixin == 1
1158*4757b351SPierre Pronchery	eor	w13,w13,w17
1159*4757b351SPierre Pronchery.endif
1160*4757b351SPierre Pronchery.inst	0x04673549	//xar z9.s,z9.s,z10.s,25
1161*4757b351SPierre Pronchery.if	mixin == 1
1162*4757b351SPierre Pronchery	ror	w13,w13,25
1163*4757b351SPierre Pronchery.endif
1164*4757b351SPierre Pronchery.if	mixin == 1
1165*4757b351SPierre Pronchery	eor	w14,w14,w18
1166*4757b351SPierre Pronchery.endif
1167*4757b351SPierre Pronchery.inst	0x046735cd	//xar z13.s,z13.s,z14.s,25
1168*4757b351SPierre Pronchery.if	mixin == 1
1169*4757b351SPierre Pronchery	ror	w14,w14,25
1170*4757b351SPierre Pronchery.endif
1171*4757b351SPierre Pronchery.inst	0x04a50000	//add z0.s,z0.s,z5.s
1172*4757b351SPierre Pronchery.if	mixin == 1
1173*4757b351SPierre Pronchery	add	w7,w7,w12
1174*4757b351SPierre Pronchery.endif
1175*4757b351SPierre Pronchery.inst	0x04a90084	//add z4.s,z4.s,z9.s
1176*4757b351SPierre Pronchery.if	mixin == 1
1177*4757b351SPierre Pronchery	add	w8,w8,w13
1178*4757b351SPierre Pronchery.endif
1179*4757b351SPierre Pronchery.inst	0x04ad0108	//add z8.s,z8.s,z13.s
1180*4757b351SPierre Pronchery.if	mixin == 1
1181*4757b351SPierre Pronchery	add	w9,w9,w14
1182*4757b351SPierre Pronchery.endif
1183*4757b351SPierre Pronchery.inst	0x04a1018c	//add z12.s,z12.s,z1.s
1184*4757b351SPierre Pronchery.if	mixin == 1
1185*4757b351SPierre Pronchery	add	w10,w10,w11
1186*4757b351SPierre Pronchery.endif
1187*4757b351SPierre Pronchery.if	mixin == 1
1188*4757b351SPierre Pronchery	eor	w22,w22,w7
1189*4757b351SPierre Pronchery.endif
1190*4757b351SPierre Pronchery.inst	0x0470340f	//xar z15.s,z15.s,z0.s,16
1191*4757b351SPierre Pronchery.if	mixin == 1
1192*4757b351SPierre Pronchery	ror	w22,w22,16
1193*4757b351SPierre Pronchery.endif
1194*4757b351SPierre Pronchery.if	mixin == 1
1195*4757b351SPierre Pronchery	eor	w19,w19,w8
1196*4757b351SPierre Pronchery.endif
1197*4757b351SPierre Pronchery.inst	0x04703483	//xar z3.s,z3.s,z4.s,16
1198*4757b351SPierre Pronchery.if	mixin == 1
1199*4757b351SPierre Pronchery	ror	w19,w19,16
1200*4757b351SPierre Pronchery.endif
1201*4757b351SPierre Pronchery.if	mixin == 1
1202*4757b351SPierre Pronchery	eor	w20,w20,w9
1203*4757b351SPierre Pronchery.endif
1204*4757b351SPierre Pronchery.inst	0x04703507	//xar z7.s,z7.s,z8.s,16
1205*4757b351SPierre Pronchery.if	mixin == 1
1206*4757b351SPierre Pronchery	ror	w20,w20,16
1207*4757b351SPierre Pronchery.endif
1208*4757b351SPierre Pronchery.if	mixin == 1
1209*4757b351SPierre Pronchery	eor	w21,w21,w10
1210*4757b351SPierre Pronchery.endif
1211*4757b351SPierre Pronchery.inst	0x0470358b	//xar z11.s,z11.s,z12.s,16
1212*4757b351SPierre Pronchery.if	mixin == 1
1213*4757b351SPierre Pronchery	ror	w21,w21,16
1214*4757b351SPierre Pronchery.endif
1215*4757b351SPierre Pronchery.inst	0x04af014a	//add z10.s,z10.s,z15.s
1216*4757b351SPierre Pronchery.if	mixin == 1
1217*4757b351SPierre Pronchery	add	w17,w17,w22
1218*4757b351SPierre Pronchery.endif
1219*4757b351SPierre Pronchery.inst	0x04a301ce	//add z14.s,z14.s,z3.s
1220*4757b351SPierre Pronchery.if	mixin == 1
1221*4757b351SPierre Pronchery	add	w18,w18,w19
1222*4757b351SPierre Pronchery.endif
1223*4757b351SPierre Pronchery.inst	0x04a70042	//add z2.s,z2.s,z7.s
1224*4757b351SPierre Pronchery.if	mixin == 1
1225*4757b351SPierre Pronchery	add	w15,w15,w20
1226*4757b351SPierre Pronchery.endif
1227*4757b351SPierre Pronchery.inst	0x04ab00c6	//add z6.s,z6.s,z11.s
1228*4757b351SPierre Pronchery.if	mixin == 1
1229*4757b351SPierre Pronchery	add	w16,w16,w21
1230*4757b351SPierre Pronchery.endif
1231*4757b351SPierre Pronchery.if	mixin == 1
1232*4757b351SPierre Pronchery	eor	w12,w12,w17
1233*4757b351SPierre Pronchery.endif
1234*4757b351SPierre Pronchery.inst	0x046c3545	//xar z5.s,z5.s,z10.s,20
1235*4757b351SPierre Pronchery.if	mixin == 1
1236*4757b351SPierre Pronchery	ror	w12,w12,20
1237*4757b351SPierre Pronchery.endif
1238*4757b351SPierre Pronchery.if	mixin == 1
1239*4757b351SPierre Pronchery	eor	w13,w13,w18
1240*4757b351SPierre Pronchery.endif
1241*4757b351SPierre Pronchery.inst	0x046c35c9	//xar z9.s,z9.s,z14.s,20
1242*4757b351SPierre Pronchery.if	mixin == 1
1243*4757b351SPierre Pronchery	ror	w13,w13,20
1244*4757b351SPierre Pronchery.endif
1245*4757b351SPierre Pronchery.if	mixin == 1
1246*4757b351SPierre Pronchery	eor	w14,w14,w15
1247*4757b351SPierre Pronchery.endif
1248*4757b351SPierre Pronchery.inst	0x046c344d	//xar z13.s,z13.s,z2.s,20
1249*4757b351SPierre Pronchery.if	mixin == 1
1250*4757b351SPierre Pronchery	ror	w14,w14,20
1251*4757b351SPierre Pronchery.endif
1252*4757b351SPierre Pronchery.if	mixin == 1
1253*4757b351SPierre Pronchery	eor	w11,w11,w16
1254*4757b351SPierre Pronchery.endif
1255*4757b351SPierre Pronchery.inst	0x046c34c1	//xar z1.s,z1.s,z6.s,20
1256*4757b351SPierre Pronchery.if	mixin == 1
1257*4757b351SPierre Pronchery	ror	w11,w11,20
1258*4757b351SPierre Pronchery.endif
1259*4757b351SPierre Pronchery.inst	0x04a50000	//add z0.s,z0.s,z5.s
1260*4757b351SPierre Pronchery.if	mixin == 1
1261*4757b351SPierre Pronchery	add	w7,w7,w12
1262*4757b351SPierre Pronchery.endif
1263*4757b351SPierre Pronchery.inst	0x04a90084	//add z4.s,z4.s,z9.s
1264*4757b351SPierre Pronchery.if	mixin == 1
1265*4757b351SPierre Pronchery	add	w8,w8,w13
1266*4757b351SPierre Pronchery.endif
1267*4757b351SPierre Pronchery.inst	0x04ad0108	//add z8.s,z8.s,z13.s
1268*4757b351SPierre Pronchery.if	mixin == 1
1269*4757b351SPierre Pronchery	add	w9,w9,w14
1270*4757b351SPierre Pronchery.endif
1271*4757b351SPierre Pronchery.inst	0x04a1018c	//add z12.s,z12.s,z1.s
1272*4757b351SPierre Pronchery.if	mixin == 1
1273*4757b351SPierre Pronchery	add	w10,w10,w11
1274*4757b351SPierre Pronchery.endif
1275*4757b351SPierre Pronchery.if	mixin == 1
1276*4757b351SPierre Pronchery	eor	w22,w22,w7
1277*4757b351SPierre Pronchery.endif
1278*4757b351SPierre Pronchery.inst	0x0468340f	//xar z15.s,z15.s,z0.s,24
1279*4757b351SPierre Pronchery.if	mixin == 1
1280*4757b351SPierre Pronchery	ror	w22,w22,24
1281*4757b351SPierre Pronchery.endif
1282*4757b351SPierre Pronchery.if	mixin == 1
1283*4757b351SPierre Pronchery	eor	w19,w19,w8
1284*4757b351SPierre Pronchery.endif
1285*4757b351SPierre Pronchery.inst	0x04683483	//xar z3.s,z3.s,z4.s,24
1286*4757b351SPierre Pronchery.if	mixin == 1
1287*4757b351SPierre Pronchery	ror	w19,w19,24
1288*4757b351SPierre Pronchery.endif
1289*4757b351SPierre Pronchery.if	mixin == 1
1290*4757b351SPierre Pronchery	eor	w20,w20,w9
1291*4757b351SPierre Pronchery.endif
1292*4757b351SPierre Pronchery.inst	0x04683507	//xar z7.s,z7.s,z8.s,24
1293*4757b351SPierre Pronchery.if	mixin == 1
1294*4757b351SPierre Pronchery	ror	w20,w20,24
1295*4757b351SPierre Pronchery.endif
1296*4757b351SPierre Pronchery.if	mixin == 1
1297*4757b351SPierre Pronchery	eor	w21,w21,w10
1298*4757b351SPierre Pronchery.endif
1299*4757b351SPierre Pronchery.inst	0x0468358b	//xar z11.s,z11.s,z12.s,24
1300*4757b351SPierre Pronchery.if	mixin == 1
1301*4757b351SPierre Pronchery	ror	w21,w21,24
1302*4757b351SPierre Pronchery.endif
1303*4757b351SPierre Pronchery.inst	0x04af014a	//add z10.s,z10.s,z15.s
1304*4757b351SPierre Pronchery.if	mixin == 1
1305*4757b351SPierre Pronchery	add	w17,w17,w22
1306*4757b351SPierre Pronchery.endif
1307*4757b351SPierre Pronchery.inst	0x04a301ce	//add z14.s,z14.s,z3.s
1308*4757b351SPierre Pronchery.if	mixin == 1
1309*4757b351SPierre Pronchery	add	w18,w18,w19
1310*4757b351SPierre Pronchery.endif
1311*4757b351SPierre Pronchery.inst	0x04a70042	//add z2.s,z2.s,z7.s
1312*4757b351SPierre Pronchery.if	mixin == 1
1313*4757b351SPierre Pronchery	add	w15,w15,w20
1314*4757b351SPierre Pronchery.endif
1315*4757b351SPierre Pronchery.inst	0x04ab00c6	//add z6.s,z6.s,z11.s
1316*4757b351SPierre Pronchery.if	mixin == 1
1317*4757b351SPierre Pronchery	add	w16,w16,w21
1318*4757b351SPierre Pronchery.endif
1319*4757b351SPierre Pronchery.if	mixin == 1
1320*4757b351SPierre Pronchery	eor	w12,w12,w17
1321*4757b351SPierre Pronchery.endif
1322*4757b351SPierre Pronchery.inst	0x04673545	//xar z5.s,z5.s,z10.s,25
1323*4757b351SPierre Pronchery.if	mixin == 1
1324*4757b351SPierre Pronchery	ror	w12,w12,25
1325*4757b351SPierre Pronchery.endif
1326*4757b351SPierre Pronchery.if	mixin == 1
1327*4757b351SPierre Pronchery	eor	w13,w13,w18
1328*4757b351SPierre Pronchery.endif
1329*4757b351SPierre Pronchery.inst	0x046735c9	//xar z9.s,z9.s,z14.s,25
1330*4757b351SPierre Pronchery.if	mixin == 1
1331*4757b351SPierre Pronchery	ror	w13,w13,25
1332*4757b351SPierre Pronchery.endif
1333*4757b351SPierre Pronchery.if	mixin == 1
1334*4757b351SPierre Pronchery	eor	w14,w14,w15
1335*4757b351SPierre Pronchery.endif
1336*4757b351SPierre Pronchery.inst	0x0467344d	//xar z13.s,z13.s,z2.s,25
1337*4757b351SPierre Pronchery.if	mixin == 1
1338*4757b351SPierre Pronchery	ror	w14,w14,25
1339*4757b351SPierre Pronchery.endif
1340*4757b351SPierre Pronchery.if	mixin == 1
1341*4757b351SPierre Pronchery	eor	w11,w11,w16
1342*4757b351SPierre Pronchery.endif
1343*4757b351SPierre Pronchery.inst	0x046734c1	//xar z1.s,z1.s,z6.s,25
1344*4757b351SPierre Pronchery.if	mixin == 1
1345*4757b351SPierre Pronchery	ror	w11,w11,25
1346*4757b351SPierre Pronchery.endif
1347*4757b351SPierre Pronchery	sub	x6,x6,1
1348*4757b351SPierre Pronchery	cbnz	x6,10b
// Post-round add-back. Each working vector z0-z15 has one other vector
// register added to it (z25-z30, z21-z24, z17, z18, z16, z19, z20, z31 per
// the annotations on the .inst words).
// NOTE(review): those sources are presumably the pre-round copies of the
// state; confirm against the state-load code of this expansion.
//
// When mixin == 1 the scalar registers get the matching treatment from
// x23-x30: the low word of xN is added to the first register of a pair
// (w7, w9, ... w21), the high word (lsr #32) to the second (x8, x10, ... x22),
// and the pair is then packed into the first register as one 64-bit value
// (second word moved to bits 63:32 by lsl #32).
// After every two packs an ldp refills the two freed second registers with
// the next 16 bytes at [x1] and post-increments x1 by 16, so the four ldp
// instructions below read 64 bytes in total.
1349*4757b351SPierre Pronchery.if	mixin == 1
1350*4757b351SPierre Pronchery	add	w7,w7,w23
1351*4757b351SPierre Pronchery.endif
1352*4757b351SPierre Pronchery.inst	0x04b90000	//add z0.s,z0.s,z25.s
1353*4757b351SPierre Pronchery.if	mixin == 1
1354*4757b351SPierre Pronchery	add	x8,x8,x23,lsr #32
1355*4757b351SPierre Pronchery.endif
1356*4757b351SPierre Pronchery.inst	0x04ba0084	//add z4.s,z4.s,z26.s
1357*4757b351SPierre Pronchery.if	mixin == 1
1358*4757b351SPierre Pronchery	add	x7,x7,x8,lsl #32  // pack
1359*4757b351SPierre Pronchery.endif
1360*4757b351SPierre Pronchery.if	mixin == 1
1361*4757b351SPierre Pronchery	add	w9,w9,w24
1362*4757b351SPierre Pronchery.endif
1363*4757b351SPierre Pronchery.inst	0x04bb0108	//add z8.s,z8.s,z27.s
1364*4757b351SPierre Pronchery.if	mixin == 1
1365*4757b351SPierre Pronchery	add	x10,x10,x24,lsr #32
1366*4757b351SPierre Pronchery.endif
1367*4757b351SPierre Pronchery.inst	0x04bc018c	//add z12.s,z12.s,z28.s
1368*4757b351SPierre Pronchery.if	mixin == 1
1369*4757b351SPierre Pronchery	add	x9,x9,x10,lsl #32  // pack
1370*4757b351SPierre Pronchery.endif
// x8 and x10 are free once packed into x7 and x9: reuse them for input bytes 0-15.
1371*4757b351SPierre Pronchery.if	mixin == 1
1372*4757b351SPierre Pronchery	ldp	x8,x10,[x1],#16
1373*4757b351SPierre Pronchery.endif
1374*4757b351SPierre Pronchery.if	mixin == 1
1375*4757b351SPierre Pronchery	add	w11,w11,w25
1376*4757b351SPierre Pronchery.endif
1377*4757b351SPierre Pronchery.inst	0x04bd0021	//add z1.s,z1.s,z29.s
1378*4757b351SPierre Pronchery.if	mixin == 1
1379*4757b351SPierre Pronchery	add	x12,x12,x25,lsr #32
1380*4757b351SPierre Pronchery.endif
1381*4757b351SPierre Pronchery.inst	0x04be00a5	//add z5.s,z5.s,z30.s
1382*4757b351SPierre Pronchery.if	mixin == 1
1383*4757b351SPierre Pronchery	add	x11,x11,x12,lsl #32  // pack
1384*4757b351SPierre Pronchery.endif
1385*4757b351SPierre Pronchery.if	mixin == 1
1386*4757b351SPierre Pronchery	add	w13,w13,w26
1387*4757b351SPierre Pronchery.endif
1388*4757b351SPierre Pronchery.inst	0x04b50129	//add z9.s,z9.s,z21.s
1389*4757b351SPierre Pronchery.if	mixin == 1
1390*4757b351SPierre Pronchery	add	x14,x14,x26,lsr #32
1391*4757b351SPierre Pronchery.endif
1392*4757b351SPierre Pronchery.inst	0x04b601ad	//add z13.s,z13.s,z22.s
1393*4757b351SPierre Pronchery.if	mixin == 1
1394*4757b351SPierre Pronchery	add	x13,x13,x14,lsl #32  // pack
1395*4757b351SPierre Pronchery.endif
// Input bytes 16-31 into the freed x12 and x14.
1396*4757b351SPierre Pronchery.if	mixin == 1
1397*4757b351SPierre Pronchery	ldp	x12,x14,[x1],#16
1398*4757b351SPierre Pronchery.endif
1399*4757b351SPierre Pronchery.if	mixin == 1
1400*4757b351SPierre Pronchery	add	w15,w15,w27
1401*4757b351SPierre Pronchery.endif
1402*4757b351SPierre Pronchery.inst	0x04b70042	//add z2.s,z2.s,z23.s
1403*4757b351SPierre Pronchery.if	mixin == 1
1404*4757b351SPierre Pronchery	add	x16,x16,x27,lsr #32
1405*4757b351SPierre Pronchery.endif
1406*4757b351SPierre Pronchery.inst	0x04b800c6	//add z6.s,z6.s,z24.s
1407*4757b351SPierre Pronchery.if	mixin == 1
1408*4757b351SPierre Pronchery	add	x15,x15,x16,lsl #32  // pack
1409*4757b351SPierre Pronchery.endif
1410*4757b351SPierre Pronchery.if	mixin == 1
1411*4757b351SPierre Pronchery	add	w17,w17,w28
1412*4757b351SPierre Pronchery.endif
1413*4757b351SPierre Pronchery.inst	0x04b1014a	//add z10.s,z10.s,z17.s
1414*4757b351SPierre Pronchery.if	mixin == 1
1415*4757b351SPierre Pronchery	add	x18,x18,x28,lsr #32
1416*4757b351SPierre Pronchery.endif
1417*4757b351SPierre Pronchery.inst	0x04b201ce	//add z14.s,z14.s,z18.s
1418*4757b351SPierre Pronchery.if	mixin == 1
1419*4757b351SPierre Pronchery	add	x17,x17,x18,lsl #32  // pack
1420*4757b351SPierre Pronchery.endif
// Input bytes 32-47 into the freed x16 and x18.
1421*4757b351SPierre Pronchery.if	mixin == 1
1422*4757b351SPierre Pronchery	ldp	x16,x18,[x1],#16
1423*4757b351SPierre Pronchery.endif
1424*4757b351SPierre Pronchery.if	mixin == 1
1425*4757b351SPierre Pronchery	add	w19,w19,w29
1426*4757b351SPierre Pronchery.endif
1427*4757b351SPierre Pronchery.inst	0x04b00063	//add z3.s,z3.s,z16.s
1428*4757b351SPierre Pronchery.if	mixin == 1
1429*4757b351SPierre Pronchery	add	x20,x20,x29,lsr #32
1430*4757b351SPierre Pronchery.endif
1431*4757b351SPierre Pronchery.inst	0x04b300e7	//add z7.s,z7.s,z19.s
1432*4757b351SPierre Pronchery.if	mixin == 1
1433*4757b351SPierre Pronchery	add	x19,x19,x20,lsl #32  // pack
1434*4757b351SPierre Pronchery.endif
1435*4757b351SPierre Pronchery.if	mixin == 1
1436*4757b351SPierre Pronchery	add	w21,w21,w30
1437*4757b351SPierre Pronchery.endif
1438*4757b351SPierre Pronchery.inst	0x04b4016b	//add z11.s,z11.s,z20.s
1439*4757b351SPierre Pronchery.if	mixin == 1
1440*4757b351SPierre Pronchery	add	x22,x22,x30,lsr #32
1441*4757b351SPierre Pronchery.endif
1442*4757b351SPierre Pronchery.inst	0x04bf01ef	//add z15.s,z15.s,z31.s
1443*4757b351SPierre Pronchery.if	mixin == 1
1444*4757b351SPierre Pronchery	add	x21,x21,x22,lsl #32  // pack
1445*4757b351SPierre Pronchery.endif
// Input bytes 48-63 into the freed x20 and x22.
1446*4757b351SPierre Pronchery.if	mixin == 1
1447*4757b351SPierre Pronchery	ldp	x20,x22,[x1],#16
1448*4757b351SPierre Pronchery.endif
// Big-endian builds only (__AARCH64EB__): byte-reverse the eight packed
// 64-bit scalars x7, x9, ... x21 and, under predicate p0, the bytes inside
// every 32-bit element of z0-z15.
// The scalar rev instructions are not wrapped in a mixin test, so they are
// assembled for either value of mixin.
1449*4757b351SPierre Pronchery#ifdef	__AARCH64EB__
1450*4757b351SPierre Pronchery	rev	x7,x7
1451*4757b351SPierre Pronchery.inst	0x05a48000	//revb z0.s,p0/m,z0.s
1452*4757b351SPierre Pronchery.inst	0x05a48084	//revb z4.s,p0/m,z4.s
1453*4757b351SPierre Pronchery	rev	x9,x9
1454*4757b351SPierre Pronchery.inst	0x05a48108	//revb z8.s,p0/m,z8.s
1455*4757b351SPierre Pronchery.inst	0x05a4818c	//revb z12.s,p0/m,z12.s
1456*4757b351SPierre Pronchery	rev	x11,x11
1457*4757b351SPierre Pronchery.inst	0x05a48021	//revb z1.s,p0/m,z1.s
1458*4757b351SPierre Pronchery.inst	0x05a480a5	//revb z5.s,p0/m,z5.s
1459*4757b351SPierre Pronchery	rev	x13,x13
1460*4757b351SPierre Pronchery.inst	0x05a48129	//revb z9.s,p0/m,z9.s
1461*4757b351SPierre Pronchery.inst	0x05a481ad	//revb z13.s,p0/m,z13.s
1462*4757b351SPierre Pronchery	rev	x15,x15
1463*4757b351SPierre Pronchery.inst	0x05a48042	//revb z2.s,p0/m,z2.s
1464*4757b351SPierre Pronchery.inst	0x05a480c6	//revb z6.s,p0/m,z6.s
1465*4757b351SPierre Pronchery	rev	x17,x17
1466*4757b351SPierre Pronchery.inst	0x05a4814a	//revb z10.s,p0/m,z10.s
1467*4757b351SPierre Pronchery.inst	0x05a481ce	//revb z14.s,p0/m,z14.s
1468*4757b351SPierre Pronchery	rev	x19,x19
1469*4757b351SPierre Pronchery.inst	0x05a48063	//revb z3.s,p0/m,z3.s
1470*4757b351SPierre Pronchery.inst	0x05a480e7	//revb z7.s,p0/m,z7.s
1471*4757b351SPierre Pronchery	rev	x21,x21
1472*4757b351SPierre Pronchery.inst	0x05a4816b	//revb z11.s,p0/m,z11.s
1473*4757b351SPierre Pronchery.inst	0x05a481ef	//revb z15.s,p0/m,z15.s
1474*4757b351SPierre Pronchery#endif
// mixin == 1: step x29 by one.
// NOTE(review): presumably this counts the extra block carried in the scalar
// registers; confirm against the counter setup of this expansion.
1475*4757b351SPierre Pronchery.if	mixin == 1
1476*4757b351SPierre Pronchery	add	x29,x29,#1
1477*4757b351SPierre Pronchery.endif
// Output path selection: x5 == 4 falls through to the code below, which moves
// data with fixed 16-byte NEON ld1/st1 on v0-v24; any other x5 goes to 200.
// NOTE(review): x5 looks like the number of 32-bit lanes per vector, so 4
// would mean 128-bit vectors - confirm where x5 is initialised.
1478*4757b351SPierre Pronchery	cmp	x5,4
1479*4757b351SPierre Pronchery	b.ne	200f
// mixin == 1: XOR the packed scalar words with the 64 input bytes that were
// loaded into x8, x10, ... x22.
1480*4757b351SPierre Pronchery.if	mixin == 1
1481*4757b351SPierre Pronchery	eor	x7,x7,x8
1482*4757b351SPierre Pronchery.endif
1483*4757b351SPierre Pronchery.if	mixin == 1
1484*4757b351SPierre Pronchery	eor	x9,x9,x10
1485*4757b351SPierre Pronchery.endif
1486*4757b351SPierre Pronchery.if	mixin == 1
1487*4757b351SPierre Pronchery	eor	x11,x11,x12
1488*4757b351SPierre Pronchery.endif
// Interleave z0-z15 in groups of four registers: a 32-bit zip1/zip2 pass into
// the temporaries z17-z24, then a 64-bit zip1/zip2 pass back. With four
// 32-bit lanes per vector this is a 4x4 word transpose of each group, so that
// afterwards one register holds four consecutive words of a single lane.
1489*4757b351SPierre Pronchery.inst	0x05a46011	//zip1 z17.s,z0.s,z4.s
1490*4757b351SPierre Pronchery.inst	0x05a46412	//zip2 z18.s,z0.s,z4.s
1491*4757b351SPierre Pronchery.inst	0x05ac6113	//zip1 z19.s,z8.s,z12.s
1492*4757b351SPierre Pronchery.inst	0x05ac6514	//zip2 z20.s,z8.s,z12.s
1493*4757b351SPierre Pronchery
1494*4757b351SPierre Pronchery.inst	0x05a56035	//zip1 z21.s,z1.s,z5.s
1495*4757b351SPierre Pronchery.inst	0x05a56436	//zip2 z22.s,z1.s,z5.s
1496*4757b351SPierre Pronchery.inst	0x05ad6137	//zip1 z23.s,z9.s,z13.s
1497*4757b351SPierre Pronchery.inst	0x05ad6538	//zip2 z24.s,z9.s,z13.s
1498*4757b351SPierre Pronchery
1499*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
1500*4757b351SPierre Pronchery.inst	0x05f36624	//zip2 z4.d,z17.d,z19.d
1501*4757b351SPierre Pronchery.inst	0x05f46248	//zip1 z8.d,z18.d,z20.d
1502*4757b351SPierre Pronchery.inst	0x05f4664c	//zip2 z12.d,z18.d,z20.d
1503*4757b351SPierre Pronchery
1504*4757b351SPierre Pronchery.inst	0x05f762a1	//zip1 z1.d,z21.d,z23.d
1505*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
1506*4757b351SPierre Pronchery.inst	0x05f862c9	//zip1 z9.d,z22.d,z24.d
1507*4757b351SPierre Pronchery.inst	0x05f866cd	//zip2 z13.d,z22.d,z24.d
1508*4757b351SPierre Pronchery.if	mixin == 1
1509*4757b351SPierre Pronchery	eor	x13,x13,x14
1510*4757b351SPierre Pronchery.endif
1511*4757b351SPierre Pronchery.if	mixin == 1
1512*4757b351SPierre Pronchery	eor	x15,x15,x16
1513*4757b351SPierre Pronchery.endif
1514*4757b351SPierre Pronchery.if	mixin == 1
1515*4757b351SPierre Pronchery	eor	x17,x17,x18
1516*4757b351SPierre Pronchery.endif
// Same two-pass interleave for the remaining registers (z2/z6/z10/z14 and
// z3/z7/z11/z15).
1517*4757b351SPierre Pronchery.inst	0x05a66051	//zip1 z17.s,z2.s,z6.s
1518*4757b351SPierre Pronchery.inst	0x05a66452	//zip2 z18.s,z2.s,z6.s
1519*4757b351SPierre Pronchery.inst	0x05ae6153	//zip1 z19.s,z10.s,z14.s
1520*4757b351SPierre Pronchery.inst	0x05ae6554	//zip2 z20.s,z10.s,z14.s
1521*4757b351SPierre Pronchery
1522*4757b351SPierre Pronchery.inst	0x05a76075	//zip1 z21.s,z3.s,z7.s
1523*4757b351SPierre Pronchery.inst	0x05a76476	//zip2 z22.s,z3.s,z7.s
1524*4757b351SPierre Pronchery.inst	0x05af6177	//zip1 z23.s,z11.s,z15.s
1525*4757b351SPierre Pronchery.inst	0x05af6578	//zip2 z24.s,z11.s,z15.s
1526*4757b351SPierre Pronchery
1527*4757b351SPierre Pronchery.inst	0x05f36222	//zip1 z2.d,z17.d,z19.d
1528*4757b351SPierre Pronchery.inst	0x05f36626	//zip2 z6.d,z17.d,z19.d
1529*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
1530*4757b351SPierre Pronchery.inst	0x05f4664e	//zip2 z14.d,z18.d,z20.d
1531*4757b351SPierre Pronchery
1532*4757b351SPierre Pronchery.inst	0x05f762a3	//zip1 z3.d,z21.d,z23.d
1533*4757b351SPierre Pronchery.inst	0x05f766a7	//zip2 z7.d,z21.d,z23.d
1534*4757b351SPierre Pronchery.inst	0x05f862cb	//zip1 z11.d,z22.d,z24.d
1535*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
1536*4757b351SPierre Pronchery.if	mixin == 1
1537*4757b351SPierre Pronchery	eor	x19,x19,x20
1538*4757b351SPierre Pronchery.endif
1539*4757b351SPierre Pronchery.if	mixin == 1
1540*4757b351SPierre Pronchery	eor	x21,x21,x22
1541*4757b351SPierre Pronchery.endif
// Read 128 input bytes from [x1] (post-incremented) into v17-v24 and XOR them
// into z0-z7; the temporaries are overwritten by these loads.
1542*4757b351SPierre Pronchery	ld1	{v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64
1543*4757b351SPierre Pronchery	ld1	{v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64
1544*4757b351SPierre Pronchery.inst	0x04b13000	//eor z0.d,z0.d,z17.d
1545*4757b351SPierre Pronchery.inst	0x04b23021	//eor z1.d,z1.d,z18.d
1546*4757b351SPierre Pronchery.inst	0x04b33042	//eor z2.d,z2.d,z19.d
1547*4757b351SPierre Pronchery.inst	0x04b43063	//eor z3.d,z3.d,z20.d
1548*4757b351SPierre Pronchery.inst	0x04b53084	//eor z4.d,z4.d,z21.d
1549*4757b351SPierre Pronchery.inst	0x04b630a5	//eor z5.d,z5.d,z22.d
1550*4757b351SPierre Pronchery.inst	0x04b730c6	//eor z6.d,z6.d,z23.d
1551*4757b351SPierre Pronchery.inst	0x04b830e7	//eor z7.d,z7.d,z24.d
// Next 128 input bytes, XORed into z8-z15. When mixin == 1 the four stp
// instructions interleaved here write the 64 scalar result bytes to [x0]
// (post-incremented) ahead of the vector results.
1552*4757b351SPierre Pronchery	ld1	{v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64
1553*4757b351SPierre Pronchery	ld1	{v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64
1554*4757b351SPierre Pronchery.if	mixin == 1
1555*4757b351SPierre Pronchery	stp	x7,x9,[x0],#16
1556*4757b351SPierre Pronchery.endif
1557*4757b351SPierre Pronchery.inst	0x04b13108	//eor z8.d,z8.d,z17.d
1558*4757b351SPierre Pronchery.inst	0x04b23129	//eor z9.d,z9.d,z18.d
1559*4757b351SPierre Pronchery.if	mixin == 1
1560*4757b351SPierre Pronchery	stp	x11,x13,[x0],#16
1561*4757b351SPierre Pronchery.endif
1562*4757b351SPierre Pronchery.inst	0x04b3314a	//eor z10.d,z10.d,z19.d
1563*4757b351SPierre Pronchery.inst	0x04b4316b	//eor z11.d,z11.d,z20.d
1564*4757b351SPierre Pronchery.if	mixin == 1
1565*4757b351SPierre Pronchery	stp	x15,x17,[x0],#16
1566*4757b351SPierre Pronchery.endif
1567*4757b351SPierre Pronchery.inst	0x04b5318c	//eor z12.d,z12.d,z21.d
1568*4757b351SPierre Pronchery.inst	0x04b631ad	//eor z13.d,z13.d,z22.d
1569*4757b351SPierre Pronchery.if	mixin == 1
1570*4757b351SPierre Pronchery	stp	x19,x21,[x0],#16
1571*4757b351SPierre Pronchery.endif
1572*4757b351SPierre Pronchery.inst	0x04b731ce	//eor z14.d,z14.d,z23.d
1573*4757b351SPierre Pronchery.inst	0x04b831ef	//eor z15.d,z15.d,z24.d
// Write the 256 vector result bytes to [x0] in register order v0..v15, then
// skip the other output path.
1574*4757b351SPierre Pronchery	st1	{v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64
1575*4757b351SPierre Pronchery	st1	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
1576*4757b351SPierre Pronchery	st1	{v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64
1577*4757b351SPierre Pronchery	st1	{v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64
1578*4757b351SPierre Pronchery	b	210f
// 200: output path taken when x5 != 4. All loads and stores here are SVE
// ld1w/st1w with "MUL VL" offsets, and the pointers are advanced with addvl,
// so the amount of data moved scales with the vector length.
// z0-z15 are rearranged by two rounds of 32-bit + 64-bit zip1/zip2 through
// the temporaries z17-z24: first pairing neighbouring registers (z0 with z1,
// z2 with z3, ...), then pairing registers four apart (z0 with z4, z8 with
// z12, ...).
// NOTE(review): the net effect is presumably to make each stored vector hold
// whole consecutive output blocks for any vector length; the resulting word
// layout was not derived here - confirm against the generator script.
1579*4757b351SPierre Pronchery200:
1580*4757b351SPierre Pronchery.inst	0x05a16011	//zip1 z17.s,z0.s,z1.s
1581*4757b351SPierre Pronchery.inst	0x05a16412	//zip2 z18.s,z0.s,z1.s
1582*4757b351SPierre Pronchery.inst	0x05a36053	//zip1 z19.s,z2.s,z3.s
1583*4757b351SPierre Pronchery.inst	0x05a36454	//zip2 z20.s,z2.s,z3.s
1584*4757b351SPierre Pronchery
1585*4757b351SPierre Pronchery.inst	0x05a56095	//zip1 z21.s,z4.s,z5.s
1586*4757b351SPierre Pronchery.inst	0x05a56496	//zip2 z22.s,z4.s,z5.s
1587*4757b351SPierre Pronchery.inst	0x05a760d7	//zip1 z23.s,z6.s,z7.s
1588*4757b351SPierre Pronchery.inst	0x05a764d8	//zip2 z24.s,z6.s,z7.s
1589*4757b351SPierre Pronchery
1590*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
1591*4757b351SPierre Pronchery.inst	0x05f36621	//zip2 z1.d,z17.d,z19.d
1592*4757b351SPierre Pronchery.inst	0x05f46242	//zip1 z2.d,z18.d,z20.d
1593*4757b351SPierre Pronchery.inst	0x05f46643	//zip2 z3.d,z18.d,z20.d
1594*4757b351SPierre Pronchery
1595*4757b351SPierre Pronchery.inst	0x05f762a4	//zip1 z4.d,z21.d,z23.d
1596*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
1597*4757b351SPierre Pronchery.inst	0x05f862c6	//zip1 z6.d,z22.d,z24.d
1598*4757b351SPierre Pronchery.inst	0x05f866c7	//zip2 z7.d,z22.d,z24.d
// mixin == 1: the scalar words are XORed with the 64 input bytes held in
// x8, x10, ... x22; the eor instructions are spread between the zip groups.
1599*4757b351SPierre Pronchery.if	mixin == 1
1600*4757b351SPierre Pronchery	eor	x7,x7,x8
1601*4757b351SPierre Pronchery.endif
1602*4757b351SPierre Pronchery.if	mixin == 1
1603*4757b351SPierre Pronchery	eor	x9,x9,x10
1604*4757b351SPierre Pronchery.endif
1605*4757b351SPierre Pronchery.inst	0x05a96111	//zip1 z17.s,z8.s,z9.s
1606*4757b351SPierre Pronchery.inst	0x05a96512	//zip2 z18.s,z8.s,z9.s
1607*4757b351SPierre Pronchery.inst	0x05ab6153	//zip1 z19.s,z10.s,z11.s
1608*4757b351SPierre Pronchery.inst	0x05ab6554	//zip2 z20.s,z10.s,z11.s
1609*4757b351SPierre Pronchery
1610*4757b351SPierre Pronchery.inst	0x05ad6195	//zip1 z21.s,z12.s,z13.s
1611*4757b351SPierre Pronchery.inst	0x05ad6596	//zip2 z22.s,z12.s,z13.s
1612*4757b351SPierre Pronchery.inst	0x05af61d7	//zip1 z23.s,z14.s,z15.s
1613*4757b351SPierre Pronchery.inst	0x05af65d8	//zip2 z24.s,z14.s,z15.s
1614*4757b351SPierre Pronchery
1615*4757b351SPierre Pronchery.inst	0x05f36228	//zip1 z8.d,z17.d,z19.d
1616*4757b351SPierre Pronchery.inst	0x05f36629	//zip2 z9.d,z17.d,z19.d
1617*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
1618*4757b351SPierre Pronchery.inst	0x05f4664b	//zip2 z11.d,z18.d,z20.d
1619*4757b351SPierre Pronchery
1620*4757b351SPierre Pronchery.inst	0x05f762ac	//zip1 z12.d,z21.d,z23.d
1621*4757b351SPierre Pronchery.inst	0x05f766ad	//zip2 z13.d,z21.d,z23.d
1622*4757b351SPierre Pronchery.inst	0x05f862ce	//zip1 z14.d,z22.d,z24.d
1623*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
1624*4757b351SPierre Pronchery.if	mixin == 1
1625*4757b351SPierre Pronchery	eor	x11,x11,x12
1626*4757b351SPierre Pronchery.endif
1627*4757b351SPierre Pronchery.if	mixin == 1
1628*4757b351SPierre Pronchery	eor	x13,x13,x14
1629*4757b351SPierre Pronchery.endif
// Second round: pair registers four apart.
1630*4757b351SPierre Pronchery.inst	0x05a46011	//zip1 z17.s,z0.s,z4.s
1631*4757b351SPierre Pronchery.inst	0x05a46412	//zip2 z18.s,z0.s,z4.s
1632*4757b351SPierre Pronchery.inst	0x05ac6113	//zip1 z19.s,z8.s,z12.s
1633*4757b351SPierre Pronchery.inst	0x05ac6514	//zip2 z20.s,z8.s,z12.s
1634*4757b351SPierre Pronchery
1635*4757b351SPierre Pronchery.inst	0x05a56035	//zip1 z21.s,z1.s,z5.s
1636*4757b351SPierre Pronchery.inst	0x05a56436	//zip2 z22.s,z1.s,z5.s
1637*4757b351SPierre Pronchery.inst	0x05ad6137	//zip1 z23.s,z9.s,z13.s
1638*4757b351SPierre Pronchery.inst	0x05ad6538	//zip2 z24.s,z9.s,z13.s
1639*4757b351SPierre Pronchery
1640*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
1641*4757b351SPierre Pronchery.inst	0x05f36624	//zip2 z4.d,z17.d,z19.d
1642*4757b351SPierre Pronchery.inst	0x05f46248	//zip1 z8.d,z18.d,z20.d
1643*4757b351SPierre Pronchery.inst	0x05f4664c	//zip2 z12.d,z18.d,z20.d
1644*4757b351SPierre Pronchery
1645*4757b351SPierre Pronchery.inst	0x05f762a1	//zip1 z1.d,z21.d,z23.d
1646*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
1647*4757b351SPierre Pronchery.inst	0x05f862c9	//zip1 z9.d,z22.d,z24.d
1648*4757b351SPierre Pronchery.inst	0x05f866cd	//zip2 z13.d,z22.d,z24.d
1649*4757b351SPierre Pronchery.if	mixin == 1
1650*4757b351SPierre Pronchery	eor	x15,x15,x16
1651*4757b351SPierre Pronchery.endif
1652*4757b351SPierre Pronchery.if	mixin == 1
1653*4757b351SPierre Pronchery	eor	x17,x17,x18
1654*4757b351SPierre Pronchery.endif
1655*4757b351SPierre Pronchery.inst	0x05a66051	//zip1 z17.s,z2.s,z6.s
1656*4757b351SPierre Pronchery.inst	0x05a66452	//zip2 z18.s,z2.s,z6.s
1657*4757b351SPierre Pronchery.inst	0x05ae6153	//zip1 z19.s,z10.s,z14.s
1658*4757b351SPierre Pronchery.inst	0x05ae6554	//zip2 z20.s,z10.s,z14.s
1659*4757b351SPierre Pronchery
1660*4757b351SPierre Pronchery.inst	0x05a76075	//zip1 z21.s,z3.s,z7.s
1661*4757b351SPierre Pronchery.inst	0x05a76476	//zip2 z22.s,z3.s,z7.s
1662*4757b351SPierre Pronchery.inst	0x05af6177	//zip1 z23.s,z11.s,z15.s
1663*4757b351SPierre Pronchery.inst	0x05af6578	//zip2 z24.s,z11.s,z15.s
1664*4757b351SPierre Pronchery
1665*4757b351SPierre Pronchery.inst	0x05f36222	//zip1 z2.d,z17.d,z19.d
1666*4757b351SPierre Pronchery.inst	0x05f36626	//zip2 z6.d,z17.d,z19.d
1667*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
1668*4757b351SPierre Pronchery.inst	0x05f4664e	//zip2 z14.d,z18.d,z20.d
1669*4757b351SPierre Pronchery
1670*4757b351SPierre Pronchery.inst	0x05f762a3	//zip1 z3.d,z21.d,z23.d
1671*4757b351SPierre Pronchery.inst	0x05f766a7	//zip2 z7.d,z21.d,z23.d
1672*4757b351SPierre Pronchery.inst	0x05f862cb	//zip1 z11.d,z22.d,z24.d
1673*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
1674*4757b351SPierre Pronchery.if	mixin == 1
1675*4757b351SPierre Pronchery	eor	x19,x19,x20
1676*4757b351SPierre Pronchery.endif
1677*4757b351SPierre Pronchery.if	mixin == 1
1678*4757b351SPierre Pronchery	eor	x21,x21,x22
1679*4757b351SPierre Pronchery.endif
// Load eight vectors of input from [x1] into z17-z24, advance x1 by eight
// vector lengths, and XOR them into z0, z4, z8, z12, z1, z5, z9, z13.
1680*4757b351SPierre Pronchery.inst	0xa540a031	//ld1w {z17.s},p0/z,[x1,#0,MUL VL]
1681*4757b351SPierre Pronchery.inst	0xa541a032	//ld1w {z18.s},p0/z,[x1,#1,MUL VL]
1682*4757b351SPierre Pronchery.inst	0xa542a033	//ld1w {z19.s},p0/z,[x1,#2,MUL VL]
1683*4757b351SPierre Pronchery.inst	0xa543a034	//ld1w {z20.s},p0/z,[x1,#3,MUL VL]
1684*4757b351SPierre Pronchery.inst	0xa544a035	//ld1w {z21.s},p0/z,[x1,#4,MUL VL]
1685*4757b351SPierre Pronchery.inst	0xa545a036	//ld1w {z22.s},p0/z,[x1,#5,MUL VL]
1686*4757b351SPierre Pronchery.inst	0xa546a037	//ld1w {z23.s},p0/z,[x1,#6,MUL VL]
1687*4757b351SPierre Pronchery.inst	0xa547a038	//ld1w {z24.s},p0/z,[x1,#7,MUL VL]
1688*4757b351SPierre Pronchery.inst	0x04215101	//addvl x1,x1,8
1689*4757b351SPierre Pronchery.inst	0x04b13000	//eor z0.d,z0.d,z17.d
1690*4757b351SPierre Pronchery.inst	0x04b23084	//eor z4.d,z4.d,z18.d
1691*4757b351SPierre Pronchery.inst	0x04b33108	//eor z8.d,z8.d,z19.d
1692*4757b351SPierre Pronchery.inst	0x04b4318c	//eor z12.d,z12.d,z20.d
1693*4757b351SPierre Pronchery.inst	0x04b53021	//eor z1.d,z1.d,z21.d
1694*4757b351SPierre Pronchery.inst	0x04b630a5	//eor z5.d,z5.d,z22.d
1695*4757b351SPierre Pronchery.inst	0x04b73129	//eor z9.d,z9.d,z23.d
1696*4757b351SPierre Pronchery.inst	0x04b831ad	//eor z13.d,z13.d,z24.d
// Next eight input vectors, XORed into z2, z6, z10, z14, z3, z7, z11, z15.
// When mixin == 1 the interleaved stp instructions write the 64 scalar result
// bytes to [x0] (post-incremented) before any vector store.
1697*4757b351SPierre Pronchery.inst	0xa540a031	//ld1w {z17.s},p0/z,[x1,#0,MUL VL]
1698*4757b351SPierre Pronchery.inst	0xa541a032	//ld1w {z18.s},p0/z,[x1,#1,MUL VL]
1699*4757b351SPierre Pronchery.inst	0xa542a033	//ld1w {z19.s},p0/z,[x1,#2,MUL VL]
1700*4757b351SPierre Pronchery.inst	0xa543a034	//ld1w {z20.s},p0/z,[x1,#3,MUL VL]
1701*4757b351SPierre Pronchery.inst	0xa544a035	//ld1w {z21.s},p0/z,[x1,#4,MUL VL]
1702*4757b351SPierre Pronchery.inst	0xa545a036	//ld1w {z22.s},p0/z,[x1,#5,MUL VL]
1703*4757b351SPierre Pronchery.inst	0xa546a037	//ld1w {z23.s},p0/z,[x1,#6,MUL VL]
1704*4757b351SPierre Pronchery.inst	0xa547a038	//ld1w {z24.s},p0/z,[x1,#7,MUL VL]
1705*4757b351SPierre Pronchery.inst	0x04215101	//addvl x1,x1,8
1706*4757b351SPierre Pronchery.if	mixin == 1
1707*4757b351SPierre Pronchery	stp	x7,x9,[x0],#16
1708*4757b351SPierre Pronchery.endif
1709*4757b351SPierre Pronchery.inst	0x04b13042	//eor z2.d,z2.d,z17.d
1710*4757b351SPierre Pronchery.inst	0x04b230c6	//eor z6.d,z6.d,z18.d
1711*4757b351SPierre Pronchery.if	mixin == 1
1712*4757b351SPierre Pronchery	stp	x11,x13,[x0],#16
1713*4757b351SPierre Pronchery.endif
1714*4757b351SPierre Pronchery.inst	0x04b3314a	//eor z10.d,z10.d,z19.d
1715*4757b351SPierre Pronchery.inst	0x04b431ce	//eor z14.d,z14.d,z20.d
1716*4757b351SPierre Pronchery.if	mixin == 1
1717*4757b351SPierre Pronchery	stp	x15,x17,[x0],#16
1718*4757b351SPierre Pronchery.endif
1719*4757b351SPierre Pronchery.inst	0x04b53063	//eor z3.d,z3.d,z21.d
1720*4757b351SPierre Pronchery.inst	0x04b630e7	//eor z7.d,z7.d,z22.d
1721*4757b351SPierre Pronchery.if	mixin == 1
1722*4757b351SPierre Pronchery	stp	x19,x21,[x0],#16
1723*4757b351SPierre Pronchery.endif
1724*4757b351SPierre Pronchery.inst	0x04b7316b	//eor z11.d,z11.d,z23.d
1725*4757b351SPierre Pronchery.inst	0x04b831ef	//eor z15.d,z15.d,z24.d
// Store the sixteen result vectors to [x0] in the same register order used
// for the XORs, advancing x0 by eight vector lengths after each group.
1726*4757b351SPierre Pronchery.inst	0xe540e000	//st1w {z0.s},p0,[x0,#0,MUL VL]
1727*4757b351SPierre Pronchery.inst	0xe541e004	//st1w {z4.s},p0,[x0,#1,MUL VL]
1728*4757b351SPierre Pronchery.inst	0xe542e008	//st1w {z8.s},p0,[x0,#2,MUL VL]
1729*4757b351SPierre Pronchery.inst	0xe543e00c	//st1w {z12.s},p0,[x0,#3,MUL VL]
1730*4757b351SPierre Pronchery.inst	0xe544e001	//st1w {z1.s},p0,[x0,#4,MUL VL]
1731*4757b351SPierre Pronchery.inst	0xe545e005	//st1w {z5.s},p0,[x0,#5,MUL VL]
1732*4757b351SPierre Pronchery.inst	0xe546e009	//st1w {z9.s},p0,[x0,#6,MUL VL]
1733*4757b351SPierre Pronchery.inst	0xe547e00d	//st1w {z13.s},p0,[x0,#7,MUL VL]
1734*4757b351SPierre Pronchery.inst	0x04205100	//addvl x0,x0,8
1735*4757b351SPierre Pronchery.inst	0xe540e002	//st1w {z2.s},p0,[x0,#0,MUL VL]
1736*4757b351SPierre Pronchery.inst	0xe541e006	//st1w {z6.s},p0,[x0,#1,MUL VL]
1737*4757b351SPierre Pronchery.inst	0xe542e00a	//st1w {z10.s},p0,[x0,#2,MUL VL]
1738*4757b351SPierre Pronchery.inst	0xe543e00e	//st1w {z14.s},p0,[x0,#3,MUL VL]
1739*4757b351SPierre Pronchery.inst	0xe544e003	//st1w {z3.s},p0,[x0,#4,MUL VL]
1740*4757b351SPierre Pronchery.inst	0xe545e007	//st1w {z7.s},p0,[x0,#5,MUL VL]
1741*4757b351SPierre Pronchery.inst	0xe546e00b	//st1w {z11.s},p0,[x0,#6,MUL VL]
1742*4757b351SPierre Pronchery.inst	0xe547e00f	//st1w {z15.s},p0,[x0,#7,MUL VL]
1743*4757b351SPierre Pronchery.inst	0x04205100	//addvl x0,x0,8
// 210: join point of the two output paths. incw adds the number of 32-bit
// elements in one vector to x29, i.e. one step per lane processed.
1744*4757b351SPierre Pronchery210:
1745*4757b351SPierre Pronchery.inst	0x04b0e3fd	//incw x29, ALL, MUL #1
// 110: common exit of this expansion; jump over the alternative code that
// starts at the following "1:" label.
1746*4757b351SPierre Pronchery110:
1747*4757b351SPierre Pronchery	b	2f
// Entry of the alternative expansion (labels 1: / 100:).
// x7 = x2 - x5*64. If that is negative, fewer than x5 64-byte units remain and
// control goes to the next 110 label. Otherwise x2 is updated; the mixin
// variant below is used only when at least 64 more bytes remain after that,
// else control goes to 101. "mov" leaves the flags alone, so b.eq still tests
// the result of the subs.
// NOTE(review): x2 is presumably the remaining byte count - confirm at the
// function entry.
1748*4757b351SPierre Pronchery1:
1749*4757b351SPierre Pronchery.align	5
1750*4757b351SPierre Pronchery100:
1751*4757b351SPierre Pronchery	subs	x7,x2,x5,lsl #6
1752*4757b351SPierre Pronchery	b.lt	110f
1753*4757b351SPierre Pronchery	mov	x2,x7
1754*4757b351SPierre Pronchery	b.eq	101f
1755*4757b351SPierre Pronchery	cmp	x2,64
1756*4757b351SPierre Pronchery	b.lt	101f
// Assembler symbol: from here on the ".if mixin == 1" arms are assembled.
1757*4757b351SPierre Pronchery	mixin=1
// Broadcast the sixteen 32-bit words packed in x23-x30 (low word, then high
// word obtained with lsr #32) into every lane of z0-z15. For x23-x27 a second
// copy is broadcast into z25-z30 and z21-z24; those are the registers added
// back after the rounds. The scalar working set w7-w22 receives the same
// words: low words by the guarded "mov", high words as a side effect of the
// lsr that feeds the dup.
// NOTE(review): x18 is written here as scratch. AAPCS64 designates x18 as the
// platform register, so confirm this is acceptable on every target this file
// is built for. x29 and x30 are likewise read as plain data, which relies on
// the prologue having saved the frame pointer and link register.
1758*4757b351SPierre Pronchery	lsr	x8,x23,#32
1759*4757b351SPierre Pronchery.inst	0x05a03ae0	//dup z0.s,w23
1760*4757b351SPierre Pronchery.inst	0x05a03af9	//dup z25.s,w23
1761*4757b351SPierre Pronchery.if	mixin == 1
1762*4757b351SPierre Pronchery	mov	w7,w23
1763*4757b351SPierre Pronchery.endif
1764*4757b351SPierre Pronchery.inst	0x05a03904	//dup z4.s,w8
1765*4757b351SPierre Pronchery.inst	0x05a0391a	//dup z26.s,w8
1766*4757b351SPierre Pronchery	lsr	x10,x24,#32
1767*4757b351SPierre Pronchery.inst	0x05a03b08	//dup z8.s,w24
1768*4757b351SPierre Pronchery.inst	0x05a03b1b	//dup z27.s,w24
1769*4757b351SPierre Pronchery.if	mixin == 1
1770*4757b351SPierre Pronchery	mov	w9,w24
1771*4757b351SPierre Pronchery.endif
1772*4757b351SPierre Pronchery.inst	0x05a0394c	//dup z12.s,w10
1773*4757b351SPierre Pronchery.inst	0x05a0395c	//dup z28.s,w10
1774*4757b351SPierre Pronchery	lsr	x12,x25,#32
1775*4757b351SPierre Pronchery.inst	0x05a03b21	//dup z1.s,w25
1776*4757b351SPierre Pronchery.inst	0x05a03b3d	//dup z29.s,w25
1777*4757b351SPierre Pronchery.if	mixin == 1
1778*4757b351SPierre Pronchery	mov	w11,w25
1779*4757b351SPierre Pronchery.endif
1780*4757b351SPierre Pronchery.inst	0x05a03985	//dup z5.s,w12
1781*4757b351SPierre Pronchery.inst	0x05a0399e	//dup z30.s,w12
1782*4757b351SPierre Pronchery	lsr	x14,x26,#32
1783*4757b351SPierre Pronchery.inst	0x05a03b49	//dup z9.s,w26
1784*4757b351SPierre Pronchery.inst	0x05a03b55	//dup z21.s,w26
1785*4757b351SPierre Pronchery.if	mixin == 1
1786*4757b351SPierre Pronchery	mov	w13,w26
1787*4757b351SPierre Pronchery.endif
1788*4757b351SPierre Pronchery.inst	0x05a039cd	//dup z13.s,w14
1789*4757b351SPierre Pronchery.inst	0x05a039d6	//dup z22.s,w14
1790*4757b351SPierre Pronchery	lsr	x16,x27,#32
1791*4757b351SPierre Pronchery.inst	0x05a03b62	//dup z2.s,w27
1792*4757b351SPierre Pronchery.inst	0x05a03b77	//dup z23.s,w27
1793*4757b351SPierre Pronchery.if	mixin == 1
1794*4757b351SPierre Pronchery	mov	w15,w27
1795*4757b351SPierre Pronchery.endif
1796*4757b351SPierre Pronchery.inst	0x05a03a06	//dup z6.s,w16
1797*4757b351SPierre Pronchery.inst	0x05a03a18	//dup z24.s,w16
// The words from x28 and x30 are broadcast only into the working registers
// (z10, z14, z11, z15); no second copy is made at this point.
1798*4757b351SPierre Pronchery	lsr	x18,x28,#32
1799*4757b351SPierre Pronchery.inst	0x05a03b8a	//dup z10.s,w28
1800*4757b351SPierre Pronchery.if	mixin == 1
1801*4757b351SPierre Pronchery	mov	w17,w28
1802*4757b351SPierre Pronchery.endif
1803*4757b351SPierre Pronchery.inst	0x05a03a4e	//dup z14.s,w18
1804*4757b351SPierre Pronchery	lsr	x22,x30,#32
1805*4757b351SPierre Pronchery.inst	0x05a03bcb	//dup z11.s,w30
1806*4757b351SPierre Pronchery.if	mixin == 1
1807*4757b351SPierre Pronchery	mov	w21,w30
1808*4757b351SPierre Pronchery.endif
1809*4757b351SPierre Pronchery.inst	0x05a03acf	//dup z15.s,w22
// Low word of x29 is a per-lane running value: the scalar copy w19 takes w29
// itself, and the vector lanes of z3 (with a copy in z16) take w29+1, w29+2,
// ... via "index". mixin was set to 1 just above, so the .else arm (lanes
// starting at w29) is not assembled in this expansion.
1810*4757b351SPierre Pronchery.if	mixin == 1
1811*4757b351SPierre Pronchery	add	w20,w29,#1
1812*4757b351SPierre Pronchery	mov	w19,w29
1813*4757b351SPierre Pronchery.inst	0x04a14690	//index z16.s,w20,1
1814*4757b351SPierre Pronchery.inst	0x04a14683	//index z3.s,w20,1
1815*4757b351SPierre Pronchery.else
1816*4757b351SPierre Pronchery.inst	0x04a147b0	//index z16.s,w29,1
1817*4757b351SPierre Pronchery.inst	0x04a147a3	//index z3.s,w29,1
1818*4757b351SPierre Pronchery.endif
// x20 was only a temporary for w29+1; it now takes the high word of x29,
// which is broadcast into z7.
1819*4757b351SPierre Pronchery	lsr	x20,x29,#32
1820*4757b351SPierre Pronchery.inst	0x05a03a87	//dup z7.s,w20
// x6 = iteration count for the round loop at label 10.
1821*4757b351SPierre Pronchery	mov	x6,#10
1822*4757b351SPierre Pronchery10:
1823*4757b351SPierre Pronchery.align	5
1824*4757b351SPierre Pronchery.inst	0x04a10000	//add z0.s,z0.s,z1.s
1825*4757b351SPierre Pronchery.if	mixin == 1
1826*4757b351SPierre Pronchery	add	w7,w7,w11
1827*4757b351SPierre Pronchery.endif
1828*4757b351SPierre Pronchery.inst	0x04a50084	//add z4.s,z4.s,z5.s
1829*4757b351SPierre Pronchery.if	mixin == 1
1830*4757b351SPierre Pronchery	add	w8,w8,w12
1831*4757b351SPierre Pronchery.endif
1832*4757b351SPierre Pronchery.inst	0x04a90108	//add z8.s,z8.s,z9.s
1833*4757b351SPierre Pronchery.if	mixin == 1
1834*4757b351SPierre Pronchery	add	w9,w9,w13
1835*4757b351SPierre Pronchery.endif
1836*4757b351SPierre Pronchery.inst	0x04ad018c	//add z12.s,z12.s,z13.s
1837*4757b351SPierre Pronchery.if	mixin == 1
1838*4757b351SPierre Pronchery	add	w10,w10,w14
1839*4757b351SPierre Pronchery.endif
1840*4757b351SPierre Pronchery.inst	0x04a03063	//eor z3.d,z3.d,z0.d
1841*4757b351SPierre Pronchery.if	mixin == 1
1842*4757b351SPierre Pronchery	eor	w19,w19,w7
1843*4757b351SPierre Pronchery.endif
1844*4757b351SPierre Pronchery.inst	0x04a430e7	//eor z7.d,z7.d,z4.d
1845*4757b351SPierre Pronchery.if	mixin == 1
1846*4757b351SPierre Pronchery	eor	w20,w20,w8
1847*4757b351SPierre Pronchery.endif
1848*4757b351SPierre Pronchery.inst	0x04a8316b	//eor z11.d,z11.d,z8.d
1849*4757b351SPierre Pronchery.if	mixin == 1
1850*4757b351SPierre Pronchery	eor	w21,w21,w9
1851*4757b351SPierre Pronchery.endif
1852*4757b351SPierre Pronchery.inst	0x04ac31ef	//eor z15.d,z15.d,z12.d
1853*4757b351SPierre Pronchery.if	mixin == 1
1854*4757b351SPierre Pronchery	eor	w22,w22,w10
1855*4757b351SPierre Pronchery.endif
1856*4757b351SPierre Pronchery.inst	0x05a58063	//revh z3.s,p0/m,z3.s
1857*4757b351SPierre Pronchery.if	mixin == 1
1858*4757b351SPierre Pronchery	ror	w19,w19,#16
1859*4757b351SPierre Pronchery.endif
1860*4757b351SPierre Pronchery.inst	0x05a580e7	//revh z7.s,p0/m,z7.s
1861*4757b351SPierre Pronchery.if	mixin == 1
1862*4757b351SPierre Pronchery	ror	w20,w20,#16
1863*4757b351SPierre Pronchery.endif
1864*4757b351SPierre Pronchery.inst	0x05a5816b	//revh z11.s,p0/m,z11.s
1865*4757b351SPierre Pronchery.if	mixin == 1
1866*4757b351SPierre Pronchery	ror	w21,w21,#16
1867*4757b351SPierre Pronchery.endif
1868*4757b351SPierre Pronchery.inst	0x05a581ef	//revh z15.s,p0/m,z15.s
1869*4757b351SPierre Pronchery.if	mixin == 1
1870*4757b351SPierre Pronchery	ror	w22,w22,#16
1871*4757b351SPierre Pronchery.endif
1872*4757b351SPierre Pronchery.inst	0x04a30042	//add z2.s,z2.s,z3.s
1873*4757b351SPierre Pronchery.if	mixin == 1
1874*4757b351SPierre Pronchery	add	w15,w15,w19
1875*4757b351SPierre Pronchery.endif
1876*4757b351SPierre Pronchery.inst	0x04a700c6	//add z6.s,z6.s,z7.s
1877*4757b351SPierre Pronchery.if	mixin == 1
1878*4757b351SPierre Pronchery	add	w16,w16,w20
1879*4757b351SPierre Pronchery.endif
1880*4757b351SPierre Pronchery.inst	0x04ab014a	//add z10.s,z10.s,z11.s
1881*4757b351SPierre Pronchery.if	mixin == 1
1882*4757b351SPierre Pronchery	add	w17,w17,w21
1883*4757b351SPierre Pronchery.endif
1884*4757b351SPierre Pronchery.inst	0x04af01ce	//add z14.s,z14.s,z15.s
1885*4757b351SPierre Pronchery.if	mixin == 1
1886*4757b351SPierre Pronchery	add	w18,w18,w22
1887*4757b351SPierre Pronchery.endif
1888*4757b351SPierre Pronchery.inst	0x04a23021	//eor z1.d,z1.d,z2.d
1889*4757b351SPierre Pronchery.if	mixin == 1
1890*4757b351SPierre Pronchery	eor	w11,w11,w15
1891*4757b351SPierre Pronchery.endif
1892*4757b351SPierre Pronchery.inst	0x04a630a5	//eor z5.d,z5.d,z6.d
1893*4757b351SPierre Pronchery.if	mixin == 1
1894*4757b351SPierre Pronchery	eor	w12,w12,w16
1895*4757b351SPierre Pronchery.endif
1896*4757b351SPierre Pronchery.inst	0x04aa3129	//eor z9.d,z9.d,z10.d
1897*4757b351SPierre Pronchery.if	mixin == 1
1898*4757b351SPierre Pronchery	eor	w13,w13,w17
1899*4757b351SPierre Pronchery.endif
1900*4757b351SPierre Pronchery.inst	0x04ae31ad	//eor z13.d,z13.d,z14.d
1901*4757b351SPierre Pronchery.if	mixin == 1
1902*4757b351SPierre Pronchery	eor	w14,w14,w18
1903*4757b351SPierre Pronchery.endif
1904*4757b351SPierre Pronchery.inst	0x046c9c31	//lsl z17.s,z1.s,12
1905*4757b351SPierre Pronchery.inst	0x046c9cb2	//lsl z18.s,z5.s,12
1906*4757b351SPierre Pronchery.inst	0x046c9d33	//lsl z19.s,z9.s,12
1907*4757b351SPierre Pronchery.inst	0x046c9db4	//lsl z20.s,z13.s,12
1908*4757b351SPierre Pronchery.inst	0x046c9421	//lsr z1.s,z1.s,20
1909*4757b351SPierre Pronchery.if	mixin == 1
1910*4757b351SPierre Pronchery	ror	w11,w11,20
1911*4757b351SPierre Pronchery.endif
1912*4757b351SPierre Pronchery.inst	0x046c94a5	//lsr z5.s,z5.s,20
1913*4757b351SPierre Pronchery.if	mixin == 1
1914*4757b351SPierre Pronchery	ror	w12,w12,20
1915*4757b351SPierre Pronchery.endif
1916*4757b351SPierre Pronchery.inst	0x046c9529	//lsr z9.s,z9.s,20
1917*4757b351SPierre Pronchery.if	mixin == 1
1918*4757b351SPierre Pronchery	ror	w13,w13,20
1919*4757b351SPierre Pronchery.endif
1920*4757b351SPierre Pronchery.inst	0x046c95ad	//lsr z13.s,z13.s,20
1921*4757b351SPierre Pronchery.if	mixin == 1
1922*4757b351SPierre Pronchery	ror	w14,w14,20
1923*4757b351SPierre Pronchery.endif
1924*4757b351SPierre Pronchery.inst	0x04713021	//orr z1.d,z1.d,z17.d
1925*4757b351SPierre Pronchery.inst	0x047230a5	//orr z5.d,z5.d,z18.d
1926*4757b351SPierre Pronchery.inst	0x04733129	//orr z9.d,z9.d,z19.d
1927*4757b351SPierre Pronchery.inst	0x047431ad	//orr z13.d,z13.d,z20.d
1928*4757b351SPierre Pronchery.inst	0x04a10000	//add z0.s,z0.s,z1.s
1929*4757b351SPierre Pronchery.if	mixin == 1
1930*4757b351SPierre Pronchery	add	w7,w7,w11
1931*4757b351SPierre Pronchery.endif
1932*4757b351SPierre Pronchery.inst	0x04a50084	//add z4.s,z4.s,z5.s
1933*4757b351SPierre Pronchery.if	mixin == 1
1934*4757b351SPierre Pronchery	add	w8,w8,w12
1935*4757b351SPierre Pronchery.endif
1936*4757b351SPierre Pronchery.inst	0x04a90108	//add z8.s,z8.s,z9.s
1937*4757b351SPierre Pronchery.if	mixin == 1
1938*4757b351SPierre Pronchery	add	w9,w9,w13
1939*4757b351SPierre Pronchery.endif
1940*4757b351SPierre Pronchery.inst	0x04ad018c	//add z12.s,z12.s,z13.s
1941*4757b351SPierre Pronchery.if	mixin == 1
1942*4757b351SPierre Pronchery	add	w10,w10,w14
1943*4757b351SPierre Pronchery.endif
1944*4757b351SPierre Pronchery.inst	0x04a03063	//eor z3.d,z3.d,z0.d
1945*4757b351SPierre Pronchery.if	mixin == 1
1946*4757b351SPierre Pronchery	eor	w19,w19,w7
1947*4757b351SPierre Pronchery.endif
1948*4757b351SPierre Pronchery.inst	0x04a430e7	//eor z7.d,z7.d,z4.d
1949*4757b351SPierre Pronchery.if	mixin == 1
1950*4757b351SPierre Pronchery	eor	w20,w20,w8
1951*4757b351SPierre Pronchery.endif
1952*4757b351SPierre Pronchery.inst	0x04a8316b	//eor z11.d,z11.d,z8.d
1953*4757b351SPierre Pronchery.if	mixin == 1
1954*4757b351SPierre Pronchery	eor	w21,w21,w9
1955*4757b351SPierre Pronchery.endif
1956*4757b351SPierre Pronchery.inst	0x04ac31ef	//eor z15.d,z15.d,z12.d
1957*4757b351SPierre Pronchery.if	mixin == 1
1958*4757b351SPierre Pronchery	eor	w22,w22,w10
1959*4757b351SPierre Pronchery.endif
1960*4757b351SPierre Pronchery.inst	0x053f3063	//tbl z3.b,{z3.b},z31.b
1961*4757b351SPierre Pronchery.if	mixin == 1
1962*4757b351SPierre Pronchery	ror	w19,w19,#24
1963*4757b351SPierre Pronchery.endif
1964*4757b351SPierre Pronchery.inst	0x053f30e7	//tbl z7.b,{z7.b},z31.b
1965*4757b351SPierre Pronchery.if	mixin == 1
1966*4757b351SPierre Pronchery	ror	w20,w20,#24
1967*4757b351SPierre Pronchery.endif
1968*4757b351SPierre Pronchery.inst	0x053f316b	//tbl z11.b,{z11.b},z31.b
1969*4757b351SPierre Pronchery.if	mixin == 1
1970*4757b351SPierre Pronchery	ror	w21,w21,#24
1971*4757b351SPierre Pronchery.endif
1972*4757b351SPierre Pronchery.inst	0x053f31ef	//tbl z15.b,{z15.b},z31.b
1973*4757b351SPierre Pronchery.if	mixin == 1
1974*4757b351SPierre Pronchery	ror	w22,w22,#24
1975*4757b351SPierre Pronchery.endif
1976*4757b351SPierre Pronchery.inst	0x04a30042	//add z2.s,z2.s,z3.s
1977*4757b351SPierre Pronchery.if	mixin == 1
1978*4757b351SPierre Pronchery	add	w15,w15,w19
1979*4757b351SPierre Pronchery.endif
1980*4757b351SPierre Pronchery.inst	0x04a700c6	//add z6.s,z6.s,z7.s
1981*4757b351SPierre Pronchery.if	mixin == 1
1982*4757b351SPierre Pronchery	add	w16,w16,w20
1983*4757b351SPierre Pronchery.endif
1984*4757b351SPierre Pronchery.inst	0x04ab014a	//add z10.s,z10.s,z11.s
1985*4757b351SPierre Pronchery.if	mixin == 1
1986*4757b351SPierre Pronchery	add	w17,w17,w21
1987*4757b351SPierre Pronchery.endif
1988*4757b351SPierre Pronchery.inst	0x04af01ce	//add z14.s,z14.s,z15.s
1989*4757b351SPierre Pronchery.if	mixin == 1
1990*4757b351SPierre Pronchery	add	w18,w18,w22
1991*4757b351SPierre Pronchery.endif
1992*4757b351SPierre Pronchery.inst	0x04a23021	//eor z1.d,z1.d,z2.d
1993*4757b351SPierre Pronchery.if	mixin == 1
1994*4757b351SPierre Pronchery	eor	w11,w11,w15
1995*4757b351SPierre Pronchery.endif
1996*4757b351SPierre Pronchery.inst	0x04a630a5	//eor z5.d,z5.d,z6.d
1997*4757b351SPierre Pronchery.if	mixin == 1
1998*4757b351SPierre Pronchery	eor	w12,w12,w16
1999*4757b351SPierre Pronchery.endif
2000*4757b351SPierre Pronchery.inst	0x04aa3129	//eor z9.d,z9.d,z10.d
2001*4757b351SPierre Pronchery.if	mixin == 1
2002*4757b351SPierre Pronchery	eor	w13,w13,w17
2003*4757b351SPierre Pronchery.endif
2004*4757b351SPierre Pronchery.inst	0x04ae31ad	//eor z13.d,z13.d,z14.d
2005*4757b351SPierre Pronchery.if	mixin == 1
2006*4757b351SPierre Pronchery	eor	w14,w14,w18
2007*4757b351SPierre Pronchery.endif
2008*4757b351SPierre Pronchery.inst	0x04679c31	//lsl z17.s,z1.s,7
2009*4757b351SPierre Pronchery.inst	0x04679cb2	//lsl z18.s,z5.s,7
2010*4757b351SPierre Pronchery.inst	0x04679d33	//lsl z19.s,z9.s,7
2011*4757b351SPierre Pronchery.inst	0x04679db4	//lsl z20.s,z13.s,7
2012*4757b351SPierre Pronchery.inst	0x04679421	//lsr z1.s,z1.s,25
2013*4757b351SPierre Pronchery.if	mixin == 1
2014*4757b351SPierre Pronchery	ror	w11,w11,25
2015*4757b351SPierre Pronchery.endif
2016*4757b351SPierre Pronchery.inst	0x046794a5	//lsr z5.s,z5.s,25
2017*4757b351SPierre Pronchery.if	mixin == 1
2018*4757b351SPierre Pronchery	ror	w12,w12,25
2019*4757b351SPierre Pronchery.endif
2020*4757b351SPierre Pronchery.inst	0x04679529	//lsr z9.s,z9.s,25
2021*4757b351SPierre Pronchery.if	mixin == 1
2022*4757b351SPierre Pronchery	ror	w13,w13,25
2023*4757b351SPierre Pronchery.endif
2024*4757b351SPierre Pronchery.inst	0x046795ad	//lsr z13.s,z13.s,25
2025*4757b351SPierre Pronchery.if	mixin == 1
2026*4757b351SPierre Pronchery	ror	w14,w14,25
2027*4757b351SPierre Pronchery.endif
2028*4757b351SPierre Pronchery.inst	0x04713021	//orr z1.d,z1.d,z17.d
2029*4757b351SPierre Pronchery.inst	0x047230a5	//orr z5.d,z5.d,z18.d
2030*4757b351SPierre Pronchery.inst	0x04733129	//orr z9.d,z9.d,z19.d
2031*4757b351SPierre Pronchery.inst	0x047431ad	//orr z13.d,z13.d,z20.d
2032*4757b351SPierre Pronchery.inst	0x04a50000	//add z0.s,z0.s,z5.s
2033*4757b351SPierre Pronchery.if	mixin == 1
2034*4757b351SPierre Pronchery	add	w7,w7,w12
2035*4757b351SPierre Pronchery.endif
2036*4757b351SPierre Pronchery.inst	0x04a90084	//add z4.s,z4.s,z9.s
2037*4757b351SPierre Pronchery.if	mixin == 1
2038*4757b351SPierre Pronchery	add	w8,w8,w13
2039*4757b351SPierre Pronchery.endif
2040*4757b351SPierre Pronchery.inst	0x04ad0108	//add z8.s,z8.s,z13.s
2041*4757b351SPierre Pronchery.if	mixin == 1
2042*4757b351SPierre Pronchery	add	w9,w9,w14
2043*4757b351SPierre Pronchery.endif
2044*4757b351SPierre Pronchery.inst	0x04a1018c	//add z12.s,z12.s,z1.s
2045*4757b351SPierre Pronchery.if	mixin == 1
2046*4757b351SPierre Pronchery	add	w10,w10,w11
2047*4757b351SPierre Pronchery.endif
2048*4757b351SPierre Pronchery.inst	0x04a031ef	//eor z15.d,z15.d,z0.d
2049*4757b351SPierre Pronchery.if	mixin == 1
2050*4757b351SPierre Pronchery	eor	w22,w22,w7
2051*4757b351SPierre Pronchery.endif
2052*4757b351SPierre Pronchery.inst	0x04a43063	//eor z3.d,z3.d,z4.d
2053*4757b351SPierre Pronchery.if	mixin == 1
2054*4757b351SPierre Pronchery	eor	w19,w19,w8
2055*4757b351SPierre Pronchery.endif
2056*4757b351SPierre Pronchery.inst	0x04a830e7	//eor z7.d,z7.d,z8.d
2057*4757b351SPierre Pronchery.if	mixin == 1
2058*4757b351SPierre Pronchery	eor	w20,w20,w9
2059*4757b351SPierre Pronchery.endif
2060*4757b351SPierre Pronchery.inst	0x04ac316b	//eor z11.d,z11.d,z12.d
2061*4757b351SPierre Pronchery.if	mixin == 1
2062*4757b351SPierre Pronchery	eor	w21,w21,w10
2063*4757b351SPierre Pronchery.endif
2064*4757b351SPierre Pronchery.inst	0x05a581ef	//revh z15.s,p0/m,z15.s
2065*4757b351SPierre Pronchery.if	mixin == 1
2066*4757b351SPierre Pronchery	ror	w22,w22,#16
2067*4757b351SPierre Pronchery.endif
2068*4757b351SPierre Pronchery.inst	0x05a58063	//revh z3.s,p0/m,z3.s
2069*4757b351SPierre Pronchery.if	mixin == 1
2070*4757b351SPierre Pronchery	ror	w19,w19,#16
2071*4757b351SPierre Pronchery.endif
2072*4757b351SPierre Pronchery.inst	0x05a580e7	//revh z7.s,p0/m,z7.s
2073*4757b351SPierre Pronchery.if	mixin == 1
2074*4757b351SPierre Pronchery	ror	w20,w20,#16
2075*4757b351SPierre Pronchery.endif
2076*4757b351SPierre Pronchery.inst	0x05a5816b	//revh z11.s,p0/m,z11.s
2077*4757b351SPierre Pronchery.if	mixin == 1
2078*4757b351SPierre Pronchery	ror	w21,w21,#16
2079*4757b351SPierre Pronchery.endif
2080*4757b351SPierre Pronchery.inst	0x04af014a	//add z10.s,z10.s,z15.s
2081*4757b351SPierre Pronchery.if	mixin == 1
2082*4757b351SPierre Pronchery	add	w17,w17,w22
2083*4757b351SPierre Pronchery.endif
2084*4757b351SPierre Pronchery.inst	0x04a301ce	//add z14.s,z14.s,z3.s
2085*4757b351SPierre Pronchery.if	mixin == 1
2086*4757b351SPierre Pronchery	add	w18,w18,w19
2087*4757b351SPierre Pronchery.endif
2088*4757b351SPierre Pronchery.inst	0x04a70042	//add z2.s,z2.s,z7.s
2089*4757b351SPierre Pronchery.if	mixin == 1
2090*4757b351SPierre Pronchery	add	w15,w15,w20
2091*4757b351SPierre Pronchery.endif
2092*4757b351SPierre Pronchery.inst	0x04ab00c6	//add z6.s,z6.s,z11.s
2093*4757b351SPierre Pronchery.if	mixin == 1
2094*4757b351SPierre Pronchery	add	w16,w16,w21
2095*4757b351SPierre Pronchery.endif
2096*4757b351SPierre Pronchery.inst	0x04aa30a5	//eor z5.d,z5.d,z10.d
2097*4757b351SPierre Pronchery.if	mixin == 1
2098*4757b351SPierre Pronchery	eor	w12,w12,w17
2099*4757b351SPierre Pronchery.endif
2100*4757b351SPierre Pronchery.inst	0x04ae3129	//eor z9.d,z9.d,z14.d
2101*4757b351SPierre Pronchery.if	mixin == 1
2102*4757b351SPierre Pronchery	eor	w13,w13,w18
2103*4757b351SPierre Pronchery.endif
2104*4757b351SPierre Pronchery.inst	0x04a231ad	//eor z13.d,z13.d,z2.d
2105*4757b351SPierre Pronchery.if	mixin == 1
2106*4757b351SPierre Pronchery	eor	w14,w14,w15
2107*4757b351SPierre Pronchery.endif
2108*4757b351SPierre Pronchery.inst	0x04a63021	//eor z1.d,z1.d,z6.d
2109*4757b351SPierre Pronchery.if	mixin == 1
2110*4757b351SPierre Pronchery	eor	w11,w11,w16
2111*4757b351SPierre Pronchery.endif
2112*4757b351SPierre Pronchery.inst	0x046c9cb1	//lsl z17.s,z5.s,12
2113*4757b351SPierre Pronchery.inst	0x046c9d32	//lsl z18.s,z9.s,12
2114*4757b351SPierre Pronchery.inst	0x046c9db3	//lsl z19.s,z13.s,12
2115*4757b351SPierre Pronchery.inst	0x046c9c34	//lsl z20.s,z1.s,12
2116*4757b351SPierre Pronchery.inst	0x046c94a5	//lsr z5.s,z5.s,20
2117*4757b351SPierre Pronchery.if	mixin == 1
2118*4757b351SPierre Pronchery	ror	w12,w12,20
2119*4757b351SPierre Pronchery.endif
2120*4757b351SPierre Pronchery.inst	0x046c9529	//lsr z9.s,z9.s,20
2121*4757b351SPierre Pronchery.if	mixin == 1
2122*4757b351SPierre Pronchery	ror	w13,w13,20
2123*4757b351SPierre Pronchery.endif
2124*4757b351SPierre Pronchery.inst	0x046c95ad	//lsr z13.s,z13.s,20
2125*4757b351SPierre Pronchery.if	mixin == 1
2126*4757b351SPierre Pronchery	ror	w14,w14,20
2127*4757b351SPierre Pronchery.endif
2128*4757b351SPierre Pronchery.inst	0x046c9421	//lsr z1.s,z1.s,20
2129*4757b351SPierre Pronchery.if	mixin == 1
2130*4757b351SPierre Pronchery	ror	w11,w11,20
2131*4757b351SPierre Pronchery.endif
2132*4757b351SPierre Pronchery.inst	0x047130a5	//orr z5.d,z5.d,z17.d
2133*4757b351SPierre Pronchery.inst	0x04723129	//orr z9.d,z9.d,z18.d
2134*4757b351SPierre Pronchery.inst	0x047331ad	//orr z13.d,z13.d,z19.d
2135*4757b351SPierre Pronchery.inst	0x04743021	//orr z1.d,z1.d,z20.d
2136*4757b351SPierre Pronchery.inst	0x04a50000	//add z0.s,z0.s,z5.s
2137*4757b351SPierre Pronchery.if	mixin == 1
2138*4757b351SPierre Pronchery	add	w7,w7,w12
2139*4757b351SPierre Pronchery.endif
2140*4757b351SPierre Pronchery.inst	0x04a90084	//add z4.s,z4.s,z9.s
2141*4757b351SPierre Pronchery.if	mixin == 1
2142*4757b351SPierre Pronchery	add	w8,w8,w13
2143*4757b351SPierre Pronchery.endif
2144*4757b351SPierre Pronchery.inst	0x04ad0108	//add z8.s,z8.s,z13.s
2145*4757b351SPierre Pronchery.if	mixin == 1
2146*4757b351SPierre Pronchery	add	w9,w9,w14
2147*4757b351SPierre Pronchery.endif
2148*4757b351SPierre Pronchery.inst	0x04a1018c	//add z12.s,z12.s,z1.s
2149*4757b351SPierre Pronchery.if	mixin == 1
2150*4757b351SPierre Pronchery	add	w10,w10,w11
2151*4757b351SPierre Pronchery.endif
2152*4757b351SPierre Pronchery.inst	0x04a031ef	//eor z15.d,z15.d,z0.d
2153*4757b351SPierre Pronchery.if	mixin == 1
2154*4757b351SPierre Pronchery	eor	w22,w22,w7
2155*4757b351SPierre Pronchery.endif
2156*4757b351SPierre Pronchery.inst	0x04a43063	//eor z3.d,z3.d,z4.d
2157*4757b351SPierre Pronchery.if	mixin == 1
2158*4757b351SPierre Pronchery	eor	w19,w19,w8
2159*4757b351SPierre Pronchery.endif
2160*4757b351SPierre Pronchery.inst	0x04a830e7	//eor z7.d,z7.d,z8.d
2161*4757b351SPierre Pronchery.if	mixin == 1
2162*4757b351SPierre Pronchery	eor	w20,w20,w9
2163*4757b351SPierre Pronchery.endif
2164*4757b351SPierre Pronchery.inst	0x04ac316b	//eor z11.d,z11.d,z12.d
2165*4757b351SPierre Pronchery.if	mixin == 1
2166*4757b351SPierre Pronchery	eor	w21,w21,w10
2167*4757b351SPierre Pronchery.endif
2168*4757b351SPierre Pronchery.inst	0x053f31ef	//tbl z15.b,{z15.b},z31.b
2169*4757b351SPierre Pronchery.if	mixin == 1
2170*4757b351SPierre Pronchery	ror	w22,w22,#24
2171*4757b351SPierre Pronchery.endif
2172*4757b351SPierre Pronchery.inst	0x053f3063	//tbl z3.b,{z3.b},z31.b
2173*4757b351SPierre Pronchery.if	mixin == 1
2174*4757b351SPierre Pronchery	ror	w19,w19,#24
2175*4757b351SPierre Pronchery.endif
2176*4757b351SPierre Pronchery.inst	0x053f30e7	//tbl z7.b,{z7.b},z31.b
2177*4757b351SPierre Pronchery.if	mixin == 1
2178*4757b351SPierre Pronchery	ror	w20,w20,#24
2179*4757b351SPierre Pronchery.endif
2180*4757b351SPierre Pronchery.inst	0x053f316b	//tbl z11.b,{z11.b},z31.b
2181*4757b351SPierre Pronchery.if	mixin == 1
2182*4757b351SPierre Pronchery	ror	w21,w21,#24
2183*4757b351SPierre Pronchery.endif
2184*4757b351SPierre Pronchery.inst	0x04af014a	//add z10.s,z10.s,z15.s
2185*4757b351SPierre Pronchery.if	mixin == 1
2186*4757b351SPierre Pronchery	add	w17,w17,w22
2187*4757b351SPierre Pronchery.endif
2188*4757b351SPierre Pronchery.inst	0x04a301ce	//add z14.s,z14.s,z3.s
2189*4757b351SPierre Pronchery.if	mixin == 1
2190*4757b351SPierre Pronchery	add	w18,w18,w19
2191*4757b351SPierre Pronchery.endif
2192*4757b351SPierre Pronchery.inst	0x04a70042	//add z2.s,z2.s,z7.s
2193*4757b351SPierre Pronchery.if	mixin == 1
2194*4757b351SPierre Pronchery	add	w15,w15,w20
2195*4757b351SPierre Pronchery.endif
2196*4757b351SPierre Pronchery.inst	0x04ab00c6	//add z6.s,z6.s,z11.s
2197*4757b351SPierre Pronchery.if	mixin == 1
2198*4757b351SPierre Pronchery	add	w16,w16,w21
2199*4757b351SPierre Pronchery.endif
2200*4757b351SPierre Pronchery.inst	0x04aa30a5	//eor z5.d,z5.d,z10.d
2201*4757b351SPierre Pronchery.if	mixin == 1
2202*4757b351SPierre Pronchery	eor	w12,w12,w17
2203*4757b351SPierre Pronchery.endif
2204*4757b351SPierre Pronchery.inst	0x04ae3129	//eor z9.d,z9.d,z14.d
2205*4757b351SPierre Pronchery.if	mixin == 1
2206*4757b351SPierre Pronchery	eor	w13,w13,w18
2207*4757b351SPierre Pronchery.endif
2208*4757b351SPierre Pronchery.inst	0x04a231ad	//eor z13.d,z13.d,z2.d
2209*4757b351SPierre Pronchery.if	mixin == 1
2210*4757b351SPierre Pronchery	eor	w14,w14,w15
2211*4757b351SPierre Pronchery.endif
2212*4757b351SPierre Pronchery.inst	0x04a63021	//eor z1.d,z1.d,z6.d
2213*4757b351SPierre Pronchery.if	mixin == 1
2214*4757b351SPierre Pronchery	eor	w11,w11,w16
2215*4757b351SPierre Pronchery.endif
2216*4757b351SPierre Pronchery.inst	0x04679cb1	//lsl z17.s,z5.s,7
2217*4757b351SPierre Pronchery.inst	0x04679d32	//lsl z18.s,z9.s,7
2218*4757b351SPierre Pronchery.inst	0x04679db3	//lsl z19.s,z13.s,7
2219*4757b351SPierre Pronchery.inst	0x04679c34	//lsl z20.s,z1.s,7
2220*4757b351SPierre Pronchery.inst	0x046794a5	//lsr z5.s,z5.s,25
2221*4757b351SPierre Pronchery.if	mixin == 1
2222*4757b351SPierre Pronchery	ror	w12,w12,25
2223*4757b351SPierre Pronchery.endif
2224*4757b351SPierre Pronchery.inst	0x04679529	//lsr z9.s,z9.s,25
2225*4757b351SPierre Pronchery.if	mixin == 1
2226*4757b351SPierre Pronchery	ror	w13,w13,25
2227*4757b351SPierre Pronchery.endif
2228*4757b351SPierre Pronchery.inst	0x046795ad	//lsr z13.s,z13.s,25
2229*4757b351SPierre Pronchery.if	mixin == 1
2230*4757b351SPierre Pronchery	ror	w14,w14,25
2231*4757b351SPierre Pronchery.endif
2232*4757b351SPierre Pronchery.inst	0x04679421	//lsr z1.s,z1.s,25
2233*4757b351SPierre Pronchery.if	mixin == 1
2234*4757b351SPierre Pronchery	ror	w11,w11,25
2235*4757b351SPierre Pronchery.endif
2236*4757b351SPierre Pronchery.inst	0x047130a5	//orr z5.d,z5.d,z17.d
2237*4757b351SPierre Pronchery.inst	0x04723129	//orr z9.d,z9.d,z18.d
2238*4757b351SPierre Pronchery.inst	0x047331ad	//orr z13.d,z13.d,z19.d
2239*4757b351SPierre Pronchery.inst	0x04743021	//orr z1.d,z1.d,z20.d
2240*4757b351SPierre Pronchery	sub	x6,x6,1
2241*4757b351SPierre Pronchery	cbnz	x6,10b
2242*4757b351SPierre Pronchery	lsr	x6,x28,#32
2243*4757b351SPierre Pronchery.inst	0x05a03b91	//dup z17.s,w28
2244*4757b351SPierre Pronchery.inst	0x05a038d2	//dup z18.s,w6
2245*4757b351SPierre Pronchery	lsr	x6,x29,#32
2246*4757b351SPierre Pronchery.inst	0x05a038d3	//dup z19.s,w6
2247*4757b351SPierre Pronchery	lsr	x6,x30,#32
2248*4757b351SPierre Pronchery.if	mixin == 1
2249*4757b351SPierre Pronchery	add	w7,w7,w23
2250*4757b351SPierre Pronchery.endif
2251*4757b351SPierre Pronchery.inst	0x04b90000	//add z0.s,z0.s,z25.s
2252*4757b351SPierre Pronchery.if	mixin == 1
2253*4757b351SPierre Pronchery	add	x8,x8,x23,lsr #32
2254*4757b351SPierre Pronchery.endif
2255*4757b351SPierre Pronchery.inst	0x04ba0084	//add z4.s,z4.s,z26.s
2256*4757b351SPierre Pronchery.if	mixin == 1
2257*4757b351SPierre Pronchery	add	x7,x7,x8,lsl #32  // pack
2258*4757b351SPierre Pronchery.endif
2259*4757b351SPierre Pronchery.if	mixin == 1
2260*4757b351SPierre Pronchery	add	w9,w9,w24
2261*4757b351SPierre Pronchery.endif
2262*4757b351SPierre Pronchery.inst	0x04bb0108	//add z8.s,z8.s,z27.s
2263*4757b351SPierre Pronchery.if	mixin == 1
2264*4757b351SPierre Pronchery	add	x10,x10,x24,lsr #32
2265*4757b351SPierre Pronchery.endif
2266*4757b351SPierre Pronchery.inst	0x04bc018c	//add z12.s,z12.s,z28.s
2267*4757b351SPierre Pronchery.if	mixin == 1
2268*4757b351SPierre Pronchery	add	x9,x9,x10,lsl #32  // pack
2269*4757b351SPierre Pronchery.endif
2270*4757b351SPierre Pronchery.if	mixin == 1
2271*4757b351SPierre Pronchery	ldp	x8,x10,[x1],#16
2272*4757b351SPierre Pronchery.endif
2273*4757b351SPierre Pronchery.if	mixin == 1
2274*4757b351SPierre Pronchery	add	w11,w11,w25
2275*4757b351SPierre Pronchery.endif
2276*4757b351SPierre Pronchery.inst	0x04bd0021	//add z1.s,z1.s,z29.s
2277*4757b351SPierre Pronchery.if	mixin == 1
2278*4757b351SPierre Pronchery	add	x12,x12,x25,lsr #32
2279*4757b351SPierre Pronchery.endif
2280*4757b351SPierre Pronchery.inst	0x04be00a5	//add z5.s,z5.s,z30.s
2281*4757b351SPierre Pronchery.if	mixin == 1
2282*4757b351SPierre Pronchery	add	x11,x11,x12,lsl #32  // pack
2283*4757b351SPierre Pronchery.endif
2284*4757b351SPierre Pronchery.if	mixin == 1
2285*4757b351SPierre Pronchery	add	w13,w13,w26
2286*4757b351SPierre Pronchery.endif
2287*4757b351SPierre Pronchery.inst	0x04b50129	//add z9.s,z9.s,z21.s
2288*4757b351SPierre Pronchery.if	mixin == 1
2289*4757b351SPierre Pronchery	add	x14,x14,x26,lsr #32
2290*4757b351SPierre Pronchery.endif
2291*4757b351SPierre Pronchery.inst	0x04b601ad	//add z13.s,z13.s,z22.s
2292*4757b351SPierre Pronchery.if	mixin == 1
2293*4757b351SPierre Pronchery	add	x13,x13,x14,lsl #32  // pack
2294*4757b351SPierre Pronchery.endif
2295*4757b351SPierre Pronchery.if	mixin == 1
2296*4757b351SPierre Pronchery	ldp	x12,x14,[x1],#16
2297*4757b351SPierre Pronchery.endif
2298*4757b351SPierre Pronchery.if	mixin == 1
2299*4757b351SPierre Pronchery	add	w15,w15,w27
2300*4757b351SPierre Pronchery.endif
2301*4757b351SPierre Pronchery.inst	0x04b70042	//add z2.s,z2.s,z23.s
2302*4757b351SPierre Pronchery.if	mixin == 1
2303*4757b351SPierre Pronchery	add	x16,x16,x27,lsr #32
2304*4757b351SPierre Pronchery.endif
2305*4757b351SPierre Pronchery.inst	0x04b800c6	//add z6.s,z6.s,z24.s
2306*4757b351SPierre Pronchery.if	mixin == 1
2307*4757b351SPierre Pronchery	add	x15,x15,x16,lsl #32  // pack
2308*4757b351SPierre Pronchery.endif
2309*4757b351SPierre Pronchery.if	mixin == 1
2310*4757b351SPierre Pronchery	add	w17,w17,w28
2311*4757b351SPierre Pronchery.endif
2312*4757b351SPierre Pronchery.inst	0x04b1014a	//add z10.s,z10.s,z17.s
2313*4757b351SPierre Pronchery.if	mixin == 1
2314*4757b351SPierre Pronchery	add	x18,x18,x28,lsr #32
2315*4757b351SPierre Pronchery.endif
2316*4757b351SPierre Pronchery.inst	0x04b201ce	//add z14.s,z14.s,z18.s
2317*4757b351SPierre Pronchery.if	mixin == 1
2318*4757b351SPierre Pronchery	add	x17,x17,x18,lsl #32  // pack
2319*4757b351SPierre Pronchery.endif
2320*4757b351SPierre Pronchery.if	mixin == 1
2321*4757b351SPierre Pronchery	ldp	x16,x18,[x1],#16
2322*4757b351SPierre Pronchery.endif
2323*4757b351SPierre Pronchery.inst	0x05a03bd4	//dup z20.s,w30
2324*4757b351SPierre Pronchery.inst	0x05a038d9	//dup z25.s,w6	// bak[15] not available for SVE
2325*4757b351SPierre Pronchery.if	mixin == 1
2326*4757b351SPierre Pronchery	add	w19,w19,w29
2327*4757b351SPierre Pronchery.endif
2328*4757b351SPierre Pronchery.inst	0x04b00063	//add z3.s,z3.s,z16.s
2329*4757b351SPierre Pronchery.if	mixin == 1
2330*4757b351SPierre Pronchery	add	x20,x20,x29,lsr #32
2331*4757b351SPierre Pronchery.endif
2332*4757b351SPierre Pronchery.inst	0x04b300e7	//add z7.s,z7.s,z19.s
2333*4757b351SPierre Pronchery.if	mixin == 1
2334*4757b351SPierre Pronchery	add	x19,x19,x20,lsl #32  // pack
2335*4757b351SPierre Pronchery.endif
2336*4757b351SPierre Pronchery.if	mixin == 1
2337*4757b351SPierre Pronchery	add	w21,w21,w30
2338*4757b351SPierre Pronchery.endif
2339*4757b351SPierre Pronchery.inst	0x04b4016b	//add z11.s,z11.s,z20.s
2340*4757b351SPierre Pronchery.if	mixin == 1
2341*4757b351SPierre Pronchery	add	x22,x22,x30,lsr #32
2342*4757b351SPierre Pronchery.endif
2343*4757b351SPierre Pronchery.inst	0x04b901ef	//add z15.s,z15.s,z25.s
2344*4757b351SPierre Pronchery.if	mixin == 1
2345*4757b351SPierre Pronchery	add	x21,x21,x22,lsl #32  // pack
2346*4757b351SPierre Pronchery.endif
2347*4757b351SPierre Pronchery.if	mixin == 1
2348*4757b351SPierre Pronchery	ldp	x20,x22,[x1],#16
2349*4757b351SPierre Pronchery.endif
2350*4757b351SPierre Pronchery#ifdef	__AARCH64EB__
2351*4757b351SPierre Pronchery	rev	x7,x7
2352*4757b351SPierre Pronchery.inst	0x05a48000	//revb z0.s,p0/m,z0.s
2353*4757b351SPierre Pronchery.inst	0x05a48084	//revb z4.s,p0/m,z4.s
2354*4757b351SPierre Pronchery	rev	x9,x9
2355*4757b351SPierre Pronchery.inst	0x05a48108	//revb z8.s,p0/m,z8.s
2356*4757b351SPierre Pronchery.inst	0x05a4818c	//revb z12.s,p0/m,z12.s
2357*4757b351SPierre Pronchery	rev	x11,x11
2358*4757b351SPierre Pronchery.inst	0x05a48021	//revb z1.s,p0/m,z1.s
2359*4757b351SPierre Pronchery.inst	0x05a480a5	//revb z5.s,p0/m,z5.s
2360*4757b351SPierre Pronchery	rev	x13,x13
2361*4757b351SPierre Pronchery.inst	0x05a48129	//revb z9.s,p0/m,z9.s
2362*4757b351SPierre Pronchery.inst	0x05a481ad	//revb z13.s,p0/m,z13.s
2363*4757b351SPierre Pronchery	rev	x15,x15
2364*4757b351SPierre Pronchery.inst	0x05a48042	//revb z2.s,p0/m,z2.s
2365*4757b351SPierre Pronchery.inst	0x05a480c6	//revb z6.s,p0/m,z6.s
2366*4757b351SPierre Pronchery	rev	x17,x17
2367*4757b351SPierre Pronchery.inst	0x05a4814a	//revb z10.s,p0/m,z10.s
2368*4757b351SPierre Pronchery.inst	0x05a481ce	//revb z14.s,p0/m,z14.s
2369*4757b351SPierre Pronchery	rev	x19,x19
2370*4757b351SPierre Pronchery.inst	0x05a48063	//revb z3.s,p0/m,z3.s
2371*4757b351SPierre Pronchery.inst	0x05a480e7	//revb z7.s,p0/m,z7.s
2372*4757b351SPierre Pronchery	rev	x21,x21
2373*4757b351SPierre Pronchery.inst	0x05a4816b	//revb z11.s,p0/m,z11.s
2374*4757b351SPierre Pronchery.inst	0x05a481ef	//revb z15.s,p0/m,z15.s
2375*4757b351SPierre Pronchery#endif
2376*4757b351SPierre Pronchery.if	mixin == 1
2377*4757b351SPierre Pronchery	add	x29,x29,#1
2378*4757b351SPierre Pronchery.endif
2379*4757b351SPierre Pronchery	cmp	x5,4
2380*4757b351SPierre Pronchery	b.ne	200f
2381*4757b351SPierre Pronchery.if	mixin == 1
2382*4757b351SPierre Pronchery	eor	x7,x7,x8
2383*4757b351SPierre Pronchery.endif
2384*4757b351SPierre Pronchery.if	mixin == 1
2385*4757b351SPierre Pronchery	eor	x9,x9,x10
2386*4757b351SPierre Pronchery.endif
2387*4757b351SPierre Pronchery.if	mixin == 1
2388*4757b351SPierre Pronchery	eor	x11,x11,x12
2389*4757b351SPierre Pronchery.endif
2390*4757b351SPierre Pronchery.inst	0x05a46011	//zip1 z17.s,z0.s,z4.s
2391*4757b351SPierre Pronchery.inst	0x05a46412	//zip2 z18.s,z0.s,z4.s
2392*4757b351SPierre Pronchery.inst	0x05ac6113	//zip1 z19.s,z8.s,z12.s
2393*4757b351SPierre Pronchery.inst	0x05ac6514	//zip2 z20.s,z8.s,z12.s
2394*4757b351SPierre Pronchery
2395*4757b351SPierre Pronchery.inst	0x05a56035	//zip1 z21.s,z1.s,z5.s
2396*4757b351SPierre Pronchery.inst	0x05a56436	//zip2 z22.s,z1.s,z5.s
2397*4757b351SPierre Pronchery.inst	0x05ad6137	//zip1 z23.s,z9.s,z13.s
2398*4757b351SPierre Pronchery.inst	0x05ad6538	//zip2 z24.s,z9.s,z13.s
2399*4757b351SPierre Pronchery
2400*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
2401*4757b351SPierre Pronchery.inst	0x05f36624	//zip2 z4.d,z17.d,z19.d
2402*4757b351SPierre Pronchery.inst	0x05f46248	//zip1 z8.d,z18.d,z20.d
2403*4757b351SPierre Pronchery.inst	0x05f4664c	//zip2 z12.d,z18.d,z20.d
2404*4757b351SPierre Pronchery
2405*4757b351SPierre Pronchery.inst	0x05f762a1	//zip1 z1.d,z21.d,z23.d
2406*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
2407*4757b351SPierre Pronchery.inst	0x05f862c9	//zip1 z9.d,z22.d,z24.d
2408*4757b351SPierre Pronchery.inst	0x05f866cd	//zip2 z13.d,z22.d,z24.d
2409*4757b351SPierre Pronchery.if	mixin == 1
2410*4757b351SPierre Pronchery	eor	x13,x13,x14
2411*4757b351SPierre Pronchery.endif
2412*4757b351SPierre Pronchery.if	mixin == 1
2413*4757b351SPierre Pronchery	eor	x15,x15,x16
2414*4757b351SPierre Pronchery.endif
2415*4757b351SPierre Pronchery.if	mixin == 1
2416*4757b351SPierre Pronchery	eor	x17,x17,x18
2417*4757b351SPierre Pronchery.endif
2418*4757b351SPierre Pronchery.inst	0x05a66051	//zip1 z17.s,z2.s,z6.s
2419*4757b351SPierre Pronchery.inst	0x05a66452	//zip2 z18.s,z2.s,z6.s
2420*4757b351SPierre Pronchery.inst	0x05ae6153	//zip1 z19.s,z10.s,z14.s
2421*4757b351SPierre Pronchery.inst	0x05ae6554	//zip2 z20.s,z10.s,z14.s
2422*4757b351SPierre Pronchery
2423*4757b351SPierre Pronchery.inst	0x05a76075	//zip1 z21.s,z3.s,z7.s
2424*4757b351SPierre Pronchery.inst	0x05a76476	//zip2 z22.s,z3.s,z7.s
2425*4757b351SPierre Pronchery.inst	0x05af6177	//zip1 z23.s,z11.s,z15.s
2426*4757b351SPierre Pronchery.inst	0x05af6578	//zip2 z24.s,z11.s,z15.s
2427*4757b351SPierre Pronchery
2428*4757b351SPierre Pronchery.inst	0x05f36222	//zip1 z2.d,z17.d,z19.d
2429*4757b351SPierre Pronchery.inst	0x05f36626	//zip2 z6.d,z17.d,z19.d
2430*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
2431*4757b351SPierre Pronchery.inst	0x05f4664e	//zip2 z14.d,z18.d,z20.d
2432*4757b351SPierre Pronchery
2433*4757b351SPierre Pronchery.inst	0x05f762a3	//zip1 z3.d,z21.d,z23.d
2434*4757b351SPierre Pronchery.inst	0x05f766a7	//zip2 z7.d,z21.d,z23.d
2435*4757b351SPierre Pronchery.inst	0x05f862cb	//zip1 z11.d,z22.d,z24.d
2436*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
2437*4757b351SPierre Pronchery.if	mixin == 1
2438*4757b351SPierre Pronchery	eor	x19,x19,x20
2439*4757b351SPierre Pronchery.endif
2440*4757b351SPierre Pronchery.if	mixin == 1
2441*4757b351SPierre Pronchery	eor	x21,x21,x22
2442*4757b351SPierre Pronchery.endif
2443*4757b351SPierre Pronchery	ld1	{v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64
2444*4757b351SPierre Pronchery	ld1	{v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64
2445*4757b351SPierre Pronchery.inst	0x04b13000	//eor z0.d,z0.d,z17.d
2446*4757b351SPierre Pronchery.inst	0x04b23021	//eor z1.d,z1.d,z18.d
2447*4757b351SPierre Pronchery.inst	0x04b33042	//eor z2.d,z2.d,z19.d
2448*4757b351SPierre Pronchery.inst	0x04b43063	//eor z3.d,z3.d,z20.d
2449*4757b351SPierre Pronchery.inst	0x04b53084	//eor z4.d,z4.d,z21.d
2450*4757b351SPierre Pronchery.inst	0x04b630a5	//eor z5.d,z5.d,z22.d
2451*4757b351SPierre Pronchery.inst	0x04b730c6	//eor z6.d,z6.d,z23.d
2452*4757b351SPierre Pronchery.inst	0x04b830e7	//eor z7.d,z7.d,z24.d
2453*4757b351SPierre Pronchery	ld1	{v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64
2454*4757b351SPierre Pronchery	ld1	{v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64
2455*4757b351SPierre Pronchery.if	mixin == 1
2456*4757b351SPierre Pronchery	stp	x7,x9,[x0],#16
2457*4757b351SPierre Pronchery.endif
2458*4757b351SPierre Pronchery.inst	0x04b13108	//eor z8.d,z8.d,z17.d
2459*4757b351SPierre Pronchery.inst	0x04b23129	//eor z9.d,z9.d,z18.d
2460*4757b351SPierre Pronchery.if	mixin == 1
2461*4757b351SPierre Pronchery	stp	x11,x13,[x0],#16
2462*4757b351SPierre Pronchery.endif
2463*4757b351SPierre Pronchery.inst	0x04b3314a	//eor z10.d,z10.d,z19.d
2464*4757b351SPierre Pronchery.inst	0x04b4316b	//eor z11.d,z11.d,z20.d
2465*4757b351SPierre Pronchery.if	mixin == 1
2466*4757b351SPierre Pronchery	stp	x15,x17,[x0],#16
2467*4757b351SPierre Pronchery.endif
2468*4757b351SPierre Pronchery.inst	0x04b5318c	//eor z12.d,z12.d,z21.d
2469*4757b351SPierre Pronchery.inst	0x04b631ad	//eor z13.d,z13.d,z22.d
2470*4757b351SPierre Pronchery.if	mixin == 1
2471*4757b351SPierre Pronchery	stp	x19,x21,[x0],#16
2472*4757b351SPierre Pronchery.endif
2473*4757b351SPierre Pronchery.inst	0x04b731ce	//eor z14.d,z14.d,z23.d
2474*4757b351SPierre Pronchery.inst	0x04b831ef	//eor z15.d,z15.d,z24.d
2475*4757b351SPierre Pronchery	st1	{v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64
2476*4757b351SPierre Pronchery	st1	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
2477*4757b351SPierre Pronchery	st1	{v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64
2478*4757b351SPierre Pronchery	st1	{v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64
2479*4757b351SPierre Pronchery	b	210f
// Local label 200: SVE-width output path. SVE instructions are emitted as raw
// .inst words; the trailing comment on each line gives the decoded mnemonic.
// Sequence: interleave z0-z15 with zip1/zip2 (z17-z24 are temporaries), load
// 16 vectors of input from [x1], XOR them into the interleaved registers, and
// store 16 vectors to [x0]. When the assembler symbol mixin is 1, eight scalar
// registers are additionally XORed and stored (4 x stp = 64 bytes) ahead of
// the vector stores.
// NOTE(review): the zip1/zip2 pattern looks like a 4x4 transpose of 32-bit
// words so that each block's words become contiguous - confirm against the
// generator script.
2480*4757b351SPierre Pronchery200:
// Stage 1a: interleave z0-z7 pairwise at 32-bit, then at 64-bit granularity.
2481*4757b351SPierre Pronchery.inst	0x05a16011	//zip1 z17.s,z0.s,z1.s
2482*4757b351SPierre Pronchery.inst	0x05a16412	//zip2 z18.s,z0.s,z1.s
2483*4757b351SPierre Pronchery.inst	0x05a36053	//zip1 z19.s,z2.s,z3.s
2484*4757b351SPierre Pronchery.inst	0x05a36454	//zip2 z20.s,z2.s,z3.s
2485*4757b351SPierre Pronchery
2486*4757b351SPierre Pronchery.inst	0x05a56095	//zip1 z21.s,z4.s,z5.s
2487*4757b351SPierre Pronchery.inst	0x05a56496	//zip2 z22.s,z4.s,z5.s
2488*4757b351SPierre Pronchery.inst	0x05a760d7	//zip1 z23.s,z6.s,z7.s
2489*4757b351SPierre Pronchery.inst	0x05a764d8	//zip2 z24.s,z6.s,z7.s
2490*4757b351SPierre Pronchery
2491*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
2492*4757b351SPierre Pronchery.inst	0x05f36621	//zip2 z1.d,z17.d,z19.d
2493*4757b351SPierre Pronchery.inst	0x05f46242	//zip1 z2.d,z18.d,z20.d
2494*4757b351SPierre Pronchery.inst	0x05f46643	//zip2 z3.d,z18.d,z20.d
2495*4757b351SPierre Pronchery
2496*4757b351SPierre Pronchery.inst	0x05f762a4	//zip1 z4.d,z21.d,z23.d
2497*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
2498*4757b351SPierre Pronchery.inst	0x05f862c6	//zip1 z6.d,z22.d,z24.d
2499*4757b351SPierre Pronchery.inst	0x05f866c7	//zip2 z7.d,z22.d,z24.d
// mixin only: XOR scalar pairs x7^=x8 and x9^=x10, interleaved with the vector work.
// NOTE(review): x8/x10 presumably hold input data loaded earlier - confirm.
2500*4757b351SPierre Pronchery.if	mixin == 1
2501*4757b351SPierre Pronchery	eor	x7,x7,x8
2502*4757b351SPierre Pronchery.endif
2503*4757b351SPierre Pronchery.if	mixin == 1
2504*4757b351SPierre Pronchery	eor	x9,x9,x10
2505*4757b351SPierre Pronchery.endif
// Stage 1b: same pairwise interleave for z8-z15.
2506*4757b351SPierre Pronchery.inst	0x05a96111	//zip1 z17.s,z8.s,z9.s
2507*4757b351SPierre Pronchery.inst	0x05a96512	//zip2 z18.s,z8.s,z9.s
2508*4757b351SPierre Pronchery.inst	0x05ab6153	//zip1 z19.s,z10.s,z11.s
2509*4757b351SPierre Pronchery.inst	0x05ab6554	//zip2 z20.s,z10.s,z11.s
2510*4757b351SPierre Pronchery
2511*4757b351SPierre Pronchery.inst	0x05ad6195	//zip1 z21.s,z12.s,z13.s
2512*4757b351SPierre Pronchery.inst	0x05ad6596	//zip2 z22.s,z12.s,z13.s
2513*4757b351SPierre Pronchery.inst	0x05af61d7	//zip1 z23.s,z14.s,z15.s
2514*4757b351SPierre Pronchery.inst	0x05af65d8	//zip2 z24.s,z14.s,z15.s
2515*4757b351SPierre Pronchery
2516*4757b351SPierre Pronchery.inst	0x05f36228	//zip1 z8.d,z17.d,z19.d
2517*4757b351SPierre Pronchery.inst	0x05f36629	//zip2 z9.d,z17.d,z19.d
2518*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
2519*4757b351SPierre Pronchery.inst	0x05f4664b	//zip2 z11.d,z18.d,z20.d
2520*4757b351SPierre Pronchery
2521*4757b351SPierre Pronchery.inst	0x05f762ac	//zip1 z12.d,z21.d,z23.d
2522*4757b351SPierre Pronchery.inst	0x05f766ad	//zip2 z13.d,z21.d,z23.d
2523*4757b351SPierre Pronchery.inst	0x05f862ce	//zip1 z14.d,z22.d,z24.d
2524*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
// mixin only: x11^=x12, x13^=x14.
2525*4757b351SPierre Pronchery.if	mixin == 1
2526*4757b351SPierre Pronchery	eor	x11,x11,x12
2527*4757b351SPierre Pronchery.endif
2528*4757b351SPierre Pronchery.if	mixin == 1
2529*4757b351SPierre Pronchery	eor	x13,x13,x14
2530*4757b351SPierre Pronchery.endif
// Stage 2a: interleave across the stage-1 results; outputs land in
// z0,z4,z8,z12 and z1,z5,z9,z13 (the first eight vectors stored below).
2531*4757b351SPierre Pronchery.inst	0x05a46011	//zip1 z17.s,z0.s,z4.s
2532*4757b351SPierre Pronchery.inst	0x05a46412	//zip2 z18.s,z0.s,z4.s
2533*4757b351SPierre Pronchery.inst	0x05ac6113	//zip1 z19.s,z8.s,z12.s
2534*4757b351SPierre Pronchery.inst	0x05ac6514	//zip2 z20.s,z8.s,z12.s
2535*4757b351SPierre Pronchery
2536*4757b351SPierre Pronchery.inst	0x05a56035	//zip1 z21.s,z1.s,z5.s
2537*4757b351SPierre Pronchery.inst	0x05a56436	//zip2 z22.s,z1.s,z5.s
2538*4757b351SPierre Pronchery.inst	0x05ad6137	//zip1 z23.s,z9.s,z13.s
2539*4757b351SPierre Pronchery.inst	0x05ad6538	//zip2 z24.s,z9.s,z13.s
2540*4757b351SPierre Pronchery
2541*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
2542*4757b351SPierre Pronchery.inst	0x05f36624	//zip2 z4.d,z17.d,z19.d
2543*4757b351SPierre Pronchery.inst	0x05f46248	//zip1 z8.d,z18.d,z20.d
2544*4757b351SPierre Pronchery.inst	0x05f4664c	//zip2 z12.d,z18.d,z20.d
2545*4757b351SPierre Pronchery
2546*4757b351SPierre Pronchery.inst	0x05f762a1	//zip1 z1.d,z21.d,z23.d
2547*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
2548*4757b351SPierre Pronchery.inst	0x05f862c9	//zip1 z9.d,z22.d,z24.d
2549*4757b351SPierre Pronchery.inst	0x05f866cd	//zip2 z13.d,z22.d,z24.d
// mixin only: x15^=x16, x17^=x18.
2550*4757b351SPierre Pronchery.if	mixin == 1
2551*4757b351SPierre Pronchery	eor	x15,x15,x16
2552*4757b351SPierre Pronchery.endif
2553*4757b351SPierre Pronchery.if	mixin == 1
2554*4757b351SPierre Pronchery	eor	x17,x17,x18
2555*4757b351SPierre Pronchery.endif
// Stage 2b: same for the remaining registers; outputs land in
// z2,z6,z10,z14 and z3,z7,z11,z15 (the last eight vectors stored below).
2556*4757b351SPierre Pronchery.inst	0x05a66051	//zip1 z17.s,z2.s,z6.s
2557*4757b351SPierre Pronchery.inst	0x05a66452	//zip2 z18.s,z2.s,z6.s
2558*4757b351SPierre Pronchery.inst	0x05ae6153	//zip1 z19.s,z10.s,z14.s
2559*4757b351SPierre Pronchery.inst	0x05ae6554	//zip2 z20.s,z10.s,z14.s
2560*4757b351SPierre Pronchery
2561*4757b351SPierre Pronchery.inst	0x05a76075	//zip1 z21.s,z3.s,z7.s
2562*4757b351SPierre Pronchery.inst	0x05a76476	//zip2 z22.s,z3.s,z7.s
2563*4757b351SPierre Pronchery.inst	0x05af6177	//zip1 z23.s,z11.s,z15.s
2564*4757b351SPierre Pronchery.inst	0x05af6578	//zip2 z24.s,z11.s,z15.s
2565*4757b351SPierre Pronchery
2566*4757b351SPierre Pronchery.inst	0x05f36222	//zip1 z2.d,z17.d,z19.d
2567*4757b351SPierre Pronchery.inst	0x05f36626	//zip2 z6.d,z17.d,z19.d
2568*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
2569*4757b351SPierre Pronchery.inst	0x05f4664e	//zip2 z14.d,z18.d,z20.d
2570*4757b351SPierre Pronchery
2571*4757b351SPierre Pronchery.inst	0x05f762a3	//zip1 z3.d,z21.d,z23.d
2572*4757b351SPierre Pronchery.inst	0x05f766a7	//zip2 z7.d,z21.d,z23.d
2573*4757b351SPierre Pronchery.inst	0x05f862cb	//zip1 z11.d,z22.d,z24.d
2574*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
// mixin only: x19^=x20, x21^=x22.
2575*4757b351SPierre Pronchery.if	mixin == 1
2576*4757b351SPierre Pronchery	eor	x19,x19,x20
2577*4757b351SPierre Pronchery.endif
2578*4757b351SPierre Pronchery.if	mixin == 1
2579*4757b351SPierre Pronchery	eor	x21,x21,x22
2580*4757b351SPierre Pronchery.endif
// Load the first eight vectors from [x1] (predicate p0, zeroing) into the
// temporaries z17-z24, then advance x1 by eight vector lengths.
2581*4757b351SPierre Pronchery.inst	0xa540a031	//ld1w {z17.s},p0/z,[x1,#0,MUL VL]
2582*4757b351SPierre Pronchery.inst	0xa541a032	//ld1w {z18.s},p0/z,[x1,#1,MUL VL]
2583*4757b351SPierre Pronchery.inst	0xa542a033	//ld1w {z19.s},p0/z,[x1,#2,MUL VL]
2584*4757b351SPierre Pronchery.inst	0xa543a034	//ld1w {z20.s},p0/z,[x1,#3,MUL VL]
2585*4757b351SPierre Pronchery.inst	0xa544a035	//ld1w {z21.s},p0/z,[x1,#4,MUL VL]
2586*4757b351SPierre Pronchery.inst	0xa545a036	//ld1w {z22.s},p0/z,[x1,#5,MUL VL]
2587*4757b351SPierre Pronchery.inst	0xa546a037	//ld1w {z23.s},p0/z,[x1,#6,MUL VL]
2588*4757b351SPierre Pronchery.inst	0xa547a038	//ld1w {z24.s},p0/z,[x1,#7,MUL VL]
2589*4757b351SPierre Pronchery.inst	0x04215101	//addvl x1,x1,8
// XOR the loaded data into the first eight output registers, in store order.
2590*4757b351SPierre Pronchery.inst	0x04b13000	//eor z0.d,z0.d,z17.d
2591*4757b351SPierre Pronchery.inst	0x04b23084	//eor z4.d,z4.d,z18.d
2592*4757b351SPierre Pronchery.inst	0x04b33108	//eor z8.d,z8.d,z19.d
2593*4757b351SPierre Pronchery.inst	0x04b4318c	//eor z12.d,z12.d,z20.d
2594*4757b351SPierre Pronchery.inst	0x04b53021	//eor z1.d,z1.d,z21.d
2595*4757b351SPierre Pronchery.inst	0x04b630a5	//eor z5.d,z5.d,z22.d
2596*4757b351SPierre Pronchery.inst	0x04b73129	//eor z9.d,z9.d,z23.d
2597*4757b351SPierre Pronchery.inst	0x04b831ad	//eor z13.d,z13.d,z24.d
// Load the next eight vectors and advance x1 again (16 vectors consumed in total).
2598*4757b351SPierre Pronchery.inst	0xa540a031	//ld1w {z17.s},p0/z,[x1,#0,MUL VL]
2599*4757b351SPierre Pronchery.inst	0xa541a032	//ld1w {z18.s},p0/z,[x1,#1,MUL VL]
2600*4757b351SPierre Pronchery.inst	0xa542a033	//ld1w {z19.s},p0/z,[x1,#2,MUL VL]
2601*4757b351SPierre Pronchery.inst	0xa543a034	//ld1w {z20.s},p0/z,[x1,#3,MUL VL]
2602*4757b351SPierre Pronchery.inst	0xa544a035	//ld1w {z21.s},p0/z,[x1,#4,MUL VL]
2603*4757b351SPierre Pronchery.inst	0xa545a036	//ld1w {z22.s},p0/z,[x1,#5,MUL VL]
2604*4757b351SPierre Pronchery.inst	0xa546a037	//ld1w {z23.s},p0/z,[x1,#6,MUL VL]
2605*4757b351SPierre Pronchery.inst	0xa547a038	//ld1w {z24.s},p0/z,[x1,#7,MUL VL]
2606*4757b351SPierre Pronchery.inst	0x04215101	//addvl x1,x1,8
// XOR into the remaining eight output registers. With mixin == 1 the scalar
// results are written first via four post-indexed stp (x0 += 64 in total),
// so the vector stores below follow them in memory.
2607*4757b351SPierre Pronchery.if	mixin == 1
2608*4757b351SPierre Pronchery	stp	x7,x9,[x0],#16
2609*4757b351SPierre Pronchery.endif
2610*4757b351SPierre Pronchery.inst	0x04b13042	//eor z2.d,z2.d,z17.d
2611*4757b351SPierre Pronchery.inst	0x04b230c6	//eor z6.d,z6.d,z18.d
2612*4757b351SPierre Pronchery.if	mixin == 1
2613*4757b351SPierre Pronchery	stp	x11,x13,[x0],#16
2614*4757b351SPierre Pronchery.endif
2615*4757b351SPierre Pronchery.inst	0x04b3314a	//eor z10.d,z10.d,z19.d
2616*4757b351SPierre Pronchery.inst	0x04b431ce	//eor z14.d,z14.d,z20.d
2617*4757b351SPierre Pronchery.if	mixin == 1
2618*4757b351SPierre Pronchery	stp	x15,x17,[x0],#16
2619*4757b351SPierre Pronchery.endif
2620*4757b351SPierre Pronchery.inst	0x04b53063	//eor z3.d,z3.d,z21.d
2621*4757b351SPierre Pronchery.inst	0x04b630e7	//eor z7.d,z7.d,z22.d
2622*4757b351SPierre Pronchery.if	mixin == 1
2623*4757b351SPierre Pronchery	stp	x19,x21,[x0],#16
2624*4757b351SPierre Pronchery.endif
2625*4757b351SPierre Pronchery.inst	0x04b7316b	//eor z11.d,z11.d,z23.d
2626*4757b351SPierre Pronchery.inst	0x04b831ef	//eor z15.d,z15.d,z24.d
// Store the first eight vectors to [x0] (predicate p0) and advance x0 by
// eight vector lengths.
2627*4757b351SPierre Pronchery.inst	0xe540e000	//st1w {z0.s},p0,[x0,#0,MUL VL]
2628*4757b351SPierre Pronchery.inst	0xe541e004	//st1w {z4.s},p0,[x0,#1,MUL VL]
2629*4757b351SPierre Pronchery.inst	0xe542e008	//st1w {z8.s},p0,[x0,#2,MUL VL]
2630*4757b351SPierre Pronchery.inst	0xe543e00c	//st1w {z12.s},p0,[x0,#3,MUL VL]
2631*4757b351SPierre Pronchery.inst	0xe544e001	//st1w {z1.s},p0,[x0,#4,MUL VL]
2632*4757b351SPierre Pronchery.inst	0xe545e005	//st1w {z5.s},p0,[x0,#5,MUL VL]
2633*4757b351SPierre Pronchery.inst	0xe546e009	//st1w {z9.s},p0,[x0,#6,MUL VL]
2634*4757b351SPierre Pronchery.inst	0xe547e00d	//st1w {z13.s},p0,[x0,#7,MUL VL]
2635*4757b351SPierre Pronchery.inst	0x04205100	//addvl x0,x0,8
// Store the remaining eight vectors and advance x0 again.
2636*4757b351SPierre Pronchery.inst	0xe540e002	//st1w {z2.s},p0,[x0,#0,MUL VL]
2637*4757b351SPierre Pronchery.inst	0xe541e006	//st1w {z6.s},p0,[x0,#1,MUL VL]
2638*4757b351SPierre Pronchery.inst	0xe542e00a	//st1w {z10.s},p0,[x0,#2,MUL VL]
2639*4757b351SPierre Pronchery.inst	0xe543e00e	//st1w {z14.s},p0,[x0,#3,MUL VL]
2640*4757b351SPierre Pronchery.inst	0xe544e003	//st1w {z3.s},p0,[x0,#4,MUL VL]
2641*4757b351SPierre Pronchery.inst	0xe545e007	//st1w {z7.s},p0,[x0,#5,MUL VL]
2642*4757b351SPierre Pronchery.inst	0xe546e00b	//st1w {z11.s},p0,[x0,#6,MUL VL]
2643*4757b351SPierre Pronchery.inst	0xe547e00f	//st1w {z15.s},p0,[x0,#7,MUL VL]
2644*4757b351SPierre Pronchery.inst	0x04205100	//addvl x0,x0,8
// Local label 210: loop tail reached from both output paths (the path above
// label 200 jumps here with "b 210f"; the 200 path falls through).
2645*4757b351SPierre Pronchery210:
// incw adds the number of 32-bit lanes in one SVE vector to x29.
// NOTE(review): x29 presumably carries the 32-bit block counter in its low
// half at this point - confirm against the function prologue.
2646*4757b351SPierre Pronchery.inst	0x04b0e3fd	//incw x29, ALL, MUL #1
// Subtract 64 from x2 and set flags; branch back to label 100 while the
// result is still greater than zero (signed), otherwise leave via label 110.
2647*4757b351SPierre Pronchery	subs	x2,x2,64
2648*4757b351SPierre Pronchery	b.gt	100b
2649*4757b351SPierre Pronchery	b	110f
// Local label 101: set up the vector registers for a pass with mixin = 0.
// Each of x23-x28 and x30 is split into its low and high 32-bit halves and
// each half is broadcast (dup) to every 32-bit lane of one of z0-z15; some
// halves are also broadcast into a second register (z25-z30, z21-z24).
// NOTE(review): x23-x30 presumably hold the 16-word ChaCha state packed two
// words per register, and the second copies are presumably kept for the final
// "add original state" step - confirm against the code outside this block.
// NOTE(review): this block writes x18 and reads x29/x30 as data. x18 is
// platform-reserved on some AAPCS64 targets; confirm that is acceptable for
// every platform this file is built for and that x29/x30 are saved/restored
// by the prologue/epilogue.
2650*4757b351SPierre Pronchery101:
// Assembler symbol assignment: every ".if mixin == 1" section from here on
// assembles to nothing until mixin is reassigned.
2651*4757b351SPierre Pronchery	mixin=0
// x23: low half -> z0 (copy z25), high half (via x8) -> z4 (copy z26).
2652*4757b351SPierre Pronchery	lsr	x8,x23,#32
2653*4757b351SPierre Pronchery.inst	0x05a03ae0	//dup z0.s,w23
2654*4757b351SPierre Pronchery.inst	0x05a03af9	//dup z25.s,w23
2655*4757b351SPierre Pronchery.if	mixin == 1
2656*4757b351SPierre Pronchery	mov	w7,w23
2657*4757b351SPierre Pronchery.endif
2658*4757b351SPierre Pronchery.inst	0x05a03904	//dup z4.s,w8
2659*4757b351SPierre Pronchery.inst	0x05a0391a	//dup z26.s,w8
// x24: low half -> z8 (copy z27), high half (via x10) -> z12 (copy z28).
2660*4757b351SPierre Pronchery	lsr	x10,x24,#32
2661*4757b351SPierre Pronchery.inst	0x05a03b08	//dup z8.s,w24
2662*4757b351SPierre Pronchery.inst	0x05a03b1b	//dup z27.s,w24
2663*4757b351SPierre Pronchery.if	mixin == 1
2664*4757b351SPierre Pronchery	mov	w9,w24
2665*4757b351SPierre Pronchery.endif
2666*4757b351SPierre Pronchery.inst	0x05a0394c	//dup z12.s,w10
2667*4757b351SPierre Pronchery.inst	0x05a0395c	//dup z28.s,w10
// x25: low half -> z1 (copy z29), high half (via x12) -> z5 (copy z30).
2668*4757b351SPierre Pronchery	lsr	x12,x25,#32
2669*4757b351SPierre Pronchery.inst	0x05a03b21	//dup z1.s,w25
2670*4757b351SPierre Pronchery.inst	0x05a03b3d	//dup z29.s,w25
2671*4757b351SPierre Pronchery.if	mixin == 1
2672*4757b351SPierre Pronchery	mov	w11,w25
2673*4757b351SPierre Pronchery.endif
2674*4757b351SPierre Pronchery.inst	0x05a03985	//dup z5.s,w12
2675*4757b351SPierre Pronchery.inst	0x05a0399e	//dup z30.s,w12
// x26: low half -> z9 (copy z21), high half (via x14) -> z13 (copy z22).
2676*4757b351SPierre Pronchery	lsr	x14,x26,#32
2677*4757b351SPierre Pronchery.inst	0x05a03b49	//dup z9.s,w26
2678*4757b351SPierre Pronchery.inst	0x05a03b55	//dup z21.s,w26
2679*4757b351SPierre Pronchery.if	mixin == 1
2680*4757b351SPierre Pronchery	mov	w13,w26
2681*4757b351SPierre Pronchery.endif
2682*4757b351SPierre Pronchery.inst	0x05a039cd	//dup z13.s,w14
2683*4757b351SPierre Pronchery.inst	0x05a039d6	//dup z22.s,w14
// x27: low half -> z2 (copy z23), high half (via x16) -> z6 (copy z24).
2684*4757b351SPierre Pronchery	lsr	x16,x27,#32
2685*4757b351SPierre Pronchery.inst	0x05a03b62	//dup z2.s,w27
2686*4757b351SPierre Pronchery.inst	0x05a03b77	//dup z23.s,w27
2687*4757b351SPierre Pronchery.if	mixin == 1
2688*4757b351SPierre Pronchery	mov	w15,w27
2689*4757b351SPierre Pronchery.endif
2690*4757b351SPierre Pronchery.inst	0x05a03a06	//dup z6.s,w16
2691*4757b351SPierre Pronchery.inst	0x05a03a18	//dup z24.s,w16
// x28: low half -> z10, high half (via x18) -> z14; no second copy is made here.
2692*4757b351SPierre Pronchery	lsr	x18,x28,#32
2693*4757b351SPierre Pronchery.inst	0x05a03b8a	//dup z10.s,w28
2694*4757b351SPierre Pronchery.if	mixin == 1
2695*4757b351SPierre Pronchery	mov	w17,w28
2696*4757b351SPierre Pronchery.endif
2697*4757b351SPierre Pronchery.inst	0x05a03a4e	//dup z14.s,w18
// x30: low half -> z11, high half (via x22) -> z15.
2698*4757b351SPierre Pronchery	lsr	x22,x30,#32
2699*4757b351SPierre Pronchery.inst	0x05a03bcb	//dup z11.s,w30
2700*4757b351SPierre Pronchery.if	mixin == 1
2701*4757b351SPierre Pronchery	mov	w21,w30
2702*4757b351SPierre Pronchery.endif
2703*4757b351SPierre Pronchery.inst	0x05a03acf	//dup z15.s,w22
// Per-lane sequence: with mixin = 0 the .else arm is assembled, so lane i of
// z16 and z3 becomes w29 + i. (The mixin == 1 arm would instead start the
// vector sequence at w29 + 1 and keep w29 itself in the scalar w19.)
2704*4757b351SPierre Pronchery.if	mixin == 1
2705*4757b351SPierre Pronchery	add	w20,w29,#1
2706*4757b351SPierre Pronchery	mov	w19,w29
2707*4757b351SPierre Pronchery.inst	0x04a14690	//index z16.s,w20,1
2708*4757b351SPierre Pronchery.inst	0x04a14683	//index z3.s,w20,1
2709*4757b351SPierre Pronchery.else
2710*4757b351SPierre Pronchery.inst	0x04a147b0	//index z16.s,w29,1
2711*4757b351SPierre Pronchery.inst	0x04a147a3	//index z3.s,w29,1
2712*4757b351SPierre Pronchery.endif
// High half of x29 (via x20) is broadcast to z7.
2713*4757b351SPierre Pronchery	lsr	x20,x29,#32
2714*4757b351SPierre Pronchery.inst	0x05a03a87	//dup z7.s,w20
// NOTE(review): x6 = 10 is presumably the iteration count for the loop at
// label 10 that follows (its decrement is outside this block) - confirm.
2715*4757b351SPierre Pronchery	mov	x6,#10
2716*4757b351SPierre Pronchery10:
2717*4757b351SPierre Pronchery.align	5
2718*4757b351SPierre Pronchery.inst	0x04a10000	//add z0.s,z0.s,z1.s
2719*4757b351SPierre Pronchery.if	mixin == 1
2720*4757b351SPierre Pronchery	add	w7,w7,w11
2721*4757b351SPierre Pronchery.endif
2722*4757b351SPierre Pronchery.inst	0x04a50084	//add z4.s,z4.s,z5.s
2723*4757b351SPierre Pronchery.if	mixin == 1
2724*4757b351SPierre Pronchery	add	w8,w8,w12
2725*4757b351SPierre Pronchery.endif
2726*4757b351SPierre Pronchery.inst	0x04a90108	//add z8.s,z8.s,z9.s
2727*4757b351SPierre Pronchery.if	mixin == 1
2728*4757b351SPierre Pronchery	add	w9,w9,w13
2729*4757b351SPierre Pronchery.endif
2730*4757b351SPierre Pronchery.inst	0x04ad018c	//add z12.s,z12.s,z13.s
2731*4757b351SPierre Pronchery.if	mixin == 1
2732*4757b351SPierre Pronchery	add	w10,w10,w14
2733*4757b351SPierre Pronchery.endif
2734*4757b351SPierre Pronchery.inst	0x04a03063	//eor z3.d,z3.d,z0.d
2735*4757b351SPierre Pronchery.if	mixin == 1
2736*4757b351SPierre Pronchery	eor	w19,w19,w7
2737*4757b351SPierre Pronchery.endif
2738*4757b351SPierre Pronchery.inst	0x04a430e7	//eor z7.d,z7.d,z4.d
2739*4757b351SPierre Pronchery.if	mixin == 1
2740*4757b351SPierre Pronchery	eor	w20,w20,w8
2741*4757b351SPierre Pronchery.endif
2742*4757b351SPierre Pronchery.inst	0x04a8316b	//eor z11.d,z11.d,z8.d
2743*4757b351SPierre Pronchery.if	mixin == 1
2744*4757b351SPierre Pronchery	eor	w21,w21,w9
2745*4757b351SPierre Pronchery.endif
2746*4757b351SPierre Pronchery.inst	0x04ac31ef	//eor z15.d,z15.d,z12.d
2747*4757b351SPierre Pronchery.if	mixin == 1
2748*4757b351SPierre Pronchery	eor	w22,w22,w10
2749*4757b351SPierre Pronchery.endif
2750*4757b351SPierre Pronchery.inst	0x05a58063	//revh z3.s,p0/m,z3.s
2751*4757b351SPierre Pronchery.if	mixin == 1
2752*4757b351SPierre Pronchery	ror	w19,w19,#16
2753*4757b351SPierre Pronchery.endif
2754*4757b351SPierre Pronchery.inst	0x05a580e7	//revh z7.s,p0/m,z7.s
2755*4757b351SPierre Pronchery.if	mixin == 1
2756*4757b351SPierre Pronchery	ror	w20,w20,#16
2757*4757b351SPierre Pronchery.endif
2758*4757b351SPierre Pronchery.inst	0x05a5816b	//revh z11.s,p0/m,z11.s
2759*4757b351SPierre Pronchery.if	mixin == 1
2760*4757b351SPierre Pronchery	ror	w21,w21,#16
2761*4757b351SPierre Pronchery.endif
2762*4757b351SPierre Pronchery.inst	0x05a581ef	//revh z15.s,p0/m,z15.s
2763*4757b351SPierre Pronchery.if	mixin == 1
2764*4757b351SPierre Pronchery	ror	w22,w22,#16
2765*4757b351SPierre Pronchery.endif
2766*4757b351SPierre Pronchery.inst	0x04a30042	//add z2.s,z2.s,z3.s
2767*4757b351SPierre Pronchery.if	mixin == 1
2768*4757b351SPierre Pronchery	add	w15,w15,w19
2769*4757b351SPierre Pronchery.endif
2770*4757b351SPierre Pronchery.inst	0x04a700c6	//add z6.s,z6.s,z7.s
2771*4757b351SPierre Pronchery.if	mixin == 1
2772*4757b351SPierre Pronchery	add	w16,w16,w20
2773*4757b351SPierre Pronchery.endif
2774*4757b351SPierre Pronchery.inst	0x04ab014a	//add z10.s,z10.s,z11.s
2775*4757b351SPierre Pronchery.if	mixin == 1
2776*4757b351SPierre Pronchery	add	w17,w17,w21
2777*4757b351SPierre Pronchery.endif
2778*4757b351SPierre Pronchery.inst	0x04af01ce	//add z14.s,z14.s,z15.s
2779*4757b351SPierre Pronchery.if	mixin == 1
2780*4757b351SPierre Pronchery	add	w18,w18,w22
2781*4757b351SPierre Pronchery.endif
2782*4757b351SPierre Pronchery.inst	0x04a23021	//eor z1.d,z1.d,z2.d
2783*4757b351SPierre Pronchery.if	mixin == 1
2784*4757b351SPierre Pronchery	eor	w11,w11,w15
2785*4757b351SPierre Pronchery.endif
2786*4757b351SPierre Pronchery.inst	0x04a630a5	//eor z5.d,z5.d,z6.d
2787*4757b351SPierre Pronchery.if	mixin == 1
2788*4757b351SPierre Pronchery	eor	w12,w12,w16
2789*4757b351SPierre Pronchery.endif
2790*4757b351SPierre Pronchery.inst	0x04aa3129	//eor z9.d,z9.d,z10.d
2791*4757b351SPierre Pronchery.if	mixin == 1
2792*4757b351SPierre Pronchery	eor	w13,w13,w17
2793*4757b351SPierre Pronchery.endif
2794*4757b351SPierre Pronchery.inst	0x04ae31ad	//eor z13.d,z13.d,z14.d
2795*4757b351SPierre Pronchery.if	mixin == 1
2796*4757b351SPierre Pronchery	eor	w14,w14,w18
2797*4757b351SPierre Pronchery.endif
2798*4757b351SPierre Pronchery.inst	0x046c9c31	//lsl z17.s,z1.s,12
2799*4757b351SPierre Pronchery.inst	0x046c9cb2	//lsl z18.s,z5.s,12
2800*4757b351SPierre Pronchery.inst	0x046c9d33	//lsl z19.s,z9.s,12
2801*4757b351SPierre Pronchery.inst	0x046c9db4	//lsl z20.s,z13.s,12
2802*4757b351SPierre Pronchery.inst	0x046c9421	//lsr z1.s,z1.s,20
2803*4757b351SPierre Pronchery.if	mixin == 1
2804*4757b351SPierre Pronchery	ror	w11,w11,20
2805*4757b351SPierre Pronchery.endif
2806*4757b351SPierre Pronchery.inst	0x046c94a5	//lsr z5.s,z5.s,20
2807*4757b351SPierre Pronchery.if	mixin == 1
2808*4757b351SPierre Pronchery	ror	w12,w12,20
2809*4757b351SPierre Pronchery.endif
2810*4757b351SPierre Pronchery.inst	0x046c9529	//lsr z9.s,z9.s,20
2811*4757b351SPierre Pronchery.if	mixin == 1
2812*4757b351SPierre Pronchery	ror	w13,w13,20
2813*4757b351SPierre Pronchery.endif
2814*4757b351SPierre Pronchery.inst	0x046c95ad	//lsr z13.s,z13.s,20
2815*4757b351SPierre Pronchery.if	mixin == 1
2816*4757b351SPierre Pronchery	ror	w14,w14,20
2817*4757b351SPierre Pronchery.endif
2818*4757b351SPierre Pronchery.inst	0x04713021	//orr z1.d,z1.d,z17.d
2819*4757b351SPierre Pronchery.inst	0x047230a5	//orr z5.d,z5.d,z18.d
2820*4757b351SPierre Pronchery.inst	0x04733129	//orr z9.d,z9.d,z19.d
2821*4757b351SPierre Pronchery.inst	0x047431ad	//orr z13.d,z13.d,z20.d
2822*4757b351SPierre Pronchery.inst	0x04a10000	//add z0.s,z0.s,z1.s
2823*4757b351SPierre Pronchery.if	mixin == 1
2824*4757b351SPierre Pronchery	add	w7,w7,w11
2825*4757b351SPierre Pronchery.endif
2826*4757b351SPierre Pronchery.inst	0x04a50084	//add z4.s,z4.s,z5.s
2827*4757b351SPierre Pronchery.if	mixin == 1
2828*4757b351SPierre Pronchery	add	w8,w8,w12
2829*4757b351SPierre Pronchery.endif
2830*4757b351SPierre Pronchery.inst	0x04a90108	//add z8.s,z8.s,z9.s
2831*4757b351SPierre Pronchery.if	mixin == 1
2832*4757b351SPierre Pronchery	add	w9,w9,w13
2833*4757b351SPierre Pronchery.endif
2834*4757b351SPierre Pronchery.inst	0x04ad018c	//add z12.s,z12.s,z13.s
2835*4757b351SPierre Pronchery.if	mixin == 1
2836*4757b351SPierre Pronchery	add	w10,w10,w14
2837*4757b351SPierre Pronchery.endif
2838*4757b351SPierre Pronchery.inst	0x04a03063	//eor z3.d,z3.d,z0.d
2839*4757b351SPierre Pronchery.if	mixin == 1
2840*4757b351SPierre Pronchery	eor	w19,w19,w7
2841*4757b351SPierre Pronchery.endif
2842*4757b351SPierre Pronchery.inst	0x04a430e7	//eor z7.d,z7.d,z4.d
2843*4757b351SPierre Pronchery.if	mixin == 1
2844*4757b351SPierre Pronchery	eor	w20,w20,w8
2845*4757b351SPierre Pronchery.endif
2846*4757b351SPierre Pronchery.inst	0x04a8316b	//eor z11.d,z11.d,z8.d
2847*4757b351SPierre Pronchery.if	mixin == 1
2848*4757b351SPierre Pronchery	eor	w21,w21,w9
2849*4757b351SPierre Pronchery.endif
2850*4757b351SPierre Pronchery.inst	0x04ac31ef	//eor z15.d,z15.d,z12.d
2851*4757b351SPierre Pronchery.if	mixin == 1
2852*4757b351SPierre Pronchery	eor	w22,w22,w10
2853*4757b351SPierre Pronchery.endif
2854*4757b351SPierre Pronchery.inst	0x053f3063	//tbl z3.b,{z3.b},z31.b
2855*4757b351SPierre Pronchery.if	mixin == 1
2856*4757b351SPierre Pronchery	ror	w19,w19,#24
2857*4757b351SPierre Pronchery.endif
2858*4757b351SPierre Pronchery.inst	0x053f30e7	//tbl z7.b,{z7.b},z31.b
2859*4757b351SPierre Pronchery.if	mixin == 1
2860*4757b351SPierre Pronchery	ror	w20,w20,#24
2861*4757b351SPierre Pronchery.endif
2862*4757b351SPierre Pronchery.inst	0x053f316b	//tbl z11.b,{z11.b},z31.b
2863*4757b351SPierre Pronchery.if	mixin == 1
2864*4757b351SPierre Pronchery	ror	w21,w21,#24
2865*4757b351SPierre Pronchery.endif
2866*4757b351SPierre Pronchery.inst	0x053f31ef	//tbl z15.b,{z15.b},z31.b
2867*4757b351SPierre Pronchery.if	mixin == 1
2868*4757b351SPierre Pronchery	ror	w22,w22,#24
2869*4757b351SPierre Pronchery.endif
2870*4757b351SPierre Pronchery.inst	0x04a30042	//add z2.s,z2.s,z3.s
2871*4757b351SPierre Pronchery.if	mixin == 1
2872*4757b351SPierre Pronchery	add	w15,w15,w19
2873*4757b351SPierre Pronchery.endif
2874*4757b351SPierre Pronchery.inst	0x04a700c6	//add z6.s,z6.s,z7.s
2875*4757b351SPierre Pronchery.if	mixin == 1
2876*4757b351SPierre Pronchery	add	w16,w16,w20
2877*4757b351SPierre Pronchery.endif
2878*4757b351SPierre Pronchery.inst	0x04ab014a	//add z10.s,z10.s,z11.s
2879*4757b351SPierre Pronchery.if	mixin == 1
2880*4757b351SPierre Pronchery	add	w17,w17,w21
2881*4757b351SPierre Pronchery.endif
2882*4757b351SPierre Pronchery.inst	0x04af01ce	//add z14.s,z14.s,z15.s
2883*4757b351SPierre Pronchery.if	mixin == 1
2884*4757b351SPierre Pronchery	add	w18,w18,w22
2885*4757b351SPierre Pronchery.endif
2886*4757b351SPierre Pronchery.inst	0x04a23021	//eor z1.d,z1.d,z2.d
2887*4757b351SPierre Pronchery.if	mixin == 1
2888*4757b351SPierre Pronchery	eor	w11,w11,w15
2889*4757b351SPierre Pronchery.endif
2890*4757b351SPierre Pronchery.inst	0x04a630a5	//eor z5.d,z5.d,z6.d
2891*4757b351SPierre Pronchery.if	mixin == 1
2892*4757b351SPierre Pronchery	eor	w12,w12,w16
2893*4757b351SPierre Pronchery.endif
2894*4757b351SPierre Pronchery.inst	0x04aa3129	//eor z9.d,z9.d,z10.d
2895*4757b351SPierre Pronchery.if	mixin == 1
2896*4757b351SPierre Pronchery	eor	w13,w13,w17
2897*4757b351SPierre Pronchery.endif
2898*4757b351SPierre Pronchery.inst	0x04ae31ad	//eor z13.d,z13.d,z14.d
2899*4757b351SPierre Pronchery.if	mixin == 1
2900*4757b351SPierre Pronchery	eor	w14,w14,w18
2901*4757b351SPierre Pronchery.endif
2902*4757b351SPierre Pronchery.inst	0x04679c31	//lsl z17.s,z1.s,7
2903*4757b351SPierre Pronchery.inst	0x04679cb2	//lsl z18.s,z5.s,7
2904*4757b351SPierre Pronchery.inst	0x04679d33	//lsl z19.s,z9.s,7
2905*4757b351SPierre Pronchery.inst	0x04679db4	//lsl z20.s,z13.s,7
2906*4757b351SPierre Pronchery.inst	0x04679421	//lsr z1.s,z1.s,25
2907*4757b351SPierre Pronchery.if	mixin == 1
2908*4757b351SPierre Pronchery	ror	w11,w11,25
2909*4757b351SPierre Pronchery.endif
2910*4757b351SPierre Pronchery.inst	0x046794a5	//lsr z5.s,z5.s,25
2911*4757b351SPierre Pronchery.if	mixin == 1
2912*4757b351SPierre Pronchery	ror	w12,w12,25
2913*4757b351SPierre Pronchery.endif
2914*4757b351SPierre Pronchery.inst	0x04679529	//lsr z9.s,z9.s,25
2915*4757b351SPierre Pronchery.if	mixin == 1
2916*4757b351SPierre Pronchery	ror	w13,w13,25
2917*4757b351SPierre Pronchery.endif
2918*4757b351SPierre Pronchery.inst	0x046795ad	//lsr z13.s,z13.s,25
2919*4757b351SPierre Pronchery.if	mixin == 1
2920*4757b351SPierre Pronchery	ror	w14,w14,25
2921*4757b351SPierre Pronchery.endif
2922*4757b351SPierre Pronchery.inst	0x04713021	//orr z1.d,z1.d,z17.d
2923*4757b351SPierre Pronchery.inst	0x047230a5	//orr z5.d,z5.d,z18.d
2924*4757b351SPierre Pronchery.inst	0x04733129	//orr z9.d,z9.d,z19.d
2925*4757b351SPierre Pronchery.inst	0x047431ad	//orr z13.d,z13.d,z20.d
2926*4757b351SPierre Pronchery.inst	0x04a50000	//add z0.s,z0.s,z5.s
2927*4757b351SPierre Pronchery.if	mixin == 1
2928*4757b351SPierre Pronchery	add	w7,w7,w12
2929*4757b351SPierre Pronchery.endif
2930*4757b351SPierre Pronchery.inst	0x04a90084	//add z4.s,z4.s,z9.s
2931*4757b351SPierre Pronchery.if	mixin == 1
2932*4757b351SPierre Pronchery	add	w8,w8,w13
2933*4757b351SPierre Pronchery.endif
2934*4757b351SPierre Pronchery.inst	0x04ad0108	//add z8.s,z8.s,z13.s
2935*4757b351SPierre Pronchery.if	mixin == 1
2936*4757b351SPierre Pronchery	add	w9,w9,w14
2937*4757b351SPierre Pronchery.endif
2938*4757b351SPierre Pronchery.inst	0x04a1018c	//add z12.s,z12.s,z1.s
2939*4757b351SPierre Pronchery.if	mixin == 1
2940*4757b351SPierre Pronchery	add	w10,w10,w11
2941*4757b351SPierre Pronchery.endif
2942*4757b351SPierre Pronchery.inst	0x04a031ef	//eor z15.d,z15.d,z0.d
2943*4757b351SPierre Pronchery.if	mixin == 1
2944*4757b351SPierre Pronchery	eor	w22,w22,w7
2945*4757b351SPierre Pronchery.endif
2946*4757b351SPierre Pronchery.inst	0x04a43063	//eor z3.d,z3.d,z4.d
2947*4757b351SPierre Pronchery.if	mixin == 1
2948*4757b351SPierre Pronchery	eor	w19,w19,w8
2949*4757b351SPierre Pronchery.endif
2950*4757b351SPierre Pronchery.inst	0x04a830e7	//eor z7.d,z7.d,z8.d
2951*4757b351SPierre Pronchery.if	mixin == 1
2952*4757b351SPierre Pronchery	eor	w20,w20,w9
2953*4757b351SPierre Pronchery.endif
2954*4757b351SPierre Pronchery.inst	0x04ac316b	//eor z11.d,z11.d,z12.d
2955*4757b351SPierre Pronchery.if	mixin == 1
2956*4757b351SPierre Pronchery	eor	w21,w21,w10
2957*4757b351SPierre Pronchery.endif
2958*4757b351SPierre Pronchery.inst	0x05a581ef	//revh z15.s,p0/m,z15.s
2959*4757b351SPierre Pronchery.if	mixin == 1
2960*4757b351SPierre Pronchery	ror	w22,w22,#16
2961*4757b351SPierre Pronchery.endif
2962*4757b351SPierre Pronchery.inst	0x05a58063	//revh z3.s,p0/m,z3.s
2963*4757b351SPierre Pronchery.if	mixin == 1
2964*4757b351SPierre Pronchery	ror	w19,w19,#16
2965*4757b351SPierre Pronchery.endif
2966*4757b351SPierre Pronchery.inst	0x05a580e7	//revh z7.s,p0/m,z7.s
2967*4757b351SPierre Pronchery.if	mixin == 1
2968*4757b351SPierre Pronchery	ror	w20,w20,#16
2969*4757b351SPierre Pronchery.endif
2970*4757b351SPierre Pronchery.inst	0x05a5816b	//revh z11.s,p0/m,z11.s
2971*4757b351SPierre Pronchery.if	mixin == 1
2972*4757b351SPierre Pronchery	ror	w21,w21,#16
2973*4757b351SPierre Pronchery.endif
2974*4757b351SPierre Pronchery.inst	0x04af014a	//add z10.s,z10.s,z15.s
2975*4757b351SPierre Pronchery.if	mixin == 1
2976*4757b351SPierre Pronchery	add	w17,w17,w22
2977*4757b351SPierre Pronchery.endif
2978*4757b351SPierre Pronchery.inst	0x04a301ce	//add z14.s,z14.s,z3.s
2979*4757b351SPierre Pronchery.if	mixin == 1
2980*4757b351SPierre Pronchery	add	w18,w18,w19
2981*4757b351SPierre Pronchery.endif
2982*4757b351SPierre Pronchery.inst	0x04a70042	//add z2.s,z2.s,z7.s
2983*4757b351SPierre Pronchery.if	mixin == 1
2984*4757b351SPierre Pronchery	add	w15,w15,w20
2985*4757b351SPierre Pronchery.endif
2986*4757b351SPierre Pronchery.inst	0x04ab00c6	//add z6.s,z6.s,z11.s
2987*4757b351SPierre Pronchery.if	mixin == 1
2988*4757b351SPierre Pronchery	add	w16,w16,w21
2989*4757b351SPierre Pronchery.endif
2990*4757b351SPierre Pronchery.inst	0x04aa30a5	//eor z5.d,z5.d,z10.d
2991*4757b351SPierre Pronchery.if	mixin == 1
2992*4757b351SPierre Pronchery	eor	w12,w12,w17
2993*4757b351SPierre Pronchery.endif
2994*4757b351SPierre Pronchery.inst	0x04ae3129	//eor z9.d,z9.d,z14.d
2995*4757b351SPierre Pronchery.if	mixin == 1
2996*4757b351SPierre Pronchery	eor	w13,w13,w18
2997*4757b351SPierre Pronchery.endif
2998*4757b351SPierre Pronchery.inst	0x04a231ad	//eor z13.d,z13.d,z2.d
2999*4757b351SPierre Pronchery.if	mixin == 1
3000*4757b351SPierre Pronchery	eor	w14,w14,w15
3001*4757b351SPierre Pronchery.endif
3002*4757b351SPierre Pronchery.inst	0x04a63021	//eor z1.d,z1.d,z6.d
3003*4757b351SPierre Pronchery.if	mixin == 1
3004*4757b351SPierre Pronchery	eor	w11,w11,w16
3005*4757b351SPierre Pronchery.endif
3006*4757b351SPierre Pronchery.inst	0x046c9cb1	//lsl z17.s,z5.s,12
3007*4757b351SPierre Pronchery.inst	0x046c9d32	//lsl z18.s,z9.s,12
3008*4757b351SPierre Pronchery.inst	0x046c9db3	//lsl z19.s,z13.s,12
3009*4757b351SPierre Pronchery.inst	0x046c9c34	//lsl z20.s,z1.s,12
3010*4757b351SPierre Pronchery.inst	0x046c94a5	//lsr z5.s,z5.s,20
3011*4757b351SPierre Pronchery.if	mixin == 1
3012*4757b351SPierre Pronchery	ror	w12,w12,20
3013*4757b351SPierre Pronchery.endif
3014*4757b351SPierre Pronchery.inst	0x046c9529	//lsr z9.s,z9.s,20
3015*4757b351SPierre Pronchery.if	mixin == 1
3016*4757b351SPierre Pronchery	ror	w13,w13,20
3017*4757b351SPierre Pronchery.endif
3018*4757b351SPierre Pronchery.inst	0x046c95ad	//lsr z13.s,z13.s,20
3019*4757b351SPierre Pronchery.if	mixin == 1
3020*4757b351SPierre Pronchery	ror	w14,w14,20
3021*4757b351SPierre Pronchery.endif
3022*4757b351SPierre Pronchery.inst	0x046c9421	//lsr z1.s,z1.s,20
3023*4757b351SPierre Pronchery.if	mixin == 1
3024*4757b351SPierre Pronchery	ror	w11,w11,20
3025*4757b351SPierre Pronchery.endif
3026*4757b351SPierre Pronchery.inst	0x047130a5	//orr z5.d,z5.d,z17.d
3027*4757b351SPierre Pronchery.inst	0x04723129	//orr z9.d,z9.d,z18.d
3028*4757b351SPierre Pronchery.inst	0x047331ad	//orr z13.d,z13.d,z19.d
3029*4757b351SPierre Pronchery.inst	0x04743021	//orr z1.d,z1.d,z20.d
3030*4757b351SPierre Pronchery.inst	0x04a50000	//add z0.s,z0.s,z5.s
3031*4757b351SPierre Pronchery.if	mixin == 1
3032*4757b351SPierre Pronchery	add	w7,w7,w12
3033*4757b351SPierre Pronchery.endif
3034*4757b351SPierre Pronchery.inst	0x04a90084	//add z4.s,z4.s,z9.s
3035*4757b351SPierre Pronchery.if	mixin == 1
3036*4757b351SPierre Pronchery	add	w8,w8,w13
3037*4757b351SPierre Pronchery.endif
3038*4757b351SPierre Pronchery.inst	0x04ad0108	//add z8.s,z8.s,z13.s
3039*4757b351SPierre Pronchery.if	mixin == 1
3040*4757b351SPierre Pronchery	add	w9,w9,w14
3041*4757b351SPierre Pronchery.endif
3042*4757b351SPierre Pronchery.inst	0x04a1018c	//add z12.s,z12.s,z1.s
3043*4757b351SPierre Pronchery.if	mixin == 1
3044*4757b351SPierre Pronchery	add	w10,w10,w11
3045*4757b351SPierre Pronchery.endif
3046*4757b351SPierre Pronchery.inst	0x04a031ef	//eor z15.d,z15.d,z0.d
3047*4757b351SPierre Pronchery.if	mixin == 1
3048*4757b351SPierre Pronchery	eor	w22,w22,w7
3049*4757b351SPierre Pronchery.endif
3050*4757b351SPierre Pronchery.inst	0x04a43063	//eor z3.d,z3.d,z4.d
3051*4757b351SPierre Pronchery.if	mixin == 1
3052*4757b351SPierre Pronchery	eor	w19,w19,w8
3053*4757b351SPierre Pronchery.endif
3054*4757b351SPierre Pronchery.inst	0x04a830e7	//eor z7.d,z7.d,z8.d
3055*4757b351SPierre Pronchery.if	mixin == 1
3056*4757b351SPierre Pronchery	eor	w20,w20,w9
3057*4757b351SPierre Pronchery.endif
3058*4757b351SPierre Pronchery.inst	0x04ac316b	//eor z11.d,z11.d,z12.d
3059*4757b351SPierre Pronchery.if	mixin == 1
3060*4757b351SPierre Pronchery	eor	w21,w21,w10
3061*4757b351SPierre Pronchery.endif
3062*4757b351SPierre Pronchery.inst	0x053f31ef	//tbl z15.b,{z15.b},z31.b
3063*4757b351SPierre Pronchery.if	mixin == 1
3064*4757b351SPierre Pronchery	ror	w22,w22,#24
3065*4757b351SPierre Pronchery.endif
3066*4757b351SPierre Pronchery.inst	0x053f3063	//tbl z3.b,{z3.b},z31.b
3067*4757b351SPierre Pronchery.if	mixin == 1
3068*4757b351SPierre Pronchery	ror	w19,w19,#24
3069*4757b351SPierre Pronchery.endif
3070*4757b351SPierre Pronchery.inst	0x053f30e7	//tbl z7.b,{z7.b},z31.b
3071*4757b351SPierre Pronchery.if	mixin == 1
3072*4757b351SPierre Pronchery	ror	w20,w20,#24
3073*4757b351SPierre Pronchery.endif
3074*4757b351SPierre Pronchery.inst	0x053f316b	//tbl z11.b,{z11.b},z31.b
3075*4757b351SPierre Pronchery.if	mixin == 1
3076*4757b351SPierre Pronchery	ror	w21,w21,#24
3077*4757b351SPierre Pronchery.endif
3078*4757b351SPierre Pronchery.inst	0x04af014a	//add z10.s,z10.s,z15.s
3079*4757b351SPierre Pronchery.if	mixin == 1
3080*4757b351SPierre Pronchery	add	w17,w17,w22
3081*4757b351SPierre Pronchery.endif
3082*4757b351SPierre Pronchery.inst	0x04a301ce	//add z14.s,z14.s,z3.s
3083*4757b351SPierre Pronchery.if	mixin == 1
3084*4757b351SPierre Pronchery	add	w18,w18,w19
3085*4757b351SPierre Pronchery.endif
3086*4757b351SPierre Pronchery.inst	0x04a70042	//add z2.s,z2.s,z7.s
3087*4757b351SPierre Pronchery.if	mixin == 1
3088*4757b351SPierre Pronchery	add	w15,w15,w20
3089*4757b351SPierre Pronchery.endif
3090*4757b351SPierre Pronchery.inst	0x04ab00c6	//add z6.s,z6.s,z11.s
3091*4757b351SPierre Pronchery.if	mixin == 1
3092*4757b351SPierre Pronchery	add	w16,w16,w21
3093*4757b351SPierre Pronchery.endif
3094*4757b351SPierre Pronchery.inst	0x04aa30a5	//eor z5.d,z5.d,z10.d
3095*4757b351SPierre Pronchery.if	mixin == 1
3096*4757b351SPierre Pronchery	eor	w12,w12,w17
3097*4757b351SPierre Pronchery.endif
3098*4757b351SPierre Pronchery.inst	0x04ae3129	//eor z9.d,z9.d,z14.d
3099*4757b351SPierre Pronchery.if	mixin == 1
3100*4757b351SPierre Pronchery	eor	w13,w13,w18
3101*4757b351SPierre Pronchery.endif
3102*4757b351SPierre Pronchery.inst	0x04a231ad	//eor z13.d,z13.d,z2.d
3103*4757b351SPierre Pronchery.if	mixin == 1
3104*4757b351SPierre Pronchery	eor	w14,w14,w15
3105*4757b351SPierre Pronchery.endif
3106*4757b351SPierre Pronchery.inst	0x04a63021	//eor z1.d,z1.d,z6.d
3107*4757b351SPierre Pronchery.if	mixin == 1
3108*4757b351SPierre Pronchery	eor	w11,w11,w16
3109*4757b351SPierre Pronchery.endif
3110*4757b351SPierre Pronchery.inst	0x04679cb1	//lsl z17.s,z5.s,7
3111*4757b351SPierre Pronchery.inst	0x04679d32	//lsl z18.s,z9.s,7
3112*4757b351SPierre Pronchery.inst	0x04679db3	//lsl z19.s,z13.s,7
3113*4757b351SPierre Pronchery.inst	0x04679c34	//lsl z20.s,z1.s,7
3114*4757b351SPierre Pronchery.inst	0x046794a5	//lsr z5.s,z5.s,25
3115*4757b351SPierre Pronchery.if	mixin == 1
3116*4757b351SPierre Pronchery	ror	w12,w12,25
3117*4757b351SPierre Pronchery.endif
3118*4757b351SPierre Pronchery.inst	0x04679529	//lsr z9.s,z9.s,25
3119*4757b351SPierre Pronchery.if	mixin == 1
3120*4757b351SPierre Pronchery	ror	w13,w13,25
3121*4757b351SPierre Pronchery.endif
3122*4757b351SPierre Pronchery.inst	0x046795ad	//lsr z13.s,z13.s,25
3123*4757b351SPierre Pronchery.if	mixin == 1
3124*4757b351SPierre Pronchery	ror	w14,w14,25
3125*4757b351SPierre Pronchery.endif
3126*4757b351SPierre Pronchery.inst	0x04679421	//lsr z1.s,z1.s,25
3127*4757b351SPierre Pronchery.if	mixin == 1
3128*4757b351SPierre Pronchery	ror	w11,w11,25
3129*4757b351SPierre Pronchery.endif
3130*4757b351SPierre Pronchery.inst	0x047130a5	//orr z5.d,z5.d,z17.d
3131*4757b351SPierre Pronchery.inst	0x04723129	//orr z9.d,z9.d,z18.d
3132*4757b351SPierre Pronchery.inst	0x047331ad	//orr z13.d,z13.d,z19.d
3133*4757b351SPierre Pronchery.inst	0x04743021	//orr z1.d,z1.d,z20.d
3134*4757b351SPierre Pronchery	sub	x6,x6,1
3135*4757b351SPierre Pronchery	cbnz	x6,10b
3136*4757b351SPierre Pronchery	lsr	x6,x28,#32
3137*4757b351SPierre Pronchery.inst	0x05a03b91	//dup z17.s,w28
3138*4757b351SPierre Pronchery.inst	0x05a038d2	//dup z18.s,w6
3139*4757b351SPierre Pronchery	lsr	x6,x29,#32
3140*4757b351SPierre Pronchery.inst	0x05a038d3	//dup z19.s,w6
3141*4757b351SPierre Pronchery	lsr	x6,x30,#32
3142*4757b351SPierre Pronchery.if	mixin == 1
3143*4757b351SPierre Pronchery	add	w7,w7,w23
3144*4757b351SPierre Pronchery.endif
3145*4757b351SPierre Pronchery.inst	0x04b90000	//add z0.s,z0.s,z25.s
3146*4757b351SPierre Pronchery.if	mixin == 1
3147*4757b351SPierre Pronchery	add	x8,x8,x23,lsr #32
3148*4757b351SPierre Pronchery.endif
3149*4757b351SPierre Pronchery.inst	0x04ba0084	//add z4.s,z4.s,z26.s
3150*4757b351SPierre Pronchery.if	mixin == 1
3151*4757b351SPierre Pronchery	add	x7,x7,x8,lsl #32  // pack
3152*4757b351SPierre Pronchery.endif
3153*4757b351SPierre Pronchery.if	mixin == 1
3154*4757b351SPierre Pronchery	add	w9,w9,w24
3155*4757b351SPierre Pronchery.endif
3156*4757b351SPierre Pronchery.inst	0x04bb0108	//add z8.s,z8.s,z27.s
3157*4757b351SPierre Pronchery.if	mixin == 1
3158*4757b351SPierre Pronchery	add	x10,x10,x24,lsr #32
3159*4757b351SPierre Pronchery.endif
3160*4757b351SPierre Pronchery.inst	0x04bc018c	//add z12.s,z12.s,z28.s
3161*4757b351SPierre Pronchery.if	mixin == 1
3162*4757b351SPierre Pronchery	add	x9,x9,x10,lsl #32  // pack
3163*4757b351SPierre Pronchery.endif
3164*4757b351SPierre Pronchery.if	mixin == 1
3165*4757b351SPierre Pronchery	ldp	x8,x10,[x1],#16
3166*4757b351SPierre Pronchery.endif
3167*4757b351SPierre Pronchery.if	mixin == 1
3168*4757b351SPierre Pronchery	add	w11,w11,w25
3169*4757b351SPierre Pronchery.endif
3170*4757b351SPierre Pronchery.inst	0x04bd0021	//add z1.s,z1.s,z29.s
3171*4757b351SPierre Pronchery.if	mixin == 1
3172*4757b351SPierre Pronchery	add	x12,x12,x25,lsr #32
3173*4757b351SPierre Pronchery.endif
3174*4757b351SPierre Pronchery.inst	0x04be00a5	//add z5.s,z5.s,z30.s
3175*4757b351SPierre Pronchery.if	mixin == 1
3176*4757b351SPierre Pronchery	add	x11,x11,x12,lsl #32  // pack
3177*4757b351SPierre Pronchery.endif
3178*4757b351SPierre Pronchery.if	mixin == 1
3179*4757b351SPierre Pronchery	add	w13,w13,w26
3180*4757b351SPierre Pronchery.endif
3181*4757b351SPierre Pronchery.inst	0x04b50129	//add z9.s,z9.s,z21.s
3182*4757b351SPierre Pronchery.if	mixin == 1
3183*4757b351SPierre Pronchery	add	x14,x14,x26,lsr #32
3184*4757b351SPierre Pronchery.endif
3185*4757b351SPierre Pronchery.inst	0x04b601ad	//add z13.s,z13.s,z22.s
3186*4757b351SPierre Pronchery.if	mixin == 1
3187*4757b351SPierre Pronchery	add	x13,x13,x14,lsl #32  // pack
3188*4757b351SPierre Pronchery.endif
3189*4757b351SPierre Pronchery.if	mixin == 1
3190*4757b351SPierre Pronchery	ldp	x12,x14,[x1],#16
3191*4757b351SPierre Pronchery.endif
3192*4757b351SPierre Pronchery.if	mixin == 1
3193*4757b351SPierre Pronchery	add	w15,w15,w27
3194*4757b351SPierre Pronchery.endif
3195*4757b351SPierre Pronchery.inst	0x04b70042	//add z2.s,z2.s,z23.s
3196*4757b351SPierre Pronchery.if	mixin == 1
3197*4757b351SPierre Pronchery	add	x16,x16,x27,lsr #32
3198*4757b351SPierre Pronchery.endif
3199*4757b351SPierre Pronchery.inst	0x04b800c6	//add z6.s,z6.s,z24.s
3200*4757b351SPierre Pronchery.if	mixin == 1
3201*4757b351SPierre Pronchery	add	x15,x15,x16,lsl #32  // pack
3202*4757b351SPierre Pronchery.endif
3203*4757b351SPierre Pronchery.if	mixin == 1
3204*4757b351SPierre Pronchery	add	w17,w17,w28
3205*4757b351SPierre Pronchery.endif
3206*4757b351SPierre Pronchery.inst	0x04b1014a	//add z10.s,z10.s,z17.s
3207*4757b351SPierre Pronchery.if	mixin == 1
3208*4757b351SPierre Pronchery	add	x18,x18,x28,lsr #32
3209*4757b351SPierre Pronchery.endif
3210*4757b351SPierre Pronchery.inst	0x04b201ce	//add z14.s,z14.s,z18.s
3211*4757b351SPierre Pronchery.if	mixin == 1
3212*4757b351SPierre Pronchery	add	x17,x17,x18,lsl #32  // pack
3213*4757b351SPierre Pronchery.endif
3214*4757b351SPierre Pronchery.if	mixin == 1
3215*4757b351SPierre Pronchery	ldp	x16,x18,[x1],#16
3216*4757b351SPierre Pronchery.endif
3217*4757b351SPierre Pronchery.inst	0x05a03bd4	//dup z20.s,w30
3218*4757b351SPierre Pronchery.inst	0x05a038d9	//dup z25.s,w6	// bak[15] not available for SVE
3219*4757b351SPierre Pronchery.if	mixin == 1
3220*4757b351SPierre Pronchery	add	w19,w19,w29
3221*4757b351SPierre Pronchery.endif
3222*4757b351SPierre Pronchery.inst	0x04b00063	//add z3.s,z3.s,z16.s
3223*4757b351SPierre Pronchery.if	mixin == 1
3224*4757b351SPierre Pronchery	add	x20,x20,x29,lsr #32
3225*4757b351SPierre Pronchery.endif
3226*4757b351SPierre Pronchery.inst	0x04b300e7	//add z7.s,z7.s,z19.s
3227*4757b351SPierre Pronchery.if	mixin == 1
3228*4757b351SPierre Pronchery	add	x19,x19,x20,lsl #32  // pack
3229*4757b351SPierre Pronchery.endif
3230*4757b351SPierre Pronchery.if	mixin == 1
3231*4757b351SPierre Pronchery	add	w21,w21,w30
3232*4757b351SPierre Pronchery.endif
3233*4757b351SPierre Pronchery.inst	0x04b4016b	//add z11.s,z11.s,z20.s
3234*4757b351SPierre Pronchery.if	mixin == 1
3235*4757b351SPierre Pronchery	add	x22,x22,x30,lsr #32
3236*4757b351SPierre Pronchery.endif
3237*4757b351SPierre Pronchery.inst	0x04b901ef	//add z15.s,z15.s,z25.s
3238*4757b351SPierre Pronchery.if	mixin == 1
3239*4757b351SPierre Pronchery	add	x21,x21,x22,lsl #32  // pack
3240*4757b351SPierre Pronchery.endif
3241*4757b351SPierre Pronchery.if	mixin == 1
3242*4757b351SPierre Pronchery	ldp	x20,x22,[x1],#16
3243*4757b351SPierre Pronchery.endif
3244*4757b351SPierre Pronchery#ifdef	__AARCH64EB__
3245*4757b351SPierre Pronchery	rev	x7,x7
3246*4757b351SPierre Pronchery.inst	0x05a48000	//revb z0.s,p0/m,z0.s
3247*4757b351SPierre Pronchery.inst	0x05a48084	//revb z4.s,p0/m,z4.s
3248*4757b351SPierre Pronchery	rev	x9,x9
3249*4757b351SPierre Pronchery.inst	0x05a48108	//revb z8.s,p0/m,z8.s
3250*4757b351SPierre Pronchery.inst	0x05a4818c	//revb z12.s,p0/m,z12.s
3251*4757b351SPierre Pronchery	rev	x11,x11
3252*4757b351SPierre Pronchery.inst	0x05a48021	//revb z1.s,p0/m,z1.s
3253*4757b351SPierre Pronchery.inst	0x05a480a5	//revb z5.s,p0/m,z5.s
3254*4757b351SPierre Pronchery	rev	x13,x13
3255*4757b351SPierre Pronchery.inst	0x05a48129	//revb z9.s,p0/m,z9.s
3256*4757b351SPierre Pronchery.inst	0x05a481ad	//revb z13.s,p0/m,z13.s
3257*4757b351SPierre Pronchery	rev	x15,x15
3258*4757b351SPierre Pronchery.inst	0x05a48042	//revb z2.s,p0/m,z2.s
3259*4757b351SPierre Pronchery.inst	0x05a480c6	//revb z6.s,p0/m,z6.s
3260*4757b351SPierre Pronchery	rev	x17,x17
3261*4757b351SPierre Pronchery.inst	0x05a4814a	//revb z10.s,p0/m,z10.s
3262*4757b351SPierre Pronchery.inst	0x05a481ce	//revb z14.s,p0/m,z14.s
3263*4757b351SPierre Pronchery	rev	x19,x19
3264*4757b351SPierre Pronchery.inst	0x05a48063	//revb z3.s,p0/m,z3.s
3265*4757b351SPierre Pronchery.inst	0x05a480e7	//revb z7.s,p0/m,z7.s
3266*4757b351SPierre Pronchery	rev	x21,x21
3267*4757b351SPierre Pronchery.inst	0x05a4816b	//revb z11.s,p0/m,z11.s
3268*4757b351SPierre Pronchery.inst	0x05a481ef	//revb z15.s,p0/m,z15.s
3269*4757b351SPierre Pronchery#endif
3270*4757b351SPierre Pronchery.if	mixin == 1
3271*4757b351SPierre Pronchery	add	x29,x29,#1
3272*4757b351SPierre Pronchery.endif
3273*4757b351SPierre Pronchery	cmp	x5,4
3274*4757b351SPierre Pronchery	b.ne	200f
3275*4757b351SPierre Pronchery.if	mixin == 1
3276*4757b351SPierre Pronchery	eor	x7,x7,x8
3277*4757b351SPierre Pronchery.endif
3278*4757b351SPierre Pronchery.if	mixin == 1
3279*4757b351SPierre Pronchery	eor	x9,x9,x10
3280*4757b351SPierre Pronchery.endif
3281*4757b351SPierre Pronchery.if	mixin == 1
3282*4757b351SPierre Pronchery	eor	x11,x11,x12
3283*4757b351SPierre Pronchery.endif
3284*4757b351SPierre Pronchery.inst	0x05a46011	//zip1 z17.s,z0.s,z4.s
3285*4757b351SPierre Pronchery.inst	0x05a46412	//zip2 z18.s,z0.s,z4.s
3286*4757b351SPierre Pronchery.inst	0x05ac6113	//zip1 z19.s,z8.s,z12.s
3287*4757b351SPierre Pronchery.inst	0x05ac6514	//zip2 z20.s,z8.s,z12.s
3288*4757b351SPierre Pronchery
3289*4757b351SPierre Pronchery.inst	0x05a56035	//zip1 z21.s,z1.s,z5.s
3290*4757b351SPierre Pronchery.inst	0x05a56436	//zip2 z22.s,z1.s,z5.s
3291*4757b351SPierre Pronchery.inst	0x05ad6137	//zip1 z23.s,z9.s,z13.s
3292*4757b351SPierre Pronchery.inst	0x05ad6538	//zip2 z24.s,z9.s,z13.s
3293*4757b351SPierre Pronchery
3294*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
3295*4757b351SPierre Pronchery.inst	0x05f36624	//zip2 z4.d,z17.d,z19.d
3296*4757b351SPierre Pronchery.inst	0x05f46248	//zip1 z8.d,z18.d,z20.d
3297*4757b351SPierre Pronchery.inst	0x05f4664c	//zip2 z12.d,z18.d,z20.d
3298*4757b351SPierre Pronchery
3299*4757b351SPierre Pronchery.inst	0x05f762a1	//zip1 z1.d,z21.d,z23.d
3300*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
3301*4757b351SPierre Pronchery.inst	0x05f862c9	//zip1 z9.d,z22.d,z24.d
3302*4757b351SPierre Pronchery.inst	0x05f866cd	//zip2 z13.d,z22.d,z24.d
3303*4757b351SPierre Pronchery.if	mixin == 1
3304*4757b351SPierre Pronchery	eor	x13,x13,x14
3305*4757b351SPierre Pronchery.endif
3306*4757b351SPierre Pronchery.if	mixin == 1
3307*4757b351SPierre Pronchery	eor	x15,x15,x16
3308*4757b351SPierre Pronchery.endif
3309*4757b351SPierre Pronchery.if	mixin == 1
3310*4757b351SPierre Pronchery	eor	x17,x17,x18
3311*4757b351SPierre Pronchery.endif
3312*4757b351SPierre Pronchery.inst	0x05a66051	//zip1 z17.s,z2.s,z6.s
3313*4757b351SPierre Pronchery.inst	0x05a66452	//zip2 z18.s,z2.s,z6.s
3314*4757b351SPierre Pronchery.inst	0x05ae6153	//zip1 z19.s,z10.s,z14.s
3315*4757b351SPierre Pronchery.inst	0x05ae6554	//zip2 z20.s,z10.s,z14.s
3316*4757b351SPierre Pronchery
3317*4757b351SPierre Pronchery.inst	0x05a76075	//zip1 z21.s,z3.s,z7.s
3318*4757b351SPierre Pronchery.inst	0x05a76476	//zip2 z22.s,z3.s,z7.s
3319*4757b351SPierre Pronchery.inst	0x05af6177	//zip1 z23.s,z11.s,z15.s
3320*4757b351SPierre Pronchery.inst	0x05af6578	//zip2 z24.s,z11.s,z15.s
3321*4757b351SPierre Pronchery
3322*4757b351SPierre Pronchery.inst	0x05f36222	//zip1 z2.d,z17.d,z19.d
3323*4757b351SPierre Pronchery.inst	0x05f36626	//zip2 z6.d,z17.d,z19.d
3324*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
3325*4757b351SPierre Pronchery.inst	0x05f4664e	//zip2 z14.d,z18.d,z20.d
3326*4757b351SPierre Pronchery
3327*4757b351SPierre Pronchery.inst	0x05f762a3	//zip1 z3.d,z21.d,z23.d
3328*4757b351SPierre Pronchery.inst	0x05f766a7	//zip2 z7.d,z21.d,z23.d
3329*4757b351SPierre Pronchery.inst	0x05f862cb	//zip1 z11.d,z22.d,z24.d
3330*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
3331*4757b351SPierre Pronchery.if	mixin == 1
3332*4757b351SPierre Pronchery	eor	x19,x19,x20
3333*4757b351SPierre Pronchery.endif
3334*4757b351SPierre Pronchery.if	mixin == 1
3335*4757b351SPierre Pronchery	eor	x21,x21,x22
3336*4757b351SPierre Pronchery.endif
3337*4757b351SPierre Pronchery	ld1	{v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64
3338*4757b351SPierre Pronchery	ld1	{v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64
3339*4757b351SPierre Pronchery.inst	0x04b13000	//eor z0.d,z0.d,z17.d
3340*4757b351SPierre Pronchery.inst	0x04b23021	//eor z1.d,z1.d,z18.d
3341*4757b351SPierre Pronchery.inst	0x04b33042	//eor z2.d,z2.d,z19.d
3342*4757b351SPierre Pronchery.inst	0x04b43063	//eor z3.d,z3.d,z20.d
3343*4757b351SPierre Pronchery.inst	0x04b53084	//eor z4.d,z4.d,z21.d
3344*4757b351SPierre Pronchery.inst	0x04b630a5	//eor z5.d,z5.d,z22.d
3345*4757b351SPierre Pronchery.inst	0x04b730c6	//eor z6.d,z6.d,z23.d
3346*4757b351SPierre Pronchery.inst	0x04b830e7	//eor z7.d,z7.d,z24.d
3347*4757b351SPierre Pronchery	ld1	{v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64
3348*4757b351SPierre Pronchery	ld1	{v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64
3349*4757b351SPierre Pronchery.if	mixin == 1
3350*4757b351SPierre Pronchery	stp	x7,x9,[x0],#16
3351*4757b351SPierre Pronchery.endif
3352*4757b351SPierre Pronchery.inst	0x04b13108	//eor z8.d,z8.d,z17.d
3353*4757b351SPierre Pronchery.inst	0x04b23129	//eor z9.d,z9.d,z18.d
3354*4757b351SPierre Pronchery.if	mixin == 1
3355*4757b351SPierre Pronchery	stp	x11,x13,[x0],#16
3356*4757b351SPierre Pronchery.endif
3357*4757b351SPierre Pronchery.inst	0x04b3314a	//eor z10.d,z10.d,z19.d
3358*4757b351SPierre Pronchery.inst	0x04b4316b	//eor z11.d,z11.d,z20.d
3359*4757b351SPierre Pronchery.if	mixin == 1
3360*4757b351SPierre Pronchery	stp	x15,x17,[x0],#16
3361*4757b351SPierre Pronchery.endif
3362*4757b351SPierre Pronchery.inst	0x04b5318c	//eor z12.d,z12.d,z21.d
3363*4757b351SPierre Pronchery.inst	0x04b631ad	//eor z13.d,z13.d,z22.d
3364*4757b351SPierre Pronchery.if	mixin == 1
3365*4757b351SPierre Pronchery	stp	x19,x21,[x0],#16
3366*4757b351SPierre Pronchery.endif
3367*4757b351SPierre Pronchery.inst	0x04b731ce	//eor z14.d,z14.d,z23.d
3368*4757b351SPierre Pronchery.inst	0x04b831ef	//eor z15.d,z15.d,z24.d
3369*4757b351SPierre Pronchery	st1	{v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64
3370*4757b351SPierre Pronchery	st1	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
3371*4757b351SPierre Pronchery	st1	{v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64
3372*4757b351SPierre Pronchery	st1	{v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64
3373*4757b351SPierre Pronchery	b	210f
3374*4757b351SPierre Pronchery200:
3375*4757b351SPierre Pronchery.inst	0x05a16011	//zip1 z17.s,z0.s,z1.s
3376*4757b351SPierre Pronchery.inst	0x05a16412	//zip2 z18.s,z0.s,z1.s
3377*4757b351SPierre Pronchery.inst	0x05a36053	//zip1 z19.s,z2.s,z3.s
3378*4757b351SPierre Pronchery.inst	0x05a36454	//zip2 z20.s,z2.s,z3.s
3379*4757b351SPierre Pronchery
3380*4757b351SPierre Pronchery.inst	0x05a56095	//zip1 z21.s,z4.s,z5.s
3381*4757b351SPierre Pronchery.inst	0x05a56496	//zip2 z22.s,z4.s,z5.s
3382*4757b351SPierre Pronchery.inst	0x05a760d7	//zip1 z23.s,z6.s,z7.s
3383*4757b351SPierre Pronchery.inst	0x05a764d8	//zip2 z24.s,z6.s,z7.s
3384*4757b351SPierre Pronchery
3385*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
3386*4757b351SPierre Pronchery.inst	0x05f36621	//zip2 z1.d,z17.d,z19.d
3387*4757b351SPierre Pronchery.inst	0x05f46242	//zip1 z2.d,z18.d,z20.d
3388*4757b351SPierre Pronchery.inst	0x05f46643	//zip2 z3.d,z18.d,z20.d
3389*4757b351SPierre Pronchery
3390*4757b351SPierre Pronchery.inst	0x05f762a4	//zip1 z4.d,z21.d,z23.d
3391*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
3392*4757b351SPierre Pronchery.inst	0x05f862c6	//zip1 z6.d,z22.d,z24.d
3393*4757b351SPierre Pronchery.inst	0x05f866c7	//zip2 z7.d,z22.d,z24.d
3394*4757b351SPierre Pronchery.if	mixin == 1
3395*4757b351SPierre Pronchery	eor	x7,x7,x8
3396*4757b351SPierre Pronchery.endif
3397*4757b351SPierre Pronchery.if	mixin == 1
3398*4757b351SPierre Pronchery	eor	x9,x9,x10
3399*4757b351SPierre Pronchery.endif
3400*4757b351SPierre Pronchery.inst	0x05a96111	//zip1 z17.s,z8.s,z9.s
3401*4757b351SPierre Pronchery.inst	0x05a96512	//zip2 z18.s,z8.s,z9.s
3402*4757b351SPierre Pronchery.inst	0x05ab6153	//zip1 z19.s,z10.s,z11.s
3403*4757b351SPierre Pronchery.inst	0x05ab6554	//zip2 z20.s,z10.s,z11.s
3404*4757b351SPierre Pronchery
3405*4757b351SPierre Pronchery.inst	0x05ad6195	//zip1 z21.s,z12.s,z13.s
3406*4757b351SPierre Pronchery.inst	0x05ad6596	//zip2 z22.s,z12.s,z13.s
3407*4757b351SPierre Pronchery.inst	0x05af61d7	//zip1 z23.s,z14.s,z15.s
3408*4757b351SPierre Pronchery.inst	0x05af65d8	//zip2 z24.s,z14.s,z15.s
3409*4757b351SPierre Pronchery
3410*4757b351SPierre Pronchery.inst	0x05f36228	//zip1 z8.d,z17.d,z19.d
3411*4757b351SPierre Pronchery.inst	0x05f36629	//zip2 z9.d,z17.d,z19.d
3412*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
3413*4757b351SPierre Pronchery.inst	0x05f4664b	//zip2 z11.d,z18.d,z20.d
3414*4757b351SPierre Pronchery
3415*4757b351SPierre Pronchery.inst	0x05f762ac	//zip1 z12.d,z21.d,z23.d
3416*4757b351SPierre Pronchery.inst	0x05f766ad	//zip2 z13.d,z21.d,z23.d
3417*4757b351SPierre Pronchery.inst	0x05f862ce	//zip1 z14.d,z22.d,z24.d
3418*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
3419*4757b351SPierre Pronchery.if	mixin == 1
3420*4757b351SPierre Pronchery	eor	x11,x11,x12
3421*4757b351SPierre Pronchery.endif
3422*4757b351SPierre Pronchery.if	mixin == 1
3423*4757b351SPierre Pronchery	eor	x13,x13,x14
3424*4757b351SPierre Pronchery.endif
3425*4757b351SPierre Pronchery.inst	0x05a46011	//zip1 z17.s,z0.s,z4.s
3426*4757b351SPierre Pronchery.inst	0x05a46412	//zip2 z18.s,z0.s,z4.s
3427*4757b351SPierre Pronchery.inst	0x05ac6113	//zip1 z19.s,z8.s,z12.s
3428*4757b351SPierre Pronchery.inst	0x05ac6514	//zip2 z20.s,z8.s,z12.s
3429*4757b351SPierre Pronchery
3430*4757b351SPierre Pronchery.inst	0x05a56035	//zip1 z21.s,z1.s,z5.s
3431*4757b351SPierre Pronchery.inst	0x05a56436	//zip2 z22.s,z1.s,z5.s
3432*4757b351SPierre Pronchery.inst	0x05ad6137	//zip1 z23.s,z9.s,z13.s
3433*4757b351SPierre Pronchery.inst	0x05ad6538	//zip2 z24.s,z9.s,z13.s
3434*4757b351SPierre Pronchery
3435*4757b351SPierre Pronchery.inst	0x05f36220	//zip1 z0.d,z17.d,z19.d
3436*4757b351SPierre Pronchery.inst	0x05f36624	//zip2 z4.d,z17.d,z19.d
3437*4757b351SPierre Pronchery.inst	0x05f46248	//zip1 z8.d,z18.d,z20.d
3438*4757b351SPierre Pronchery.inst	0x05f4664c	//zip2 z12.d,z18.d,z20.d
3439*4757b351SPierre Pronchery
3440*4757b351SPierre Pronchery.inst	0x05f762a1	//zip1 z1.d,z21.d,z23.d
3441*4757b351SPierre Pronchery.inst	0x05f766a5	//zip2 z5.d,z21.d,z23.d
3442*4757b351SPierre Pronchery.inst	0x05f862c9	//zip1 z9.d,z22.d,z24.d
3443*4757b351SPierre Pronchery.inst	0x05f866cd	//zip2 z13.d,z22.d,z24.d
3444*4757b351SPierre Pronchery.if	mixin == 1
3445*4757b351SPierre Pronchery	eor	x15,x15,x16
3446*4757b351SPierre Pronchery.endif
3447*4757b351SPierre Pronchery.if	mixin == 1
3448*4757b351SPierre Pronchery	eor	x17,x17,x18
3449*4757b351SPierre Pronchery.endif
3450*4757b351SPierre Pronchery.inst	0x05a66051	//zip1 z17.s,z2.s,z6.s
3451*4757b351SPierre Pronchery.inst	0x05a66452	//zip2 z18.s,z2.s,z6.s
3452*4757b351SPierre Pronchery.inst	0x05ae6153	//zip1 z19.s,z10.s,z14.s
3453*4757b351SPierre Pronchery.inst	0x05ae6554	//zip2 z20.s,z10.s,z14.s
3454*4757b351SPierre Pronchery
3455*4757b351SPierre Pronchery.inst	0x05a76075	//zip1 z21.s,z3.s,z7.s
3456*4757b351SPierre Pronchery.inst	0x05a76476	//zip2 z22.s,z3.s,z7.s
3457*4757b351SPierre Pronchery.inst	0x05af6177	//zip1 z23.s,z11.s,z15.s
3458*4757b351SPierre Pronchery.inst	0x05af6578	//zip2 z24.s,z11.s,z15.s
3459*4757b351SPierre Pronchery
3460*4757b351SPierre Pronchery.inst	0x05f36222	//zip1 z2.d,z17.d,z19.d
3461*4757b351SPierre Pronchery.inst	0x05f36626	//zip2 z6.d,z17.d,z19.d
3462*4757b351SPierre Pronchery.inst	0x05f4624a	//zip1 z10.d,z18.d,z20.d
3463*4757b351SPierre Pronchery.inst	0x05f4664e	//zip2 z14.d,z18.d,z20.d
3464*4757b351SPierre Pronchery
3465*4757b351SPierre Pronchery.inst	0x05f762a3	//zip1 z3.d,z21.d,z23.d
3466*4757b351SPierre Pronchery.inst	0x05f766a7	//zip2 z7.d,z21.d,z23.d
3467*4757b351SPierre Pronchery.inst	0x05f862cb	//zip1 z11.d,z22.d,z24.d
3468*4757b351SPierre Pronchery.inst	0x05f866cf	//zip2 z15.d,z22.d,z24.d
3469*4757b351SPierre Pronchery.if	mixin == 1
3470*4757b351SPierre Pronchery	eor	x19,x19,x20
3471*4757b351SPierre Pronchery.endif
3472*4757b351SPierre Pronchery.if	mixin == 1
3473*4757b351SPierre Pronchery	eor	x21,x21,x22
3474*4757b351SPierre Pronchery.endif
3475*4757b351SPierre Pronchery.inst	0xa540a031	//ld1w {z17.s},p0/z,[x1,#0,MUL VL]
3476*4757b351SPierre Pronchery.inst	0xa541a032	//ld1w {z18.s},p0/z,[x1,#1,MUL VL]
3477*4757b351SPierre Pronchery.inst	0xa542a033	//ld1w {z19.s},p0/z,[x1,#2,MUL VL]
3478*4757b351SPierre Pronchery.inst	0xa543a034	//ld1w {z20.s},p0/z,[x1,#3,MUL VL]
3479*4757b351SPierre Pronchery.inst	0xa544a035	//ld1w {z21.s},p0/z,[x1,#4,MUL VL]
3480*4757b351SPierre Pronchery.inst	0xa545a036	//ld1w {z22.s},p0/z,[x1,#5,MUL VL]
3481*4757b351SPierre Pronchery.inst	0xa546a037	//ld1w {z23.s},p0/z,[x1,#6,MUL VL]
3482*4757b351SPierre Pronchery.inst	0xa547a038	//ld1w {z24.s},p0/z,[x1,#7,MUL VL]
3483*4757b351SPierre Pronchery.inst	0x04215101	//addvl x1,x1,8
3484*4757b351SPierre Pronchery.inst	0x04b13000	//eor z0.d,z0.d,z17.d
3485*4757b351SPierre Pronchery.inst	0x04b23084	//eor z4.d,z4.d,z18.d
3486*4757b351SPierre Pronchery.inst	0x04b33108	//eor z8.d,z8.d,z19.d
3487*4757b351SPierre Pronchery.inst	0x04b4318c	//eor z12.d,z12.d,z20.d
3488*4757b351SPierre Pronchery.inst	0x04b53021	//eor z1.d,z1.d,z21.d
3489*4757b351SPierre Pronchery.inst	0x04b630a5	//eor z5.d,z5.d,z22.d
3490*4757b351SPierre Pronchery.inst	0x04b73129	//eor z9.d,z9.d,z23.d
3491*4757b351SPierre Pronchery.inst	0x04b831ad	//eor z13.d,z13.d,z24.d
3492*4757b351SPierre Pronchery.inst	0xa540a031	//ld1w {z17.s},p0/z,[x1,#0,MUL VL]
3493*4757b351SPierre Pronchery.inst	0xa541a032	//ld1w {z18.s},p0/z,[x1,#1,MUL VL]
3494*4757b351SPierre Pronchery.inst	0xa542a033	//ld1w {z19.s},p0/z,[x1,#2,MUL VL]
3495*4757b351SPierre Pronchery.inst	0xa543a034	//ld1w {z20.s},p0/z,[x1,#3,MUL VL]
3496*4757b351SPierre Pronchery.inst	0xa544a035	//ld1w {z21.s},p0/z,[x1,#4,MUL VL]
3497*4757b351SPierre Pronchery.inst	0xa545a036	//ld1w {z22.s},p0/z,[x1,#5,MUL VL]
3498*4757b351SPierre Pronchery.inst	0xa546a037	//ld1w {z23.s},p0/z,[x1,#6,MUL VL]
3499*4757b351SPierre Pronchery.inst	0xa547a038	//ld1w {z24.s},p0/z,[x1,#7,MUL VL]
3500*4757b351SPierre Pronchery.inst	0x04215101	//addvl x1,x1,8
3501*4757b351SPierre Pronchery.if	mixin == 1
3502*4757b351SPierre Pronchery	stp	x7,x9,[x0],#16
3503*4757b351SPierre Pronchery.endif
3504*4757b351SPierre Pronchery.inst	0x04b13042	//eor z2.d,z2.d,z17.d
3505*4757b351SPierre Pronchery.inst	0x04b230c6	//eor z6.d,z6.d,z18.d
3506*4757b351SPierre Pronchery.if	mixin == 1
3507*4757b351SPierre Pronchery	stp	x11,x13,[x0],#16
3508*4757b351SPierre Pronchery.endif
3509*4757b351SPierre Pronchery.inst	0x04b3314a	//eor z10.d,z10.d,z19.d
3510*4757b351SPierre Pronchery.inst	0x04b431ce	//eor z14.d,z14.d,z20.d
3511*4757b351SPierre Pronchery.if	mixin == 1
3512*4757b351SPierre Pronchery	stp	x15,x17,[x0],#16
3513*4757b351SPierre Pronchery.endif
3514*4757b351SPierre Pronchery.inst	0x04b53063	//eor z3.d,z3.d,z21.d
3515*4757b351SPierre Pronchery.inst	0x04b630e7	//eor z7.d,z7.d,z22.d
3516*4757b351SPierre Pronchery.if	mixin == 1
3517*4757b351SPierre Pronchery	stp	x19,x21,[x0],#16
3518*4757b351SPierre Pronchery.endif
3519*4757b351SPierre Pronchery.inst	0x04b7316b	//eor z11.d,z11.d,z23.d
3520*4757b351SPierre Pronchery.inst	0x04b831ef	//eor z15.d,z15.d,z24.d
3521*4757b351SPierre Pronchery.inst	0xe540e000	//st1w {z0.s},p0,[x0,#0,MUL VL]
3522*4757b351SPierre Pronchery.inst	0xe541e004	//st1w {z4.s},p0,[x0,#1,MUL VL]
3523*4757b351SPierre Pronchery.inst	0xe542e008	//st1w {z8.s},p0,[x0,#2,MUL VL]
3524*4757b351SPierre Pronchery.inst	0xe543e00c	//st1w {z12.s},p0,[x0,#3,MUL VL]
3525*4757b351SPierre Pronchery.inst	0xe544e001	//st1w {z1.s},p0,[x0,#4,MUL VL]
3526*4757b351SPierre Pronchery.inst	0xe545e005	//st1w {z5.s},p0,[x0,#5,MUL VL]
3527*4757b351SPierre Pronchery.inst	0xe546e009	//st1w {z9.s},p0,[x0,#6,MUL VL]
3528*4757b351SPierre Pronchery.inst	0xe547e00d	//st1w {z13.s},p0,[x0,#7,MUL VL]
3529*4757b351SPierre Pronchery.inst	0x04205100	//addvl x0,x0,8
3530*4757b351SPierre Pronchery.inst	0xe540e002	//st1w {z2.s},p0,[x0,#0,MUL VL]
3531*4757b351SPierre Pronchery.inst	0xe541e006	//st1w {z6.s},p0,[x0,#1,MUL VL]
3532*4757b351SPierre Pronchery.inst	0xe542e00a	//st1w {z10.s},p0,[x0,#2,MUL VL]
3533*4757b351SPierre Pronchery.inst	0xe543e00e	//st1w {z14.s},p0,[x0,#3,MUL VL]
3534*4757b351SPierre Pronchery.inst	0xe544e003	//st1w {z3.s},p0,[x0,#4,MUL VL]
3535*4757b351SPierre Pronchery.inst	0xe545e007	//st1w {z7.s},p0,[x0,#5,MUL VL]
3536*4757b351SPierre Pronchery.inst	0xe546e00b	//st1w {z11.s},p0,[x0,#6,MUL VL]
3537*4757b351SPierre Pronchery.inst	0xe547e00f	//st1w {z15.s},p0,[x0,#7,MUL VL]
3538*4757b351SPierre Pronchery.inst	0x04205100	//addvl x0,x0,8
3539*4757b351SPierre Pronchery210:
3540*4757b351SPierre Pronchery.inst	0x04b0e3fd	//incw x29, ALL, MUL #1
3541*4757b351SPierre Pronchery110:
3542*4757b351SPierre Pronchery2:
3543*4757b351SPierre Pronchery	str	w29,[x4]
3544*4757b351SPierre Pronchery	ldp	d10,d11,[sp,16]
3545*4757b351SPierre Pronchery	ldp	d12,d13,[sp,32]
3546*4757b351SPierre Pronchery	ldp	d14,d15,[sp,48]
3547*4757b351SPierre Pronchery	ldp	x16,x17,[sp,64]
3548*4757b351SPierre Pronchery	ldp	x18,x19,[sp,80]
3549*4757b351SPierre Pronchery	ldp	x20,x21,[sp,96]
3550*4757b351SPierre Pronchery	ldp	x22,x23,[sp,112]
3551*4757b351SPierre Pronchery	ldp	x24,x25,[sp,128]
3552*4757b351SPierre Pronchery	ldp	x26,x27,[sp,144]
3553*4757b351SPierre Pronchery	ldp	x28,x29,[sp,160]
3554*4757b351SPierre Pronchery	ldr	x30,[sp,176]
3555*4757b351SPierre Pronchery	ldp	d8,d9,[sp],192
3556*4757b351SPierre Pronchery	AARCH64_VALIDATE_LINK_REGISTER
3557*4757b351SPierre Pronchery.Lreturn:
3558*4757b351SPierre Pronchery	ret
3559*4757b351SPierre Pronchery.size	ChaCha20_ctr32_sve,.-ChaCha20_ctr32_sve
3560