xref: /freebsd/sys/crypto/openssl/arm/aesv8-armx.S (revision edf8578117e8844e02c0121147f45e4609b30680)
1/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
2#include "arm_arch.h"
3
/*
 * Build configuration and constants shared by every routine in this file.
 *
 * The whole file is assembled only when __ARM_MAX_ARCH__ >= 7.  The
 * assembler is told to target armv7-a with NEON; the AES instructions
 * themselves are never written as mnemonics but emitted as raw bytes
 * through the INST() macro, with the intended mnemonic recorded in the
 * trailing comment of each INST() line.
 */
4#if __ARM_MAX_ARCH__>=7
5.arch	armv7-a	@ don't confuse not-so-latest binutils with argv8 :-)
6.fpu	neon
7#ifdef	__thumb2__
8.syntax	unified
9.thumb
/* Thumb-2 build: INST(a,b,c,d) emits the bytes c, d|0xc, a, b (in that order). */
10# define INST(a,b,c,d)	.byte	c,d|0xc,a,b
11#else
12.code	32
/* ARM build: INST(a,b,c,d) emits its four byte arguments unchanged, in order. */
13# define INST(a,b,c,d)	.byte	a,b,c,d
14#endif
15
16.text
17.align	5
/*
 * .Lrcon - constant table read by the key-expansion code.  Three rows of
 * four 32-bit words (16 bytes each):
 *   row 0: 0x01 in every word   - loaded into q1 as the initial round constant
 *   row 1: 0x0c0f0e0d per word  - loaded into q2 as the vtbl byte-permutation mask
 *   row 2: 0x1b in every word   - reloaded into q1 by the 128-bit path once its
 *                                 8-iteration loop has finished
 */
18.Lrcon:
19.long	0x01,0x01,0x01,0x01
20.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
21.long	0x1b,0x1b,0x1b,0x1b
22
/*
 * aes_v8_set_encrypt_key - expand a user key into an AES encryption key
 * schedule.
 *
 * In:    r0 = pointer to the user key bytes
 *        r1 = key length in bits
 *        r2 = pointer to the output key schedule
 * Out:   r0 =  0 on success
 *             -1 if r0 or r2 is zero (NULL)
 *             -2 if r1 < 128, r1 > 256 (signed compares) or r1 is not a
 *                multiple of 64, i.e. anything other than 128/192/256
 *
 * On success the round keys are written consecutively starting at the
 * original r2 and the round count (10, 12 or 14) is stored as a 32-bit
 * word at byte offset 240 from the original r2.
 *
 * Exit state on the success path: r2 = original r2 + 240 and r12 = round
 * count.  aes_v8_set_decrypt_key enters through the local label .Lenc_key
 * and depends on both values.
 *
 * Registers written: r0-r3, r12, q0-q3, q8-q10, flags.
 *
 * NOTE(review): presumably bound to a C prototype of the form
 *   int aes_v8_set_encrypt_key(const unsigned char *key, int bits, AES_KEY *out);
 * confirm against the header used by the callers.
 */
23.globl	aes_v8_set_encrypt_key
24.type	aes_v8_set_encrypt_key,%function
25.align	5
26aes_v8_set_encrypt_key:
27.Lenc_key:
/* Argument validation.  r3 carries the prospective return code. */
28	mov	r3,#-1
29	cmp	r0,#0
30	beq	.Lenc_key_abort
31	cmp	r2,#0
32	beq	.Lenc_key_abort
33	mov	r3,#-2
34	cmp	r1,#128
35	blt	.Lenc_key_abort
36	cmp	r1,#256
37	bgt	.Lenc_key_abort
38	tst	r1,#0x3f
39	bne	.Lenc_key_abort
40
/*
 * r3 is reused as the pointer into .Lrcon.  The flags set by the compare
 * against 192 are consumed by the blt/beq pair further down; the NEON
 * instructions and the plain mov in between do not modify them.
 */
41	adr	r3,.Lrcon
42	cmp	r1,#192
43
/* q0 = 0, q3 = first 16 key bytes, q1 = round constant, q2 = vtbl mask. */
44	veor	q0,q0,q0
45	vld1.8	{q3},[r0]!
46	mov	r1,#8		@ reuse r1
47	vld1.32	{q1,q2},[r3]!
48
49	blt	.Loop128
50	beq	.L192
51	b	.L256
52
/*
 * 128-bit key.  Each pass stores the current round key (q3) and derives
 * the next one:
 *   - vtbl with the q2 mask builds q10 from the bytes of q3 (the existing
 *     comment on the table row calls this "rotate-n-splat");
 *   - aese with the all-zero q0 is then applied to q10;
 *   - the vext/veor chain XORs q3 with copies of itself shifted up by 4, 8
 *     and 12 bytes (zero filled from q0);
 *   - q10 is XORed with the round constant q1, which is then doubled
 *     bytewise by vshl.u8.
 * r1 counts 8 passes.
 */
53.align	4
54.Loop128:
55	vtbl.8	d20,{q3},d4
56	vtbl.8	d21,{q3},d5
57	vext.8	q9,q0,q3,#12
58	vst1.32	{q3},[r2]!
59	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
60	subs	r1,r1,#1
61
62	veor	q3,q3,q9
63	vext.8	q9,q0,q9,#12
64	veor	q3,q3,q9
65	vext.8	q9,q0,q9,#12
66	veor	q10,q10,q1
67	veor	q3,q3,q9
68	vshl.u8	q1,q1,#1
69	veor	q3,q3,q10
70	bne	.Loop128
71
/* r3 now points at the third .Lrcon row: reload q1 with the 0x1b constant. */
72	vld1.32	{q1},[r3]
73
/* Two more unrolled passes using the reloaded constant (0x1b, then doubled). */
74	vtbl.8	d20,{q3},d4
75	vtbl.8	d21,{q3},d5
76	vext.8	q9,q0,q3,#12
77	vst1.32	{q3},[r2]!
78	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
79
80	veor	q3,q3,q9
81	vext.8	q9,q0,q9,#12
82	veor	q3,q3,q9
83	vext.8	q9,q0,q9,#12
84	veor	q10,q10,q1
85	veor	q3,q3,q9
86	vshl.u8	q1,q1,#1
87	veor	q3,q3,q10
88
89	vtbl.8	d20,{q3},d4
90	vtbl.8	d21,{q3},d5
91	vext.8	q9,q0,q3,#12
92	vst1.32	{q3},[r2]!
93	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
94
95	veor	q3,q3,q9
96	vext.8	q9,q0,q9,#12
97	veor	q3,q3,q9
98	vext.8	q9,q0,q9,#12
99	veor	q10,q10,q1
100	veor	q3,q3,q9
101	veor	q3,q3,q10
/*
 * Final (11th) round key is stored without write-back, so r2 is at offset
 * 160 here; adding 0x50 moves it to offset 240 where .Ldone stores r12.
 */
102	vst1.32	{q3},[r2]
103	add	r2,r2,#0x50
104
105	mov	r12,#10
106	b	.Ldone
107
/*
 * 192-bit key.  The remaining 8 key bytes go into d16 (low half of q8) and
 * every byte of the permutation mask q2 is reduced by 8.  The loop runs
 * 8 times (r1 is still 8) and writes 8 + 16 bytes per pass.
 */
108.align	4
109.L192:
110	vld1.8	{d16},[r0]!
111	vmov.i8	q10,#8			@ borrow q10
112	vst1.32	{q3},[r2]!
113	vsub.i8	q2,q2,q10	@ adjust the mask
114
115.Loop192:
116	vtbl.8	d20,{q8},d4
117	vtbl.8	d21,{q8},d5
118	vext.8	q9,q0,q3,#12
/*
 * Store the 8 bytes held in d16.  The big-endian build stores all of q8
 * and then steps r2 back by 8 so that the net advance is also 8 bytes.
 */
119#ifdef __ARMEB__
120	vst1.32	{q8},[r2]!
121	sub	r2,r2,#8
122#else
123	vst1.32	{d16},[r2]!
124#endif
125	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
126	subs	r1,r1,#1
127
128	veor	q3,q3,q9
129	vext.8	q9,q0,q9,#12
130	veor	q3,q3,q9
131	vext.8	q9,q0,q9,#12
132	veor	q3,q3,q9
133
134	vdup.32	q9,d7[1]
135	veor	q9,q9,q8
136	veor	q10,q10,q1
137	vext.8	q8,q0,q8,#12
138	vshl.u8	q1,q1,#1
139	veor	q8,q8,q9
140	veor	q3,q3,q10
141	veor	q8,q8,q10
142	vst1.32	{q3},[r2]!
/* bne tests the Z flag left by the subs above; NEON ops do not touch it. */
143	bne	.Loop192
144
/* r2 is at offset 208 after the loop; +0x20 brings it to offset 240. */
145	mov	r12,#12
146	add	r2,r2,#0x20
147	b	.Ldone
148
/*
 * 256-bit key.  q8 receives the second 16 key bytes.  The loop is entered
 * 7 times; each pass stores q8 and the newly derived q3 (32 bytes), so
 * together with the initial 16-byte store r2 ends at offset 240.
 */
149.align	4
150.L256:
151	vld1.8	{q8},[r0]
152	mov	r1,#7
153	mov	r12,#14
154	vst1.32	{q3},[r2]!
155
156.Loop256:
157	vtbl.8	d20,{q8},d4
158	vtbl.8	d21,{q8},d5
159	vext.8	q9,q0,q3,#12
160	vst1.32	{q8},[r2]!
161	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
162	subs	r1,r1,#1
163
164	veor	q3,q3,q9
165	vext.8	q9,q0,q9,#12
166	veor	q3,q3,q9
167	vext.8	q9,q0,q9,#12
168	veor	q10,q10,q1
169	veor	q3,q3,q9
170	vshl.u8	q1,q1,#1
171	veor	q3,q3,q10
172	vst1.32	{q3},[r2]!
/* Leave after the 7th pass (Z flag from the subs above). */
173	beq	.Ldone
174
/*
 * Derive the next q8: the top word of q3 is splatted into q10 and run
 * through aese with the zero key, with no table permutation and no round
 * constant on this half-step.
 */
175	vdup.32	q10,d7[1]
176	vext.8	q9,q0,q8,#12
177	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
178
179	veor	q8,q8,q9
180	vext.8	q9,q0,q9,#12
181	veor	q8,q8,q9
182	vext.8	q9,q0,q9,#12
183	veor	q8,q8,q9
184
185	veor	q8,q8,q10
186	b	.Loop256
187
/* Common success exit: r2 is at schedule offset 240; store the round count. */
188.Ldone:
189	str	r12,[r2]
190	mov	r3,#0
191
/* Shared exit: r3 holds 0, -1 or -2. */
192.Lenc_key_abort:
193	mov	r0,r3			@ return value
194
195	bx	lr
196.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
197
/*
 * aes_v8_set_decrypt_key - build an AES decryption key schedule.
 *
 * In:    r0, r1, r2 - passed unchanged to the key expansion at .Lenc_key
 *        (user key pointer, key length in bits, output schedule pointer).
 * Out:   r0 = 0 on success, otherwise the non-zero code returned by the
 *        key expansion (-1 or -2).
 *
 * The encryption schedule is generated first and then converted in place:
 * the order of the round keys is reversed, and every round key except the
 * two outermost ones is passed through aesimc.
 *
 * Saves and restores r4 and lr on the stack.  Also writes r0, r2, q0, q1
 * and flags, in addition to whatever the key expansion writes.
 */
198.globl	aes_v8_set_decrypt_key
199.type	aes_v8_set_decrypt_key,%function
200.align	5
201aes_v8_set_decrypt_key:
202	stmdb	sp!,{r4,lr}
203	bl	.Lenc_key
204
205	cmp	r0,#0
206	bne	.Ldec_key_abort
207
/*
 * The key expansion leaves r2 = schedule + 240 and r12 = round count.
 * r2 walks forward from the first round key, r0 walks backward (step r4 =
 * -16) from the last one at schedule + 16 * rounds.
 */
208	sub	r2,r2,#240		@ restore original r2
209	mov	r4,#-16
210	add	r0,r2,r12,lsl#4	@ end of key schedule
211
/* Swap the first and last round keys unchanged (no aesimc on these two). */
212	vld1.32	{q0},[r2]
213	vld1.32	{q1},[r0]
214	vst1.32	{q0},[r0],r4
215	vst1.32	{q1},[r2]!
216
/* Swap the inner pairs, applying aesimc to both, until the pointers meet. */
217.Loop_imc:
218	vld1.32	{q0},[r2]
219	vld1.32	{q1},[r0]
220	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
221	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
222	vst1.32	{q0},[r0],r4
223	vst1.32	{q1},[r2]!
224	cmp	r0,r2
225	bhi	.Loop_imc
226
/*
 * The number of round keys is odd (rounds + 1), so r0 == r2 here and both
 * address the middle key, which is transformed in place.
 */
227	vld1.32	{q0},[r2]
228	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
229	vst1.32	{q0},[r0]
230
231	eor	r0,r0,r0		@ return value
/* On the failure path r0 still holds the key-expansion error code. */
232.Ldec_key_abort:
233	ldmia	sp!,{r4,pc}
234.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
/*
 * aes_v8_encrypt - encrypt one 16-byte block.
 *
 * In:    r0 = pointer to the 16-byte input block
 *        r1 = pointer to the 16-byte output block
 *        r2 = pointer to the key schedule; the round count is read from
 *             the 32-bit word at offset 240
 * Out:   16 bytes written at [r1]; no value is placed in r0.
 *
 * Registers written: r2, r3, q0-q2, flags.  No stack use.
 */
235.globl	aes_v8_encrypt
236.type	aes_v8_encrypt,%function
237.align	5
238aes_v8_encrypt:
239	ldr	r3,[r2,#240]
240	vld1.32	{q0},[r2]!
241	vld1.8	{q2},[r0]
/* r3 = rounds - 2: the last two rounds are handled after the loop. */
242	sub	r3,r3,#2
243	vld1.32	{q1},[r2]!
244
/*
 * Two rounds per pass, alternating between q0 and q1 as the round key
 * while the key for the round after next is fetched.
 */
245.Loop_enc:
246	INST(0x00,0x43,0xb0,0xf3)	@ aese q2,q0
247	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
248	vld1.32	{q0},[r2]!
249	subs	r3,r3,#2
250	INST(0x02,0x43,0xb0,0xf3)	@ aese q2,q1
251	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
252	vld1.32	{q1},[r2]!
/* Flags come from the subs above; the INST bytes and vld1 leave them alone. */
253	bgt	.Loop_enc
254
/*
 * Last two rounds: the final aese is not followed by aesmc, and the last
 * round key (loaded into q0 without write-back) is applied with veor.
 */
255	INST(0x00,0x43,0xb0,0xf3)	@ aese q2,q0
256	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
257	vld1.32	{q0},[r2]
258	INST(0x02,0x43,0xb0,0xf3)	@ aese q2,q1
259	veor	q2,q2,q0
260
261	vst1.8	{q2},[r1]
262	bx	lr
263.size	aes_v8_encrypt,.-aes_v8_encrypt
/*
 * aes_v8_decrypt - decrypt one 16-byte block.
 *
 * In:    r0 = pointer to the 16-byte input block
 *        r1 = pointer to the 16-byte output block
 *        r2 = pointer to the key schedule; the round count is read from
 *             the 32-bit word at offset 240
 * Out:   16 bytes written at [r1]; no value is placed in r0.
 *
 * Same structure as aes_v8_encrypt with aesd/aesimc in place of
 * aese/aesmc.  The round keys are consumed in ascending address order, so
 * the schedule is presumably one produced by aes_v8_set_decrypt_key -
 * confirm against the callers.
 *
 * Registers written: r2, r3, q0-q2, flags.  No stack use.
 */
264.globl	aes_v8_decrypt
265.type	aes_v8_decrypt,%function
266.align	5
267aes_v8_decrypt:
268	ldr	r3,[r2,#240]
269	vld1.32	{q0},[r2]!
270	vld1.8	{q2},[r0]
/* r3 = rounds - 2: the last two rounds are handled after the loop. */
271	sub	r3,r3,#2
272	vld1.32	{q1},[r2]!
273
/* Two rounds per pass, alternating between q0 and q1 as the round key. */
274.Loop_dec:
275	INST(0x40,0x43,0xb0,0xf3)	@ aesd q2,q0
276	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
277	vld1.32	{q0},[r2]!
278	subs	r3,r3,#2
279	INST(0x42,0x43,0xb0,0xf3)	@ aesd q2,q1
280	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
281	vld1.32	{q1},[r2]!
/* Flags come from the subs above; the INST bytes and vld1 leave them alone. */
282	bgt	.Loop_dec
283
/*
 * Last two rounds: the final aesd is not followed by aesimc, and the last
 * round key (loaded into q0 without write-back) is applied with veor.
 */
284	INST(0x40,0x43,0xb0,0xf3)	@ aesd q2,q0
285	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
286	vld1.32	{q0},[r2]
287	INST(0x42,0x43,0xb0,0xf3)	@ aesd q2,q1
288	veor	q2,q2,q0
289
290	vst1.8	{q2},[r1]
291	bx	lr
292.size	aes_v8_decrypt,.-aes_v8_decrypt
/*
 * aes_v8_ecb_encrypt - ECB-mode encryption or decryption of a buffer.
 *
 * In:    r0   = input pointer
 *        r1   = output pointer
 *        r2   = length in bytes
 *        r3   = key schedule pointer (round count read from offset 240)
 *        [sp] = direction flag, loaded into r4: non-zero selects the
 *               encrypt path, zero the decrypt path (.Lecb_dec)
 * Out:   output blocks written at [r1]; no value is placed in r0.
 *
 * A length below 16 returns without reading or writing any data.
 * Otherwise only whole 16-byte blocks are processed; trailing bytes
 * beyond the last whole block are ignored.
 *
 * The main loops handle three blocks per pass (q0, q1, q10); a tail
 * handles the final one or two blocks.
 *
 * Saves/restores r4-r8, lr and d8-d15.  Also writes r0-r2, ip, q0-q3 and
 * q8-q15, flags.  The second word loaded from the stack into r5 is
 * overwritten by the round count before it is used.
 */
293.globl	aes_v8_ecb_encrypt
294.type	aes_v8_ecb_encrypt,%function
295.align	5
296aes_v8_ecb_encrypt:
297	mov	ip,sp
298	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
299	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}			@ ABI specification says so
300	ldmia	ip,{r4,r5}			@ load remaining args
/*
 * r2 -= 16.  Below zero: nothing to do.  Exactly zero (one block): the
 * post-increment step r8 becomes 0 so that the first load does not
 * advance r0 and the next load re-reads the same block.
 */
301	subs	r2,r2,#16
302	mov	r8,#16
303	blo	.Lecb_done
304	it	eq
305	moveq	r8,#0
306
/*
 * The flags from this compare are consumed by "beq .Lecb_dec" below; none
 * of the instructions in between sets flags.
 */
307	cmp	r4,#0					@ en- or decrypting?
308	ldr	r5,[r3,#240]
309	and	r2,r2,#-16
310	vld1.8	{q0},[r0],r8
311
/*
 * q8/q9 = round keys 0 and 1.  r7 = r3 + 16 * (rounds - 6): the seven
 * keys from there to the last one are loaded into q10-q15 and q7.  q10
 * and q11 are overwritten with data blocks further down.  r5 ends up as
 * rounds - 8.
 */
312	vld1.32	{q8,q9},[r3]				@ load key schedule...
313	sub	r5,r5,#6
314	add	r7,r3,r5,lsl#4				@ pointer to last 7 round keys
315	sub	r5,r5,#2
316	vld1.32	{q10,q11},[r7]!
317	vld1.32	{q12,q13},[r7]!
318	vld1.32	{q14,q15},[r7]!
319	vld1.32	{q7},[r7]
320
/* r7 now streams the round keys starting with key 2. */
321	add	r7,r3,#32
322	mov	r6,r5
323	beq	.Lecb_dec
324
/* ---- encrypt ---- */
325	vld1.8	{q1},[r0]!
326	subs	r2,r2,#32				@ bias
327	add	r6,r5,#2
328	vorr	q3,q1,q1
329	vorr	q10,q1,q1
330	vorr	q1,q0,q0
/* Fewer than three blocks in total: q1/q10 hold the block(s) for the tail. */
331	blo	.Lecb_enc_tail
332
333	vorr	q1,q3,q3
334	vld1.8	{q10},[r0]!
/* Three blocks in flight: q0, q1, q10.  Two rounds per inner pass. */
335.Loop3x_ecb_enc:
336	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
337	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
338	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
339	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
340	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
341	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
342	vld1.32	{q8},[r7]!
343	subs	r6,r6,#2
344	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
345	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
346	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
347	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
348	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
349	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
350	vld1.32	{q9},[r7]!
351	bgt	.Loop3x_ecb_enc
352
353	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
354	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
355	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
356	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
357	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
358	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
/*
 * Account for the three blocks being finished.  If fewer than three
 * remain afterwards, r6 takes the (negative) r2 so that the add below
 * moves r0 backwards before the next three look-ahead loads.  The flags
 * from this subs are also what "bhs .Loop3x_ecb_enc" tests at the bottom
 * of the pass; nothing in between sets flags.
 */
359	subs	r2,r2,#0x30
360	it	lo
361	movlo	r6,r2				@ r6, r6, is zero at this point
362	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
363	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
364	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
365	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
366	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
367	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
368	add	r0,r0,r6			@ r0 is adjusted in such way that
369						@ at exit from the loop q1-q10
370						@ are loaded with last "words"
371	mov	r7,r3
/* Remaining rounds use the preloaded keys q12-q15; q7 is applied with veor. */
372	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
373	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
374	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
375	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
376	INST(0x28,0x43,0xf0,0xf3)	@ aese q10,q12
377	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
378	vld1.8	{q2},[r0]!
379	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
380	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
381	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
382	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
383	INST(0x2a,0x43,0xf0,0xf3)	@ aese q10,q13
384	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
385	vld1.8	{q3},[r0]!
386	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
387	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
388	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
389	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
390	INST(0x2c,0x43,0xf0,0xf3)	@ aese q10,q14
391	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
392	vld1.8	{q11},[r0]!
393	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
394	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
395	INST(0x2e,0x43,0xf0,0xf3)	@ aese q10,q15
396	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
397	add	r6,r5,#2
398	veor	q4,q7,q0
399	veor	q5,q7,q1
400	veor	q10,q10,q7
401	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
402	vst1.8	{q4},[r1]!
403	vorr	q0,q2,q2
404	vst1.8	{q5},[r1]!
405	vorr	q1,q3,q3
406	vst1.8	{q10},[r1]!
407	vorr	q10,q11,q11
408	bhs	.Loop3x_ecb_enc
409
/* r2 == -0x30 here means no blocks are left. */
410	cmn	r2,#0x30
411	beq	.Lecb_done
412	nop
413
/* Tail: one or two blocks left, carried in q1 and q10. */
414.Lecb_enc_tail:
415	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
416	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
417	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
418	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
419	vld1.32	{q8},[r7]!
420	subs	r6,r6,#2
421	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
422	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
423	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
424	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
425	vld1.32	{q9},[r7]!
426	bgt	.Lecb_enc_tail
427
428	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
429	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
430	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
431	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
432	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
433	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
434	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
435	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
436	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
437	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
438	INST(0x28,0x43,0xf0,0xf3)	@ aese q10,q12
439	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
/*
 * r2 == -0x20 means exactly one block is left (it is in q10).  The result
 * of this compare is consumed by "beq .Lecb_enc_one" below.
 */
440	cmn	r2,#0x20
441	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
442	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
443	INST(0x2a,0x43,0xf0,0xf3)	@ aese q10,q13
444	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
445	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
446	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
447	INST(0x2c,0x43,0xf0,0xf3)	@ aese q10,q14
448	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
449	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
450	INST(0x2e,0x43,0xf0,0xf3)	@ aese q10,q15
451	beq	.Lecb_enc_one
/* Two blocks left: write both results. */
452	veor	q5,q7,q1
453	veor	q9,q7,q10
454	vst1.8	{q5},[r1]!
455	vst1.8	{q9},[r1]!
456	b	.Lecb_done
457
/* One block left: only the q10 result is written. */
458.Lecb_enc_one:
459	veor	q5,q7,q10
460	vst1.8	{q5},[r1]!
461	b	.Lecb_done
/* ---- decrypt: same structure as above with aesd/aesimc ---- */
462.align	5
463.Lecb_dec:
464	vld1.8	{q1},[r0]!
465	subs	r2,r2,#32			@ bias
466	add	r6,r5,#2
467	vorr	q3,q1,q1
468	vorr	q10,q1,q1
469	vorr	q1,q0,q0
/* Fewer than three blocks in total: q1/q10 hold the block(s) for the tail. */
470	blo	.Lecb_dec_tail
471
472	vorr	q1,q3,q3
473	vld1.8	{q10},[r0]!
/* Three blocks in flight: q0, q1, q10.  Two rounds per inner pass. */
474.Loop3x_ecb_dec:
475	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
476	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
477	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
478	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
479	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
480	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
481	vld1.32	{q8},[r7]!
482	subs	r6,r6,#2
483	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
484	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
485	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
486	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
487	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
488	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
489	vld1.32	{q9},[r7]!
490	bgt	.Loop3x_ecb_dec
491
492	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
493	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
494	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
495	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
496	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
497	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
/*
 * Same bookkeeping as the encrypt loop: the flags from this subs drive
 * both the movlo below and "bhs .Loop3x_ecb_dec" at the bottom of the pass.
 */
498	subs	r2,r2,#0x30
499	it	lo
500	movlo	r6,r2				@ r6, r6, is zero at this point
501	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
502	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
503	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
504	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
505	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
506	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
507	add	r0,r0,r6 			@ r0 is adjusted in such way that
508						@ at exit from the loop q1-q10
509						@ are loaded with last "words"
510	mov	r7,r3
511	INST(0x68,0x03,0xb0,0xf3)	@ aesd q0,q12
512	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
513	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
514	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
515	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
516	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
517	vld1.8	{q2},[r0]!
518	INST(0x6a,0x03,0xb0,0xf3)	@ aesd q0,q13
519	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
520	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
521	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
522	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
523	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
524	vld1.8	{q3},[r0]!
525	INST(0x6c,0x03,0xb0,0xf3)	@ aesd q0,q14
526	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
527	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
528	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
529	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
530	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
531	vld1.8	{q11},[r0]!
532	INST(0x6e,0x03,0xb0,0xf3)	@ aesd q0,q15
533	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
534	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
535	vld1.32	{q8},[r7]!			@ re-pre-load rndkey[0]
536	add	r6,r5,#2
537	veor	q4,q7,q0
538	veor	q5,q7,q1
539	veor	q10,q10,q7
540	vld1.32	{q9},[r7]!			@ re-pre-load rndkey[1]
541	vst1.8	{q4},[r1]!
542	vorr	q0,q2,q2
543	vst1.8	{q5},[r1]!
544	vorr	q1,q3,q3
545	vst1.8	{q10},[r1]!
546	vorr	q10,q11,q11
547	bhs	.Loop3x_ecb_dec
548
/* r2 == -0x30 here means no blocks are left. */
549	cmn	r2,#0x30
550	beq	.Lecb_done
551	nop
552
/* Tail: one or two blocks left, carried in q1 and q10. */
553.Lecb_dec_tail:
554	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
555	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
556	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
557	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
558	vld1.32	{q8},[r7]!
559	subs	r6,r6,#2
560	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
561	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
562	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
563	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
564	vld1.32	{q9},[r7]!
565	bgt	.Lecb_dec_tail
566
567	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
568	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
569	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
570	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
571	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
572	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
573	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
574	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
575	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
576	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
577	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
578	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
/* r2 == -0x20 means exactly one block is left; tested by the beq below. */
579	cmn	r2,#0x20
580	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
581	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
582	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
583	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
584	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
585	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
586	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
587	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
588	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
589	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
590	beq	.Lecb_dec_one
/* Two blocks left: write both results. */
591	veor	q5,q7,q1
592	veor	q9,q7,q10
593	vst1.8	{q5},[r1]!
594	vst1.8	{q9},[r1]!
595	b	.Lecb_done
596
/* One block left: only the q10 result is written. */
597.Lecb_dec_one:
598	veor	q5,q7,q10
599	vst1.8	{q5},[r1]!
600
/* Common exit: restore d8-d15 and r4-r8, return by popping into pc. */
601.Lecb_done:
602	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
603	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
604.size	aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt
/*
 * aes_v8_cbc_encrypt - CBC-mode encryption or decryption of a buffer.
 *
 * In:    r0     = input pointer
 *        r1     = output pointer
 *        r2     = length in bytes
 *        r3     = key schedule pointer (round count read from offset 240)
 *        [sp]   = pointer to the 16-byte IV, loaded into r4
 *        [sp,4] = direction flag, loaded into r5: non-zero selects the
 *                 encrypt path, zero the decrypt path (.Lcbc_dec)
 * Out:   output blocks written at [r1]; on every path except the early
 *        abort, the 16 bytes at [r4] are overwritten with q6 (the last
 *        ciphertext block processed).  No value is placed in r0.
 *
 * A length below 16 returns through .Lcbc_abort without touching the
 * output or the IV.  Otherwise only whole 16-byte blocks are processed.
 *
 * Encryption handles one block at a time, with a dedicated loop when the
 * round count is 10 (.Lcbc_enc128).  Decryption handles three blocks per
 * pass, plus a tail for the final one or two blocks.
 *
 * Saves/restores r4-r8, lr and d8-d15.  Also writes r0-r3, ip (r12), lr
 * (r14, as a scratch pointer on the encrypt path), q0-q3 and q8-q15, flags.
 */
605.globl	aes_v8_cbc_encrypt
606.type	aes_v8_cbc_encrypt,%function
607.align	5
608aes_v8_cbc_encrypt:
609	mov	ip,sp
610	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
611	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
612	ldmia	ip,{r4,r5}		@ load remaining args
/*
 * r2 -= 16.  Below zero: abort.  Exactly zero (one block): the input
 * post-increment step r8 becomes 0 so r0 is not advanced past the block.
 */
613	subs	r2,r2,#16
614	mov	r8,#16
615	blo	.Lcbc_abort
616	it	eq
617	moveq	r8,#0
618
/*
 * The flags from this compare are consumed by "beq .Lcbc_dec" below; none
 * of the instructions in between sets flags.  r5 is then reused for the
 * round count.
 */
619	cmp	r5,#0			@ en- or decrypting?
620	ldr	r5,[r3,#240]
621	and	r2,r2,#-16
622	vld1.8	{q6},[r4]
623	vld1.8	{q0},[r0],r8
624
/*
 * q8/q9 = round keys 0 and 1; q10-q15 and q7 = the last seven round keys.
 * r5 ends up as rounds - 8.
 */
625	vld1.32	{q8,q9},[r3]		@ load key schedule...
626	sub	r5,r5,#6
627	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
628	sub	r5,r5,#2
629	vld1.32	{q10,q11},[r7]!
630	vld1.32	{q12,q13},[r7]!
631	vld1.32	{q14,q15},[r7]!
632	vld1.32	{q7},[r7]
633
634	add	r7,r3,#32
635	mov	r6,r5
636	beq	.Lcbc_dec
637
/*
 * ---- encrypt ----
 * q0 = first block ^ IV.  q5 = round key 0 ^ last round key; it is XORed
 * into every following input block (into q8) so that the "aese q0,q8" at
 * the top of the next pass applies the last round key of the previous
 * block, the chaining XOR and round key 0 of the next block in one step.
 * r5 == 2 corresponds to a round count of 10.
 */
638	cmp	r5,#2
639	veor	q0,q0,q6
640	veor	q5,q8,q7
641	beq	.Lcbc_enc128
642
/*
 * More than 10 rounds: q2/q3 = round keys 2 and 3; r7, r6, r12, r14 and
 * r3 are turned into fixed pointers to round keys 1, 4, 5, 6 and 7.
 */
643	vld1.32	{q2,q3},[r7]
644	add	r7,r3,#16
645	add	r6,r3,#16*4
646	add	r12,r3,#16*5
647	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
648	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
649	add	r14,r3,#16*6
650	add	r3,r3,#16*7
651	b	.Lenter_cbc_enc
652
653.align	4
/* Top of pass for blocks after the first: q6 is the previous ciphertext. */
654.Loop_cbc_enc:
655	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
656	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
657	vst1.8	{q6},[r1]!
658.Lenter_cbc_enc:
659	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
660	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
661	INST(0x04,0x03,0xb0,0xf3)	@ aese q0,q2
662	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
663	vld1.32	{q8},[r6]
/* r5 == 4 corresponds to a round count of 12: skip the two extra rounds. */
664	cmp	r5,#4
665	INST(0x06,0x03,0xb0,0xf3)	@ aese q0,q3
666	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
667	vld1.32	{q9},[r12]
668	beq	.Lcbc_enc192
669
670	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
671	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
672	vld1.32	{q8},[r14]
673	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
674	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
675	vld1.32	{q9},[r3]
676	nop
677
678.Lcbc_enc192:
679	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
680	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
/*
 * Count the block.  The flags from this subs drive the moveq below (stop
 * advancing r0 when the block just fetched is the last one) and the
 * "bhs .Loop_cbc_enc" at the bottom of the pass.
 */
681	subs	r2,r2,#16
682	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
683	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
684	it	eq
685	moveq	r8,#0
686	INST(0x24,0x03,0xb0,0xf3)	@ aese q0,q10
687	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
688	INST(0x26,0x03,0xb0,0xf3)	@ aese q0,q11
689	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
/* Fetch the next input block into q8 and fold q5 into it. */
690	vld1.8	{q8},[r0],r8
691	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
692	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
693	veor	q8,q8,q5
694	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
695	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
696	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
697	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
698	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
699	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
/* q6 = finished ciphertext block; q0 is left without the last round key. */
700	veor	q6,q0,q7
701	bhs	.Loop_cbc_enc
702
703	vst1.8	{q6},[r1]!
704	b	.Lcbc_done
705
/* 10-round variant: every round key stays in registers, nothing is reloaded. */
706.align	5
707.Lcbc_enc128:
708	vld1.32	{q2,q3},[r7]
709	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
710	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
711	b	.Lenter_cbc_enc128
712.Loop_cbc_enc128:
713	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
714	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
715	vst1.8	{q6},[r1]!
716.Lenter_cbc_enc128:
717	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
718	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
/* Flags from this subs drive the moveq and "bhs .Loop_cbc_enc128" below. */
719	subs	r2,r2,#16
720	INST(0x04,0x03,0xb0,0xf3)	@ aese q0,q2
721	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
722	it	eq
723	moveq	r8,#0
724	INST(0x06,0x03,0xb0,0xf3)	@ aese q0,q3
725	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
726	INST(0x24,0x03,0xb0,0xf3)	@ aese q0,q10
727	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
728	INST(0x26,0x03,0xb0,0xf3)	@ aese q0,q11
729	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
730	vld1.8	{q8},[r0],r8
731	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
732	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
733	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
734	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
735	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
736	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
737	veor	q8,q8,q5
738	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
739	veor	q6,q0,q7
740	bhs	.Loop_cbc_enc128
741
742	vst1.8	{q6},[r1]!
743	b	.Lcbc_done
/*
 * ---- decrypt ----
 * q6 holds the IV / previous ciphertext block.  Copies of the ciphertext
 * blocks being decrypted are kept (q2, q3, q11) because they are the
 * chaining values for the following blocks.  The final aesd is not
 * followed by a separate last-round-key XOR; q7 is folded into the
 * chaining value instead (e.g. q4 = q6 ^ q7).
 */
744.align	5
745.Lcbc_dec:
746	vld1.8	{q10},[r0]!
747	subs	r2,r2,#32		@ bias
748	add	r6,r5,#2
749	vorr	q3,q0,q0
750	vorr	q1,q0,q0
751	vorr	q11,q10,q10
/* Fewer than three blocks in total: go straight to the tail. */
752	blo	.Lcbc_dec_tail
753
754	vorr	q1,q10,q10
755	vld1.8	{q10},[r0]!
756	vorr	q2,q0,q0
757	vorr	q3,q1,q1
758	vorr	q11,q10,q10
/* Three blocks in flight: q0, q1, q10.  Two rounds per inner pass. */
759.Loop3x_cbc_dec:
760	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
761	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
762	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
763	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
764	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
765	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
766	vld1.32	{q8},[r7]!
767	subs	r6,r6,#2
768	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
769	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
770	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
771	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
772	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
773	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
774	vld1.32	{q9},[r7]!
775	bgt	.Loop3x_cbc_dec
776
777	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
778	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
779	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
780	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
781	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
782	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
/*
 * Chaining values combined with the last round key: q4, q5, q9 for the
 * three blocks.  The flags from the subs drive both the movlo below and
 * "bhs .Loop3x_cbc_dec" at the bottom of the pass.
 */
783	veor	q4,q6,q7
784	subs	r2,r2,#0x30
785	veor	q5,q2,q7
786	it	lo
787	movlo	r6,r2			@ r6, r6, is zero at this point
788	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
789	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
790	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
791	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
792	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
793	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
794	veor	q9,q3,q7
795	add	r0,r0,r6		@ r0 is adjusted in such way that
796					@ at exit from the loop q1-q10
797					@ are loaded with last "words"
/* q6 = third ciphertext block of this pass: chaining value for the next one. */
798	vorr	q6,q11,q11
799	mov	r7,r3
800	INST(0x68,0x03,0xb0,0xf3)	@ aesd q0,q12
801	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
802	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
803	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
804	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
805	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
806	vld1.8	{q2},[r0]!
807	INST(0x6a,0x03,0xb0,0xf3)	@ aesd q0,q13
808	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
809	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
810	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
811	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
812	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
813	vld1.8	{q3},[r0]!
814	INST(0x6c,0x03,0xb0,0xf3)	@ aesd q0,q14
815	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
816	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
817	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
818	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
819	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
820	vld1.8	{q11},[r0]!
821	INST(0x6e,0x03,0xb0,0xf3)	@ aesd q0,q15
822	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
823	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
824	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
825	add	r6,r5,#2
826	veor	q4,q4,q0
827	veor	q5,q5,q1
828	veor	q10,q10,q9
829	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
830	vst1.8	{q4},[r1]!
831	vorr	q0,q2,q2
832	vst1.8	{q5},[r1]!
833	vorr	q1,q3,q3
834	vst1.8	{q10},[r1]!
835	vorr	q10,q11,q11
836	bhs	.Loop3x_cbc_dec
837
/* r2 == -0x30 here means no blocks are left. */
838	cmn	r2,#0x30
839	beq	.Lcbc_done
840	nop
841
/* Tail: one or two blocks left, carried in q1 and q10. */
842.Lcbc_dec_tail:
843	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
844	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
845	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
846	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
847	vld1.32	{q8},[r7]!
848	subs	r6,r6,#2
849	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
850	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
851	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
852	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
853	vld1.32	{q9},[r7]!
854	bgt	.Lcbc_dec_tail
855
856	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
857	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
858	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
859	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
860	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
861	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
862	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
863	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
864	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
865	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
866	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
867	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
/* r2 == -0x20 means exactly one block is left; tested by the beq below. */
868	cmn	r2,#0x20
869	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
870	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
871	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
872	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
873	veor	q5,q6,q7
874	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
875	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
876	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
877	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
878	veor	q9,q3,q7
879	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
880	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
881	beq	.Lcbc_dec_one
/* Two blocks left: q1 chains on q6, q10 chains on q3; q11 becomes the IV. */
882	veor	q5,q5,q1
883	veor	q9,q9,q10
884	vorr	q6,q11,q11
885	vst1.8	{q5},[r1]!
886	vst1.8	{q9},[r1]!
887	b	.Lcbc_done
888
/* One block left: q10 chains on q6; q11 becomes the IV. */
889.Lcbc_dec_one:
890	veor	q5,q5,q10
891	vorr	q6,q11,q11
892	vst1.8	{q5},[r1]!
893
/* Write the updated chaining value back to the caller's IV buffer. */
894.Lcbc_done:
895	vst1.8	{q6},[r4]
/* Early-abort entry: skips the IV write-back above. */
896.Lcbc_abort:
897	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
898	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
899.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
/*
 * aes_v8_ctr32_encrypt_blocks - CTR-mode processing with a 32-bit counter.
 *
 * In:    r0   = input pointer
 *        r1   = output pointer
 *        r2   = number of 16-byte blocks (not bytes)
 *        r3   = key schedule pointer (round count read from offset 240)
 *        [sp] = pointer to the 16-byte counter block, loaded into r4
 * Out:   r2 output blocks written at [r1].  No value is placed in r0 and
 *        nothing is stored back through r4, so the caller's counter block
 *        is left unchanged.
 *
 * Only the last 32-bit word of the counter block (offset 12) is
 * incremented; it is kept as an integer in r8 and inserted into the top
 * word of q6 (d13[1]) after a byte reversal.
 *
 * The main loop handles three blocks per pass (q0, q1, q10 are the
 * counter blocks); a tail handles the final one or two blocks.
 *
 * NOTE(review): a block count of 0 is not rejected.  It takes the same
 * path as a count of 2 in the tail (reads 16 bytes, writes 32 bytes), so
 * callers are presumably required to pass r2 >= 1 - confirm.
 *
 * Saves/restores r4-r10, lr and d8-d15.  Also writes r0-r2, ip (r12),
 * q0-q3 and q8-q15, flags.
 */
900.globl	aes_v8_ctr32_encrypt_blocks
901.type	aes_v8_ctr32_encrypt_blocks,%function
902.align	5
903aes_v8_ctr32_encrypt_blocks:
904	mov	ip,sp
905	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
906	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
907	ldr	r4, [ip]		@ load remaining arg
908	ldr	r5,[r3,#240]
909
/* r8 = last word of the counter block; q0 = the whole counter block. */
910	ldr	r8, [r4, #12]
911#ifdef __ARMEB__
912	vld1.8	{q0},[r4]
913#else
914	vld1.32	{q0},[r4]
915#endif
916	vld1.32	{q8,q9},[r3]		@ load key schedule...
917	sub	r5,r5,#4
918	mov	r12,#16
/*
 * The flags from this compare are used twice further down: by the movlo
 * (r2 < 2: input step r12 = 0) and by "bls .Lctr32_tail" (r2 <= 2).  None
 * of the instructions in between sets flags.
 */
919	cmp	r2,#2
920	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
921	sub	r5,r5,#2
922	vld1.32	{q12,q13},[r7]!
923	vld1.32	{q14,q15},[r7]!
924	vld1.32	{q7},[r7]
925	add	r7,r3,#32
926	mov	r6,r5
927	it	lo
928	movlo	r12,#0
/* Little-endian build: byte-reverse the loaded counter word. */
929#ifndef __ARMEB__
930	rev	r8, r8
931#endif
/* q1 = counter block with counter + 1 in its top word; r8 advances by 2. */
932	add	r10, r8, #1
933	vorr	q6,q0,q0
934	rev	r10, r10
935	vmov.32	d13[1],r10
936	add	r8, r8, #2
937	vorr	q1,q6,q6
938	bls	.Lctr32_tail
/* Three or more blocks: q10 = counter block with counter + 2. */
939	rev	r12, r8
940	vmov.32	d13[1],r12
941	sub	r2,r2,#3		@ bias
942	vorr	q10,q6,q6
943	b	.Loop3x_ctr32
944
945.align	4
/* Three counter blocks in flight: q0, q1, q10.  Two rounds per inner pass. */
946.Loop3x_ctr32:
947	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
948	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
949	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
950	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
951	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
952	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
953	vld1.32	{q8},[r7]!
954	subs	r6,r6,#2
955	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
956	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
957	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
958	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
959	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
960	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
961	vld1.32	{q9},[r7]!
962	bgt	.Loop3x_ctr32
963
/*
 * Remaining rounds.  The cipher state moves to q4, q5 and q9 so that q0,
 * q1 and q10 can be refilled with the next three counter blocks while
 * these rounds complete.  The three input blocks are loaded into q2, q3,
 * q11 and pre-XORed with the last round key q7.
 */
964	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
965	INST(0x80,0x83,0xb0,0xf3)	@ aesmc q4,q0
966	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
967	INST(0x82,0xa3,0xb0,0xf3)	@ aesmc q5,q1
968	vld1.8	{q2},[r0]!
969	add	r9,r8,#1
970	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
971	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
972	vld1.8	{q3},[r0]!
973	rev	r9,r9
974	INST(0x22,0x83,0xb0,0xf3)	@ aese q4,q9
975	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
976	INST(0x22,0xa3,0xb0,0xf3)	@ aese q5,q9
977	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
978	vld1.8	{q11},[r0]!
979	mov	r7,r3
980	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
981	INST(0xa4,0x23,0xf0,0xf3)	@ aesmc q9,q10
982	INST(0x28,0x83,0xb0,0xf3)	@ aese q4,q12
983	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
984	INST(0x28,0xa3,0xb0,0xf3)	@ aese q5,q12
985	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
986	veor	q2,q2,q7
987	add	r10,r8,#2
988	INST(0x28,0x23,0xf0,0xf3)	@ aese q9,q12
989	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
990	veor	q3,q3,q7
991	add	r8,r8,#3
992	INST(0x2a,0x83,0xb0,0xf3)	@ aese q4,q13
993	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
994	INST(0x2a,0xa3,0xb0,0xf3)	@ aese q5,q13
995	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
996	veor	q11,q11,q7
/* Next counter blocks: q0 <- r8+1, q1 <- r8+2, q10 <- r8+3 (old r8). */
997	vmov.32	d13[1], r9
998	INST(0x2a,0x23,0xf0,0xf3)	@ aese q9,q13
999	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
1000	vorr	q0,q6,q6
1001	rev	r10,r10
1002	INST(0x2c,0x83,0xb0,0xf3)	@ aese q4,q14
1003	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
1004	vmov.32	d13[1], r10
1005	rev	r12,r8
1006	INST(0x2c,0xa3,0xb0,0xf3)	@ aese q5,q14
1007	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
1008	vorr	q1,q6,q6
1009	vmov.32	d13[1], r12
1010	INST(0x2c,0x23,0xf0,0xf3)	@ aese q9,q14
1011	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
1012	vorr	q10,q6,q6
/* Flags from this subs are consumed by "bhs .Loop3x_ctr32" below. */
1013	subs	r2,r2,#3
1014	INST(0x2e,0x83,0xb0,0xf3)	@ aese q4,q15
1015	INST(0x2e,0xa3,0xb0,0xf3)	@ aese q5,q15
1016	INST(0x2e,0x23,0xf0,0xf3)	@ aese q9,q15
1017
1018	veor	q2,q2,q4
1019	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
1020	vst1.8	{q2},[r1]!
1021	veor	q3,q3,q5
1022	mov	r6,r5
1023	vst1.8	{q3},[r1]!
1024	veor	q11,q11,q9
1025	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
1026	vst1.8	{q11},[r1]!
1027	bhs	.Loop3x_ctr32
1028
/*
 * Undo the bias: r2 = blocks still to do (0, 1 or 2).  r12 is the input
 * step for the tail: 0 when a single block remains, so the second load
 * re-reads that block instead of reading beyond it.
 */
1029	adds	r2,r2,#3
1030	beq	.Lctr32_done
1031	cmp	r2,#1
1032	mov	r12,#16
1033	it	eq
1034	moveq	r12,#0
1035
/* Tail: one or two blocks, counter blocks in q0 and q1. */
1036.Lctr32_tail:
1037	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
1038	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1039	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
1040	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1041	vld1.32	{q8},[r7]!
1042	subs	r6,r6,#2
1043	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
1044	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1045	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
1046	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1047	vld1.32	{q9},[r7]!
1048	bgt	.Lctr32_tail
1049
1050	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
1051	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1052	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
1053	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1054	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
1055	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1056	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
1057	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1058	vld1.8	{q2},[r0],r12
1059	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
1060	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1061	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
1062	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1063	vld1.8	{q3},[r0]
1064	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
1065	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1066	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
1067	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1068	veor	q2,q2,q7
1069	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
1070	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1071	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
1072	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1073	veor	q3,q3,q7
1074	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
1075	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
1076
/* The first result is always written; the second only when r2 != 1. */
1077	cmp	r2,#1
1078	veor	q2,q2,q0
1079	veor	q3,q3,q1
1080	vst1.8	{q2},[r1]!
1081	beq	.Lctr32_done
1082	vst1.8	{q3},[r1]
1083
/* Common exit: restore d8-d15 and r4-r10, return by popping into pc. */
1084.Lctr32_done:
1085	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
1086	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
1087.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
1088#endif
1089