/* xref: /freebsd/sys/crypto/openssl/arm/aesv8-armx.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43) */
/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
@ The file is assembled as plain ARMv7-A with NEON.  The AES instructions
@ (aese, aesd, aesmc, aesimc) are never written as mnemonics; they are
@ emitted as raw bytes through the INST macro defined below, with the
@ intended mnemonic given in a trailing comment.
.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
.fpu	neon
@ INST takes the four bytes of one 32-bit instruction.
@   Thumb-2 build: the two halfwords are swapped and 0xc is ORed into the
@                  last argument (the byte 0xf3 used throughout becomes 0xff).
@   ARM build:     the four bytes are emitted unchanged, in argument order.
#ifdef	__thumb2__
.syntax	unified
.thumb
# define INST(a,b,c,d)	.byte	c,d|0xc,a,b
#else
.code	32
# define INST(a,b,c,d)	.byte	a,b,c,d
#endif

.text
.align	5
@ Constant table read by aes_v8_set_encrypt_key (three rows of four words):
@   row 0: 0x01 in every 32-bit lane, the starting round constant
@   row 1: byte-index mask handed to vtbl.8 (rotate-n-splat)
@   row 2: 0x1b in every 32-bit lane, loaded once the 128-bit loop has
@          shifted the row 0 constant left eight times
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@-----------------------------------------------------------------------
@ aes_v8_set_encrypt_key
@ Expands a user key into an encryption key schedule.
@ In:    r0 = pointer to the user key bytes
@        r1 = key length in bits (128, 192 or 256)
@        r2 = pointer to the key schedule to fill
@ Out:   r0 =  0 on success
@             -1 if r0 or r2 is zero
@             -2 if r1 is below 128, above 256 or not a multiple of 64
@        On success r2 is left at schedule+240 and the round count
@        (10, 12 or 14, also left in r12) is stored there.
@ Clobb: r1, r3, r12, q0-q3, q8-q10, flags
@ Leaf routine; returns with bx lr.  The local label .Lenc_key is a second
@ entry point used by aes_v8_set_decrypt_key.
@-----------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	mov	r3,#-1			@ error code for a null pointer
	cmp	r0,#0
	beq	.Lenc_key_abort
	cmp	r2,#0
	beq	.Lenc_key_abort
	mov	r3,#-2			@ error code for a bad key length
	cmp	r1,#128
	blt	.Lenc_key_abort
	cmp	r1,#256
	bgt	.Lenc_key_abort
	tst	r1,#0x3f
	bne	.Lenc_key_abort

	adr	r3,.Lrcon
	cmp	r1,#192			@ flags are consumed by the blt/beq below

	veor	q0,q0,q0		@ q0 = 0, used as zero key and as shift-in
	vld1.8	{q3},[r0]!		@ first 16 bytes of the user key
	mov	r1,#8		@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = vtbl mask

	blt	.Loop128
	beq	.L192
	b	.L256

.align	4
@ 128-bit key: eight loop iterations, then two unrolled rounds that use
@ the 0x1b row of .Lrcon.
.Loop128:
	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ next round constant
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ q1 = 0x1b row of .Lrcon

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, no post-increment
	add	r2,r2,#0x50		@ r2 = schedule+240

	mov	r12,#10
	b	.Ldone

.align	4
@ 192-bit key: q3 holds the first 16 key bytes, d16 the remaining 8.
.L192:
	vld1.8	{d16},[r0]!
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10	@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
#ifdef __ARMEB__
	vst1.32	{q8},[r2]!
	sub	r2,r2,#8
#else
	vst1.32	{d16},[r2]!
#endif
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule+240
	b	.Ldone

.align	4
@ 256-bit key: q3 holds the first 16 key bytes, q8 the second 16.
.L256:
	vld1.8	{q8},[r0]
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ seventh pass: r2 = schedule+240

	vdup.32	q10,d7[1]
	vext.8	q9,q0,q8,#12
	INST(0x00,0x43,0xf0,0xf3)	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ round count at schedule+240
	mov	r3,#0

.Lenc_key_abort:
	mov	r0,r3			@ return value

	bx	lr
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

@-----------------------------------------------------------------------
@ aes_v8_set_decrypt_key
@ Builds a decryption key schedule: runs the encryption key expansion via
@ .Lenc_key, then reverses the order of the round keys in place and applies
@ aesimc to every round key except the first and the last.
@ In:    r0, r1, r2 are passed unchanged to .Lenc_key
@ Out:   r0 = 0 on success, otherwise the non-zero code from .Lenc_key
@ Saves: r4, lr (restored on exit)
@ Clobb: r1, r2, r3, r12, NEON registers used by .Lenc_key, flags
@-----------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	cmp	r0,#0
	bne	.Ldec_key_abort

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ step used to walk r0 backwards
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ swap the first and last round keys as they are
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ swap the remaining pairs from both ends inwards, through aesimc
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ the middle round key is transformed in place
	vld1.32	{q0},[r2]
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
.Ldec_key_abort:
	ldmia	sp!,{r4,pc}
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@-----------------------------------------------------------------------
@ aes_v8_encrypt
@ Encrypts one 16-byte block.
@ In:    r0 = pointer to the 16 input bytes
@        r1 = pointer to the 16 output bytes
@        r2 = pointer to the key schedule; the round count is read from
@             offset 240
@ Out:   16 bytes stored at [r1]
@ Clobb: r2, r3, q0-q2, flags
@ Leaf routine; returns with bx lr.
@-----------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = data block
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ two rounds per pass; q0 and q1 alternate as the round key
.Loop_enc:
	INST(0x00,0x43,0xb0,0xf3)	@ aese q2,q0
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	INST(0x02,0x43,0xb0,0xf3)	@ aese q2,q1
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_enc

	INST(0x00,0x43,0xb0,0xf3)	@ aese q2,q0
	INST(0x84,0x43,0xb0,0xf3)	@ aesmc q2,q2
	vld1.32	{q0},[r2]		@ last round key
	INST(0x02,0x43,0xb0,0xf3)	@ aese q2,q1
	veor	q2,q2,q0		@ final step: no aesmc, xor with last key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_encrypt,.-aes_v8_encrypt
@-----------------------------------------------------------------------
@ aes_v8_decrypt
@ Decrypts one 16-byte block.
@ In:    r0 = pointer to the 16 input bytes
@        r1 = pointer to the 16 output bytes
@        r2 = pointer to the key schedule; the round count is read from
@             offset 240
@ Out:   16 bytes stored at [r1]
@ Clobb: r2, r3, q0-q2, flags
@ Leaf routine; returns with bx lr.
@-----------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	r3,[r2,#240]		@ r3 = round count
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = data block
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

	@ two rounds per pass; q0 and q1 alternate as the round key
.Loop_dec:
	INST(0x40,0x43,0xb0,0xf3)	@ aesd q2,q0
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q0},[r2]!
	subs	r3,r3,#2
	INST(0x42,0x43,0xb0,0xf3)	@ aesd q2,q1
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q1},[r2]!
	bgt	.Loop_dec

	INST(0x40,0x43,0xb0,0xf3)	@ aesd q2,q0
	INST(0xc4,0x43,0xb0,0xf3)	@ aesimc q2,q2
	vld1.32	{q0},[r2]		@ last round key
	INST(0x42,0x43,0xb0,0xf3)	@ aesd q2,q1
	veor	q2,q2,q0		@ final step: no aesimc, xor with last key

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_decrypt,.-aes_v8_decrypt
@-----------------------------------------------------------------------
@ aes_v8_ecb_encrypt
@ Processes a buffer block by block, three blocks per pass in the main
@ loop and one or two blocks in the tail.
@ In:    r0   = input pointer
@        r1   = output pointer
@        r2   = length in bytes; below 16 nothing is done, otherwise it is
@               rounded down to a multiple of 16
@        r3   = key schedule pointer; round count is read from offset 240
@        [sp] = direction word, loaded into r4: non-zero takes the aese
@               path, zero branches to .Lecb_dec (aesd path)
@ Saves: r4-r8, lr and d8-d15 (restored at .Lecb_done)
@ Clobb: r0, r1, r2, ip, q0-q3, q8-q15, flags
@ Register roles inside the routine:
@        r5 = round count minus 8     r6 = per-pass round counter
@        r7 = running key pointer     r8 = step for the first input load
@        q8,q9 = rolling round keys   q12-q15,q7 = last five round keys
@        q0,q1,q10 = the three data blocks in flight
@-----------------------------------------------------------------------
.globl	aes_v8_ecb_encrypt
.type	aes_v8_ecb_encrypt,%function
.align	5
aes_v8_ecb_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}			@ ABI specification says so
	ldmia	ip,{r4,r5}			@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lecb_done			@ fewer than 16 bytes
	it	eq
	moveq	r8,#0				@ exactly one block: do not advance r0

	cmp	r4,#0					@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q0},[r0],r8

	vld1.32	{q8,q9},[r3]				@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4				@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10,q11},[r7]!
	vld1.32	{q12,q13},[r7]!
	vld1.32	{q14,q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lecb_dec			@ still the flags of cmp r4,#0

	vld1.8	{q1},[r0]!
	subs	r2,r2,#32				@ bias
	add	r6,r5,#2
	vorr	q3,q1,q1
	vorr	q10,q1,q1
	vorr	q1,q0,q0
	blo	.Lecb_enc_tail			@ one or two blocks only

	vorr	q1,q3,q3
	vld1.8	{q10},[r0]!
.Loop3x_ecb_enc:
	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ecb_enc

	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	subs	r2,r2,#0x30
	it	lo
	movlo	r6,r2				@ r6 (round counter) is zero at this point
	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	add	r0,r0,r6			@ r0 is adjusted in such way that
						@ at exit from the loop q1-q10
						@ are loaded with last "words"
	mov	r7,r3
	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x28,0x43,0xf0,0xf3)	@ aese q10,q12
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q2},[r0]!
	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2a,0x43,0xf0,0xf3)	@ aese q10,q13
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q3},[r0]!
	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2c,0x43,0xf0,0xf3)	@ aese q10,q14
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.8	{q11},[r0]!
	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
	INST(0x2e,0x43,0xf0,0xf3)	@ aese q10,q15
	vld1.32	{q8},[r7]!		@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q7,q0
	veor	q5,q7,q1
	veor	q10,q10,q7
	vld1.32	{q9},[r7]!		@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_ecb_enc			@ flags of subs r2,r2,#0x30 above

	cmn	r2,#0x30
	beq	.Lecb_done			@ nothing left over
	nop

	@ tail: q1 and q10 hold the remaining one or two blocks
.Lecb_enc_tail:
	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lecb_enc_tail

	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x28,0x43,0xf0,0xf3)	@ aese q10,q12
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	cmn	r2,#0x20			@ eq: exactly one block remains
	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2a,0x43,0xf0,0xf3)	@ aese q10,q13
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	INST(0x2c,0x43,0xf0,0xf3)	@ aese q10,q14
	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
	INST(0x2e,0x43,0xf0,0xf3)	@ aese q10,q15
	beq	.Lecb_enc_one
	veor	q5,q7,q1
	veor	q9,q7,q10
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lecb_done

.Lecb_enc_one:
	veor	q5,q7,q10			@ the single block result is in q10
	vst1.8	{q5},[r1]!
	b	.Lecb_done
.align	5
	@ aesd path: same structure as above with aesd/aesimc
.Lecb_dec:
	vld1.8	{q1},[r0]!
	subs	r2,r2,#32			@ bias
	add	r6,r5,#2
	vorr	q3,q1,q1
	vorr	q10,q1,q1
	vorr	q1,q0,q0
	blo	.Lecb_dec_tail			@ one or two blocks only

	vorr	q1,q3,q3
	vld1.8	{q10},[r0]!
.Loop3x_ecb_dec:
	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Loop3x_ecb_dec

	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	subs	r2,r2,#0x30
	it	lo
	movlo	r6,r2				@ r6 (round counter) is zero at this point
	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	add	r0,r0,r6			@ r0 is adjusted in such way that
						@ at exit from the loop q1-q10
						@ are loaded with last "words"
	mov	r7,r3
	INST(0x68,0x03,0xb0,0xf3)	@ aesd q0,q12
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q2},[r0]!
	INST(0x6a,0x03,0xb0,0xf3)	@ aesd q0,q13
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q3},[r0]!
	INST(0x6c,0x03,0xb0,0xf3)	@ aesd q0,q14
	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.8	{q11},[r0]!
	INST(0x6e,0x03,0xb0,0xf3)	@ aesd q0,q15
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	vld1.32	{q8},[r7]!			@ re-pre-load rndkey[0]
	add	r6,r5,#2
	veor	q4,q7,q0
	veor	q5,q7,q1
	veor	q10,q10,q7
	vld1.32	{q9},[r7]!			@ re-pre-load rndkey[1]
	vst1.8	{q4},[r1]!
	vorr	q0,q2,q2
	vst1.8	{q5},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q10},[r1]!
	vorr	q10,q11,q11
	bhs	.Loop3x_ecb_dec			@ flags of subs r2,r2,#0x30 above

	cmn	r2,#0x30
	beq	.Lecb_done			@ nothing left over
	nop

	@ tail: q1 and q10 hold the remaining one or two blocks
.Lecb_dec_tail:
	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q8},[r7]!
	subs	r6,r6,#2
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	vld1.32	{q9},[r7]!
	bgt	.Lecb_dec_tail

	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	cmn	r2,#0x20			@ eq: exactly one block remains
	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
	beq	.Lecb_dec_one
	veor	q5,q7,q1
	veor	q9,q7,q10
	vst1.8	{q5},[r1]!
	vst1.8	{q9},[r1]!
	b	.Lecb_done

.Lecb_dec_one:
	veor	q5,q7,q10			@ the single block result is in q10
	vst1.8	{q5},[r1]!

.Lecb_done:
	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
.size	aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt
605bc3d5698SJohn Baldwin.globl	aes_v8_cbc_encrypt
606bc3d5698SJohn Baldwin.type	aes_v8_cbc_encrypt,%function
607bc3d5698SJohn Baldwin.align	5
608bc3d5698SJohn Baldwinaes_v8_cbc_encrypt:
609bc3d5698SJohn Baldwin	mov	ip,sp
610bc3d5698SJohn Baldwin	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
611bc3d5698SJohn Baldwin	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
612bc3d5698SJohn Baldwin	ldmia	ip,{r4,r5}		@ load remaining args
613bc3d5698SJohn Baldwin	subs	r2,r2,#16
614bc3d5698SJohn Baldwin	mov	r8,#16
615bc3d5698SJohn Baldwin	blo	.Lcbc_abort
616*c0855eaaSJohn Baldwin	it	eq
617bc3d5698SJohn Baldwin	moveq	r8,#0
618bc3d5698SJohn Baldwin
619bc3d5698SJohn Baldwin	cmp	r5,#0			@ en- or decrypting?
620bc3d5698SJohn Baldwin	ldr	r5,[r3,#240]
621bc3d5698SJohn Baldwin	and	r2,r2,#-16
622bc3d5698SJohn Baldwin	vld1.8	{q6},[r4]
623bc3d5698SJohn Baldwin	vld1.8	{q0},[r0],r8
624bc3d5698SJohn Baldwin
625bc3d5698SJohn Baldwin	vld1.32	{q8,q9},[r3]		@ load key schedule...
626bc3d5698SJohn Baldwin	sub	r5,r5,#6
627bc3d5698SJohn Baldwin	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
628bc3d5698SJohn Baldwin	sub	r5,r5,#2
629bc3d5698SJohn Baldwin	vld1.32	{q10,q11},[r7]!
630bc3d5698SJohn Baldwin	vld1.32	{q12,q13},[r7]!
631bc3d5698SJohn Baldwin	vld1.32	{q14,q15},[r7]!
632bc3d5698SJohn Baldwin	vld1.32	{q7},[r7]
633bc3d5698SJohn Baldwin
634bc3d5698SJohn Baldwin	add	r7,r3,#32
635bc3d5698SJohn Baldwin	mov	r6,r5
636bc3d5698SJohn Baldwin	beq	.Lcbc_dec
637bc3d5698SJohn Baldwin
638bc3d5698SJohn Baldwin	cmp	r5,#2
639bc3d5698SJohn Baldwin	veor	q0,q0,q6
640bc3d5698SJohn Baldwin	veor	q5,q8,q7
641bc3d5698SJohn Baldwin	beq	.Lcbc_enc128
642bc3d5698SJohn Baldwin
643bc3d5698SJohn Baldwin	vld1.32	{q2,q3},[r7]
644bc3d5698SJohn Baldwin	add	r7,r3,#16
645bc3d5698SJohn Baldwin	add	r6,r3,#16*4
646bc3d5698SJohn Baldwin	add	r12,r3,#16*5
647*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
648*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
649bc3d5698SJohn Baldwin	add	r14,r3,#16*6
650bc3d5698SJohn Baldwin	add	r3,r3,#16*7
651bc3d5698SJohn Baldwin	b	.Lenter_cbc_enc
652bc3d5698SJohn Baldwin
653bc3d5698SJohn Baldwin.align	4
654bc3d5698SJohn Baldwin.Loop_cbc_enc:
655*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
656*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
657bc3d5698SJohn Baldwin	vst1.8	{q6},[r1]!
658bc3d5698SJohn Baldwin.Lenter_cbc_enc:
659*c0855eaaSJohn Baldwin	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
660*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
661*c0855eaaSJohn Baldwin	INST(0x04,0x03,0xb0,0xf3)	@ aese q0,q2
662*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
663bc3d5698SJohn Baldwin	vld1.32	{q8},[r6]
664bc3d5698SJohn Baldwin	cmp	r5,#4
665*c0855eaaSJohn Baldwin	INST(0x06,0x03,0xb0,0xf3)	@ aese q0,q3
666*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
667bc3d5698SJohn Baldwin	vld1.32	{q9},[r12]
668bc3d5698SJohn Baldwin	beq	.Lcbc_enc192
669bc3d5698SJohn Baldwin
670*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
671*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
672bc3d5698SJohn Baldwin	vld1.32	{q8},[r14]
673*c0855eaaSJohn Baldwin	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
674*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
675bc3d5698SJohn Baldwin	vld1.32	{q9},[r3]
676bc3d5698SJohn Baldwin	nop
677bc3d5698SJohn Baldwin
678bc3d5698SJohn Baldwin.Lcbc_enc192:
679*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
680*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
681bc3d5698SJohn Baldwin	subs	r2,r2,#16
682*c0855eaaSJohn Baldwin	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
683*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
684*c0855eaaSJohn Baldwin	it	eq
685bc3d5698SJohn Baldwin	moveq	r8,#0
686*c0855eaaSJohn Baldwin	INST(0x24,0x03,0xb0,0xf3)	@ aese q0,q10
687*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
688*c0855eaaSJohn Baldwin	INST(0x26,0x03,0xb0,0xf3)	@ aese q0,q11
689*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
690bc3d5698SJohn Baldwin	vld1.8	{q8},[r0],r8
691*c0855eaaSJohn Baldwin	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
692*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
693bc3d5698SJohn Baldwin	veor	q8,q8,q5
694*c0855eaaSJohn Baldwin	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
695*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
696bc3d5698SJohn Baldwin	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
697*c0855eaaSJohn Baldwin	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
698*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
699*c0855eaaSJohn Baldwin	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
700bc3d5698SJohn Baldwin	veor	q6,q0,q7
701bc3d5698SJohn Baldwin	bhs	.Loop_cbc_enc
702bc3d5698SJohn Baldwin
703bc3d5698SJohn Baldwin	vst1.8	{q6},[r1]!
704bc3d5698SJohn Baldwin	b	.Lcbc_done
705bc3d5698SJohn Baldwin
706bc3d5698SJohn Baldwin.align	5
707bc3d5698SJohn Baldwin.Lcbc_enc128:
708bc3d5698SJohn Baldwin	vld1.32	{q2,q3},[r7]
709*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
710*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
711bc3d5698SJohn Baldwin	b	.Lenter_cbc_enc128
712bc3d5698SJohn Baldwin.Loop_cbc_enc128:
713*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
714*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
715bc3d5698SJohn Baldwin	vst1.8	{q6},[r1]!
716bc3d5698SJohn Baldwin.Lenter_cbc_enc128:
717*c0855eaaSJohn Baldwin	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
718*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
719bc3d5698SJohn Baldwin	subs	r2,r2,#16
720*c0855eaaSJohn Baldwin	INST(0x04,0x03,0xb0,0xf3)	@ aese q0,q2
721*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
722*c0855eaaSJohn Baldwin	it	eq
723bc3d5698SJohn Baldwin	moveq	r8,#0
724*c0855eaaSJohn Baldwin	INST(0x06,0x03,0xb0,0xf3)	@ aese q0,q3
725*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
726*c0855eaaSJohn Baldwin	INST(0x24,0x03,0xb0,0xf3)	@ aese q0,q10
727*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
728*c0855eaaSJohn Baldwin	INST(0x26,0x03,0xb0,0xf3)	@ aese q0,q11
729*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
730bc3d5698SJohn Baldwin	vld1.8	{q8},[r0],r8
731*c0855eaaSJohn Baldwin	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
732*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
733*c0855eaaSJohn Baldwin	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
734*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
735*c0855eaaSJohn Baldwin	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
736*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
737bc3d5698SJohn Baldwin	veor	q8,q8,q5
738*c0855eaaSJohn Baldwin	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
739bc3d5698SJohn Baldwin	veor	q6,q0,q7
740bc3d5698SJohn Baldwin	bhs	.Loop_cbc_enc128
741bc3d5698SJohn Baldwin
742bc3d5698SJohn Baldwin	vst1.8	{q6},[r1]!
743bc3d5698SJohn Baldwin	b	.Lcbc_done
744bc3d5698SJohn Baldwin.align	5
745bc3d5698SJohn Baldwin.Lcbc_dec:
746bc3d5698SJohn Baldwin	vld1.8	{q10},[r0]!
747bc3d5698SJohn Baldwin	subs	r2,r2,#32		@ bias
748bc3d5698SJohn Baldwin	add	r6,r5,#2
749bc3d5698SJohn Baldwin	vorr	q3,q0,q0
750bc3d5698SJohn Baldwin	vorr	q1,q0,q0
751bc3d5698SJohn Baldwin	vorr	q11,q10,q10
752bc3d5698SJohn Baldwin	blo	.Lcbc_dec_tail
753bc3d5698SJohn Baldwin
754bc3d5698SJohn Baldwin	vorr	q1,q10,q10
755bc3d5698SJohn Baldwin	vld1.8	{q10},[r0]!
756bc3d5698SJohn Baldwin	vorr	q2,q0,q0
757bc3d5698SJohn Baldwin	vorr	q3,q1,q1
758bc3d5698SJohn Baldwin	vorr	q11,q10,q10
759bc3d5698SJohn Baldwin.Loop3x_cbc_dec:
760*c0855eaaSJohn Baldwin	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
761*c0855eaaSJohn Baldwin	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
762*c0855eaaSJohn Baldwin	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
763*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
764*c0855eaaSJohn Baldwin	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
765*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
766bc3d5698SJohn Baldwin	vld1.32	{q8},[r7]!
767bc3d5698SJohn Baldwin	subs	r6,r6,#2
768*c0855eaaSJohn Baldwin	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
769*c0855eaaSJohn Baldwin	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
770*c0855eaaSJohn Baldwin	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
771*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
772*c0855eaaSJohn Baldwin	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
773*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
774bc3d5698SJohn Baldwin	vld1.32	{q9},[r7]!
775bc3d5698SJohn Baldwin	bgt	.Loop3x_cbc_dec
776bc3d5698SJohn Baldwin
777*c0855eaaSJohn Baldwin	INST(0x60,0x03,0xb0,0xf3)	@ aesd q0,q8
778*c0855eaaSJohn Baldwin	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
779*c0855eaaSJohn Baldwin	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
780*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
781*c0855eaaSJohn Baldwin	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
782*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
783bc3d5698SJohn Baldwin	veor	q4,q6,q7
784bc3d5698SJohn Baldwin	subs	r2,r2,#0x30
785bc3d5698SJohn Baldwin	veor	q5,q2,q7
786*c0855eaaSJohn Baldwin	it	lo
787bc3d5698SJohn Baldwin	movlo	r6,r2			@ r6, r6, is zero at this point
788*c0855eaaSJohn Baldwin	INST(0x62,0x03,0xb0,0xf3)	@ aesd q0,q9
789*c0855eaaSJohn Baldwin	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
790*c0855eaaSJohn Baldwin	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
791*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
792*c0855eaaSJohn Baldwin	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
793*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
794bc3d5698SJohn Baldwin	veor	q9,q3,q7
795bc3d5698SJohn Baldwin	add	r0,r0,r6		@ r0 is adjusted in such way that
796bc3d5698SJohn Baldwin					@ at exit from the loop q1-q10
797bc3d5698SJohn Baldwin					@ are loaded with last "words"
798bc3d5698SJohn Baldwin	vorr	q6,q11,q11
799bc3d5698SJohn Baldwin	mov	r7,r3
800*c0855eaaSJohn Baldwin	INST(0x68,0x03,0xb0,0xf3)	@ aesd q0,q12
801*c0855eaaSJohn Baldwin	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
802*c0855eaaSJohn Baldwin	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
803*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
804*c0855eaaSJohn Baldwin	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
805*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
806bc3d5698SJohn Baldwin	vld1.8	{q2},[r0]!
807*c0855eaaSJohn Baldwin	INST(0x6a,0x03,0xb0,0xf3)	@ aesd q0,q13
808*c0855eaaSJohn Baldwin	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
809*c0855eaaSJohn Baldwin	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
810*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
811*c0855eaaSJohn Baldwin	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
812*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
813bc3d5698SJohn Baldwin	vld1.8	{q3},[r0]!
814*c0855eaaSJohn Baldwin	INST(0x6c,0x03,0xb0,0xf3)	@ aesd q0,q14
815*c0855eaaSJohn Baldwin	INST(0xc0,0x03,0xb0,0xf3)	@ aesimc q0,q0
816*c0855eaaSJohn Baldwin	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
817*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
818*c0855eaaSJohn Baldwin	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
819*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
820bc3d5698SJohn Baldwin	vld1.8	{q11},[r0]!
821*c0855eaaSJohn Baldwin	INST(0x6e,0x03,0xb0,0xf3)	@ aesd q0,q15
822*c0855eaaSJohn Baldwin	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
823*c0855eaaSJohn Baldwin	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
824bc3d5698SJohn Baldwin	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
825bc3d5698SJohn Baldwin	add	r6,r5,#2
826bc3d5698SJohn Baldwin	veor	q4,q4,q0
827bc3d5698SJohn Baldwin	veor	q5,q5,q1
828bc3d5698SJohn Baldwin	veor	q10,q10,q9
829bc3d5698SJohn Baldwin	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
830bc3d5698SJohn Baldwin	vst1.8	{q4},[r1]!
831bc3d5698SJohn Baldwin	vorr	q0,q2,q2
832bc3d5698SJohn Baldwin	vst1.8	{q5},[r1]!
833bc3d5698SJohn Baldwin	vorr	q1,q3,q3
834bc3d5698SJohn Baldwin	vst1.8	{q10},[r1]!
835bc3d5698SJohn Baldwin	vorr	q10,q11,q11
836bc3d5698SJohn Baldwin	bhs	.Loop3x_cbc_dec
837bc3d5698SJohn Baldwin
838bc3d5698SJohn Baldwin	cmn	r2,#0x30
839bc3d5698SJohn Baldwin	beq	.Lcbc_done
840bc3d5698SJohn Baldwin	nop
841bc3d5698SJohn Baldwin
842bc3d5698SJohn Baldwin.Lcbc_dec_tail:
843*c0855eaaSJohn Baldwin	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
844*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
845*c0855eaaSJohn Baldwin	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
846*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
847bc3d5698SJohn Baldwin	vld1.32	{q8},[r7]!
848bc3d5698SJohn Baldwin	subs	r6,r6,#2
849*c0855eaaSJohn Baldwin	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
850*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
851*c0855eaaSJohn Baldwin	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
852*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
853bc3d5698SJohn Baldwin	vld1.32	{q9},[r7]!
854bc3d5698SJohn Baldwin	bgt	.Lcbc_dec_tail
855bc3d5698SJohn Baldwin
856*c0855eaaSJohn Baldwin	INST(0x60,0x23,0xb0,0xf3)	@ aesd q1,q8
857*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
858*c0855eaaSJohn Baldwin	INST(0x60,0x43,0xf0,0xf3)	@ aesd q10,q8
859*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
860*c0855eaaSJohn Baldwin	INST(0x62,0x23,0xb0,0xf3)	@ aesd q1,q9
861*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
862*c0855eaaSJohn Baldwin	INST(0x62,0x43,0xf0,0xf3)	@ aesd q10,q9
863*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
864*c0855eaaSJohn Baldwin	INST(0x68,0x23,0xb0,0xf3)	@ aesd q1,q12
865*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
866*c0855eaaSJohn Baldwin	INST(0x68,0x43,0xf0,0xf3)	@ aesd q10,q12
867*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
868bc3d5698SJohn Baldwin	cmn	r2,#0x20
869*c0855eaaSJohn Baldwin	INST(0x6a,0x23,0xb0,0xf3)	@ aesd q1,q13
870*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
871*c0855eaaSJohn Baldwin	INST(0x6a,0x43,0xf0,0xf3)	@ aesd q10,q13
872*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
873bc3d5698SJohn Baldwin	veor	q5,q6,q7
874*c0855eaaSJohn Baldwin	INST(0x6c,0x23,0xb0,0xf3)	@ aesd q1,q14
875*c0855eaaSJohn Baldwin	INST(0xc2,0x23,0xb0,0xf3)	@ aesimc q1,q1
876*c0855eaaSJohn Baldwin	INST(0x6c,0x43,0xf0,0xf3)	@ aesd q10,q14
877*c0855eaaSJohn Baldwin	INST(0xe4,0x43,0xf0,0xf3)	@ aesimc q10,q10
878bc3d5698SJohn Baldwin	veor	q9,q3,q7
879*c0855eaaSJohn Baldwin	INST(0x6e,0x23,0xb0,0xf3)	@ aesd q1,q15
880*c0855eaaSJohn Baldwin	INST(0x6e,0x43,0xf0,0xf3)	@ aesd q10,q15
881bc3d5698SJohn Baldwin	beq	.Lcbc_dec_one
882bc3d5698SJohn Baldwin	veor	q5,q5,q1
883bc3d5698SJohn Baldwin	veor	q9,q9,q10
884bc3d5698SJohn Baldwin	vorr	q6,q11,q11
885bc3d5698SJohn Baldwin	vst1.8	{q5},[r1]!
886bc3d5698SJohn Baldwin	vst1.8	{q9},[r1]!
887bc3d5698SJohn Baldwin	b	.Lcbc_done
888bc3d5698SJohn Baldwin
889bc3d5698SJohn Baldwin.Lcbc_dec_one:
890bc3d5698SJohn Baldwin	veor	q5,q5,q10
891bc3d5698SJohn Baldwin	vorr	q6,q11,q11
892bc3d5698SJohn Baldwin	vst1.8	{q5},[r1]!
893bc3d5698SJohn Baldwin
894bc3d5698SJohn Baldwin.Lcbc_done:
895bc3d5698SJohn Baldwin	vst1.8	{q6},[r4]
896bc3d5698SJohn Baldwin.Lcbc_abort:
897bc3d5698SJohn Baldwin	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
898bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
899bc3d5698SJohn Baldwin.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@ aes_v8_ctr32_encrypt_blocks: AES counter-mode processing of whole 16-byte
@ blocks (keystream block XOR input block -> output block).
@
@ Register use on entry, as read by the code below:
@   r0   = input pointer, consumed 16 bytes at a time
@   r1   = output pointer, written 16 bytes at a time
@   r2   = number of 16-byte blocks
@   r3   = key schedule; the word at offset 240 is used as the round count
@   [sp] = fifth argument, pointer to the 16-byte counter block (kept in r4)
@
@ Only the last 32-bit word of the counter block is incremented, and it is
@ treated as a big-endian value; the other 96 bits are copied unchanged.
@ The counter block in memory is read but never written back here.
@
@ The main loop handles three blocks per iteration; one or two leftover
@ blocks go through .Lctr32_tail. r4-r10 and d8-d15 are saved on entry and
@ restored on exit.
@
@ The AES instructions are emitted as raw bytes through the INST macro
@ defined at the top of this file; the trailing comment on each such line
@ gives the mnemonic being encoded.
@
@ NOTE(review): a block count of 0 is not special-cased. Traced through the
@ code below it takes the tail path and stores 32 bytes, so callers
@ presumably guarantee r2 >= 1 -- confirm against the C callers.
900bc3d5698SJohn Baldwin.globl	aes_v8_ctr32_encrypt_blocks
901bc3d5698SJohn Baldwin.type	aes_v8_ctr32_encrypt_blocks,%function
902bc3d5698SJohn Baldwin.align	5
903bc3d5698SJohn Baldwinaes_v8_ctr32_encrypt_blocks:
	@ Keep the entry sp in ip so the stack argument can still be
	@ reached after the register saves below.
904bc3d5698SJohn Baldwin	mov	ip,sp
905bc3d5698SJohn Baldwin	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
906bc3d5698SJohn Baldwin	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
907bc3d5698SJohn Baldwin	ldr	r4, [ip]		@ load remaining arg
908bc3d5698SJohn Baldwin	ldr	r5,[r3,#240]
909bc3d5698SJohn Baldwin
	@ r8 = last 32-bit word of the counter block; q0 = whole counter block.
910bc3d5698SJohn Baldwin	ldr	r8, [r4, #12]
911c3c73b4fSJung-uk Kim#ifdef __ARMEB__
912c3c73b4fSJung-uk Kim	vld1.8	{q0},[r4]
913c3c73b4fSJung-uk Kim#else
914bc3d5698SJohn Baldwin	vld1.32	{q0},[r4]
915c3c73b4fSJung-uk Kim#endif
916bc3d5698SJohn Baldwin	vld1.32	{q8,q9},[r3]		@ load key schedule...
917bc3d5698SJohn Baldwin	sub	r5,r5,#4
	@ r12 = input stride used by the tail (16, or 0 for a single block).
918bc3d5698SJohn Baldwin	mov	r12,#16
	@ The flags set here are consumed by the movlo and the bls further
	@ down; nothing in between modifies them.
919bc3d5698SJohn Baldwin	cmp	r2,#2
920bc3d5698SJohn Baldwin	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
921bc3d5698SJohn Baldwin	sub	r5,r5,#2
	@ q12-q15 and q7 hold the last five round keys for the whole call.
922bc3d5698SJohn Baldwin	vld1.32	{q12,q13},[r7]!
923bc3d5698SJohn Baldwin	vld1.32	{q14,q15},[r7]!
924bc3d5698SJohn Baldwin	vld1.32	{q7},[r7]
	@ r7 = pointer to the third round key (the first two are in q8,q9);
	@ r6 = round-pair countdown, reloaded from r5 for every group.
925bc3d5698SJohn Baldwin	add	r7,r3,#32
926bc3d5698SJohn Baldwin	mov	r6,r5
927*c0855eaaSJohn Baldwin	it	lo
928bc3d5698SJohn Baldwin	movlo	r12,#0
	@ On little-endian builds byte-swap the counter word so that it can
	@ be incremented with plain integer adds.
929bc3d5698SJohn Baldwin#ifndef __ARMEB__
930bc3d5698SJohn Baldwin	rev	r8, r8
931bc3d5698SJohn Baldwin#endif
	@ q6 is the working counter block: its top lane (d13[1]) is rewritten
	@ with each new counter value and the result copied into q0/q1/q10.
932bc3d5698SJohn Baldwin	add	r10, r8, #1
933bc3d5698SJohn Baldwin	vorr	q6,q0,q0
934bc3d5698SJohn Baldwin	rev	r10, r10
935c3c73b4fSJung-uk Kim	vmov.32	d13[1],r10
936c3c73b4fSJung-uk Kim	add	r8, r8, #2
937c3c73b4fSJung-uk Kim	vorr	q1,q6,q6		@ q1 = counter block + 1
	@ Two blocks or fewer (flags from the cmp r2,#2 above): tail only.
938bc3d5698SJohn Baldwin	bls	.Lctr32_tail
939bc3d5698SJohn Baldwin	rev	r12, r8
940c3c73b4fSJung-uk Kim	vmov.32	d13[1],r12
941bc3d5698SJohn Baldwin	sub	r2,r2,#3		@ bias
942c3c73b4fSJung-uk Kim	vorr	q10,q6,q6		@ q10 = counter block + 2
943bc3d5698SJohn Baldwin	b	.Loop3x_ctr32
944bc3d5698SJohn Baldwin
945bc3d5698SJohn Baldwin.align	4
	@ Three counter blocks (q0, q1, q10) are encrypted in parallel. This
	@ inner part runs two rounds per pass, using q8/q9 and then reloading
	@ them with the next two round keys from [r7].
946bc3d5698SJohn Baldwin.Loop3x_ctr32:
947*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
948*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
949*c0855eaaSJohn Baldwin	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
950*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
951*c0855eaaSJohn Baldwin	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
952*c0855eaaSJohn Baldwin	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
953bc3d5698SJohn Baldwin	vld1.32	{q8},[r7]!
954bc3d5698SJohn Baldwin	subs	r6,r6,#2
955*c0855eaaSJohn Baldwin	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
956*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
957*c0855eaaSJohn Baldwin	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
958*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
959*c0855eaaSJohn Baldwin	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
960*c0855eaaSJohn Baldwin	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
961bc3d5698SJohn Baldwin	vld1.32	{q9},[r7]!
962bc3d5698SJohn Baldwin	bgt	.Loop3x_ctr32
963bc3d5698SJohn Baldwin
	@ Remaining rounds, interleaved with loading three input blocks
	@ (q2, q3, q11) and preparing the next three counter blocks. The
	@ cipher states move to q4, q5 and q9 so that q0, q1 and q10 are
	@ free to receive the next counters.
964*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
965*c0855eaaSJohn Baldwin	INST(0x80,0x83,0xb0,0xf3)	@ aesmc q4,q0
966*c0855eaaSJohn Baldwin	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
967*c0855eaaSJohn Baldwin	INST(0x82,0xa3,0xb0,0xf3)	@ aesmc q5,q1
968bc3d5698SJohn Baldwin	vld1.8	{q2},[r0]!
969c3c73b4fSJung-uk Kim	add	r9,r8,#1		@ r9 = first counter of the next group
970*c0855eaaSJohn Baldwin	INST(0x20,0x43,0xf0,0xf3)	@ aese q10,q8
971*c0855eaaSJohn Baldwin	INST(0xa4,0x43,0xf0,0xf3)	@ aesmc q10,q10
972bc3d5698SJohn Baldwin	vld1.8	{q3},[r0]!
973c3c73b4fSJung-uk Kim	rev	r9,r9
974*c0855eaaSJohn Baldwin	INST(0x22,0x83,0xb0,0xf3)	@ aese q4,q9
975*c0855eaaSJohn Baldwin	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
976*c0855eaaSJohn Baldwin	INST(0x22,0xa3,0xb0,0xf3)	@ aese q5,q9
977*c0855eaaSJohn Baldwin	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
978bc3d5698SJohn Baldwin	vld1.8	{q11},[r0]!
979bc3d5698SJohn Baldwin	mov	r7,r3			@ rewind r7 to the start of the key schedule
980*c0855eaaSJohn Baldwin	INST(0x22,0x43,0xf0,0xf3)	@ aese q10,q9
981*c0855eaaSJohn Baldwin	INST(0xa4,0x23,0xf0,0xf3)	@ aesmc q9,q10
982*c0855eaaSJohn Baldwin	INST(0x28,0x83,0xb0,0xf3)	@ aese q4,q12
983*c0855eaaSJohn Baldwin	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
984*c0855eaaSJohn Baldwin	INST(0x28,0xa3,0xb0,0xf3)	@ aese q5,q12
985*c0855eaaSJohn Baldwin	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
	@ The last round key (q7) is XORed into each input block up front,
	@ so one veor with the cipher state after the final aese yields the
	@ output block.
986bc3d5698SJohn Baldwin	veor	q2,q2,q7
987bc3d5698SJohn Baldwin	add	r10,r8,#2		@ r10 = second counter of the next group
988*c0855eaaSJohn Baldwin	INST(0x28,0x23,0xf0,0xf3)	@ aese q9,q12
989*c0855eaaSJohn Baldwin	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
990bc3d5698SJohn Baldwin	veor	q3,q3,q7
991bc3d5698SJohn Baldwin	add	r8,r8,#3		@ r8 = third counter of the next group
992*c0855eaaSJohn Baldwin	INST(0x2a,0x83,0xb0,0xf3)	@ aese q4,q13
993*c0855eaaSJohn Baldwin	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
994*c0855eaaSJohn Baldwin	INST(0x2a,0xa3,0xb0,0xf3)	@ aese q5,q13
995*c0855eaaSJohn Baldwin	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
996bc3d5698SJohn Baldwin	veor	q11,q11,q7
997c3c73b4fSJung-uk Kim	vmov.32	d13[1], r9
998*c0855eaaSJohn Baldwin	INST(0x2a,0x23,0xf0,0xf3)	@ aese q9,q13
999*c0855eaaSJohn Baldwin	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
1000c3c73b4fSJung-uk Kim	vorr	q0,q6,q6		@ q0 = next counter block 0
1001bc3d5698SJohn Baldwin	rev	r10,r10
1002*c0855eaaSJohn Baldwin	INST(0x2c,0x83,0xb0,0xf3)	@ aese q4,q14
1003*c0855eaaSJohn Baldwin	INST(0x88,0x83,0xb0,0xf3)	@ aesmc q4,q4
1004c3c73b4fSJung-uk Kim	vmov.32	d13[1], r10
1005c3c73b4fSJung-uk Kim	rev	r12,r8
1006*c0855eaaSJohn Baldwin	INST(0x2c,0xa3,0xb0,0xf3)	@ aese q5,q14
1007*c0855eaaSJohn Baldwin	INST(0x8a,0xa3,0xb0,0xf3)	@ aesmc q5,q5
1008c3c73b4fSJung-uk Kim	vorr	q1,q6,q6		@ q1 = next counter block 1
1009c3c73b4fSJung-uk Kim	vmov.32	d13[1], r12
1010*c0855eaaSJohn Baldwin	INST(0x2c,0x23,0xf0,0xf3)	@ aese q9,q14
1011*c0855eaaSJohn Baldwin	INST(0xa2,0x23,0xf0,0xf3)	@ aesmc q9,q9
1012c3c73b4fSJung-uk Kim	vorr	q10,q6,q6		@ q10 = next counter block 2
	@ The flags from this subs are consumed by the bhs at the bottom of
	@ the loop; nothing in between modifies them.
1013bc3d5698SJohn Baldwin	subs	r2,r2,#3
1014*c0855eaaSJohn Baldwin	INST(0x2e,0x83,0xb0,0xf3)	@ aese q4,q15
1015*c0855eaaSJohn Baldwin	INST(0x2e,0xa3,0xb0,0xf3)	@ aese q5,q15
1016*c0855eaaSJohn Baldwin	INST(0x2e,0x23,0xf0,0xf3)	@ aese q9,q15
1017bc3d5698SJohn Baldwin
	@ Produce and store the three output blocks while reloading the
	@ first two round keys and the round countdown for the next group.
1018bc3d5698SJohn Baldwin	veor	q2,q2,q4
1019bc3d5698SJohn Baldwin	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
1020bc3d5698SJohn Baldwin	vst1.8	{q2},[r1]!
1021bc3d5698SJohn Baldwin	veor	q3,q3,q5
1022bc3d5698SJohn Baldwin	mov	r6,r5
1023bc3d5698SJohn Baldwin	vst1.8	{q3},[r1]!
1024bc3d5698SJohn Baldwin	veor	q11,q11,q9
1025bc3d5698SJohn Baldwin	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
1026bc3d5698SJohn Baldwin	vst1.8	{q11},[r1]!
	@ No borrow from the subs above: at least three more blocks remain.
1027bc3d5698SJohn Baldwin	bhs	.Loop3x_ctr32
1028bc3d5698SJohn Baldwin
	@ Undo the bias: r2 = blocks still to do (0, 1 or 2).
1029bc3d5698SJohn Baldwin	adds	r2,r2,#3
1030bc3d5698SJohn Baldwin	beq	.Lctr32_done
	@ Stride 0 when exactly one block is left, otherwise 16.
1031bc3d5698SJohn Baldwin	cmp	r2,#1
1032bc3d5698SJohn Baldwin	mov	r12,#16
1033*c0855eaaSJohn Baldwin	it	eq
1034bc3d5698SJohn Baldwin	moveq	r12,#0
1035bc3d5698SJohn Baldwin
	@ Tail: one or two blocks. Two counter blocks (q0, q1) are always
	@ encrypted; the second result is stored only when r2 is not 1.
1036bc3d5698SJohn Baldwin.Lctr32_tail:
1037*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
1038*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1039*c0855eaaSJohn Baldwin	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
1040*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1041bc3d5698SJohn Baldwin	vld1.32	{q8},[r7]!
1042bc3d5698SJohn Baldwin	subs	r6,r6,#2
1043*c0855eaaSJohn Baldwin	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
1044*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1045*c0855eaaSJohn Baldwin	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
1046*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1047bc3d5698SJohn Baldwin	vld1.32	{q9},[r7]!
1048bc3d5698SJohn Baldwin	bgt	.Lctr32_tail
1049bc3d5698SJohn Baldwin
1050*c0855eaaSJohn Baldwin	INST(0x20,0x03,0xb0,0xf3)	@ aese q0,q8
1051*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1052*c0855eaaSJohn Baldwin	INST(0x20,0x23,0xb0,0xf3)	@ aese q1,q8
1053*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1054*c0855eaaSJohn Baldwin	INST(0x22,0x03,0xb0,0xf3)	@ aese q0,q9
1055*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1056*c0855eaaSJohn Baldwin	INST(0x22,0x23,0xb0,0xf3)	@ aese q1,q9
1057*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
	@ With r12 = 0 the second load below re-reads the same block, so
	@ nothing beyond a single remaining input block is touched.
1058bc3d5698SJohn Baldwin	vld1.8	{q2},[r0],r12
1059*c0855eaaSJohn Baldwin	INST(0x28,0x03,0xb0,0xf3)	@ aese q0,q12
1060*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1061*c0855eaaSJohn Baldwin	INST(0x28,0x23,0xb0,0xf3)	@ aese q1,q12
1062*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1063bc3d5698SJohn Baldwin	vld1.8	{q3},[r0]
1064*c0855eaaSJohn Baldwin	INST(0x2a,0x03,0xb0,0xf3)	@ aese q0,q13
1065*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1066*c0855eaaSJohn Baldwin	INST(0x2a,0x23,0xb0,0xf3)	@ aese q1,q13
1067*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1068bc3d5698SJohn Baldwin	veor	q2,q2,q7
1069*c0855eaaSJohn Baldwin	INST(0x2c,0x03,0xb0,0xf3)	@ aese q0,q14
1070*c0855eaaSJohn Baldwin	INST(0x80,0x03,0xb0,0xf3)	@ aesmc q0,q0
1071*c0855eaaSJohn Baldwin	INST(0x2c,0x23,0xb0,0xf3)	@ aese q1,q14
1072*c0855eaaSJohn Baldwin	INST(0x82,0x23,0xb0,0xf3)	@ aesmc q1,q1
1073bc3d5698SJohn Baldwin	veor	q3,q3,q7
1074*c0855eaaSJohn Baldwin	INST(0x2e,0x03,0xb0,0xf3)	@ aese q0,q15
1075*c0855eaaSJohn Baldwin	INST(0x2e,0x23,0xb0,0xf3)	@ aese q1,q15
1076bc3d5698SJohn Baldwin
	@ Always store the first block; skip the second when r2 == 1.
1077bc3d5698SJohn Baldwin	cmp	r2,#1
1078bc3d5698SJohn Baldwin	veor	q2,q2,q0
1079bc3d5698SJohn Baldwin	veor	q3,q3,q1
1080bc3d5698SJohn Baldwin	vst1.8	{q2},[r1]!
1081bc3d5698SJohn Baldwin	beq	.Lctr32_done
1082bc3d5698SJohn Baldwin	vst1.8	{q3},[r1]
1083bc3d5698SJohn Baldwin
	@ Restore the saved registers; loading pc performs the return.
1084bc3d5698SJohn Baldwin.Lctr32_done:
1085bc3d5698SJohn Baldwin	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
1086bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
1087bc3d5698SJohn Baldwin.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
1088bc3d5698SJohn Baldwin#endif
1089