xref: /freebsd/sys/crypto/openssl/arm/aesv8-armx.S (revision f2d48b5e2c3b45850585e4d7aee324fe148afbf2)
1/* $FreeBSD$ */
2/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
3#include "arm_arch.h"
4
5#if __ARM_MAX_ARCH__>=7
6.text
7.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
8.fpu	neon
9.code	32
10#undef	__thumb2__
11.align	5
/*
 * Constant table for aes_v8_set_encrypt_key, which addresses it with
 * "adr r3,.Lrcon" and loads the rows in order:
 *   row 0 -> q1  0x01 in every 32-bit lane; XORed into the expanded word
 *                and doubled with vshl.u8 after each use
 *   row 1 -> q2  byte-index vector for the vtbl.8 lookups
 *   row 2 -> q1  0x1b in every lane; loaded by the 128-bit path after its
 *                eight-iteration loop
 */
12.Lrcon:
13.long	0x01,0x01,0x01,0x01
14.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
15.long	0x1b,0x1b,0x1b,0x1b
16
/*
 * aes_v8_set_encrypt_key - expand a user key into an encryption key schedule.
 *
 * In:   r0 = pointer to the user key bytes
 *       r1 = key length in bits
 *       r2 = pointer to the output key schedule
 * Out:  r0 =  0 on success
 *            -1 if r0 or r2 is zero
 *            -2 if r1 is below 128, above 256, or not a multiple of 64
 *
 * On success the round keys are written from r2 upwards, 16 bytes each, and
 * the round count (10, 12 or 14) is stored at byte offset 240 of the
 * schedule.
 *
 * The local entry .Lenc_key is also reached with "bl" from
 * aes_v8_set_decrypt_key, which uses the values left in r12 (round count)
 * and r2 (schedule base + 240) when this routine returns successfully.
 *
 * Scratch: r1, r3, r12, flags, q0-q3, q8-q10.
 *
 * NOTE(review): the leading decimal number on every line looks like a
 * line-number artifact of a source-browser export; it presumably has to be
 * stripped before the file can be assembled - confirm against the original.
 */
17.globl	aes_v8_set_encrypt_key
18.type	aes_v8_set_encrypt_key,%function
19.align	5
20aes_v8_set_encrypt_key:
21.Lenc_key:
/* Argument checks; r3 holds the return code that .Lenc_key_abort hands back. */
22	mov	r3,#-1
23	cmp	r0,#0
24	beq	.Lenc_key_abort
25	cmp	r2,#0
26	beq	.Lenc_key_abort
27	mov	r3,#-2
28	cmp	r1,#128
29	blt	.Lenc_key_abort
30	cmp	r1,#256
31	bgt	.Lenc_key_abort
32	tst	r1,#0x3f
33	bne	.Lenc_key_abort
34
/*
 * r3 is reused as the .Lrcon pointer.  The flags set by the cmp against 192
 * are consumed by the blt/beq further down; none of the instructions in
 * between updates them.
 */
35	adr	r3,.Lrcon
36	cmp	r1,#192
37
/* q0 = 0, q3 = key bytes 0..15, q1 and q2 = first two rows of .Lrcon. */
38	veor	q0,q0,q0
39	vld1.8	{q3},[r0]!
40	mov	r1,#8		@ reuse r1
41	vld1.32	{q1,q2},[r3]!
42
/* Below 192 bits -> 128-bit path, exactly 192 -> .L192, otherwise .L256. */
43	blt	.Loop128
44	beq	.L192
45	b	.L256
46
/*
 * 128-bit keys.  Eight iterations (r1 = 8).  Each one stores the current
 * round key from q3 and derives the next:
 *   q10 = bytes of q3 picked by the index vector in q2, then passed through
 *         aese with the all-zero q0
 *   q3 ^= q3 shifted up by one, two and three 32-bit words (vext with q0)
 *   q3 ^= q10 ^ q1, and q1 is doubled for the next iteration
 */
47.align	4
48.Loop128:
49	vtbl.8	d20,{q3},d4
50	vtbl.8	d21,{q3},d5
51	vext.8	q9,q0,q3,#12
52	vst1.32	{q3},[r2]!
53.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
54	subs	r1,r1,#1
55
56	veor	q3,q3,q9
57	vext.8	q9,q0,q9,#12
58	veor	q3,q3,q9
59	vext.8	q9,q0,q9,#12
60	veor	q10,q10,q1
61	veor	q3,q3,q9
62	vshl.u8	q1,q1,#1
63	veor	q3,q3,q10
64	bne	.Loop128
65
/* q1 = third row of .Lrcon (0x1b); two more expansion steps, unrolled. */
66	vld1.32	{q1},[r3]
67
68	vtbl.8	d20,{q3},d4
69	vtbl.8	d21,{q3},d5
70	vext.8	q9,q0,q3,#12
71	vst1.32	{q3},[r2]!
72.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
73
74	veor	q3,q3,q9
75	vext.8	q9,q0,q9,#12
76	veor	q3,q3,q9
77	vext.8	q9,q0,q9,#12
78	veor	q10,q10,q1
79	veor	q3,q3,q9
80	vshl.u8	q1,q1,#1
81	veor	q3,q3,q10
82
83	vtbl.8	d20,{q3},d4
84	vtbl.8	d21,{q3},d5
85	vext.8	q9,q0,q3,#12
86	vst1.32	{q3},[r2]!
87.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
88
89	veor	q3,q3,q9
90	vext.8	q9,q0,q9,#12
91	veor	q3,q3,q9
92	vext.8	q9,q0,q9,#12
93	veor	q10,q10,q1
94	veor	q3,q3,q9
95	veor	q3,q3,q10
/*
 * The final round key is stored at schedule offset 160 without writeback;
 * adding 0x50 then leaves r2 at offset 240 for the store at .Ldone.
 */
96	vst1.32	{q3},[r2]
97	add	r2,r2,#0x50
98
99	mov	r12,#10
100	b	.Ldone
101
/*
 * 192-bit keys.  d16 = key bytes 16..23.  Every byte index in q2 is reduced
 * by 8 so that the vtbl.8 lookups below, which read q8 instead of q3, pick
 * their bytes from d16.
 */
102.align	4
103.L192:
104	vld1.8	{d16},[r0]!
105	vmov.i8	q10,#8			@ borrow q10
106	vst1.32	{q3},[r2]!
107	vsub.i8	q2,q2,q10	@ adjust the mask
108
/*
 * Eight iterations (r1 is still 8).  Each one advances r2 by 8 bytes for the
 * d16 part and by 16 bytes for q3.  The big-endian variant stores all of q8
 * and then steps r2 back by 8, giving the same net advance.
 */
109.Loop192:
110	vtbl.8	d20,{q8},d4
111	vtbl.8	d21,{q8},d5
112	vext.8	q9,q0,q3,#12
113#ifdef __ARMEB__
114	vst1.32	{q8},[r2]!
115	sub	r2,r2,#8
116#else
117	vst1.32	{d16},[r2]!
118#endif
119.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
120	subs	r1,r1,#1
121
122	veor	q3,q3,q9
123	vext.8	q9,q0,q9,#12
124	veor	q3,q3,q9
125	vext.8	q9,q0,q9,#12
126	veor	q3,q3,q9
127
128	vdup.32	q9,d7[1]
129	veor	q9,q9,q8
130	veor	q10,q10,q1
131	vext.8	q8,q0,q8,#12
132	vshl.u8	q1,q1,#1
133	veor	q8,q8,q9
134	veor	q3,q3,q10
135	veor	q8,q8,q10
136	vst1.32	{q3},[r2]!
137	bne	.Loop192
138
/* r2 is at schedule offset 208 here (16 + 8 * 24); +0x20 gives 240. */
139	mov	r12,#12
140	add	r2,r2,#0x20
141	b	.Ldone
142
/* 256-bit keys.  q8 = key bytes 16..31; the loop runs seven times. */
143.align	4
144.L256:
145	vld1.8	{q8},[r0]
146	mov	r1,#7
147	mov	r12,#14
148	vst1.32	{q3},[r2]!
149
/*
 * Each iteration stores q8 and the newly derived q3 (32 bytes).  The exit is
 * the beq below, taken on the flags of the subs once r1 reaches zero; r2 is
 * then at offset 240 (16 + 7 * 32).  Otherwise the second half of the
 * iteration derives the next q8 from the last word of q3 and loops.
 */
150.Loop256:
151	vtbl.8	d20,{q8},d4
152	vtbl.8	d21,{q8},d5
153	vext.8	q9,q0,q3,#12
154	vst1.32	{q8},[r2]!
155.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
156	subs	r1,r1,#1
157
158	veor	q3,q3,q9
159	vext.8	q9,q0,q9,#12
160	veor	q3,q3,q9
161	vext.8	q9,q0,q9,#12
162	veor	q10,q10,q1
163	veor	q3,q3,q9
164	vshl.u8	q1,q1,#1
165	veor	q3,q3,q10
166	vst1.32	{q3},[r2]!
167	beq	.Ldone
168
169	vdup.32	q10,d7[1]
170	vext.8	q9,q0,q8,#12
171.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
172
173	veor	q8,q8,q9
174	vext.8	q9,q0,q9,#12
175	veor	q8,q8,q9
176	vext.8	q9,q0,q9,#12
177	veor	q8,q8,q9
178
179	veor	q8,q8,q10
180	b	.Loop256
181
/* Success: store the round count at schedule offset 240, return code 0. */
182.Ldone:
183	str	r12,[r2]
184	mov	r3,#0
185
/* Common exit: r3 is 0, -1 or -2 depending on the path taken. */
186.Lenc_key_abort:
187	mov	r0,r3			@ return value
188
189	bx	lr
190.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
191
/*
 * aes_v8_set_decrypt_key - build a decryption key schedule.
 *
 * In:   r0 = pointer to the user key bytes
 *       r1 = key length in bits
 *       r2 = pointer to the output key schedule
 * Out:  r0 = 0 on success, otherwise the non-zero code returned by the
 *            .Lenc_key entry of aes_v8_set_encrypt_key
 *
 * The encryption schedule is generated first and then converted in place:
 * the order of the round keys is reversed, and every round key except the
 * first and the last is passed through aesimc.
 *
 * Saves and restores r4; lr is pushed because of the bl.
 */
192.globl	aes_v8_set_decrypt_key
193.type	aes_v8_set_decrypt_key,%function
194.align	5
195aes_v8_set_decrypt_key:
196	stmdb	sp!,{r4,lr}
197	bl	.Lenc_key
198
199	cmp	r0,#0
200	bne	.Ldec_key_abort
201
/*
 * After a successful .Lenc_key, r2 = schedule base + 240 and r12 = round
 * count.  r2 walks up from the first round key, r0 walks down from the last
 * one; r4 = -16 is the post-index step used for r0.
 */
202	sub	r2,r2,#240		@ restore original r2
203	mov	r4,#-16
204	add	r0,r2,r12,lsl#4	@ end of key schedule
205
/* Swap the first and last round keys unchanged. */
206	vld1.32	{q0},[r2]
207	vld1.32	{q1},[r0]
208	vst1.32	{q0},[r0],r4
209	vst1.32	{q1},[r2]!
210
/*
 * Swap the remaining pairs, applying aesimc to both halves, for as long as
 * r0 is above r2 (unsigned compare).
 */
211.Loop_imc:
212	vld1.32	{q0},[r2]
213	vld1.32	{q1},[r0]
214.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
215.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
216	vst1.32	{q0},[r0],r4
217	vst1.32	{q1},[r2]!
218	cmp	r0,r2
219	bhi	.Loop_imc
220
/* One more round key is loaded through r2, transformed, and stored through r0. */
221	vld1.32	{q0},[r2]
222.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
223	vst1.32	{q0},[r0]
224
225	eor	r0,r0,r0		@ return value
226.Ldec_key_abort:
227	ldmia	sp!,{r4,pc}
228.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
/*
 * aes_v8_encrypt - encrypt a single 16-byte block.
 *
 * In:   r0 = pointer to the 16-byte input block
 *       r1 = pointer to the 16-byte output block
 *       r2 = pointer to the key schedule (round count read from offset 240)
 *
 * Scratch: r2, r3, flags, q0-q2.  No return value is set.
 */
229.globl	aes_v8_encrypt
230.type	aes_v8_encrypt,%function
231.align	5
232aes_v8_encrypt:
/* r3 = round count - 2; q0/q1 = first two round keys; q2 = input block. */
233	ldr	r3,[r2,#240]
234	vld1.32	{q0},[r2]!
235	vld1.8	{q2},[r0]
236	sub	r3,r3,#2
237	vld1.32	{q1},[r2]!
238
/*
 * Two rounds per iteration; the round key for each following round is
 * loaded while the current one is applied.
 */
239.Loop_enc:
240.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
241.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
242	vld1.32	{q0},[r2]!
243	subs	r3,r3,#2
244.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
245.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
246	vld1.32	{q1},[r2]!
247	bgt	.Loop_enc
248
/*
 * Last two rounds: the second one has no aesmc and is followed by an XOR
 * with the final round key, loaded into q0.
 */
249.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
250.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
251	vld1.32	{q0},[r2]
252.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
253	veor	q2,q2,q0
254
255	vst1.8	{q2},[r1]
256	bx	lr
257.size	aes_v8_encrypt,.-aes_v8_encrypt
/*
 * aes_v8_decrypt - decrypt a single 16-byte block.
 *
 * In:   r0 = pointer to the 16-byte input block
 *       r1 = pointer to the 16-byte output block
 *       r2 = pointer to the key schedule (round count read from offset 240);
 *            presumably one produced by aes_v8_set_decrypt_key - confirm
 *            against callers
 *
 * Same structure as aes_v8_encrypt with aesd/aesimc in place of aese/aesmc.
 * Scratch: r2, r3, flags, q0-q2.  No return value is set.
 */
258.globl	aes_v8_decrypt
259.type	aes_v8_decrypt,%function
260.align	5
261aes_v8_decrypt:
/* r3 = round count - 2; q0/q1 = first two round keys; q2 = input block. */
262	ldr	r3,[r2,#240]
263	vld1.32	{q0},[r2]!
264	vld1.8	{q2},[r0]
265	sub	r3,r3,#2
266	vld1.32	{q1},[r2]!
267
/* Two rounds per iteration, with the next round keys loaded in between. */
268.Loop_dec:
269.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
270.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
271	vld1.32	{q0},[r2]!
272	subs	r3,r3,#2
273.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
274.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
275	vld1.32	{q1},[r2]!
276	bgt	.Loop_dec
277
/*
 * Last two rounds: the second one has no aesimc and is followed by an XOR
 * with the final round key, loaded into q0.
 */
278.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
279.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
280	vld1.32	{q0},[r2]
281.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
282	veor	q2,q2,q0
283
284	vst1.8	{q2},[r1]
285	bx	lr
286.size	aes_v8_decrypt,.-aes_v8_decrypt
/*
 * aes_v8_cbc_encrypt - CBC-mode encryption or decryption of whole blocks.
 *
 * In:   r0      = input pointer
 *       r1      = output pointer
 *       r2      = length in bytes
 *       r3      = key schedule (round count read from offset 240)
 *       [sp]    = pointer to the 16-byte IV, loaded into r4
 *       [sp,#4] = direction flag, loaded into r5; zero selects decryption
 *
 * A length below 16 returns without writing anything.  Otherwise the length
 * is rounded down to a multiple of 16, so a trailing partial block is not
 * processed.  Before returning, the 16 bytes at r4 are overwritten with the
 * chaining value held in q6.
 *
 * Saves and restores r4-r8, lr and d8-d15; uses q0-q15, r12 and the
 * argument registers as scratch.
 *
 * Register roles set up by the common prologue:
 *   q8, q9     first two round keys (reloaded as the loops walk the schedule)
 *   q10-q15    the six round keys before the last one
 *   q7         last round key
 *   q6         current chaining value (initially the IV)
 *   r5         round count - 8; r6 is a working copy
 *   r8         post-index step for input loads: 16, or 0 once r2 hits zero
 */
287.globl	aes_v8_cbc_encrypt
288.type	aes_v8_cbc_encrypt,%function
289.align	5
290aes_v8_cbc_encrypt:
/* ip keeps the incoming sp so the stack arguments can be read after the pushes. */
291	mov	ip,sp
292	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
293	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
294	ldmia	ip,{r4,r5}		@ load remaining args
295	subs	r2,r2,#16
296	mov	r8,#16
297	blo	.Lcbc_abort
298	moveq	r8,#0
299
/*
 * The flags from this cmp select the direction at the "beq .Lcbc_dec" below;
 * nothing between here and that branch updates them.
 */
300	cmp	r5,#0			@ en- or decrypting?
301	ldr	r5,[r3,#240]
302	and	r2,r2,#-16
303	vld1.8	{q6},[r4]
304	vld1.8	{q0},[r0],r8
305
306	vld1.32	{q8,q9},[r3]		@ load key schedule...
307	sub	r5,r5,#6
308	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
309	sub	r5,r5,#2
310	vld1.32	{q10,q11},[r7]!
311	vld1.32	{q12,q13},[r7]!
312	vld1.32	{q14,q15},[r7]!
313	vld1.32	{q7},[r7]
314
315	add	r7,r3,#32
316	mov	r6,r5
317	beq	.Lcbc_dec
318
/*
 * Encryption.  q0 = first input block ^ IV.  q5 = first round key ^ last
 * round key: each later input block is XORed with q5 into q8, so that the
 * "aese q0,q8" at the loop head, applied to the state still lacking the
 * final-key XOR, both completes the previous block and chains it into the
 * next.  r5 == 2 means a round count of 10 and takes the dedicated path.
 */
319	cmp	r5,#2
320	veor	q0,q0,q6
321	veor	q5,q8,q7
322	beq	.Lcbc_enc128
323
/*
 * Round counts above 10: q2/q3 = round keys 2 and 3; r7, r6, r12, r14 and
 * r3 are pointed at round keys 1, 4, 5, 6 and 7 for reloading in the loop.
 */
324	vld1.32	{q2,q3},[r7]
325	add	r7,r3,#16
326	add	r6,r3,#16*4
327	add	r12,r3,#16*5
328.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
329.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
330	add	r14,r3,#16*6
331	add	r3,r3,#16*7
332	b	.Lenter_cbc_enc
333
/* One block per iteration; the previous ciphertext block (q6) is stored here. */
334.align	4
335.Loop_cbc_enc:
336.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
337.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
338	vst1.8	{q6},[r1]!
339.Lenter_cbc_enc:
340.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
341.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
342.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
343.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
344	vld1.32	{q8},[r6]
345	cmp	r5,#4
346.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
347.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
348	vld1.32	{q9},[r12]
/* r5 == 4 (round count 12) skips the two extra rounds that follow. */
349	beq	.Lcbc_enc192
350
351.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
352.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
353	vld1.32	{q8},[r14]
354.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
355.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
356	vld1.32	{q9},[r3]
357	nop
358
/*
 * Shared tail of the round sequence.  r2 is decremented here; when it
 * reaches zero r8 becomes 0, so the look-ahead input load below no longer
 * advances r0.  The flags of the subs also drive the bhs at the bottom.
 */
359.Lcbc_enc192:
360.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
361.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
362	subs	r2,r2,#16
363.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
364.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
365	moveq	r8,#0
366.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
367.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
368.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
369.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
370	vld1.8	{q8},[r0],r8
371.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
372.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
373	veor	q8,q8,q5
374.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
375.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
376	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
377.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
378.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
379.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
/* q6 = finished ciphertext block = state ^ last round key. */
380	veor	q6,q0,q7
381	bhs	.Loop_cbc_enc
382
383	vst1.8	{q6},[r1]!
384	b	.Lcbc_done
385
/*
 * Round count 10: all round keys stay in registers (q8, q9, q2, q3,
 * q10-q15, q7), so the loop reloads none of them.
 */
386.align	5
387.Lcbc_enc128:
388	vld1.32	{q2,q3},[r7]
389.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
390.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
391	b	.Lenter_cbc_enc128
392.Loop_cbc_enc128:
393.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
394.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
395	vst1.8	{q6},[r1]!
396.Lenter_cbc_enc128:
397.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
398.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
399	subs	r2,r2,#16
400.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
401.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
402	moveq	r8,#0
403.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
404.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
405.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
406.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
407.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
408.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
409	vld1.8	{q8},[r0],r8
410.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
411.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
412.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
413.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
414.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
415.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
416	veor	q8,q8,q5
417.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
418	veor	q6,q0,q7
419	bhs	.Loop_cbc_enc128
420
421	vst1.8	{q6},[r1]!
422	b	.Lcbc_done
/*
 * Decryption.  q0, q1 and q10 carry up to three blocks through the rounds
 * together, while q2, q3 and q11 keep copies of the input blocks for the
 * chaining XOR.  r2 is biased by a further 32; a borrow means fewer than
 * three blocks remain and goes directly to the tail.
 */
423.align	5
424.Lcbc_dec:
425	vld1.8	{q10},[r0]!
426	subs	r2,r2,#32		@ bias
427	add	r6,r5,#2
428	vorr	q3,q0,q0
429	vorr	q1,q0,q0
430	vorr	q11,q10,q10
431	blo	.Lcbc_dec_tail
432
433	vorr	q1,q10,q10
434	vld1.8	{q10},[r0]!
435	vorr	q2,q0,q0
436	vorr	q3,q1,q1
437	vorr	q11,q10,q10
438
/*
 * Three blocks per pass.  The inner loop applies two rounds per iteration
 * to all three blocks, reloading q8/q9 through r7, until r6 runs out.
 */
439.Loop3x_cbc_dec:
440.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
441.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
442.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
443.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
444.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
445.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
446	vld1.32	{q8},[r7]!
447	subs	r6,r6,#2
448.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
449.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
450.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
451.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
452.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
453.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
454	vld1.32	{q9},[r7]!
455	bgt	.Loop3x_cbc_dec
456
/*
 * Remaining rounds for the three blocks, interleaved with bookkeeping:
 *   q4, q5, q9 = the three chaining values, each pre-XORed with the last
 *                round key q7
 *   r2 -= 0x30; the flags of that subs are consumed by the movlo here and
 *                by the bhs at the bottom of the pass
 *   q6 = q11, the last input block of this group, as the next chaining value
 *   q2, q3, q11 = next three input blocks
 */
457.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
458.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
459.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
460.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
461.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
462.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
463	veor	q4,q6,q7
464	subs	r2,r2,#0x30
465	veor	q5,q2,q7
466	movlo	r6,r2			@ r6, r6, is zero at this point
467.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
468.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
469.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
470.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
471.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
472.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
473	veor	q9,q3,q7
474	add	r0,r0,r6		@ r0 is adjusted in such way that
475					@ at exit from the loop q1-q10
476					@ are loaded with last "words"
477	vorr	q6,q11,q11
478	mov	r7,r3
479.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
480.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
481.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
482.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
483.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
484.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
485	vld1.8	{q2},[r0]!
486.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
487.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
488.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
489.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
490.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
491.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
492	vld1.8	{q3},[r0]!
493.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
494.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
495.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
496.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
497.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
498.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
499	vld1.8	{q11},[r0]!
500.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
501.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
502.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
503	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
504	add	r6,r5,#2
505	veor	q4,q4,q0
506	veor	q5,q5,q1
507	veor	q10,q10,q9
508	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
509	vst1.8	{q4},[r1]!
510	vorr	q0,q2,q2
511	vst1.8	{q5},[r1]!
512	vorr	q1,q3,q3
513	vst1.8	{q10},[r1]!
514	vorr	q10,q11,q11
515	bhs	.Loop3x_cbc_dec
516
/* r2 == -0x30 here means nothing is left over; otherwise do the tail. */
517	cmn	r2,#0x30
518	beq	.Lcbc_done
519	nop
520
/*
 * Tail: one or two remaining blocks, carried in q1 and q10.  r2 == -0x20
 * (tested by the cmn further down) identifies the single-block case.
 */
521.Lcbc_dec_tail:
522.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
523.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
524.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
525.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
526	vld1.32	{q8},[r7]!
527	subs	r6,r6,#2
528.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
529.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
530.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
531.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
532	vld1.32	{q9},[r7]!
533	bgt	.Lcbc_dec_tail
534
535.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
536.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
537.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
538.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
539.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
540.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
541.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
542.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
543.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
544.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
545.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
546.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
547	cmn	r2,#0x20
548.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
549.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
550.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
551.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
552	veor	q5,q6,q7
553.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
554.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
555.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
556.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
557	veor	q9,q3,q7
558.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
559.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
560	beq	.Lcbc_dec_one
/* Two blocks: outputs are q1 ^ q5 and q10 ^ q9; q6 = last input block. */
561	veor	q5,q5,q1
562	veor	q9,q9,q10
563	vorr	q6,q11,q11
564	vst1.8	{q5},[r1]!
565	vst1.8	{q9},[r1]!
566	b	.Lcbc_done
567
/* One block: output is q10 ^ q5; q6 = that input block. */
568.Lcbc_dec_one:
569	veor	q5,q5,q10
570	vorr	q6,q11,q11
571	vst1.8	{q5},[r1]!
572
/* Write the chaining value back through the IV pointer. */
573.Lcbc_done:
574	vst1.8	{q6},[r4]
575.Lcbc_abort:
576	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
577	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
578.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
/*
 * aes_v8_ctr32_encrypt_blocks - CTR-mode processing with a 32-bit counter.
 *
 * In:   r0   = input pointer
 *       r1   = output pointer
 *       r2   = number of 16-byte blocks
 *       r3   = key schedule (round count read from offset 240)
 *       [sp] = pointer to the 16-byte counter block, loaded into r4
 *
 * The 32-bit word at offset 12 of the counter block is the counter; it is
 * byte-reversed on little-endian builds before being incremented, and
 * reversed again before it is inserted into a block.  Only that word
 * changes from block to block.  The counter block in memory is read and
 * never written back.
 *
 * Saves and restores r4-r10, lr and d8-d15; uses q0-q15, r12 and the
 * argument registers as scratch.
 *
 * NOTE(review): r2 == 0 is not special-cased.  It takes the .Lctr32_tail
 * path and, since the final "cmp r2,#1" is then not equal, both tail stores
 * execute.  Callers presumably never pass zero - confirm.
 */
579.globl	aes_v8_ctr32_encrypt_blocks
580.type	aes_v8_ctr32_encrypt_blocks,%function
581.align	5
582aes_v8_ctr32_encrypt_blocks:
/* ip keeps the incoming sp so the stack argument can be read after the pushes. */
583	mov	ip,sp
584	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
585	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
586	ldr	r4, [ip]		@ load remaining arg
587	ldr	r5,[r3,#240]
588
/* r8 = counter word; q0 = whole counter block. */
589	ldr	r8, [r4, #12]
590#ifdef __ARMEB__
591	vld1.8	{q0},[r4]
592#else
593	vld1.32	{q0},[r4]
594#endif
/*
 * q8/q9 = first two round keys; q12-q15 and q7 = last five round keys.
 * r5 = round count - 6, with r6 as working copy.  r7 is left pointing at
 * round key 2.  r12 = 16, or 0 when fewer than two blocks were requested;
 * it is the post-index step for the first input load of the tail.  The
 * flags of the "cmp r2,#2" are still intact at the movlo and bls below.
 */
595	vld1.32	{q8,q9},[r3]		@ load key schedule...
596	sub	r5,r5,#4
597	mov	r12,#16
598	cmp	r2,#2
599	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
600	sub	r5,r5,#2
601	vld1.32	{q12,q13},[r7]!
602	vld1.32	{q14,q15},[r7]!
603	vld1.32	{q7},[r7]
604	add	r7,r3,#32
605	mov	r6,r5
606	movlo	r12,#0
607#ifndef __ARMEB__
608	rev	r8, r8
609#endif
/*
 * q6 is the template block whose last word (d13[1]) is rewritten for each
 * new counter value.  q0 = counter, q1 = counter + 1; r8 becomes counter + 2.
 */
610	add	r10, r8, #1
611	vorr	q6,q0,q0
612	rev	r10, r10
613	vmov.32	d13[1],r10
614	add	r8, r8, #2
615	vorr	q1,q6,q6
/* Two blocks or fewer: handled entirely by the tail. */
616	bls	.Lctr32_tail
/* q10 = counter + 2; r2 is biased by 3 for the three-block loop. */
617	rev	r12, r8
618	vmov.32	d13[1],r12
619	sub	r2,r2,#3		@ bias
620	vorr	q10,q6,q6
621	b	.Loop3x_ctr32
622
/*
 * Three counter blocks (q0, q1, q10) per pass.  The inner loop applies two
 * rounds per iteration, reloading q8/q9 through r7, until r6 runs out.
 */
623.align	4
624.Loop3x_ctr32:
625.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
626.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
627.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
628.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
629.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
630.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
631	vld1.32	{q8},[r7]!
632	subs	r6,r6,#2
633.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
634.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
635.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
636.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
637.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
638.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
639	vld1.32	{q9},[r7]!
640	bgt	.Loop3x_ctr32
641
/*
 * Remaining rounds.  The working state moves to q4, q5 and q9 so that q0,
 * q1 and q10 can be refilled with the next three counter blocks (built
 * from r9, r10 and r12 through the q6 template) while the rounds finish.
 * The three input blocks (q2, q3, q11) are XORed with the last round key
 * q7 ahead of time, so one veor with the state completes each output block.
 */
642.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
643.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
644.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
645.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
646	vld1.8	{q2},[r0]!
647	add	r9,r8,#1
648.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
649.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
650	vld1.8	{q3},[r0]!
651	rev	r9,r9
652.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
653.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
654.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
655.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
656	vld1.8	{q11},[r0]!
657	mov	r7,r3
658.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
659.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
660.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
661.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
662.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
663.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
664	veor	q2,q2,q7
665	add	r10,r8,#2
666.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
667.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
668	veor	q3,q3,q7
669	add	r8,r8,#3
670.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
671.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
672.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
673.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
674	veor	q11,q11,q7
675	vmov.32	d13[1], r9
676.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
677.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
678	vorr	q0,q6,q6
679	rev	r10,r10
680.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
681.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
682	vmov.32	d13[1], r10
683	rev	r12,r8
684.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
685.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
686	vorr	q1,q6,q6
687	vmov.32	d13[1], r12
688.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
689.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
690	vorr	q10,q6,q6
/* The flags of this subs are consumed by the bhs at the bottom of the pass. */
691	subs	r2,r2,#3
692.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
693.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
694.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
695
696	veor	q2,q2,q4
697	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
698	vst1.8	{q2},[r1]!
699	veor	q3,q3,q5
700	mov	r6,r5
701	vst1.8	{q3},[r1]!
702	veor	q11,q11,q9
703	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
704	vst1.8	{q11},[r1]!
705	bhs	.Loop3x_ctr32
706
/*
 * Undo the bias: r2 = blocks still to do (0, 1 or 2).  r12 = 0 when exactly
 * one block is left, otherwise 16.
 */
707	adds	r2,r2,#3
708	beq	.Lctr32_done
709	cmp	r2,#1
710	mov	r12,#16
711	moveq	r12,#0
712
/*
 * Tail: two counter blocks (q0, q1) are always computed.  The first input
 * load post-increments r0 by r12, so with r12 == 0 the second load re-reads
 * the same 16 bytes.  The second output block is stored only when r2 != 1.
 */
713.Lctr32_tail:
714.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
715.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
716.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
717.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
718	vld1.32	{q8},[r7]!
719	subs	r6,r6,#2
720.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
721.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
722.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
723.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
724	vld1.32	{q9},[r7]!
725	bgt	.Lctr32_tail
726
727.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
728.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
729.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
730.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
731.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
732.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
733.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
734.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
735	vld1.8	{q2},[r0],r12
736.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
737.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
738.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
739.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
740	vld1.8	{q3},[r0]
741.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
742.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
743.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
744.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
745	veor	q2,q2,q7
746.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
747.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
748.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
749.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
750	veor	q3,q3,q7
751.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
752.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
753
754	cmp	r2,#1
755	veor	q2,q2,q0
756	veor	q3,q3,q1
757	vst1.8	{q2},[r1]!
758	beq	.Lctr32_done
759	vst1.8	{q3},[r1]
760
761.Lctr32_done:
762	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
763	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
764.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
765#endif
766