xref: /freebsd/sys/crypto/openssl/arm/aesv8-armx.S (revision 25fb30bd9abc492359ad1f66901a06cb8cd08370)
1/* $FreeBSD$ */
2/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
3#include "arm_arch.h"
4
5#if __ARM_MAX_ARCH__>=7
6.text
7.arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
8.fpu	neon
9.code	32
10#undef	__thumb2__
11.align	5
@ Constants for the key-schedule code, addressed with "adr r3,.Lrcon":
@   row 0: initial round constant 0x01 in every 32-bit lane
@   row 1: vtbl byte-index mask (rotate-n-splat)
@   row 2: round constant 0x1b in every lane; loaded by the 128-bit key
@          path after its 8-iteration loop
12.Lrcon:
13.long	0x01,0x01,0x01,0x01
14.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
15.long	0x1b,0x1b,0x1b,0x1b
16
17.globl	aes_v8_set_encrypt_key
18.type	aes_v8_set_encrypt_key,%function
19.align	5
@ aes_v8_set_encrypt_key: expand a 128/192/256-bit AES key into round keys.
@ In:  r0 = pointer to the raw key bytes
@      r1 = key length in bits (128, 192 or 256)
@      r2 = pointer to the output key schedule
@ Out: r0 = 0 on success, -1 if r0 or r2 is zero, -2 for any other bit
@      length.  Round keys are written starting at [r2]; the round count
@      (10, 12 or 14) is stored as a word at offset 240 of the schedule.
@ On the success path r2 is left pointing at schedule+240 and r12 holds
@ the round count; aes_v8_set_decrypt_key depends on both after its
@ "bl .Lenc_key".
@ Scratch: r1, r3, r12, q0-q3, q8-q10.
20aes_v8_set_encrypt_key:
21.Lenc_key:
@ Argument checks; r3 carries the return value to .Lenc_key_abort.
22	mov	r3,#-1
23	cmp	r0,#0
24	beq	.Lenc_key_abort
25	cmp	r2,#0
26	beq	.Lenc_key_abort
27	mov	r3,#-2
28	cmp	r1,#128
29	blt	.Lenc_key_abort
30	cmp	r1,#256
31	bgt	.Lenc_key_abort
32	tst	r1,#0x3f		@ within 128..256 only multiples of 64 pass
33	bne	.Lenc_key_abort
34
35	adr	r3,.Lrcon
36	cmp	r1,#192		@ flags are consumed by the blt/beq below
37
38	veor	q0,q0,q0		@ q0 = 0, used as the all-zero aese key
39	vld1.8	{q3},[r0]!		@ q3 = first 16 key bytes
40	mov	r1,#8		@ reuse r1
41	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = vtbl mask
42
43	blt	.Loop128
44	beq	.L192
45	b	.L256
46
47.align	4
@ 128-bit key.  r1 counts 8 iterations; each one stores the current round
@ key q3 and derives the next one.  Because q0 is zero, aese contributes
@ only its byte substitution to the splatted word in q10 (see the AESE
@ description in the ARM Architecture Reference Manual).
48.Loop128:
49	vtbl.8	d20,{q3},d4		@ q10 = rotate-n-splat of the last
50	vtbl.8	d21,{q3},d5		@       word of q3
51	vext.8	q9,q0,q3,#12		@ q9 = q3 moved up one word, zero fill
52	vst1.32	{q3},[r2]!
53.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
54	subs	r1,r1,#1
55
56	veor	q3,q3,q9
57	vext.8	q9,q0,q9,#12
58	veor	q3,q3,q9
59	vext.8	q9,q0,q9,#12
60	veor	q10,q10,q1		@ fold in the round constant
61	veor	q3,q3,q9
62	vshl.u8	q1,q1,#1		@ next round constant
63	veor	q3,q3,q10
64	bne	.Loop128
65
66	vld1.32	{q1},[r3]		@ q1 = 0x1b row of .Lrcon
67
@ Two more rounds, unrolled, using round constants 0x1b and 0x1b<<1.
68	vtbl.8	d20,{q3},d4
69	vtbl.8	d21,{q3},d5
70	vext.8	q9,q0,q3,#12
71	vst1.32	{q3},[r2]!
72.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
73
74	veor	q3,q3,q9
75	vext.8	q9,q0,q9,#12
76	veor	q3,q3,q9
77	vext.8	q9,q0,q9,#12
78	veor	q10,q10,q1
79	veor	q3,q3,q9
80	vshl.u8	q1,q1,#1
81	veor	q3,q3,q10
82
83	vtbl.8	d20,{q3},d4
84	vtbl.8	d21,{q3},d5
85	vext.8	q9,q0,q3,#12
86	vst1.32	{q3},[r2]!
87.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
88
89	veor	q3,q3,q9
90	vext.8	q9,q0,q9,#12
91	veor	q3,q3,q9
92	vext.8	q9,q0,q9,#12
93	veor	q10,q10,q1
94	veor	q3,q3,q9
95	veor	q3,q3,q10
96	vst1.32	{q3},[r2]		@ last round key, no post-increment
97	add	r2,r2,#0x50		@ r2 = schedule + 240
98
99	mov	r12,#10		@ round count
100	b	.Ldone
101
102.align	4
@ 192-bit key.  q3 holds key bytes 0-15 and d16 bytes 16-23.  r1 counts
@ 8 iterations; each one stores d16 (8 bytes) and the next q3 (16 bytes).
103.L192:
104	vld1.8	{d16},[r0]!
105	vmov.i8	q10,#8			@ borrow q10
106	vst1.32	{q3},[r2]!
107	vsub.i8	q2,q2,q10	@ adjust the mask
108
109.Loop192:
110	vtbl.8	d20,{q8},d4
111	vtbl.8	d21,{q8},d5
112	vext.8	q9,q0,q3,#12
113	vst1.32	{d16},[r2]!
114.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
115	subs	r1,r1,#1		@ flags are consumed by the bne below
116
117	veor	q3,q3,q9
118	vext.8	q9,q0,q9,#12
119	veor	q3,q3,q9
120	vext.8	q9,q0,q9,#12
121	veor	q3,q3,q9
122
123	vdup.32	q9,d7[1]
124	veor	q9,q9,q8
125	veor	q10,q10,q1
126	vext.8	q8,q0,q8,#12
127	vshl.u8	q1,q1,#1
128	veor	q8,q8,q9
129	veor	q3,q3,q10
130	veor	q8,q8,q10
131	vst1.32	{q3},[r2]!
132	bne	.Loop192
133
134	mov	r12,#12		@ round count
135	add	r2,r2,#0x20		@ r2 = schedule + 240
136	b	.Ldone
137
138.align	4
@ 256-bit key.  q3 holds key bytes 0-15 and q8 bytes 16-31.  r1 counts
@ 7 iterations; the loop is left via the beq in its middle, at which
@ point r2 is already at schedule + 240.
139.L256:
140	vld1.8	{q8},[r0]
141	mov	r1,#7
142	mov	r12,#14		@ round count
143	vst1.32	{q3},[r2]!
144
145.Loop256:
146	vtbl.8	d20,{q8},d4
147	vtbl.8	d21,{q8},d5
148	vext.8	q9,q0,q3,#12
149	vst1.32	{q8},[r2]!
150.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
151	subs	r1,r1,#1		@ flags are consumed by the beq below
152
153	veor	q3,q3,q9
154	vext.8	q9,q0,q9,#12
155	veor	q3,q3,q9
156	vext.8	q9,q0,q9,#12
157	veor	q10,q10,q1
158	veor	q3,q3,q9
159	vshl.u8	q1,q1,#1
160	veor	q3,q3,q10
161	vst1.32	{q3},[r2]!
162	beq	.Ldone
163
@ Derive the next q8 from the new q3: the last word of q3 is splatted
@ without the byte rotation and no round constant is added.
164	vdup.32	q10,d7[1]
165	vext.8	q9,q0,q8,#12
166.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
167
168	veor	q8,q8,q9
169	vext.8	q9,q0,q9,#12
170	veor	q8,q8,q9
171	vext.8	q9,q0,q9,#12
172	veor	q8,q8,q9
173
174	veor	q8,q8,q10
175	b	.Loop256
176
177.Ldone:
178	str	r12,[r2]		@ round count at schedule + 240
179	mov	r3,#0		@ success
180
181.Lenc_key_abort:
182	mov	r0,r3			@ return value
183
184	bx	lr
185.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
186
187.globl	aes_v8_set_decrypt_key
188.type	aes_v8_set_decrypt_key,%function
189.align	5
@ aes_v8_set_decrypt_key: build the key schedule used by aes_v8_decrypt.
@ Takes the same r0/r1/r2 arguments as aes_v8_set_encrypt_key and returns
@ the same codes in r0.  The encryption schedule is generated first by
@ calling .Lenc_key; it is then reversed in place, and every round key
@ except the first and last is passed through aesimc.
@ Saves/restores r4; scratch: r0, r2, r12, q0, q1.
190aes_v8_set_decrypt_key:
191	stmdb	sp!,{r4,lr}
192	bl	.Lenc_key		@ leaves r2 = schedule+240, r12 = rounds
193
194	cmp	r0,#0
195	bne	.Ldec_key_abort		@ propagate the error code in r0
196
197	sub	r2,r2,#240		@ restore original r2
198	mov	r4,#-16		@ step for the descending pointer r0
199	add	r0,r2,r12,lsl#4	@ end of key schedule
200
@ Swap the first and last round keys unchanged.
201	vld1.32	{q0},[r2]
202	vld1.32	{q1},[r0]
203	vst1.32	{q0},[r0],r4
204	vst1.32	{q1},[r2]!
205
@ Walk inwards from both ends, swapping a pair of round keys per
@ iteration and applying aesimc to each, until the pointers meet.
206.Loop_imc:
207	vld1.32	{q0},[r2]
208	vld1.32	{q1},[r0]
209.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
210.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
211	vst1.32	{q0},[r0],r4
212	vst1.32	{q1},[r2]!
213	cmp	r0,r2
214	bhi	.Loop_imc
215
@ Middle round key: transformed in place.
216	vld1.32	{q0},[r2]
217.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
218	vst1.32	{q0},[r0]
219
220	eor	r0,r0,r0		@ return value
221.Ldec_key_abort:
222	ldmia	sp!,{r4,pc}
223.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
224.globl	aes_v8_encrypt
225.type	aes_v8_encrypt,%function
226.align	5
@ aes_v8_encrypt: encrypt a single 16-byte block.
@ In:  r0 = input block, r1 = output block, r2 = key schedule (the round
@      count is read from offset 240).
@ Scratch: r3, q0-q2; r2 is advanced through the schedule.
227aes_v8_encrypt:
228	ldr	r3,[r2,#240]		@ r3 = number of rounds
229	vld1.32	{q0},[r2]!		@ q0 = first round key
230	vld1.8	{q2},[r0]		@ q2 = block being processed
231	sub	r3,r3,#2
232	vld1.32	{q1},[r2]!		@ q1 = second round key
233
@ Two rounds per iteration, alternating q0/q1 as the round key and
@ reloading each from the schedule as soon as it has been used.
234.Loop_enc:
235.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
236.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
237	vld1.32	{q0},[r2]!
238	subs	r3,r3,#2
239.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
240.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
241	vld1.32	{q1},[r2]!
242	bgt	.Loop_enc
243
@ Last two rounds: the final aese is not followed by aesmc, and the last
@ round key is applied with a plain XOR.
244.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
245.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
246	vld1.32	{q0},[r2]		@ q0 = last round key
247.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
248	veor	q2,q2,q0
249
250	vst1.8	{q2},[r1]
251	bx	lr
252.size	aes_v8_encrypt,.-aes_v8_encrypt
253.globl	aes_v8_decrypt
254.type	aes_v8_decrypt,%function
255.align	5
@ aes_v8_decrypt: decrypt a single 16-byte block.
@ In:  r0 = input block, r1 = output block, r2 = key schedule (the round
@      count is read from offset 240).
@ Same structure as aes_v8_encrypt with aesd/aesimc in place of
@ aese/aesmc.
@ Scratch: r3, q0-q2; r2 is advanced through the schedule.
256aes_v8_decrypt:
257	ldr	r3,[r2,#240]		@ r3 = number of rounds
258	vld1.32	{q0},[r2]!		@ q0 = first round key
259	vld1.8	{q2},[r0]		@ q2 = block being processed
260	sub	r3,r3,#2
261	vld1.32	{q1},[r2]!		@ q1 = second round key
262
@ Two rounds per iteration, alternating q0/q1 as the round key.
263.Loop_dec:
264.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
265.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
266	vld1.32	{q0},[r2]!
267	subs	r3,r3,#2
268.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
269.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
270	vld1.32	{q1},[r2]!
271	bgt	.Loop_dec
272
@ Last two rounds: the final aesd is not followed by aesimc, and the last
@ round key is applied with a plain XOR.
273.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
274.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
275	vld1.32	{q0},[r2]		@ q0 = last round key
276.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
277	veor	q2,q2,q0
278
279	vst1.8	{q2},[r1]
280	bx	lr
281.size	aes_v8_decrypt,.-aes_v8_decrypt
282.globl	aes_v8_cbc_encrypt
283.type	aes_v8_cbc_encrypt,%function
284.align	5
@ aes_v8_cbc_encrypt: CBC-mode encryption or decryption.
@ In:  r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule,
@      [sp] = pointer to the 16-byte IV, [sp,#4] = direction flag
@      (zero selects the decrypt path, non-zero the encrypt path).
@ A length below 16 returns immediately without touching the IV;
@ otherwise whole 16-byte blocks are processed and, on exit, the IV
@ buffer receives the last ciphertext block.
@ Saves/restores r4-r8, lr and d8-d15.
@ Register roles after setup:
@   q8,q9       first two round keys (reloaded as the rounds progress)
@   q10-q15,q7  last seven round keys, q7 being the final one (the
@               decrypt path reuses q10/q11 for data)
@   q6          chaining value (IV, then the previous ciphertext block)
@   r5          rounds-8, r8 = input post-increment (16, or 0 at the end)
285aes_v8_cbc_encrypt:
286	mov	ip,sp
287	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
288	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
289	ldmia	ip,{r4,r5}		@ load remaining args
290	subs	r2,r2,#16
291	mov	r8,#16
292	blo	.Lcbc_abort
293	moveq	r8,#0
294
295	cmp	r5,#0			@ en- or decrypting?
296	ldr	r5,[r3,#240]
297	and	r2,r2,#-16
298	vld1.8	{q6},[r4]
299	vld1.8	{q0},[r0],r8
300
301	vld1.32	{q8,q9},[r3]		@ load key schedule...
302	sub	r5,r5,#6
303	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
304	sub	r5,r5,#2
305	vld1.32	{q10,q11},[r7]!
306	vld1.32	{q12,q13},[r7]!
307	vld1.32	{q14,q15},[r7]!
308	vld1.32	{q7},[r7]
309
310	add	r7,r3,#32
311	mov	r6,r5
312	beq	.Lcbc_dec
313
@ Encrypt path.  r5 == 2 means a 10-round schedule, which has its own
@ loop at .Lcbc_enc128.
314	cmp	r5,#2
315	veor	q0,q0,q6
316	veor	q5,q8,q7
317	beq	.Lcbc_enc128
318
@ 12- and 14-round schedules: q2,q3 hold round keys 2 and 3, and
@ r7, r6, r12, r14, r3 point at round keys 1, 4, 5, 6 and 7.
319	vld1.32	{q2,q3},[r7]
320	add	r7,r3,#16
321	add	r6,r3,#16*4
322	add	r12,r3,#16*5
323.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
324.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
325	add	r14,r3,#16*6
326	add	r3,r3,#16*7
327	b	.Lenter_cbc_enc
328
329.align	4
@ On re-entry q0 is the previous state before its final key XOR and
@ q8 = next input ^ q5 (first ^ last round key), so this aese both chains
@ in the previous ciphertext and applies the first round key.
330.Loop_cbc_enc:
331.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
332.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
333	vst1.8	{q6},[r1]!
334.Lenter_cbc_enc:
335.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
336.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
337.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
338.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
339	vld1.32	{q8},[r6]
340	cmp	r5,#4
341.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
342.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
343	vld1.32	{q9},[r12]
344	beq	.Lcbc_enc192
345
@ Two extra rounds that only the 14-round schedule needs.
346.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
347.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
348	vld1.32	{q8},[r14]
349.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
350.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
351	vld1.32	{q9},[r3]
352	nop
353
354.Lcbc_enc192:
355.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
356.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
357	subs	r2,r2,#16
358.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
359.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
360	moveq	r8,#0
361.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
362.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
363.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
364.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
365	vld1.8	{q8},[r0],r8
366.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
367.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
368	veor	q8,q8,q5
369.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
370.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
371	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
372.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
373.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
374.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
375	veor	q6,q0,q7
376	bhs	.Loop_cbc_enc
377
378	vst1.8	{q6},[r1]!
379	b	.Lcbc_done
380
381.align	5
@ 10-round variant of the encrypt loop: q2,q3 hold round keys 2 and 3,
@ followed directly by q10-q15 and q7.  Same chaining scheme as above.
382.Lcbc_enc128:
383	vld1.32	{q2,q3},[r7]
384.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
385.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
386	b	.Lenter_cbc_enc128
387.Loop_cbc_enc128:
388.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
389.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
390	vst1.8	{q6},[r1]!
391.Lenter_cbc_enc128:
392.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
393.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
394	subs	r2,r2,#16
395.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
396.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
397	moveq	r8,#0
398.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
399.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
400.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
401.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
402.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
403.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
404	vld1.8	{q8},[r0],r8
405.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
406.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
407.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
408.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
409.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
410.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
411	veor	q8,q8,q5
412.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
413	veor	q6,q0,q7
414	bhs	.Loop_cbc_enc128
415
416	vst1.8	{q6},[r1]!
417	b	.Lcbc_done
418.align	5
@ Decrypt path.  Blocks are processed three at a time in q0, q1, q10,
@ with copies of the ciphertext kept in q2, q3, q11 for the chaining XOR.
@ Inputs with fewer than three blocks go straight to .Lcbc_dec_tail.
419.Lcbc_dec:
420	vld1.8	{q10},[r0]!
421	subs	r2,r2,#32		@ bias
422	add	r6,r5,#2
423	vorr	q3,q0,q0
424	vorr	q1,q0,q0
425	vorr	q11,q10,q10
426	blo	.Lcbc_dec_tail
427
428	vorr	q1,q10,q10
429	vld1.8	{q10},[r0]!
430	vorr	q2,q0,q0
431	vorr	q3,q1,q1
432	vorr	q11,q10,q10
433
@ r6 counts the rounds that use q8/q9, reloaded through r7, two rounds
@ per iteration.
434.Loop3x_cbc_dec:
435.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
436.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
437.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
438.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
439.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
440.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
441	vld1.32	{q8},[r7]!
442	subs	r6,r6,#2
443.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
444.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
445.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
446.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
447.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
448.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
449	vld1.32	{q9},[r7]!
450	bgt	.Loop3x_cbc_dec
451
@ Remaining rounds.  q4, q5, q9 are prepared as (chaining value ^ last
@ round key) for the three blocks; the flags set by the subs on r2 below
@ are consumed by the bhs at the end of this section.
452.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
453.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
454.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
455.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
456.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
457.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
458	veor	q4,q6,q7
459	subs	r2,r2,#0x30
460	veor	q5,q2,q7
461	movlo	r6,r2			@ r6 (round counter) is zero at this point
462.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
463.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
464.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
465.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
466.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
467.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
468	veor	q9,q3,q7
469	add	r0,r0,r6		@ r0 is adjusted in such way that
470					@ at exit from the loop q1-q10
471					@ are loaded with last "words"
472	vorr	q6,q11,q11
473	mov	r7,r3
474.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
475.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
476.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
477.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
478.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
479.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
480	vld1.8	{q2},[r0]!
481.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
482.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
483.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
484.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
485.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
486.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
487	vld1.8	{q3},[r0]!
488.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
489.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
490.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
491.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
492.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
493.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
494	vld1.8	{q11},[r0]!
495.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
496.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
497.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
498	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
499	add	r6,r5,#2
500	veor	q4,q4,q0
501	veor	q5,q5,q1
502	veor	q10,q10,q9
503	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
504	vst1.8	{q4},[r1]!
505	vorr	q0,q2,q2
506	vst1.8	{q5},[r1]!
507	vorr	q1,q3,q3
508	vst1.8	{q10},[r1]!
509	vorr	q10,q11,q11
510	bhs	.Loop3x_cbc_dec
511
512	cmn	r2,#0x30		@ r2 == -0x30: nothing left over
513	beq	.Lcbc_done
514	nop
515
@ One or two blocks left, held in q1 and q10.
516.Lcbc_dec_tail:
517.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
518.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
519.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
520.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
521	vld1.32	{q8},[r7]!
522	subs	r6,r6,#2
523.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
524.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
525.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
526.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
527	vld1.32	{q9},[r7]!
528	bgt	.Lcbc_dec_tail
529
530.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
531.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
532.byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
533.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
534.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
535.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
536.byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
537.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
538.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
539.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
540.byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
541.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
542	cmn	r2,#0x20		@ r2 == -0x20: one block; flags used by beq below
543.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
544.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
545.byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
546.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
547	veor	q5,q6,q7
548.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
549.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
550.byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
551.byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
552	veor	q9,q3,q7
553.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
554.byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
555	beq	.Lcbc_dec_one
@ Two blocks: outputs come from q1 and q10.
556	veor	q5,q5,q1
557	veor	q9,q9,q10
558	vorr	q6,q11,q11
559	vst1.8	{q5},[r1]!
560	vst1.8	{q9},[r1]!
561	b	.Lcbc_done
562
@ One block: only q10 is used.
563.Lcbc_dec_one:
564	veor	q5,q5,q10
565	vorr	q6,q11,q11
566	vst1.8	{q5},[r1]!
567
568.Lcbc_done:
569	vst1.8	{q6},[r4]		@ write the chaining value back to the IV
570.Lcbc_abort:
571	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
572	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
573.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
574.globl	aes_v8_ctr32_encrypt_blocks
575.type	aes_v8_ctr32_encrypt_blocks,%function
576.align	5
@ aes_v8_ctr32_encrypt_blocks: counter mode with a 32-bit counter.
@ In:  r0 = input, r1 = output, r2 = number of 16-byte blocks,
@      r3 = key schedule, [sp] = pointer to the 16-byte counter block.
@ Only the word at offset 12 of the counter block is incremented (it is
@ byte-reversed first unless __ARMEB__ is defined); the counter block in
@ memory is never written back.
@ Saves/restores r4-r10, lr and d8-d15.
@ Register roles after setup:
@   q8,q9       first two round keys (reloaded as the rounds progress)
@   q12-q15,q7  last five round keys, q7 being the final one
@   q6          copy of the counter block, used as a template
@   r8          running counter value, r5 = rounds-6
577aes_v8_ctr32_encrypt_blocks:
578	mov	ip,sp
579	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
580	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
581	ldr	r4, [ip]		@ load remaining arg
582	ldr	r5,[r3,#240]
583
584	ldr	r8, [r4, #12]
585	vld1.32	{q0},[r4]
586
587	vld1.32	{q8,q9},[r3]		@ load key schedule...
588	sub	r5,r5,#4
589	mov	r12,#16
590	cmp	r2,#2
591	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
592	sub	r5,r5,#2
593	vld1.32	{q12,q13},[r7]!
594	vld1.32	{q14,q15},[r7]!
595	vld1.32	{q7},[r7]
596	add	r7,r3,#32
597	mov	r6,r5
598	movlo	r12,#0
599#ifndef __ARMEB__
600	rev	r8, r8
601#endif
602	vorr	q1,q0,q0
603	add	r10, r8, #1
604	vorr	q10,q0,q0
605	add	r8, r8, #2
606	vorr	q6,q0,q0
@ NOTE(review): unlike the rev of r8 above, this rev is not guarded by
@ __ARMEB__ - confirm the intended behaviour on big-endian builds.
607	rev	r10, r10
608	vmov.32	d3[1],r10
609	bls	.Lctr32_tail
610	rev	r12, r8
611	sub	r2,r2,#3		@ bias
612	vmov.32	d21[1],r12
613	b	.Loop3x_ctr32
614
615.align	4
@ Three counter blocks (q0, q1, q10) are encrypted per pass.  r6 counts
@ the rounds that use q8/q9, reloaded through r7, two rounds per
@ iteration.
616.Loop3x_ctr32:
617.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
618.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
619.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
620.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
621.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
622.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
623	vld1.32	{q8},[r7]!
624	subs	r6,r6,#2
625.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
626.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
627.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
628.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
629.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
630.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
631	vld1.32	{q9},[r7]!
632	bgt	.Loop3x_ctr32
633
@ Remaining rounds.  The three states move to q4, q5, q9 so that q0, q1,
@ q10 can be refilled from the q6 template with the next three counter
@ values; input blocks are loaded into q2, q3, q11 and pre-XORed with
@ the last round key q7.
634.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
635.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
636.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
637.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
638	vld1.8	{q2},[r0]!
639	vorr	q0,q6,q6
640.byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
641.byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
642	vld1.8	{q3},[r0]!
643	vorr	q1,q6,q6
644.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
645.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
646.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
647.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
648	vld1.8	{q11},[r0]!
649	mov	r7,r3
650.byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
651.byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
652	vorr	q10,q6,q6
653	add	r9,r8,#1
654.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
655.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
656.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
657.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
658	veor	q2,q2,q7
659	add	r10,r8,#2
660.byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
661.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
662	veor	q3,q3,q7
663	add	r8,r8,#3
664.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
665.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
666.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
667.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
668	veor	q11,q11,q7
669	rev	r9,r9
670.byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
671.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
672	vmov.32	d1[1], r9
673	rev	r10,r10
674.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
675.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
676.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
677.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
678	vmov.32	d3[1], r10
679	rev	r12,r8
680.byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
681.byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
682	vmov.32	d21[1], r12
683	subs	r2,r2,#3		@ flags are consumed by the bhs below
684.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
685.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
686.byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
687
688	veor	q2,q2,q4
689	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
690	vst1.8	{q2},[r1]!
691	veor	q3,q3,q5
692	mov	r6,r5
693	vst1.8	{q3},[r1]!
694	veor	q11,q11,q9
695	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
696	vst1.8	{q11},[r1]!
697	bhs	.Loop3x_ctr32
698
699	adds	r2,r2,#3		@ undo the bias: r2 = blocks still to do
700	beq	.Lctr32_done
701	cmp	r2,#1
702	mov	r12,#16
703	moveq	r12,#0
704
@ One or two blocks left, using q0 and q1.  r12 is 0 when a single block
@ remains, so the second input load below re-reads the same block rather
@ than the 16 bytes that follow it.
705.Lctr32_tail:
706.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
707.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
708.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
709.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
710	vld1.32	{q8},[r7]!
711	subs	r6,r6,#2
712.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
713.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
714.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
715.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
716	vld1.32	{q9},[r7]!
717	bgt	.Lctr32_tail
718
719.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
720.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
721.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
722.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
723.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
724.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
725.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
726.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
727	vld1.8	{q2},[r0],r12
728.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
729.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
730.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
731.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
732	vld1.8	{q3},[r0]
733.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
734.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
735.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
736.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
737	veor	q2,q2,q7
738.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
739.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
740.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
741.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
742	veor	q3,q3,q7
743.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
744.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
745
746	cmp	r2,#1
747	veor	q2,q2,q0
748	veor	q3,q3,q1
749	vst1.8	{q2},[r1]!
750	beq	.Lctr32_done		@ single block: skip the second store
751	vst1.8	{q3},[r1]
752
753.Lctr32_done:
754	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
755	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
756.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
757#endif
758