xref: /freebsd/sys/crypto/openssl/arm/keccak1600-armv4.S (revision bc3d5698008e9b3b19495e853cbc2598979ccf8a)
1*bc3d5698SJohn Baldwin/* $FreeBSD$ */
2*bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
3*bc3d5698SJohn Baldwin#include "arm_arch.h"
4*bc3d5698SJohn Baldwin
/* Everything that follows is placed in the code section. */
5*bc3d5698SJohn Baldwin.text
6*bc3d5698SJohn Baldwin
/*
 * Instruction-set selection: when __thumb2__ is defined the file is
 * assembled as Thumb using unified syntax; otherwise it is assembled as
 * 32-bit ARM code.
 */
7*bc3d5698SJohn Baldwin#if defined(__thumb2__)
8*bc3d5698SJohn Baldwin.syntax	unified
9*bc3d5698SJohn Baldwin.thumb
10*bc3d5698SJohn Baldwin#else
11*bc3d5698SJohn Baldwin.code	32
12*bc3d5698SJohn Baldwin#endif
13*bc3d5698SJohn Baldwin
/*
 * iotas32: constant table of 24 entries, each a pair of 32-bit words
 * (24 * 8 = 192 bytes), emitted in .text and typed as a data object.
 *
 * The round code in this file takes the table address PC-relatively
 * (adr r10,iotas32), adds the counter kept at [sp,#444] to it as a byte
 * offset, loads one pair with ldmia r14,{r10,r11} ("iotas[i]") and XORs
 * the two words into the two halves of R[0][0].
 *
 * .align 5 requests 2^5 = 32-byte alignment under GNU as for ARM.
 *
 * NOTE(review): the values match the 24 Keccak-f[1600] round constants
 * stored in bit-interleaved form (first word = even-numbered bits,
 * second word = odd-numbered bits) - confirm against the generator
 * script keccak1600-armv4.pl before relying on this.
 */
14*bc3d5698SJohn Baldwin.type	iotas32, %object
15*bc3d5698SJohn Baldwin.align	5
16*bc3d5698SJohn Baldwiniotas32:
17*bc3d5698SJohn Baldwin.long	0x00000001, 0x00000000
18*bc3d5698SJohn Baldwin.long	0x00000000, 0x00000089
19*bc3d5698SJohn Baldwin.long	0x00000000, 0x8000008b
20*bc3d5698SJohn Baldwin.long	0x00000000, 0x80008080
21*bc3d5698SJohn Baldwin.long	0x00000001, 0x0000008b
22*bc3d5698SJohn Baldwin.long	0x00000001, 0x00008000
23*bc3d5698SJohn Baldwin.long	0x00000001, 0x80008088
24*bc3d5698SJohn Baldwin.long	0x00000001, 0x80000082
25*bc3d5698SJohn Baldwin.long	0x00000000, 0x0000000b
26*bc3d5698SJohn Baldwin.long	0x00000000, 0x0000000a
27*bc3d5698SJohn Baldwin.long	0x00000001, 0x00008082
28*bc3d5698SJohn Baldwin.long	0x00000000, 0x00008003
29*bc3d5698SJohn Baldwin.long	0x00000001, 0x0000808b
30*bc3d5698SJohn Baldwin.long	0x00000001, 0x8000000b
31*bc3d5698SJohn Baldwin.long	0x00000001, 0x8000008a
32*bc3d5698SJohn Baldwin.long	0x00000001, 0x80000081
33*bc3d5698SJohn Baldwin.long	0x00000000, 0x80000081
34*bc3d5698SJohn Baldwin.long	0x00000000, 0x80000008
35*bc3d5698SJohn Baldwin.long	0x00000000, 0x00000083
36*bc3d5698SJohn Baldwin.long	0x00000000, 0x80008003
37*bc3d5698SJohn Baldwin.long	0x00000001, 0x80008088
38*bc3d5698SJohn Baldwin.long	0x00000000, 0x80000088
39*bc3d5698SJohn Baldwin.long	0x00000001, 0x00008000
40*bc3d5698SJohn Baldwin.long	0x00000000, 0x80008082
/* Record the byte size of the table (current location minus its start). */
41*bc3d5698SJohn Baldwin.size	iotas32,.-iotas32
42*bc3d5698SJohn Baldwin
43*bc3d5698SJohn Baldwin.type	KeccakF1600_int, %function
44*bc3d5698SJohn Baldwin.align	5
45*bc3d5698SJohn BaldwinKeccakF1600_int:
46*bc3d5698SJohn Baldwin	add	r9,sp,#176
47*bc3d5698SJohn Baldwin	add	r12,sp,#0
48*bc3d5698SJohn Baldwin	add	r10,sp,#40
49*bc3d5698SJohn Baldwin	ldmia	r9,{r4,r5,r6,r7,r8,r9}		@ A[4][2..4]
50*bc3d5698SJohn BaldwinKeccakF1600_enter:
51*bc3d5698SJohn Baldwin	str	lr,[sp,#440]
52*bc3d5698SJohn Baldwin	eor	r11,r11,r11
53*bc3d5698SJohn Baldwin	str	r11,[sp,#444]
54*bc3d5698SJohn Baldwin	b	.Lround2x
55*bc3d5698SJohn Baldwin
56*bc3d5698SJohn Baldwin.align	4
57*bc3d5698SJohn Baldwin.Lround2x:
58*bc3d5698SJohn Baldwin	ldmia	r12,{r0,r1,r2,r3}		@ A[0][0..1]
59*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][0..1]
60*bc3d5698SJohn Baldwin#ifdef	__thumb2__
61*bc3d5698SJohn Baldwin	eor	r0,r0,r10
62*bc3d5698SJohn Baldwin	eor	r1,r1,r11
63*bc3d5698SJohn Baldwin	eor	r2,r2,r12
64*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#56]
65*bc3d5698SJohn Baldwin	eor	r3,r3,r14
66*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#64]
67*bc3d5698SJohn Baldwin	eor	r4,r4,r10
68*bc3d5698SJohn Baldwin	eor	r5,r5,r11
69*bc3d5698SJohn Baldwin	eor	r6,r6,r12
70*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#72]
71*bc3d5698SJohn Baldwin	eor	r7,r7,r14
72*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#80]
73*bc3d5698SJohn Baldwin	eor	r8,r8,r10
74*bc3d5698SJohn Baldwin	eor	r9,r9,r11
75*bc3d5698SJohn Baldwin	eor	r0,r0,r12
76*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#88]
77*bc3d5698SJohn Baldwin	eor	r1,r1,r14
78*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#96]
79*bc3d5698SJohn Baldwin	eor	r2,r2,r10
80*bc3d5698SJohn Baldwin	eor	r3,r3,r11
81*bc3d5698SJohn Baldwin	eor	r4,r4,r12
82*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#104]
83*bc3d5698SJohn Baldwin	eor	r5,r5,r14
84*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#112]
85*bc3d5698SJohn Baldwin	eor	r6,r6,r10
86*bc3d5698SJohn Baldwin	eor	r7,r7,r11
87*bc3d5698SJohn Baldwin	eor	r8,r8,r12
88*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#120]
89*bc3d5698SJohn Baldwin	eor	r9,r9,r14
90*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#128]
91*bc3d5698SJohn Baldwin	eor	r0,r0,r10
92*bc3d5698SJohn Baldwin	eor	r1,r1,r11
93*bc3d5698SJohn Baldwin	eor	r2,r2,r12
94*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#136]
95*bc3d5698SJohn Baldwin	eor	r3,r3,r14
96*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#144]
97*bc3d5698SJohn Baldwin	eor	r4,r4,r10
98*bc3d5698SJohn Baldwin	eor	r5,r5,r11
99*bc3d5698SJohn Baldwin	eor	r6,r6,r12
100*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#152]
101*bc3d5698SJohn Baldwin	eor	r7,r7,r14
102*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#160]
103*bc3d5698SJohn Baldwin	eor	r8,r8,r10
104*bc3d5698SJohn Baldwin	eor	r9,r9,r11
105*bc3d5698SJohn Baldwin	eor	r0,r0,r12
106*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#168]
107*bc3d5698SJohn Baldwin	eor	r1,r1,r14
108*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#16]
109*bc3d5698SJohn Baldwin	eor	r2,r2,r10
110*bc3d5698SJohn Baldwin	eor	r3,r3,r11
111*bc3d5698SJohn Baldwin	eor	r4,r4,r12
112*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#24]
113*bc3d5698SJohn Baldwin	eor	r5,r5,r14
114*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#32]
115*bc3d5698SJohn Baldwin#else
116*bc3d5698SJohn Baldwin	eor	r0,r0,r10
117*bc3d5698SJohn Baldwin	add	r10,sp,#56
118*bc3d5698SJohn Baldwin	eor	r1,r1,r11
119*bc3d5698SJohn Baldwin	eor	r2,r2,r12
120*bc3d5698SJohn Baldwin	eor	r3,r3,r14
121*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][2..3]
122*bc3d5698SJohn Baldwin	eor	r4,r4,r10
123*bc3d5698SJohn Baldwin	add	r10,sp,#72
124*bc3d5698SJohn Baldwin	eor	r5,r5,r11
125*bc3d5698SJohn Baldwin	eor	r6,r6,r12
126*bc3d5698SJohn Baldwin	eor	r7,r7,r14
127*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][4]..A[2][0]
128*bc3d5698SJohn Baldwin	eor	r8,r8,r10
129*bc3d5698SJohn Baldwin	add	r10,sp,#88
130*bc3d5698SJohn Baldwin	eor	r9,r9,r11
131*bc3d5698SJohn Baldwin	eor	r0,r0,r12
132*bc3d5698SJohn Baldwin	eor	r1,r1,r14
133*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[2][1..2]
134*bc3d5698SJohn Baldwin	eor	r2,r2,r10
135*bc3d5698SJohn Baldwin	add	r10,sp,#104
136*bc3d5698SJohn Baldwin	eor	r3,r3,r11
137*bc3d5698SJohn Baldwin	eor	r4,r4,r12
138*bc3d5698SJohn Baldwin	eor	r5,r5,r14
139*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[2][3..4]
140*bc3d5698SJohn Baldwin	eor	r6,r6,r10
141*bc3d5698SJohn Baldwin	add	r10,sp,#120
142*bc3d5698SJohn Baldwin	eor	r7,r7,r11
143*bc3d5698SJohn Baldwin	eor	r8,r8,r12
144*bc3d5698SJohn Baldwin	eor	r9,r9,r14
145*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][0..1]
146*bc3d5698SJohn Baldwin	eor	r0,r0,r10
147*bc3d5698SJohn Baldwin	add	r10,sp,#136
148*bc3d5698SJohn Baldwin	eor	r1,r1,r11
149*bc3d5698SJohn Baldwin	eor	r2,r2,r12
150*bc3d5698SJohn Baldwin	eor	r3,r3,r14
151*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][2..3]
152*bc3d5698SJohn Baldwin	eor	r4,r4,r10
153*bc3d5698SJohn Baldwin	add	r10,sp,#152
154*bc3d5698SJohn Baldwin	eor	r5,r5,r11
155*bc3d5698SJohn Baldwin	eor	r6,r6,r12
156*bc3d5698SJohn Baldwin	eor	r7,r7,r14
157*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][4]..A[4][0]
158*bc3d5698SJohn Baldwin	eor	r8,r8,r10
159*bc3d5698SJohn Baldwin	ldr	r10,[sp,#168]		@ A[4][1]
160*bc3d5698SJohn Baldwin	eor	r9,r9,r11
161*bc3d5698SJohn Baldwin	ldr	r11,[sp,#168+4]
162*bc3d5698SJohn Baldwin	eor	r0,r0,r12
163*bc3d5698SJohn Baldwin	ldr	r12,[sp,#16]		@ A[0][2]
164*bc3d5698SJohn Baldwin	eor	r1,r1,r14
165*bc3d5698SJohn Baldwin	ldr	r14,[sp,#16+4]
166*bc3d5698SJohn Baldwin	eor	r2,r2,r10
167*bc3d5698SJohn Baldwin	add	r10,sp,#24
168*bc3d5698SJohn Baldwin	eor	r3,r3,r11
169*bc3d5698SJohn Baldwin	eor	r4,r4,r12
170*bc3d5698SJohn Baldwin	eor	r5,r5,r14
171*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[0][3..4]
172*bc3d5698SJohn Baldwin#endif
173*bc3d5698SJohn Baldwin	eor	r6,r6,r10
174*bc3d5698SJohn Baldwin	eor	r7,r7,r11
175*bc3d5698SJohn Baldwin	eor	r8,r8,r12
176*bc3d5698SJohn Baldwin	eor	r9,r9,r14
177*bc3d5698SJohn Baldwin
178*bc3d5698SJohn Baldwin	eor	r10,r0,r5,ror#32-1	@ E[0] = ROL64(C[2], 1) ^ C[0];
179*bc3d5698SJohn Baldwin#ifndef	__thumb2__
180*bc3d5698SJohn Baldwin	str	r10,[sp,#208]		@ D[1] = E[0]
181*bc3d5698SJohn Baldwin#endif
182*bc3d5698SJohn Baldwin	eor	r11,r1,r4
183*bc3d5698SJohn Baldwin#ifndef	__thumb2__
184*bc3d5698SJohn Baldwin	str	r11,[sp,#208+4]
185*bc3d5698SJohn Baldwin#else
186*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#208]		@ D[1] = E[0]
187*bc3d5698SJohn Baldwin#endif
188*bc3d5698SJohn Baldwin	eor	r12,r6,r1,ror#32-1	@ E[1] = ROL64(C[0], 1) ^ C[3];
189*bc3d5698SJohn Baldwin	eor	r14,r7,r0
190*bc3d5698SJohn Baldwin#ifndef	__thumb2__
191*bc3d5698SJohn Baldwin	str	r12,[sp,#232]		@ D[4] = E[1]
192*bc3d5698SJohn Baldwin#endif
193*bc3d5698SJohn Baldwin	eor	r0,r8,r3,ror#32-1	@ C[0] = ROL64(C[1], 1) ^ C[4];
194*bc3d5698SJohn Baldwin#ifndef	__thumb2__
195*bc3d5698SJohn Baldwin	str	r14,[sp,#232+4]
196*bc3d5698SJohn Baldwin#else
197*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#232]		@ D[4] = E[1]
198*bc3d5698SJohn Baldwin#endif
199*bc3d5698SJohn Baldwin	eor	r1,r9,r2
200*bc3d5698SJohn Baldwin#ifndef	__thumb2__
201*bc3d5698SJohn Baldwin	str	r0,[sp,#200]		@ D[0] = C[0]
202*bc3d5698SJohn Baldwin#endif
203*bc3d5698SJohn Baldwin	eor	r2,r2,r7,ror#32-1	@ C[1] = ROL64(C[3], 1) ^ C[1];
204*bc3d5698SJohn Baldwin#ifndef	__thumb2__
205*bc3d5698SJohn Baldwin	ldr	r7,[sp,#144]
206*bc3d5698SJohn Baldwin#endif
207*bc3d5698SJohn Baldwin	eor	r3,r3,r6
208*bc3d5698SJohn Baldwin#ifndef	__thumb2__
209*bc3d5698SJohn Baldwin	str	r1,[sp,#200+4]
210*bc3d5698SJohn Baldwin#else
211*bc3d5698SJohn Baldwin	strd	r0,r1,[sp,#200]		@ D[0] = C[0]
212*bc3d5698SJohn Baldwin#endif
213*bc3d5698SJohn Baldwin#ifndef	__thumb2__
214*bc3d5698SJohn Baldwin	ldr	r6,[sp,#144+4]
215*bc3d5698SJohn Baldwin#else
216*bc3d5698SJohn Baldwin	ldrd	r7,r6,[sp,#144]
217*bc3d5698SJohn Baldwin#endif
218*bc3d5698SJohn Baldwin#ifndef	__thumb2__
219*bc3d5698SJohn Baldwin	str	r2,[sp,#216]		@ D[2] = C[1]
220*bc3d5698SJohn Baldwin#endif
221*bc3d5698SJohn Baldwin	eor	r4,r4,r9,ror#32-1	@ C[2] = ROL64(C[4], 1) ^ C[2];
222*bc3d5698SJohn Baldwin#ifndef	__thumb2__
223*bc3d5698SJohn Baldwin	str	r3,[sp,#216+4]
224*bc3d5698SJohn Baldwin#else
225*bc3d5698SJohn Baldwin	strd	r2,r3,[sp,#216]		@ D[2] = C[1]
226*bc3d5698SJohn Baldwin#endif
227*bc3d5698SJohn Baldwin	eor	r5,r5,r8
228*bc3d5698SJohn Baldwin
229*bc3d5698SJohn Baldwin#ifndef	__thumb2__
230*bc3d5698SJohn Baldwin	ldr	r8,[sp,#192]
231*bc3d5698SJohn Baldwin#endif
232*bc3d5698SJohn Baldwin#ifndef	__thumb2__
233*bc3d5698SJohn Baldwin	ldr	r9,[sp,#192+4]
234*bc3d5698SJohn Baldwin#else
235*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#192]
236*bc3d5698SJohn Baldwin#endif
237*bc3d5698SJohn Baldwin#ifndef	__thumb2__
238*bc3d5698SJohn Baldwin	str	r4,[sp,#224]		@ D[3] = C[2]
239*bc3d5698SJohn Baldwin#endif
240*bc3d5698SJohn Baldwin	eor	r7,r7,r4
241*bc3d5698SJohn Baldwin#ifndef	__thumb2__
242*bc3d5698SJohn Baldwin	str	r5,[sp,#224+4]
243*bc3d5698SJohn Baldwin#else
244*bc3d5698SJohn Baldwin	strd	r4,r5,[sp,#224]		@ D[3] = C[2]
245*bc3d5698SJohn Baldwin#endif
246*bc3d5698SJohn Baldwin	eor	r6,r6,r5
247*bc3d5698SJohn Baldwin#ifndef	__thumb2__
248*bc3d5698SJohn Baldwin	ldr	r4,[sp,#0]
249*bc3d5698SJohn Baldwin#endif
250*bc3d5698SJohn Baldwin	@ mov	r7,r7,ror#32-10		@ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
251*bc3d5698SJohn Baldwin	@ mov	r6,r6,ror#32-11
252*bc3d5698SJohn Baldwin#ifndef	__thumb2__
253*bc3d5698SJohn Baldwin	ldr	r5,[sp,#0+4]
254*bc3d5698SJohn Baldwin#else
255*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#0]
256*bc3d5698SJohn Baldwin#endif
257*bc3d5698SJohn Baldwin	eor	r8,r8,r12
258*bc3d5698SJohn Baldwin	eor	r9,r9,r14
259*bc3d5698SJohn Baldwin#ifndef	__thumb2__
260*bc3d5698SJohn Baldwin	ldr	r12,[sp,#96]
261*bc3d5698SJohn Baldwin#endif
262*bc3d5698SJohn Baldwin	eor	r0,r0,r4
263*bc3d5698SJohn Baldwin#ifndef	__thumb2__
264*bc3d5698SJohn Baldwin	ldr	r14,[sp,#96+4]
265*bc3d5698SJohn Baldwin#else
266*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#96]
267*bc3d5698SJohn Baldwin#endif
268*bc3d5698SJohn Baldwin	@ mov	r8,r8,ror#32-7		@ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
269*bc3d5698SJohn Baldwin	@ mov	r9,r9,ror#32-7
270*bc3d5698SJohn Baldwin	eor	r1,r1,r5		@ C[0] =       A[0][0] ^ C[0];
271*bc3d5698SJohn Baldwin	eor	r12,r12,r2
272*bc3d5698SJohn Baldwin#ifndef	__thumb2__
273*bc3d5698SJohn Baldwin	ldr	r2,[sp,#48]
274*bc3d5698SJohn Baldwin#endif
275*bc3d5698SJohn Baldwin	eor	r14,r14,r3
276*bc3d5698SJohn Baldwin#ifndef	__thumb2__
277*bc3d5698SJohn Baldwin	ldr	r3,[sp,#48+4]
278*bc3d5698SJohn Baldwin#else
279*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#48]
280*bc3d5698SJohn Baldwin#endif
281*bc3d5698SJohn Baldwin	mov	r5,r12,ror#32-21		@ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
282*bc3d5698SJohn Baldwin	ldr	r12,[sp,#444]			@ load counter
283*bc3d5698SJohn Baldwin	eor	r2,r2,r10
284*bc3d5698SJohn Baldwin	adr	r10,iotas32
285*bc3d5698SJohn Baldwin	mov	r4,r14,ror#32-22
286*bc3d5698SJohn Baldwin	add	r14,r10,r12
287*bc3d5698SJohn Baldwin	eor	r3,r3,r11
288*bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11}		@ iotas[i]
289*bc3d5698SJohn Baldwin	bic	r12,r4,r2,ror#32-22
290*bc3d5698SJohn Baldwin	bic	r14,r5,r3,ror#32-22
291*bc3d5698SJohn Baldwin	mov	r2,r2,ror#32-22		@ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
292*bc3d5698SJohn Baldwin	mov	r3,r3,ror#32-22
293*bc3d5698SJohn Baldwin	eor	r12,r12,r0
294*bc3d5698SJohn Baldwin	eor	r14,r14,r1
295*bc3d5698SJohn Baldwin	eor	r10,r10,r12
296*bc3d5698SJohn Baldwin	eor	r11,r11,r14
297*bc3d5698SJohn Baldwin#ifndef	__thumb2__
298*bc3d5698SJohn Baldwin	str	r10,[sp,#240]		@ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
299*bc3d5698SJohn Baldwin#endif
300*bc3d5698SJohn Baldwin	bic	r12,r6,r4,ror#11
301*bc3d5698SJohn Baldwin#ifndef	__thumb2__
302*bc3d5698SJohn Baldwin	str	r11,[sp,#240+4]
303*bc3d5698SJohn Baldwin#else
304*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#240]		@ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
305*bc3d5698SJohn Baldwin#endif
306*bc3d5698SJohn Baldwin	bic	r14,r7,r5,ror#10
307*bc3d5698SJohn Baldwin	bic	r10,r8,r6,ror#32-(11-7)
308*bc3d5698SJohn Baldwin	bic	r11,r9,r7,ror#32-(10-7)
309*bc3d5698SJohn Baldwin	eor	r12,r2,r12,ror#32-11
310*bc3d5698SJohn Baldwin#ifndef	__thumb2__
311*bc3d5698SJohn Baldwin	str	r12,[sp,#248]		@ R[0][1] = C[1] ^ (~C[2] & C[3]);
312*bc3d5698SJohn Baldwin#endif
313*bc3d5698SJohn Baldwin	eor	r14,r3,r14,ror#32-10
314*bc3d5698SJohn Baldwin#ifndef	__thumb2__
315*bc3d5698SJohn Baldwin	str	r14,[sp,#248+4]
316*bc3d5698SJohn Baldwin#else
317*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#248]		@ R[0][1] = C[1] ^ (~C[2] & C[3]);
318*bc3d5698SJohn Baldwin#endif
319*bc3d5698SJohn Baldwin	eor	r10,r4,r10,ror#32-7
320*bc3d5698SJohn Baldwin	eor	r11,r5,r11,ror#32-7
321*bc3d5698SJohn Baldwin#ifndef	__thumb2__
322*bc3d5698SJohn Baldwin	str	r10,[sp,#256]		@ R[0][2] = C[2] ^ (~C[3] & C[4]);
323*bc3d5698SJohn Baldwin#endif
324*bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#32-7
325*bc3d5698SJohn Baldwin#ifndef	__thumb2__
326*bc3d5698SJohn Baldwin	str	r11,[sp,#256+4]
327*bc3d5698SJohn Baldwin#else
328*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#256]		@ R[0][2] = C[2] ^ (~C[3] & C[4]);
329*bc3d5698SJohn Baldwin#endif
330*bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#32-7
331*bc3d5698SJohn Baldwin	eor	r12,r12,r6,ror#32-11
332*bc3d5698SJohn Baldwin#ifndef	__thumb2__
333*bc3d5698SJohn Baldwin	str	r12,[sp,#264]		@ R[0][3] = C[3] ^ (~C[4] & C[0]);
334*bc3d5698SJohn Baldwin#endif
335*bc3d5698SJohn Baldwin	eor	r14,r14,r7,ror#32-10
336*bc3d5698SJohn Baldwin#ifndef	__thumb2__
337*bc3d5698SJohn Baldwin	str	r14,[sp,#264+4]
338*bc3d5698SJohn Baldwin#else
339*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#264]		@ R[0][3] = C[3] ^ (~C[4] & C[0]);
340*bc3d5698SJohn Baldwin#endif
341*bc3d5698SJohn Baldwin	bic	r10,r2,r0
342*bc3d5698SJohn Baldwin	add	r14,sp,#224
343*bc3d5698SJohn Baldwin#ifndef	__thumb2__
344*bc3d5698SJohn Baldwin	ldr	r0,[sp,#24]		@ A[0][3]
345*bc3d5698SJohn Baldwin#endif
346*bc3d5698SJohn Baldwin	bic	r11,r3,r1
347*bc3d5698SJohn Baldwin#ifndef	__thumb2__
348*bc3d5698SJohn Baldwin	ldr	r1,[sp,#24+4]
349*bc3d5698SJohn Baldwin#else
350*bc3d5698SJohn Baldwin	ldrd	r0,r1,[sp,#24]		@ A[0][3]
351*bc3d5698SJohn Baldwin#endif
352*bc3d5698SJohn Baldwin	eor	r10,r10,r8,ror#32-7
353*bc3d5698SJohn Baldwin	eor	r11,r11,r9,ror#32-7
354*bc3d5698SJohn Baldwin#ifndef	__thumb2__
355*bc3d5698SJohn Baldwin	str	r10,[sp,#272]		@ R[0][4] = C[4] ^ (~C[0] & C[1]);
356*bc3d5698SJohn Baldwin#endif
357*bc3d5698SJohn Baldwin	add	r9,sp,#200
358*bc3d5698SJohn Baldwin#ifndef	__thumb2__
359*bc3d5698SJohn Baldwin	str	r11,[sp,#272+4]
360*bc3d5698SJohn Baldwin#else
361*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#272]		@ R[0][4] = C[4] ^ (~C[0] & C[1]);
362*bc3d5698SJohn Baldwin#endif
363*bc3d5698SJohn Baldwin
364*bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11,r12,r14}	@ D[3..4]
365*bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[0..1]
366*bc3d5698SJohn Baldwin
367*bc3d5698SJohn Baldwin#ifndef	__thumb2__
368*bc3d5698SJohn Baldwin	ldr	r2,[sp,#72]		@ A[1][4]
369*bc3d5698SJohn Baldwin#endif
370*bc3d5698SJohn Baldwin	eor	r0,r0,r10
371*bc3d5698SJohn Baldwin#ifndef	__thumb2__
372*bc3d5698SJohn Baldwin	ldr	r3,[sp,#72+4]
373*bc3d5698SJohn Baldwin#else
374*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#72]		@ A[1][4]
375*bc3d5698SJohn Baldwin#endif
376*bc3d5698SJohn Baldwin	eor	r1,r1,r11
377*bc3d5698SJohn Baldwin	@ mov	r0,r0,ror#32-14		@ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
378*bc3d5698SJohn Baldwin#ifndef	__thumb2__
379*bc3d5698SJohn Baldwin	ldr	r10,[sp,#128]		@ A[3][1]
380*bc3d5698SJohn Baldwin#endif
381*bc3d5698SJohn Baldwin	@ mov	r1,r1,ror#32-14
382*bc3d5698SJohn Baldwin#ifndef	__thumb2__
383*bc3d5698SJohn Baldwin	ldr	r11,[sp,#128+4]
384*bc3d5698SJohn Baldwin#else
385*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#128]		@ A[3][1]
386*bc3d5698SJohn Baldwin#endif
387*bc3d5698SJohn Baldwin
388*bc3d5698SJohn Baldwin	eor	r2,r2,r12
389*bc3d5698SJohn Baldwin#ifndef	__thumb2__
390*bc3d5698SJohn Baldwin	ldr	r4,[sp,#80]		@ A[2][0]
391*bc3d5698SJohn Baldwin#endif
392*bc3d5698SJohn Baldwin	eor	r3,r3,r14
393*bc3d5698SJohn Baldwin#ifndef	__thumb2__
394*bc3d5698SJohn Baldwin	ldr	r5,[sp,#80+4]
395*bc3d5698SJohn Baldwin#else
396*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#80]		@ A[2][0]
397*bc3d5698SJohn Baldwin#endif
398*bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-10		@ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
399*bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-10
400*bc3d5698SJohn Baldwin
401*bc3d5698SJohn Baldwin	eor	r6,r6,r4
402*bc3d5698SJohn Baldwin#ifndef	__thumb2__
403*bc3d5698SJohn Baldwin	ldr	r12,[sp,#216]		@ D[2]
404*bc3d5698SJohn Baldwin#endif
405*bc3d5698SJohn Baldwin	eor	r7,r7,r5
406*bc3d5698SJohn Baldwin#ifndef	__thumb2__
407*bc3d5698SJohn Baldwin	ldr	r14,[sp,#216+4]
408*bc3d5698SJohn Baldwin#else
409*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#216]		@ D[2]
410*bc3d5698SJohn Baldwin#endif
411*bc3d5698SJohn Baldwin	mov	r5,r6,ror#32-1		@ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
412*bc3d5698SJohn Baldwin	mov	r4,r7,ror#32-2
413*bc3d5698SJohn Baldwin
414*bc3d5698SJohn Baldwin	eor	r10,r10,r8
415*bc3d5698SJohn Baldwin#ifndef	__thumb2__
416*bc3d5698SJohn Baldwin	ldr	r8,[sp,#176]		@ A[4][2]
417*bc3d5698SJohn Baldwin#endif
418*bc3d5698SJohn Baldwin	eor	r11,r11,r9
419*bc3d5698SJohn Baldwin#ifndef	__thumb2__
420*bc3d5698SJohn Baldwin	ldr	r9,[sp,#176+4]
421*bc3d5698SJohn Baldwin#else
422*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#176]		@ A[4][2]
423*bc3d5698SJohn Baldwin#endif
424*bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-22		@ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
425*bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-23
426*bc3d5698SJohn Baldwin
427*bc3d5698SJohn Baldwin	bic	r10,r4,r2,ror#32-10
428*bc3d5698SJohn Baldwin	bic	r11,r5,r3,ror#32-10
429*bc3d5698SJohn Baldwin	eor	r12,r12,r8
430*bc3d5698SJohn Baldwin	eor	r14,r14,r9
431*bc3d5698SJohn Baldwin	mov	r9,r12,ror#32-30		@ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
432*bc3d5698SJohn Baldwin	mov	r8,r14,ror#32-31
433*bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-14
434*bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-14
435*bc3d5698SJohn Baldwin#ifndef	__thumb2__
436*bc3d5698SJohn Baldwin	str	r10,[sp,#280]		@ R[1][0] = C[0] ^ (~C[1] & C[2])
437*bc3d5698SJohn Baldwin#endif
438*bc3d5698SJohn Baldwin	bic	r12,r6,r4
439*bc3d5698SJohn Baldwin#ifndef	__thumb2__
440*bc3d5698SJohn Baldwin	str	r11,[sp,#280+4]
441*bc3d5698SJohn Baldwin#else
442*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#280]		@ R[1][0] = C[0] ^ (~C[1] & C[2])
443*bc3d5698SJohn Baldwin#endif
444*bc3d5698SJohn Baldwin	bic	r14,r7,r5
445*bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-10
446*bc3d5698SJohn Baldwin#ifndef	__thumb2__
447*bc3d5698SJohn Baldwin	str	r12,[sp,#288]		@ R[1][1] = C[1] ^ (~C[2] & C[3]);
448*bc3d5698SJohn Baldwin#endif
449*bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-10
450*bc3d5698SJohn Baldwin#ifndef	__thumb2__
451*bc3d5698SJohn Baldwin	str	r14,[sp,#288+4]
452*bc3d5698SJohn Baldwin#else
453*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#288]		@ R[1][1] = C[1] ^ (~C[2] & C[3]);
454*bc3d5698SJohn Baldwin#endif
455*bc3d5698SJohn Baldwin	bic	r10,r8,r6
456*bc3d5698SJohn Baldwin	bic	r11,r9,r7
457*bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#14
458*bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#14
459*bc3d5698SJohn Baldwin	eor	r10,r10,r4
460*bc3d5698SJohn Baldwin	eor	r11,r11,r5
461*bc3d5698SJohn Baldwin#ifndef	__thumb2__
462*bc3d5698SJohn Baldwin	str	r10,[sp,#296]		@ R[1][2] = C[2] ^ (~C[3] & C[4]);
463*bc3d5698SJohn Baldwin#endif
464*bc3d5698SJohn Baldwin	bic	r2,r2,r0,ror#32-(14-10)
465*bc3d5698SJohn Baldwin#ifndef	__thumb2__
466*bc3d5698SJohn Baldwin	str	r11,[sp,#296+4]
467*bc3d5698SJohn Baldwin#else
468*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#296]		@ R[1][2] = C[2] ^ (~C[3] & C[4]);
469*bc3d5698SJohn Baldwin#endif
470*bc3d5698SJohn Baldwin	eor	r12,r6,r12,ror#32-14
471*bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#32-(14-10)
472*bc3d5698SJohn Baldwin#ifndef	__thumb2__
473*bc3d5698SJohn Baldwin	str	r12,[sp,#304]		@ R[1][3] = C[3] ^ (~C[4] & C[0]);
474*bc3d5698SJohn Baldwin#endif
475*bc3d5698SJohn Baldwin	eor	r14,r7,r14,ror#32-14
476*bc3d5698SJohn Baldwin#ifndef	__thumb2__
477*bc3d5698SJohn Baldwin	str	r14,[sp,#304+4]
478*bc3d5698SJohn Baldwin#else
479*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#304]		@ R[1][3] = C[3] ^ (~C[4] & C[0]);
480*bc3d5698SJohn Baldwin#endif
481*bc3d5698SJohn Baldwin	add	r12,sp,#208
482*bc3d5698SJohn Baldwin#ifndef	__thumb2__
483*bc3d5698SJohn Baldwin	ldr	r1,[sp,#8]		@ A[0][1]
484*bc3d5698SJohn Baldwin#endif
485*bc3d5698SJohn Baldwin	eor	r10,r8,r2,ror#32-10
486*bc3d5698SJohn Baldwin#ifndef	__thumb2__
487*bc3d5698SJohn Baldwin	ldr	r0,[sp,#8+4]
488*bc3d5698SJohn Baldwin#else
489*bc3d5698SJohn Baldwin	ldrd	r1,r0,[sp,#8]		@ A[0][1]
490*bc3d5698SJohn Baldwin#endif
491*bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-10
492*bc3d5698SJohn Baldwin#ifndef	__thumb2__
493*bc3d5698SJohn Baldwin	str	r10,[sp,#312]		@ R[1][4] = C[4] ^ (~C[0] & C[1]);
494*bc3d5698SJohn Baldwin#endif
495*bc3d5698SJohn Baldwin#ifndef	__thumb2__
496*bc3d5698SJohn Baldwin	str	r11,[sp,#312+4]
497*bc3d5698SJohn Baldwin#else
498*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#312]		@ R[1][4] = C[4] ^ (~C[0] & C[1]);
499*bc3d5698SJohn Baldwin#endif
500*bc3d5698SJohn Baldwin
501*bc3d5698SJohn Baldwin	add	r9,sp,#224
502*bc3d5698SJohn Baldwin	ldmia	r12,{r10,r11,r12,r14}	@ D[1..2]
503*bc3d5698SJohn Baldwin#ifndef	__thumb2__
504*bc3d5698SJohn Baldwin	ldr	r2,[sp,#56]		@ A[1][2]
505*bc3d5698SJohn Baldwin#endif
506*bc3d5698SJohn Baldwin#ifndef	__thumb2__
507*bc3d5698SJohn Baldwin	ldr	r3,[sp,#56+4]
508*bc3d5698SJohn Baldwin#else
509*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#56]		@ A[1][2]
510*bc3d5698SJohn Baldwin#endif
511*bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[3..4]
512*bc3d5698SJohn Baldwin
513*bc3d5698SJohn Baldwin	eor	r1,r1,r10
514*bc3d5698SJohn Baldwin#ifndef	__thumb2__
515*bc3d5698SJohn Baldwin	ldr	r4,[sp,#104]		@ A[2][3]
516*bc3d5698SJohn Baldwin#endif
517*bc3d5698SJohn Baldwin	eor	r0,r0,r11
518*bc3d5698SJohn Baldwin#ifndef	__thumb2__
519*bc3d5698SJohn Baldwin	ldr	r5,[sp,#104+4]
520*bc3d5698SJohn Baldwin#else
521*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#104]		@ A[2][3]
522*bc3d5698SJohn Baldwin#endif
523*bc3d5698SJohn Baldwin	mov	r0,r0,ror#32-1		@ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
524*bc3d5698SJohn Baldwin
525*bc3d5698SJohn Baldwin	eor	r2,r2,r12
526*bc3d5698SJohn Baldwin#ifndef	__thumb2__
527*bc3d5698SJohn Baldwin	ldr	r10,[sp,#152]		@ A[3][4]
528*bc3d5698SJohn Baldwin#endif
529*bc3d5698SJohn Baldwin	eor	r3,r3,r14
530*bc3d5698SJohn Baldwin#ifndef	__thumb2__
531*bc3d5698SJohn Baldwin	ldr	r11,[sp,#152+4]
532*bc3d5698SJohn Baldwin#else
533*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#152]		@ A[3][4]
534*bc3d5698SJohn Baldwin#endif
535*bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-3		@ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
536*bc3d5698SJohn Baldwin#ifndef	__thumb2__
537*bc3d5698SJohn Baldwin	ldr	r12,[sp,#200]		@ D[0]
538*bc3d5698SJohn Baldwin#endif
539*bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-3
540*bc3d5698SJohn Baldwin#ifndef	__thumb2__
541*bc3d5698SJohn Baldwin	ldr	r14,[sp,#200+4]
542*bc3d5698SJohn Baldwin#else
543*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#200]		@ D[0]
544*bc3d5698SJohn Baldwin#endif
545*bc3d5698SJohn Baldwin
546*bc3d5698SJohn Baldwin	eor	r4,r4,r6
547*bc3d5698SJohn Baldwin	eor	r5,r5,r7
548*bc3d5698SJohn Baldwin	@ mov	r5,r6,ror#32-12		@ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
549*bc3d5698SJohn Baldwin	@ mov	r4,r7,ror#32-13		@ [track reverse order below]
550*bc3d5698SJohn Baldwin
551*bc3d5698SJohn Baldwin	eor	r10,r10,r8
552*bc3d5698SJohn Baldwin#ifndef	__thumb2__
553*bc3d5698SJohn Baldwin	ldr	r8,[sp,#160]		@ A[4][0]
554*bc3d5698SJohn Baldwin#endif
555*bc3d5698SJohn Baldwin	eor	r11,r11,r9
556*bc3d5698SJohn Baldwin#ifndef	__thumb2__
557*bc3d5698SJohn Baldwin	ldr	r9,[sp,#160+4]
558*bc3d5698SJohn Baldwin#else
559*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#160]		@ A[4][0]
560*bc3d5698SJohn Baldwin#endif
561*bc3d5698SJohn Baldwin	mov	r6,r10,ror#32-4		@ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
562*bc3d5698SJohn Baldwin	mov	r7,r11,ror#32-4
563*bc3d5698SJohn Baldwin
564*bc3d5698SJohn Baldwin	eor	r12,r12,r8
565*bc3d5698SJohn Baldwin	eor	r14,r14,r9
566*bc3d5698SJohn Baldwin	mov	r8,r12,ror#32-9		@ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
567*bc3d5698SJohn Baldwin	mov	r9,r14,ror#32-9
568*bc3d5698SJohn Baldwin
569*bc3d5698SJohn Baldwin	bic	r10,r5,r2,ror#13-3
570*bc3d5698SJohn Baldwin	bic	r11,r4,r3,ror#12-3
571*bc3d5698SJohn Baldwin	bic	r12,r6,r5,ror#32-13
572*bc3d5698SJohn Baldwin	bic	r14,r7,r4,ror#32-12
573*bc3d5698SJohn Baldwin	eor	r10,r0,r10,ror#32-13
574*bc3d5698SJohn Baldwin	eor	r11,r1,r11,ror#32-12
575*bc3d5698SJohn Baldwin#ifndef	__thumb2__
576*bc3d5698SJohn Baldwin	str	r10,[sp,#320]		@ R[2][0] = C[0] ^ (~C[1] & C[2])
577*bc3d5698SJohn Baldwin#endif
578*bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-3
579*bc3d5698SJohn Baldwin#ifndef	__thumb2__
580*bc3d5698SJohn Baldwin	str	r11,[sp,#320+4]
581*bc3d5698SJohn Baldwin#else
582*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#320]		@ R[2][0] = C[0] ^ (~C[1] & C[2])
583*bc3d5698SJohn Baldwin#endif
584*bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-3
585*bc3d5698SJohn Baldwin#ifndef	__thumb2__
586*bc3d5698SJohn Baldwin	str	r12,[sp,#328]		@ R[2][1] = C[1] ^ (~C[2] & C[3]);
587*bc3d5698SJohn Baldwin#endif
588*bc3d5698SJohn Baldwin	bic	r10,r8,r6
589*bc3d5698SJohn Baldwin	bic	r11,r9,r7
590*bc3d5698SJohn Baldwin#ifndef	__thumb2__
591*bc3d5698SJohn Baldwin	str	r14,[sp,#328+4]
592*bc3d5698SJohn Baldwin#else
593*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#328]		@ R[2][1] = C[1] ^ (~C[2] & C[3]);
594*bc3d5698SJohn Baldwin#endif
595*bc3d5698SJohn Baldwin	eor	r10,r10,r5,ror#32-13
596*bc3d5698SJohn Baldwin	eor	r11,r11,r4,ror#32-12
597*bc3d5698SJohn Baldwin#ifndef	__thumb2__
598*bc3d5698SJohn Baldwin	str	r10,[sp,#336]		@ R[2][2] = C[2] ^ (~C[3] & C[4]);
599*bc3d5698SJohn Baldwin#endif
600*bc3d5698SJohn Baldwin	bic	r12,r0,r8
601*bc3d5698SJohn Baldwin#ifndef	__thumb2__
602*bc3d5698SJohn Baldwin	str	r11,[sp,#336+4]
603*bc3d5698SJohn Baldwin#else
604*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#336]		@ R[2][2] = C[2] ^ (~C[3] & C[4]);
605*bc3d5698SJohn Baldwin#endif
606*bc3d5698SJohn Baldwin	bic	r14,r1,r9
607*bc3d5698SJohn Baldwin	eor	r12,r12,r6
608*bc3d5698SJohn Baldwin	eor	r14,r14,r7
609*bc3d5698SJohn Baldwin#ifndef	__thumb2__
610*bc3d5698SJohn Baldwin	str	r12,[sp,#344]		@ R[2][3] = C[3] ^ (~C[4] & C[0]);
611*bc3d5698SJohn Baldwin#endif
612*bc3d5698SJohn Baldwin	bic	r10,r2,r0,ror#3
613*bc3d5698SJohn Baldwin#ifndef	__thumb2__
614*bc3d5698SJohn Baldwin	str	r14,[sp,#344+4]
615*bc3d5698SJohn Baldwin#else
616*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#344]		@ R[2][3] = C[3] ^ (~C[4] & C[0]);
617*bc3d5698SJohn Baldwin#endif
618*bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#3
619*bc3d5698SJohn Baldwin#ifndef	__thumb2__
620*bc3d5698SJohn Baldwin	ldr	r1,[sp,#32]		@ A[0][4] [in reverse order]
621*bc3d5698SJohn Baldwin#endif
622*bc3d5698SJohn Baldwin	eor	r10,r8,r10,ror#32-3
623*bc3d5698SJohn Baldwin#ifndef	__thumb2__
624*bc3d5698SJohn Baldwin	ldr	r0,[sp,#32+4]
625*bc3d5698SJohn Baldwin#else
626*bc3d5698SJohn Baldwin	ldrd	r1,r0,[sp,#32]		@ A[0][4] [in reverse order]
627*bc3d5698SJohn Baldwin#endif
628*bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-3
629*bc3d5698SJohn Baldwin#ifndef	__thumb2__
630*bc3d5698SJohn Baldwin	str	r10,[sp,#352]		@ R[2][4] = C[4] ^ (~C[0] & C[1]);
631*bc3d5698SJohn Baldwin#endif
632*bc3d5698SJohn Baldwin	add	r9,sp,#208
633*bc3d5698SJohn Baldwin#ifndef	__thumb2__
634*bc3d5698SJohn Baldwin	str	r11,[sp,#352+4]
635*bc3d5698SJohn Baldwin#else
636*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#352]		@ R[2][4] = C[4] ^ (~C[0] & C[1]);
637*bc3d5698SJohn Baldwin#endif
638*bc3d5698SJohn Baldwin
639*bc3d5698SJohn Baldwin#ifndef	__thumb2__
640*bc3d5698SJohn Baldwin	ldr	r10,[sp,#232]		@ D[4]
641*bc3d5698SJohn Baldwin#endif
642*bc3d5698SJohn Baldwin#ifndef	__thumb2__
643*bc3d5698SJohn Baldwin	ldr	r11,[sp,#232+4]
644*bc3d5698SJohn Baldwin#else
645*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#232]		@ D[4]
646*bc3d5698SJohn Baldwin#endif
647*bc3d5698SJohn Baldwin#ifndef	__thumb2__
648*bc3d5698SJohn Baldwin	ldr	r12,[sp,#200]		@ D[0]
649*bc3d5698SJohn Baldwin#endif
650*bc3d5698SJohn Baldwin#ifndef	__thumb2__
651*bc3d5698SJohn Baldwin	ldr	r14,[sp,#200+4]
652*bc3d5698SJohn Baldwin#else
653*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#200]		@ D[0]
654*bc3d5698SJohn Baldwin#endif
655*bc3d5698SJohn Baldwin
656*bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[1..2]
657*bc3d5698SJohn Baldwin
658*bc3d5698SJohn Baldwin	eor	r1,r1,r10
659*bc3d5698SJohn Baldwin#ifndef	__thumb2__
660*bc3d5698SJohn Baldwin	ldr	r2,[sp,#40]		@ A[1][0]
661*bc3d5698SJohn Baldwin#endif
662*bc3d5698SJohn Baldwin	eor	r0,r0,r11
663*bc3d5698SJohn Baldwin#ifndef	__thumb2__
664*bc3d5698SJohn Baldwin	ldr	r3,[sp,#40+4]
665*bc3d5698SJohn Baldwin#else
666*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#40]		@ A[1][0]
667*bc3d5698SJohn Baldwin#endif
668*bc3d5698SJohn Baldwin	@ mov	r1,r10,ror#32-13		@ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
669*bc3d5698SJohn Baldwin#ifndef	__thumb2__
670*bc3d5698SJohn Baldwin	ldr	r4,[sp,#88]		@ A[2][1]
671*bc3d5698SJohn Baldwin#endif
672*bc3d5698SJohn Baldwin	@ mov	r0,r11,ror#32-14		@ [was loaded in reverse order]
673*bc3d5698SJohn Baldwin#ifndef	__thumb2__
674*bc3d5698SJohn Baldwin	ldr	r5,[sp,#88+4]
675*bc3d5698SJohn Baldwin#else
676*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#88]		@ A[2][1]
677*bc3d5698SJohn Baldwin#endif
678*bc3d5698SJohn Baldwin
679*bc3d5698SJohn Baldwin	eor	r2,r2,r12
680*bc3d5698SJohn Baldwin#ifndef	__thumb2__
681*bc3d5698SJohn Baldwin	ldr	r10,[sp,#136]		@ A[3][2]
682*bc3d5698SJohn Baldwin#endif
683*bc3d5698SJohn Baldwin	eor	r3,r3,r14
684*bc3d5698SJohn Baldwin#ifndef	__thumb2__
685*bc3d5698SJohn Baldwin	ldr	r11,[sp,#136+4]
686*bc3d5698SJohn Baldwin#else
687*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#136]		@ A[3][2]
688*bc3d5698SJohn Baldwin#endif
689*bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-18		@ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
690*bc3d5698SJohn Baldwin#ifndef	__thumb2__
691*bc3d5698SJohn Baldwin	ldr	r12,[sp,#224]		@ D[3]
692*bc3d5698SJohn Baldwin#endif
693*bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-18
694*bc3d5698SJohn Baldwin#ifndef	__thumb2__
695*bc3d5698SJohn Baldwin	ldr	r14,[sp,#224+4]
696*bc3d5698SJohn Baldwin#else
697*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#224]		@ D[3]
698*bc3d5698SJohn Baldwin#endif
699*bc3d5698SJohn Baldwin
700*bc3d5698SJohn Baldwin	eor	r6,r6,r4
701*bc3d5698SJohn Baldwin	eor	r7,r7,r5
702*bc3d5698SJohn Baldwin	mov	r4,r6,ror#32-5		@ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
703*bc3d5698SJohn Baldwin	mov	r5,r7,ror#32-5
704*bc3d5698SJohn Baldwin
705*bc3d5698SJohn Baldwin	eor	r10,r10,r8
706*bc3d5698SJohn Baldwin#ifndef	__thumb2__
707*bc3d5698SJohn Baldwin	ldr	r8,[sp,#184]		@ A[4][3]
708*bc3d5698SJohn Baldwin#endif
709*bc3d5698SJohn Baldwin	eor	r11,r11,r9
710*bc3d5698SJohn Baldwin#ifndef	__thumb2__
711*bc3d5698SJohn Baldwin	ldr	r9,[sp,#184+4]
712*bc3d5698SJohn Baldwin#else
713*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#184]		@ A[4][3]
714*bc3d5698SJohn Baldwin#endif
715*bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-7		@ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
716*bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-8
717*bc3d5698SJohn Baldwin
718*bc3d5698SJohn Baldwin	eor	r12,r12,r8
719*bc3d5698SJohn Baldwin	eor	r14,r14,r9
720*bc3d5698SJohn Baldwin	mov	r8,r12,ror#32-28		@ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
721*bc3d5698SJohn Baldwin	mov	r9,r14,ror#32-28
722*bc3d5698SJohn Baldwin
723*bc3d5698SJohn Baldwin	bic	r10,r4,r2,ror#32-18
724*bc3d5698SJohn Baldwin	bic	r11,r5,r3,ror#32-18
725*bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-14
726*bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-13
727*bc3d5698SJohn Baldwin#ifndef	__thumb2__
728*bc3d5698SJohn Baldwin	str	r10,[sp,#360]		@ R[3][0] = C[0] ^ (~C[1] & C[2])
729*bc3d5698SJohn Baldwin#endif
730*bc3d5698SJohn Baldwin	bic	r12,r6,r4
731*bc3d5698SJohn Baldwin#ifndef	__thumb2__
732*bc3d5698SJohn Baldwin	str	r11,[sp,#360+4]
733*bc3d5698SJohn Baldwin#else
734*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#360]		@ R[3][0] = C[0] ^ (~C[1] & C[2])
735*bc3d5698SJohn Baldwin#endif
736*bc3d5698SJohn Baldwin	bic	r14,r7,r5
737*bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-18
738*bc3d5698SJohn Baldwin#ifndef	__thumb2__
739*bc3d5698SJohn Baldwin	str	r12,[sp,#368]		@ R[3][1] = C[1] ^ (~C[2] & C[3]);
740*bc3d5698SJohn Baldwin#endif
741*bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-18
742*bc3d5698SJohn Baldwin#ifndef	__thumb2__
743*bc3d5698SJohn Baldwin	str	r14,[sp,#368+4]
744*bc3d5698SJohn Baldwin#else
745*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#368]		@ R[3][1] = C[1] ^ (~C[2] & C[3]);
746*bc3d5698SJohn Baldwin#endif
747*bc3d5698SJohn Baldwin	bic	r10,r8,r6
748*bc3d5698SJohn Baldwin	bic	r11,r9,r7
749*bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#14
750*bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#13
751*bc3d5698SJohn Baldwin	eor	r10,r10,r4
752*bc3d5698SJohn Baldwin	eor	r11,r11,r5
753*bc3d5698SJohn Baldwin#ifndef	__thumb2__
754*bc3d5698SJohn Baldwin	str	r10,[sp,#376]		@ R[3][2] = C[2] ^ (~C[3] & C[4]);
755*bc3d5698SJohn Baldwin#endif
756*bc3d5698SJohn Baldwin	bic	r2,r2,r0,ror#18-14
757*bc3d5698SJohn Baldwin#ifndef	__thumb2__
758*bc3d5698SJohn Baldwin	str	r11,[sp,#376+4]
759*bc3d5698SJohn Baldwin#else
760*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#376]		@ R[3][2] = C[2] ^ (~C[3] & C[4]);
761*bc3d5698SJohn Baldwin#endif
762*bc3d5698SJohn Baldwin	eor	r12,r6,r12,ror#32-14
763*bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#18-13
764*bc3d5698SJohn Baldwin	eor	r14,r7,r14,ror#32-13
765*bc3d5698SJohn Baldwin#ifndef	__thumb2__
766*bc3d5698SJohn Baldwin	str	r12,[sp,#384]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
767*bc3d5698SJohn Baldwin#endif
768*bc3d5698SJohn Baldwin#ifndef	__thumb2__
769*bc3d5698SJohn Baldwin	str	r14,[sp,#384+4]
770*bc3d5698SJohn Baldwin#else
771*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#384]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
772*bc3d5698SJohn Baldwin#endif
773*bc3d5698SJohn Baldwin	add	r14,sp,#216
774*bc3d5698SJohn Baldwin#ifndef	__thumb2__
775*bc3d5698SJohn Baldwin	ldr	r0,[sp,#16]		@ A[0][2]
776*bc3d5698SJohn Baldwin#endif
777*bc3d5698SJohn Baldwin	eor	r10,r8,r2,ror#32-18
778*bc3d5698SJohn Baldwin#ifndef	__thumb2__
779*bc3d5698SJohn Baldwin	ldr	r1,[sp,#16+4]
780*bc3d5698SJohn Baldwin#else
781*bc3d5698SJohn Baldwin	ldrd	r0,r1,[sp,#16]		@ A[0][2]
782*bc3d5698SJohn Baldwin#endif
783*bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-18
784*bc3d5698SJohn Baldwin#ifndef	__thumb2__
785*bc3d5698SJohn Baldwin	str	r10,[sp,#392]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
786*bc3d5698SJohn Baldwin#endif
787*bc3d5698SJohn Baldwin#ifndef	__thumb2__
788*bc3d5698SJohn Baldwin	str	r11,[sp,#392+4]
789*bc3d5698SJohn Baldwin#else
790*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#392]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
791*bc3d5698SJohn Baldwin#endif
792*bc3d5698SJohn Baldwin
793*bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11,r12,r14}	@ D[2..3]
794*bc3d5698SJohn Baldwin#ifndef	__thumb2__
795*bc3d5698SJohn Baldwin	ldr	r2,[sp,#64]		@ A[1][3]
796*bc3d5698SJohn Baldwin#endif
797*bc3d5698SJohn Baldwin#ifndef	__thumb2__
798*bc3d5698SJohn Baldwin	ldr	r3,[sp,#64+4]
799*bc3d5698SJohn Baldwin#else
800*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#64]		@ A[1][3]
801*bc3d5698SJohn Baldwin#endif
802*bc3d5698SJohn Baldwin#ifndef	__thumb2__
803*bc3d5698SJohn Baldwin	ldr	r6,[sp,#232]		@ D[4]
804*bc3d5698SJohn Baldwin#endif
805*bc3d5698SJohn Baldwin#ifndef	__thumb2__
806*bc3d5698SJohn Baldwin	ldr	r7,[sp,#232+4]
807*bc3d5698SJohn Baldwin#else
808*bc3d5698SJohn Baldwin	ldrd	r6,r7,[sp,#232]		@ D[4]
809*bc3d5698SJohn Baldwin#endif
810*bc3d5698SJohn Baldwin
811*bc3d5698SJohn Baldwin	eor	r0,r0,r10
812*bc3d5698SJohn Baldwin#ifndef	__thumb2__
813*bc3d5698SJohn Baldwin	ldr	r4,[sp,#112]		@ A[2][4]
814*bc3d5698SJohn Baldwin#endif
815*bc3d5698SJohn Baldwin	eor	r1,r1,r11
816*bc3d5698SJohn Baldwin#ifndef	__thumb2__
817*bc3d5698SJohn Baldwin	ldr	r5,[sp,#112+4]
818*bc3d5698SJohn Baldwin#else
819*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#112]		@ A[2][4]
820*bc3d5698SJohn Baldwin#endif
821*bc3d5698SJohn Baldwin	@ mov	r0,r0,ror#32-31		@ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
822*bc3d5698SJohn Baldwin#ifndef	__thumb2__
823*bc3d5698SJohn Baldwin	ldr	r8,[sp,#200]		@ D[0]
824*bc3d5698SJohn Baldwin#endif
825*bc3d5698SJohn Baldwin	@ mov	r1,r1,ror#32-31
826*bc3d5698SJohn Baldwin#ifndef	__thumb2__
827*bc3d5698SJohn Baldwin	ldr	r9,[sp,#200+4]
828*bc3d5698SJohn Baldwin#else
829*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#200]		@ D[0]
830*bc3d5698SJohn Baldwin#endif
831*bc3d5698SJohn Baldwin
832*bc3d5698SJohn Baldwin	eor	r12,r12,r2
833*bc3d5698SJohn Baldwin#ifndef	__thumb2__
834*bc3d5698SJohn Baldwin	ldr	r10,[sp,#120]		@ A[3][0]
835*bc3d5698SJohn Baldwin#endif
836*bc3d5698SJohn Baldwin	eor	r14,r14,r3
837*bc3d5698SJohn Baldwin#ifndef	__thumb2__
838*bc3d5698SJohn Baldwin	ldr	r11,[sp,#120+4]
839*bc3d5698SJohn Baldwin#else
840*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#120]		@ A[3][0]
841*bc3d5698SJohn Baldwin#endif
842*bc3d5698SJohn Baldwin	mov	r3,r12,ror#32-27		@ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
843*bc3d5698SJohn Baldwin#ifndef	__thumb2__
844*bc3d5698SJohn Baldwin	ldr	r12,[sp,#208]		@ D[1]
845*bc3d5698SJohn Baldwin#endif
846*bc3d5698SJohn Baldwin	mov	r2,r14,ror#32-28
847*bc3d5698SJohn Baldwin#ifndef	__thumb2__
848*bc3d5698SJohn Baldwin	ldr	r14,[sp,#208+4]
849*bc3d5698SJohn Baldwin#else
850*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#208]		@ D[1]
851*bc3d5698SJohn Baldwin#endif
852*bc3d5698SJohn Baldwin
853*bc3d5698SJohn Baldwin	eor	r6,r6,r4
854*bc3d5698SJohn Baldwin	eor	r7,r7,r5
855*bc3d5698SJohn Baldwin	mov	r5,r6,ror#32-19		@ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
856*bc3d5698SJohn Baldwin	mov	r4,r7,ror#32-20
857*bc3d5698SJohn Baldwin
858*bc3d5698SJohn Baldwin	eor	r10,r10,r8
859*bc3d5698SJohn Baldwin#ifndef	__thumb2__
860*bc3d5698SJohn Baldwin	ldr	r8,[sp,#168]		@ A[4][1]
861*bc3d5698SJohn Baldwin#endif
862*bc3d5698SJohn Baldwin	eor	r11,r11,r9
863*bc3d5698SJohn Baldwin#ifndef	__thumb2__
864*bc3d5698SJohn Baldwin	ldr	r9,[sp,#168+4]
865*bc3d5698SJohn Baldwin#else
866*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#168]		@ A[4][1]
867*bc3d5698SJohn Baldwin#endif
868*bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-20		@ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
869*bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-21
870*bc3d5698SJohn Baldwin
871*bc3d5698SJohn Baldwin	eor	r8,r8,r12
872*bc3d5698SJohn Baldwin	eor	r9,r9,r14
873*bc3d5698SJohn Baldwin	@ mov	r8,r2,ror#32-1		@ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
874*bc3d5698SJohn Baldwin	@ mov	r9,r3,ror#32-1
875*bc3d5698SJohn Baldwin
876*bc3d5698SJohn Baldwin	bic	r10,r4,r2
877*bc3d5698SJohn Baldwin	bic	r11,r5,r3
878*bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-31
879*bc3d5698SJohn Baldwin#ifndef	__thumb2__
880*bc3d5698SJohn Baldwin	str	r10,[sp,#400]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
881*bc3d5698SJohn Baldwin#endif
882*bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-31
883*bc3d5698SJohn Baldwin#ifndef	__thumb2__
884*bc3d5698SJohn Baldwin	str	r11,[sp,#400+4]
885*bc3d5698SJohn Baldwin#else
886*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#400]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
887*bc3d5698SJohn Baldwin#endif
888*bc3d5698SJohn Baldwin	bic	r12,r6,r4
889*bc3d5698SJohn Baldwin	bic	r14,r7,r5
890*bc3d5698SJohn Baldwin	eor	r12,r12,r2
891*bc3d5698SJohn Baldwin	eor	r14,r14,r3
892*bc3d5698SJohn Baldwin#ifndef	__thumb2__
893*bc3d5698SJohn Baldwin	str	r12,[sp,#408]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
894*bc3d5698SJohn Baldwin#endif
895*bc3d5698SJohn Baldwin	bic	r10,r8,r6,ror#1
896*bc3d5698SJohn Baldwin#ifndef	__thumb2__
897*bc3d5698SJohn Baldwin	str	r14,[sp,#408+4]
898*bc3d5698SJohn Baldwin#else
899*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#408]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
900*bc3d5698SJohn Baldwin#endif
901*bc3d5698SJohn Baldwin	bic	r11,r9,r7,ror#1
902*bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#31-1
903*bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#31-1
904*bc3d5698SJohn Baldwin	eor	r4,r4,r10,ror#32-1
905*bc3d5698SJohn Baldwin#ifndef	__thumb2__
906*bc3d5698SJohn Baldwin	str	r4,[sp,#416]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
907*bc3d5698SJohn Baldwin#endif
908*bc3d5698SJohn Baldwin	eor	r5,r5,r11,ror#32-1
909*bc3d5698SJohn Baldwin#ifndef	__thumb2__
910*bc3d5698SJohn Baldwin	str	r5,[sp,#416+4]
911*bc3d5698SJohn Baldwin#else
912*bc3d5698SJohn Baldwin	strd	r4,r5,[sp,#416]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
913*bc3d5698SJohn Baldwin#endif
914*bc3d5698SJohn Baldwin	eor	r6,r6,r12,ror#32-31
915*bc3d5698SJohn Baldwin	eor	r7,r7,r14,ror#32-31
916*bc3d5698SJohn Baldwin#ifndef	__thumb2__
917*bc3d5698SJohn Baldwin	str	r6,[sp,#424]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
918*bc3d5698SJohn Baldwin#endif
919*bc3d5698SJohn Baldwin	bic	r10,r2,r0,ror#32-31
920*bc3d5698SJohn Baldwin#ifndef	__thumb2__
921*bc3d5698SJohn Baldwin	str	r7,[sp,#424+4]
922*bc3d5698SJohn Baldwin#else
923*bc3d5698SJohn Baldwin	strd	r6,r7,[sp,#424]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
924*bc3d5698SJohn Baldwin#endif
925*bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#32-31
926*bc3d5698SJohn Baldwin	add	r12,sp,#240
927*bc3d5698SJohn Baldwin	eor	r8,r10,r8,ror#32-1
928*bc3d5698SJohn Baldwin	add	r10,sp,#280
929*bc3d5698SJohn Baldwin	eor	r9,r11,r9,ror#32-1
930*bc3d5698SJohn Baldwin#ifndef	__thumb2__
931*bc3d5698SJohn Baldwin	str	r8,[sp,#432]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
932*bc3d5698SJohn Baldwin#endif
933*bc3d5698SJohn Baldwin#ifndef	__thumb2__
934*bc3d5698SJohn Baldwin	str	r9,[sp,#432+4]
935*bc3d5698SJohn Baldwin#else
936*bc3d5698SJohn Baldwin	strd	r8,r9,[sp,#432]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
937*bc3d5698SJohn Baldwin#endif
938*bc3d5698SJohn Baldwin	ldmia	r12,{r0,r1,r2,r3}		@ A[0][0..1]
939*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][0..1]
940*bc3d5698SJohn Baldwin#ifdef	__thumb2__
941*bc3d5698SJohn Baldwin	eor	r0,r0,r10
942*bc3d5698SJohn Baldwin	eor	r1,r1,r11
943*bc3d5698SJohn Baldwin	eor	r2,r2,r12
944*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#296]
945*bc3d5698SJohn Baldwin	eor	r3,r3,r14
946*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#304]
947*bc3d5698SJohn Baldwin	eor	r4,r4,r10
948*bc3d5698SJohn Baldwin	eor	r5,r5,r11
949*bc3d5698SJohn Baldwin	eor	r6,r6,r12
950*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#312]
951*bc3d5698SJohn Baldwin	eor	r7,r7,r14
952*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#320]
953*bc3d5698SJohn Baldwin	eor	r8,r8,r10
954*bc3d5698SJohn Baldwin	eor	r9,r9,r11
955*bc3d5698SJohn Baldwin	eor	r0,r0,r12
956*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#328]
957*bc3d5698SJohn Baldwin	eor	r1,r1,r14
958*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#336]
959*bc3d5698SJohn Baldwin	eor	r2,r2,r10
960*bc3d5698SJohn Baldwin	eor	r3,r3,r11
961*bc3d5698SJohn Baldwin	eor	r4,r4,r12
962*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#344]
963*bc3d5698SJohn Baldwin	eor	r5,r5,r14
964*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#352]
965*bc3d5698SJohn Baldwin	eor	r6,r6,r10
966*bc3d5698SJohn Baldwin	eor	r7,r7,r11
967*bc3d5698SJohn Baldwin	eor	r8,r8,r12
968*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#360]
969*bc3d5698SJohn Baldwin	eor	r9,r9,r14
970*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#368]
971*bc3d5698SJohn Baldwin	eor	r0,r0,r10
972*bc3d5698SJohn Baldwin	eor	r1,r1,r11
973*bc3d5698SJohn Baldwin	eor	r2,r2,r12
974*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#376]
975*bc3d5698SJohn Baldwin	eor	r3,r3,r14
976*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#384]
977*bc3d5698SJohn Baldwin	eor	r4,r4,r10
978*bc3d5698SJohn Baldwin	eor	r5,r5,r11
979*bc3d5698SJohn Baldwin	eor	r6,r6,r12
980*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#392]
981*bc3d5698SJohn Baldwin	eor	r7,r7,r14
982*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#400]
983*bc3d5698SJohn Baldwin	eor	r8,r8,r10
984*bc3d5698SJohn Baldwin	eor	r9,r9,r11
985*bc3d5698SJohn Baldwin	eor	r0,r0,r12
986*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#408]
987*bc3d5698SJohn Baldwin	eor	r1,r1,r14
988*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#256]
989*bc3d5698SJohn Baldwin	eor	r2,r2,r10
990*bc3d5698SJohn Baldwin	eor	r3,r3,r11
991*bc3d5698SJohn Baldwin	eor	r4,r4,r12
992*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#264]
993*bc3d5698SJohn Baldwin	eor	r5,r5,r14
994*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#272]
995*bc3d5698SJohn Baldwin#else
996*bc3d5698SJohn Baldwin	eor	r0,r0,r10
997*bc3d5698SJohn Baldwin	add	r10,sp,#296
998*bc3d5698SJohn Baldwin	eor	r1,r1,r11
999*bc3d5698SJohn Baldwin	eor	r2,r2,r12
1000*bc3d5698SJohn Baldwin	eor	r3,r3,r14
1001*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][2..3]
1002*bc3d5698SJohn Baldwin	eor	r4,r4,r10
1003*bc3d5698SJohn Baldwin	add	r10,sp,#312
1004*bc3d5698SJohn Baldwin	eor	r5,r5,r11
1005*bc3d5698SJohn Baldwin	eor	r6,r6,r12
1006*bc3d5698SJohn Baldwin	eor	r7,r7,r14
1007*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][4]..A[2][0]
1008*bc3d5698SJohn Baldwin	eor	r8,r8,r10
1009*bc3d5698SJohn Baldwin	add	r10,sp,#328
1010*bc3d5698SJohn Baldwin	eor	r9,r9,r11
1011*bc3d5698SJohn Baldwin	eor	r0,r0,r12
1012*bc3d5698SJohn Baldwin	eor	r1,r1,r14
1013*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[2][1..2]
1014*bc3d5698SJohn Baldwin	eor	r2,r2,r10
1015*bc3d5698SJohn Baldwin	add	r10,sp,#344
1016*bc3d5698SJohn Baldwin	eor	r3,r3,r11
1017*bc3d5698SJohn Baldwin	eor	r4,r4,r12
1018*bc3d5698SJohn Baldwin	eor	r5,r5,r14
1019*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[2][3..4]
1020*bc3d5698SJohn Baldwin	eor	r6,r6,r10
1021*bc3d5698SJohn Baldwin	add	r10,sp,#360
1022*bc3d5698SJohn Baldwin	eor	r7,r7,r11
1023*bc3d5698SJohn Baldwin	eor	r8,r8,r12
1024*bc3d5698SJohn Baldwin	eor	r9,r9,r14
1025*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][0..1]
1026*bc3d5698SJohn Baldwin	eor	r0,r0,r10
1027*bc3d5698SJohn Baldwin	add	r10,sp,#376
1028*bc3d5698SJohn Baldwin	eor	r1,r1,r11
1029*bc3d5698SJohn Baldwin	eor	r2,r2,r12
1030*bc3d5698SJohn Baldwin	eor	r3,r3,r14
1031*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][2..3]
1032*bc3d5698SJohn Baldwin	eor	r4,r4,r10
1033*bc3d5698SJohn Baldwin	add	r10,sp,#392
1034*bc3d5698SJohn Baldwin	eor	r5,r5,r11
1035*bc3d5698SJohn Baldwin	eor	r6,r6,r12
1036*bc3d5698SJohn Baldwin	eor	r7,r7,r14
1037*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][4]..A[4][0]
1038*bc3d5698SJohn Baldwin	eor	r8,r8,r10
1039*bc3d5698SJohn Baldwin	ldr	r10,[sp,#408]		@ A[4][1]
1040*bc3d5698SJohn Baldwin	eor	r9,r9,r11
1041*bc3d5698SJohn Baldwin	ldr	r11,[sp,#408+4]
1042*bc3d5698SJohn Baldwin	eor	r0,r0,r12
1043*bc3d5698SJohn Baldwin	ldr	r12,[sp,#256]		@ A[0][2]
1044*bc3d5698SJohn Baldwin	eor	r1,r1,r14
1045*bc3d5698SJohn Baldwin	ldr	r14,[sp,#256+4]
1046*bc3d5698SJohn Baldwin	eor	r2,r2,r10
1047*bc3d5698SJohn Baldwin	add	r10,sp,#264
1048*bc3d5698SJohn Baldwin	eor	r3,r3,r11
1049*bc3d5698SJohn Baldwin	eor	r4,r4,r12
1050*bc3d5698SJohn Baldwin	eor	r5,r5,r14
1051*bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[0][3..4]
1052*bc3d5698SJohn Baldwin#endif
1053*bc3d5698SJohn Baldwin	eor	r6,r6,r10
1054*bc3d5698SJohn Baldwin	eor	r7,r7,r11
1055*bc3d5698SJohn Baldwin	eor	r8,r8,r12
1056*bc3d5698SJohn Baldwin	eor	r9,r9,r14
1057*bc3d5698SJohn Baldwin
1058*bc3d5698SJohn Baldwin	eor	r10,r0,r5,ror#32-1	@ E[0] = ROL64(C[2], 1) ^ C[0];
1059*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1060*bc3d5698SJohn Baldwin	str	r10,[sp,#208]		@ D[1] = E[0]
1061*bc3d5698SJohn Baldwin#endif
1062*bc3d5698SJohn Baldwin	eor	r11,r1,r4
1063*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1064*bc3d5698SJohn Baldwin	str	r11,[sp,#208+4]
1065*bc3d5698SJohn Baldwin#else
1066*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#208]		@ D[1] = E[0]
1067*bc3d5698SJohn Baldwin#endif
1068*bc3d5698SJohn Baldwin	eor	r12,r6,r1,ror#32-1	@ E[1] = ROL64(C[0], 1) ^ C[3];
1069*bc3d5698SJohn Baldwin	eor	r14,r7,r0
1070*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1071*bc3d5698SJohn Baldwin	str	r12,[sp,#232]		@ D[4] = E[1]
1072*bc3d5698SJohn Baldwin#endif
1073*bc3d5698SJohn Baldwin	eor	r0,r8,r3,ror#32-1	@ C[0] = ROL64(C[1], 1) ^ C[4];
1074*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1075*bc3d5698SJohn Baldwin	str	r14,[sp,#232+4]
1076*bc3d5698SJohn Baldwin#else
1077*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#232]		@ D[4] = E[1]
1078*bc3d5698SJohn Baldwin#endif
1079*bc3d5698SJohn Baldwin	eor	r1,r9,r2
1080*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1081*bc3d5698SJohn Baldwin	str	r0,[sp,#200]		@ D[0] = C[0]
1082*bc3d5698SJohn Baldwin#endif
1083*bc3d5698SJohn Baldwin	eor	r2,r2,r7,ror#32-1	@ C[1] = ROL64(C[3], 1) ^ C[1];
1084*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1085*bc3d5698SJohn Baldwin	ldr	r7,[sp,#384]
1086*bc3d5698SJohn Baldwin#endif
1087*bc3d5698SJohn Baldwin	eor	r3,r3,r6
1088*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1089*bc3d5698SJohn Baldwin	str	r1,[sp,#200+4]
1090*bc3d5698SJohn Baldwin#else
1091*bc3d5698SJohn Baldwin	strd	r0,r1,[sp,#200]		@ D[0] = C[0]
1092*bc3d5698SJohn Baldwin#endif
1093*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1094*bc3d5698SJohn Baldwin	ldr	r6,[sp,#384+4]
1095*bc3d5698SJohn Baldwin#else
1096*bc3d5698SJohn Baldwin	ldrd	r7,r6,[sp,#384]
1097*bc3d5698SJohn Baldwin#endif
1098*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1099*bc3d5698SJohn Baldwin	str	r2,[sp,#216]		@ D[2] = C[1]
1100*bc3d5698SJohn Baldwin#endif
1101*bc3d5698SJohn Baldwin	eor	r4,r4,r9,ror#32-1	@ C[2] = ROL64(C[4], 1) ^ C[2];
1102*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1103*bc3d5698SJohn Baldwin	str	r3,[sp,#216+4]
1104*bc3d5698SJohn Baldwin#else
1105*bc3d5698SJohn Baldwin	strd	r2,r3,[sp,#216]		@ D[2] = C[1]
1106*bc3d5698SJohn Baldwin#endif
1107*bc3d5698SJohn Baldwin	eor	r5,r5,r8
1108*bc3d5698SJohn Baldwin
1109*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1110*bc3d5698SJohn Baldwin	ldr	r8,[sp,#432]
1111*bc3d5698SJohn Baldwin#endif
1112*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1113*bc3d5698SJohn Baldwin	ldr	r9,[sp,#432+4]
1114*bc3d5698SJohn Baldwin#else
1115*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#432]
1116*bc3d5698SJohn Baldwin#endif
1117*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1118*bc3d5698SJohn Baldwin	str	r4,[sp,#224]		@ D[3] = C[2]
1119*bc3d5698SJohn Baldwin#endif
1120*bc3d5698SJohn Baldwin	eor	r7,r7,r4
1121*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1122*bc3d5698SJohn Baldwin	str	r5,[sp,#224+4]
1123*bc3d5698SJohn Baldwin#else
1124*bc3d5698SJohn Baldwin	strd	r4,r5,[sp,#224]		@ D[3] = C[2]
1125*bc3d5698SJohn Baldwin#endif
1126*bc3d5698SJohn Baldwin	eor	r6,r6,r5
1127*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1128*bc3d5698SJohn Baldwin	ldr	r4,[sp,#240]
1129*bc3d5698SJohn Baldwin#endif
1130*bc3d5698SJohn Baldwin	@ mov	r7,r7,ror#32-10		@ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
1131*bc3d5698SJohn Baldwin	@ mov	r6,r6,ror#32-11
1132*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1133*bc3d5698SJohn Baldwin	ldr	r5,[sp,#240+4]
1134*bc3d5698SJohn Baldwin#else
1135*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#240]
1136*bc3d5698SJohn Baldwin#endif
1137*bc3d5698SJohn Baldwin	eor	r8,r8,r12
1138*bc3d5698SJohn Baldwin	eor	r9,r9,r14
1139*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1140*bc3d5698SJohn Baldwin	ldr	r12,[sp,#336]
1141*bc3d5698SJohn Baldwin#endif
1142*bc3d5698SJohn Baldwin	eor	r0,r0,r4
1143*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1144*bc3d5698SJohn Baldwin	ldr	r14,[sp,#336+4]
1145*bc3d5698SJohn Baldwin#else
1146*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#336]
1147*bc3d5698SJohn Baldwin#endif
1148*bc3d5698SJohn Baldwin	@ mov	r8,r8,ror#32-7		@ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
1149*bc3d5698SJohn Baldwin	@ mov	r9,r9,ror#32-7
1150*bc3d5698SJohn Baldwin	eor	r1,r1,r5		@ C[0] =       A[0][0] ^ C[0];
1151*bc3d5698SJohn Baldwin	eor	r12,r12,r2
1152*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1153*bc3d5698SJohn Baldwin	ldr	r2,[sp,#288]
1154*bc3d5698SJohn Baldwin#endif
1155*bc3d5698SJohn Baldwin	eor	r14,r14,r3
1156*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1157*bc3d5698SJohn Baldwin	ldr	r3,[sp,#288+4]
1158*bc3d5698SJohn Baldwin#else
1159*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#288]
1160*bc3d5698SJohn Baldwin#endif
1161*bc3d5698SJohn Baldwin	mov	r5,r12,ror#32-21		@ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
1162*bc3d5698SJohn Baldwin	ldr	r12,[sp,#444]			@ load counter
1163*bc3d5698SJohn Baldwin	eor	r2,r2,r10
1164*bc3d5698SJohn Baldwin	adr	r10,iotas32
1165*bc3d5698SJohn Baldwin	mov	r4,r14,ror#32-22
1166*bc3d5698SJohn Baldwin	add	r14,r10,r12
1167*bc3d5698SJohn Baldwin	eor	r3,r3,r11
1168*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1169*bc3d5698SJohn Baldwin	ldr	r10,[r14,#8]		@ iotas[i].lo
1170*bc3d5698SJohn Baldwin#endif
1171*bc3d5698SJohn Baldwin	add	r12,r12,#16
1172*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1173*bc3d5698SJohn Baldwin	ldr	r11,[r14,#12]		@ iotas[i].hi
1174*bc3d5698SJohn Baldwin#else
1175*bc3d5698SJohn Baldwin	ldrd	r10,r11,[r14,#8]		@ iotas[i].lo
1176*bc3d5698SJohn Baldwin#endif
1177*bc3d5698SJohn Baldwin	cmp	r12,#192
1178*bc3d5698SJohn Baldwin	str	r12,[sp,#444]			@ store counter
1179*bc3d5698SJohn Baldwin	bic	r12,r4,r2,ror#32-22
1180*bc3d5698SJohn Baldwin	bic	r14,r5,r3,ror#32-22
1181*bc3d5698SJohn Baldwin	mov	r2,r2,ror#32-22		@ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
1182*bc3d5698SJohn Baldwin	mov	r3,r3,ror#32-22
1183*bc3d5698SJohn Baldwin	eor	r12,r12,r0
1184*bc3d5698SJohn Baldwin	eor	r14,r14,r1
1185*bc3d5698SJohn Baldwin	eor	r10,r10,r12
1186*bc3d5698SJohn Baldwin	eor	r11,r11,r14
1187*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1188*bc3d5698SJohn Baldwin	str	r10,[sp,#0]		@ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1189*bc3d5698SJohn Baldwin#endif
1190*bc3d5698SJohn Baldwin	bic	r12,r6,r4,ror#11
1191*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1192*bc3d5698SJohn Baldwin	str	r11,[sp,#0+4]
1193*bc3d5698SJohn Baldwin#else
1194*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#0]		@ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1195*bc3d5698SJohn Baldwin#endif
1196*bc3d5698SJohn Baldwin	bic	r14,r7,r5,ror#10
1197*bc3d5698SJohn Baldwin	bic	r10,r8,r6,ror#32-(11-7)
1198*bc3d5698SJohn Baldwin	bic	r11,r9,r7,ror#32-(10-7)
1199*bc3d5698SJohn Baldwin	eor	r12,r2,r12,ror#32-11
1200*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1201*bc3d5698SJohn Baldwin	str	r12,[sp,#8]		@ R[0][1] = C[1] ^ (~C[2] & C[3]);
1202*bc3d5698SJohn Baldwin#endif
1203*bc3d5698SJohn Baldwin	eor	r14,r3,r14,ror#32-10
1204*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1205*bc3d5698SJohn Baldwin	str	r14,[sp,#8+4]
1206*bc3d5698SJohn Baldwin#else
1207*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#8]		@ R[0][1] = C[1] ^ (~C[2] & C[3]);
1208*bc3d5698SJohn Baldwin#endif
1209*bc3d5698SJohn Baldwin	eor	r10,r4,r10,ror#32-7
1210*bc3d5698SJohn Baldwin	eor	r11,r5,r11,ror#32-7
1211*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1212*bc3d5698SJohn Baldwin	str	r10,[sp,#16]		@ R[0][2] = C[2] ^ (~C[3] & C[4]);
1213*bc3d5698SJohn Baldwin#endif
1214*bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#32-7
1215*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1216*bc3d5698SJohn Baldwin	str	r11,[sp,#16+4]
1217*bc3d5698SJohn Baldwin#else
1218*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#16]		@ R[0][2] = C[2] ^ (~C[3] & C[4]);
1219*bc3d5698SJohn Baldwin#endif
1220*bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#32-7
1221*bc3d5698SJohn Baldwin	eor	r12,r12,r6,ror#32-11
1222*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1223*bc3d5698SJohn Baldwin	str	r12,[sp,#24]		@ R[0][3] = C[3] ^ (~C[4] & C[0]);
1224*bc3d5698SJohn Baldwin#endif
1225*bc3d5698SJohn Baldwin	eor	r14,r14,r7,ror#32-10
1226*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1227*bc3d5698SJohn Baldwin	str	r14,[sp,#24+4]
1228*bc3d5698SJohn Baldwin#else
1229*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#24]		@ R[0][3] = C[3] ^ (~C[4] & C[0]);
1230*bc3d5698SJohn Baldwin#endif
1231*bc3d5698SJohn Baldwin	bic	r10,r2,r0
1232*bc3d5698SJohn Baldwin	add	r14,sp,#224
1233*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1234*bc3d5698SJohn Baldwin	ldr	r0,[sp,#264]		@ A[0][3]
1235*bc3d5698SJohn Baldwin#endif
1236*bc3d5698SJohn Baldwin	bic	r11,r3,r1
1237*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1238*bc3d5698SJohn Baldwin	ldr	r1,[sp,#264+4]
1239*bc3d5698SJohn Baldwin#else
1240*bc3d5698SJohn Baldwin	ldrd	r0,r1,[sp,#264]		@ A[0][3]
1241*bc3d5698SJohn Baldwin#endif
1242*bc3d5698SJohn Baldwin	eor	r10,r10,r8,ror#32-7
1243*bc3d5698SJohn Baldwin	eor	r11,r11,r9,ror#32-7
1244*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1245*bc3d5698SJohn Baldwin	str	r10,[sp,#32]		@ R[0][4] = C[4] ^ (~C[0] & C[1]);
1246*bc3d5698SJohn Baldwin#endif
1247*bc3d5698SJohn Baldwin	add	r9,sp,#200
1248*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1249*bc3d5698SJohn Baldwin	str	r11,[sp,#32+4]
1250*bc3d5698SJohn Baldwin#else
1251*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#32]		@ R[0][4] = C[4] ^ (~C[0] & C[1]);
1252*bc3d5698SJohn Baldwin#endif
1253*bc3d5698SJohn Baldwin
1254*bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11,r12,r14}	@ D[3..4]
1255*bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[0..1]
1256*bc3d5698SJohn Baldwin
1257*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1258*bc3d5698SJohn Baldwin	ldr	r2,[sp,#312]		@ A[1][4]
1259*bc3d5698SJohn Baldwin#endif
1260*bc3d5698SJohn Baldwin	eor	r0,r0,r10
1261*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1262*bc3d5698SJohn Baldwin	ldr	r3,[sp,#312+4]
1263*bc3d5698SJohn Baldwin#else
1264*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#312]		@ A[1][4]
1265*bc3d5698SJohn Baldwin#endif
1266*bc3d5698SJohn Baldwin	eor	r1,r1,r11
1267*bc3d5698SJohn Baldwin	@ mov	r0,r0,ror#32-14		@ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
1268*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1269*bc3d5698SJohn Baldwin	ldr	r10,[sp,#368]		@ A[3][1]
1270*bc3d5698SJohn Baldwin#endif
1271*bc3d5698SJohn Baldwin	@ mov	r1,r1,ror#32-14
1272*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1273*bc3d5698SJohn Baldwin	ldr	r11,[sp,#368+4]
1274*bc3d5698SJohn Baldwin#else
1275*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#368]		@ A[3][1]
1276*bc3d5698SJohn Baldwin#endif
1277*bc3d5698SJohn Baldwin
1278*bc3d5698SJohn Baldwin	eor	r2,r2,r12
1279*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1280*bc3d5698SJohn Baldwin	ldr	r4,[sp,#320]		@ A[2][0]
1281*bc3d5698SJohn Baldwin#endif
1282*bc3d5698SJohn Baldwin	eor	r3,r3,r14
1283*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1284*bc3d5698SJohn Baldwin	ldr	r5,[sp,#320+4]
1285*bc3d5698SJohn Baldwin#else
1286*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#320]		@ A[2][0]
1287*bc3d5698SJohn Baldwin#endif
1288*bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-10		@ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
1289*bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-10
1290*bc3d5698SJohn Baldwin
1291*bc3d5698SJohn Baldwin	eor	r6,r6,r4
1292*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1293*bc3d5698SJohn Baldwin	ldr	r12,[sp,#216]		@ D[2]
1294*bc3d5698SJohn Baldwin#endif
1295*bc3d5698SJohn Baldwin	eor	r7,r7,r5
1296*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1297*bc3d5698SJohn Baldwin	ldr	r14,[sp,#216+4]
1298*bc3d5698SJohn Baldwin#else
1299*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#216]		@ D[2]
1300*bc3d5698SJohn Baldwin#endif
1301*bc3d5698SJohn Baldwin	mov	r5,r6,ror#32-1		@ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
1302*bc3d5698SJohn Baldwin	mov	r4,r7,ror#32-2
1303*bc3d5698SJohn Baldwin
1304*bc3d5698SJohn Baldwin	eor	r10,r10,r8
1305*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1306*bc3d5698SJohn Baldwin	ldr	r8,[sp,#416]		@ A[4][2]
1307*bc3d5698SJohn Baldwin#endif
1308*bc3d5698SJohn Baldwin	eor	r11,r11,r9
1309*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1310*bc3d5698SJohn Baldwin	ldr	r9,[sp,#416+4]
1311*bc3d5698SJohn Baldwin#else
1312*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#416]		@ A[4][2]
1313*bc3d5698SJohn Baldwin#endif
1314*bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-22		@ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
1315*bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-23
1316*bc3d5698SJohn Baldwin
1317*bc3d5698SJohn Baldwin	bic	r10,r4,r2,ror#32-10
1318*bc3d5698SJohn Baldwin	bic	r11,r5,r3,ror#32-10
1319*bc3d5698SJohn Baldwin	eor	r12,r12,r8
1320*bc3d5698SJohn Baldwin	eor	r14,r14,r9
1321*bc3d5698SJohn Baldwin	mov	r9,r12,ror#32-30		@ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
1322*bc3d5698SJohn Baldwin	mov	r8,r14,ror#32-31
1323*bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-14
1324*bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-14
1325*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1326*bc3d5698SJohn Baldwin	str	r10,[sp,#40]		@ R[1][0] = C[0] ^ (~C[1] & C[2])
1327*bc3d5698SJohn Baldwin#endif
1328*bc3d5698SJohn Baldwin	bic	r12,r6,r4
1329*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1330*bc3d5698SJohn Baldwin	str	r11,[sp,#40+4]
1331*bc3d5698SJohn Baldwin#else
1332*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#40]		@ R[1][0] = C[0] ^ (~C[1] & C[2])
1333*bc3d5698SJohn Baldwin#endif
1334*bc3d5698SJohn Baldwin	bic	r14,r7,r5
1335*bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-10
1336*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1337*bc3d5698SJohn Baldwin	str	r12,[sp,#48]		@ R[1][1] = C[1] ^ (~C[2] & C[3]);
1338*bc3d5698SJohn Baldwin#endif
1339*bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-10
1340*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1341*bc3d5698SJohn Baldwin	str	r14,[sp,#48+4]
1342*bc3d5698SJohn Baldwin#else
1343*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#48]		@ R[1][1] = C[1] ^ (~C[2] & C[3]);
1344*bc3d5698SJohn Baldwin#endif
1345*bc3d5698SJohn Baldwin	bic	r10,r8,r6
1346*bc3d5698SJohn Baldwin	bic	r11,r9,r7
1347*bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#14
1348*bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#14
1349*bc3d5698SJohn Baldwin	eor	r10,r10,r4
1350*bc3d5698SJohn Baldwin	eor	r11,r11,r5
1351*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1352*bc3d5698SJohn Baldwin	str	r10,[sp,#56]		@ R[1][2] = C[2] ^ (~C[3] & C[4]);
1353*bc3d5698SJohn Baldwin#endif
1354*bc3d5698SJohn Baldwin	bic	r2,r2,r0,ror#32-(14-10)
1355*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1356*bc3d5698SJohn Baldwin	str	r11,[sp,#56+4]
1357*bc3d5698SJohn Baldwin#else
1358*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#56]		@ R[1][2] = C[2] ^ (~C[3] & C[4]);
1359*bc3d5698SJohn Baldwin#endif
1360*bc3d5698SJohn Baldwin	eor	r12,r6,r12,ror#32-14
1361*bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#32-(14-10)
1362*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1363*bc3d5698SJohn Baldwin	str	r12,[sp,#64]		@ R[1][3] = C[3] ^ (~C[4] & C[0]);
1364*bc3d5698SJohn Baldwin#endif
1365*bc3d5698SJohn Baldwin	eor	r14,r7,r14,ror#32-14
1366*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1367*bc3d5698SJohn Baldwin	str	r14,[sp,#64+4]
1368*bc3d5698SJohn Baldwin#else
1369*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#64]		@ R[1][3] = C[3] ^ (~C[4] & C[0]);
1370*bc3d5698SJohn Baldwin#endif
1371*bc3d5698SJohn Baldwin	add	r12,sp,#208
1372*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1373*bc3d5698SJohn Baldwin	ldr	r1,[sp,#248]		@ A[0][1]
1374*bc3d5698SJohn Baldwin#endif
1375*bc3d5698SJohn Baldwin	eor	r10,r8,r2,ror#32-10
1376*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1377*bc3d5698SJohn Baldwin	ldr	r0,[sp,#248+4]
1378*bc3d5698SJohn Baldwin#else
1379*bc3d5698SJohn Baldwin	ldrd	r1,r0,[sp,#248]		@ A[0][1]
1380*bc3d5698SJohn Baldwin#endif
1381*bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-10
1382*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1383*bc3d5698SJohn Baldwin	str	r10,[sp,#72]		@ R[1][4] = C[4] ^ (~C[0] & C[1]);
1384*bc3d5698SJohn Baldwin#endif
1385*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1386*bc3d5698SJohn Baldwin	str	r11,[sp,#72+4]
1387*bc3d5698SJohn Baldwin#else
1388*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#72]		@ R[1][4] = C[4] ^ (~C[0] & C[1]);
1389*bc3d5698SJohn Baldwin#endif
1390*bc3d5698SJohn Baldwin
1391*bc3d5698SJohn Baldwin	add	r9,sp,#224
1392*bc3d5698SJohn Baldwin	ldmia	r12,{r10,r11,r12,r14}	@ D[1..2]
1393*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1394*bc3d5698SJohn Baldwin	ldr	r2,[sp,#296]		@ A[1][2]
1395*bc3d5698SJohn Baldwin#endif
1396*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1397*bc3d5698SJohn Baldwin	ldr	r3,[sp,#296+4]
1398*bc3d5698SJohn Baldwin#else
1399*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#296]		@ A[1][2]
1400*bc3d5698SJohn Baldwin#endif
1401*bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[3..4]
1402*bc3d5698SJohn Baldwin
1403*bc3d5698SJohn Baldwin	eor	r1,r1,r10
1404*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1405*bc3d5698SJohn Baldwin	ldr	r4,[sp,#344]		@ A[2][3]
1406*bc3d5698SJohn Baldwin#endif
1407*bc3d5698SJohn Baldwin	eor	r0,r0,r11
1408*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1409*bc3d5698SJohn Baldwin	ldr	r5,[sp,#344+4]
1410*bc3d5698SJohn Baldwin#else
1411*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#344]		@ A[2][3]
1412*bc3d5698SJohn Baldwin#endif
1413*bc3d5698SJohn Baldwin	mov	r0,r0,ror#32-1		@ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
1414*bc3d5698SJohn Baldwin
1415*bc3d5698SJohn Baldwin	eor	r2,r2,r12
1416*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1417*bc3d5698SJohn Baldwin	ldr	r10,[sp,#392]		@ A[3][4]
1418*bc3d5698SJohn Baldwin#endif
1419*bc3d5698SJohn Baldwin	eor	r3,r3,r14
1420*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1421*bc3d5698SJohn Baldwin	ldr	r11,[sp,#392+4]
1422*bc3d5698SJohn Baldwin#else
1423*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#392]		@ A[3][4]
1424*bc3d5698SJohn Baldwin#endif
1425*bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-3		@ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
1426*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1427*bc3d5698SJohn Baldwin	ldr	r12,[sp,#200]		@ D[0]
1428*bc3d5698SJohn Baldwin#endif
1429*bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-3
1430*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1431*bc3d5698SJohn Baldwin	ldr	r14,[sp,#200+4]
1432*bc3d5698SJohn Baldwin#else
1433*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#200]		@ D[0]
1434*bc3d5698SJohn Baldwin#endif
1435*bc3d5698SJohn Baldwin
1436*bc3d5698SJohn Baldwin	eor	r4,r4,r6
1437*bc3d5698SJohn Baldwin	eor	r5,r5,r7
1438*bc3d5698SJohn Baldwin	@ mov	r5,r6,ror#32-12		@ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
1439*bc3d5698SJohn Baldwin	@ mov	r4,r7,ror#32-13		@ [track reverse order below]
1440*bc3d5698SJohn Baldwin
1441*bc3d5698SJohn Baldwin	eor	r10,r10,r8
1442*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1443*bc3d5698SJohn Baldwin	ldr	r8,[sp,#400]		@ A[4][0]
1444*bc3d5698SJohn Baldwin#endif
1445*bc3d5698SJohn Baldwin	eor	r11,r11,r9
1446*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1447*bc3d5698SJohn Baldwin	ldr	r9,[sp,#400+4]
1448*bc3d5698SJohn Baldwin#else
1449*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#400]		@ A[4][0]
1450*bc3d5698SJohn Baldwin#endif
1451*bc3d5698SJohn Baldwin	mov	r6,r10,ror#32-4		@ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
1452*bc3d5698SJohn Baldwin	mov	r7,r11,ror#32-4
1453*bc3d5698SJohn Baldwin
1454*bc3d5698SJohn Baldwin	eor	r12,r12,r8
1455*bc3d5698SJohn Baldwin	eor	r14,r14,r9
1456*bc3d5698SJohn Baldwin	mov	r8,r12,ror#32-9		@ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
1457*bc3d5698SJohn Baldwin	mov	r9,r14,ror#32-9
1458*bc3d5698SJohn Baldwin
1459*bc3d5698SJohn Baldwin	bic	r10,r5,r2,ror#13-3
1460*bc3d5698SJohn Baldwin	bic	r11,r4,r3,ror#12-3
1461*bc3d5698SJohn Baldwin	bic	r12,r6,r5,ror#32-13
1462*bc3d5698SJohn Baldwin	bic	r14,r7,r4,ror#32-12
1463*bc3d5698SJohn Baldwin	eor	r10,r0,r10,ror#32-13
1464*bc3d5698SJohn Baldwin	eor	r11,r1,r11,ror#32-12
1465*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1466*bc3d5698SJohn Baldwin	str	r10,[sp,#80]		@ R[2][0] = C[0] ^ (~C[1] & C[2])
1467*bc3d5698SJohn Baldwin#endif
1468*bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-3
1469*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1470*bc3d5698SJohn Baldwin	str	r11,[sp,#80+4]
1471*bc3d5698SJohn Baldwin#else
1472*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#80]		@ R[2][0] = C[0] ^ (~C[1] & C[2])
1473*bc3d5698SJohn Baldwin#endif
1474*bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-3
1475*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1476*bc3d5698SJohn Baldwin	str	r12,[sp,#88]		@ R[2][1] = C[1] ^ (~C[2] & C[3]);
1477*bc3d5698SJohn Baldwin#endif
1478*bc3d5698SJohn Baldwin	bic	r10,r8,r6
1479*bc3d5698SJohn Baldwin	bic	r11,r9,r7
1480*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1481*bc3d5698SJohn Baldwin	str	r14,[sp,#88+4]
1482*bc3d5698SJohn Baldwin#else
1483*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#88]		@ R[2][1] = C[1] ^ (~C[2] & C[3]);
1484*bc3d5698SJohn Baldwin#endif
1485*bc3d5698SJohn Baldwin	eor	r10,r10,r5,ror#32-13
1486*bc3d5698SJohn Baldwin	eor	r11,r11,r4,ror#32-12
1487*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1488*bc3d5698SJohn Baldwin	str	r10,[sp,#96]		@ R[2][2] = C[2] ^ (~C[3] & C[4]);
1489*bc3d5698SJohn Baldwin#endif
1490*bc3d5698SJohn Baldwin	bic	r12,r0,r8
1491*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1492*bc3d5698SJohn Baldwin	str	r11,[sp,#96+4]
1493*bc3d5698SJohn Baldwin#else
1494*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#96]		@ R[2][2] = C[2] ^ (~C[3] & C[4]);
1495*bc3d5698SJohn Baldwin#endif
1496*bc3d5698SJohn Baldwin	bic	r14,r1,r9
1497*bc3d5698SJohn Baldwin	eor	r12,r12,r6
1498*bc3d5698SJohn Baldwin	eor	r14,r14,r7
1499*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1500*bc3d5698SJohn Baldwin	str	r12,[sp,#104]		@ R[2][3] = C[3] ^ (~C[4] & C[0]);
1501*bc3d5698SJohn Baldwin#endif
1502*bc3d5698SJohn Baldwin	bic	r10,r2,r0,ror#3
1503*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1504*bc3d5698SJohn Baldwin	str	r14,[sp,#104+4]
1505*bc3d5698SJohn Baldwin#else
1506*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#104]		@ R[2][3] = C[3] ^ (~C[4] & C[0]);
1507*bc3d5698SJohn Baldwin#endif
1508*bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#3
1509*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1510*bc3d5698SJohn Baldwin	ldr	r1,[sp,#272]		@ A[0][4] [in reverse order]
1511*bc3d5698SJohn Baldwin#endif
1512*bc3d5698SJohn Baldwin	eor	r10,r8,r10,ror#32-3
1513*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1514*bc3d5698SJohn Baldwin	ldr	r0,[sp,#272+4]
1515*bc3d5698SJohn Baldwin#else
1516*bc3d5698SJohn Baldwin	ldrd	r1,r0,[sp,#272]		@ A[0][4] [in reverse order]
1517*bc3d5698SJohn Baldwin#endif
1518*bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-3
1519*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1520*bc3d5698SJohn Baldwin	str	r10,[sp,#112]		@ R[2][4] = C[4] ^ (~C[0] & C[1]);
1521*bc3d5698SJohn Baldwin#endif
1522*bc3d5698SJohn Baldwin	add	r9,sp,#208
1523*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1524*bc3d5698SJohn Baldwin	str	r11,[sp,#112+4]
1525*bc3d5698SJohn Baldwin#else
1526*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#112]		@ R[2][4] = C[4] ^ (~C[0] & C[1]);
1527*bc3d5698SJohn Baldwin#endif
1528*bc3d5698SJohn Baldwin
1529*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1530*bc3d5698SJohn Baldwin	ldr	r10,[sp,#232]		@ D[4]
1531*bc3d5698SJohn Baldwin#endif
1532*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1533*bc3d5698SJohn Baldwin	ldr	r11,[sp,#232+4]
1534*bc3d5698SJohn Baldwin#else
1535*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#232]		@ D[4]
1536*bc3d5698SJohn Baldwin#endif
1537*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1538*bc3d5698SJohn Baldwin	ldr	r12,[sp,#200]		@ D[0]
1539*bc3d5698SJohn Baldwin#endif
1540*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1541*bc3d5698SJohn Baldwin	ldr	r14,[sp,#200+4]
1542*bc3d5698SJohn Baldwin#else
1543*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#200]		@ D[0]
1544*bc3d5698SJohn Baldwin#endif
1545*bc3d5698SJohn Baldwin
1546*bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[1..2]
1547*bc3d5698SJohn Baldwin
1548*bc3d5698SJohn Baldwin	eor	r1,r1,r10
1549*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1550*bc3d5698SJohn Baldwin	ldr	r2,[sp,#280]		@ A[1][0]
1551*bc3d5698SJohn Baldwin#endif
1552*bc3d5698SJohn Baldwin	eor	r0,r0,r11
1553*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1554*bc3d5698SJohn Baldwin	ldr	r3,[sp,#280+4]
1555*bc3d5698SJohn Baldwin#else
1556*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#280]		@ A[1][0]
1557*bc3d5698SJohn Baldwin#endif
1558*bc3d5698SJohn Baldwin	@ mov	r1,r10,ror#32-13		@ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
1559*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1560*bc3d5698SJohn Baldwin	ldr	r4,[sp,#328]		@ A[2][1]
1561*bc3d5698SJohn Baldwin#endif
1562*bc3d5698SJohn Baldwin	@ mov	r0,r11,ror#32-14		@ [was loaded in reverse order]
1563*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1564*bc3d5698SJohn Baldwin	ldr	r5,[sp,#328+4]
1565*bc3d5698SJohn Baldwin#else
1566*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#328]		@ A[2][1]
1567*bc3d5698SJohn Baldwin#endif
1568*bc3d5698SJohn Baldwin
1569*bc3d5698SJohn Baldwin	eor	r2,r2,r12
1570*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1571*bc3d5698SJohn Baldwin	ldr	r10,[sp,#376]		@ A[3][2]
1572*bc3d5698SJohn Baldwin#endif
1573*bc3d5698SJohn Baldwin	eor	r3,r3,r14
1574*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1575*bc3d5698SJohn Baldwin	ldr	r11,[sp,#376+4]
1576*bc3d5698SJohn Baldwin#else
1577*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#376]		@ A[3][2]
1578*bc3d5698SJohn Baldwin#endif
1579*bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-18		@ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
1580*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1581*bc3d5698SJohn Baldwin	ldr	r12,[sp,#224]		@ D[3]
1582*bc3d5698SJohn Baldwin#endif
1583*bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-18
1584*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1585*bc3d5698SJohn Baldwin	ldr	r14,[sp,#224+4]
1586*bc3d5698SJohn Baldwin#else
1587*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#224]		@ D[3]
1588*bc3d5698SJohn Baldwin#endif
1589*bc3d5698SJohn Baldwin
1590*bc3d5698SJohn Baldwin	eor	r6,r6,r4
1591*bc3d5698SJohn Baldwin	eor	r7,r7,r5
1592*bc3d5698SJohn Baldwin	mov	r4,r6,ror#32-5		@ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
1593*bc3d5698SJohn Baldwin	mov	r5,r7,ror#32-5
1594*bc3d5698SJohn Baldwin
1595*bc3d5698SJohn Baldwin	eor	r10,r10,r8
1596*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1597*bc3d5698SJohn Baldwin	ldr	r8,[sp,#424]		@ A[4][3]
1598*bc3d5698SJohn Baldwin#endif
1599*bc3d5698SJohn Baldwin	eor	r11,r11,r9
1600*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1601*bc3d5698SJohn Baldwin	ldr	r9,[sp,#424+4]
1602*bc3d5698SJohn Baldwin#else
1603*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#424]		@ A[4][3]
1604*bc3d5698SJohn Baldwin#endif
1605*bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-7		@ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
1606*bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-8
1607*bc3d5698SJohn Baldwin
1608*bc3d5698SJohn Baldwin	eor	r12,r12,r8
1609*bc3d5698SJohn Baldwin	eor	r14,r14,r9
1610*bc3d5698SJohn Baldwin	mov	r8,r12,ror#32-28		@ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
1611*bc3d5698SJohn Baldwin	mov	r9,r14,ror#32-28
1612*bc3d5698SJohn Baldwin
1613*bc3d5698SJohn Baldwin	bic	r10,r4,r2,ror#32-18
1614*bc3d5698SJohn Baldwin	bic	r11,r5,r3,ror#32-18
1615*bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-14
1616*bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-13
1617*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1618*bc3d5698SJohn Baldwin	str	r10,[sp,#120]		@ R[3][0] = C[0] ^ (~C[1] & C[2])
1619*bc3d5698SJohn Baldwin#endif
1620*bc3d5698SJohn Baldwin	bic	r12,r6,r4
1621*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1622*bc3d5698SJohn Baldwin	str	r11,[sp,#120+4]
1623*bc3d5698SJohn Baldwin#else
1624*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#120]		@ R[3][0] = C[0] ^ (~C[1] & C[2])
1625*bc3d5698SJohn Baldwin#endif
1626*bc3d5698SJohn Baldwin	bic	r14,r7,r5
1627*bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-18
1628*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1629*bc3d5698SJohn Baldwin	str	r12,[sp,#128]		@ R[3][1] = C[1] ^ (~C[2] & C[3]);
1630*bc3d5698SJohn Baldwin#endif
1631*bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-18
1632*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1633*bc3d5698SJohn Baldwin	str	r14,[sp,#128+4]
1634*bc3d5698SJohn Baldwin#else
1635*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#128]		@ R[3][1] = C[1] ^ (~C[2] & C[3]);
1636*bc3d5698SJohn Baldwin#endif
1637*bc3d5698SJohn Baldwin	bic	r10,r8,r6
1638*bc3d5698SJohn Baldwin	bic	r11,r9,r7
1639*bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#14
1640*bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#13
1641*bc3d5698SJohn Baldwin	eor	r10,r10,r4
1642*bc3d5698SJohn Baldwin	eor	r11,r11,r5
1643*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1644*bc3d5698SJohn Baldwin	str	r10,[sp,#136]		@ R[3][2] = C[2] ^ (~C[3] & C[4]);
1645*bc3d5698SJohn Baldwin#endif
1646*bc3d5698SJohn Baldwin	bic	r2,r2,r0,ror#18-14
1647*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1648*bc3d5698SJohn Baldwin	str	r11,[sp,#136+4]
1649*bc3d5698SJohn Baldwin#else
1650*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#136]		@ R[3][2] = C[2] ^ (~C[3] & C[4]);
1651*bc3d5698SJohn Baldwin#endif
1652*bc3d5698SJohn Baldwin	eor	r12,r6,r12,ror#32-14
1653*bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#18-13
1654*bc3d5698SJohn Baldwin	eor	r14,r7,r14,ror#32-13
1655*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1656*bc3d5698SJohn Baldwin	str	r12,[sp,#144]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
1657*bc3d5698SJohn Baldwin#endif
1658*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1659*bc3d5698SJohn Baldwin	str	r14,[sp,#144+4]
1660*bc3d5698SJohn Baldwin#else
1661*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#144]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
1662*bc3d5698SJohn Baldwin#endif
1663*bc3d5698SJohn Baldwin	add	r14,sp,#216
1664*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1665*bc3d5698SJohn Baldwin	ldr	r0,[sp,#256]		@ A[0][2]
1666*bc3d5698SJohn Baldwin#endif
1667*bc3d5698SJohn Baldwin	eor	r10,r8,r2,ror#32-18
1668*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1669*bc3d5698SJohn Baldwin	ldr	r1,[sp,#256+4]
1670*bc3d5698SJohn Baldwin#else
1671*bc3d5698SJohn Baldwin	ldrd	r0,r1,[sp,#256]		@ A[0][2]
1672*bc3d5698SJohn Baldwin#endif
1673*bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-18
1674*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1675*bc3d5698SJohn Baldwin	str	r10,[sp,#152]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
1676*bc3d5698SJohn Baldwin#endif
1677*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1678*bc3d5698SJohn Baldwin	str	r11,[sp,#152+4]
1679*bc3d5698SJohn Baldwin#else
1680*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#152]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
1681*bc3d5698SJohn Baldwin#endif
1682*bc3d5698SJohn Baldwin
1683*bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11,r12,r14}	@ D[2..3]
1684*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1685*bc3d5698SJohn Baldwin	ldr	r2,[sp,#304]		@ A[1][3]
1686*bc3d5698SJohn Baldwin#endif
1687*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1688*bc3d5698SJohn Baldwin	ldr	r3,[sp,#304+4]
1689*bc3d5698SJohn Baldwin#else
1690*bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#304]		@ A[1][3]
1691*bc3d5698SJohn Baldwin#endif
1692*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1693*bc3d5698SJohn Baldwin	ldr	r6,[sp,#232]		@ D[4]
1694*bc3d5698SJohn Baldwin#endif
1695*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1696*bc3d5698SJohn Baldwin	ldr	r7,[sp,#232+4]
1697*bc3d5698SJohn Baldwin#else
1698*bc3d5698SJohn Baldwin	ldrd	r6,r7,[sp,#232]		@ D[4]
1699*bc3d5698SJohn Baldwin#endif
1700*bc3d5698SJohn Baldwin
1701*bc3d5698SJohn Baldwin	eor	r0,r0,r10
1702*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1703*bc3d5698SJohn Baldwin	ldr	r4,[sp,#352]		@ A[2][4]
1704*bc3d5698SJohn Baldwin#endif
1705*bc3d5698SJohn Baldwin	eor	r1,r1,r11
1706*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1707*bc3d5698SJohn Baldwin	ldr	r5,[sp,#352+4]
1708*bc3d5698SJohn Baldwin#else
1709*bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#352]		@ A[2][4]
1710*bc3d5698SJohn Baldwin#endif
1711*bc3d5698SJohn Baldwin	@ mov	r0,r0,ror#32-31		@ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
1712*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1713*bc3d5698SJohn Baldwin	ldr	r8,[sp,#200]		@ D[0]
1714*bc3d5698SJohn Baldwin#endif
1715*bc3d5698SJohn Baldwin	@ mov	r1,r1,ror#32-31
1716*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1717*bc3d5698SJohn Baldwin	ldr	r9,[sp,#200+4]
1718*bc3d5698SJohn Baldwin#else
1719*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#200]		@ D[0]
1720*bc3d5698SJohn Baldwin#endif
1721*bc3d5698SJohn Baldwin
1722*bc3d5698SJohn Baldwin	eor	r12,r12,r2
1723*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1724*bc3d5698SJohn Baldwin	ldr	r10,[sp,#360]		@ A[3][0]
1725*bc3d5698SJohn Baldwin#endif
1726*bc3d5698SJohn Baldwin	eor	r14,r14,r3
1727*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1728*bc3d5698SJohn Baldwin	ldr	r11,[sp,#360+4]
1729*bc3d5698SJohn Baldwin#else
1730*bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#360]		@ A[3][0]
1731*bc3d5698SJohn Baldwin#endif
1732*bc3d5698SJohn Baldwin	mov	r3,r12,ror#32-27		@ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
1733*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1734*bc3d5698SJohn Baldwin	ldr	r12,[sp,#208]		@ D[1]
1735*bc3d5698SJohn Baldwin#endif
1736*bc3d5698SJohn Baldwin	mov	r2,r14,ror#32-28
1737*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1738*bc3d5698SJohn Baldwin	ldr	r14,[sp,#208+4]
1739*bc3d5698SJohn Baldwin#else
1740*bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#208]		@ D[1]
1741*bc3d5698SJohn Baldwin#endif
1742*bc3d5698SJohn Baldwin
1743*bc3d5698SJohn Baldwin	eor	r6,r6,r4
1744*bc3d5698SJohn Baldwin	eor	r7,r7,r5
1745*bc3d5698SJohn Baldwin	mov	r5,r6,ror#32-19		@ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
1746*bc3d5698SJohn Baldwin	mov	r4,r7,ror#32-20
1747*bc3d5698SJohn Baldwin
1748*bc3d5698SJohn Baldwin	eor	r10,r10,r8
1749*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1750*bc3d5698SJohn Baldwin	ldr	r8,[sp,#408]		@ A[4][1]
1751*bc3d5698SJohn Baldwin#endif
1752*bc3d5698SJohn Baldwin	eor	r11,r11,r9
1753*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1754*bc3d5698SJohn Baldwin	ldr	r9,[sp,#408+4]
1755*bc3d5698SJohn Baldwin#else
1756*bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#408]		@ A[4][1]
1757*bc3d5698SJohn Baldwin#endif
1758*bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-20		@ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
1759*bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-21
1760*bc3d5698SJohn Baldwin
1761*bc3d5698SJohn Baldwin	eor	r8,r8,r12
1762*bc3d5698SJohn Baldwin	eor	r9,r9,r14
1763*bc3d5698SJohn Baldwin	@ mov	r8,r2,ror#32-1		@ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
1764*bc3d5698SJohn Baldwin	@ mov	r9,r3,ror#32-1
1765*bc3d5698SJohn Baldwin
1766*bc3d5698SJohn Baldwin	bic	r10,r4,r2
1767*bc3d5698SJohn Baldwin	bic	r11,r5,r3
1768*bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-31
1769*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1770*bc3d5698SJohn Baldwin	str	r10,[sp,#160]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
1771*bc3d5698SJohn Baldwin#endif
1772*bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-31
1773*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1774*bc3d5698SJohn Baldwin	str	r11,[sp,#160+4]
1775*bc3d5698SJohn Baldwin#else
1776*bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#160]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
1777*bc3d5698SJohn Baldwin#endif
1778*bc3d5698SJohn Baldwin	bic	r12,r6,r4
1779*bc3d5698SJohn Baldwin	bic	r14,r7,r5
1780*bc3d5698SJohn Baldwin	eor	r12,r12,r2
1781*bc3d5698SJohn Baldwin	eor	r14,r14,r3
1782*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1783*bc3d5698SJohn Baldwin	str	r12,[sp,#168]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
1784*bc3d5698SJohn Baldwin#endif
1785*bc3d5698SJohn Baldwin	bic	r10,r8,r6,ror#1
1786*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1787*bc3d5698SJohn Baldwin	str	r14,[sp,#168+4]
1788*bc3d5698SJohn Baldwin#else
1789*bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#168]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
1790*bc3d5698SJohn Baldwin#endif
1791*bc3d5698SJohn Baldwin	bic	r11,r9,r7,ror#1
1792*bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#31-1
1793*bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#31-1
1794*bc3d5698SJohn Baldwin	eor	r4,r4,r10,ror#32-1
1795*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1796*bc3d5698SJohn Baldwin	str	r4,[sp,#176]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
1797*bc3d5698SJohn Baldwin#endif
1798*bc3d5698SJohn Baldwin	eor	r5,r5,r11,ror#32-1
1799*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1800*bc3d5698SJohn Baldwin	str	r5,[sp,#176+4]
1801*bc3d5698SJohn Baldwin#else
1802*bc3d5698SJohn Baldwin	strd	r4,r5,[sp,#176]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
1803*bc3d5698SJohn Baldwin#endif
1804*bc3d5698SJohn Baldwin	eor	r6,r6,r12,ror#32-31
1805*bc3d5698SJohn Baldwin	eor	r7,r7,r14,ror#32-31
1806*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1807*bc3d5698SJohn Baldwin	str	r6,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
1808*bc3d5698SJohn Baldwin#endif
1809*bc3d5698SJohn Baldwin	bic	r10,r2,r0,ror#32-31
1810*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1811*bc3d5698SJohn Baldwin	str	r7,[sp,#184+4]
1812*bc3d5698SJohn Baldwin#else
1813*bc3d5698SJohn Baldwin	strd	r6,r7,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
1814*bc3d5698SJohn Baldwin#endif
1815*bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#32-31
1816*bc3d5698SJohn Baldwin	add	r12,sp,#0
1817*bc3d5698SJohn Baldwin	eor	r8,r10,r8,ror#32-1
1818*bc3d5698SJohn Baldwin	add	r10,sp,#40
1819*bc3d5698SJohn Baldwin	eor	r9,r11,r9,ror#32-1
1820*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1821*bc3d5698SJohn Baldwin	str	r8,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
1822*bc3d5698SJohn Baldwin#endif
1823*bc3d5698SJohn Baldwin#ifndef	__thumb2__
1824*bc3d5698SJohn Baldwin	str	r9,[sp,#192+4]
1825*bc3d5698SJohn Baldwin#else
1826*bc3d5698SJohn Baldwin	strd	r8,r9,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
1827*bc3d5698SJohn Baldwin#endif
1828*bc3d5698SJohn Baldwin	blo	.Lround2x
1829*bc3d5698SJohn Baldwin
1830*bc3d5698SJohn Baldwin	ldr	pc,[sp,#440]
1831*bc3d5698SJohn Baldwin.size	KeccakF1600_int,.-KeccakF1600_int
1832*bc3d5698SJohn Baldwin
@ KeccakF1600: wrapper that runs the permutation on a caller-supplied state.
@
@ In:    r0 = pointer to the state; 50 words (200 bytes) are read from it.
@ Out:   the same 200 bytes are overwritten with the stack copy as it stands
@        after the call to KeccakF1600_enter.
@ Regs:  r4-r11 are pushed on entry and popped on exit; r0-r3, r12 and lr
@        are used as scratch by this wrapper.
@ Stack: 10 pushed words plus a 440+16 byte frame. The working copy of the
@        state lives at sp+0..199 and the pushed r0 sits at sp+440+16.
@ NOTE(review): KeccakF1600_enter is defined outside this block; presumably
@        it uses the rest of the frame as scratch and returns with
@        r10 = sp+40 (the copy-back below depends on that) - confirm
@        against its body.
1833*bc3d5698SJohn Baldwin.type	KeccakF1600, %function
1834*bc3d5698SJohn Baldwin.align	5
1835*bc3d5698SJohn BaldwinKeccakF1600:
@ r0 is pushed together with the callee-saved registers so that the state
@ pointer can be reloaded after the permutation.
1836*bc3d5698SJohn Baldwin	stmdb	sp!,{r0,r4-r11,lr}
1837*bc3d5698SJohn Baldwin	sub	sp,sp,#440+16			@ space for A[5][5],D[5],T[5][5],...
1838*bc3d5698SJohn Baldwin
@ Copy the state to sp+0..199 in five groups of ten words.
@ r10 walks the source from the second group on, r11 walks the destination.
1839*bc3d5698SJohn Baldwin	add	r10,r0,#40
1840*bc3d5698SJohn Baldwin	add	r11,sp,#40
1841*bc3d5698SJohn Baldwin	ldmia	r0,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}		@ copy A[5][5] to stack
1842*bc3d5698SJohn Baldwin	stmia	sp,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1843*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1844*bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1845*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1846*bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1847*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1848*bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1849*bc3d5698SJohn Baldwin	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
@ r12 and r10 are set to sp+0 and sp+40 before the final store so that they
@ are live at the call; r0-r9 still hold the fifth group at that point.
1850*bc3d5698SJohn Baldwin	add	r12,sp,#0
1851*bc3d5698SJohn Baldwin	add	r10,sp,#40
1852*bc3d5698SJohn Baldwin	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1853*bc3d5698SJohn Baldwin
1854*bc3d5698SJohn Baldwin	bl	KeccakF1600_enter
1855*bc3d5698SJohn Baldwin
@ Copy the stack state back to the caller buffer. The first group is read
@ from sp, the remaining four through r10 (expected to be sp+40 here).
1856*bc3d5698SJohn Baldwin	ldr	r11, [sp,#440+16]		@ restore pointer to A
1857*bc3d5698SJohn Baldwin	ldmia	sp,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1858*bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}		@ return A[5][5]
1859*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1860*bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1861*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1862*bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1863*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1864*bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1865*bc3d5698SJohn Baldwin	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1866*bc3d5698SJohn Baldwin	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1867*bc3d5698SJohn Baldwin
@ 440+20 drops the frame (440+16) plus the 4-byte slot of the pushed r0,
@ leaving sp at the saved r4; the pop then restores r4-r11 and returns by
@ loading the saved lr into pc.
1868*bc3d5698SJohn Baldwin	add	sp,sp,#440+20
1869*bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1870*bc3d5698SJohn Baldwin.size	KeccakF1600,.-KeccakF1600
@ SHA3_absorb: XOR whole input blocks into the state, permuting after each.
@
@ In:   r0 = pointer to the state (50 words, copied to the stack and back)
@       r1 = input pointer
@       r2 = input length in bytes
@       r3 = block size in bytes
@ Out:  r0 = number of input bytes left unprocessed. Both exits are taken
@            when the remaining length is below the block size. If that is
@            already the case on entry, the state is not touched and r2 is
@            returned unchanged.
@ Regs: r4-r12 are restored from the entry push; r1-r3 and lr are clobbered.
@
@ Frame (offsets valid after the sub below):
@   sp+0..199    working copy of the state
@   sp+456..468  the four mask constants (r6, r7, r8, r9 in that order)
@   sp+472       pushed r0 (state pointer)
@   sp+476       pushed r1, rewritten with the advancing input pointer
@   sp+480       pushed r2, rewritten with len - bsz for the next pass
@   sp+484       pushed r3 (block size)
@
@ Each 64-bit lane of the stack copy is updated in bit-interleaved form: the
@ first word receives the even-numbered input bits, the second word the
@ odd-numbered input bits.
@
@ NOTE(review): sp+200..455 is not accessed in this block; presumably it is
@   scratch for KeccakF1600_int, which starts outside this view - confirm.
@ NOTE(review): the lane loop consumes 8 bytes per step, so the block size
@   is assumed to be a non-zero multiple of 8 - verify against callers.
1871*bc3d5698SJohn Baldwin.globl	SHA3_absorb
1872*bc3d5698SJohn Baldwin.type	SHA3_absorb,%function
1873*bc3d5698SJohn Baldwin.align	5
1874*bc3d5698SJohn BaldwinSHA3_absorb:
@ r0-r3 are pushed as well so that the arguments have stack slots which the
@ loop below can reload and update.
1875*bc3d5698SJohn Baldwin	stmdb	sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1876*bc3d5698SJohn Baldwin	sub	sp,sp,#456+16
1877*bc3d5698SJohn Baldwin
1878*bc3d5698SJohn Baldwin	add	r10,r0,#40
1879*bc3d5698SJohn Baldwin	@ mov	r11,r1
1880*bc3d5698SJohn Baldwin	mov	r12,r2
1881*bc3d5698SJohn Baldwin	mov	r14,r3
@ Unsigned compare: with less than one full block there is nothing to do.
1882*bc3d5698SJohn Baldwin	cmp	r2,r3
1883*bc3d5698SJohn Baldwin	blo	.Labsorb_abort
1884*bc3d5698SJohn Baldwin
@ Copy the state to sp+0..199 in five groups of ten words. r11 is borrowed
@ as the destination cursor and reloaded with the input pointer afterwards.
1885*bc3d5698SJohn Baldwin	add	r11,sp,#0
1886*bc3d5698SJohn Baldwin	ldmia	r0,      {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
1887*bc3d5698SJohn Baldwin	stmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1888*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1889*bc3d5698SJohn Baldwin	stmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1890*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1891*bc3d5698SJohn Baldwin	stmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1892*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1893*bc3d5698SJohn Baldwin	stmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1894*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1895*bc3d5698SJohn Baldwin	stmia	r11,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1896*bc3d5698SJohn Baldwin
@ sp+476 is the slot of the pushed r1, i.e. the input pointer.
1897*bc3d5698SJohn Baldwin	ldr	r11,[sp,#476]		@ restore r11
@ Build the four bit-interleave masks. The Thumb-2 path loads them as
@ immediates; the other path composes them from 8-bit immediates.
1898*bc3d5698SJohn Baldwin#ifdef	__thumb2__
1899*bc3d5698SJohn Baldwin	mov	r9,#0x00ff00ff
1900*bc3d5698SJohn Baldwin	mov	r8,#0x0f0f0f0f
1901*bc3d5698SJohn Baldwin	mov	r7,#0x33333333
1902*bc3d5698SJohn Baldwin	mov	r6,#0x55555555
1903*bc3d5698SJohn Baldwin#else
1904*bc3d5698SJohn Baldwin	mov	r6,#0x11		@ compose constants
1905*bc3d5698SJohn Baldwin	mov	r8,#0x0f
1906*bc3d5698SJohn Baldwin	mov	r9,#0xff
1907*bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#8
1908*bc3d5698SJohn Baldwin	orr	r8,r8,r8,lsl#8
1909*bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#16		@ 0x11111111
1910*bc3d5698SJohn Baldwin	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
1911*bc3d5698SJohn Baldwin	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
1912*bc3d5698SJohn Baldwin	orr	r7,r6,r6,lsl#1		@ 0x33333333
1913*bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#2		@ 0x55555555
1914*bc3d5698SJohn Baldwin#endif
@ Park the masks in the frame; they are reloaded after every permutation.
1915*bc3d5698SJohn Baldwin	str	r9,[sp,#468]
1916*bc3d5698SJohn Baldwin	str	r8,[sp,#464]
1917*bc3d5698SJohn Baldwin	str	r7,[sp,#460]
1918*bc3d5698SJohn Baldwin	str	r6,[sp,#456]
1919*bc3d5698SJohn Baldwin	b	.Loop_absorb
1920*bc3d5698SJohn Baldwin
@ Outer loop, one pass per block.
@ Here r12 = bytes remaining, r14 = block size, r11 = input pointer.
1921*bc3d5698SJohn Baldwin.align	4
1922*bc3d5698SJohn Baldwin.Loop_absorb:
1923*bc3d5698SJohn Baldwin	subs	r0,r12,r14
1924*bc3d5698SJohn Baldwin	blo	.Labsorbed
1925*bc3d5698SJohn Baldwin	add	r10,sp,#0
1926*bc3d5698SJohn Baldwin	str	r0,[sp,#480]		@ save len - bsz
1927*bc3d5698SJohn Baldwin
@ Inner loop, one 64-bit lane per pass. r10 points at the current lane of
@ the stack copy and r14 counts down the bytes left in this block.
1928*bc3d5698SJohn Baldwin.align	4
1929*bc3d5698SJohn Baldwin.Loop_block:
@ Assemble eight input bytes, least significant byte first, into r0 (lo)
@ and r1 (hi). Byte loads keep this independent of input alignment.
1930*bc3d5698SJohn Baldwin	ldrb	r0,[r11],#1
1931*bc3d5698SJohn Baldwin	ldrb	r1,[r11],#1
1932*bc3d5698SJohn Baldwin	ldrb	r2,[r11],#1
1933*bc3d5698SJohn Baldwin	ldrb	r3,[r11],#1
1934*bc3d5698SJohn Baldwin	ldrb	r4,[r11],#1
1935*bc3d5698SJohn Baldwin	orr	r0,r0,r1,lsl#8
1936*bc3d5698SJohn Baldwin	ldrb	r1,[r11],#1
1937*bc3d5698SJohn Baldwin	orr	r0,r0,r2,lsl#16
1938*bc3d5698SJohn Baldwin	ldrb	r2,[r11],#1
1939*bc3d5698SJohn Baldwin	orr	r0,r0,r3,lsl#24		@ lo
1940*bc3d5698SJohn Baldwin	ldrb	r3,[r11],#1
1941*bc3d5698SJohn Baldwin	orr	r1,r4,r1,lsl#8
1942*bc3d5698SJohn Baldwin	orr	r1,r1,r2,lsl#16
1943*bc3d5698SJohn Baldwin	orr	r1,r1,r3,lsl#24		@ hi
1944*bc3d5698SJohn Baldwin
@ Split each word into even bits (r2 from lo, r3 from hi) and odd bits
@ (r0 from lo, r1 from hi). Each shift-or-mask step below halves the
@ spacing: even bits are packed toward bit 0, odd bits toward bit 31.
1945*bc3d5698SJohn Baldwin	and	r2,r0,r6		@ &=0x55555555
1946*bc3d5698SJohn Baldwin	and	r0,r0,r6,lsl#1		@ &=0xaaaaaaaa
1947*bc3d5698SJohn Baldwin	and	r3,r1,r6		@ &=0x55555555
1948*bc3d5698SJohn Baldwin	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
1949*bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsr#1
1950*bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#1
1951*bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#1
1952*bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsl#1
1953*bc3d5698SJohn Baldwin	and	r2,r2,r7		@ &=0x33333333
1954*bc3d5698SJohn Baldwin	and	r0,r0,r7,lsl#2		@ &=0xcccccccc
1955*bc3d5698SJohn Baldwin	and	r3,r3,r7		@ &=0x33333333
1956*bc3d5698SJohn Baldwin	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
1957*bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsr#2
1958*bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#2
1959*bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#2
1960*bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsl#2
1961*bc3d5698SJohn Baldwin	and	r2,r2,r8		@ &=0x0f0f0f0f
1962*bc3d5698SJohn Baldwin	and	r0,r0,r8,lsl#4		@ &=0xf0f0f0f0
1963*bc3d5698SJohn Baldwin	and	r3,r3,r8		@ &=0x0f0f0f0f
1964*bc3d5698SJohn Baldwin	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
1965*bc3d5698SJohn Baldwin	ldmia	r10,{r4,r5}		@ A_flat[i]
1966*bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsr#4
1967*bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#4
1968*bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#4
1969*bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsl#4
1970*bc3d5698SJohn Baldwin	and	r2,r2,r9		@ &=0x00ff00ff
1971*bc3d5698SJohn Baldwin	and	r0,r0,r9,lsl#8		@ &=0xff00ff00
1972*bc3d5698SJohn Baldwin	and	r3,r3,r9		@ &=0x00ff00ff
1973*bc3d5698SJohn Baldwin	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
1974*bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsr#8
1975*bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#8
1976*bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#8
1977*bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsl#8
1978*bc3d5698SJohn Baldwin
@ The packed halves now sit in the low 16 bits of r2/r3 and the high 16
@ bits of r0/r1; the other halves hold leftovers that the 16-bit shifts
@ discard. First word  ^= (even bits of hi) : (even bits of lo),
@ second word ^= (odd bits of hi) : (odd bits of lo).
1979*bc3d5698SJohn Baldwin	mov	r2,r2,lsl#16
1980*bc3d5698SJohn Baldwin	mov	r1,r1,lsr#16
1981*bc3d5698SJohn Baldwin	eor	r4,r4,r3,lsl#16
1982*bc3d5698SJohn Baldwin	eor	r5,r5,r0,lsr#16
1983*bc3d5698SJohn Baldwin	eor	r4,r4,r2,lsr#16
1984*bc3d5698SJohn Baldwin	eor	r5,r5,r1,lsl#16
1985*bc3d5698SJohn Baldwin	stmia	r10!,{r4,r5}	@ A_flat[i++] ^= BitInterleave(inp[0..7])
1986*bc3d5698SJohn Baldwin
@ Unsigned test: continue only while more than 8 bytes of the block were
@ left before this subtraction.
1987*bc3d5698SJohn Baldwin	subs	r14,r14,#8
1988*bc3d5698SJohn Baldwin	bhi	.Loop_block
1989*bc3d5698SJohn Baldwin
@ Save the advanced input pointer into the r1 slot so that it comes back
@ with the reload after the call.
1990*bc3d5698SJohn Baldwin	str	r11,[sp,#476]
1991*bc3d5698SJohn Baldwin
1992*bc3d5698SJohn Baldwin	bl	KeccakF1600_int
1993*bc3d5698SJohn Baldwin
@ Reload from sp+456 upward: r6-r9 = masks, r10 = state pointer,
@ r11 = input pointer, r12 = len - bsz, r14 = block size.
1994*bc3d5698SJohn Baldwin	add	r14,sp,#456
1995*bc3d5698SJohn Baldwin	ldmia	r14,{r6,r7,r8,r9,r10,r11,r12,r14}	@ restore constants and variables
1996*bc3d5698SJohn Baldwin	b	.Loop_absorb
1997*bc3d5698SJohn Baldwin
@ Fewer bytes than one block remain: copy the stack state back through r10,
@ which holds the caller state pointer reloaded after the last permutation.
1998*bc3d5698SJohn Baldwin.align	4
1999*bc3d5698SJohn Baldwin.Labsorbed:
2000*bc3d5698SJohn Baldwin	add	r11,sp,#40
2001*bc3d5698SJohn Baldwin	ldmia	sp,      {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2002*bc3d5698SJohn Baldwin	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
2003*bc3d5698SJohn Baldwin	ldmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2004*bc3d5698SJohn Baldwin	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2005*bc3d5698SJohn Baldwin	ldmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2006*bc3d5698SJohn Baldwin	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2007*bc3d5698SJohn Baldwin	ldmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2008*bc3d5698SJohn Baldwin	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2009*bc3d5698SJohn Baldwin	ldmia	r11,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2010*bc3d5698SJohn Baldwin	stmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2011*bc3d5698SJohn Baldwin
@ Common exit. 456+32 drops the frame (456+16) plus the four pushed
@ argument words r0-r3, leaving sp at the saved r4.
2012*bc3d5698SJohn Baldwin.Labsorb_abort:
2013*bc3d5698SJohn Baldwin	add	sp,sp,#456+32
2014*bc3d5698SJohn Baldwin	mov	r0,r12			@ return value
2015*bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2016*bc3d5698SJohn Baldwin.size	SHA3_absorb,.-SHA3_absorb
/*
 * SHA3_squeeze: integer-only squeeze path.
 *
 * Register use, as established by the code below:
 *   r0 = pointer to the state ("A_flat"); copied to r10 (read cursor) and
 *        r14 (start of the state, handed back to KeccakF1600)
 *   r1 = output byte pointer, kept in r4
 *   r2 = number of output bytes still to produce, kept in r5
 *   r3 = block size in bytes ("bsz"), counted down in r12
 *
 * Every iteration loads two 32-bit words from the state, merges them bit by
 * bit (bits of the first word land in even positions, bits of the second
 * word in odd positions) and stores the resulting eight bytes, least
 * significant byte first. Once bsz bytes have been consumed and more output
 * is still wanted, KeccakF1600 is called and reading restarts at the
 * beginning of the state.
 *
 * Stack frame while the loop runs (ascending addresses from sp):
 *   r6-r9 (mask constants), saved r0, saved r3, saved r4-r10, lr
 *
 * NOTE(review): with r2 == 0 on entry the tail path still stores bytes,
 * because its first strb precedes the first test of r5. Presumably callers
 * never request zero bytes; confirm.
 */
2017*bc3d5698SJohn Baldwin.globl	SHA3_squeeze
2018*bc3d5698SJohn Baldwin.type	SHA3_squeeze,%function
2019*bc3d5698SJohn Baldwin.align	5
2020*bc3d5698SJohn BaldwinSHA3_squeeze:
/* r0 and r3 are pushed as well so they can be reloaded after KeccakF1600 */
2021*bc3d5698SJohn Baldwin	stmdb	sp!,{r0,r3-r10,lr}
2022*bc3d5698SJohn Baldwin
2023*bc3d5698SJohn Baldwin	mov	r10,r0
2024*bc3d5698SJohn Baldwin	mov	r4,r1
2025*bc3d5698SJohn Baldwin	mov	r5,r2
2026*bc3d5698SJohn Baldwin	mov	r12,r3
2027*bc3d5698SJohn Baldwin
/* r6-r9 = bit-spreading masks; the two build variants produce the same values */
2028*bc3d5698SJohn Baldwin#ifdef	__thumb2__
2029*bc3d5698SJohn Baldwin	mov	r9,#0x00ff00ff
2030*bc3d5698SJohn Baldwin	mov	r8,#0x0f0f0f0f
2031*bc3d5698SJohn Baldwin	mov	r7,#0x33333333
2032*bc3d5698SJohn Baldwin	mov	r6,#0x55555555
2033*bc3d5698SJohn Baldwin#else
2034*bc3d5698SJohn Baldwin	mov	r6,#0x11		@ compose constants
2035*bc3d5698SJohn Baldwin	mov	r8,#0x0f
2036*bc3d5698SJohn Baldwin	mov	r9,#0xff
2037*bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#8
2038*bc3d5698SJohn Baldwin	orr	r8,r8,r8,lsl#8
2039*bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#16		@ 0x11111111
2040*bc3d5698SJohn Baldwin	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
2041*bc3d5698SJohn Baldwin	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
2042*bc3d5698SJohn Baldwin	orr	r7,r6,r6,lsl#1		@ 0x33333333
2043*bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#2		@ 0x55555555
2044*bc3d5698SJohn Baldwin#endif
/* park the masks at the bottom of the frame; they are reloaded after each KeccakF1600 call */
2045*bc3d5698SJohn Baldwin	stmdb	sp!,{r6,r7,r8,r9}
2046*bc3d5698SJohn Baldwin
2047*bc3d5698SJohn Baldwin	mov	r14,r10
2048*bc3d5698SJohn Baldwin	b	.Loop_squeeze
2049*bc3d5698SJohn Baldwin
2050*bc3d5698SJohn Baldwin.align	4
2051*bc3d5698SJohn Baldwin.Loop_squeeze:
2052*bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1}	@ A_flat[i++]
2053*bc3d5698SJohn Baldwin
/*
 * Split both words into 16-bit halves:
 *   r2 = low half of word 0 (in bits 0-15)    r3 = low half of word 1 (in bits 16-31)
 *   r0 = high half of word 0 (in bits 0-15)   r1 = high half of word 1 (in bits 16-31)
 */
2054*bc3d5698SJohn Baldwin	mov	r2,r0,lsl#16
2055*bc3d5698SJohn Baldwin	mov	r3,r1,lsl#16		@ r3 = r1 << 16
2056*bc3d5698SJohn Baldwin	mov	r2,r2,lsr#16		@ r2 = r0 & 0x0000ffff
2057*bc3d5698SJohn Baldwin	mov	r1,r1,lsr#16
2058*bc3d5698SJohn Baldwin	mov	r0,r0,lsr#16		@ r0 = r0 >> 16
2059*bc3d5698SJohn Baldwin	mov	r1,r1,lsl#16		@ r1 = r1 & 0xffff0000
2060*bc3d5698SJohn Baldwin
/*
 * Spread each 16-bit half over 32 bits in four shift/mask stages (8, 4, 2, 1).
 * r2 and r0 spread upwards and end up in the even bit positions; r3 and r1
 * spread downwards and end up in the odd bit positions.
 */
2061*bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsl#8
2062*bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#8
2063*bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#8
2064*bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsr#8
2065*bc3d5698SJohn Baldwin	and	r2,r2,r9		@ &=0x00ff00ff
2066*bc3d5698SJohn Baldwin	and	r3,r3,r9,lsl#8		@ &=0xff00ff00
2067*bc3d5698SJohn Baldwin	and	r0,r0,r9		@ &=0x00ff00ff
2068*bc3d5698SJohn Baldwin	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
2069*bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsl#4
2070*bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#4
2071*bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#4
2072*bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsr#4
2073*bc3d5698SJohn Baldwin	and	r2,r2,r8		@ &=0x0f0f0f0f
2074*bc3d5698SJohn Baldwin	and	r3,r3,r8,lsl#4		@ &=0xf0f0f0f0
2075*bc3d5698SJohn Baldwin	and	r0,r0,r8		@ &=0x0f0f0f0f
2076*bc3d5698SJohn Baldwin	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
2077*bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsl#2
2078*bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#2
2079*bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#2
2080*bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsr#2
2081*bc3d5698SJohn Baldwin	and	r2,r2,r7		@ &=0x33333333
2082*bc3d5698SJohn Baldwin	and	r3,r3,r7,lsl#2		@ &=0xcccccccc
2083*bc3d5698SJohn Baldwin	and	r0,r0,r7		@ &=0x33333333
2084*bc3d5698SJohn Baldwin	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
2085*bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsl#1
2086*bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#1
2087*bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#1
2088*bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsr#1
2089*bc3d5698SJohn Baldwin	and	r2,r2,r6		@ &=0x55555555
2090*bc3d5698SJohn Baldwin	and	r3,r3,r6,lsl#1		@ &=0xaaaaaaaa
2091*bc3d5698SJohn Baldwin	and	r0,r0,r6		@ &=0x55555555
2092*bc3d5698SJohn Baldwin	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
2093*bc3d5698SJohn Baldwin
/* combine even and odd bits: r2 = first four output bytes, r0 = next four */
2094*bc3d5698SJohn Baldwin	orr	r2,r2,r3
2095*bc3d5698SJohn Baldwin	orr	r0,r0,r1
2096*bc3d5698SJohn Baldwin
/* fewer than 8 bytes wanted: hand over to the byte-at-a-time tail */
2097*bc3d5698SJohn Baldwin	cmp	r5,#8
2098*bc3d5698SJohn Baldwin	blo	.Lsqueeze_tail
/* full lane: store r2 and then r0 one byte at a time, lowest byte first */
2099*bc3d5698SJohn Baldwin	mov	r1,r2,lsr#8
2100*bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2101*bc3d5698SJohn Baldwin	mov	r3,r2,lsr#16
2102*bc3d5698SJohn Baldwin	strb	r1,[r4],#1
2103*bc3d5698SJohn Baldwin	mov	r2,r2,lsr#24
2104*bc3d5698SJohn Baldwin	strb	r3,[r4],#1
2105*bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2106*bc3d5698SJohn Baldwin
2107*bc3d5698SJohn Baldwin	mov	r1,r0,lsr#8
2108*bc3d5698SJohn Baldwin	strb	r0,[r4],#1
2109*bc3d5698SJohn Baldwin	mov	r3,r0,lsr#16
2110*bc3d5698SJohn Baldwin	strb	r1,[r4],#1
2111*bc3d5698SJohn Baldwin	mov	r0,r0,lsr#24
2112*bc3d5698SJohn Baldwin	strb	r3,[r4],#1
2113*bc3d5698SJohn Baldwin	strb	r0,[r4],#1
/* all requested bytes written? */
2114*bc3d5698SJohn Baldwin	subs	r5,r5,#8
2115*bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2116*bc3d5698SJohn Baldwin
/* keep reading lanes while the current block still has bytes left */
2117*bc3d5698SJohn Baldwin	subs	r12,r12,#8		@ bsz -= 8
2118*bc3d5698SJohn Baldwin	bhi	.Loop_squeeze
2119*bc3d5698SJohn Baldwin
/* block used up: run the permutation on the state before continuing */
2120*bc3d5698SJohn Baldwin	mov	r0,r14			@ original r10
2121*bc3d5698SJohn Baldwin
2122*bc3d5698SJohn Baldwin	bl	KeccakF1600
2123*bc3d5698SJohn Baldwin
/* r6-r9 = masks, r10 = saved r0 (state pointer), r12 = saved r3 (bsz); sp is not moved */
2124*bc3d5698SJohn Baldwin	ldmia	sp,{r6,r7,r8,r9,r10,r12}		@ restore constants and variables
2125*bc3d5698SJohn Baldwin	mov	r14,r10
2126*bc3d5698SJohn Baldwin	b	.Loop_squeeze
2127*bc3d5698SJohn Baldwin
2128*bc3d5698SJohn Baldwin.align	4
2129*bc3d5698SJohn Baldwin.Lsqueeze_tail:
/*
 * Reached with r5 < 8. Bytes are stored from r2 and then r0, lowest byte
 * first, decrementing r5 after each store and leaving as soon as it reaches
 * zero. At most seven bytes are written here.
 */
2130*bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2131*bc3d5698SJohn Baldwin	mov	r2,r2,lsr#8
2132*bc3d5698SJohn Baldwin	subs	r5,r5,#1
2133*bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2134*bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2135*bc3d5698SJohn Baldwin	mov	r2,r2,lsr#8
2136*bc3d5698SJohn Baldwin	subs	r5,r5,#1
2137*bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2138*bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2139*bc3d5698SJohn Baldwin	mov	r2,r2,lsr#8
2140*bc3d5698SJohn Baldwin	subs	r5,r5,#1
2141*bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2142*bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2143*bc3d5698SJohn Baldwin	subs	r5,r5,#1
2144*bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2145*bc3d5698SJohn Baldwin
2146*bc3d5698SJohn Baldwin	strb	r0,[r4],#1
2147*bc3d5698SJohn Baldwin	mov	r0,r0,lsr#8
2148*bc3d5698SJohn Baldwin	subs	r5,r5,#1
2149*bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2150*bc3d5698SJohn Baldwin	strb	r0,[r4],#1
2151*bc3d5698SJohn Baldwin	mov	r0,r0,lsr#8
2152*bc3d5698SJohn Baldwin	subs	r5,r5,#1
2153*bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2154*bc3d5698SJohn Baldwin	strb	r0,[r4]
2155*bc3d5698SJohn Baldwin	b	.Lsqueeze_done
2156*bc3d5698SJohn Baldwin
2157*bc3d5698SJohn Baldwin.align	4
2158*bc3d5698SJohn Baldwin.Lsqueeze_done:
/* 24 bytes = four mask words plus the saved r0 and r3; then pop r4-r10 and return */
2159*bc3d5698SJohn Baldwin	add	sp,sp,#24
2160*bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
2161*bc3d5698SJohn Baldwin.size	SHA3_squeeze,.-SHA3_squeeze
2162*bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7
2163*bc3d5698SJohn Baldwin.fpu	neon
2164*bc3d5698SJohn Baldwin
/*
 * iotas64: table of 24 64-bit constants, one per round of KeccakF1600_neon.
 * That routine points r2 at the table with adr and fetches one entry per
 * round through a post-incremented, 64-bit aligned vld1.64 ("Iota[i++]"),
 * XORing it into A[0][0].
 */
2165*bc3d5698SJohn Baldwin.type	iotas64, %object
2166*bc3d5698SJohn Baldwin.align	5
2167*bc3d5698SJohn Baldwiniotas64:
2168*bc3d5698SJohn Baldwin.quad	0x0000000000000001
2169*bc3d5698SJohn Baldwin.quad	0x0000000000008082
2170*bc3d5698SJohn Baldwin.quad	0x800000000000808a
2171*bc3d5698SJohn Baldwin.quad	0x8000000080008000
2172*bc3d5698SJohn Baldwin.quad	0x000000000000808b
2173*bc3d5698SJohn Baldwin.quad	0x0000000080000001
2174*bc3d5698SJohn Baldwin.quad	0x8000000080008081
2175*bc3d5698SJohn Baldwin.quad	0x8000000000008009
2176*bc3d5698SJohn Baldwin.quad	0x000000000000008a
2177*bc3d5698SJohn Baldwin.quad	0x0000000000000088
2178*bc3d5698SJohn Baldwin.quad	0x0000000080008009
2179*bc3d5698SJohn Baldwin.quad	0x000000008000000a
2180*bc3d5698SJohn Baldwin.quad	0x000000008000808b
2181*bc3d5698SJohn Baldwin.quad	0x800000000000008b
2182*bc3d5698SJohn Baldwin.quad	0x8000000000008089
2183*bc3d5698SJohn Baldwin.quad	0x8000000000008003
2184*bc3d5698SJohn Baldwin.quad	0x8000000000008002
2185*bc3d5698SJohn Baldwin.quad	0x8000000000000080
2186*bc3d5698SJohn Baldwin.quad	0x000000000000800a
2187*bc3d5698SJohn Baldwin.quad	0x800000008000000a
2188*bc3d5698SJohn Baldwin.quad	0x8000000080008081
2189*bc3d5698SJohn Baldwin.quad	0x8000000000008080
2190*bc3d5698SJohn Baldwin.quad	0x0000000080000001
2191*bc3d5698SJohn Baldwin.quad	0x8000000080008008
2192*bc3d5698SJohn Baldwin.size	iotas64,.-iotas64
2193*bc3d5698SJohn Baldwin
/*
 * KeccakF1600_neon: 24 rounds of the permutation on a state held entirely
 * in NEON registers. Not exported (no .globl); reached with bl from the two
 * NEON entry points in this file.
 *
 * State layout, per the lane comments on the callers' load sequences:
 *   d0,d2,d4,d6,d8      = A[0][0..4]     d1,d3,d5,d7,d9      = A[1][0..4]
 *   d10,d12,d14,d16,d18 = A[2][0..4]     d11,d13,d15,d17,d19 = A[3][0..4]
 *   d20..d24            = A[4][0..4]
 * so q0-q4 pair rows 0 and 1 column by column, and q5-q9 pair rows 2 and 3.
 *
 * r0 must point at 64-bit aligned writable memory: 16 bytes at [r0] and
 * 8 bytes at [r0,#16] are used as spill space inside every round. Both
 * callers pass the start of the state buffer and write all registers back
 * to it afterwards.
 *
 * Overwrites r1, r2, r3, d25-d31 and the flags; r0 is not written.
 */
2194*bc3d5698SJohn Baldwin.type	KeccakF1600_neon, %function
2195*bc3d5698SJohn Baldwin.align	5
2196*bc3d5698SJohn BaldwinKeccakF1600_neon:
/* r1 = second spill slot, r2 = cursor into iotas64, r3 = rounds remaining */
2197*bc3d5698SJohn Baldwin	add	r1, r0, #16
2198*bc3d5698SJohn Baldwin	adr	r2, iotas64
2199*bc3d5698SJohn Baldwin	mov	r3, #24			@ loop counter
2200*bc3d5698SJohn Baldwin	b	.Loop_neon
2201*bc3d5698SJohn Baldwin
2202*bc3d5698SJohn Baldwin.align	4
2203*bc3d5698SJohn Baldwin.Loop_neon:
/* q4 and d18 are spilled first because they are reused below as temporaries */
2204*bc3d5698SJohn Baldwin	@ Theta
2205*bc3d5698SJohn Baldwin	vst1.64	{q4},  [r0,:64]		@ offload A[0..1][4]
2206*bc3d5698SJohn Baldwin	veor	q13, q0,  q5		@ A[0..1][0]^A[2..3][0]
2207*bc3d5698SJohn Baldwin	vst1.64	{d18}, [r1,:64]		@ offload A[2][4]
2208*bc3d5698SJohn Baldwin	veor	q14, q1,  q6		@ A[0..1][1]^A[2..3][1]
2209*bc3d5698SJohn Baldwin	veor	q15, q2,  q7		@ A[0..1][2]^A[2..3][2]
2210*bc3d5698SJohn Baldwin	veor	d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
2211*bc3d5698SJohn Baldwin	veor	d27, d28, d29		@ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
2212*bc3d5698SJohn Baldwin	veor	q14, q3,  q8		@ A[0..1][3]^A[2..3][3]
2213*bc3d5698SJohn Baldwin	veor	q4,  q4,  q9		@ A[0..1][4]^A[2..3][4]
2214*bc3d5698SJohn Baldwin	veor	d30, d30, d31		@ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
2215*bc3d5698SJohn Baldwin	veor	d31, d28, d29		@ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
2216*bc3d5698SJohn Baldwin	veor	d25, d8,  d9		@ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
2217*bc3d5698SJohn Baldwin	veor	q13, q13, q10		@ C[0..1]^=A[4][0..1]
2218*bc3d5698SJohn Baldwin	veor	q14, q15, q11		@ C[2..3]^=A[4][2..3]
2219*bc3d5698SJohn Baldwin	veor	d25, d25, d24		@ C[4]^=A[4][4]
2220*bc3d5698SJohn Baldwin
/* rotate left by one: x+x supplies bits 63..1, vsri #63 inserts the old top bit at bit 0 */
2221*bc3d5698SJohn Baldwin	vadd.u64	q4,  q13, q13		@ C[0..1]<<1
2222*bc3d5698SJohn Baldwin	vadd.u64	q15, q14, q14		@ C[2..3]<<1
2223*bc3d5698SJohn Baldwin	vadd.u64	d18, d25, d25		@ C[4]<<1
2224*bc3d5698SJohn Baldwin	vsri.u64	q4,  q13, #63		@ ROL64(C[0..1],1)
2225*bc3d5698SJohn Baldwin	vsri.u64	q15, q14, #63		@ ROL64(C[2..3],1)
2226*bc3d5698SJohn Baldwin	vsri.u64	d18, d25, #63		@ ROL64(C[4],1)
2227*bc3d5698SJohn Baldwin	veor	d25, d25, d9		@ D[0] = C[4] ^= ROL64(C[1],1)
2228*bc3d5698SJohn Baldwin	veor	q13, q13, q15		@ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
2229*bc3d5698SJohn Baldwin	veor	d28, d28, d18		@ D[3] = C[2] ^= ROL64(C[4],1)
2230*bc3d5698SJohn Baldwin	veor	d29, d29, d8		@ D[4] = C[3] ^= ROL64(C[0],1)
2231*bc3d5698SJohn Baldwin
/* fold D[x] into every lane of column x */
2232*bc3d5698SJohn Baldwin	veor	d0,  d0,  d25		@ A[0][0] ^= C[4]
2233*bc3d5698SJohn Baldwin	veor	d1,  d1,  d25		@ A[1][0] ^= C[4]
2234*bc3d5698SJohn Baldwin	veor	d10, d10, d25		@ A[2][0] ^= C[4]
2235*bc3d5698SJohn Baldwin	veor	d11, d11, d25		@ A[3][0] ^= C[4]
2236*bc3d5698SJohn Baldwin	veor	d20, d20, d25		@ A[4][0] ^= C[4]
2237*bc3d5698SJohn Baldwin
2238*bc3d5698SJohn Baldwin	veor	d2,  d2,  d26		@ A[0][1] ^= D[1]
2239*bc3d5698SJohn Baldwin	veor	d3,  d3,  d26		@ A[1][1] ^= D[1]
2240*bc3d5698SJohn Baldwin	veor	d12, d12, d26		@ A[2][1] ^= D[1]
2241*bc3d5698SJohn Baldwin	veor	d13, d13, d26		@ A[3][1] ^= D[1]
2242*bc3d5698SJohn Baldwin	veor	d21, d21, d26		@ A[4][1] ^= D[1]
/* duplicate d27 into d26 so that q13 can be applied to two rows per veor below */
2243*bc3d5698SJohn Baldwin	vmov	d26, d27
2244*bc3d5698SJohn Baldwin
2245*bc3d5698SJohn Baldwin	veor	d6,  d6,  d28		@ A[0][3] ^= C[2]
2246*bc3d5698SJohn Baldwin	veor	d7,  d7,  d28		@ A[1][3] ^= C[2]
2247*bc3d5698SJohn Baldwin	veor	d16, d16, d28		@ A[2][3] ^= C[2]
2248*bc3d5698SJohn Baldwin	veor	d17, d17, d28		@ A[3][3] ^= C[2]
2249*bc3d5698SJohn Baldwin	veor	d23, d23, d28		@ A[4][3] ^= C[2]
2250*bc3d5698SJohn Baldwin	vld1.64	{q4},  [r0,:64]		@ restore A[0..1][4]
/* same trick for column 4: d29 is copied into d28 so q14 holds it twice */
2251*bc3d5698SJohn Baldwin	vmov	d28, d29
2252*bc3d5698SJohn Baldwin
2253*bc3d5698SJohn Baldwin	vld1.64	{d18}, [r1,:64]		@ restore A[2][4]
2254*bc3d5698SJohn Baldwin	veor	q2,  q2,  q13		@ A[0..1][2] ^= D[2]
2255*bc3d5698SJohn Baldwin	veor	q7,  q7,  q13		@ A[2..3][2] ^= D[2]
2256*bc3d5698SJohn Baldwin	veor	d22, d22, d27		@ A[4][2]    ^= D[2]
2257*bc3d5698SJohn Baldwin
2258*bc3d5698SJohn Baldwin	veor	q4,  q4,  q14		@ A[0..1][4] ^= C[3]
2259*bc3d5698SJohn Baldwin	veor	q9,  q9,  q14		@ A[2..3][4] ^= C[3]
2260*bc3d5698SJohn Baldwin	veor	d24, d24, d29		@ A[4][4]    ^= C[3]
2261*bc3d5698SJohn Baldwin
/*
 * Each rotation is a vshl by n followed by a vsri by 64-n into the same
 * destination. Lanes are moved in chains, so a register is only overwritten
 * after its old value has been consumed; d26-d29 hold the row 0 lanes that
 * are needed last.
 */
2262*bc3d5698SJohn Baldwin	@ Rho + Pi
2263*bc3d5698SJohn Baldwin	vmov	d26, d2			@ C[1] = A[0][1]
2264*bc3d5698SJohn Baldwin	vshl.u64	d2,  d3,  #44
2265*bc3d5698SJohn Baldwin	vmov	d27, d4			@ C[2] = A[0][2]
2266*bc3d5698SJohn Baldwin	vshl.u64	d4,  d14, #43
2267*bc3d5698SJohn Baldwin	vmov	d28, d6			@ C[3] = A[0][3]
2268*bc3d5698SJohn Baldwin	vshl.u64	d6,  d17, #21
2269*bc3d5698SJohn Baldwin	vmov	d29, d8			@ C[4] = A[0][4]
2270*bc3d5698SJohn Baldwin	vshl.u64	d8,  d24, #14
2271*bc3d5698SJohn Baldwin	vsri.u64	d2,  d3,  #64-44	@ A[0][1] = ROL64(A[1][1], rhotates[1][1])
2272*bc3d5698SJohn Baldwin	vsri.u64	d4,  d14, #64-43	@ A[0][2] = ROL64(A[2][2], rhotates[2][2])
2273*bc3d5698SJohn Baldwin	vsri.u64	d6,  d17, #64-21	@ A[0][3] = ROL64(A[3][3], rhotates[3][3])
2274*bc3d5698SJohn Baldwin	vsri.u64	d8,  d24, #64-14	@ A[0][4] = ROL64(A[4][4], rhotates[4][4])
2275*bc3d5698SJohn Baldwin
2276*bc3d5698SJohn Baldwin	vshl.u64	d3,  d9,  #20
2277*bc3d5698SJohn Baldwin	vshl.u64	d14, d16, #25
2278*bc3d5698SJohn Baldwin	vshl.u64	d17, d15, #15
2279*bc3d5698SJohn Baldwin	vshl.u64	d24, d21, #2
2280*bc3d5698SJohn Baldwin	vsri.u64	d3,  d9,  #64-20	@ A[1][1] = ROL64(A[1][4], rhotates[1][4])
2281*bc3d5698SJohn Baldwin	vsri.u64	d14, d16, #64-25	@ A[2][2] = ROL64(A[2][3], rhotates[2][3])
2282*bc3d5698SJohn Baldwin	vsri.u64	d17, d15, #64-15	@ A[3][3] = ROL64(A[3][2], rhotates[3][2])
2283*bc3d5698SJohn Baldwin	vsri.u64	d24, d21, #64-2		@ A[4][4] = ROL64(A[4][1], rhotates[4][1])
2284*bc3d5698SJohn Baldwin
/* the rotation by 8 bits is a whole-byte rotation, done with one vext.8 in place of the shift pair */
2285*bc3d5698SJohn Baldwin	vshl.u64	d9,  d22, #61
2286*bc3d5698SJohn Baldwin	@ vshl.u64	d16, d19, #8
2287*bc3d5698SJohn Baldwin	vshl.u64	d15, d12, #10
2288*bc3d5698SJohn Baldwin	vshl.u64	d21, d7,  #55
2289*bc3d5698SJohn Baldwin	vsri.u64	d9,  d22, #64-61	@ A[1][4] = ROL64(A[4][2], rhotates[4][2])
2290*bc3d5698SJohn Baldwin	vext.8	d16, d19, d19, #8-1	@ A[2][3] = ROL64(A[3][4], rhotates[3][4])
2291*bc3d5698SJohn Baldwin	vsri.u64	d15, d12, #64-10	@ A[3][2] = ROL64(A[2][1], rhotates[2][1])
2292*bc3d5698SJohn Baldwin	vsri.u64	d21, d7,  #64-55	@ A[4][1] = ROL64(A[1][3], rhotates[1][3])
2293*bc3d5698SJohn Baldwin
/* likewise the rotation by 56 bits (seven bytes) uses vext.8 */
2294*bc3d5698SJohn Baldwin	vshl.u64	d22, d18, #39
2295*bc3d5698SJohn Baldwin	@ vshl.u64	d19, d23, #56
2296*bc3d5698SJohn Baldwin	vshl.u64	d12, d5,  #6
2297*bc3d5698SJohn Baldwin	vshl.u64	d7,  d13, #45
2298*bc3d5698SJohn Baldwin	vsri.u64	d22, d18, #64-39	@ A[4][2] = ROL64(A[2][4], rhotates[2][4])
2299*bc3d5698SJohn Baldwin	vext.8	d19, d23, d23, #8-7	@ A[3][4] = ROL64(A[4][3], rhotates[4][3])
2300*bc3d5698SJohn Baldwin	vsri.u64	d12, d5,  #64-6		@ A[2][1] = ROL64(A[1][2], rhotates[1][2])
2301*bc3d5698SJohn Baldwin	vsri.u64	d7,  d13, #64-45	@ A[1][3] = ROL64(A[3][1], rhotates[3][1])
2302*bc3d5698SJohn Baldwin
2303*bc3d5698SJohn Baldwin	vshl.u64	d18, d20, #18
2304*bc3d5698SJohn Baldwin	vshl.u64	d23, d11, #41
2305*bc3d5698SJohn Baldwin	vshl.u64	d5,  d10, #3
2306*bc3d5698SJohn Baldwin	vshl.u64	d13, d1,  #36
2307*bc3d5698SJohn Baldwin	vsri.u64	d18, d20, #64-18	@ A[2][4] = ROL64(A[4][0], rhotates[4][0])
2308*bc3d5698SJohn Baldwin	vsri.u64	d23, d11, #64-41	@ A[4][3] = ROL64(A[3][0], rhotates[3][0])
2309*bc3d5698SJohn Baldwin	vsri.u64	d5,  d10, #64-3		@ A[1][2] = ROL64(A[2][0], rhotates[2][0])
2310*bc3d5698SJohn Baldwin	vsri.u64	d13, d1,  #64-36	@ A[3][1] = ROL64(A[1][0], rhotates[1][0])
2311*bc3d5698SJohn Baldwin
/* finally place the four row 0 lanes that were set aside in d26-d29 */
2312*bc3d5698SJohn Baldwin	vshl.u64	d1,  d28, #28
2313*bc3d5698SJohn Baldwin	vshl.u64	d10, d26, #1
2314*bc3d5698SJohn Baldwin	vshl.u64	d11, d29, #27
2315*bc3d5698SJohn Baldwin	vshl.u64	d20, d27, #62
2316*bc3d5698SJohn Baldwin	vsri.u64	d1,  d28, #64-28	@ A[1][0] = ROL64(C[3],    rhotates[0][3])
2317*bc3d5698SJohn Baldwin	vsri.u64	d10, d26, #64-1		@ A[2][0] = ROL64(C[1],    rhotates[0][1])
2318*bc3d5698SJohn Baldwin	vsri.u64	d11, d29, #64-27	@ A[3][0] = ROL64(C[4],    rhotates[0][4])
2319*bc3d5698SJohn Baldwin	vsri.u64	d20, d27, #64-62	@ A[4][0] = ROL64(C[2],    rhotates[0][2])
2320*bc3d5698SJohn Baldwin
/*
 * Rows 0 and 1 are processed together in q registers. The new column 0
 * value is parked in memory at [r0] because the old q0 is still needed as
 * an input by the vbic instructions that follow.
 */
2321*bc3d5698SJohn Baldwin	@ Chi + Iota
2322*bc3d5698SJohn Baldwin	vbic	q13, q2,  q1
2323*bc3d5698SJohn Baldwin	vbic	q14, q3,  q2
2324*bc3d5698SJohn Baldwin	vbic	q15, q4,  q3
2325*bc3d5698SJohn Baldwin	veor	q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
2326*bc3d5698SJohn Baldwin	veor	q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
2327*bc3d5698SJohn Baldwin	veor	q2,  q2,  q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
2328*bc3d5698SJohn Baldwin	vst1.64	{q13}, [r0,:64]		@ offload A[0..1][0]
2329*bc3d5698SJohn Baldwin	vbic	q13, q0,  q4
2330*bc3d5698SJohn Baldwin	vbic	q15, q1,  q0
2331*bc3d5698SJohn Baldwin	vmov	q1,  q14		@ A[0..1][1]
2332*bc3d5698SJohn Baldwin	veor	q3,  q3,  q13		@ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
2333*bc3d5698SJohn Baldwin	veor	q4,  q4,  q15		@ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
2334*bc3d5698SJohn Baldwin
/* rows 2 and 3: q0 and q15 keep the pre-update columns 0 and 1 for the wrap-around terms */
2335*bc3d5698SJohn Baldwin	vbic	q13, q7,  q6
2336*bc3d5698SJohn Baldwin	vmov	q0,  q5			@ A[2..3][0]
2337*bc3d5698SJohn Baldwin	vbic	q14, q8,  q7
2338*bc3d5698SJohn Baldwin	vmov	q15, q6			@ A[2..3][1]
2339*bc3d5698SJohn Baldwin	veor	q5,  q5,  q13		@ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
2340*bc3d5698SJohn Baldwin	vbic	q13, q9,  q8
2341*bc3d5698SJohn Baldwin	veor	q6,  q6,  q14		@ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
2342*bc3d5698SJohn Baldwin	vbic	q14, q0,  q9
2343*bc3d5698SJohn Baldwin	veor	q7,  q7,  q13		@ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
2344*bc3d5698SJohn Baldwin	vbic	q13, q15, q0
2345*bc3d5698SJohn Baldwin	veor	q8,  q8,  q14		@ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
2346*bc3d5698SJohn Baldwin	vmov	q14, q10		@ A[4][0..1]
2347*bc3d5698SJohn Baldwin	veor	q9,  q9,  q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
2348*bc3d5698SJohn Baldwin
/* row 4 is done on d registers; d28 and d29 hold the pre-update A[4][0] and A[4][1] */
2349*bc3d5698SJohn Baldwin	vld1.64	d25, [r2,:64]!		@ Iota[i++]
2350*bc3d5698SJohn Baldwin	vbic	d26, d22, d21
2351*bc3d5698SJohn Baldwin	vbic	d27, d23, d22
2352*bc3d5698SJohn Baldwin	vld1.64	{q0}, [r0,:64]		@ restore A[0..1][0]
2353*bc3d5698SJohn Baldwin	veor	d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
2354*bc3d5698SJohn Baldwin	vbic	d26, d24, d23
2355*bc3d5698SJohn Baldwin	veor	d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
2356*bc3d5698SJohn Baldwin	vbic	d27, d28, d24
2357*bc3d5698SJohn Baldwin	veor	d22, d22, d26		@ A[4][2] ^= (~A[4][3] & A[4][4])
2358*bc3d5698SJohn Baldwin	vbic	d26, d29, d28
2359*bc3d5698SJohn Baldwin	veor	d23, d23, d27		@ A[4][3] ^= (~A[4][4] & A[4][0])
2360*bc3d5698SJohn Baldwin	veor	d0,  d0,  d25		@ A[0][0] ^= Iota[i]
2361*bc3d5698SJohn Baldwin	veor	d24, d24, d26		@ A[4][4] ^= (~A[4][0] & A[4][1])
2362*bc3d5698SJohn Baldwin
2363*bc3d5698SJohn Baldwin	subs	r3, r3, #1
2364*bc3d5698SJohn Baldwin	bne	.Loop_neon
2365*bc3d5698SJohn Baldwin
/*
 * Return: 0xe12fff1e is the ARM-state encoding of "bx lr", emitted as data.
 * NOTE(review): the top of this file can select Thumb-2 (.thumb under
 * __thumb2__); confirm how this ARM-state word is meant to behave in such a
 * build.
 */
2366*bc3d5698SJohn Baldwin.word	0xe12fff1e
2367*bc3d5698SJohn Baldwin.size	KeccakF1600_neon,.-KeccakF1600_neon
2368*bc3d5698SJohn Baldwin
/*
 * SHA3_absorb_neon: XOR whole input blocks into the state, running
 * KeccakF1600_neon after each block.
 *
 *   r0 = state pointer (25 64-bit lanes, accessed with :64 alignment
 *        qualifiers)
 *   r1 = input pointer ("inp"), kept in r4
 *   r2 = input length in bytes ("len"), kept in r5
 *   r3 = block size in bytes ("bsz"), kept in r6
 *
 * Returns in r0 the number of input bytes left unprocessed, i.e. len after
 * bsz has been subtracted as many times as is possible without borrowing.
 *
 * r4-r6, lr and d8-d15 are saved on entry and restored on exit.
 *
 * The compare ladder in the loop only distinguishes block sizes that are
 * multiples of 8, up to 25 lanes. Assumes bsz is such a value and non-zero;
 * TODO confirm against callers.
 */
2369*bc3d5698SJohn Baldwin.globl	SHA3_absorb_neon
2370*bc3d5698SJohn Baldwin.type	SHA3_absorb_neon, %function
2371*bc3d5698SJohn Baldwin.align	5
2372*bc3d5698SJohn BaldwinSHA3_absorb_neon:
2373*bc3d5698SJohn Baldwin	stmdb	sp!, {r4,r5,r6,lr}
2374*bc3d5698SJohn Baldwin	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2375*bc3d5698SJohn Baldwin
2376*bc3d5698SJohn Baldwin	mov	r4, r1			@ inp
2377*bc3d5698SJohn Baldwin	mov	r5, r2			@ len
2378*bc3d5698SJohn Baldwin	mov	r6, r3			@ bsz
2379*bc3d5698SJohn Baldwin
/*
 * Load the state. Memory is walked sequentially, but rows 0/1 and rows 2/3
 * are interleaved into even/odd d registers so that each q register holds
 * the same column of two rows.
 */
2380*bc3d5698SJohn Baldwin	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
2381*bc3d5698SJohn Baldwin	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
2382*bc3d5698SJohn Baldwin	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
2383*bc3d5698SJohn Baldwin	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
2384*bc3d5698SJohn Baldwin	vld1.32	{d8}, [r0,:64]!		@ A[0][4]
2385*bc3d5698SJohn Baldwin
2386*bc3d5698SJohn Baldwin	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
2387*bc3d5698SJohn Baldwin	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
2388*bc3d5698SJohn Baldwin	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
2389*bc3d5698SJohn Baldwin	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
2390*bc3d5698SJohn Baldwin	vld1.32	{d9}, [r0,:64]!		@ A[1][4]
2391*bc3d5698SJohn Baldwin
2392*bc3d5698SJohn Baldwin	vld1.32	{d10}, [r0,:64]!		@ A[2][0]
2393*bc3d5698SJohn Baldwin	vld1.32	{d12}, [r0,:64]!		@ A[2][1]
2394*bc3d5698SJohn Baldwin	vld1.32	{d14}, [r0,:64]!		@ A[2][2]
2395*bc3d5698SJohn Baldwin	vld1.32	{d16}, [r0,:64]!		@ A[2][3]
2396*bc3d5698SJohn Baldwin	vld1.32	{d18}, [r0,:64]!		@ A[2][4]
2397*bc3d5698SJohn Baldwin
2398*bc3d5698SJohn Baldwin	vld1.32	{d11}, [r0,:64]!		@ A[3][0]
2399*bc3d5698SJohn Baldwin	vld1.32	{d13}, [r0,:64]!		@ A[3][1]
2400*bc3d5698SJohn Baldwin	vld1.32	{d15}, [r0,:64]!		@ A[3][2]
2401*bc3d5698SJohn Baldwin	vld1.32	{d17}, [r0,:64]!		@ A[3][3]
2402*bc3d5698SJohn Baldwin	vld1.32	{d19}, [r0,:64]!		@ A[3][4]
2403*bc3d5698SJohn Baldwin
2404*bc3d5698SJohn Baldwin	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..3]
2405*bc3d5698SJohn Baldwin	vld1.32	{d24}, [r0,:64]		@ A[4][4]
/* 24 lanes were stepped over with writeback (the last load has none), so go back 24*8 bytes */
2406*bc3d5698SJohn Baldwin	sub	r0, r0, #24*8		@ rewind
2407*bc3d5698SJohn Baldwin	b	.Loop_absorb_neon
2408*bc3d5698SJohn Baldwin
2409*bc3d5698SJohn Baldwin.align	4
2410*bc3d5698SJohn Baldwin.Loop_absorb_neon:
/* stop once fewer than bsz bytes remain; r5 is only updated when a full block is available */
2411*bc3d5698SJohn Baldwin	subs	r12, r5, r6		@ len - bsz
2412*bc3d5698SJohn Baldwin	blo	.Labsorbed_neon
2413*bc3d5698SJohn Baldwin	mov	r5, r12
2414*bc3d5698SJohn Baldwin
/*
 * XOR one 8-byte input lane at a time into the state, in memory order.
 * Each cmp against an even lane count serves two exits: blo leaves after
 * the odd-numbered lane when bsz is below that count, beq leaves after the
 * following lane when bsz equals it. The vld1/veor instructions in between
 * leave the flags alone.
 */
2415*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!		@ endian-neutral loads...
2416*bc3d5698SJohn Baldwin	cmp	r6, #8*2
2417*bc3d5698SJohn Baldwin	veor	d0, d0, d31		@ A[0][0] ^= *inp++
2418*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2419*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2420*bc3d5698SJohn Baldwin	veor	d2, d2, d31		@ A[0][1] ^= *inp++
2421*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2422*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2423*bc3d5698SJohn Baldwin	cmp	r6, #8*4
2424*bc3d5698SJohn Baldwin	veor	d4, d4, d31		@ A[0][2] ^= *inp++
2425*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2426*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2427*bc3d5698SJohn Baldwin	veor	d6, d6, d31		@ A[0][3] ^= *inp++
2428*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2429*bc3d5698SJohn Baldwin	vld1.8	{d31},[r4]!
2430*bc3d5698SJohn Baldwin	cmp	r6, #8*6
2431*bc3d5698SJohn Baldwin	veor	d8, d8, d31		@ A[0][4] ^= *inp++
2432*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2433*bc3d5698SJohn Baldwin
2434*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2435*bc3d5698SJohn Baldwin	veor	d1, d1, d31		@ A[1][0] ^= *inp++
2436*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2437*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2438*bc3d5698SJohn Baldwin	cmp	r6, #8*8
2439*bc3d5698SJohn Baldwin	veor	d3, d3, d31		@ A[1][1] ^= *inp++
2440*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2441*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2442*bc3d5698SJohn Baldwin	veor	d5, d5, d31		@ A[1][2] ^= *inp++
2443*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2444*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2445*bc3d5698SJohn Baldwin	cmp	r6, #8*10
2446*bc3d5698SJohn Baldwin	veor	d7, d7, d31		@ A[1][3] ^= *inp++
2447*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2448*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2449*bc3d5698SJohn Baldwin	veor	d9, d9, d31		@ A[1][4] ^= *inp++
2450*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2451*bc3d5698SJohn Baldwin
2452*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2453*bc3d5698SJohn Baldwin	cmp	r6, #8*12
2454*bc3d5698SJohn Baldwin	veor	d10, d10, d31		@ A[2][0] ^= *inp++
2455*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2456*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2457*bc3d5698SJohn Baldwin	veor	d12, d12, d31		@ A[2][1] ^= *inp++
2458*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2459*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2460*bc3d5698SJohn Baldwin	cmp	r6, #8*14
2461*bc3d5698SJohn Baldwin	veor	d14, d14, d31		@ A[2][2] ^= *inp++
2462*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2463*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2464*bc3d5698SJohn Baldwin	veor	d16, d16, d31		@ A[2][3] ^= *inp++
2465*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2466*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2467*bc3d5698SJohn Baldwin	cmp	r6, #8*16
2468*bc3d5698SJohn Baldwin	veor	d18, d18, d31		@ A[2][4] ^= *inp++
2469*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2470*bc3d5698SJohn Baldwin
2471*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2472*bc3d5698SJohn Baldwin	veor	d11, d11, d31		@ A[3][0] ^= *inp++
2473*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2474*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2475*bc3d5698SJohn Baldwin	cmp	r6, #8*18
2476*bc3d5698SJohn Baldwin	veor	d13, d13, d31		@ A[3][1] ^= *inp++
2477*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2478*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2479*bc3d5698SJohn Baldwin	veor	d15, d15, d31		@ A[3][2] ^= *inp++
2480*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2481*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2482*bc3d5698SJohn Baldwin	cmp	r6, #8*20
2483*bc3d5698SJohn Baldwin	veor	d17, d17, d31		@ A[3][3] ^= *inp++
2484*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2485*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2486*bc3d5698SJohn Baldwin	veor	d19, d19, d31		@ A[3][4] ^= *inp++
2487*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2488*bc3d5698SJohn Baldwin
2489*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2490*bc3d5698SJohn Baldwin	cmp	r6, #8*22
2491*bc3d5698SJohn Baldwin	veor	d20, d20, d31		@ A[4][0] ^= *inp++
2492*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2493*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2494*bc3d5698SJohn Baldwin	veor	d21, d21, d31		@ A[4][1] ^= *inp++
2495*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2496*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2497*bc3d5698SJohn Baldwin	cmp	r6, #8*24
2498*bc3d5698SJohn Baldwin	veor	d22, d22, d31		@ A[4][2] ^= *inp++
2499*bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2500*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2501*bc3d5698SJohn Baldwin	veor	d23, d23, d31		@ A[4][3] ^= *inp++
2502*bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2503*bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2504*bc3d5698SJohn Baldwin	veor	d24, d24, d31		@ A[4][4] ^= *inp++
2505*bc3d5698SJohn Baldwin
2506*bc3d5698SJohn Baldwin.Lprocess_neon:
/* permute the register-resident state; r0 (start of the state buffer) doubles as its spill area */
2507*bc3d5698SJohn Baldwin	bl	KeccakF1600_neon
2508*bc3d5698SJohn Baldwin	b	.Loop_absorb_neon
2509*bc3d5698SJohn Baldwin
2510*bc3d5698SJohn Baldwin.align	4
2511*bc3d5698SJohn Baldwin.Labsorbed_neon:
/* write all 25 lanes back to memory in the same order they were loaded */
2512*bc3d5698SJohn Baldwin	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
2513*bc3d5698SJohn Baldwin	vst1.32	{d2}, [r0,:64]!
2514*bc3d5698SJohn Baldwin	vst1.32	{d4}, [r0,:64]!
2515*bc3d5698SJohn Baldwin	vst1.32	{d6}, [r0,:64]!
2516*bc3d5698SJohn Baldwin	vst1.32	{d8}, [r0,:64]!
2517*bc3d5698SJohn Baldwin
2518*bc3d5698SJohn Baldwin	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
2519*bc3d5698SJohn Baldwin	vst1.32	{d3}, [r0,:64]!
2520*bc3d5698SJohn Baldwin	vst1.32	{d5}, [r0,:64]!
2521*bc3d5698SJohn Baldwin	vst1.32	{d7}, [r0,:64]!
2522*bc3d5698SJohn Baldwin	vst1.32	{d9}, [r0,:64]!
2523*bc3d5698SJohn Baldwin
2524*bc3d5698SJohn Baldwin	vst1.32	{d10}, [r0,:64]!		@ A[2][0..4]
2525*bc3d5698SJohn Baldwin	vst1.32	{d12}, [r0,:64]!
2526*bc3d5698SJohn Baldwin	vst1.32	{d14}, [r0,:64]!
2527*bc3d5698SJohn Baldwin	vst1.32	{d16}, [r0,:64]!
2528*bc3d5698SJohn Baldwin	vst1.32	{d18}, [r0,:64]!
2529*bc3d5698SJohn Baldwin
2530*bc3d5698SJohn Baldwin	vst1.32	{d11}, [r0,:64]!		@ A[3][0..4]
2531*bc3d5698SJohn Baldwin	vst1.32	{d13}, [r0,:64]!
2532*bc3d5698SJohn Baldwin	vst1.32	{d15}, [r0,:64]!
2533*bc3d5698SJohn Baldwin	vst1.32	{d17}, [r0,:64]!
2534*bc3d5698SJohn Baldwin	vst1.32	{d19}, [r0,:64]!
2535*bc3d5698SJohn Baldwin
2536*bc3d5698SJohn Baldwin	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
2537*bc3d5698SJohn Baldwin	vst1.32	{d24}, [r0,:64]
2538*bc3d5698SJohn Baldwin
2539*bc3d5698SJohn Baldwin	mov	r0, r5			@ return value
2540*bc3d5698SJohn Baldwin	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2541*bc3d5698SJohn Baldwin	ldmia	sp!, {r4,r5,r6,pc}
2542*bc3d5698SJohn Baldwin.size	SHA3_absorb_neon,.-SHA3_absorb_neon
2543*bc3d5698SJohn Baldwin
/*
 * SHA3_squeeze_neon: copy output bytes out of the state, running
 * KeccakF1600_neon each time a full block has been emitted and more output
 * is still wanted.
 *
 *   r0 = state pointer ("A_flat"); stays at the start of the state
 *   r1 = output pointer ("out"), kept in r4
 *   r2 = number of bytes to produce ("len"), kept in r5
 *   r3 = block size in bytes ("bsz"), kept in r6
 *
 * r12 is the read cursor inside the state and r14 counts the bytes left in
 * the current block. r14 is lr, so the return address is taken from the
 * stack by the final ldmia into pc.
 *
 * d8-d15 are saved only around the permutation, the one place in this
 * routine where they are written.
 *
 * NOTE(review): with r2 == 0 on entry the tail path still stores one byte,
 * because its first strb precedes the first branch. Presumably callers
 * never request zero bytes; confirm.
 */
2544*bc3d5698SJohn Baldwin.globl	SHA3_squeeze_neon
2545*bc3d5698SJohn Baldwin.type	SHA3_squeeze_neon, %function
2546*bc3d5698SJohn Baldwin.align	5
2547*bc3d5698SJohn BaldwinSHA3_squeeze_neon:
2548*bc3d5698SJohn Baldwin	stmdb	sp!, {r4,r5,r6,lr}
2549*bc3d5698SJohn Baldwin
2550*bc3d5698SJohn Baldwin	mov	r4, r1			@ out
2551*bc3d5698SJohn Baldwin	mov	r5, r2			@ len
2552*bc3d5698SJohn Baldwin	mov	r6, r3			@ bsz
2553*bc3d5698SJohn Baldwin	mov	r12, r0			@ A_flat
2554*bc3d5698SJohn Baldwin	mov	r14, r3			@ bsz
2555*bc3d5698SJohn Baldwin	b	.Loop_squeeze_neon
2556*bc3d5698SJohn Baldwin
2557*bc3d5698SJohn Baldwin.align	4
2558*bc3d5698SJohn Baldwin.Loop_squeeze_neon:
/* fewer than 8 bytes wanted: finish in the byte-wise tail */
2559*bc3d5698SJohn Baldwin	cmp	r5, #8
2560*bc3d5698SJohn Baldwin	blo	.Lsqueeze_neon_tail
/* copy one whole lane from the state to the output through d0 */
2561*bc3d5698SJohn Baldwin	vld1.32	{d0}, [r12]!
2562*bc3d5698SJohn Baldwin	vst1.8	{d0}, [r4]!		@ endian-neutral store
2563*bc3d5698SJohn Baldwin
2564*bc3d5698SJohn Baldwin	subs	r5, r5, #8		@ len -= 8
2565*bc3d5698SJohn Baldwin	beq	.Lsqueeze_neon_done
2566*bc3d5698SJohn Baldwin
2567*bc3d5698SJohn Baldwin	subs	r14, r14, #8		@ bsz -= 8
2568*bc3d5698SJohn Baldwin	bhi	.Loop_squeeze_neon
2569*bc3d5698SJohn Baldwin
/* block exhausted: bring the state into registers, permute it, write it back */
2570*bc3d5698SJohn Baldwin	vstmdb	sp!,  {d8,d9,d10,d11,d12,d13,d14,d15}
2571*bc3d5698SJohn Baldwin
2572*bc3d5698SJohn Baldwin	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
2573*bc3d5698SJohn Baldwin	vld1.32	{d2}, [r0,:64]!
2574*bc3d5698SJohn Baldwin	vld1.32	{d4}, [r0,:64]!
2575*bc3d5698SJohn Baldwin	vld1.32	{d6}, [r0,:64]!
2576*bc3d5698SJohn Baldwin	vld1.32	{d8}, [r0,:64]!
2577*bc3d5698SJohn Baldwin
2578*bc3d5698SJohn Baldwin	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
2579*bc3d5698SJohn Baldwin	vld1.32	{d3}, [r0,:64]!
2580*bc3d5698SJohn Baldwin	vld1.32	{d5}, [r0,:64]!
2581*bc3d5698SJohn Baldwin	vld1.32	{d7}, [r0,:64]!
2582*bc3d5698SJohn Baldwin	vld1.32	{d9}, [r0,:64]!
2583*bc3d5698SJohn Baldwin
2584*bc3d5698SJohn Baldwin	vld1.32	{d10}, [r0,:64]!		@ A[2][0..4]
2585*bc3d5698SJohn Baldwin	vld1.32	{d12}, [r0,:64]!
2586*bc3d5698SJohn Baldwin	vld1.32	{d14}, [r0,:64]!
2587*bc3d5698SJohn Baldwin	vld1.32	{d16}, [r0,:64]!
2588*bc3d5698SJohn Baldwin	vld1.32	{d18}, [r0,:64]!
2589*bc3d5698SJohn Baldwin
2590*bc3d5698SJohn Baldwin	vld1.32	{d11}, [r0,:64]!		@ A[3][0..4]
2591*bc3d5698SJohn Baldwin	vld1.32	{d13}, [r0,:64]!
2592*bc3d5698SJohn Baldwin	vld1.32	{d15}, [r0,:64]!
2593*bc3d5698SJohn Baldwin	vld1.32	{d17}, [r0,:64]!
2594*bc3d5698SJohn Baldwin	vld1.32	{d19}, [r0,:64]!
2595*bc3d5698SJohn Baldwin
2596*bc3d5698SJohn Baldwin	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
2597*bc3d5698SJohn Baldwin	vld1.32	{d24}, [r0,:64]
2598*bc3d5698SJohn Baldwin	sub	r0, r0, #24*8		@ rewind
2599*bc3d5698SJohn Baldwin
/* bl overwrites r14; the block countdown is re-initialised from r6 further down */
2600*bc3d5698SJohn Baldwin	bl	KeccakF1600_neon
2601*bc3d5698SJohn Baldwin
/* restart the read cursor at the first lane; r12 also remembers the base for the rewind of r0 */
2602*bc3d5698SJohn Baldwin	mov	r12, r0			@ A_flat
2603*bc3d5698SJohn Baldwin	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
2604*bc3d5698SJohn Baldwin	vst1.32	{d2}, [r0,:64]!
2605*bc3d5698SJohn Baldwin	vst1.32	{d4}, [r0,:64]!
2606*bc3d5698SJohn Baldwin	vst1.32	{d6}, [r0,:64]!
2607*bc3d5698SJohn Baldwin	vst1.32	{d8}, [r0,:64]!
2608*bc3d5698SJohn Baldwin
2609*bc3d5698SJohn Baldwin	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
2610*bc3d5698SJohn Baldwin	vst1.32	{d3}, [r0,:64]!
2611*bc3d5698SJohn Baldwin	vst1.32	{d5}, [r0,:64]!
2612*bc3d5698SJohn Baldwin	vst1.32	{d7}, [r0,:64]!
2613*bc3d5698SJohn Baldwin	vst1.32	{d9}, [r0,:64]!
2614*bc3d5698SJohn Baldwin
2615*bc3d5698SJohn Baldwin	vst1.32	{d10}, [r0,:64]!		@ A[2][0..4]
2616*bc3d5698SJohn Baldwin	vst1.32	{d12}, [r0,:64]!
2617*bc3d5698SJohn Baldwin	vst1.32	{d14}, [r0,:64]!
2618*bc3d5698SJohn Baldwin	vst1.32	{d16}, [r0,:64]!
2619*bc3d5698SJohn Baldwin	vst1.32	{d18}, [r0,:64]!
2620*bc3d5698SJohn Baldwin
2621*bc3d5698SJohn Baldwin	vst1.32	{d11}, [r0,:64]!		@ A[3][0..4]
2622*bc3d5698SJohn Baldwin	vst1.32	{d13}, [r0,:64]!
2623*bc3d5698SJohn Baldwin	vst1.32	{d15}, [r0,:64]!
2624*bc3d5698SJohn Baldwin	vst1.32	{d17}, [r0,:64]!
2625*bc3d5698SJohn Baldwin	vst1.32	{d19}, [r0,:64]!
2626*bc3d5698SJohn Baldwin
2627*bc3d5698SJohn Baldwin	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
2628*bc3d5698SJohn Baldwin	mov	r14, r6			@ bsz
2629*bc3d5698SJohn Baldwin	vst1.32	{d24}, [r0,:64]
2630*bc3d5698SJohn Baldwin	mov	r0,  r12		@ rewind
2631*bc3d5698SJohn Baldwin
2632*bc3d5698SJohn Baldwin	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2633*bc3d5698SJohn Baldwin	b	.Loop_squeeze_neon
2634*bc3d5698SJohn Baldwin
2635*bc3d5698SJohn Baldwin.align	4
2636*bc3d5698SJohn Baldwin.Lsqueeze_neon_tail:
/*
 * Reached with r5 < 8. The next lane is read as two words and stored byte
 * by byte, lowest byte first. Each cmp against an even count serves two
 * exits (blo, then beq after one more byte); strb and mov without the s
 * suffix leave the flags alone. At most seven bytes are written.
 */
2637*bc3d5698SJohn Baldwin	ldmia	r12, {r2,r3}
2638*bc3d5698SJohn Baldwin	cmp	r5, #2
2639*bc3d5698SJohn Baldwin	strb	r2, [r4],#1		@ endian-neutral store
2640*bc3d5698SJohn Baldwin	mov	r2, r2, lsr#8
2641*bc3d5698SJohn Baldwin	blo	.Lsqueeze_neon_done
2642*bc3d5698SJohn Baldwin	strb	r2, [r4], #1
2643*bc3d5698SJohn Baldwin	mov	r2, r2, lsr#8
2644*bc3d5698SJohn Baldwin	beq	.Lsqueeze_neon_done
2645*bc3d5698SJohn Baldwin	strb	r2, [r4], #1
2646*bc3d5698SJohn Baldwin	mov	r2, r2, lsr#8
2647*bc3d5698SJohn Baldwin	cmp	r5, #4
2648*bc3d5698SJohn Baldwin	blo	.Lsqueeze_neon_done
2649*bc3d5698SJohn Baldwin	strb	r2, [r4], #1
2650*bc3d5698SJohn Baldwin	beq	.Lsqueeze_neon_done
2651*bc3d5698SJohn Baldwin
2652*bc3d5698SJohn Baldwin	strb	r3, [r4], #1
2653*bc3d5698SJohn Baldwin	mov	r3, r3, lsr#8
2654*bc3d5698SJohn Baldwin	cmp	r5, #6
2655*bc3d5698SJohn Baldwin	blo	.Lsqueeze_neon_done
2656*bc3d5698SJohn Baldwin	strb	r3, [r4], #1
2657*bc3d5698SJohn Baldwin	mov	r3, r3, lsr#8
2658*bc3d5698SJohn Baldwin	beq	.Lsqueeze_neon_done
2659*bc3d5698SJohn Baldwin	strb	r3, [r4], #1
2660*bc3d5698SJohn Baldwin
2661*bc3d5698SJohn Baldwin.Lsqueeze_neon_done:
2662*bc3d5698SJohn Baldwin	ldmia	sp!, {r4,r5,r6,pc}
2663*bc3d5698SJohn Baldwin.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
2664*bc3d5698SJohn Baldwin#endif
/*
 * NUL-terminated ASCII identification string embedded in the object:
 * "Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
 */
2665*bc3d5698SJohn Baldwin.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2666*bc3d5698SJohn Baldwin.align	2
2667*bc3d5698SJohn Baldwin.align	2
2668