xref: /freebsd/sys/crypto/openssl/arm/keccak1600-armv4.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43)
1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
2bc3d5698SJohn Baldwin#include "arm_arch.h"
3bc3d5698SJohn Baldwin
@ Pick the instruction encoding for everything that follows: unified
@ syntax with Thumb encoding when the compiler defines __thumb2__,
@ 32-bit ARM encoding otherwise.  The code below relies on the same
@ macro to choose between ldrd/strd pairs and single-word ldr/str.
4bc3d5698SJohn Baldwin#if defined(__thumb2__)
5bc3d5698SJohn Baldwin.syntax	unified
6bc3d5698SJohn Baldwin.thumb
7bc3d5698SJohn Baldwin#else
8bc3d5698SJohn Baldwin.code	32
9bc3d5698SJohn Baldwin#endif
10bc3d5698SJohn Baldwin
@ Both the constant table and the code are emitted into .text.
11*c0855eaaSJohn Baldwin.text
12*c0855eaaSJohn Baldwin
@ iotas32: Keccak-f[1600] iota round constants, 24 entries of 8 bytes.
@
@ Each 64-bit constant is stored as two 32-bit words in bit-interleaved
@ form: the first word collects the even-numbered bits of the constant,
@ the second word the odd-numbered bits.  The comment on each entry
@ gives the conventional 64-bit value that the pair encodes.
@
@ The round code locates the table with "adr", which is PC-relative, so
@ the table has to stay in .text near KeccakF1600_int.  The counter kept
@ at [sp,#444] is added to the table address as a byte offset, one entry
@ is fetched with a single ldmia, and its two words are XORed into the
@ two halves of R[0][0].
13bc3d5698SJohn Baldwin.type	iotas32, %object
14bc3d5698SJohn Baldwin.align	5	@ 2^5 = 32-byte boundary
15bc3d5698SJohn Baldwiniotas32:
16bc3d5698SJohn Baldwin.long	0x00000001, 0x00000000	@ RC[ 0] = 0x0000000000000001
17bc3d5698SJohn Baldwin.long	0x00000000, 0x00000089	@ RC[ 1] = 0x0000000000008082
18bc3d5698SJohn Baldwin.long	0x00000000, 0x8000008b	@ RC[ 2] = 0x800000000000808a
19bc3d5698SJohn Baldwin.long	0x00000000, 0x80008080	@ RC[ 3] = 0x8000000080008000
20bc3d5698SJohn Baldwin.long	0x00000001, 0x0000008b	@ RC[ 4] = 0x000000000000808b
21bc3d5698SJohn Baldwin.long	0x00000001, 0x00008000	@ RC[ 5] = 0x0000000080000001
22bc3d5698SJohn Baldwin.long	0x00000001, 0x80008088	@ RC[ 6] = 0x8000000080008081
23bc3d5698SJohn Baldwin.long	0x00000001, 0x80000082	@ RC[ 7] = 0x8000000000008009
24bc3d5698SJohn Baldwin.long	0x00000000, 0x0000000b	@ RC[ 8] = 0x000000000000008a
25bc3d5698SJohn Baldwin.long	0x00000000, 0x0000000a	@ RC[ 9] = 0x0000000000000088
26bc3d5698SJohn Baldwin.long	0x00000001, 0x00008082	@ RC[10] = 0x0000000080008009
27bc3d5698SJohn Baldwin.long	0x00000000, 0x00008003	@ RC[11] = 0x000000008000000a
28bc3d5698SJohn Baldwin.long	0x00000001, 0x0000808b	@ RC[12] = 0x000000008000808b
29bc3d5698SJohn Baldwin.long	0x00000001, 0x8000000b	@ RC[13] = 0x800000000000008b
30bc3d5698SJohn Baldwin.long	0x00000001, 0x8000008a	@ RC[14] = 0x8000000000008089
31bc3d5698SJohn Baldwin.long	0x00000001, 0x80000081	@ RC[15] = 0x8000000000008003
32bc3d5698SJohn Baldwin.long	0x00000000, 0x80000081	@ RC[16] = 0x8000000000008002
33bc3d5698SJohn Baldwin.long	0x00000000, 0x80000008	@ RC[17] = 0x8000000000000080
34bc3d5698SJohn Baldwin.long	0x00000000, 0x00000083	@ RC[18] = 0x000000000000800a
35bc3d5698SJohn Baldwin.long	0x00000000, 0x80008003	@ RC[19] = 0x800000008000000a
36bc3d5698SJohn Baldwin.long	0x00000001, 0x80008088	@ RC[20] = 0x8000000080008081
37bc3d5698SJohn Baldwin.long	0x00000000, 0x80000088	@ RC[21] = 0x8000000000008080
38bc3d5698SJohn Baldwin.long	0x00000001, 0x00008000	@ RC[22] = 0x0000000080000001
39bc3d5698SJohn Baldwin.long	0x00000000, 0x80008082	@ RC[23] = 0x8000000080008008
40bc3d5698SJohn Baldwin.size	iotas32,.-iotas32
41bc3d5698SJohn Baldwin
42bc3d5698SJohn Baldwin.type	KeccakF1600_int, %function
43bc3d5698SJohn Baldwin.align	5
44bc3d5698SJohn BaldwinKeccakF1600_int:
45bc3d5698SJohn Baldwin	add	r9,sp,#176
46bc3d5698SJohn Baldwin	add	r12,sp,#0
47bc3d5698SJohn Baldwin	add	r10,sp,#40
48bc3d5698SJohn Baldwin	ldmia	r9,{r4,r5,r6,r7,r8,r9}		@ A[4][2..4]
49bc3d5698SJohn BaldwinKeccakF1600_enter:
50bc3d5698SJohn Baldwin	str	lr,[sp,#440]
51bc3d5698SJohn Baldwin	eor	r11,r11,r11
52bc3d5698SJohn Baldwin	str	r11,[sp,#444]
53bc3d5698SJohn Baldwin	b	.Lround2x
54bc3d5698SJohn Baldwin
55bc3d5698SJohn Baldwin.align	4
56bc3d5698SJohn Baldwin.Lround2x:
57bc3d5698SJohn Baldwin	ldmia	r12,{r0,r1,r2,r3}		@ A[0][0..1]
58bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][0..1]
59bc3d5698SJohn Baldwin#ifdef	__thumb2__
60bc3d5698SJohn Baldwin	eor	r0,r0,r10
61bc3d5698SJohn Baldwin	eor	r1,r1,r11
62bc3d5698SJohn Baldwin	eor	r2,r2,r12
63bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#56]
64bc3d5698SJohn Baldwin	eor	r3,r3,r14
65bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#64]
66bc3d5698SJohn Baldwin	eor	r4,r4,r10
67bc3d5698SJohn Baldwin	eor	r5,r5,r11
68bc3d5698SJohn Baldwin	eor	r6,r6,r12
69bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#72]
70bc3d5698SJohn Baldwin	eor	r7,r7,r14
71bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#80]
72bc3d5698SJohn Baldwin	eor	r8,r8,r10
73bc3d5698SJohn Baldwin	eor	r9,r9,r11
74bc3d5698SJohn Baldwin	eor	r0,r0,r12
75bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#88]
76bc3d5698SJohn Baldwin	eor	r1,r1,r14
77bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#96]
78bc3d5698SJohn Baldwin	eor	r2,r2,r10
79bc3d5698SJohn Baldwin	eor	r3,r3,r11
80bc3d5698SJohn Baldwin	eor	r4,r4,r12
81bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#104]
82bc3d5698SJohn Baldwin	eor	r5,r5,r14
83bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#112]
84bc3d5698SJohn Baldwin	eor	r6,r6,r10
85bc3d5698SJohn Baldwin	eor	r7,r7,r11
86bc3d5698SJohn Baldwin	eor	r8,r8,r12
87bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#120]
88bc3d5698SJohn Baldwin	eor	r9,r9,r14
89bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#128]
90bc3d5698SJohn Baldwin	eor	r0,r0,r10
91bc3d5698SJohn Baldwin	eor	r1,r1,r11
92bc3d5698SJohn Baldwin	eor	r2,r2,r12
93bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#136]
94bc3d5698SJohn Baldwin	eor	r3,r3,r14
95bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#144]
96bc3d5698SJohn Baldwin	eor	r4,r4,r10
97bc3d5698SJohn Baldwin	eor	r5,r5,r11
98bc3d5698SJohn Baldwin	eor	r6,r6,r12
99bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#152]
100bc3d5698SJohn Baldwin	eor	r7,r7,r14
101bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#160]
102bc3d5698SJohn Baldwin	eor	r8,r8,r10
103bc3d5698SJohn Baldwin	eor	r9,r9,r11
104bc3d5698SJohn Baldwin	eor	r0,r0,r12
105bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#168]
106bc3d5698SJohn Baldwin	eor	r1,r1,r14
107bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#16]
108bc3d5698SJohn Baldwin	eor	r2,r2,r10
109bc3d5698SJohn Baldwin	eor	r3,r3,r11
110bc3d5698SJohn Baldwin	eor	r4,r4,r12
111bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#24]
112bc3d5698SJohn Baldwin	eor	r5,r5,r14
113bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#32]
114bc3d5698SJohn Baldwin#else
115bc3d5698SJohn Baldwin	eor	r0,r0,r10
116bc3d5698SJohn Baldwin	add	r10,sp,#56
117bc3d5698SJohn Baldwin	eor	r1,r1,r11
118bc3d5698SJohn Baldwin	eor	r2,r2,r12
119bc3d5698SJohn Baldwin	eor	r3,r3,r14
120bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][2..3]
121bc3d5698SJohn Baldwin	eor	r4,r4,r10
122bc3d5698SJohn Baldwin	add	r10,sp,#72
123bc3d5698SJohn Baldwin	eor	r5,r5,r11
124bc3d5698SJohn Baldwin	eor	r6,r6,r12
125bc3d5698SJohn Baldwin	eor	r7,r7,r14
126bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][4]..A[2][0]
127bc3d5698SJohn Baldwin	eor	r8,r8,r10
128bc3d5698SJohn Baldwin	add	r10,sp,#88
129bc3d5698SJohn Baldwin	eor	r9,r9,r11
130bc3d5698SJohn Baldwin	eor	r0,r0,r12
131bc3d5698SJohn Baldwin	eor	r1,r1,r14
132bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[2][1..2]
133bc3d5698SJohn Baldwin	eor	r2,r2,r10
134bc3d5698SJohn Baldwin	add	r10,sp,#104
135bc3d5698SJohn Baldwin	eor	r3,r3,r11
136bc3d5698SJohn Baldwin	eor	r4,r4,r12
137bc3d5698SJohn Baldwin	eor	r5,r5,r14
138bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[2][3..4]
139bc3d5698SJohn Baldwin	eor	r6,r6,r10
140bc3d5698SJohn Baldwin	add	r10,sp,#120
141bc3d5698SJohn Baldwin	eor	r7,r7,r11
142bc3d5698SJohn Baldwin	eor	r8,r8,r12
143bc3d5698SJohn Baldwin	eor	r9,r9,r14
144bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][0..1]
145bc3d5698SJohn Baldwin	eor	r0,r0,r10
146bc3d5698SJohn Baldwin	add	r10,sp,#136
147bc3d5698SJohn Baldwin	eor	r1,r1,r11
148bc3d5698SJohn Baldwin	eor	r2,r2,r12
149bc3d5698SJohn Baldwin	eor	r3,r3,r14
150bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][2..3]
151bc3d5698SJohn Baldwin	eor	r4,r4,r10
152bc3d5698SJohn Baldwin	add	r10,sp,#152
153bc3d5698SJohn Baldwin	eor	r5,r5,r11
154bc3d5698SJohn Baldwin	eor	r6,r6,r12
155bc3d5698SJohn Baldwin	eor	r7,r7,r14
156bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][4]..A[4][0]
157bc3d5698SJohn Baldwin	eor	r8,r8,r10
158bc3d5698SJohn Baldwin	ldr	r10,[sp,#168]		@ A[4][1]
159bc3d5698SJohn Baldwin	eor	r9,r9,r11
160bc3d5698SJohn Baldwin	ldr	r11,[sp,#168+4]
161bc3d5698SJohn Baldwin	eor	r0,r0,r12
162bc3d5698SJohn Baldwin	ldr	r12,[sp,#16]		@ A[0][2]
163bc3d5698SJohn Baldwin	eor	r1,r1,r14
164bc3d5698SJohn Baldwin	ldr	r14,[sp,#16+4]
165bc3d5698SJohn Baldwin	eor	r2,r2,r10
166bc3d5698SJohn Baldwin	add	r10,sp,#24
167bc3d5698SJohn Baldwin	eor	r3,r3,r11
168bc3d5698SJohn Baldwin	eor	r4,r4,r12
169bc3d5698SJohn Baldwin	eor	r5,r5,r14
170bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[0][3..4]
171bc3d5698SJohn Baldwin#endif
172bc3d5698SJohn Baldwin	eor	r6,r6,r10
173bc3d5698SJohn Baldwin	eor	r7,r7,r11
174bc3d5698SJohn Baldwin	eor	r8,r8,r12
175bc3d5698SJohn Baldwin	eor	r9,r9,r14
176bc3d5698SJohn Baldwin
177bc3d5698SJohn Baldwin	eor	r10,r0,r5,ror#32-1	@ E[0] = ROL64(C[2], 1) ^ C[0];
178bc3d5698SJohn Baldwin#ifndef	__thumb2__
179bc3d5698SJohn Baldwin	str	r10,[sp,#208]		@ D[1] = E[0]
180bc3d5698SJohn Baldwin#endif
181bc3d5698SJohn Baldwin	eor	r11,r1,r4
182bc3d5698SJohn Baldwin#ifndef	__thumb2__
183bc3d5698SJohn Baldwin	str	r11,[sp,#208+4]
184bc3d5698SJohn Baldwin#else
185bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#208]		@ D[1] = E[0]
186bc3d5698SJohn Baldwin#endif
187bc3d5698SJohn Baldwin	eor	r12,r6,r1,ror#32-1	@ E[1] = ROL64(C[0], 1) ^ C[3];
188bc3d5698SJohn Baldwin	eor	r14,r7,r0
189bc3d5698SJohn Baldwin#ifndef	__thumb2__
190bc3d5698SJohn Baldwin	str	r12,[sp,#232]		@ D[4] = E[1]
191bc3d5698SJohn Baldwin#endif
192bc3d5698SJohn Baldwin	eor	r0,r8,r3,ror#32-1	@ C[0] = ROL64(C[1], 1) ^ C[4];
193bc3d5698SJohn Baldwin#ifndef	__thumb2__
194bc3d5698SJohn Baldwin	str	r14,[sp,#232+4]
195bc3d5698SJohn Baldwin#else
196bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#232]		@ D[4] = E[1]
197bc3d5698SJohn Baldwin#endif
198bc3d5698SJohn Baldwin	eor	r1,r9,r2
199bc3d5698SJohn Baldwin#ifndef	__thumb2__
200bc3d5698SJohn Baldwin	str	r0,[sp,#200]		@ D[0] = C[0]
201bc3d5698SJohn Baldwin#endif
202bc3d5698SJohn Baldwin	eor	r2,r2,r7,ror#32-1	@ C[1] = ROL64(C[3], 1) ^ C[1];
203bc3d5698SJohn Baldwin#ifndef	__thumb2__
204bc3d5698SJohn Baldwin	ldr	r7,[sp,#144]
205bc3d5698SJohn Baldwin#endif
206bc3d5698SJohn Baldwin	eor	r3,r3,r6
207bc3d5698SJohn Baldwin#ifndef	__thumb2__
208bc3d5698SJohn Baldwin	str	r1,[sp,#200+4]
209bc3d5698SJohn Baldwin#else
210bc3d5698SJohn Baldwin	strd	r0,r1,[sp,#200]		@ D[0] = C[0]
211bc3d5698SJohn Baldwin#endif
212bc3d5698SJohn Baldwin#ifndef	__thumb2__
213bc3d5698SJohn Baldwin	ldr	r6,[sp,#144+4]
214bc3d5698SJohn Baldwin#else
215bc3d5698SJohn Baldwin	ldrd	r7,r6,[sp,#144]
216bc3d5698SJohn Baldwin#endif
217bc3d5698SJohn Baldwin#ifndef	__thumb2__
218bc3d5698SJohn Baldwin	str	r2,[sp,#216]		@ D[2] = C[1]
219bc3d5698SJohn Baldwin#endif
220bc3d5698SJohn Baldwin	eor	r4,r4,r9,ror#32-1	@ C[2] = ROL64(C[4], 1) ^ C[2];
221bc3d5698SJohn Baldwin#ifndef	__thumb2__
222bc3d5698SJohn Baldwin	str	r3,[sp,#216+4]
223bc3d5698SJohn Baldwin#else
224bc3d5698SJohn Baldwin	strd	r2,r3,[sp,#216]		@ D[2] = C[1]
225bc3d5698SJohn Baldwin#endif
226bc3d5698SJohn Baldwin	eor	r5,r5,r8
227bc3d5698SJohn Baldwin
228bc3d5698SJohn Baldwin#ifndef	__thumb2__
229bc3d5698SJohn Baldwin	ldr	r8,[sp,#192]
230bc3d5698SJohn Baldwin#endif
231bc3d5698SJohn Baldwin#ifndef	__thumb2__
232bc3d5698SJohn Baldwin	ldr	r9,[sp,#192+4]
233bc3d5698SJohn Baldwin#else
234bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#192]
235bc3d5698SJohn Baldwin#endif
236bc3d5698SJohn Baldwin#ifndef	__thumb2__
237bc3d5698SJohn Baldwin	str	r4,[sp,#224]		@ D[3] = C[2]
238bc3d5698SJohn Baldwin#endif
239bc3d5698SJohn Baldwin	eor	r7,r7,r4
240bc3d5698SJohn Baldwin#ifndef	__thumb2__
241bc3d5698SJohn Baldwin	str	r5,[sp,#224+4]
242bc3d5698SJohn Baldwin#else
243bc3d5698SJohn Baldwin	strd	r4,r5,[sp,#224]		@ D[3] = C[2]
244bc3d5698SJohn Baldwin#endif
245bc3d5698SJohn Baldwin	eor	r6,r6,r5
246bc3d5698SJohn Baldwin#ifndef	__thumb2__
247bc3d5698SJohn Baldwin	ldr	r4,[sp,#0]
248bc3d5698SJohn Baldwin#endif
249bc3d5698SJohn Baldwin	@ mov	r7,r7,ror#32-10		@ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
250bc3d5698SJohn Baldwin	@ mov	r6,r6,ror#32-11
251bc3d5698SJohn Baldwin#ifndef	__thumb2__
252bc3d5698SJohn Baldwin	ldr	r5,[sp,#0+4]
253bc3d5698SJohn Baldwin#else
254bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#0]
255bc3d5698SJohn Baldwin#endif
256bc3d5698SJohn Baldwin	eor	r8,r8,r12
257bc3d5698SJohn Baldwin	eor	r9,r9,r14
258bc3d5698SJohn Baldwin#ifndef	__thumb2__
259bc3d5698SJohn Baldwin	ldr	r12,[sp,#96]
260bc3d5698SJohn Baldwin#endif
261bc3d5698SJohn Baldwin	eor	r0,r0,r4
262bc3d5698SJohn Baldwin#ifndef	__thumb2__
263bc3d5698SJohn Baldwin	ldr	r14,[sp,#96+4]
264bc3d5698SJohn Baldwin#else
265bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#96]
266bc3d5698SJohn Baldwin#endif
267bc3d5698SJohn Baldwin	@ mov	r8,r8,ror#32-7		@ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
268bc3d5698SJohn Baldwin	@ mov	r9,r9,ror#32-7
269bc3d5698SJohn Baldwin	eor	r1,r1,r5		@ C[0] =       A[0][0] ^ C[0];
270bc3d5698SJohn Baldwin	eor	r12,r12,r2
271bc3d5698SJohn Baldwin#ifndef	__thumb2__
272bc3d5698SJohn Baldwin	ldr	r2,[sp,#48]
273bc3d5698SJohn Baldwin#endif
274bc3d5698SJohn Baldwin	eor	r14,r14,r3
275bc3d5698SJohn Baldwin#ifndef	__thumb2__
276bc3d5698SJohn Baldwin	ldr	r3,[sp,#48+4]
277bc3d5698SJohn Baldwin#else
278bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#48]
279bc3d5698SJohn Baldwin#endif
280bc3d5698SJohn Baldwin	mov	r5,r12,ror#32-21		@ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
281bc3d5698SJohn Baldwin	ldr	r12,[sp,#444]			@ load counter
282bc3d5698SJohn Baldwin	eor	r2,r2,r10
283bc3d5698SJohn Baldwin	adr	r10,iotas32
284bc3d5698SJohn Baldwin	mov	r4,r14,ror#32-22
285bc3d5698SJohn Baldwin	add	r14,r10,r12
286bc3d5698SJohn Baldwin	eor	r3,r3,r11
287bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11}		@ iotas[i]
288bc3d5698SJohn Baldwin	bic	r12,r4,r2,ror#32-22
289bc3d5698SJohn Baldwin	bic	r14,r5,r3,ror#32-22
290bc3d5698SJohn Baldwin	mov	r2,r2,ror#32-22		@ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
291bc3d5698SJohn Baldwin	mov	r3,r3,ror#32-22
292bc3d5698SJohn Baldwin	eor	r12,r12,r0
293bc3d5698SJohn Baldwin	eor	r14,r14,r1
294bc3d5698SJohn Baldwin	eor	r10,r10,r12
295bc3d5698SJohn Baldwin	eor	r11,r11,r14
296bc3d5698SJohn Baldwin#ifndef	__thumb2__
297bc3d5698SJohn Baldwin	str	r10,[sp,#240]		@ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
298bc3d5698SJohn Baldwin#endif
299bc3d5698SJohn Baldwin	bic	r12,r6,r4,ror#11
300bc3d5698SJohn Baldwin#ifndef	__thumb2__
301bc3d5698SJohn Baldwin	str	r11,[sp,#240+4]
302bc3d5698SJohn Baldwin#else
303bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#240]		@ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
304bc3d5698SJohn Baldwin#endif
305bc3d5698SJohn Baldwin	bic	r14,r7,r5,ror#10
306bc3d5698SJohn Baldwin	bic	r10,r8,r6,ror#32-(11-7)
307bc3d5698SJohn Baldwin	bic	r11,r9,r7,ror#32-(10-7)
308bc3d5698SJohn Baldwin	eor	r12,r2,r12,ror#32-11
309bc3d5698SJohn Baldwin#ifndef	__thumb2__
310bc3d5698SJohn Baldwin	str	r12,[sp,#248]		@ R[0][1] = C[1] ^ (~C[2] & C[3]);
311bc3d5698SJohn Baldwin#endif
312bc3d5698SJohn Baldwin	eor	r14,r3,r14,ror#32-10
313bc3d5698SJohn Baldwin#ifndef	__thumb2__
314bc3d5698SJohn Baldwin	str	r14,[sp,#248+4]
315bc3d5698SJohn Baldwin#else
316bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#248]		@ R[0][1] = C[1] ^ (~C[2] & C[3]);
317bc3d5698SJohn Baldwin#endif
318bc3d5698SJohn Baldwin	eor	r10,r4,r10,ror#32-7
319bc3d5698SJohn Baldwin	eor	r11,r5,r11,ror#32-7
320bc3d5698SJohn Baldwin#ifndef	__thumb2__
321bc3d5698SJohn Baldwin	str	r10,[sp,#256]		@ R[0][2] = C[2] ^ (~C[3] & C[4]);
322bc3d5698SJohn Baldwin#endif
323bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#32-7
324bc3d5698SJohn Baldwin#ifndef	__thumb2__
325bc3d5698SJohn Baldwin	str	r11,[sp,#256+4]
326bc3d5698SJohn Baldwin#else
327bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#256]		@ R[0][2] = C[2] ^ (~C[3] & C[4]);
328bc3d5698SJohn Baldwin#endif
329bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#32-7
330bc3d5698SJohn Baldwin	eor	r12,r12,r6,ror#32-11
331bc3d5698SJohn Baldwin#ifndef	__thumb2__
332bc3d5698SJohn Baldwin	str	r12,[sp,#264]		@ R[0][3] = C[3] ^ (~C[4] & C[0]);
333bc3d5698SJohn Baldwin#endif
334bc3d5698SJohn Baldwin	eor	r14,r14,r7,ror#32-10
335bc3d5698SJohn Baldwin#ifndef	__thumb2__
336bc3d5698SJohn Baldwin	str	r14,[sp,#264+4]
337bc3d5698SJohn Baldwin#else
338bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#264]		@ R[0][3] = C[3] ^ (~C[4] & C[0]);
339bc3d5698SJohn Baldwin#endif
340bc3d5698SJohn Baldwin	bic	r10,r2,r0
341bc3d5698SJohn Baldwin	add	r14,sp,#224
342bc3d5698SJohn Baldwin#ifndef	__thumb2__
343bc3d5698SJohn Baldwin	ldr	r0,[sp,#24]		@ A[0][3]
344bc3d5698SJohn Baldwin#endif
345bc3d5698SJohn Baldwin	bic	r11,r3,r1
346bc3d5698SJohn Baldwin#ifndef	__thumb2__
347bc3d5698SJohn Baldwin	ldr	r1,[sp,#24+4]
348bc3d5698SJohn Baldwin#else
349bc3d5698SJohn Baldwin	ldrd	r0,r1,[sp,#24]		@ A[0][3]
350bc3d5698SJohn Baldwin#endif
351bc3d5698SJohn Baldwin	eor	r10,r10,r8,ror#32-7
352bc3d5698SJohn Baldwin	eor	r11,r11,r9,ror#32-7
353bc3d5698SJohn Baldwin#ifndef	__thumb2__
354bc3d5698SJohn Baldwin	str	r10,[sp,#272]		@ R[0][4] = C[4] ^ (~C[0] & C[1]);
355bc3d5698SJohn Baldwin#endif
356bc3d5698SJohn Baldwin	add	r9,sp,#200
357bc3d5698SJohn Baldwin#ifndef	__thumb2__
358bc3d5698SJohn Baldwin	str	r11,[sp,#272+4]
359bc3d5698SJohn Baldwin#else
360bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#272]		@ R[0][4] = C[4] ^ (~C[0] & C[1]);
361bc3d5698SJohn Baldwin#endif
362bc3d5698SJohn Baldwin
363bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11,r12,r14}	@ D[3..4]
364bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[0..1]
365bc3d5698SJohn Baldwin
366bc3d5698SJohn Baldwin#ifndef	__thumb2__
367bc3d5698SJohn Baldwin	ldr	r2,[sp,#72]		@ A[1][4]
368bc3d5698SJohn Baldwin#endif
369bc3d5698SJohn Baldwin	eor	r0,r0,r10
370bc3d5698SJohn Baldwin#ifndef	__thumb2__
371bc3d5698SJohn Baldwin	ldr	r3,[sp,#72+4]
372bc3d5698SJohn Baldwin#else
373bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#72]		@ A[1][4]
374bc3d5698SJohn Baldwin#endif
375bc3d5698SJohn Baldwin	eor	r1,r1,r11
376bc3d5698SJohn Baldwin	@ mov	r0,r0,ror#32-14		@ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
377bc3d5698SJohn Baldwin#ifndef	__thumb2__
378bc3d5698SJohn Baldwin	ldr	r10,[sp,#128]		@ A[3][1]
379bc3d5698SJohn Baldwin#endif
380bc3d5698SJohn Baldwin	@ mov	r1,r1,ror#32-14
381bc3d5698SJohn Baldwin#ifndef	__thumb2__
382bc3d5698SJohn Baldwin	ldr	r11,[sp,#128+4]
383bc3d5698SJohn Baldwin#else
384bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#128]		@ A[3][1]
385bc3d5698SJohn Baldwin#endif
386bc3d5698SJohn Baldwin
387bc3d5698SJohn Baldwin	eor	r2,r2,r12
388bc3d5698SJohn Baldwin#ifndef	__thumb2__
389bc3d5698SJohn Baldwin	ldr	r4,[sp,#80]		@ A[2][0]
390bc3d5698SJohn Baldwin#endif
391bc3d5698SJohn Baldwin	eor	r3,r3,r14
392bc3d5698SJohn Baldwin#ifndef	__thumb2__
393bc3d5698SJohn Baldwin	ldr	r5,[sp,#80+4]
394bc3d5698SJohn Baldwin#else
395bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#80]		@ A[2][0]
396bc3d5698SJohn Baldwin#endif
397bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-10		@ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
398bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-10
399bc3d5698SJohn Baldwin
400bc3d5698SJohn Baldwin	eor	r6,r6,r4
401bc3d5698SJohn Baldwin#ifndef	__thumb2__
402bc3d5698SJohn Baldwin	ldr	r12,[sp,#216]		@ D[2]
403bc3d5698SJohn Baldwin#endif
404bc3d5698SJohn Baldwin	eor	r7,r7,r5
405bc3d5698SJohn Baldwin#ifndef	__thumb2__
406bc3d5698SJohn Baldwin	ldr	r14,[sp,#216+4]
407bc3d5698SJohn Baldwin#else
408bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#216]		@ D[2]
409bc3d5698SJohn Baldwin#endif
410bc3d5698SJohn Baldwin	mov	r5,r6,ror#32-1		@ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
411bc3d5698SJohn Baldwin	mov	r4,r7,ror#32-2
412bc3d5698SJohn Baldwin
413bc3d5698SJohn Baldwin	eor	r10,r10,r8
414bc3d5698SJohn Baldwin#ifndef	__thumb2__
415bc3d5698SJohn Baldwin	ldr	r8,[sp,#176]		@ A[4][2]
416bc3d5698SJohn Baldwin#endif
417bc3d5698SJohn Baldwin	eor	r11,r11,r9
418bc3d5698SJohn Baldwin#ifndef	__thumb2__
419bc3d5698SJohn Baldwin	ldr	r9,[sp,#176+4]
420bc3d5698SJohn Baldwin#else
421bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#176]		@ A[4][2]
422bc3d5698SJohn Baldwin#endif
423bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-22		@ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
424bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-23
425bc3d5698SJohn Baldwin
426bc3d5698SJohn Baldwin	bic	r10,r4,r2,ror#32-10
427bc3d5698SJohn Baldwin	bic	r11,r5,r3,ror#32-10
428bc3d5698SJohn Baldwin	eor	r12,r12,r8
429bc3d5698SJohn Baldwin	eor	r14,r14,r9
430bc3d5698SJohn Baldwin	mov	r9,r12,ror#32-30		@ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
431bc3d5698SJohn Baldwin	mov	r8,r14,ror#32-31
432bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-14
433bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-14
434bc3d5698SJohn Baldwin#ifndef	__thumb2__
435bc3d5698SJohn Baldwin	str	r10,[sp,#280]		@ R[1][0] = C[0] ^ (~C[1] & C[2])
436bc3d5698SJohn Baldwin#endif
437bc3d5698SJohn Baldwin	bic	r12,r6,r4
438bc3d5698SJohn Baldwin#ifndef	__thumb2__
439bc3d5698SJohn Baldwin	str	r11,[sp,#280+4]
440bc3d5698SJohn Baldwin#else
441bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#280]		@ R[1][0] = C[0] ^ (~C[1] & C[2])
442bc3d5698SJohn Baldwin#endif
443bc3d5698SJohn Baldwin	bic	r14,r7,r5
444bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-10
445bc3d5698SJohn Baldwin#ifndef	__thumb2__
446bc3d5698SJohn Baldwin	str	r12,[sp,#288]		@ R[1][1] = C[1] ^ (~C[2] & C[3]);
447bc3d5698SJohn Baldwin#endif
448bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-10
449bc3d5698SJohn Baldwin#ifndef	__thumb2__
450bc3d5698SJohn Baldwin	str	r14,[sp,#288+4]
451bc3d5698SJohn Baldwin#else
452bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#288]		@ R[1][1] = C[1] ^ (~C[2] & C[3]);
453bc3d5698SJohn Baldwin#endif
454bc3d5698SJohn Baldwin	bic	r10,r8,r6
455bc3d5698SJohn Baldwin	bic	r11,r9,r7
456bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#14
457bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#14
458bc3d5698SJohn Baldwin	eor	r10,r10,r4
459bc3d5698SJohn Baldwin	eor	r11,r11,r5
460bc3d5698SJohn Baldwin#ifndef	__thumb2__
461bc3d5698SJohn Baldwin	str	r10,[sp,#296]		@ R[1][2] = C[2] ^ (~C[3] & C[4]);
462bc3d5698SJohn Baldwin#endif
463bc3d5698SJohn Baldwin	bic	r2,r2,r0,ror#32-(14-10)
464bc3d5698SJohn Baldwin#ifndef	__thumb2__
465bc3d5698SJohn Baldwin	str	r11,[sp,#296+4]
466bc3d5698SJohn Baldwin#else
467bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#296]		@ R[1][2] = C[2] ^ (~C[3] & C[4]);
468bc3d5698SJohn Baldwin#endif
469bc3d5698SJohn Baldwin	eor	r12,r6,r12,ror#32-14
470bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#32-(14-10)
471bc3d5698SJohn Baldwin#ifndef	__thumb2__
472bc3d5698SJohn Baldwin	str	r12,[sp,#304]		@ R[1][3] = C[3] ^ (~C[4] & C[0]);
473bc3d5698SJohn Baldwin#endif
474bc3d5698SJohn Baldwin	eor	r14,r7,r14,ror#32-14
475bc3d5698SJohn Baldwin#ifndef	__thumb2__
476bc3d5698SJohn Baldwin	str	r14,[sp,#304+4]
477bc3d5698SJohn Baldwin#else
478bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#304]		@ R[1][3] = C[3] ^ (~C[4] & C[0]);
479bc3d5698SJohn Baldwin#endif
480bc3d5698SJohn Baldwin	add	r12,sp,#208
481bc3d5698SJohn Baldwin#ifndef	__thumb2__
482bc3d5698SJohn Baldwin	ldr	r1,[sp,#8]		@ A[0][1]
483bc3d5698SJohn Baldwin#endif
484bc3d5698SJohn Baldwin	eor	r10,r8,r2,ror#32-10
485bc3d5698SJohn Baldwin#ifndef	__thumb2__
486bc3d5698SJohn Baldwin	ldr	r0,[sp,#8+4]
487bc3d5698SJohn Baldwin#else
488bc3d5698SJohn Baldwin	ldrd	r1,r0,[sp,#8]		@ A[0][1]
489bc3d5698SJohn Baldwin#endif
490bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-10
491bc3d5698SJohn Baldwin#ifndef	__thumb2__
492bc3d5698SJohn Baldwin	str	r10,[sp,#312]		@ R[1][4] = C[4] ^ (~C[0] & C[1]);
493bc3d5698SJohn Baldwin#endif
494bc3d5698SJohn Baldwin#ifndef	__thumb2__
495bc3d5698SJohn Baldwin	str	r11,[sp,#312+4]
496bc3d5698SJohn Baldwin#else
497bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#312]		@ R[1][4] = C[4] ^ (~C[0] & C[1]);
498bc3d5698SJohn Baldwin#endif
499bc3d5698SJohn Baldwin
500bc3d5698SJohn Baldwin	add	r9,sp,#224
501bc3d5698SJohn Baldwin	ldmia	r12,{r10,r11,r12,r14}	@ D[1..2]
502bc3d5698SJohn Baldwin#ifndef	__thumb2__
503bc3d5698SJohn Baldwin	ldr	r2,[sp,#56]		@ A[1][2]
504bc3d5698SJohn Baldwin#endif
505bc3d5698SJohn Baldwin#ifndef	__thumb2__
506bc3d5698SJohn Baldwin	ldr	r3,[sp,#56+4]
507bc3d5698SJohn Baldwin#else
508bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#56]		@ A[1][2]
509bc3d5698SJohn Baldwin#endif
510bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[3..4]
511bc3d5698SJohn Baldwin
512bc3d5698SJohn Baldwin	eor	r1,r1,r10
513bc3d5698SJohn Baldwin#ifndef	__thumb2__
514bc3d5698SJohn Baldwin	ldr	r4,[sp,#104]		@ A[2][3]
515bc3d5698SJohn Baldwin#endif
516bc3d5698SJohn Baldwin	eor	r0,r0,r11
517bc3d5698SJohn Baldwin#ifndef	__thumb2__
518bc3d5698SJohn Baldwin	ldr	r5,[sp,#104+4]
519bc3d5698SJohn Baldwin#else
520bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#104]		@ A[2][3]
521bc3d5698SJohn Baldwin#endif
522bc3d5698SJohn Baldwin	mov	r0,r0,ror#32-1		@ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
523bc3d5698SJohn Baldwin
524bc3d5698SJohn Baldwin	eor	r2,r2,r12
525bc3d5698SJohn Baldwin#ifndef	__thumb2__
526bc3d5698SJohn Baldwin	ldr	r10,[sp,#152]		@ A[3][4]
527bc3d5698SJohn Baldwin#endif
528bc3d5698SJohn Baldwin	eor	r3,r3,r14
529bc3d5698SJohn Baldwin#ifndef	__thumb2__
530bc3d5698SJohn Baldwin	ldr	r11,[sp,#152+4]
531bc3d5698SJohn Baldwin#else
532bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#152]		@ A[3][4]
533bc3d5698SJohn Baldwin#endif
534bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-3		@ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
535bc3d5698SJohn Baldwin#ifndef	__thumb2__
536bc3d5698SJohn Baldwin	ldr	r12,[sp,#200]		@ D[0]
537bc3d5698SJohn Baldwin#endif
538bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-3
539bc3d5698SJohn Baldwin#ifndef	__thumb2__
540bc3d5698SJohn Baldwin	ldr	r14,[sp,#200+4]
541bc3d5698SJohn Baldwin#else
542bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#200]		@ D[0]
543bc3d5698SJohn Baldwin#endif
544bc3d5698SJohn Baldwin
545bc3d5698SJohn Baldwin	eor	r4,r4,r6
546bc3d5698SJohn Baldwin	eor	r5,r5,r7
547bc3d5698SJohn Baldwin	@ mov	r5,r6,ror#32-12		@ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
548bc3d5698SJohn Baldwin	@ mov	r4,r7,ror#32-13		@ [track reverse order below]
549bc3d5698SJohn Baldwin
550bc3d5698SJohn Baldwin	eor	r10,r10,r8
551bc3d5698SJohn Baldwin#ifndef	__thumb2__
552bc3d5698SJohn Baldwin	ldr	r8,[sp,#160]		@ A[4][0]
553bc3d5698SJohn Baldwin#endif
554bc3d5698SJohn Baldwin	eor	r11,r11,r9
555bc3d5698SJohn Baldwin#ifndef	__thumb2__
556bc3d5698SJohn Baldwin	ldr	r9,[sp,#160+4]
557bc3d5698SJohn Baldwin#else
558bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#160]		@ A[4][0]
559bc3d5698SJohn Baldwin#endif
560bc3d5698SJohn Baldwin	mov	r6,r10,ror#32-4		@ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
561bc3d5698SJohn Baldwin	mov	r7,r11,ror#32-4
562bc3d5698SJohn Baldwin
563bc3d5698SJohn Baldwin	eor	r12,r12,r8
564bc3d5698SJohn Baldwin	eor	r14,r14,r9
565bc3d5698SJohn Baldwin	mov	r8,r12,ror#32-9		@ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
566bc3d5698SJohn Baldwin	mov	r9,r14,ror#32-9
567bc3d5698SJohn Baldwin
568bc3d5698SJohn Baldwin	bic	r10,r5,r2,ror#13-3
569bc3d5698SJohn Baldwin	bic	r11,r4,r3,ror#12-3
570bc3d5698SJohn Baldwin	bic	r12,r6,r5,ror#32-13
571bc3d5698SJohn Baldwin	bic	r14,r7,r4,ror#32-12
572bc3d5698SJohn Baldwin	eor	r10,r0,r10,ror#32-13
573bc3d5698SJohn Baldwin	eor	r11,r1,r11,ror#32-12
574bc3d5698SJohn Baldwin#ifndef	__thumb2__
575bc3d5698SJohn Baldwin	str	r10,[sp,#320]		@ R[2][0] = C[0] ^ (~C[1] & C[2])
576bc3d5698SJohn Baldwin#endif
577bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-3
578bc3d5698SJohn Baldwin#ifndef	__thumb2__
579bc3d5698SJohn Baldwin	str	r11,[sp,#320+4]
580bc3d5698SJohn Baldwin#else
581bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#320]		@ R[2][0] = C[0] ^ (~C[1] & C[2])
582bc3d5698SJohn Baldwin#endif
583bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-3
584bc3d5698SJohn Baldwin#ifndef	__thumb2__
585bc3d5698SJohn Baldwin	str	r12,[sp,#328]		@ R[2][1] = C[1] ^ (~C[2] & C[3]);
586bc3d5698SJohn Baldwin#endif
587bc3d5698SJohn Baldwin	bic	r10,r8,r6
588bc3d5698SJohn Baldwin	bic	r11,r9,r7
589bc3d5698SJohn Baldwin#ifndef	__thumb2__
590bc3d5698SJohn Baldwin	str	r14,[sp,#328+4]
591bc3d5698SJohn Baldwin#else
592bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#328]		@ R[2][1] = C[1] ^ (~C[2] & C[3]);
593bc3d5698SJohn Baldwin#endif
594bc3d5698SJohn Baldwin	eor	r10,r10,r5,ror#32-13
595bc3d5698SJohn Baldwin	eor	r11,r11,r4,ror#32-12
596bc3d5698SJohn Baldwin#ifndef	__thumb2__
597bc3d5698SJohn Baldwin	str	r10,[sp,#336]		@ R[2][2] = C[2] ^ (~C[3] & C[4]);
598bc3d5698SJohn Baldwin#endif
599bc3d5698SJohn Baldwin	bic	r12,r0,r8
600bc3d5698SJohn Baldwin#ifndef	__thumb2__
601bc3d5698SJohn Baldwin	str	r11,[sp,#336+4]
602bc3d5698SJohn Baldwin#else
603bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#336]		@ R[2][2] = C[2] ^ (~C[3] & C[4]);
604bc3d5698SJohn Baldwin#endif
605bc3d5698SJohn Baldwin	bic	r14,r1,r9
606bc3d5698SJohn Baldwin	eor	r12,r12,r6
607bc3d5698SJohn Baldwin	eor	r14,r14,r7
608bc3d5698SJohn Baldwin#ifndef	__thumb2__
609bc3d5698SJohn Baldwin	str	r12,[sp,#344]		@ R[2][3] = C[3] ^ (~C[4] & C[0]);
610bc3d5698SJohn Baldwin#endif
611bc3d5698SJohn Baldwin	bic	r10,r2,r0,ror#3
612bc3d5698SJohn Baldwin#ifndef	__thumb2__
613bc3d5698SJohn Baldwin	str	r14,[sp,#344+4]
614bc3d5698SJohn Baldwin#else
615bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#344]		@ R[2][3] = C[3] ^ (~C[4] & C[0]);
616bc3d5698SJohn Baldwin#endif
617bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#3
618bc3d5698SJohn Baldwin#ifndef	__thumb2__
619bc3d5698SJohn Baldwin	ldr	r1,[sp,#32]		@ A[0][4] [in reverse order]
620bc3d5698SJohn Baldwin#endif
621bc3d5698SJohn Baldwin	eor	r10,r8,r10,ror#32-3
622bc3d5698SJohn Baldwin#ifndef	__thumb2__
623bc3d5698SJohn Baldwin	ldr	r0,[sp,#32+4]
624bc3d5698SJohn Baldwin#else
625bc3d5698SJohn Baldwin	ldrd	r1,r0,[sp,#32]		@ A[0][4] [in reverse order]
626bc3d5698SJohn Baldwin#endif
627bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-3
628bc3d5698SJohn Baldwin#ifndef	__thumb2__
629bc3d5698SJohn Baldwin	str	r10,[sp,#352]		@ R[2][4] = C[4] ^ (~C[0] & C[1]);
630bc3d5698SJohn Baldwin#endif
631bc3d5698SJohn Baldwin	add	r9,sp,#208
632bc3d5698SJohn Baldwin#ifndef	__thumb2__
633bc3d5698SJohn Baldwin	str	r11,[sp,#352+4]
634bc3d5698SJohn Baldwin#else
635bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#352]		@ R[2][4] = C[4] ^ (~C[0] & C[1]);
636bc3d5698SJohn Baldwin#endif
637bc3d5698SJohn Baldwin
638bc3d5698SJohn Baldwin#ifndef	__thumb2__
639bc3d5698SJohn Baldwin	ldr	r10,[sp,#232]		@ D[4]
640bc3d5698SJohn Baldwin#endif
641bc3d5698SJohn Baldwin#ifndef	__thumb2__
642bc3d5698SJohn Baldwin	ldr	r11,[sp,#232+4]
643bc3d5698SJohn Baldwin#else
644bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#232]		@ D[4]
645bc3d5698SJohn Baldwin#endif
646bc3d5698SJohn Baldwin#ifndef	__thumb2__
647bc3d5698SJohn Baldwin	ldr	r12,[sp,#200]		@ D[0]
648bc3d5698SJohn Baldwin#endif
649bc3d5698SJohn Baldwin#ifndef	__thumb2__
650bc3d5698SJohn Baldwin	ldr	r14,[sp,#200+4]
651bc3d5698SJohn Baldwin#else
652bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#200]		@ D[0]
653bc3d5698SJohn Baldwin#endif
654bc3d5698SJohn Baldwin
655bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[1..2]
656bc3d5698SJohn Baldwin
657bc3d5698SJohn Baldwin	eor	r1,r1,r10
658bc3d5698SJohn Baldwin#ifndef	__thumb2__
659bc3d5698SJohn Baldwin	ldr	r2,[sp,#40]		@ A[1][0]
660bc3d5698SJohn Baldwin#endif
661bc3d5698SJohn Baldwin	eor	r0,r0,r11
662bc3d5698SJohn Baldwin#ifndef	__thumb2__
663bc3d5698SJohn Baldwin	ldr	r3,[sp,#40+4]
664bc3d5698SJohn Baldwin#else
665bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#40]		@ A[1][0]
666bc3d5698SJohn Baldwin#endif
667bc3d5698SJohn Baldwin	@ mov	r1,r10,ror#32-13		@ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
668bc3d5698SJohn Baldwin#ifndef	__thumb2__
669bc3d5698SJohn Baldwin	ldr	r4,[sp,#88]		@ A[2][1]
670bc3d5698SJohn Baldwin#endif
671bc3d5698SJohn Baldwin	@ mov	r0,r11,ror#32-14		@ [was loaded in reverse order]
672bc3d5698SJohn Baldwin#ifndef	__thumb2__
673bc3d5698SJohn Baldwin	ldr	r5,[sp,#88+4]
674bc3d5698SJohn Baldwin#else
675bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#88]		@ A[2][1]
676bc3d5698SJohn Baldwin#endif
677bc3d5698SJohn Baldwin
678bc3d5698SJohn Baldwin	eor	r2,r2,r12
679bc3d5698SJohn Baldwin#ifndef	__thumb2__
680bc3d5698SJohn Baldwin	ldr	r10,[sp,#136]		@ A[3][2]
681bc3d5698SJohn Baldwin#endif
682bc3d5698SJohn Baldwin	eor	r3,r3,r14
683bc3d5698SJohn Baldwin#ifndef	__thumb2__
684bc3d5698SJohn Baldwin	ldr	r11,[sp,#136+4]
685bc3d5698SJohn Baldwin#else
686bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#136]		@ A[3][2]
687bc3d5698SJohn Baldwin#endif
688bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-18		@ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
689bc3d5698SJohn Baldwin#ifndef	__thumb2__
690bc3d5698SJohn Baldwin	ldr	r12,[sp,#224]		@ D[3]
691bc3d5698SJohn Baldwin#endif
692bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-18
693bc3d5698SJohn Baldwin#ifndef	__thumb2__
694bc3d5698SJohn Baldwin	ldr	r14,[sp,#224+4]
695bc3d5698SJohn Baldwin#else
696bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#224]		@ D[3]
697bc3d5698SJohn Baldwin#endif
698bc3d5698SJohn Baldwin
699bc3d5698SJohn Baldwin	eor	r6,r6,r4
700bc3d5698SJohn Baldwin	eor	r7,r7,r5
701bc3d5698SJohn Baldwin	mov	r4,r6,ror#32-5		@ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
702bc3d5698SJohn Baldwin	mov	r5,r7,ror#32-5
703bc3d5698SJohn Baldwin
704bc3d5698SJohn Baldwin	eor	r10,r10,r8
705bc3d5698SJohn Baldwin#ifndef	__thumb2__
706bc3d5698SJohn Baldwin	ldr	r8,[sp,#184]		@ A[4][3]
707bc3d5698SJohn Baldwin#endif
708bc3d5698SJohn Baldwin	eor	r11,r11,r9
709bc3d5698SJohn Baldwin#ifndef	__thumb2__
710bc3d5698SJohn Baldwin	ldr	r9,[sp,#184+4]
711bc3d5698SJohn Baldwin#else
712bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#184]		@ A[4][3]
713bc3d5698SJohn Baldwin#endif
714bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-7		@ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
715bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-8
716bc3d5698SJohn Baldwin
717bc3d5698SJohn Baldwin	eor	r12,r12,r8
718bc3d5698SJohn Baldwin	eor	r14,r14,r9
719bc3d5698SJohn Baldwin	mov	r8,r12,ror#32-28		@ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
720bc3d5698SJohn Baldwin	mov	r9,r14,ror#32-28
721bc3d5698SJohn Baldwin
722bc3d5698SJohn Baldwin	bic	r10,r4,r2,ror#32-18
723bc3d5698SJohn Baldwin	bic	r11,r5,r3,ror#32-18
724bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-14
725bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-13
726bc3d5698SJohn Baldwin#ifndef	__thumb2__
727bc3d5698SJohn Baldwin	str	r10,[sp,#360]		@ R[3][0] = C[0] ^ (~C[1] & C[2])
728bc3d5698SJohn Baldwin#endif
729bc3d5698SJohn Baldwin	bic	r12,r6,r4
730bc3d5698SJohn Baldwin#ifndef	__thumb2__
731bc3d5698SJohn Baldwin	str	r11,[sp,#360+4]
732bc3d5698SJohn Baldwin#else
733bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#360]		@ R[3][0] = C[0] ^ (~C[1] & C[2])
734bc3d5698SJohn Baldwin#endif
735bc3d5698SJohn Baldwin	bic	r14,r7,r5
736bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-18
737bc3d5698SJohn Baldwin#ifndef	__thumb2__
738bc3d5698SJohn Baldwin	str	r12,[sp,#368]		@ R[3][1] = C[1] ^ (~C[2] & C[3]);
739bc3d5698SJohn Baldwin#endif
740bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-18
741bc3d5698SJohn Baldwin#ifndef	__thumb2__
742bc3d5698SJohn Baldwin	str	r14,[sp,#368+4]
743bc3d5698SJohn Baldwin#else
744bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#368]		@ R[3][1] = C[1] ^ (~C[2] & C[3]);
745bc3d5698SJohn Baldwin#endif
746bc3d5698SJohn Baldwin	bic	r10,r8,r6
747bc3d5698SJohn Baldwin	bic	r11,r9,r7
748bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#14
749bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#13
750bc3d5698SJohn Baldwin	eor	r10,r10,r4
751bc3d5698SJohn Baldwin	eor	r11,r11,r5
752bc3d5698SJohn Baldwin#ifndef	__thumb2__
753bc3d5698SJohn Baldwin	str	r10,[sp,#376]		@ R[3][2] = C[2] ^ (~C[3] & C[4]);
754bc3d5698SJohn Baldwin#endif
755bc3d5698SJohn Baldwin	bic	r2,r2,r0,ror#18-14
756bc3d5698SJohn Baldwin#ifndef	__thumb2__
757bc3d5698SJohn Baldwin	str	r11,[sp,#376+4]
758bc3d5698SJohn Baldwin#else
759bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#376]		@ R[3][2] = C[2] ^ (~C[3] & C[4]);
760bc3d5698SJohn Baldwin#endif
761bc3d5698SJohn Baldwin	eor	r12,r6,r12,ror#32-14
762bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#18-13
763bc3d5698SJohn Baldwin	eor	r14,r7,r14,ror#32-13
764bc3d5698SJohn Baldwin#ifndef	__thumb2__
765bc3d5698SJohn Baldwin	str	r12,[sp,#384]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
766bc3d5698SJohn Baldwin#endif
767bc3d5698SJohn Baldwin#ifndef	__thumb2__
768bc3d5698SJohn Baldwin	str	r14,[sp,#384+4]
769bc3d5698SJohn Baldwin#else
770bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#384]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
771bc3d5698SJohn Baldwin#endif
772bc3d5698SJohn Baldwin	add	r14,sp,#216
773bc3d5698SJohn Baldwin#ifndef	__thumb2__
774bc3d5698SJohn Baldwin	ldr	r0,[sp,#16]		@ A[0][2]
775bc3d5698SJohn Baldwin#endif
776bc3d5698SJohn Baldwin	eor	r10,r8,r2,ror#32-18
777bc3d5698SJohn Baldwin#ifndef	__thumb2__
778bc3d5698SJohn Baldwin	ldr	r1,[sp,#16+4]
779bc3d5698SJohn Baldwin#else
780bc3d5698SJohn Baldwin	ldrd	r0,r1,[sp,#16]		@ A[0][2]
781bc3d5698SJohn Baldwin#endif
782bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-18
783bc3d5698SJohn Baldwin#ifndef	__thumb2__
784bc3d5698SJohn Baldwin	str	r10,[sp,#392]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
785bc3d5698SJohn Baldwin#endif
786bc3d5698SJohn Baldwin#ifndef	__thumb2__
787bc3d5698SJohn Baldwin	str	r11,[sp,#392+4]
788bc3d5698SJohn Baldwin#else
789bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#392]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
790bc3d5698SJohn Baldwin#endif
791bc3d5698SJohn Baldwin
792bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11,r12,r14}	@ D[2..3]
793bc3d5698SJohn Baldwin#ifndef	__thumb2__
794bc3d5698SJohn Baldwin	ldr	r2,[sp,#64]		@ A[1][3]
795bc3d5698SJohn Baldwin#endif
796bc3d5698SJohn Baldwin#ifndef	__thumb2__
797bc3d5698SJohn Baldwin	ldr	r3,[sp,#64+4]
798bc3d5698SJohn Baldwin#else
799bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#64]		@ A[1][3]
800bc3d5698SJohn Baldwin#endif
801bc3d5698SJohn Baldwin#ifndef	__thumb2__
802bc3d5698SJohn Baldwin	ldr	r6,[sp,#232]		@ D[4]
803bc3d5698SJohn Baldwin#endif
804bc3d5698SJohn Baldwin#ifndef	__thumb2__
805bc3d5698SJohn Baldwin	ldr	r7,[sp,#232+4]
806bc3d5698SJohn Baldwin#else
807bc3d5698SJohn Baldwin	ldrd	r6,r7,[sp,#232]		@ D[4]
808bc3d5698SJohn Baldwin#endif
809bc3d5698SJohn Baldwin
810bc3d5698SJohn Baldwin	eor	r0,r0,r10
811bc3d5698SJohn Baldwin#ifndef	__thumb2__
812bc3d5698SJohn Baldwin	ldr	r4,[sp,#112]		@ A[2][4]
813bc3d5698SJohn Baldwin#endif
814bc3d5698SJohn Baldwin	eor	r1,r1,r11
815bc3d5698SJohn Baldwin#ifndef	__thumb2__
816bc3d5698SJohn Baldwin	ldr	r5,[sp,#112+4]
817bc3d5698SJohn Baldwin#else
818bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#112]		@ A[2][4]
819bc3d5698SJohn Baldwin#endif
820bc3d5698SJohn Baldwin	@ mov	r0,r0,ror#32-31		@ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
821bc3d5698SJohn Baldwin#ifndef	__thumb2__
822bc3d5698SJohn Baldwin	ldr	r8,[sp,#200]		@ D[0]
823bc3d5698SJohn Baldwin#endif
824bc3d5698SJohn Baldwin	@ mov	r1,r1,ror#32-31
825bc3d5698SJohn Baldwin#ifndef	__thumb2__
826bc3d5698SJohn Baldwin	ldr	r9,[sp,#200+4]
827bc3d5698SJohn Baldwin#else
828bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#200]		@ D[0]
829bc3d5698SJohn Baldwin#endif
830bc3d5698SJohn Baldwin
831bc3d5698SJohn Baldwin	eor	r12,r12,r2
832bc3d5698SJohn Baldwin#ifndef	__thumb2__
833bc3d5698SJohn Baldwin	ldr	r10,[sp,#120]		@ A[3][0]
834bc3d5698SJohn Baldwin#endif
835bc3d5698SJohn Baldwin	eor	r14,r14,r3
836bc3d5698SJohn Baldwin#ifndef	__thumb2__
837bc3d5698SJohn Baldwin	ldr	r11,[sp,#120+4]
838bc3d5698SJohn Baldwin#else
839bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#120]		@ A[3][0]
840bc3d5698SJohn Baldwin#endif
841bc3d5698SJohn Baldwin	mov	r3,r12,ror#32-27		@ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
842bc3d5698SJohn Baldwin#ifndef	__thumb2__
843bc3d5698SJohn Baldwin	ldr	r12,[sp,#208]		@ D[1]
844bc3d5698SJohn Baldwin#endif
845bc3d5698SJohn Baldwin	mov	r2,r14,ror#32-28
846bc3d5698SJohn Baldwin#ifndef	__thumb2__
847bc3d5698SJohn Baldwin	ldr	r14,[sp,#208+4]
848bc3d5698SJohn Baldwin#else
849bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#208]		@ D[1]
850bc3d5698SJohn Baldwin#endif
851bc3d5698SJohn Baldwin
852bc3d5698SJohn Baldwin	eor	r6,r6,r4
853bc3d5698SJohn Baldwin	eor	r7,r7,r5
854bc3d5698SJohn Baldwin	mov	r5,r6,ror#32-19		@ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
855bc3d5698SJohn Baldwin	mov	r4,r7,ror#32-20
856bc3d5698SJohn Baldwin
857bc3d5698SJohn Baldwin	eor	r10,r10,r8
858bc3d5698SJohn Baldwin#ifndef	__thumb2__
859bc3d5698SJohn Baldwin	ldr	r8,[sp,#168]		@ A[4][1]
860bc3d5698SJohn Baldwin#endif
861bc3d5698SJohn Baldwin	eor	r11,r11,r9
862bc3d5698SJohn Baldwin#ifndef	__thumb2__
863bc3d5698SJohn Baldwin	ldr	r9,[sp,#168+4]
864bc3d5698SJohn Baldwin#else
865bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#168]		@ A[4][1]
866bc3d5698SJohn Baldwin#endif
867bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-20		@ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
868bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-21
869bc3d5698SJohn Baldwin
870bc3d5698SJohn Baldwin	eor	r8,r8,r12
871bc3d5698SJohn Baldwin	eor	r9,r9,r14
872bc3d5698SJohn Baldwin	@ mov	r8,r2,ror#32-1		@ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
873bc3d5698SJohn Baldwin	@ mov	r9,r3,ror#32-1
874bc3d5698SJohn Baldwin
875bc3d5698SJohn Baldwin	bic	r10,r4,r2
876bc3d5698SJohn Baldwin	bic	r11,r5,r3
877bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-31
878bc3d5698SJohn Baldwin#ifndef	__thumb2__
879bc3d5698SJohn Baldwin	str	r10,[sp,#400]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
880bc3d5698SJohn Baldwin#endif
881bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-31
882bc3d5698SJohn Baldwin#ifndef	__thumb2__
883bc3d5698SJohn Baldwin	str	r11,[sp,#400+4]
884bc3d5698SJohn Baldwin#else
885bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#400]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
886bc3d5698SJohn Baldwin#endif
887bc3d5698SJohn Baldwin	bic	r12,r6,r4
888bc3d5698SJohn Baldwin	bic	r14,r7,r5
889bc3d5698SJohn Baldwin	eor	r12,r12,r2
890bc3d5698SJohn Baldwin	eor	r14,r14,r3
891bc3d5698SJohn Baldwin#ifndef	__thumb2__
892bc3d5698SJohn Baldwin	str	r12,[sp,#408]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
893bc3d5698SJohn Baldwin#endif
894bc3d5698SJohn Baldwin	bic	r10,r8,r6,ror#1
895bc3d5698SJohn Baldwin#ifndef	__thumb2__
896bc3d5698SJohn Baldwin	str	r14,[sp,#408+4]
897bc3d5698SJohn Baldwin#else
898bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#408]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
899bc3d5698SJohn Baldwin#endif
900bc3d5698SJohn Baldwin	bic	r11,r9,r7,ror#1
901bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#31-1
902bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#31-1
903bc3d5698SJohn Baldwin	eor	r4,r4,r10,ror#32-1
904bc3d5698SJohn Baldwin#ifndef	__thumb2__
905bc3d5698SJohn Baldwin	str	r4,[sp,#416]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
906bc3d5698SJohn Baldwin#endif
907bc3d5698SJohn Baldwin	eor	r5,r5,r11,ror#32-1
908bc3d5698SJohn Baldwin#ifndef	__thumb2__
909bc3d5698SJohn Baldwin	str	r5,[sp,#416+4]
910bc3d5698SJohn Baldwin#else
911bc3d5698SJohn Baldwin	strd	r4,r5,[sp,#416]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
912bc3d5698SJohn Baldwin#endif
913bc3d5698SJohn Baldwin	eor	r6,r6,r12,ror#32-31
914bc3d5698SJohn Baldwin	eor	r7,r7,r14,ror#32-31
915bc3d5698SJohn Baldwin#ifndef	__thumb2__
916bc3d5698SJohn Baldwin	str	r6,[sp,#424]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
917bc3d5698SJohn Baldwin#endif
918bc3d5698SJohn Baldwin	bic	r10,r2,r0,ror#32-31
919bc3d5698SJohn Baldwin#ifndef	__thumb2__
920bc3d5698SJohn Baldwin	str	r7,[sp,#424+4]
921bc3d5698SJohn Baldwin#else
922bc3d5698SJohn Baldwin	strd	r6,r7,[sp,#424]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
923bc3d5698SJohn Baldwin#endif
924bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#32-31
925bc3d5698SJohn Baldwin	add	r12,sp,#240
926bc3d5698SJohn Baldwin	eor	r8,r10,r8,ror#32-1
927bc3d5698SJohn Baldwin	add	r10,sp,#280
928bc3d5698SJohn Baldwin	eor	r9,r11,r9,ror#32-1
929bc3d5698SJohn Baldwin#ifndef	__thumb2__
930bc3d5698SJohn Baldwin	str	r8,[sp,#432]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
931bc3d5698SJohn Baldwin#endif
932bc3d5698SJohn Baldwin#ifndef	__thumb2__
933bc3d5698SJohn Baldwin	str	r9,[sp,#432+4]
934bc3d5698SJohn Baldwin#else
935bc3d5698SJohn Baldwin	strd	r8,r9,[sp,#432]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
936bc3d5698SJohn Baldwin#endif
937bc3d5698SJohn Baldwin	ldmia	r12,{r0,r1,r2,r3}		@ A[0][0..1]
938bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][0..1]
939bc3d5698SJohn Baldwin#ifdef	__thumb2__
940bc3d5698SJohn Baldwin	eor	r0,r0,r10
941bc3d5698SJohn Baldwin	eor	r1,r1,r11
942bc3d5698SJohn Baldwin	eor	r2,r2,r12
943bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#296]
944bc3d5698SJohn Baldwin	eor	r3,r3,r14
945bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#304]
946bc3d5698SJohn Baldwin	eor	r4,r4,r10
947bc3d5698SJohn Baldwin	eor	r5,r5,r11
948bc3d5698SJohn Baldwin	eor	r6,r6,r12
949bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#312]
950bc3d5698SJohn Baldwin	eor	r7,r7,r14
951bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#320]
952bc3d5698SJohn Baldwin	eor	r8,r8,r10
953bc3d5698SJohn Baldwin	eor	r9,r9,r11
954bc3d5698SJohn Baldwin	eor	r0,r0,r12
955bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#328]
956bc3d5698SJohn Baldwin	eor	r1,r1,r14
957bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#336]
958bc3d5698SJohn Baldwin	eor	r2,r2,r10
959bc3d5698SJohn Baldwin	eor	r3,r3,r11
960bc3d5698SJohn Baldwin	eor	r4,r4,r12
961bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#344]
962bc3d5698SJohn Baldwin	eor	r5,r5,r14
963bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#352]
964bc3d5698SJohn Baldwin	eor	r6,r6,r10
965bc3d5698SJohn Baldwin	eor	r7,r7,r11
966bc3d5698SJohn Baldwin	eor	r8,r8,r12
967bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#360]
968bc3d5698SJohn Baldwin	eor	r9,r9,r14
969bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#368]
970bc3d5698SJohn Baldwin	eor	r0,r0,r10
971bc3d5698SJohn Baldwin	eor	r1,r1,r11
972bc3d5698SJohn Baldwin	eor	r2,r2,r12
973bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#376]
974bc3d5698SJohn Baldwin	eor	r3,r3,r14
975bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#384]
976bc3d5698SJohn Baldwin	eor	r4,r4,r10
977bc3d5698SJohn Baldwin	eor	r5,r5,r11
978bc3d5698SJohn Baldwin	eor	r6,r6,r12
979bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#392]
980bc3d5698SJohn Baldwin	eor	r7,r7,r14
981bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#400]
982bc3d5698SJohn Baldwin	eor	r8,r8,r10
983bc3d5698SJohn Baldwin	eor	r9,r9,r11
984bc3d5698SJohn Baldwin	eor	r0,r0,r12
985bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#408]
986bc3d5698SJohn Baldwin	eor	r1,r1,r14
987bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#256]
988bc3d5698SJohn Baldwin	eor	r2,r2,r10
989bc3d5698SJohn Baldwin	eor	r3,r3,r11
990bc3d5698SJohn Baldwin	eor	r4,r4,r12
991bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#264]
992bc3d5698SJohn Baldwin	eor	r5,r5,r14
993bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#272]
994bc3d5698SJohn Baldwin#else
995bc3d5698SJohn Baldwin	eor	r0,r0,r10
996bc3d5698SJohn Baldwin	add	r10,sp,#296
997bc3d5698SJohn Baldwin	eor	r1,r1,r11
998bc3d5698SJohn Baldwin	eor	r2,r2,r12
999bc3d5698SJohn Baldwin	eor	r3,r3,r14
1000bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][2..3]
1001bc3d5698SJohn Baldwin	eor	r4,r4,r10
1002bc3d5698SJohn Baldwin	add	r10,sp,#312
1003bc3d5698SJohn Baldwin	eor	r5,r5,r11
1004bc3d5698SJohn Baldwin	eor	r6,r6,r12
1005bc3d5698SJohn Baldwin	eor	r7,r7,r14
1006bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[1][4]..A[2][0]
1007bc3d5698SJohn Baldwin	eor	r8,r8,r10
1008bc3d5698SJohn Baldwin	add	r10,sp,#328
1009bc3d5698SJohn Baldwin	eor	r9,r9,r11
1010bc3d5698SJohn Baldwin	eor	r0,r0,r12
1011bc3d5698SJohn Baldwin	eor	r1,r1,r14
1012bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[2][1..2]
1013bc3d5698SJohn Baldwin	eor	r2,r2,r10
1014bc3d5698SJohn Baldwin	add	r10,sp,#344
1015bc3d5698SJohn Baldwin	eor	r3,r3,r11
1016bc3d5698SJohn Baldwin	eor	r4,r4,r12
1017bc3d5698SJohn Baldwin	eor	r5,r5,r14
1018bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[2][3..4]
1019bc3d5698SJohn Baldwin	eor	r6,r6,r10
1020bc3d5698SJohn Baldwin	add	r10,sp,#360
1021bc3d5698SJohn Baldwin	eor	r7,r7,r11
1022bc3d5698SJohn Baldwin	eor	r8,r8,r12
1023bc3d5698SJohn Baldwin	eor	r9,r9,r14
1024bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][0..1]
1025bc3d5698SJohn Baldwin	eor	r0,r0,r10
1026bc3d5698SJohn Baldwin	add	r10,sp,#376
1027bc3d5698SJohn Baldwin	eor	r1,r1,r11
1028bc3d5698SJohn Baldwin	eor	r2,r2,r12
1029bc3d5698SJohn Baldwin	eor	r3,r3,r14
1030bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][2..3]
1031bc3d5698SJohn Baldwin	eor	r4,r4,r10
1032bc3d5698SJohn Baldwin	add	r10,sp,#392
1033bc3d5698SJohn Baldwin	eor	r5,r5,r11
1034bc3d5698SJohn Baldwin	eor	r6,r6,r12
1035bc3d5698SJohn Baldwin	eor	r7,r7,r14
1036bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[3][4]..A[4][0]
1037bc3d5698SJohn Baldwin	eor	r8,r8,r10
1038bc3d5698SJohn Baldwin	ldr	r10,[sp,#408]		@ A[4][1]
1039bc3d5698SJohn Baldwin	eor	r9,r9,r11
1040bc3d5698SJohn Baldwin	ldr	r11,[sp,#408+4]
1041bc3d5698SJohn Baldwin	eor	r0,r0,r12
1042bc3d5698SJohn Baldwin	ldr	r12,[sp,#256]		@ A[0][2]
1043bc3d5698SJohn Baldwin	eor	r1,r1,r14
1044bc3d5698SJohn Baldwin	ldr	r14,[sp,#256+4]
1045bc3d5698SJohn Baldwin	eor	r2,r2,r10
1046bc3d5698SJohn Baldwin	add	r10,sp,#264
1047bc3d5698SJohn Baldwin	eor	r3,r3,r11
1048bc3d5698SJohn Baldwin	eor	r4,r4,r12
1049bc3d5698SJohn Baldwin	eor	r5,r5,r14
1050bc3d5698SJohn Baldwin	ldmia	r10,{r10,r11,r12,r14}	@ A[0][3..4]
1051bc3d5698SJohn Baldwin#endif
1052bc3d5698SJohn Baldwin	eor	r6,r6,r10
1053bc3d5698SJohn Baldwin	eor	r7,r7,r11
1054bc3d5698SJohn Baldwin	eor	r8,r8,r12
1055bc3d5698SJohn Baldwin	eor	r9,r9,r14
1056bc3d5698SJohn Baldwin
1057bc3d5698SJohn Baldwin	eor	r10,r0,r5,ror#32-1	@ E[0] = ROL64(C[2], 1) ^ C[0];
1058bc3d5698SJohn Baldwin#ifndef	__thumb2__
1059bc3d5698SJohn Baldwin	str	r10,[sp,#208]		@ D[1] = E[0]
1060bc3d5698SJohn Baldwin#endif
1061bc3d5698SJohn Baldwin	eor	r11,r1,r4
1062bc3d5698SJohn Baldwin#ifndef	__thumb2__
1063bc3d5698SJohn Baldwin	str	r11,[sp,#208+4]
1064bc3d5698SJohn Baldwin#else
1065bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#208]		@ D[1] = E[0]
1066bc3d5698SJohn Baldwin#endif
1067bc3d5698SJohn Baldwin	eor	r12,r6,r1,ror#32-1	@ E[1] = ROL64(C[0], 1) ^ C[3];
1068bc3d5698SJohn Baldwin	eor	r14,r7,r0
1069bc3d5698SJohn Baldwin#ifndef	__thumb2__
1070bc3d5698SJohn Baldwin	str	r12,[sp,#232]		@ D[4] = E[1]
1071bc3d5698SJohn Baldwin#endif
1072bc3d5698SJohn Baldwin	eor	r0,r8,r3,ror#32-1	@ C[0] = ROL64(C[1], 1) ^ C[4];
1073bc3d5698SJohn Baldwin#ifndef	__thumb2__
1074bc3d5698SJohn Baldwin	str	r14,[sp,#232+4]
1075bc3d5698SJohn Baldwin#else
1076bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#232]		@ D[4] = E[1]
1077bc3d5698SJohn Baldwin#endif
1078bc3d5698SJohn Baldwin	eor	r1,r9,r2
1079bc3d5698SJohn Baldwin#ifndef	__thumb2__
1080bc3d5698SJohn Baldwin	str	r0,[sp,#200]		@ D[0] = C[0]
1081bc3d5698SJohn Baldwin#endif
1082bc3d5698SJohn Baldwin	eor	r2,r2,r7,ror#32-1	@ C[1] = ROL64(C[3], 1) ^ C[1];
1083bc3d5698SJohn Baldwin#ifndef	__thumb2__
1084bc3d5698SJohn Baldwin	ldr	r7,[sp,#384]
1085bc3d5698SJohn Baldwin#endif
1086bc3d5698SJohn Baldwin	eor	r3,r3,r6
1087bc3d5698SJohn Baldwin#ifndef	__thumb2__
1088bc3d5698SJohn Baldwin	str	r1,[sp,#200+4]
1089bc3d5698SJohn Baldwin#else
1090bc3d5698SJohn Baldwin	strd	r0,r1,[sp,#200]		@ D[0] = C[0]
1091bc3d5698SJohn Baldwin#endif
1092bc3d5698SJohn Baldwin#ifndef	__thumb2__
1093bc3d5698SJohn Baldwin	ldr	r6,[sp,#384+4]
1094bc3d5698SJohn Baldwin#else
1095bc3d5698SJohn Baldwin	ldrd	r7,r6,[sp,#384]
1096bc3d5698SJohn Baldwin#endif
1097bc3d5698SJohn Baldwin#ifndef	__thumb2__
1098bc3d5698SJohn Baldwin	str	r2,[sp,#216]		@ D[2] = C[1]
1099bc3d5698SJohn Baldwin#endif
1100bc3d5698SJohn Baldwin	eor	r4,r4,r9,ror#32-1	@ C[2] = ROL64(C[4], 1) ^ C[2];
1101bc3d5698SJohn Baldwin#ifndef	__thumb2__
1102bc3d5698SJohn Baldwin	str	r3,[sp,#216+4]
1103bc3d5698SJohn Baldwin#else
1104bc3d5698SJohn Baldwin	strd	r2,r3,[sp,#216]		@ D[2] = C[1]
1105bc3d5698SJohn Baldwin#endif
1106bc3d5698SJohn Baldwin	eor	r5,r5,r8
1107bc3d5698SJohn Baldwin
1108bc3d5698SJohn Baldwin#ifndef	__thumb2__
1109bc3d5698SJohn Baldwin	ldr	r8,[sp,#432]
1110bc3d5698SJohn Baldwin#endif
1111bc3d5698SJohn Baldwin#ifndef	__thumb2__
1112bc3d5698SJohn Baldwin	ldr	r9,[sp,#432+4]
1113bc3d5698SJohn Baldwin#else
1114bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#432]
1115bc3d5698SJohn Baldwin#endif
1116bc3d5698SJohn Baldwin#ifndef	__thumb2__
1117bc3d5698SJohn Baldwin	str	r4,[sp,#224]		@ D[3] = C[2]
1118bc3d5698SJohn Baldwin#endif
1119bc3d5698SJohn Baldwin	eor	r7,r7,r4
1120bc3d5698SJohn Baldwin#ifndef	__thumb2__
1121bc3d5698SJohn Baldwin	str	r5,[sp,#224+4]
1122bc3d5698SJohn Baldwin#else
1123bc3d5698SJohn Baldwin	strd	r4,r5,[sp,#224]		@ D[3] = C[2]
1124bc3d5698SJohn Baldwin#endif
1125bc3d5698SJohn Baldwin	eor	r6,r6,r5
1126bc3d5698SJohn Baldwin#ifndef	__thumb2__
1127bc3d5698SJohn Baldwin	ldr	r4,[sp,#240]
1128bc3d5698SJohn Baldwin#endif
1129bc3d5698SJohn Baldwin	@ mov	r7,r7,ror#32-10		@ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]);   /* D[3] */
1130bc3d5698SJohn Baldwin	@ mov	r6,r6,ror#32-11
1131bc3d5698SJohn Baldwin#ifndef	__thumb2__
1132bc3d5698SJohn Baldwin	ldr	r5,[sp,#240+4]
1133bc3d5698SJohn Baldwin#else
1134bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#240]
1135bc3d5698SJohn Baldwin#endif
1136bc3d5698SJohn Baldwin	eor	r8,r8,r12
1137bc3d5698SJohn Baldwin	eor	r9,r9,r14
1138bc3d5698SJohn Baldwin#ifndef	__thumb2__
1139bc3d5698SJohn Baldwin	ldr	r12,[sp,#336]
1140bc3d5698SJohn Baldwin#endif
1141bc3d5698SJohn Baldwin	eor	r0,r0,r4
1142bc3d5698SJohn Baldwin#ifndef	__thumb2__
1143bc3d5698SJohn Baldwin	ldr	r14,[sp,#336+4]
1144bc3d5698SJohn Baldwin#else
1145bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#336]
1146bc3d5698SJohn Baldwin#endif
1147bc3d5698SJohn Baldwin	@ mov	r8,r8,ror#32-7		@ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]);   /* D[4] */
1148bc3d5698SJohn Baldwin	@ mov	r9,r9,ror#32-7
1149bc3d5698SJohn Baldwin	eor	r1,r1,r5		@ C[0] =       A[0][0] ^ C[0];
1150bc3d5698SJohn Baldwin	eor	r12,r12,r2
1151bc3d5698SJohn Baldwin#ifndef	__thumb2__
1152bc3d5698SJohn Baldwin	ldr	r2,[sp,#288]
1153bc3d5698SJohn Baldwin#endif
1154bc3d5698SJohn Baldwin	eor	r14,r14,r3
1155bc3d5698SJohn Baldwin#ifndef	__thumb2__
1156bc3d5698SJohn Baldwin	ldr	r3,[sp,#288+4]
1157bc3d5698SJohn Baldwin#else
1158bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#288]
1159bc3d5698SJohn Baldwin#endif
1160bc3d5698SJohn Baldwin	mov	r5,r12,ror#32-21		@ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
1161bc3d5698SJohn Baldwin	ldr	r12,[sp,#444]			@ load counter
1162bc3d5698SJohn Baldwin	eor	r2,r2,r10
1163bc3d5698SJohn Baldwin	adr	r10,iotas32
1164bc3d5698SJohn Baldwin	mov	r4,r14,ror#32-22
1165bc3d5698SJohn Baldwin	add	r14,r10,r12
1166bc3d5698SJohn Baldwin	eor	r3,r3,r11
1167bc3d5698SJohn Baldwin#ifndef	__thumb2__
1168bc3d5698SJohn Baldwin	ldr	r10,[r14,#8]		@ iotas[i].lo
1169bc3d5698SJohn Baldwin#endif
1170bc3d5698SJohn Baldwin	add	r12,r12,#16
1171bc3d5698SJohn Baldwin#ifndef	__thumb2__
1172bc3d5698SJohn Baldwin	ldr	r11,[r14,#12]		@ iotas[i].hi
1173bc3d5698SJohn Baldwin#else
1174bc3d5698SJohn Baldwin	ldrd	r10,r11,[r14,#8]		@ iotas[i].lo
1175bc3d5698SJohn Baldwin#endif
1176bc3d5698SJohn Baldwin	cmp	r12,#192
1177bc3d5698SJohn Baldwin	str	r12,[sp,#444]			@ store counter
1178bc3d5698SJohn Baldwin	bic	r12,r4,r2,ror#32-22
1179bc3d5698SJohn Baldwin	bic	r14,r5,r3,ror#32-22
1180bc3d5698SJohn Baldwin	mov	r2,r2,ror#32-22		@ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
1181bc3d5698SJohn Baldwin	mov	r3,r3,ror#32-22
1182bc3d5698SJohn Baldwin	eor	r12,r12,r0
1183bc3d5698SJohn Baldwin	eor	r14,r14,r1
1184bc3d5698SJohn Baldwin	eor	r10,r10,r12
1185bc3d5698SJohn Baldwin	eor	r11,r11,r14
1186bc3d5698SJohn Baldwin#ifndef	__thumb2__
1187bc3d5698SJohn Baldwin	str	r10,[sp,#0]		@ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1188bc3d5698SJohn Baldwin#endif
1189bc3d5698SJohn Baldwin	bic	r12,r6,r4,ror#11
1190bc3d5698SJohn Baldwin#ifndef	__thumb2__
1191bc3d5698SJohn Baldwin	str	r11,[sp,#0+4]
1192bc3d5698SJohn Baldwin#else
1193bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#0]		@ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1194bc3d5698SJohn Baldwin#endif
1195bc3d5698SJohn Baldwin	bic	r14,r7,r5,ror#10
1196bc3d5698SJohn Baldwin	bic	r10,r8,r6,ror#32-(11-7)
1197bc3d5698SJohn Baldwin	bic	r11,r9,r7,ror#32-(10-7)
1198bc3d5698SJohn Baldwin	eor	r12,r2,r12,ror#32-11
1199bc3d5698SJohn Baldwin#ifndef	__thumb2__
1200bc3d5698SJohn Baldwin	str	r12,[sp,#8]		@ R[0][1] = C[1] ^ (~C[2] & C[3]);
1201bc3d5698SJohn Baldwin#endif
1202bc3d5698SJohn Baldwin	eor	r14,r3,r14,ror#32-10
1203bc3d5698SJohn Baldwin#ifndef	__thumb2__
1204bc3d5698SJohn Baldwin	str	r14,[sp,#8+4]
1205bc3d5698SJohn Baldwin#else
1206bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#8]		@ R[0][1] = C[1] ^ (~C[2] & C[3]);
1207bc3d5698SJohn Baldwin#endif
1208bc3d5698SJohn Baldwin	eor	r10,r4,r10,ror#32-7
1209bc3d5698SJohn Baldwin	eor	r11,r5,r11,ror#32-7
1210bc3d5698SJohn Baldwin#ifndef	__thumb2__
1211bc3d5698SJohn Baldwin	str	r10,[sp,#16]		@ R[0][2] = C[2] ^ (~C[3] & C[4]);
1212bc3d5698SJohn Baldwin#endif
1213bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#32-7
1214bc3d5698SJohn Baldwin#ifndef	__thumb2__
1215bc3d5698SJohn Baldwin	str	r11,[sp,#16+4]
1216bc3d5698SJohn Baldwin#else
1217bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#16]		@ R[0][2] = C[2] ^ (~C[3] & C[4]);
1218bc3d5698SJohn Baldwin#endif
1219bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#32-7
1220bc3d5698SJohn Baldwin	eor	r12,r12,r6,ror#32-11
1221bc3d5698SJohn Baldwin#ifndef	__thumb2__
1222bc3d5698SJohn Baldwin	str	r12,[sp,#24]		@ R[0][3] = C[3] ^ (~C[4] & C[0]);
1223bc3d5698SJohn Baldwin#endif
1224bc3d5698SJohn Baldwin	eor	r14,r14,r7,ror#32-10
1225bc3d5698SJohn Baldwin#ifndef	__thumb2__
1226bc3d5698SJohn Baldwin	str	r14,[sp,#24+4]
1227bc3d5698SJohn Baldwin#else
1228bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#24]		@ R[0][3] = C[3] ^ (~C[4] & C[0]);
1229bc3d5698SJohn Baldwin#endif
1230bc3d5698SJohn Baldwin	bic	r10,r2,r0
1231bc3d5698SJohn Baldwin	add	r14,sp,#224
1232bc3d5698SJohn Baldwin#ifndef	__thumb2__
1233bc3d5698SJohn Baldwin	ldr	r0,[sp,#264]		@ A[0][3]
1234bc3d5698SJohn Baldwin#endif
1235bc3d5698SJohn Baldwin	bic	r11,r3,r1
1236bc3d5698SJohn Baldwin#ifndef	__thumb2__
1237bc3d5698SJohn Baldwin	ldr	r1,[sp,#264+4]
1238bc3d5698SJohn Baldwin#else
1239bc3d5698SJohn Baldwin	ldrd	r0,r1,[sp,#264]		@ A[0][3]
1240bc3d5698SJohn Baldwin#endif
1241bc3d5698SJohn Baldwin	eor	r10,r10,r8,ror#32-7
1242bc3d5698SJohn Baldwin	eor	r11,r11,r9,ror#32-7
1243bc3d5698SJohn Baldwin#ifndef	__thumb2__
1244bc3d5698SJohn Baldwin	str	r10,[sp,#32]		@ R[0][4] = C[4] ^ (~C[0] & C[1]);
1245bc3d5698SJohn Baldwin#endif
1246bc3d5698SJohn Baldwin	add	r9,sp,#200
1247bc3d5698SJohn Baldwin#ifndef	__thumb2__
1248bc3d5698SJohn Baldwin	str	r11,[sp,#32+4]
1249bc3d5698SJohn Baldwin#else
1250bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#32]		@ R[0][4] = C[4] ^ (~C[0] & C[1]);
1251bc3d5698SJohn Baldwin#endif
1252bc3d5698SJohn Baldwin
1253bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11,r12,r14}	@ D[3..4]
1254bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[0..1]
1255bc3d5698SJohn Baldwin
1256bc3d5698SJohn Baldwin#ifndef	__thumb2__
1257bc3d5698SJohn Baldwin	ldr	r2,[sp,#312]		@ A[1][4]
1258bc3d5698SJohn Baldwin#endif
1259bc3d5698SJohn Baldwin	eor	r0,r0,r10
1260bc3d5698SJohn Baldwin#ifndef	__thumb2__
1261bc3d5698SJohn Baldwin	ldr	r3,[sp,#312+4]
1262bc3d5698SJohn Baldwin#else
1263bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#312]		@ A[1][4]
1264bc3d5698SJohn Baldwin#endif
1265bc3d5698SJohn Baldwin	eor	r1,r1,r11
1266bc3d5698SJohn Baldwin	@ mov	r0,r0,ror#32-14		@ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
1267bc3d5698SJohn Baldwin#ifndef	__thumb2__
1268bc3d5698SJohn Baldwin	ldr	r10,[sp,#368]		@ A[3][1]
1269bc3d5698SJohn Baldwin#endif
1270bc3d5698SJohn Baldwin	@ mov	r1,r1,ror#32-14
1271bc3d5698SJohn Baldwin#ifndef	__thumb2__
1272bc3d5698SJohn Baldwin	ldr	r11,[sp,#368+4]
1273bc3d5698SJohn Baldwin#else
1274bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#368]		@ A[3][1]
1275bc3d5698SJohn Baldwin#endif
1276bc3d5698SJohn Baldwin
1277bc3d5698SJohn Baldwin	eor	r2,r2,r12
1278bc3d5698SJohn Baldwin#ifndef	__thumb2__
1279bc3d5698SJohn Baldwin	ldr	r4,[sp,#320]		@ A[2][0]
1280bc3d5698SJohn Baldwin#endif
1281bc3d5698SJohn Baldwin	eor	r3,r3,r14
1282bc3d5698SJohn Baldwin#ifndef	__thumb2__
1283bc3d5698SJohn Baldwin	ldr	r5,[sp,#320+4]
1284bc3d5698SJohn Baldwin#else
1285bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#320]		@ A[2][0]
1286bc3d5698SJohn Baldwin#endif
1287bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-10		@ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
1288bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-10
1289bc3d5698SJohn Baldwin
1290bc3d5698SJohn Baldwin	eor	r6,r6,r4
1291bc3d5698SJohn Baldwin#ifndef	__thumb2__
1292bc3d5698SJohn Baldwin	ldr	r12,[sp,#216]		@ D[2]
1293bc3d5698SJohn Baldwin#endif
1294bc3d5698SJohn Baldwin	eor	r7,r7,r5
1295bc3d5698SJohn Baldwin#ifndef	__thumb2__
1296bc3d5698SJohn Baldwin	ldr	r14,[sp,#216+4]
1297bc3d5698SJohn Baldwin#else
1298bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#216]		@ D[2]
1299bc3d5698SJohn Baldwin#endif
1300bc3d5698SJohn Baldwin	mov	r5,r6,ror#32-1		@ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
1301bc3d5698SJohn Baldwin	mov	r4,r7,ror#32-2
1302bc3d5698SJohn Baldwin
1303bc3d5698SJohn Baldwin	eor	r10,r10,r8
1304bc3d5698SJohn Baldwin#ifndef	__thumb2__
1305bc3d5698SJohn Baldwin	ldr	r8,[sp,#416]		@ A[4][2]
1306bc3d5698SJohn Baldwin#endif
1307bc3d5698SJohn Baldwin	eor	r11,r11,r9
1308bc3d5698SJohn Baldwin#ifndef	__thumb2__
1309bc3d5698SJohn Baldwin	ldr	r9,[sp,#416+4]
1310bc3d5698SJohn Baldwin#else
1311bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#416]		@ A[4][2]
1312bc3d5698SJohn Baldwin#endif
1313bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-22		@ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
1314bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-23
1315bc3d5698SJohn Baldwin
1316bc3d5698SJohn Baldwin	bic	r10,r4,r2,ror#32-10
1317bc3d5698SJohn Baldwin	bic	r11,r5,r3,ror#32-10
1318bc3d5698SJohn Baldwin	eor	r12,r12,r8
1319bc3d5698SJohn Baldwin	eor	r14,r14,r9
1320bc3d5698SJohn Baldwin	mov	r9,r12,ror#32-30		@ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
1321bc3d5698SJohn Baldwin	mov	r8,r14,ror#32-31
1322bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-14
1323bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-14
1324bc3d5698SJohn Baldwin#ifndef	__thumb2__
1325bc3d5698SJohn Baldwin	str	r10,[sp,#40]		@ R[1][0] = C[0] ^ (~C[1] & C[2])
1326bc3d5698SJohn Baldwin#endif
1327bc3d5698SJohn Baldwin	bic	r12,r6,r4
1328bc3d5698SJohn Baldwin#ifndef	__thumb2__
1329bc3d5698SJohn Baldwin	str	r11,[sp,#40+4]
1330bc3d5698SJohn Baldwin#else
1331bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#40]		@ R[1][0] = C[0] ^ (~C[1] & C[2])
1332bc3d5698SJohn Baldwin#endif
1333bc3d5698SJohn Baldwin	bic	r14,r7,r5
1334bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-10
1335bc3d5698SJohn Baldwin#ifndef	__thumb2__
1336bc3d5698SJohn Baldwin	str	r12,[sp,#48]		@ R[1][1] = C[1] ^ (~C[2] & C[3]);
1337bc3d5698SJohn Baldwin#endif
1338bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-10
1339bc3d5698SJohn Baldwin#ifndef	__thumb2__
1340bc3d5698SJohn Baldwin	str	r14,[sp,#48+4]
1341bc3d5698SJohn Baldwin#else
1342bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#48]		@ R[1][1] = C[1] ^ (~C[2] & C[3]);
1343bc3d5698SJohn Baldwin#endif
1344bc3d5698SJohn Baldwin	bic	r10,r8,r6
1345bc3d5698SJohn Baldwin	bic	r11,r9,r7
1346bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#14
1347bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#14
1348bc3d5698SJohn Baldwin	eor	r10,r10,r4
1349bc3d5698SJohn Baldwin	eor	r11,r11,r5
1350bc3d5698SJohn Baldwin#ifndef	__thumb2__
1351bc3d5698SJohn Baldwin	str	r10,[sp,#56]		@ R[1][2] = C[2] ^ (~C[3] & C[4]);
1352bc3d5698SJohn Baldwin#endif
1353bc3d5698SJohn Baldwin	bic	r2,r2,r0,ror#32-(14-10)
1354bc3d5698SJohn Baldwin#ifndef	__thumb2__
1355bc3d5698SJohn Baldwin	str	r11,[sp,#56+4]
1356bc3d5698SJohn Baldwin#else
1357bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#56]		@ R[1][2] = C[2] ^ (~C[3] & C[4]);
1358bc3d5698SJohn Baldwin#endif
1359bc3d5698SJohn Baldwin	eor	r12,r6,r12,ror#32-14
1360bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#32-(14-10)
1361bc3d5698SJohn Baldwin#ifndef	__thumb2__
1362bc3d5698SJohn Baldwin	str	r12,[sp,#64]		@ R[1][3] = C[3] ^ (~C[4] & C[0]);
1363bc3d5698SJohn Baldwin#endif
1364bc3d5698SJohn Baldwin	eor	r14,r7,r14,ror#32-14
1365bc3d5698SJohn Baldwin#ifndef	__thumb2__
1366bc3d5698SJohn Baldwin	str	r14,[sp,#64+4]
1367bc3d5698SJohn Baldwin#else
1368bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#64]		@ R[1][3] = C[3] ^ (~C[4] & C[0]);
1369bc3d5698SJohn Baldwin#endif
1370bc3d5698SJohn Baldwin	add	r12,sp,#208
1371bc3d5698SJohn Baldwin#ifndef	__thumb2__
1372bc3d5698SJohn Baldwin	ldr	r1,[sp,#248]		@ A[0][1]
1373bc3d5698SJohn Baldwin#endif
1374bc3d5698SJohn Baldwin	eor	r10,r8,r2,ror#32-10
1375bc3d5698SJohn Baldwin#ifndef	__thumb2__
1376bc3d5698SJohn Baldwin	ldr	r0,[sp,#248+4]
1377bc3d5698SJohn Baldwin#else
1378bc3d5698SJohn Baldwin	ldrd	r1,r0,[sp,#248]		@ A[0][1]
1379bc3d5698SJohn Baldwin#endif
1380bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-10
1381bc3d5698SJohn Baldwin#ifndef	__thumb2__
1382bc3d5698SJohn Baldwin	str	r10,[sp,#72]		@ R[1][4] = C[4] ^ (~C[0] & C[1]);
1383bc3d5698SJohn Baldwin#endif
1384bc3d5698SJohn Baldwin#ifndef	__thumb2__
1385bc3d5698SJohn Baldwin	str	r11,[sp,#72+4]
1386bc3d5698SJohn Baldwin#else
1387bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#72]		@ R[1][4] = C[4] ^ (~C[0] & C[1]);
1388bc3d5698SJohn Baldwin#endif
1389bc3d5698SJohn Baldwin
1390bc3d5698SJohn Baldwin	add	r9,sp,#224
1391bc3d5698SJohn Baldwin	ldmia	r12,{r10,r11,r12,r14}	@ D[1..2]
1392bc3d5698SJohn Baldwin#ifndef	__thumb2__
1393bc3d5698SJohn Baldwin	ldr	r2,[sp,#296]		@ A[1][2]
1394bc3d5698SJohn Baldwin#endif
1395bc3d5698SJohn Baldwin#ifndef	__thumb2__
1396bc3d5698SJohn Baldwin	ldr	r3,[sp,#296+4]
1397bc3d5698SJohn Baldwin#else
1398bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#296]		@ A[1][2]
1399bc3d5698SJohn Baldwin#endif
1400bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[3..4]
1401bc3d5698SJohn Baldwin
1402bc3d5698SJohn Baldwin	eor	r1,r1,r10
1403bc3d5698SJohn Baldwin#ifndef	__thumb2__
1404bc3d5698SJohn Baldwin	ldr	r4,[sp,#344]		@ A[2][3]
1405bc3d5698SJohn Baldwin#endif
1406bc3d5698SJohn Baldwin	eor	r0,r0,r11
1407bc3d5698SJohn Baldwin#ifndef	__thumb2__
1408bc3d5698SJohn Baldwin	ldr	r5,[sp,#344+4]
1409bc3d5698SJohn Baldwin#else
1410bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#344]		@ A[2][3]
1411bc3d5698SJohn Baldwin#endif
1412bc3d5698SJohn Baldwin	mov	r0,r0,ror#32-1		@ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
1413bc3d5698SJohn Baldwin
1414bc3d5698SJohn Baldwin	eor	r2,r2,r12
1415bc3d5698SJohn Baldwin#ifndef	__thumb2__
1416bc3d5698SJohn Baldwin	ldr	r10,[sp,#392]		@ A[3][4]
1417bc3d5698SJohn Baldwin#endif
1418bc3d5698SJohn Baldwin	eor	r3,r3,r14
1419bc3d5698SJohn Baldwin#ifndef	__thumb2__
1420bc3d5698SJohn Baldwin	ldr	r11,[sp,#392+4]
1421bc3d5698SJohn Baldwin#else
1422bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#392]		@ A[3][4]
1423bc3d5698SJohn Baldwin#endif
1424bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-3		@ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
1425bc3d5698SJohn Baldwin#ifndef	__thumb2__
1426bc3d5698SJohn Baldwin	ldr	r12,[sp,#200]		@ D[0]
1427bc3d5698SJohn Baldwin#endif
1428bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-3
1429bc3d5698SJohn Baldwin#ifndef	__thumb2__
1430bc3d5698SJohn Baldwin	ldr	r14,[sp,#200+4]
1431bc3d5698SJohn Baldwin#else
1432bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#200]		@ D[0]
1433bc3d5698SJohn Baldwin#endif
1434bc3d5698SJohn Baldwin
1435bc3d5698SJohn Baldwin	eor	r4,r4,r6
1436bc3d5698SJohn Baldwin	eor	r5,r5,r7
1437bc3d5698SJohn Baldwin	@ mov	r5,r6,ror#32-12		@ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
1438bc3d5698SJohn Baldwin	@ mov	r4,r7,ror#32-13		@ [track reverse order below]
1439bc3d5698SJohn Baldwin
1440bc3d5698SJohn Baldwin	eor	r10,r10,r8
1441bc3d5698SJohn Baldwin#ifndef	__thumb2__
1442bc3d5698SJohn Baldwin	ldr	r8,[sp,#400]		@ A[4][0]
1443bc3d5698SJohn Baldwin#endif
1444bc3d5698SJohn Baldwin	eor	r11,r11,r9
1445bc3d5698SJohn Baldwin#ifndef	__thumb2__
1446bc3d5698SJohn Baldwin	ldr	r9,[sp,#400+4]
1447bc3d5698SJohn Baldwin#else
1448bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#400]		@ A[4][0]
1449bc3d5698SJohn Baldwin#endif
1450bc3d5698SJohn Baldwin	mov	r6,r10,ror#32-4		@ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
1451bc3d5698SJohn Baldwin	mov	r7,r11,ror#32-4
1452bc3d5698SJohn Baldwin
1453bc3d5698SJohn Baldwin	eor	r12,r12,r8
1454bc3d5698SJohn Baldwin	eor	r14,r14,r9
1455bc3d5698SJohn Baldwin	mov	r8,r12,ror#32-9		@ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
1456bc3d5698SJohn Baldwin	mov	r9,r14,ror#32-9
1457bc3d5698SJohn Baldwin
1458bc3d5698SJohn Baldwin	bic	r10,r5,r2,ror#13-3
1459bc3d5698SJohn Baldwin	bic	r11,r4,r3,ror#12-3
1460bc3d5698SJohn Baldwin	bic	r12,r6,r5,ror#32-13
1461bc3d5698SJohn Baldwin	bic	r14,r7,r4,ror#32-12
1462bc3d5698SJohn Baldwin	eor	r10,r0,r10,ror#32-13
1463bc3d5698SJohn Baldwin	eor	r11,r1,r11,ror#32-12
1464bc3d5698SJohn Baldwin#ifndef	__thumb2__
1465bc3d5698SJohn Baldwin	str	r10,[sp,#80]		@ R[2][0] = C[0] ^ (~C[1] & C[2])
1466bc3d5698SJohn Baldwin#endif
1467bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-3
1468bc3d5698SJohn Baldwin#ifndef	__thumb2__
1469bc3d5698SJohn Baldwin	str	r11,[sp,#80+4]
1470bc3d5698SJohn Baldwin#else
1471bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#80]		@ R[2][0] = C[0] ^ (~C[1] & C[2])
1472bc3d5698SJohn Baldwin#endif
1473bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-3
1474bc3d5698SJohn Baldwin#ifndef	__thumb2__
1475bc3d5698SJohn Baldwin	str	r12,[sp,#88]		@ R[2][1] = C[1] ^ (~C[2] & C[3]);
1476bc3d5698SJohn Baldwin#endif
1477bc3d5698SJohn Baldwin	bic	r10,r8,r6
1478bc3d5698SJohn Baldwin	bic	r11,r9,r7
1479bc3d5698SJohn Baldwin#ifndef	__thumb2__
1480bc3d5698SJohn Baldwin	str	r14,[sp,#88+4]
1481bc3d5698SJohn Baldwin#else
1482bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#88]		@ R[2][1] = C[1] ^ (~C[2] & C[3]);
1483bc3d5698SJohn Baldwin#endif
1484bc3d5698SJohn Baldwin	eor	r10,r10,r5,ror#32-13
1485bc3d5698SJohn Baldwin	eor	r11,r11,r4,ror#32-12
1486bc3d5698SJohn Baldwin#ifndef	__thumb2__
1487bc3d5698SJohn Baldwin	str	r10,[sp,#96]		@ R[2][2] = C[2] ^ (~C[3] & C[4]);
1488bc3d5698SJohn Baldwin#endif
1489bc3d5698SJohn Baldwin	bic	r12,r0,r8
1490bc3d5698SJohn Baldwin#ifndef	__thumb2__
1491bc3d5698SJohn Baldwin	str	r11,[sp,#96+4]
1492bc3d5698SJohn Baldwin#else
1493bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#96]		@ R[2][2] = C[2] ^ (~C[3] & C[4]);
1494bc3d5698SJohn Baldwin#endif
1495bc3d5698SJohn Baldwin	bic	r14,r1,r9
1496bc3d5698SJohn Baldwin	eor	r12,r12,r6
1497bc3d5698SJohn Baldwin	eor	r14,r14,r7
1498bc3d5698SJohn Baldwin#ifndef	__thumb2__
1499bc3d5698SJohn Baldwin	str	r12,[sp,#104]		@ R[2][3] = C[3] ^ (~C[4] & C[0]);
1500bc3d5698SJohn Baldwin#endif
1501bc3d5698SJohn Baldwin	bic	r10,r2,r0,ror#3
1502bc3d5698SJohn Baldwin#ifndef	__thumb2__
1503bc3d5698SJohn Baldwin	str	r14,[sp,#104+4]
1504bc3d5698SJohn Baldwin#else
1505bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#104]		@ R[2][3] = C[3] ^ (~C[4] & C[0]);
1506bc3d5698SJohn Baldwin#endif
1507bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#3
1508bc3d5698SJohn Baldwin#ifndef	__thumb2__
1509bc3d5698SJohn Baldwin	ldr	r1,[sp,#272]		@ A[0][4] [in reverse order]
1510bc3d5698SJohn Baldwin#endif
1511bc3d5698SJohn Baldwin	eor	r10,r8,r10,ror#32-3
1512bc3d5698SJohn Baldwin#ifndef	__thumb2__
1513bc3d5698SJohn Baldwin	ldr	r0,[sp,#272+4]
1514bc3d5698SJohn Baldwin#else
1515bc3d5698SJohn Baldwin	ldrd	r1,r0,[sp,#272]		@ A[0][4] [in reverse order]
1516bc3d5698SJohn Baldwin#endif
1517bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-3
1518bc3d5698SJohn Baldwin#ifndef	__thumb2__
1519bc3d5698SJohn Baldwin	str	r10,[sp,#112]		@ R[2][4] = C[4] ^ (~C[0] & C[1]);
1520bc3d5698SJohn Baldwin#endif
1521bc3d5698SJohn Baldwin	add	r9,sp,#208
1522bc3d5698SJohn Baldwin#ifndef	__thumb2__
1523bc3d5698SJohn Baldwin	str	r11,[sp,#112+4]
1524bc3d5698SJohn Baldwin#else
1525bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#112]		@ R[2][4] = C[4] ^ (~C[0] & C[1]);
1526bc3d5698SJohn Baldwin#endif
1527bc3d5698SJohn Baldwin
1528bc3d5698SJohn Baldwin#ifndef	__thumb2__
1529bc3d5698SJohn Baldwin	ldr	r10,[sp,#232]		@ D[4]
1530bc3d5698SJohn Baldwin#endif
1531bc3d5698SJohn Baldwin#ifndef	__thumb2__
1532bc3d5698SJohn Baldwin	ldr	r11,[sp,#232+4]
1533bc3d5698SJohn Baldwin#else
1534bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#232]		@ D[4]
1535bc3d5698SJohn Baldwin#endif
1536bc3d5698SJohn Baldwin#ifndef	__thumb2__
1537bc3d5698SJohn Baldwin	ldr	r12,[sp,#200]		@ D[0]
1538bc3d5698SJohn Baldwin#endif
1539bc3d5698SJohn Baldwin#ifndef	__thumb2__
1540bc3d5698SJohn Baldwin	ldr	r14,[sp,#200+4]
1541bc3d5698SJohn Baldwin#else
1542bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#200]		@ D[0]
1543bc3d5698SJohn Baldwin#endif
1544bc3d5698SJohn Baldwin
1545bc3d5698SJohn Baldwin	ldmia	r9,{r6,r7,r8,r9}		@ D[1..2]
1546bc3d5698SJohn Baldwin
1547bc3d5698SJohn Baldwin	eor	r1,r1,r10
1548bc3d5698SJohn Baldwin#ifndef	__thumb2__
1549bc3d5698SJohn Baldwin	ldr	r2,[sp,#280]		@ A[1][0]
1550bc3d5698SJohn Baldwin#endif
1551bc3d5698SJohn Baldwin	eor	r0,r0,r11
1552bc3d5698SJohn Baldwin#ifndef	__thumb2__
1553bc3d5698SJohn Baldwin	ldr	r3,[sp,#280+4]
1554bc3d5698SJohn Baldwin#else
1555bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#280]		@ A[1][0]
1556bc3d5698SJohn Baldwin#endif
1557bc3d5698SJohn Baldwin	@ mov	r1,r10,ror#32-13		@ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
1558bc3d5698SJohn Baldwin#ifndef	__thumb2__
1559bc3d5698SJohn Baldwin	ldr	r4,[sp,#328]		@ A[2][1]
1560bc3d5698SJohn Baldwin#endif
1561bc3d5698SJohn Baldwin	@ mov	r0,r11,ror#32-14		@ [was loaded in reverse order]
1562bc3d5698SJohn Baldwin#ifndef	__thumb2__
1563bc3d5698SJohn Baldwin	ldr	r5,[sp,#328+4]
1564bc3d5698SJohn Baldwin#else
1565bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#328]		@ A[2][1]
1566bc3d5698SJohn Baldwin#endif
1567bc3d5698SJohn Baldwin
1568bc3d5698SJohn Baldwin	eor	r2,r2,r12
1569bc3d5698SJohn Baldwin#ifndef	__thumb2__
1570bc3d5698SJohn Baldwin	ldr	r10,[sp,#376]		@ A[3][2]
1571bc3d5698SJohn Baldwin#endif
1572bc3d5698SJohn Baldwin	eor	r3,r3,r14
1573bc3d5698SJohn Baldwin#ifndef	__thumb2__
1574bc3d5698SJohn Baldwin	ldr	r11,[sp,#376+4]
1575bc3d5698SJohn Baldwin#else
1576bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#376]		@ A[3][2]
1577bc3d5698SJohn Baldwin#endif
1578bc3d5698SJohn Baldwin	@ mov	r2,r2,ror#32-18		@ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
1579bc3d5698SJohn Baldwin#ifndef	__thumb2__
1580bc3d5698SJohn Baldwin	ldr	r12,[sp,#224]		@ D[3]
1581bc3d5698SJohn Baldwin#endif
1582bc3d5698SJohn Baldwin	@ mov	r3,r3,ror#32-18
1583bc3d5698SJohn Baldwin#ifndef	__thumb2__
1584bc3d5698SJohn Baldwin	ldr	r14,[sp,#224+4]
1585bc3d5698SJohn Baldwin#else
1586bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#224]		@ D[3]
1587bc3d5698SJohn Baldwin#endif
1588bc3d5698SJohn Baldwin
1589bc3d5698SJohn Baldwin	eor	r6,r6,r4
1590bc3d5698SJohn Baldwin	eor	r7,r7,r5
1591bc3d5698SJohn Baldwin	mov	r4,r6,ror#32-5		@ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
1592bc3d5698SJohn Baldwin	mov	r5,r7,ror#32-5
1593bc3d5698SJohn Baldwin
1594bc3d5698SJohn Baldwin	eor	r10,r10,r8
1595bc3d5698SJohn Baldwin#ifndef	__thumb2__
1596bc3d5698SJohn Baldwin	ldr	r8,[sp,#424]		@ A[4][3]
1597bc3d5698SJohn Baldwin#endif
1598bc3d5698SJohn Baldwin	eor	r11,r11,r9
1599bc3d5698SJohn Baldwin#ifndef	__thumb2__
1600bc3d5698SJohn Baldwin	ldr	r9,[sp,#424+4]
1601bc3d5698SJohn Baldwin#else
1602bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#424]		@ A[4][3]
1603bc3d5698SJohn Baldwin#endif
1604bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-7		@ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
1605bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-8
1606bc3d5698SJohn Baldwin
1607bc3d5698SJohn Baldwin	eor	r12,r12,r8
1608bc3d5698SJohn Baldwin	eor	r14,r14,r9
1609bc3d5698SJohn Baldwin	mov	r8,r12,ror#32-28		@ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
1610bc3d5698SJohn Baldwin	mov	r9,r14,ror#32-28
1611bc3d5698SJohn Baldwin
1612bc3d5698SJohn Baldwin	bic	r10,r4,r2,ror#32-18
1613bc3d5698SJohn Baldwin	bic	r11,r5,r3,ror#32-18
1614bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-14
1615bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-13
1616bc3d5698SJohn Baldwin#ifndef	__thumb2__
1617bc3d5698SJohn Baldwin	str	r10,[sp,#120]		@ R[3][0] = C[0] ^ (~C[1] & C[2])
1618bc3d5698SJohn Baldwin#endif
1619bc3d5698SJohn Baldwin	bic	r12,r6,r4
1620bc3d5698SJohn Baldwin#ifndef	__thumb2__
1621bc3d5698SJohn Baldwin	str	r11,[sp,#120+4]
1622bc3d5698SJohn Baldwin#else
1623bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#120]		@ R[3][0] = C[0] ^ (~C[1] & C[2])
1624bc3d5698SJohn Baldwin#endif
1625bc3d5698SJohn Baldwin	bic	r14,r7,r5
1626bc3d5698SJohn Baldwin	eor	r12,r12,r2,ror#32-18
1627bc3d5698SJohn Baldwin#ifndef	__thumb2__
1628bc3d5698SJohn Baldwin	str	r12,[sp,#128]		@ R[3][1] = C[1] ^ (~C[2] & C[3]);
1629bc3d5698SJohn Baldwin#endif
1630bc3d5698SJohn Baldwin	eor	r14,r14,r3,ror#32-18
1631bc3d5698SJohn Baldwin#ifndef	__thumb2__
1632bc3d5698SJohn Baldwin	str	r14,[sp,#128+4]
1633bc3d5698SJohn Baldwin#else
1634bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#128]		@ R[3][1] = C[1] ^ (~C[2] & C[3]);
1635bc3d5698SJohn Baldwin#endif
1636bc3d5698SJohn Baldwin	bic	r10,r8,r6
1637bc3d5698SJohn Baldwin	bic	r11,r9,r7
1638bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#14
1639bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#13
1640bc3d5698SJohn Baldwin	eor	r10,r10,r4
1641bc3d5698SJohn Baldwin	eor	r11,r11,r5
1642bc3d5698SJohn Baldwin#ifndef	__thumb2__
1643bc3d5698SJohn Baldwin	str	r10,[sp,#136]		@ R[3][2] = C[2] ^ (~C[3] & C[4]);
1644bc3d5698SJohn Baldwin#endif
1645bc3d5698SJohn Baldwin	bic	r2,r2,r0,ror#18-14
1646bc3d5698SJohn Baldwin#ifndef	__thumb2__
1647bc3d5698SJohn Baldwin	str	r11,[sp,#136+4]
1648bc3d5698SJohn Baldwin#else
1649bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#136]		@ R[3][2] = C[2] ^ (~C[3] & C[4]);
1650bc3d5698SJohn Baldwin#endif
1651bc3d5698SJohn Baldwin	eor	r12,r6,r12,ror#32-14
1652bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#18-13
1653bc3d5698SJohn Baldwin	eor	r14,r7,r14,ror#32-13
1654bc3d5698SJohn Baldwin#ifndef	__thumb2__
1655bc3d5698SJohn Baldwin	str	r12,[sp,#144]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
1656bc3d5698SJohn Baldwin#endif
1657bc3d5698SJohn Baldwin#ifndef	__thumb2__
1658bc3d5698SJohn Baldwin	str	r14,[sp,#144+4]
1659bc3d5698SJohn Baldwin#else
1660bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#144]		@ R[3][3] = C[3] ^ (~C[4] & C[0]);
1661bc3d5698SJohn Baldwin#endif
1662bc3d5698SJohn Baldwin	add	r14,sp,#216
1663bc3d5698SJohn Baldwin#ifndef	__thumb2__
1664bc3d5698SJohn Baldwin	ldr	r0,[sp,#256]		@ A[0][2]
1665bc3d5698SJohn Baldwin#endif
1666bc3d5698SJohn Baldwin	eor	r10,r8,r2,ror#32-18
1667bc3d5698SJohn Baldwin#ifndef	__thumb2__
1668bc3d5698SJohn Baldwin	ldr	r1,[sp,#256+4]
1669bc3d5698SJohn Baldwin#else
1670bc3d5698SJohn Baldwin	ldrd	r0,r1,[sp,#256]		@ A[0][2]
1671bc3d5698SJohn Baldwin#endif
1672bc3d5698SJohn Baldwin	eor	r11,r9,r11,ror#32-18
1673bc3d5698SJohn Baldwin#ifndef	__thumb2__
1674bc3d5698SJohn Baldwin	str	r10,[sp,#152]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
1675bc3d5698SJohn Baldwin#endif
1676bc3d5698SJohn Baldwin#ifndef	__thumb2__
1677bc3d5698SJohn Baldwin	str	r11,[sp,#152+4]
1678bc3d5698SJohn Baldwin#else
1679bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#152]		@ R[3][4] = C[4] ^ (~C[0] & C[1]);
1680bc3d5698SJohn Baldwin#endif
1681bc3d5698SJohn Baldwin
1682bc3d5698SJohn Baldwin	ldmia	r14,{r10,r11,r12,r14}	@ D[2..3]
1683bc3d5698SJohn Baldwin#ifndef	__thumb2__
1684bc3d5698SJohn Baldwin	ldr	r2,[sp,#304]		@ A[1][3]
1685bc3d5698SJohn Baldwin#endif
1686bc3d5698SJohn Baldwin#ifndef	__thumb2__
1687bc3d5698SJohn Baldwin	ldr	r3,[sp,#304+4]
1688bc3d5698SJohn Baldwin#else
1689bc3d5698SJohn Baldwin	ldrd	r2,r3,[sp,#304]		@ A[1][3]
1690bc3d5698SJohn Baldwin#endif
1691bc3d5698SJohn Baldwin#ifndef	__thumb2__
1692bc3d5698SJohn Baldwin	ldr	r6,[sp,#232]		@ D[4]
1693bc3d5698SJohn Baldwin#endif
1694bc3d5698SJohn Baldwin#ifndef	__thumb2__
1695bc3d5698SJohn Baldwin	ldr	r7,[sp,#232+4]
1696bc3d5698SJohn Baldwin#else
1697bc3d5698SJohn Baldwin	ldrd	r6,r7,[sp,#232]		@ D[4]
1698bc3d5698SJohn Baldwin#endif
1699bc3d5698SJohn Baldwin
1700bc3d5698SJohn Baldwin	eor	r0,r0,r10
1701bc3d5698SJohn Baldwin#ifndef	__thumb2__
1702bc3d5698SJohn Baldwin	ldr	r4,[sp,#352]		@ A[2][4]
1703bc3d5698SJohn Baldwin#endif
1704bc3d5698SJohn Baldwin	eor	r1,r1,r11
1705bc3d5698SJohn Baldwin#ifndef	__thumb2__
1706bc3d5698SJohn Baldwin	ldr	r5,[sp,#352+4]
1707bc3d5698SJohn Baldwin#else
1708bc3d5698SJohn Baldwin	ldrd	r4,r5,[sp,#352]		@ A[2][4]
1709bc3d5698SJohn Baldwin#endif
1710bc3d5698SJohn Baldwin	@ mov	r0,r0,ror#32-31		@ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
1711bc3d5698SJohn Baldwin#ifndef	__thumb2__
1712bc3d5698SJohn Baldwin	ldr	r8,[sp,#200]		@ D[0]
1713bc3d5698SJohn Baldwin#endif
1714bc3d5698SJohn Baldwin	@ mov	r1,r1,ror#32-31
1715bc3d5698SJohn Baldwin#ifndef	__thumb2__
1716bc3d5698SJohn Baldwin	ldr	r9,[sp,#200+4]
1717bc3d5698SJohn Baldwin#else
1718bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#200]		@ D[0]
1719bc3d5698SJohn Baldwin#endif
1720bc3d5698SJohn Baldwin
1721bc3d5698SJohn Baldwin	eor	r12,r12,r2
1722bc3d5698SJohn Baldwin#ifndef	__thumb2__
1723bc3d5698SJohn Baldwin	ldr	r10,[sp,#360]		@ A[3][0]
1724bc3d5698SJohn Baldwin#endif
1725bc3d5698SJohn Baldwin	eor	r14,r14,r3
1726bc3d5698SJohn Baldwin#ifndef	__thumb2__
1727bc3d5698SJohn Baldwin	ldr	r11,[sp,#360+4]
1728bc3d5698SJohn Baldwin#else
1729bc3d5698SJohn Baldwin	ldrd	r10,r11,[sp,#360]		@ A[3][0]
1730bc3d5698SJohn Baldwin#endif
1731bc3d5698SJohn Baldwin	mov	r3,r12,ror#32-27		@ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
1732bc3d5698SJohn Baldwin#ifndef	__thumb2__
1733bc3d5698SJohn Baldwin	ldr	r12,[sp,#208]		@ D[1]
1734bc3d5698SJohn Baldwin#endif
1735bc3d5698SJohn Baldwin	mov	r2,r14,ror#32-28
1736bc3d5698SJohn Baldwin#ifndef	__thumb2__
1737bc3d5698SJohn Baldwin	ldr	r14,[sp,#208+4]
1738bc3d5698SJohn Baldwin#else
1739bc3d5698SJohn Baldwin	ldrd	r12,r14,[sp,#208]		@ D[1]
1740bc3d5698SJohn Baldwin#endif
1741bc3d5698SJohn Baldwin
1742bc3d5698SJohn Baldwin	eor	r6,r6,r4
1743bc3d5698SJohn Baldwin	eor	r7,r7,r5
1744bc3d5698SJohn Baldwin	mov	r5,r6,ror#32-19		@ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
1745bc3d5698SJohn Baldwin	mov	r4,r7,ror#32-20
1746bc3d5698SJohn Baldwin
1747bc3d5698SJohn Baldwin	eor	r10,r10,r8
1748bc3d5698SJohn Baldwin#ifndef	__thumb2__
1749bc3d5698SJohn Baldwin	ldr	r8,[sp,#408]		@ A[4][1]
1750bc3d5698SJohn Baldwin#endif
1751bc3d5698SJohn Baldwin	eor	r11,r11,r9
1752bc3d5698SJohn Baldwin#ifndef	__thumb2__
1753bc3d5698SJohn Baldwin	ldr	r9,[sp,#408+4]
1754bc3d5698SJohn Baldwin#else
1755bc3d5698SJohn Baldwin	ldrd	r8,r9,[sp,#408]		@ A[4][1]
1756bc3d5698SJohn Baldwin#endif
1757bc3d5698SJohn Baldwin	mov	r7,r10,ror#32-20		@ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
1758bc3d5698SJohn Baldwin	mov	r6,r11,ror#32-21
1759bc3d5698SJohn Baldwin
1760bc3d5698SJohn Baldwin	eor	r8,r8,r12
1761bc3d5698SJohn Baldwin	eor	r9,r9,r14
1762bc3d5698SJohn Baldwin	@ mov	r8,r2,ror#32-1		@ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
1763bc3d5698SJohn Baldwin	@ mov	r9,r3,ror#32-1
1764bc3d5698SJohn Baldwin
1765bc3d5698SJohn Baldwin	bic	r10,r4,r2
1766bc3d5698SJohn Baldwin	bic	r11,r5,r3
1767bc3d5698SJohn Baldwin	eor	r10,r10,r0,ror#32-31
1768bc3d5698SJohn Baldwin#ifndef	__thumb2__
1769bc3d5698SJohn Baldwin	str	r10,[sp,#160]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
1770bc3d5698SJohn Baldwin#endif
1771bc3d5698SJohn Baldwin	eor	r11,r11,r1,ror#32-31
1772bc3d5698SJohn Baldwin#ifndef	__thumb2__
1773bc3d5698SJohn Baldwin	str	r11,[sp,#160+4]
1774bc3d5698SJohn Baldwin#else
1775bc3d5698SJohn Baldwin	strd	r10,r11,[sp,#160]		@ R[4][0] = C[0] ^ (~C[1] & C[2])
1776bc3d5698SJohn Baldwin#endif
1777bc3d5698SJohn Baldwin	bic	r12,r6,r4
1778bc3d5698SJohn Baldwin	bic	r14,r7,r5
1779bc3d5698SJohn Baldwin	eor	r12,r12,r2
1780bc3d5698SJohn Baldwin	eor	r14,r14,r3
1781bc3d5698SJohn Baldwin#ifndef	__thumb2__
1782bc3d5698SJohn Baldwin	str	r12,[sp,#168]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
1783bc3d5698SJohn Baldwin#endif
1784bc3d5698SJohn Baldwin	bic	r10,r8,r6,ror#1
1785bc3d5698SJohn Baldwin#ifndef	__thumb2__
1786bc3d5698SJohn Baldwin	str	r14,[sp,#168+4]
1787bc3d5698SJohn Baldwin#else
1788bc3d5698SJohn Baldwin	strd	r12,r14,[sp,#168]		@ R[4][1] = C[1] ^ (~C[2] & C[3]);
1789bc3d5698SJohn Baldwin#endif
1790bc3d5698SJohn Baldwin	bic	r11,r9,r7,ror#1
1791bc3d5698SJohn Baldwin	bic	r12,r0,r8,ror#31-1
1792bc3d5698SJohn Baldwin	bic	r14,r1,r9,ror#31-1
1793bc3d5698SJohn Baldwin	eor	r4,r4,r10,ror#32-1
1794bc3d5698SJohn Baldwin#ifndef	__thumb2__
1795bc3d5698SJohn Baldwin	str	r4,[sp,#176]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
1796bc3d5698SJohn Baldwin#endif
1797bc3d5698SJohn Baldwin	eor	r5,r5,r11,ror#32-1
1798bc3d5698SJohn Baldwin#ifndef	__thumb2__
1799bc3d5698SJohn Baldwin	str	r5,[sp,#176+4]
1800bc3d5698SJohn Baldwin#else
1801bc3d5698SJohn Baldwin	strd	r4,r5,[sp,#176]		@ R[4][2] = C[2] ^= (~C[3] & C[4]);
1802bc3d5698SJohn Baldwin#endif
1803bc3d5698SJohn Baldwin	eor	r6,r6,r12,ror#32-31
1804bc3d5698SJohn Baldwin	eor	r7,r7,r14,ror#32-31
1805bc3d5698SJohn Baldwin#ifndef	__thumb2__
1806bc3d5698SJohn Baldwin	str	r6,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
1807bc3d5698SJohn Baldwin#endif
1808bc3d5698SJohn Baldwin	bic	r10,r2,r0,ror#32-31
1809bc3d5698SJohn Baldwin#ifndef	__thumb2__
1810bc3d5698SJohn Baldwin	str	r7,[sp,#184+4]
1811bc3d5698SJohn Baldwin#else
1812bc3d5698SJohn Baldwin	strd	r6,r7,[sp,#184]		@ R[4][3] = C[3] ^= (~C[4] & C[0]);
1813bc3d5698SJohn Baldwin#endif
1814bc3d5698SJohn Baldwin	bic	r11,r3,r1,ror#32-31
1815bc3d5698SJohn Baldwin	add	r12,sp,#0
1816bc3d5698SJohn Baldwin	eor	r8,r10,r8,ror#32-1
1817bc3d5698SJohn Baldwin	add	r10,sp,#40
1818bc3d5698SJohn Baldwin	eor	r9,r11,r9,ror#32-1
1819bc3d5698SJohn Baldwin#ifndef	__thumb2__
1820bc3d5698SJohn Baldwin	str	r8,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
1821bc3d5698SJohn Baldwin#endif
1822bc3d5698SJohn Baldwin#ifndef	__thumb2__
1823bc3d5698SJohn Baldwin	str	r9,[sp,#192+4]
1824bc3d5698SJohn Baldwin#else
1825bc3d5698SJohn Baldwin	strd	r8,r9,[sp,#192]		@ R[4][4] = C[4] ^= (~C[0] & C[1]);
1826bc3d5698SJohn Baldwin#endif
1827bc3d5698SJohn Baldwin	blo	.Lround2x
1828bc3d5698SJohn Baldwin
1829*c0855eaaSJohn Baldwin#if __ARM_ARCH__>=5
1830bc3d5698SJohn Baldwin	ldr	pc,[sp,#440]
1831*c0855eaaSJohn Baldwin#else
1832*c0855eaaSJohn Baldwin	ldr	lr,[sp,#440]
1833*c0855eaaSJohn Baldwin	tst	lr,#1
1834*c0855eaaSJohn Baldwin	moveq	pc,lr		@ be binary compatible with V4, yet
1835*c0855eaaSJohn Baldwin.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
1836*c0855eaaSJohn Baldwin#endif
1837bc3d5698SJohn Baldwin.size	KeccakF1600_int,.-KeccakF1600_int
1838bc3d5698SJohn Baldwin
@ KeccakF1600 -- run the permutation on a state held in memory.
@
@ In:     r0 = pointer to the state A; 200 bytes (5 x 40) are read from it
@         and the same 200 bytes are rewritten before returning.
@ Saved:  r0, r4-r11 and lr are pushed on entry; r4-r11 are restored on exit.
@         r0-r3 are used as copy scratch and are not restored.
@ Stack:  40 bytes of pushed registers plus 440+16 bytes of locals.  The
@         working copy of the state is placed at sp+0..sp+199.
@ NOTE(review): KeccakF1600_enter is defined outside this block.  It is
@ entered with r12 = sp+0 and r10 = sp+40, and the copy-back below relies on
@ r10 == sp+40 and on the result being at sp+0 when it returns; the visible
@ tail of KeccakF1600_int does leave r10 = sp+40 -- confirm that
@ KeccakF1600_enter shares that exit path.
1839bc3d5698SJohn Baldwin.type	KeccakF1600, %function
1840bc3d5698SJohn Baldwin.align	5
1841bc3d5698SJohn BaldwinKeccakF1600:
	@ Push r0 first (lowest address) so the state pointer can be reloaded
	@ later from the word just above the local area.
1842bc3d5698SJohn Baldwin	stmdb	sp!,{r0,r4-r11,lr}
1843bc3d5698SJohn Baldwin	sub	sp,sp,#440+16			@ space for A[5][5],D[5],T[5][5],...
1844bc3d5698SJohn Baldwin
	@ Copy the state to the stack, ten 32-bit words (40 bytes) per
	@ ldmia/stmia pair: the first 40 bytes go from [r0] to [sp], the
	@ remaining 160 bytes are streamed from r0+40 (r10) to sp+40 (r11).
1845bc3d5698SJohn Baldwin	add	r10,r0,#40
1846bc3d5698SJohn Baldwin	add	r11,sp,#40
1847bc3d5698SJohn Baldwin	ldmia	r0,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}		@ copy A[5][5] to stack
1848bc3d5698SJohn Baldwin	stmia	sp,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1849bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1850bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1851bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1852bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1853bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1854bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
	@ Last 40 bytes.  r10 and r12 are re-pointed at the stack copy between
	@ the final load and store; r0-r9 still hold the words to be stored.
1855bc3d5698SJohn Baldwin	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1856bc3d5698SJohn Baldwin	add	r12,sp,#0
1857bc3d5698SJohn Baldwin	add	r10,sp,#40
1858bc3d5698SJohn Baldwin	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1859bc3d5698SJohn Baldwin
1860bc3d5698SJohn Baldwin	bl	KeccakF1600_enter
1861bc3d5698SJohn Baldwin
	@ sp+440+16 is the slot where r0 was pushed on entry.  Copy the 200
	@ bytes at sp+0 back to the state: first 40 bytes from [sp], the rest
	@ via r10 (expected to be sp+40 here, see header note).
1862bc3d5698SJohn Baldwin	ldr	r11, [sp,#440+16]		@ restore pointer to A
1863bc3d5698SJohn Baldwin	ldmia	sp,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1864bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}		@ return A[5][5]
1865bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1866bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1867bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1868bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1869bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1870bc3d5698SJohn Baldwin	stmia	r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1871bc3d5698SJohn Baldwin	ldmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1872bc3d5698SJohn Baldwin	stmia	r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1873bc3d5698SJohn Baldwin
	@ Drop the locals plus the 4-byte saved-r0 slot, leaving sp at the
	@ saved r4.  On ARMv5 and later the return address is popped straight
	@ into pc.  Otherwise it is popped into lr: if bit 0 of lr is clear the
	@ routine returns with mov pc,lr, else it falls through to the
	@ hand-encoded word 0xe12fff1e, which is the encoding of bx lr.
1874bc3d5698SJohn Baldwin	add	sp,sp,#440+20
1875*c0855eaaSJohn Baldwin#if __ARM_ARCH__>=5
1876bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1877*c0855eaaSJohn Baldwin#else
1878*c0855eaaSJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1879*c0855eaaSJohn Baldwin	tst	lr,#1
1880*c0855eaaSJohn Baldwin	moveq	pc,lr		@ be binary compatible with V4, yet
1881*c0855eaaSJohn Baldwin.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
1882*c0855eaaSJohn Baldwin#endif
1883bc3d5698SJohn Baldwin.size	KeccakF1600,.-KeccakF1600
@ SHA3_absorb -- XOR whole input blocks into the state, permuting after each.
@
@ In:     r0 = pointer to the state A (200 bytes)
@         r1 = input pointer
@         r2 = input length in bytes (len)
@         r3 = block size in bytes (bsz)
@ Out:    r0 = number of input bytes left unprocessed (len reduced by bsz
@         once per absorbed block); if len < bsz on entry nothing is touched
@         and len is returned unchanged.
@ Saved:  r4-r12 and lr are restored on exit.
@ NOTE(review): presumably matches a C prototype of the form
@   size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp,
@                      size_t len, size_t bsz)
@ -- confirm against the C caller.
@
@ Stack frame after the prologue (offsets from sp):
@   sp+0   .. sp+199  working copy of the state
@   sp+456 .. sp+468  masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@   sp+472            pushed r0 (state pointer)
@   sp+476            pushed r1, reused for the current input pointer
@   sp+480            pushed r2, reused for the remaining length
@   sp+484            pushed r3 (bsz)
@   sp+488 ..         pushed r4-r12, lr
@ The area between the state copy and the masks is not touched by this
@ block; the visible tail of KeccakF1600_int reads and writes offsets in
@ that range and returns through the word at sp+440.
1884bc3d5698SJohn Baldwin.globl	SHA3_absorb
1885bc3d5698SJohn Baldwin.type	SHA3_absorb,%function
1886bc3d5698SJohn Baldwin.align	5
1887bc3d5698SJohn BaldwinSHA3_absorb:
1888bc3d5698SJohn Baldwin	stmdb	sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1889bc3d5698SJohn Baldwin	sub	sp,sp,#456+16
1890bc3d5698SJohn Baldwin
	@ r12 = len, r14 = bsz.  The input pointer is not copied to r11 yet
	@ because r11 is first needed as the copy cursor; it is reloaded from
	@ its stack slot afterwards.  Bail out (unsigned compare) if the input
	@ is shorter than one block.
1891bc3d5698SJohn Baldwin	add	r10,r0,#40
1892bc3d5698SJohn Baldwin	@ mov	r11,r1
1893bc3d5698SJohn Baldwin	mov	r12,r2
1894bc3d5698SJohn Baldwin	mov	r14,r3
1895bc3d5698SJohn Baldwin	cmp	r2,r3
1896bc3d5698SJohn Baldwin	blo	.Labsorb_abort
1897bc3d5698SJohn Baldwin
	@ Copy the 200-byte state to sp+0, 40 bytes per ldmia/stmia pair.
1898bc3d5698SJohn Baldwin	add	r11,sp,#0
1899bc3d5698SJohn Baldwin	ldmia	r0,      {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ copy A[5][5] to stack
1900bc3d5698SJohn Baldwin	stmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1901bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1902bc3d5698SJohn Baldwin	stmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1903bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1904bc3d5698SJohn Baldwin	stmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1905bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1906bc3d5698SJohn Baldwin	stmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1907bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1908bc3d5698SJohn Baldwin	stmia	r11,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1909bc3d5698SJohn Baldwin
	@ sp+476 is the slot where r1 (input pointer) was pushed.
	@ Build the four bit-interleave masks in r6-r9: Thumb-2 can load them
	@ as immediates, ARM mode composes them from 8-bit seeds.
1910bc3d5698SJohn Baldwin	ldr	r11,[sp,#476]		@ restore r11
1911bc3d5698SJohn Baldwin#ifdef	__thumb2__
1912bc3d5698SJohn Baldwin	mov	r9,#0x00ff00ff
1913bc3d5698SJohn Baldwin	mov	r8,#0x0f0f0f0f
1914bc3d5698SJohn Baldwin	mov	r7,#0x33333333
1915bc3d5698SJohn Baldwin	mov	r6,#0x55555555
1916bc3d5698SJohn Baldwin#else
1917bc3d5698SJohn Baldwin	mov	r6,#0x11		@ compose constants
1918bc3d5698SJohn Baldwin	mov	r8,#0x0f
1919bc3d5698SJohn Baldwin	mov	r9,#0xff
1920bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#8
1921bc3d5698SJohn Baldwin	orr	r8,r8,r8,lsl#8
1922bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#16		@ 0x11111111
1923bc3d5698SJohn Baldwin	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
1924bc3d5698SJohn Baldwin	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
1925bc3d5698SJohn Baldwin	orr	r7,r6,r6,lsl#1		@ 0x33333333
1926bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#2		@ 0x55555555
1927bc3d5698SJohn Baldwin#endif
	@ Park the masks just below the pushed r0-r3 so that one ldmia from
	@ sp+456 can reload masks and loop variables after each permutation.
1928bc3d5698SJohn Baldwin	str	r9,[sp,#468]
1929bc3d5698SJohn Baldwin	str	r8,[sp,#464]
1930bc3d5698SJohn Baldwin	str	r7,[sp,#460]
1931bc3d5698SJohn Baldwin	str	r6,[sp,#456]
1932bc3d5698SJohn Baldwin	b	.Loop_absorb
1933bc3d5698SJohn Baldwin
	@ Outer loop, one iteration per block.  Invariant on entry:
	@ r12 = bytes remaining, r14 = bsz, r11 = input pointer, r6-r9 = masks.
	@ Exit when fewer than bsz bytes remain (unsigned borrow).  The first
	@ pass never exits because len >= bsz was checked above.
1934bc3d5698SJohn Baldwin.align	4
1935bc3d5698SJohn Baldwin.Loop_absorb:
1936bc3d5698SJohn Baldwin	subs	r0,r12,r14
1937bc3d5698SJohn Baldwin	blo	.Labsorbed
1938bc3d5698SJohn Baldwin	add	r10,sp,#0
1939bc3d5698SJohn Baldwin	str	r0,[sp,#480]		@ save len - bsz
1940bc3d5698SJohn Baldwin
	@ Inner loop, one iteration per 8 input bytes.  r10 walks the stack
	@ copy of the state, r14 counts down the bytes left in this block.
	@ The bytes are assembled little-endian into r0 (lo) and r1 (hi).
1941bc3d5698SJohn Baldwin.align	4
1942bc3d5698SJohn Baldwin.Loop_block:
1943bc3d5698SJohn Baldwin	ldrb	r0,[r11],#1
1944bc3d5698SJohn Baldwin	ldrb	r1,[r11],#1
1945bc3d5698SJohn Baldwin	ldrb	r2,[r11],#1
1946bc3d5698SJohn Baldwin	ldrb	r3,[r11],#1
1947bc3d5698SJohn Baldwin	ldrb	r4,[r11],#1
1948bc3d5698SJohn Baldwin	orr	r0,r0,r1,lsl#8
1949bc3d5698SJohn Baldwin	ldrb	r1,[r11],#1
1950bc3d5698SJohn Baldwin	orr	r0,r0,r2,lsl#16
1951bc3d5698SJohn Baldwin	ldrb	r2,[r11],#1
1952bc3d5698SJohn Baldwin	orr	r0,r0,r3,lsl#24		@ lo
1953bc3d5698SJohn Baldwin	ldrb	r3,[r11],#1
1954bc3d5698SJohn Baldwin	orr	r1,r4,r1,lsl#8
1955bc3d5698SJohn Baldwin	orr	r1,r1,r2,lsl#16
1956bc3d5698SJohn Baldwin	orr	r1,r1,r3,lsl#24		@ hi
1957bc3d5698SJohn Baldwin
	@ Split each word into its even-position bits (r2 from lo, r3 from hi)
	@ and odd-position bits (r0 from lo, r1 from hi).  Each mask/shift/or
	@ step below halves the spacing between kept bits: the even bits are
	@ squeezed towards the low half-word, the odd bits towards the high
	@ half-word.
1958bc3d5698SJohn Baldwin	and	r2,r0,r6		@ &=0x55555555
1959bc3d5698SJohn Baldwin	and	r0,r0,r6,lsl#1		@ &=0xaaaaaaaa
1960bc3d5698SJohn Baldwin	and	r3,r1,r6		@ &=0x55555555
1961bc3d5698SJohn Baldwin	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
1962bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsr#1
1963bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#1
1964bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#1
1965bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsl#1
1966bc3d5698SJohn Baldwin	and	r2,r2,r7		@ &=0x33333333
1967bc3d5698SJohn Baldwin	and	r0,r0,r7,lsl#2		@ &=0xcccccccc
1968bc3d5698SJohn Baldwin	and	r3,r3,r7		@ &=0x33333333
1969bc3d5698SJohn Baldwin	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
1970bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsr#2
1971bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#2
1972bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#2
1973bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsl#2
1974bc3d5698SJohn Baldwin	and	r2,r2,r8		@ &=0x0f0f0f0f
1975bc3d5698SJohn Baldwin	and	r0,r0,r8,lsl#4		@ &=0xf0f0f0f0
1976bc3d5698SJohn Baldwin	and	r3,r3,r8		@ &=0x0f0f0f0f
1977bc3d5698SJohn Baldwin	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
	@ The state lane is fetched here, ahead of its use in the eor
	@ instructions further down.
1978bc3d5698SJohn Baldwin	ldmia	r10,{r4,r5}		@ A_flat[i]
1979bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsr#4
1980bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#4
1981bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#4
1982bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsl#4
1983bc3d5698SJohn Baldwin	and	r2,r2,r9		@ &=0x00ff00ff
1984bc3d5698SJohn Baldwin	and	r0,r0,r9,lsl#8		@ &=0xff00ff00
1985bc3d5698SJohn Baldwin	and	r3,r3,r9		@ &=0x00ff00ff
1986bc3d5698SJohn Baldwin	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
1987bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsr#8
1988bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#8
1989bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#8
1990bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsl#8
1991bc3d5698SJohn Baldwin
	@ Now the useful 16 bits sit in the low half of r2/r3 and the high
	@ half of r0/r1; the other halves hold leftovers that the shifts below
	@ discard.  First lane word (r4) gets the even bits: lo-derived in
	@ bits 0-15, hi-derived in bits 16-31.  Second lane word (r5) gets the
	@ odd bits in the same arrangement.
1992bc3d5698SJohn Baldwin	mov	r2,r2,lsl#16
1993bc3d5698SJohn Baldwin	mov	r1,r1,lsr#16
1994bc3d5698SJohn Baldwin	eor	r4,r4,r3,lsl#16
1995bc3d5698SJohn Baldwin	eor	r5,r5,r0,lsr#16
1996bc3d5698SJohn Baldwin	eor	r4,r4,r2,lsr#16
1997bc3d5698SJohn Baldwin	eor	r5,r5,r1,lsl#16
1998bc3d5698SJohn Baldwin	stmia	r10!,{r4,r5}	@ A_flat[i++] ^= BitInterleave(inp[0..7])
1999bc3d5698SJohn Baldwin
	@ bhi is taken while the counter was above 8 before the subtraction
	@ (unsigned), i.e. while bytes of this block remain.
2000bc3d5698SJohn Baldwin	subs	r14,r14,#8
2001bc3d5698SJohn Baldwin	bhi	.Loop_block
2002bc3d5698SJohn Baldwin
	@ Save the advanced input pointer in the pushed-r1 slot before the
	@ call; it is reloaded together with the other variables afterwards.
2003bc3d5698SJohn Baldwin	str	r11,[sp,#476]
2004bc3d5698SJohn Baldwin
2005bc3d5698SJohn Baldwin	bl	KeccakF1600_int
2006bc3d5698SJohn Baldwin
	@ One load from sp+456 restores: r6-r9 = masks, r10 = state pointer
	@ (pushed r0), r11 = input pointer, r12 = remaining length (the
	@ len - bsz saved above), r14 = bsz (pushed r3, needed again because
	@ the inner loop counted r14 down).
2007bc3d5698SJohn Baldwin	add	r14,sp,#456
2008bc3d5698SJohn Baldwin	ldmia	r14,{r6,r7,r8,r9,r10,r11,r12,r14}	@ restore constants and variables
2009bc3d5698SJohn Baldwin	b	.Loop_absorb
2010bc3d5698SJohn Baldwin
	@ All whole blocks consumed.  r10 holds the state pointer reloaded
	@ after the last permutation; copy the 200-byte stack state back to
	@ it, 40 bytes at a time.
2011bc3d5698SJohn Baldwin.align	4
2012bc3d5698SJohn Baldwin.Labsorbed:
2013bc3d5698SJohn Baldwin	add	r11,sp,#40
2014bc3d5698SJohn Baldwin	ldmia	sp,      {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2015bc3d5698SJohn Baldwin	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}	@ return A[5][5]
2016bc3d5698SJohn Baldwin	ldmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2017bc3d5698SJohn Baldwin	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2018bc3d5698SJohn Baldwin	ldmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2019bc3d5698SJohn Baldwin	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2020bc3d5698SJohn Baldwin	ldmia	r11!,   {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2021bc3d5698SJohn Baldwin	stmia	r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2022bc3d5698SJohn Baldwin	ldmia	r11,    {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2023bc3d5698SJohn Baldwin	stmia	r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2024bc3d5698SJohn Baldwin
	@ Common exit.  Skip the 456+16 bytes of locals and the 16 bytes of
	@ pushed r0-r3, so sp points at the pushed r4.  r12 is the remaining
	@ length on both paths (the untouched len on the abort path).
	@ The return sequence is: pop into pc on ARMv5 and later; otherwise pop
	@ into lr, return with mov pc,lr if bit 0 of lr is clear, else fall
	@ through to the word 0xe12fff1e, which is the encoding of bx lr.
2025bc3d5698SJohn Baldwin.Labsorb_abort:
2026bc3d5698SJohn Baldwin	add	sp,sp,#456+32
2027bc3d5698SJohn Baldwin	mov	r0,r12			@ return value
2028*c0855eaaSJohn Baldwin#if __ARM_ARCH__>=5
2029bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2030*c0855eaaSJohn Baldwin#else
2031*c0855eaaSJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
2032*c0855eaaSJohn Baldwin	tst	lr,#1
2033*c0855eaaSJohn Baldwin	moveq	pc,lr		@ be binary compatible with V4, yet
2034*c0855eaaSJohn Baldwin.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
2035*c0855eaaSJohn Baldwin#endif
2036bc3d5698SJohn Baldwin.size	SHA3_absorb,.-SHA3_absorb
2037bc3d5698SJohn Baldwin.globl	SHA3_squeeze
2038bc3d5698SJohn Baldwin.type	SHA3_squeeze,%function
2039bc3d5698SJohn Baldwin.align	5
2040bc3d5698SJohn BaldwinSHA3_squeeze:
2041bc3d5698SJohn Baldwin	stmdb	sp!,{r0,r3-r10,lr}
2042bc3d5698SJohn Baldwin
2043bc3d5698SJohn Baldwin	mov	r10,r0
2044bc3d5698SJohn Baldwin	mov	r4,r1
2045bc3d5698SJohn Baldwin	mov	r5,r2
2046bc3d5698SJohn Baldwin	mov	r12,r3
2047bc3d5698SJohn Baldwin
2048bc3d5698SJohn Baldwin#ifdef	__thumb2__
2049bc3d5698SJohn Baldwin	mov	r9,#0x00ff00ff
2050bc3d5698SJohn Baldwin	mov	r8,#0x0f0f0f0f
2051bc3d5698SJohn Baldwin	mov	r7,#0x33333333
2052bc3d5698SJohn Baldwin	mov	r6,#0x55555555
2053bc3d5698SJohn Baldwin#else
2054bc3d5698SJohn Baldwin	mov	r6,#0x11		@ compose constants
2055bc3d5698SJohn Baldwin	mov	r8,#0x0f
2056bc3d5698SJohn Baldwin	mov	r9,#0xff
2057bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#8
2058bc3d5698SJohn Baldwin	orr	r8,r8,r8,lsl#8
2059bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#16		@ 0x11111111
2060bc3d5698SJohn Baldwin	orr	r9,r9,r9,lsl#16		@ 0x00ff00ff
2061bc3d5698SJohn Baldwin	orr	r8,r8,r8,lsl#16		@ 0x0f0f0f0f
2062bc3d5698SJohn Baldwin	orr	r7,r6,r6,lsl#1		@ 0x33333333
2063bc3d5698SJohn Baldwin	orr	r6,r6,r6,lsl#2		@ 0x55555555
2064bc3d5698SJohn Baldwin#endif
2065bc3d5698SJohn Baldwin	stmdb	sp!,{r6,r7,r8,r9}
2066bc3d5698SJohn Baldwin
2067bc3d5698SJohn Baldwin	mov	r14,r10
2068bc3d5698SJohn Baldwin	b	.Loop_squeeze
2069bc3d5698SJohn Baldwin
2070bc3d5698SJohn Baldwin.align	4
2071bc3d5698SJohn Baldwin.Loop_squeeze:
2072bc3d5698SJohn Baldwin	ldmia	r10!,{r0,r1}	@ A_flat[i++]
2073bc3d5698SJohn Baldwin
2074bc3d5698SJohn Baldwin	mov	r2,r0,lsl#16
2075bc3d5698SJohn Baldwin	mov	r3,r1,lsl#16		@ r3 = r1 << 16
2076bc3d5698SJohn Baldwin	mov	r2,r2,lsr#16		@ r2 = r0 & 0x0000ffff
2077bc3d5698SJohn Baldwin	mov	r1,r1,lsr#16
2078bc3d5698SJohn Baldwin	mov	r0,r0,lsr#16		@ r0 = r0 >> 16
2079bc3d5698SJohn Baldwin	mov	r1,r1,lsl#16		@ r1 = r1 & 0xffff0000
2080bc3d5698SJohn Baldwin
2081bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsl#8
2082bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#8
2083bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#8
2084bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsr#8
2085bc3d5698SJohn Baldwin	and	r2,r2,r9		@ &=0x00ff00ff
2086bc3d5698SJohn Baldwin	and	r3,r3,r9,lsl#8		@ &=0xff00ff00
2087bc3d5698SJohn Baldwin	and	r0,r0,r9		@ &=0x00ff00ff
2088bc3d5698SJohn Baldwin	and	r1,r1,r9,lsl#8		@ &=0xff00ff00
2089bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsl#4
2090bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#4
2091bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#4
2092bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsr#4
2093bc3d5698SJohn Baldwin	and	r2,r2,r8		@ &=0x0f0f0f0f
2094bc3d5698SJohn Baldwin	and	r3,r3,r8,lsl#4		@ &=0xf0f0f0f0
2095bc3d5698SJohn Baldwin	and	r0,r0,r8		@ &=0x0f0f0f0f
2096bc3d5698SJohn Baldwin	and	r1,r1,r8,lsl#4		@ &=0xf0f0f0f0
2097bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsl#2
2098bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#2
2099bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#2
2100bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsr#2
2101bc3d5698SJohn Baldwin	and	r2,r2,r7		@ &=0x33333333
2102bc3d5698SJohn Baldwin	and	r3,r3,r7,lsl#2		@ &=0xcccccccc
2103bc3d5698SJohn Baldwin	and	r0,r0,r7		@ &=0x33333333
2104bc3d5698SJohn Baldwin	and	r1,r1,r7,lsl#2		@ &=0xcccccccc
2105bc3d5698SJohn Baldwin	orr	r2,r2,r2,lsl#1
2106bc3d5698SJohn Baldwin	orr	r3,r3,r3,lsr#1
2107bc3d5698SJohn Baldwin	orr	r0,r0,r0,lsl#1
2108bc3d5698SJohn Baldwin	orr	r1,r1,r1,lsr#1
2109bc3d5698SJohn Baldwin	and	r2,r2,r6		@ &=0x55555555
2110bc3d5698SJohn Baldwin	and	r3,r3,r6,lsl#1		@ &=0xaaaaaaaa
2111bc3d5698SJohn Baldwin	and	r0,r0,r6		@ &=0x55555555
2112bc3d5698SJohn Baldwin	and	r1,r1,r6,lsl#1		@ &=0xaaaaaaaa
2113bc3d5698SJohn Baldwin
2114bc3d5698SJohn Baldwin	orr	r2,r2,r3
2115bc3d5698SJohn Baldwin	orr	r0,r0,r1
2116bc3d5698SJohn Baldwin
2117bc3d5698SJohn Baldwin	cmp	r5,#8
2118bc3d5698SJohn Baldwin	blo	.Lsqueeze_tail
2119bc3d5698SJohn Baldwin	mov	r1,r2,lsr#8
2120bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2121bc3d5698SJohn Baldwin	mov	r3,r2,lsr#16
2122bc3d5698SJohn Baldwin	strb	r1,[r4],#1
2123bc3d5698SJohn Baldwin	mov	r2,r2,lsr#24
2124bc3d5698SJohn Baldwin	strb	r3,[r4],#1
2125bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2126bc3d5698SJohn Baldwin
2127bc3d5698SJohn Baldwin	mov	r1,r0,lsr#8
2128bc3d5698SJohn Baldwin	strb	r0,[r4],#1
2129bc3d5698SJohn Baldwin	mov	r3,r0,lsr#16
2130bc3d5698SJohn Baldwin	strb	r1,[r4],#1
2131bc3d5698SJohn Baldwin	mov	r0,r0,lsr#24
2132bc3d5698SJohn Baldwin	strb	r3,[r4],#1
2133bc3d5698SJohn Baldwin	strb	r0,[r4],#1
2134bc3d5698SJohn Baldwin	subs	r5,r5,#8
2135bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2136bc3d5698SJohn Baldwin
2137bc3d5698SJohn Baldwin	subs	r12,r12,#8		@ bsz -= 8
2138bc3d5698SJohn Baldwin	bhi	.Loop_squeeze
2139bc3d5698SJohn Baldwin
2140bc3d5698SJohn Baldwin	mov	r0,r14			@ original r10
2141bc3d5698SJohn Baldwin
2142bc3d5698SJohn Baldwin	bl	KeccakF1600
2143bc3d5698SJohn Baldwin
2144bc3d5698SJohn Baldwin	ldmia	sp,{r6,r7,r8,r9,r10,r12}		@ restore constants and variables
2145bc3d5698SJohn Baldwin	mov	r14,r10
2146bc3d5698SJohn Baldwin	b	.Loop_squeeze
2147bc3d5698SJohn Baldwin
2148bc3d5698SJohn Baldwin.align	4
2149bc3d5698SJohn Baldwin.Lsqueeze_tail:
2150bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2151bc3d5698SJohn Baldwin	mov	r2,r2,lsr#8
2152bc3d5698SJohn Baldwin	subs	r5,r5,#1
2153bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2154bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2155bc3d5698SJohn Baldwin	mov	r2,r2,lsr#8
2156bc3d5698SJohn Baldwin	subs	r5,r5,#1
2157bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2158bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2159bc3d5698SJohn Baldwin	mov	r2,r2,lsr#8
2160bc3d5698SJohn Baldwin	subs	r5,r5,#1
2161bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2162bc3d5698SJohn Baldwin	strb	r2,[r4],#1
2163bc3d5698SJohn Baldwin	subs	r5,r5,#1
2164bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2165bc3d5698SJohn Baldwin
2166bc3d5698SJohn Baldwin	strb	r0,[r4],#1
2167bc3d5698SJohn Baldwin	mov	r0,r0,lsr#8
2168bc3d5698SJohn Baldwin	subs	r5,r5,#1
2169bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2170bc3d5698SJohn Baldwin	strb	r0,[r4],#1
2171bc3d5698SJohn Baldwin	mov	r0,r0,lsr#8
2172bc3d5698SJohn Baldwin	subs	r5,r5,#1
2173bc3d5698SJohn Baldwin	beq	.Lsqueeze_done
2174bc3d5698SJohn Baldwin	strb	r0,[r4]
2175bc3d5698SJohn Baldwin	b	.Lsqueeze_done
2176bc3d5698SJohn Baldwin
2177bc3d5698SJohn Baldwin.align	4
2178bc3d5698SJohn Baldwin.Lsqueeze_done:
2179bc3d5698SJohn Baldwin	add	sp,sp,#24
2180*c0855eaaSJohn Baldwin#if __ARM_ARCH__>=5
2181bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
2182*c0855eaaSJohn Baldwin#else
2183*c0855eaaSJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
2184*c0855eaaSJohn Baldwin	tst	lr,#1
2185*c0855eaaSJohn Baldwin	moveq	pc,lr		@ be binary compatible with V4, yet
2186*c0855eaaSJohn Baldwin.word	0xe12fff1e		@ interoperable with Thumb ISA:-)
2187*c0855eaaSJohn Baldwin#endif
2188bc3d5698SJohn Baldwin.size	SHA3_squeeze,.-SHA3_squeeze
2189bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7
2190bc3d5698SJohn Baldwin.fpu	neon
2191bc3d5698SJohn Baldwin
/*
 * iotas64: table of 24 64-bit round constants, one .quad per round.
 * KeccakF1600_neon takes the table address with "adr r2, iotas64" and reads
 * one entry per round with a post-incremented 64-bit load, XORing it into
 * A[0][0] (the Iota step).  The table sits inside the
 * "#if __ARM_MAX_ARCH__>=7" region, next to the code that addresses it
 * PC-relatively.
 */
2192bc3d5698SJohn Baldwin.type	iotas64, %object
2193bc3d5698SJohn Baldwin.align	5
2194bc3d5698SJohn Baldwiniotas64:
2195bc3d5698SJohn Baldwin.quad	0x0000000000000001
2196bc3d5698SJohn Baldwin.quad	0x0000000000008082
2197bc3d5698SJohn Baldwin.quad	0x800000000000808a
2198bc3d5698SJohn Baldwin.quad	0x8000000080008000
2199bc3d5698SJohn Baldwin.quad	0x000000000000808b
2200bc3d5698SJohn Baldwin.quad	0x0000000080000001
2201bc3d5698SJohn Baldwin.quad	0x8000000080008081
2202bc3d5698SJohn Baldwin.quad	0x8000000000008009
2203bc3d5698SJohn Baldwin.quad	0x000000000000008a
2204bc3d5698SJohn Baldwin.quad	0x0000000000000088
2205bc3d5698SJohn Baldwin.quad	0x0000000080008009
2206bc3d5698SJohn Baldwin.quad	0x000000008000000a
2207bc3d5698SJohn Baldwin.quad	0x000000008000808b
2208bc3d5698SJohn Baldwin.quad	0x800000000000008b
2209bc3d5698SJohn Baldwin.quad	0x8000000000008089
2210bc3d5698SJohn Baldwin.quad	0x8000000000008003
2211bc3d5698SJohn Baldwin.quad	0x8000000000008002
2212bc3d5698SJohn Baldwin.quad	0x8000000000000080
2213bc3d5698SJohn Baldwin.quad	0x000000000000800a
2214bc3d5698SJohn Baldwin.quad	0x800000008000000a
2215bc3d5698SJohn Baldwin.quad	0x8000000080008081
2216bc3d5698SJohn Baldwin.quad	0x8000000000008080
2217bc3d5698SJohn Baldwin.quad	0x0000000080000001
2218bc3d5698SJohn Baldwin.quad	0x8000000080008008
2219bc3d5698SJohn Baldwin.size	iotas64,.-iotas64
2220bc3d5698SJohn Baldwin
/*
 * KeccakF1600_neon: 24 rounds of Theta, Rho+Pi, Chi and Iota on a state that
 * is held entirely in NEON registers.  Internal helper (no .globl), reached
 * with "bl" from SHA3_absorb_neon and SHA3_squeeze_neon.
 *
 * In:    r0 = 64-bit aligned buffer (accessed with the :64 qualifier).  Its
 *             first 24 bytes are used as spill space in every round, so
 *             whatever is stored there gets overwritten.  Both callers pass
 *             the state array and write the registers back to it later.
 *        State lanes, in the layout the callers load them:
 *             q0..q4  (d0..d9)   = A[0..1][0..4], row 0 in the even d-reg
 *             q5..q9  (d10..d19) = A[2..3][0..4], row 2 in the even d-reg
 *             d20..d24           = A[4][0..4]
 * Out:   the same registers hold the permuted state; r0 is not modified.
 * Clobb: r1 (= r0+16), r2 (cursor into iotas64), r3 (round counter),
 *        condition flags, d25 and q13-q15 (temporaries).
 * Stack: not touched; returns with "bx lr".
 */
2221bc3d5698SJohn Baldwin.type	KeccakF1600_neon, %function
2222bc3d5698SJohn Baldwin.align	5
2223bc3d5698SJohn BaldwinKeccakF1600_neon:
2224bc3d5698SJohn Baldwin	add	r1, r0, #16
2225bc3d5698SJohn Baldwin	adr	r2, iotas64
2226bc3d5698SJohn Baldwin	mov	r3, #24			@ loop counter
2227bc3d5698SJohn Baldwin	b	.Loop_neon
2228bc3d5698SJohn Baldwin
2229bc3d5698SJohn Baldwin.align	4
2230bc3d5698SJohn Baldwin.Loop_neon:
2231bc3d5698SJohn Baldwin	@ Theta
/*
 * q4 and d18 are spilled to [r0] and [r1] because they are reused below as
 * temporaries for the column parities; they are reloaded further down,
 * before D[4] is applied to them.
 */
2232bc3d5698SJohn Baldwin	vst1.64	{q4},  [r0,:64]		@ offload A[0..1][4]
2233bc3d5698SJohn Baldwin	veor	q13, q0,  q5		@ A[0..1][0]^A[2..3][0]
2234bc3d5698SJohn Baldwin	vst1.64	{d18}, [r1,:64]		@ offload A[2][4]
2235bc3d5698SJohn Baldwin	veor	q14, q1,  q6		@ A[0..1][1]^A[2..3][1]
2236bc3d5698SJohn Baldwin	veor	q15, q2,  q7		@ A[0..1][2]^A[2..3][2]
2237bc3d5698SJohn Baldwin	veor	d26, d26, d27		@ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
2238bc3d5698SJohn Baldwin	veor	d27, d28, d29		@ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
2239bc3d5698SJohn Baldwin	veor	q14, q3,  q8		@ A[0..1][3]^A[2..3][3]
2240bc3d5698SJohn Baldwin	veor	q4,  q4,  q9		@ A[0..1][4]^A[2..3][4]
2241bc3d5698SJohn Baldwin	veor	d30, d30, d31		@ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
2242bc3d5698SJohn Baldwin	veor	d31, d28, d29		@ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
2243bc3d5698SJohn Baldwin	veor	d25, d8,  d9		@ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
2244bc3d5698SJohn Baldwin	veor	q13, q13, q10		@ C[0..1]^=A[4][0..1]
2245bc3d5698SJohn Baldwin	veor	q14, q15, q11		@ C[2..3]^=A[4][2..3]
2246bc3d5698SJohn Baldwin	veor	d25, d25, d24		@ C[4]^=A[4][4]
2247bc3d5698SJohn Baldwin
/*
 * Rotate left by one: x+x yields x<<1 with bit 0 clear, then vsri #63
 * inserts the old top bit into bit 0.
 */
2248bc3d5698SJohn Baldwin	vadd.u64	q4,  q13, q13		@ C[0..1]<<1
2249bc3d5698SJohn Baldwin	vadd.u64	q15, q14, q14		@ C[2..3]<<1
2250bc3d5698SJohn Baldwin	vadd.u64	d18, d25, d25		@ C[4]<<1
2251bc3d5698SJohn Baldwin	vsri.u64	q4,  q13, #63		@ ROL64(C[0..1],1)
2252bc3d5698SJohn Baldwin	vsri.u64	q15, q14, #63		@ ROL64(C[2..3],1)
2253bc3d5698SJohn Baldwin	vsri.u64	d18, d25, #63		@ ROL64(C[4],1)
2254bc3d5698SJohn Baldwin	veor	d25, d25, d9		@ D[0] = C[4] ^= ROL64(C[1],1)
2255bc3d5698SJohn Baldwin	veor	q13, q13, q15		@ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
2256bc3d5698SJohn Baldwin	veor	d28, d28, d18		@ D[3] = C[2] ^= ROL64(C[4],1)
2257bc3d5698SJohn Baldwin	veor	d29, d29, d8		@ D[4] = C[3] ^= ROL64(C[0],1)
2258bc3d5698SJohn Baldwin
2259bc3d5698SJohn Baldwin	veor	d0,  d0,  d25		@ A[0][0] ^= C[4]
2260bc3d5698SJohn Baldwin	veor	d1,  d1,  d25		@ A[1][0] ^= C[4]
2261bc3d5698SJohn Baldwin	veor	d10, d10, d25		@ A[2][0] ^= C[4]
2262bc3d5698SJohn Baldwin	veor	d11, d11, d25		@ A[3][0] ^= C[4]
2263bc3d5698SJohn Baldwin	veor	d20, d20, d25		@ A[4][0] ^= C[4]
2264bc3d5698SJohn Baldwin
2265bc3d5698SJohn Baldwin	veor	d2,  d2,  d26		@ A[0][1] ^= D[1]
2266bc3d5698SJohn Baldwin	veor	d3,  d3,  d26		@ A[1][1] ^= D[1]
2267bc3d5698SJohn Baldwin	veor	d12, d12, d26		@ A[2][1] ^= D[1]
2268bc3d5698SJohn Baldwin	veor	d13, d13, d26		@ A[3][1] ^= D[1]
2269bc3d5698SJohn Baldwin	veor	d21, d21, d26		@ A[4][1] ^= D[1]
/* Duplicate D[2] into both halves of q13 so it can be applied two rows at a time. */
2270bc3d5698SJohn Baldwin	vmov	d26, d27
2271bc3d5698SJohn Baldwin
2272bc3d5698SJohn Baldwin	veor	d6,  d6,  d28		@ A[0][3] ^= C[2]
2273bc3d5698SJohn Baldwin	veor	d7,  d7,  d28		@ A[1][3] ^= C[2]
2274bc3d5698SJohn Baldwin	veor	d16, d16, d28		@ A[2][3] ^= C[2]
2275bc3d5698SJohn Baldwin	veor	d17, d17, d28		@ A[3][3] ^= C[2]
2276bc3d5698SJohn Baldwin	veor	d23, d23, d28		@ A[4][3] ^= C[2]
2277bc3d5698SJohn Baldwin	vld1.64	{q4},  [r0,:64]		@ restore A[0..1][4]
/* Likewise duplicate D[4] into both halves of q14. */
2278bc3d5698SJohn Baldwin	vmov	d28, d29
2279bc3d5698SJohn Baldwin
2280bc3d5698SJohn Baldwin	vld1.64	{d18}, [r1,:64]		@ restore A[2][4]
2281bc3d5698SJohn Baldwin	veor	q2,  q2,  q13		@ A[0..1][2] ^= D[2]
2282bc3d5698SJohn Baldwin	veor	q7,  q7,  q13		@ A[2..3][2] ^= D[2]
2283bc3d5698SJohn Baldwin	veor	d22, d22, d27		@ A[4][2]    ^= D[2]
2284bc3d5698SJohn Baldwin
2285bc3d5698SJohn Baldwin	veor	q4,  q4,  q14		@ A[0..1][4] ^= C[3]
2286bc3d5698SJohn Baldwin	veor	q9,  q9,  q14		@ A[2..3][4] ^= C[3]
2287bc3d5698SJohn Baldwin	veor	d24, d24, d29		@ A[4][4]    ^= C[3]
2288bc3d5698SJohn Baldwin
2289bc3d5698SJohn Baldwin	@ Rho + Pi
/*
 * Every rotation is a vshl by n followed by a vsri by 64-n into the same
 * destination.  A[0][1..4] are first parked in d26..d29 (called C[1..4] in
 * the comments below) because d2/d4/d6/d8 are the first destinations to be
 * overwritten; the final group of four rotates those parked copies.  Each
 * later group reads only lanes that have not been overwritten yet, so the
 * order of the groups matters.
 */
2290bc3d5698SJohn Baldwin	vmov	d26, d2			@ C[1] = A[0][1]
2291bc3d5698SJohn Baldwin	vshl.u64	d2,  d3,  #44
2292bc3d5698SJohn Baldwin	vmov	d27, d4			@ C[2] = A[0][2]
2293bc3d5698SJohn Baldwin	vshl.u64	d4,  d14, #43
2294bc3d5698SJohn Baldwin	vmov	d28, d6			@ C[3] = A[0][3]
2295bc3d5698SJohn Baldwin	vshl.u64	d6,  d17, #21
2296bc3d5698SJohn Baldwin	vmov	d29, d8			@ C[4] = A[0][4]
2297bc3d5698SJohn Baldwin	vshl.u64	d8,  d24, #14
2298bc3d5698SJohn Baldwin	vsri.u64	d2,  d3,  #64-44	@ A[0][1] = ROL64(A[1][1], rhotates[1][1])
2299bc3d5698SJohn Baldwin	vsri.u64	d4,  d14, #64-43	@ A[0][2] = ROL64(A[2][2], rhotates[2][2])
2300bc3d5698SJohn Baldwin	vsri.u64	d6,  d17, #64-21	@ A[0][3] = ROL64(A[3][3], rhotates[3][3])
2301bc3d5698SJohn Baldwin	vsri.u64	d8,  d24, #64-14	@ A[0][4] = ROL64(A[4][4], rhotates[4][4])
2302bc3d5698SJohn Baldwin
2303bc3d5698SJohn Baldwin	vshl.u64	d3,  d9,  #20
2304bc3d5698SJohn Baldwin	vshl.u64	d14, d16, #25
2305bc3d5698SJohn Baldwin	vshl.u64	d17, d15, #15
2306bc3d5698SJohn Baldwin	vshl.u64	d24, d21, #2
2307bc3d5698SJohn Baldwin	vsri.u64	d3,  d9,  #64-20	@ A[1][1] = ROL64(A[1][4], rhotates[1][4])
2308bc3d5698SJohn Baldwin	vsri.u64	d14, d16, #64-25	@ A[2][2] = ROL64(A[2][3], rhotates[2][3])
2309bc3d5698SJohn Baldwin	vsri.u64	d17, d15, #64-15	@ A[3][3] = ROL64(A[3][2], rhotates[3][2])
2310bc3d5698SJohn Baldwin	vsri.u64	d24, d21, #64-2		@ A[4][4] = ROL64(A[4][1], rhotates[4][1])
2311bc3d5698SJohn Baldwin
/*
 * Rotations by 8 and by 56 bits are whole-byte moves, so a single vext.8
 * replaces the vshl/vsri pair; the vshl that would otherwise be needed is
 * left in place as a comment in this group and the next.
 */
2312bc3d5698SJohn Baldwin	vshl.u64	d9,  d22, #61
2313bc3d5698SJohn Baldwin	@ vshl.u64	d16, d19, #8
2314bc3d5698SJohn Baldwin	vshl.u64	d15, d12, #10
2315bc3d5698SJohn Baldwin	vshl.u64	d21, d7,  #55
2316bc3d5698SJohn Baldwin	vsri.u64	d9,  d22, #64-61	@ A[1][4] = ROL64(A[4][2], rhotates[4][2])
2317bc3d5698SJohn Baldwin	vext.8	d16, d19, d19, #8-1	@ A[2][3] = ROL64(A[3][4], rhotates[3][4])
2318bc3d5698SJohn Baldwin	vsri.u64	d15, d12, #64-10	@ A[3][2] = ROL64(A[2][1], rhotates[2][1])
2319bc3d5698SJohn Baldwin	vsri.u64	d21, d7,  #64-55	@ A[4][1] = ROL64(A[1][3], rhotates[1][3])
2320bc3d5698SJohn Baldwin
2321bc3d5698SJohn Baldwin	vshl.u64	d22, d18, #39
2322bc3d5698SJohn Baldwin	@ vshl.u64	d19, d23, #56
2323bc3d5698SJohn Baldwin	vshl.u64	d12, d5,  #6
2324bc3d5698SJohn Baldwin	vshl.u64	d7,  d13, #45
2325bc3d5698SJohn Baldwin	vsri.u64	d22, d18, #64-39	@ A[4][2] = ROL64(A[2][4], rhotates[2][4])
2326bc3d5698SJohn Baldwin	vext.8	d19, d23, d23, #8-7	@ A[3][4] = ROL64(A[4][3], rhotates[4][3])
2327bc3d5698SJohn Baldwin	vsri.u64	d12, d5,  #64-6		@ A[2][1] = ROL64(A[1][2], rhotates[1][2])
2328bc3d5698SJohn Baldwin	vsri.u64	d7,  d13, #64-45	@ A[1][3] = ROL64(A[3][1], rhotates[3][1])
2329bc3d5698SJohn Baldwin
2330bc3d5698SJohn Baldwin	vshl.u64	d18, d20, #18
2331bc3d5698SJohn Baldwin	vshl.u64	d23, d11, #41
2332bc3d5698SJohn Baldwin	vshl.u64	d5,  d10, #3
2333bc3d5698SJohn Baldwin	vshl.u64	d13, d1,  #36
2334bc3d5698SJohn Baldwin	vsri.u64	d18, d20, #64-18	@ A[2][4] = ROL64(A[4][0], rhotates[4][0])
2335bc3d5698SJohn Baldwin	vsri.u64	d23, d11, #64-41	@ A[4][3] = ROL64(A[3][0], rhotates[3][0])
2336bc3d5698SJohn Baldwin	vsri.u64	d5,  d10, #64-3		@ A[1][2] = ROL64(A[2][0], rhotates[2][0])
2337bc3d5698SJohn Baldwin	vsri.u64	d13, d1,  #64-36	@ A[3][1] = ROL64(A[1][0], rhotates[1][0])
2338bc3d5698SJohn Baldwin
2339bc3d5698SJohn Baldwin	vshl.u64	d1,  d28, #28
2340bc3d5698SJohn Baldwin	vshl.u64	d10, d26, #1
2341bc3d5698SJohn Baldwin	vshl.u64	d11, d29, #27
2342bc3d5698SJohn Baldwin	vshl.u64	d20, d27, #62
2343bc3d5698SJohn Baldwin	vsri.u64	d1,  d28, #64-28	@ A[1][0] = ROL64(C[3],    rhotates[0][3])
2344bc3d5698SJohn Baldwin	vsri.u64	d10, d26, #64-1		@ A[2][0] = ROL64(C[1],    rhotates[0][1])
2345bc3d5698SJohn Baldwin	vsri.u64	d11, d29, #64-27	@ A[3][0] = ROL64(C[4],    rhotates[0][4])
2346bc3d5698SJohn Baldwin	vsri.u64	d20, d27, #64-62	@ A[4][0] = ROL64(C[2],    rhotates[0][2])
2347bc3d5698SJohn Baldwin
2348bc3d5698SJohn Baldwin	@ Chi + Iota
/*
 * Rows 0..1: the new A[0..1][0] is spilled to [r0] because q0 is still
 * needed as an input (and is then reused as a copy of A[2..3][0] for rows
 * 2..3).  It is reloaded just before the Iota constant is XORed in.
 */
2349bc3d5698SJohn Baldwin	vbic	q13, q2,  q1
2350bc3d5698SJohn Baldwin	vbic	q14, q3,  q2
2351bc3d5698SJohn Baldwin	vbic	q15, q4,  q3
2352bc3d5698SJohn Baldwin	veor	q13, q13, q0		@ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
2353bc3d5698SJohn Baldwin	veor	q14, q14, q1		@ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
2354bc3d5698SJohn Baldwin	veor	q2,  q2,  q15		@ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
2355bc3d5698SJohn Baldwin	vst1.64	{q13}, [r0,:64]		@ offload A[0..1][0]
2356bc3d5698SJohn Baldwin	vbic	q13, q0,  q4
2357bc3d5698SJohn Baldwin	vbic	q15, q1,  q0
2358bc3d5698SJohn Baldwin	vmov	q1,  q14		@ A[0..1][1]
2359bc3d5698SJohn Baldwin	veor	q3,  q3,  q13		@ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
2360bc3d5698SJohn Baldwin	veor	q4,  q4,  q15		@ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
2361bc3d5698SJohn Baldwin
2362bc3d5698SJohn Baldwin	vbic	q13, q7,  q6
2363bc3d5698SJohn Baldwin	vmov	q0,  q5			@ A[2..3][0]
2364bc3d5698SJohn Baldwin	vbic	q14, q8,  q7
2365bc3d5698SJohn Baldwin	vmov	q15, q6			@ A[2..3][1]
2366bc3d5698SJohn Baldwin	veor	q5,  q5,  q13		@ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
2367bc3d5698SJohn Baldwin	vbic	q13, q9,  q8
2368bc3d5698SJohn Baldwin	veor	q6,  q6,  q14		@ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
2369bc3d5698SJohn Baldwin	vbic	q14, q0,  q9
2370bc3d5698SJohn Baldwin	veor	q7,  q7,  q13		@ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
2371bc3d5698SJohn Baldwin	vbic	q13, q15, q0
2372bc3d5698SJohn Baldwin	veor	q8,  q8,  q14		@ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
/*
 * q14 (d28,d29) keeps the pre-Chi A[4][0..1]; the last two terms of row 4
 * need them after d20/d21 have already been updated.
 */
2373bc3d5698SJohn Baldwin	vmov	q14, q10		@ A[4][0..1]
2374bc3d5698SJohn Baldwin	veor	q9,  q9,  q13		@ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
2375bc3d5698SJohn Baldwin
2376bc3d5698SJohn Baldwin	vld1.64	d25, [r2,:64]!		@ Iota[i++]
2377bc3d5698SJohn Baldwin	vbic	d26, d22, d21
2378bc3d5698SJohn Baldwin	vbic	d27, d23, d22
2379bc3d5698SJohn Baldwin	vld1.64	{q0}, [r0,:64]		@ restore A[0..1][0]
2380bc3d5698SJohn Baldwin	veor	d20, d20, d26		@ A[4][0] ^= (~A[4][1] & A[4][2])
2381bc3d5698SJohn Baldwin	vbic	d26, d24, d23
2382bc3d5698SJohn Baldwin	veor	d21, d21, d27		@ A[4][1] ^= (~A[4][2] & A[4][3])
2383bc3d5698SJohn Baldwin	vbic	d27, d28, d24
2384bc3d5698SJohn Baldwin	veor	d22, d22, d26		@ A[4][2] ^= (~A[4][3] & A[4][4])
2385bc3d5698SJohn Baldwin	vbic	d26, d29, d28
2386bc3d5698SJohn Baldwin	veor	d23, d23, d27		@ A[4][3] ^= (~A[4][4] & A[4][0])
2387bc3d5698SJohn Baldwin	veor	d0,  d0,  d25		@ A[0][0] ^= Iota[i]
2388bc3d5698SJohn Baldwin	veor	d24, d24, d26		@ A[4][4] ^= (~A[4][0] & A[4][1])
2389bc3d5698SJohn Baldwin
2390bc3d5698SJohn Baldwin	subs	r3, r3, #1
2391bc3d5698SJohn Baldwin	bne	.Loop_neon
2392bc3d5698SJohn Baldwin
2393*c0855eaaSJohn Baldwin	bx	lr
2394bc3d5698SJohn Baldwin.size	KeccakF1600_neon,.-KeccakF1600_neon
2395bc3d5698SJohn Baldwin
/*
 * SHA3_absorb_neon: XOR whole input blocks into the state and run
 * KeccakF1600_neon after each one.
 *
 * In:    r0 = state, 25 64-bit lanes, 64-bit aligned (loaded below with :64)
 *        r1 = inp, r2 = len in bytes, r3 = bsz (block size in bytes)
 * Out:   r0 = input bytes left over, i.e. len after the last whole block was
 *             consumed (the loop stops as soon as len < bsz)
 * Saves and restores r4-r6 and d8-d15; lr is pushed and popped into pc.
 * The state is kept in d0..d24 for the whole call and written back to
 * memory only once, at .Labsorbed_neon.
 * NOTE(review): input is consumed in 8-byte lanes while len is decremented
 * by bsz, and bsz == 0 would never leave the loop, so bsz is presumably a
 * non-zero multiple of 8 no larger than 25*8 -- confirm against callers.
 */
2396bc3d5698SJohn Baldwin.globl	SHA3_absorb_neon
2397bc3d5698SJohn Baldwin.type	SHA3_absorb_neon, %function
2398bc3d5698SJohn Baldwin.align	5
2399bc3d5698SJohn BaldwinSHA3_absorb_neon:
2400bc3d5698SJohn Baldwin	stmdb	sp!, {r4,r5,r6,lr}
2401bc3d5698SJohn Baldwin	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2402bc3d5698SJohn Baldwin
2403bc3d5698SJohn Baldwin	mov	r4, r1			@ inp
2404bc3d5698SJohn Baldwin	mov	r5, r2			@ len
2405bc3d5698SJohn Baldwin	mov	r6, r3			@ bsz
2406bc3d5698SJohn Baldwin
/*
 * Load the 25 lanes.  Rows 0 and 1 share q0..q4 (row 0 in the even
 * d-register, row 1 in the odd one), rows 2 and 3 share q5..q9 the same way,
 * and row 4 goes to d20..d24.
 */
2407bc3d5698SJohn Baldwin	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
2408bc3d5698SJohn Baldwin	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
2409bc3d5698SJohn Baldwin	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
2410bc3d5698SJohn Baldwin	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
2411bc3d5698SJohn Baldwin	vld1.32	{d8}, [r0,:64]!		@ A[0][4]
2412bc3d5698SJohn Baldwin
2413bc3d5698SJohn Baldwin	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
2414bc3d5698SJohn Baldwin	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
2415bc3d5698SJohn Baldwin	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
2416bc3d5698SJohn Baldwin	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
2417bc3d5698SJohn Baldwin	vld1.32	{d9}, [r0,:64]!		@ A[1][4]
2418bc3d5698SJohn Baldwin
2419bc3d5698SJohn Baldwin	vld1.32	{d10}, [r0,:64]!		@ A[2][0]
2420bc3d5698SJohn Baldwin	vld1.32	{d12}, [r0,:64]!		@ A[2][1]
2421bc3d5698SJohn Baldwin	vld1.32	{d14}, [r0,:64]!		@ A[2][2]
2422bc3d5698SJohn Baldwin	vld1.32	{d16}, [r0,:64]!		@ A[2][3]
2423bc3d5698SJohn Baldwin	vld1.32	{d18}, [r0,:64]!		@ A[2][4]
2424bc3d5698SJohn Baldwin
2425bc3d5698SJohn Baldwin	vld1.32	{d11}, [r0,:64]!		@ A[3][0]
2426bc3d5698SJohn Baldwin	vld1.32	{d13}, [r0,:64]!		@ A[3][1]
2427bc3d5698SJohn Baldwin	vld1.32	{d15}, [r0,:64]!		@ A[3][2]
2428bc3d5698SJohn Baldwin	vld1.32	{d17}, [r0,:64]!		@ A[3][3]
2429bc3d5698SJohn Baldwin	vld1.32	{d19}, [r0,:64]!		@ A[3][4]
2430bc3d5698SJohn Baldwin
2431bc3d5698SJohn Baldwin	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..3]
2432bc3d5698SJohn Baldwin	vld1.32	{d24}, [r0,:64]		@ A[4][4]
2433bc3d5698SJohn Baldwin	sub	r0, r0, #24*8		@ rewind
2434bc3d5698SJohn Baldwin	b	.Loop_absorb_neon
2435bc3d5698SJohn Baldwin
2436bc3d5698SJohn Baldwin.align	4
2437bc3d5698SJohn Baldwin.Loop_absorb_neon:
2438bc3d5698SJohn Baldwin	subs	r12, r5, r6		@ len - bsz
2439bc3d5698SJohn Baldwin	blo	.Labsorbed_neon
2440bc3d5698SJohn Baldwin	mov	r5, r12
2441bc3d5698SJohn Baldwin
/*
 * Lane ladder.  Each "cmp r6, #8*(2k)" feeds two exits: "blo" leaves when
 * bsz is below that bound, and one lane later "beq" leaves when bsz equals
 * it.  The ladder relies on the vld1/veor in between leaving the condition
 * flags untouched.
 */
2442bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!		@ endian-neutral loads...
2443bc3d5698SJohn Baldwin	cmp	r6, #8*2
2444bc3d5698SJohn Baldwin	veor	d0, d0, d31		@ A[0][0] ^= *inp++
2445bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2446bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2447bc3d5698SJohn Baldwin	veor	d2, d2, d31		@ A[0][1] ^= *inp++
2448bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2449bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2450bc3d5698SJohn Baldwin	cmp	r6, #8*4
2451bc3d5698SJohn Baldwin	veor	d4, d4, d31		@ A[0][2] ^= *inp++
2452bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2453bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2454bc3d5698SJohn Baldwin	veor	d6, d6, d31		@ A[0][3] ^= *inp++
2455bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2456bc3d5698SJohn Baldwin	vld1.8	{d31},[r4]!
2457bc3d5698SJohn Baldwin	cmp	r6, #8*6
2458bc3d5698SJohn Baldwin	veor	d8, d8, d31		@ A[0][4] ^= *inp++
2459bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2460bc3d5698SJohn Baldwin
2461bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2462bc3d5698SJohn Baldwin	veor	d1, d1, d31		@ A[1][0] ^= *inp++
2463bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2464bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2465bc3d5698SJohn Baldwin	cmp	r6, #8*8
2466bc3d5698SJohn Baldwin	veor	d3, d3, d31		@ A[1][1] ^= *inp++
2467bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2468bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2469bc3d5698SJohn Baldwin	veor	d5, d5, d31		@ A[1][2] ^= *inp++
2470bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2471bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2472bc3d5698SJohn Baldwin	cmp	r6, #8*10
2473bc3d5698SJohn Baldwin	veor	d7, d7, d31		@ A[1][3] ^= *inp++
2474bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2475bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2476bc3d5698SJohn Baldwin	veor	d9, d9, d31		@ A[1][4] ^= *inp++
2477bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2478bc3d5698SJohn Baldwin
2479bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2480bc3d5698SJohn Baldwin	cmp	r6, #8*12
2481bc3d5698SJohn Baldwin	veor	d10, d10, d31		@ A[2][0] ^= *inp++
2482bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2483bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2484bc3d5698SJohn Baldwin	veor	d12, d12, d31		@ A[2][1] ^= *inp++
2485bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2486bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2487bc3d5698SJohn Baldwin	cmp	r6, #8*14
2488bc3d5698SJohn Baldwin	veor	d14, d14, d31		@ A[2][2] ^= *inp++
2489bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2490bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2491bc3d5698SJohn Baldwin	veor	d16, d16, d31		@ A[2][3] ^= *inp++
2492bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2493bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2494bc3d5698SJohn Baldwin	cmp	r6, #8*16
2495bc3d5698SJohn Baldwin	veor	d18, d18, d31		@ A[2][4] ^= *inp++
2496bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2497bc3d5698SJohn Baldwin
2498bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2499bc3d5698SJohn Baldwin	veor	d11, d11, d31		@ A[3][0] ^= *inp++
2500bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2501bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2502bc3d5698SJohn Baldwin	cmp	r6, #8*18
2503bc3d5698SJohn Baldwin	veor	d13, d13, d31		@ A[3][1] ^= *inp++
2504bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2505bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2506bc3d5698SJohn Baldwin	veor	d15, d15, d31		@ A[3][2] ^= *inp++
2507bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2508bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2509bc3d5698SJohn Baldwin	cmp	r6, #8*20
2510bc3d5698SJohn Baldwin	veor	d17, d17, d31		@ A[3][3] ^= *inp++
2511bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2512bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2513bc3d5698SJohn Baldwin	veor	d19, d19, d31		@ A[3][4] ^= *inp++
2514bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2515bc3d5698SJohn Baldwin
2516bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2517bc3d5698SJohn Baldwin	cmp	r6, #8*22
2518bc3d5698SJohn Baldwin	veor	d20, d20, d31		@ A[4][0] ^= *inp++
2519bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2520bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2521bc3d5698SJohn Baldwin	veor	d21, d21, d31		@ A[4][1] ^= *inp++
2522bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2523bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2524bc3d5698SJohn Baldwin	cmp	r6, #8*24
2525bc3d5698SJohn Baldwin	veor	d22, d22, d31		@ A[4][2] ^= *inp++
2526bc3d5698SJohn Baldwin	blo	.Lprocess_neon
2527bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2528bc3d5698SJohn Baldwin	veor	d23, d23, d31		@ A[4][3] ^= *inp++
2529bc3d5698SJohn Baldwin	beq	.Lprocess_neon
2530bc3d5698SJohn Baldwin	vld1.8	{d31}, [r4]!
2531bc3d5698SJohn Baldwin	veor	d24, d24, d31		@ A[4][4] ^= *inp++
2532bc3d5698SJohn Baldwin
/*
 * Permute.  r0 still points at the state array here; the callee uses its
 * first 24 bytes as scratch, which is harmless because the in-memory copy
 * is rewritten in full at .Labsorbed_neon.
 */
2533bc3d5698SJohn Baldwin.Lprocess_neon:
2534bc3d5698SJohn Baldwin	bl	KeccakF1600_neon
2535bc3d5698SJohn Baldwin	b	.Loop_absorb_neon
2536bc3d5698SJohn Baldwin
2537bc3d5698SJohn Baldwin.align	4
/* len < bsz: store the lanes back in the order they were loaded and return. */
2538bc3d5698SJohn Baldwin.Labsorbed_neon:
2539bc3d5698SJohn Baldwin	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
2540bc3d5698SJohn Baldwin	vst1.32	{d2}, [r0,:64]!
2541bc3d5698SJohn Baldwin	vst1.32	{d4}, [r0,:64]!
2542bc3d5698SJohn Baldwin	vst1.32	{d6}, [r0,:64]!
2543bc3d5698SJohn Baldwin	vst1.32	{d8}, [r0,:64]!
2544bc3d5698SJohn Baldwin
2545bc3d5698SJohn Baldwin	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
2546bc3d5698SJohn Baldwin	vst1.32	{d3}, [r0,:64]!
2547bc3d5698SJohn Baldwin	vst1.32	{d5}, [r0,:64]!
2548bc3d5698SJohn Baldwin	vst1.32	{d7}, [r0,:64]!
2549bc3d5698SJohn Baldwin	vst1.32	{d9}, [r0,:64]!
2550bc3d5698SJohn Baldwin
2551bc3d5698SJohn Baldwin	vst1.32	{d10}, [r0,:64]!		@ A[2][0..4]
2552bc3d5698SJohn Baldwin	vst1.32	{d12}, [r0,:64]!
2553bc3d5698SJohn Baldwin	vst1.32	{d14}, [r0,:64]!
2554bc3d5698SJohn Baldwin	vst1.32	{d16}, [r0,:64]!
2555bc3d5698SJohn Baldwin	vst1.32	{d18}, [r0,:64]!
2556bc3d5698SJohn Baldwin
2557bc3d5698SJohn Baldwin	vst1.32	{d11}, [r0,:64]!		@ A[3][0..4]
2558bc3d5698SJohn Baldwin	vst1.32	{d13}, [r0,:64]!
2559bc3d5698SJohn Baldwin	vst1.32	{d15}, [r0,:64]!
2560bc3d5698SJohn Baldwin	vst1.32	{d17}, [r0,:64]!
2561bc3d5698SJohn Baldwin	vst1.32	{d19}, [r0,:64]!
2562bc3d5698SJohn Baldwin
2563bc3d5698SJohn Baldwin	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
2564bc3d5698SJohn Baldwin	vst1.32	{d24}, [r0,:64]
2565bc3d5698SJohn Baldwin
2566bc3d5698SJohn Baldwin	mov	r0, r5			@ return value
2567bc3d5698SJohn Baldwin	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2568bc3d5698SJohn Baldwin	ldmia	sp!, {r4,r5,r6,pc}
2569bc3d5698SJohn Baldwin.size	SHA3_absorb_neon,.-SHA3_absorb_neon
2570bc3d5698SJohn Baldwin
/*
 * SHA3_squeeze_neon: copy len bytes of output out of the state, running
 * KeccakF1600_neon each time a full block of bsz bytes has been emitted and
 * more output is still wanted.
 *
 * In:    r0 = A_flat (state), r1 = out, r2 = len, r3 = bsz
 * Regs:  r4  = output cursor          r5  = bytes still to emit
 *        r6  = bsz                    r12 = read cursor inside the state
 *        r14 = bytes left in the current block (lr is free for this because
 *              the return address was pushed and is popped straight into pc)
 * d8-d15 are saved and restored only around the permutation.
 * NOTE(review): with len == 0 on entry control goes to .Lsqueeze_neon_tail,
 * whose first strb executes before any branch, so one byte is written.
 * Callers presumably never pass len == 0 -- confirm.
 */
2571bc3d5698SJohn Baldwin.globl	SHA3_squeeze_neon
2572bc3d5698SJohn Baldwin.type	SHA3_squeeze_neon, %function
2573bc3d5698SJohn Baldwin.align	5
2574bc3d5698SJohn BaldwinSHA3_squeeze_neon:
2575bc3d5698SJohn Baldwin	stmdb	sp!, {r4,r5,r6,lr}
2576bc3d5698SJohn Baldwin
2577bc3d5698SJohn Baldwin	mov	r4, r1			@ out
2578bc3d5698SJohn Baldwin	mov	r5, r2			@ len
2579bc3d5698SJohn Baldwin	mov	r6, r3			@ bsz
2580bc3d5698SJohn Baldwin	mov	r12, r0			@ A_flat
2581bc3d5698SJohn Baldwin	mov	r14, r3			@ bsz
2582bc3d5698SJohn Baldwin	b	.Loop_squeeze_neon
2583bc3d5698SJohn Baldwin
2584bc3d5698SJohn Baldwin.align	4
2585bc3d5698SJohn Baldwin.Loop_squeeze_neon:
2586bc3d5698SJohn Baldwin	cmp	r5, #8
2587bc3d5698SJohn Baldwin	blo	.Lsqueeze_neon_tail
2588bc3d5698SJohn Baldwin	vld1.32	{d0}, [r12]!
2589bc3d5698SJohn Baldwin	vst1.8	{d0}, [r4]!		@ endian-neutral store
2590bc3d5698SJohn Baldwin
2591bc3d5698SJohn Baldwin	subs	r5, r5, #8		@ len -= 8
2592bc3d5698SJohn Baldwin	beq	.Lsqueeze_neon_done
2593bc3d5698SJohn Baldwin
2594bc3d5698SJohn Baldwin	subs	r14, r14, #8		@ bsz -= 8
2595bc3d5698SJohn Baldwin	bhi	.Loop_squeeze_neon
2596bc3d5698SJohn Baldwin
/*
 * Block exhausted and output still wanted: load the whole state into the
 * register layout KeccakF1600_neon expects, permute, and store it back.
 */
2597bc3d5698SJohn Baldwin	vstmdb	sp!,  {d8,d9,d10,d11,d12,d13,d14,d15}
2598bc3d5698SJohn Baldwin
2599bc3d5698SJohn Baldwin	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
2600bc3d5698SJohn Baldwin	vld1.32	{d2}, [r0,:64]!
2601bc3d5698SJohn Baldwin	vld1.32	{d4}, [r0,:64]!
2602bc3d5698SJohn Baldwin	vld1.32	{d6}, [r0,:64]!
2603bc3d5698SJohn Baldwin	vld1.32	{d8}, [r0,:64]!
2604bc3d5698SJohn Baldwin
2605bc3d5698SJohn Baldwin	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
2606bc3d5698SJohn Baldwin	vld1.32	{d3}, [r0,:64]!
2607bc3d5698SJohn Baldwin	vld1.32	{d5}, [r0,:64]!
2608bc3d5698SJohn Baldwin	vld1.32	{d7}, [r0,:64]!
2609bc3d5698SJohn Baldwin	vld1.32	{d9}, [r0,:64]!
2610bc3d5698SJohn Baldwin
2611bc3d5698SJohn Baldwin	vld1.32	{d10}, [r0,:64]!		@ A[2][0..4]
2612bc3d5698SJohn Baldwin	vld1.32	{d12}, [r0,:64]!
2613bc3d5698SJohn Baldwin	vld1.32	{d14}, [r0,:64]!
2614bc3d5698SJohn Baldwin	vld1.32	{d16}, [r0,:64]!
2615bc3d5698SJohn Baldwin	vld1.32	{d18}, [r0,:64]!
2616bc3d5698SJohn Baldwin
2617bc3d5698SJohn Baldwin	vld1.32	{d11}, [r0,:64]!		@ A[3][0..4]
2618bc3d5698SJohn Baldwin	vld1.32	{d13}, [r0,:64]!
2619bc3d5698SJohn Baldwin	vld1.32	{d15}, [r0,:64]!
2620bc3d5698SJohn Baldwin	vld1.32	{d17}, [r0,:64]!
2621bc3d5698SJohn Baldwin	vld1.32	{d19}, [r0,:64]!
2622bc3d5698SJohn Baldwin
2623bc3d5698SJohn Baldwin	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
2624bc3d5698SJohn Baldwin	vld1.32	{d24}, [r0,:64]
2625bc3d5698SJohn Baldwin	sub	r0, r0, #24*8		@ rewind
2626bc3d5698SJohn Baldwin
/* The bl overwrites r14; the block counter is reloaded from r6 further down. */
2627bc3d5698SJohn Baldwin	bl	KeccakF1600_neon
2628bc3d5698SJohn Baldwin
2629bc3d5698SJohn Baldwin	mov	r12, r0			@ A_flat
2630bc3d5698SJohn Baldwin	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
2631bc3d5698SJohn Baldwin	vst1.32	{d2}, [r0,:64]!
2632bc3d5698SJohn Baldwin	vst1.32	{d4}, [r0,:64]!
2633bc3d5698SJohn Baldwin	vst1.32	{d6}, [r0,:64]!
2634bc3d5698SJohn Baldwin	vst1.32	{d8}, [r0,:64]!
2635bc3d5698SJohn Baldwin
2636bc3d5698SJohn Baldwin	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
2637bc3d5698SJohn Baldwin	vst1.32	{d3}, [r0,:64]!
2638bc3d5698SJohn Baldwin	vst1.32	{d5}, [r0,:64]!
2639bc3d5698SJohn Baldwin	vst1.32	{d7}, [r0,:64]!
2640bc3d5698SJohn Baldwin	vst1.32	{d9}, [r0,:64]!
2641bc3d5698SJohn Baldwin
2642bc3d5698SJohn Baldwin	vst1.32	{d10}, [r0,:64]!		@ A[2][0..4]
2643bc3d5698SJohn Baldwin	vst1.32	{d12}, [r0,:64]!
2644bc3d5698SJohn Baldwin	vst1.32	{d14}, [r0,:64]!
2645bc3d5698SJohn Baldwin	vst1.32	{d16}, [r0,:64]!
2646bc3d5698SJohn Baldwin	vst1.32	{d18}, [r0,:64]!
2647bc3d5698SJohn Baldwin
2648bc3d5698SJohn Baldwin	vst1.32	{d11}, [r0,:64]!		@ A[3][0..4]
2649bc3d5698SJohn Baldwin	vst1.32	{d13}, [r0,:64]!
2650bc3d5698SJohn Baldwin	vst1.32	{d15}, [r0,:64]!
2651bc3d5698SJohn Baldwin	vst1.32	{d17}, [r0,:64]!
2652bc3d5698SJohn Baldwin	vst1.32	{d19}, [r0,:64]!
2653bc3d5698SJohn Baldwin
2654bc3d5698SJohn Baldwin	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
2655bc3d5698SJohn Baldwin	mov	r14, r6			@ bsz
2656bc3d5698SJohn Baldwin	vst1.32	{d24}, [r0,:64]
2657bc3d5698SJohn Baldwin	mov	r0,  r12		@ rewind
2658bc3d5698SJohn Baldwin
2659bc3d5698SJohn Baldwin	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
2660bc3d5698SJohn Baldwin	b	.Loop_squeeze_neon
2661bc3d5698SJohn Baldwin
2662bc3d5698SJohn Baldwin.align	4
/*
 * Fewer than 8 bytes wanted: fetch the current lane as two 32-bit words
 * (r2 from [r12], r3 from [r12+4]) and store it one byte at a time, lowest
 * byte of r2 first.  Each cmp against 2, 4 and 6 drives a "blo" and a later
 * "beq"; the strb/mov in between do not alter the flags.
 */
2663bc3d5698SJohn Baldwin.Lsqueeze_neon_tail:
2664bc3d5698SJohn Baldwin	ldmia	r12, {r2,r3}
2665bc3d5698SJohn Baldwin	cmp	r5, #2
2666bc3d5698SJohn Baldwin	strb	r2, [r4],#1		@ endian-neutral store
2667bc3d5698SJohn Baldwin	mov	r2, r2, lsr#8
2668bc3d5698SJohn Baldwin	blo	.Lsqueeze_neon_done
2669bc3d5698SJohn Baldwin	strb	r2, [r4], #1
2670bc3d5698SJohn Baldwin	mov	r2, r2, lsr#8
2671bc3d5698SJohn Baldwin	beq	.Lsqueeze_neon_done
2672bc3d5698SJohn Baldwin	strb	r2, [r4], #1
2673bc3d5698SJohn Baldwin	mov	r2, r2, lsr#8
2674bc3d5698SJohn Baldwin	cmp	r5, #4
2675bc3d5698SJohn Baldwin	blo	.Lsqueeze_neon_done
2676bc3d5698SJohn Baldwin	strb	r2, [r4], #1
2677bc3d5698SJohn Baldwin	beq	.Lsqueeze_neon_done
2678bc3d5698SJohn Baldwin
2679bc3d5698SJohn Baldwin	strb	r3, [r4], #1
2680bc3d5698SJohn Baldwin	mov	r3, r3, lsr#8
2681bc3d5698SJohn Baldwin	cmp	r5, #6
2682bc3d5698SJohn Baldwin	blo	.Lsqueeze_neon_done
2683bc3d5698SJohn Baldwin	strb	r3, [r4], #1
2684bc3d5698SJohn Baldwin	mov	r3, r3, lsr#8
2685bc3d5698SJohn Baldwin	beq	.Lsqueeze_neon_done
2686bc3d5698SJohn Baldwin	strb	r3, [r4], #1
2687bc3d5698SJohn Baldwin
2688bc3d5698SJohn Baldwin.Lsqueeze_neon_done:
2689bc3d5698SJohn Baldwin	ldmia	sp!, {r4,r5,r6,pc}
2690bc3d5698SJohn Baldwin.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
2691bc3d5698SJohn Baldwin#endif
2692bc3d5698SJohn Baldwin.byte	75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2693bc3d5698SJohn Baldwin.align	2
2694bc3d5698SJohn Baldwin.align	2
2695