xref: /freebsd/sys/crypto/openssl/arm/sha1-armv4-large.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43)
1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from sha1-armv4-large.pl. */
2bc3d5698SJohn Baldwin#include "arm_arch.h"
3bc3d5698SJohn Baldwin
4bc3d5698SJohn Baldwin#if defined(__thumb2__)
5bc3d5698SJohn Baldwin.syntax	unified
6bc3d5698SJohn Baldwin.thumb
7bc3d5698SJohn Baldwin#else
8bc3d5698SJohn Baldwin.code	32
9bc3d5698SJohn Baldwin#endif
10bc3d5698SJohn Baldwin
11*c0855eaaSJohn Baldwin.text
12*c0855eaaSJohn Baldwin
@ sha1_block_data_order -- SHA-1 compression over whole 64-byte blocks,
@ integer-register path.
@
@ Register use, as established by the code below:
@   r0      pointer to five 32-bit state words (loaded into r3-r7 on entry,
@           added back and stored after every block)
@   r1      input pointer, advanced by 4 bytes per message word
@   r2      block count on entry; turned into the end pointer r1 + r2*64
@   r3-r7   working variables A,B,C,D,E; the roles shift by one register per round
@   r8      round constant K of the current 20-round group
@   r9-r12  scratch
@   r14     descending write pointer into the on-stack message schedule X[]
@   sp      reused as the loop-end marker that r14 is compared against
@
@ When __ARM_MAX_ARCH__>=7 the entry first tests OPENSSL_armcap_P and branches
@ to .LARMv8 or .LNEON if the matching capability bit is set.
@
@ NOTE(review): the block loop ends only when r1 equals r2 exactly, so a block
@ count of zero would never terminate normally -- presumably callers always
@ pass at least one block; confirm against the C callers.
13bc3d5698SJohn Baldwin.globl	sha1_block_data_order
14bc3d5698SJohn Baldwin.type	sha1_block_data_order,%function
15bc3d5698SJohn Baldwin
16bc3d5698SJohn Baldwin.align	5
17bc3d5698SJohn Baldwinsha1_block_data_order:
18bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7
19bc3d5698SJohn Baldwin.Lsha1_block:
@ Fetch the capability word into r12. The literal at .LOPENSSL_armcap holds
@ OPENSSL_armcap_P-.Lsha1_block (an absolute address on _WIN32), so adding the
@ address of .Lsha1_block gives a position-independent load.
20bc3d5698SJohn Baldwin	ldr	r12,.LOPENSSL_armcap
21*c0855eaaSJohn Baldwin# if !defined(_WIN32)
22*c0855eaaSJohn Baldwin	adr	r3,.Lsha1_block
23bc3d5698SJohn Baldwin	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
24*c0855eaaSJohn Baldwin# endif
25*c0855eaaSJohn Baldwin# if defined(__APPLE__) || defined(_WIN32)
@ on these targets r12 holds an address at this point; one more load yields the value
26bc3d5698SJohn Baldwin	ldr	r12,[r12]
27bc3d5698SJohn Baldwin# endif
28bc3d5698SJohn Baldwin	tst	r12,#ARMV8_SHA1
29bc3d5698SJohn Baldwin	bne	.LARMv8
30bc3d5698SJohn Baldwin	tst	r12,#ARMV7_NEON
31bc3d5698SJohn Baldwin	bne	.LNEON
32bc3d5698SJohn Baldwin#endif
@ Integer path: save r4-r12 and lr (restored by the ldmia at the end),
@ compute the end-of-input pointer and load the five state words.
33bc3d5698SJohn Baldwin	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
34bc3d5698SJohn Baldwin	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
35bc3d5698SJohn Baldwin	ldmia	r0,{r3,r4,r5,r6,r7}
@ Per-block setup. r14 starts at the frame top and X[] grows downward from it;
@ sp is lowered by 15 words so that r14 == sp marks the end of rounds 0..14.
@ C, D and E are held rotated left by 2 (ror#30) and are un-rotated with
@ ror#2 wherever they are consumed.
36bc3d5698SJohn Baldwin.Lloop:
37bc3d5698SJohn Baldwin	ldr	r8,.LK_00_19
38bc3d5698SJohn Baldwin	mov	r14,sp
39bc3d5698SJohn Baldwin	sub	sp,sp,#15*4
40bc3d5698SJohn Baldwin	mov	r5,r5,ror#30
41bc3d5698SJohn Baldwin	mov	r6,r6,ror#30
42bc3d5698SJohn Baldwin	mov	r7,r7,ror#30		@ [6]
@ Rounds 0..14: five unrolled rounds per pass, three passes. Each round gets
@ one big-endian message word into r9, pushes it onto X[] through r14, and
@ adds K, ROR(A,27), X[i] and F_00_19(B,C,D) = ((C^D)&B)^D into E.
43bc3d5698SJohn Baldwin.L_00_15:
44bc3d5698SJohn Baldwin#if __ARM_ARCH__<7
@ before ARMv7: build the big-endian word from four byte loads
45bc3d5698SJohn Baldwin	ldrb	r10,[r1,#2]
46bc3d5698SJohn Baldwin	ldrb	r9,[r1,#3]
47bc3d5698SJohn Baldwin	ldrb	r11,[r1,#1]
48bc3d5698SJohn Baldwin	add	r7,r8,r7,ror#2			@ E+=K_00_19
49bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
50bc3d5698SJohn Baldwin	orr	r9,r9,r10,lsl#8
51bc3d5698SJohn Baldwin	eor	r10,r5,r6			@ F_xx_xx
52bc3d5698SJohn Baldwin	orr	r9,r9,r11,lsl#16
53bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
54bc3d5698SJohn Baldwin	orr	r9,r9,r12,lsl#24
55bc3d5698SJohn Baldwin#else
@ ARMv7 and later: one word load, byte-reversed on little-endian builds
56bc3d5698SJohn Baldwin	ldr	r9,[r1],#4			@ handles unaligned
57bc3d5698SJohn Baldwin	add	r7,r8,r7,ror#2			@ E+=K_00_19
58bc3d5698SJohn Baldwin	eor	r10,r5,r6			@ F_xx_xx
59bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
60bc3d5698SJohn Baldwin#ifdef __ARMEL__
61bc3d5698SJohn Baldwin	rev	r9,r9				@ byte swap
62bc3d5698SJohn Baldwin#endif
63bc3d5698SJohn Baldwin#endif
64bc3d5698SJohn Baldwin	and	r10,r4,r10,ror#2
65bc3d5698SJohn Baldwin	add	r7,r7,r9			@ E+=X[i]
66bc3d5698SJohn Baldwin	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
67bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
68bc3d5698SJohn Baldwin	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
@ next round: roles shift, E is now r6 and A is r7
69bc3d5698SJohn Baldwin#if __ARM_ARCH__<7
70bc3d5698SJohn Baldwin	ldrb	r10,[r1,#2]
71bc3d5698SJohn Baldwin	ldrb	r9,[r1,#3]
72bc3d5698SJohn Baldwin	ldrb	r11,[r1,#1]
73bc3d5698SJohn Baldwin	add	r6,r8,r6,ror#2			@ E+=K_00_19
74bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
75bc3d5698SJohn Baldwin	orr	r9,r9,r10,lsl#8
76bc3d5698SJohn Baldwin	eor	r10,r4,r5			@ F_xx_xx
77bc3d5698SJohn Baldwin	orr	r9,r9,r11,lsl#16
78bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
79bc3d5698SJohn Baldwin	orr	r9,r9,r12,lsl#24
80bc3d5698SJohn Baldwin#else
81bc3d5698SJohn Baldwin	ldr	r9,[r1],#4			@ handles unaligned
82bc3d5698SJohn Baldwin	add	r6,r8,r6,ror#2			@ E+=K_00_19
83bc3d5698SJohn Baldwin	eor	r10,r4,r5			@ F_xx_xx
84bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
85bc3d5698SJohn Baldwin#ifdef __ARMEL__
86bc3d5698SJohn Baldwin	rev	r9,r9				@ byte swap
87bc3d5698SJohn Baldwin#endif
88bc3d5698SJohn Baldwin#endif
89bc3d5698SJohn Baldwin	and	r10,r3,r10,ror#2
90bc3d5698SJohn Baldwin	add	r6,r6,r9			@ E+=X[i]
91bc3d5698SJohn Baldwin	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
92bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
93bc3d5698SJohn Baldwin	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
@ next round: E is r5, A is r6
94bc3d5698SJohn Baldwin#if __ARM_ARCH__<7
95bc3d5698SJohn Baldwin	ldrb	r10,[r1,#2]
96bc3d5698SJohn Baldwin	ldrb	r9,[r1,#3]
97bc3d5698SJohn Baldwin	ldrb	r11,[r1,#1]
98bc3d5698SJohn Baldwin	add	r5,r8,r5,ror#2			@ E+=K_00_19
99bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
100bc3d5698SJohn Baldwin	orr	r9,r9,r10,lsl#8
101bc3d5698SJohn Baldwin	eor	r10,r3,r4			@ F_xx_xx
102bc3d5698SJohn Baldwin	orr	r9,r9,r11,lsl#16
103bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
104bc3d5698SJohn Baldwin	orr	r9,r9,r12,lsl#24
105bc3d5698SJohn Baldwin#else
106bc3d5698SJohn Baldwin	ldr	r9,[r1],#4			@ handles unaligned
107bc3d5698SJohn Baldwin	add	r5,r8,r5,ror#2			@ E+=K_00_19
108bc3d5698SJohn Baldwin	eor	r10,r3,r4			@ F_xx_xx
109bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
110bc3d5698SJohn Baldwin#ifdef __ARMEL__
111bc3d5698SJohn Baldwin	rev	r9,r9				@ byte swap
112bc3d5698SJohn Baldwin#endif
113bc3d5698SJohn Baldwin#endif
114bc3d5698SJohn Baldwin	and	r10,r7,r10,ror#2
115bc3d5698SJohn Baldwin	add	r5,r5,r9			@ E+=X[i]
116bc3d5698SJohn Baldwin	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
117bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
118bc3d5698SJohn Baldwin	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
@ next round: E is r4, A is r5
119bc3d5698SJohn Baldwin#if __ARM_ARCH__<7
120bc3d5698SJohn Baldwin	ldrb	r10,[r1,#2]
121bc3d5698SJohn Baldwin	ldrb	r9,[r1,#3]
122bc3d5698SJohn Baldwin	ldrb	r11,[r1,#1]
123bc3d5698SJohn Baldwin	add	r4,r8,r4,ror#2			@ E+=K_00_19
124bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
125bc3d5698SJohn Baldwin	orr	r9,r9,r10,lsl#8
126bc3d5698SJohn Baldwin	eor	r10,r7,r3			@ F_xx_xx
127bc3d5698SJohn Baldwin	orr	r9,r9,r11,lsl#16
128bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
129bc3d5698SJohn Baldwin	orr	r9,r9,r12,lsl#24
130bc3d5698SJohn Baldwin#else
131bc3d5698SJohn Baldwin	ldr	r9,[r1],#4			@ handles unaligned
132bc3d5698SJohn Baldwin	add	r4,r8,r4,ror#2			@ E+=K_00_19
133bc3d5698SJohn Baldwin	eor	r10,r7,r3			@ F_xx_xx
134bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
135bc3d5698SJohn Baldwin#ifdef __ARMEL__
136bc3d5698SJohn Baldwin	rev	r9,r9				@ byte swap
137bc3d5698SJohn Baldwin#endif
138bc3d5698SJohn Baldwin#endif
139bc3d5698SJohn Baldwin	and	r10,r6,r10,ror#2
140bc3d5698SJohn Baldwin	add	r4,r4,r9			@ E+=X[i]
141bc3d5698SJohn Baldwin	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
142bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
143bc3d5698SJohn Baldwin	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
@ next round: E is r3, A is r4; after it the roles are back where they started
144bc3d5698SJohn Baldwin#if __ARM_ARCH__<7
145bc3d5698SJohn Baldwin	ldrb	r10,[r1,#2]
146bc3d5698SJohn Baldwin	ldrb	r9,[r1,#3]
147bc3d5698SJohn Baldwin	ldrb	r11,[r1,#1]
148bc3d5698SJohn Baldwin	add	r3,r8,r3,ror#2			@ E+=K_00_19
149bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
150bc3d5698SJohn Baldwin	orr	r9,r9,r10,lsl#8
151bc3d5698SJohn Baldwin	eor	r10,r6,r7			@ F_xx_xx
152bc3d5698SJohn Baldwin	orr	r9,r9,r11,lsl#16
153bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
154bc3d5698SJohn Baldwin	orr	r9,r9,r12,lsl#24
155bc3d5698SJohn Baldwin#else
156bc3d5698SJohn Baldwin	ldr	r9,[r1],#4			@ handles unaligned
157bc3d5698SJohn Baldwin	add	r3,r8,r3,ror#2			@ E+=K_00_19
158bc3d5698SJohn Baldwin	eor	r10,r6,r7			@ F_xx_xx
159bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
160bc3d5698SJohn Baldwin#ifdef __ARMEL__
161bc3d5698SJohn Baldwin	rev	r9,r9				@ byte swap
162bc3d5698SJohn Baldwin#endif
163bc3d5698SJohn Baldwin#endif
164bc3d5698SJohn Baldwin	and	r10,r5,r10,ror#2
165bc3d5698SJohn Baldwin	add	r3,r3,r9			@ E+=X[i]
166bc3d5698SJohn Baldwin	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
167bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
168bc3d5698SJohn Baldwin	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
@ loop until r14 has descended to sp (15 words stored); the Thumb-2 build
@ copies sp to r12 first instead of using sp as a teq operand
169bc3d5698SJohn Baldwin#if defined(__thumb2__)
170bc3d5698SJohn Baldwin	mov	r12,sp
171bc3d5698SJohn Baldwin	teq	r14,r12
172bc3d5698SJohn Baldwin#else
173bc3d5698SJohn Baldwin	teq	r14,sp
174bc3d5698SJohn Baldwin#endif
175bc3d5698SJohn Baldwin	bne	.L_00_15		@ [((11+4)*5+2)*3]
@ Rounds 15..19. sp drops another 25 words, to 40 words below the frame top,
@ which is where r14 will stand after round 39. Round 15 still reads input;
@ rounds 16..19 derive X[i] from earlier schedule words.
176bc3d5698SJohn Baldwin	sub	sp,sp,#25*4
177bc3d5698SJohn Baldwin#if __ARM_ARCH__<7
178bc3d5698SJohn Baldwin	ldrb	r10,[r1,#2]
179bc3d5698SJohn Baldwin	ldrb	r9,[r1,#3]
180bc3d5698SJohn Baldwin	ldrb	r11,[r1,#1]
181bc3d5698SJohn Baldwin	add	r7,r8,r7,ror#2			@ E+=K_00_19
182bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
183bc3d5698SJohn Baldwin	orr	r9,r9,r10,lsl#8
184bc3d5698SJohn Baldwin	eor	r10,r5,r6			@ F_xx_xx
185bc3d5698SJohn Baldwin	orr	r9,r9,r11,lsl#16
186bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
187bc3d5698SJohn Baldwin	orr	r9,r9,r12,lsl#24
188bc3d5698SJohn Baldwin#else
189bc3d5698SJohn Baldwin	ldr	r9,[r1],#4			@ handles unaligned
190bc3d5698SJohn Baldwin	add	r7,r8,r7,ror#2			@ E+=K_00_19
191bc3d5698SJohn Baldwin	eor	r10,r5,r6			@ F_xx_xx
192bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
193bc3d5698SJohn Baldwin#ifdef __ARMEL__
194bc3d5698SJohn Baldwin	rev	r9,r9				@ byte swap
195bc3d5698SJohn Baldwin#endif
196bc3d5698SJohn Baldwin#endif
197bc3d5698SJohn Baldwin	and	r10,r4,r10,ror#2
198bc3d5698SJohn Baldwin	add	r7,r7,r9			@ E+=X[i]
199bc3d5698SJohn Baldwin	eor	r10,r10,r6,ror#2		@ F_00_19(B,C,D)
200bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
201bc3d5698SJohn Baldwin	add	r7,r7,r10			@ E+=F_00_19(B,C,D)
@ Message schedule: X[i] = ROL(X[i-16]^X[i-14]^X[i-8]^X[i-3], 1).
@ r14 points at X[i-1], so word offsets 15, 13, 7 and 2 reach those entries;
@ ror#31 is the rotate left by one. The new word is pushed through r14.
202bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
203bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
204bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
205bc3d5698SJohn Baldwin	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
206bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
207bc3d5698SJohn Baldwin	eor	r9,r9,r10
208bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
209bc3d5698SJohn Baldwin	eor	r10,r4,r5			@ F_xx_xx
210bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
211bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
212bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
213bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
214bc3d5698SJohn Baldwin	and	r10,r3,r10,ror#2					@ F_xx_xx
215bc3d5698SJohn Baldwin						@ F_xx_xx
216bc3d5698SJohn Baldwin	add	r6,r6,r9			@ E+=X[i]
217bc3d5698SJohn Baldwin	eor	r10,r10,r5,ror#2		@ F_00_19(B,C,D)
218bc3d5698SJohn Baldwin	add	r6,r6,r10			@ E+=F_00_19(B,C,D)
219bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
220bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
221bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
222bc3d5698SJohn Baldwin	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
223bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
224bc3d5698SJohn Baldwin	eor	r9,r9,r10
225bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
226bc3d5698SJohn Baldwin	eor	r10,r3,r4			@ F_xx_xx
227bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
228bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
229bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
230bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
231bc3d5698SJohn Baldwin	and	r10,r7,r10,ror#2					@ F_xx_xx
232bc3d5698SJohn Baldwin						@ F_xx_xx
233bc3d5698SJohn Baldwin	add	r5,r5,r9			@ E+=X[i]
234bc3d5698SJohn Baldwin	eor	r10,r10,r4,ror#2		@ F_00_19(B,C,D)
235bc3d5698SJohn Baldwin	add	r5,r5,r10			@ E+=F_00_19(B,C,D)
236bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
237bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
238bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
239bc3d5698SJohn Baldwin	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
240bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
241bc3d5698SJohn Baldwin	eor	r9,r9,r10
242bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
243bc3d5698SJohn Baldwin	eor	r10,r7,r3			@ F_xx_xx
244bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
245bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
246bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
247bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
248bc3d5698SJohn Baldwin	and	r10,r6,r10,ror#2					@ F_xx_xx
249bc3d5698SJohn Baldwin						@ F_xx_xx
250bc3d5698SJohn Baldwin	add	r4,r4,r9			@ E+=X[i]
251bc3d5698SJohn Baldwin	eor	r10,r10,r3,ror#2		@ F_00_19(B,C,D)
252bc3d5698SJohn Baldwin	add	r4,r4,r10			@ E+=F_00_19(B,C,D)
253bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
254bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
255bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
256bc3d5698SJohn Baldwin	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
257bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
258bc3d5698SJohn Baldwin	eor	r9,r9,r10
259bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
260bc3d5698SJohn Baldwin	eor	r10,r6,r7			@ F_xx_xx
261bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
262bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
263bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
264bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
265bc3d5698SJohn Baldwin	and	r10,r5,r10,ror#2					@ F_xx_xx
266bc3d5698SJohn Baldwin						@ F_xx_xx
267bc3d5698SJohn Baldwin	add	r3,r3,r9			@ E+=X[i]
268bc3d5698SJohn Baldwin	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
269bc3d5698SJohn Baldwin	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
270bc3d5698SJohn Baldwin
@ Rounds 20..39 and 60..79 share one loop body: the same F (B^C^D) with a
@ different K in r8. The carry flag records which pass is running
@ (clear = 20..39, set = 60..79) and must survive the whole loop.
271bc3d5698SJohn Baldwin	ldr	r8,.LK_20_39		@ [+15+16*4]
272bc3d5698SJohn Baldwin	cmn	sp,#0			@ [+3], clear carry to denote 20_39
273bc3d5698SJohn Baldwin.L_20_39_or_60_79:
274bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
275bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
276bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
277bc3d5698SJohn Baldwin	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
278bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
279bc3d5698SJohn Baldwin	eor	r9,r9,r10
280bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
281bc3d5698SJohn Baldwin	eor	r10,r5,r6			@ F_xx_xx
282bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
283bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
284bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
285bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
286bc3d5698SJohn Baldwin	eor	r10,r4,r10,ror#2					@ F_xx_xx
287bc3d5698SJohn Baldwin						@ F_xx_xx
288bc3d5698SJohn Baldwin	add	r7,r7,r9			@ E+=X[i]
289bc3d5698SJohn Baldwin	add	r7,r7,r10			@ E+=F_20_39(B,C,D)
290bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
291bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
292bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
293bc3d5698SJohn Baldwin	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
294bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
295bc3d5698SJohn Baldwin	eor	r9,r9,r10
296bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
297bc3d5698SJohn Baldwin	eor	r10,r4,r5			@ F_xx_xx
298bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
299bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
300bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
301bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
302bc3d5698SJohn Baldwin	eor	r10,r3,r10,ror#2					@ F_xx_xx
303bc3d5698SJohn Baldwin						@ F_xx_xx
304bc3d5698SJohn Baldwin	add	r6,r6,r9			@ E+=X[i]
305bc3d5698SJohn Baldwin	add	r6,r6,r10			@ E+=F_20_39(B,C,D)
306bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
307bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
308bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
309bc3d5698SJohn Baldwin	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
310bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
311bc3d5698SJohn Baldwin	eor	r9,r9,r10
312bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
313bc3d5698SJohn Baldwin	eor	r10,r3,r4			@ F_xx_xx
314bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
315bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
316bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
317bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
318bc3d5698SJohn Baldwin	eor	r10,r7,r10,ror#2					@ F_xx_xx
319bc3d5698SJohn Baldwin						@ F_xx_xx
320bc3d5698SJohn Baldwin	add	r5,r5,r9			@ E+=X[i]
321bc3d5698SJohn Baldwin	add	r5,r5,r10			@ E+=F_20_39(B,C,D)
322bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
323bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
324bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
325bc3d5698SJohn Baldwin	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
326bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
327bc3d5698SJohn Baldwin	eor	r9,r9,r10
328bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
329bc3d5698SJohn Baldwin	eor	r10,r7,r3			@ F_xx_xx
330bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
331bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
332bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
333bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
334bc3d5698SJohn Baldwin	eor	r10,r6,r10,ror#2					@ F_xx_xx
335bc3d5698SJohn Baldwin						@ F_xx_xx
336bc3d5698SJohn Baldwin	add	r4,r4,r9			@ E+=X[i]
337bc3d5698SJohn Baldwin	add	r4,r4,r10			@ E+=F_20_39(B,C,D)
338bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
339bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
340bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
341bc3d5698SJohn Baldwin	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
342bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
343bc3d5698SJohn Baldwin	eor	r9,r9,r10
344bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
345bc3d5698SJohn Baldwin	eor	r10,r6,r7			@ F_xx_xx
346bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
347bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
348bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
349bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
350bc3d5698SJohn Baldwin	eor	r10,r5,r10,ror#2					@ F_xx_xx
351bc3d5698SJohn Baldwin						@ F_xx_xx
352bc3d5698SJohn Baldwin	add	r3,r3,r9			@ E+=X[i]
353bc3d5698SJohn Baldwin	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
@ loop until r14 reaches sp; once it does, a set carry means rounds 60..79
@ have just finished and the block is done
354bc3d5698SJohn Baldwin#if defined(__thumb2__)
355bc3d5698SJohn Baldwin	mov	r12,sp
356bc3d5698SJohn Baldwin	teq	r14,r12
357bc3d5698SJohn Baldwin#else
358bc3d5698SJohn Baldwin	teq	r14,sp			@ preserve carry
359bc3d5698SJohn Baldwin#endif
360bc3d5698SJohn Baldwin	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
361bc3d5698SJohn Baldwin	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
362bc3d5698SJohn Baldwin
@ Rounds 40..59: the end marker moves to 60 words below the frame top.
@ F_40_59 is formed as (B & (C^D)) + (C & D); the two terms can never have
@ the same bit set, so the addition behaves as a bitwise or.
363bc3d5698SJohn Baldwin	ldr	r8,.LK_40_59
364bc3d5698SJohn Baldwin	sub	sp,sp,#20*4		@ [+2]
365bc3d5698SJohn Baldwin.L_40_59:
366bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
367bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
368bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
369bc3d5698SJohn Baldwin	add	r7,r8,r7,ror#2			@ E+=K_xx_xx
370bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
371bc3d5698SJohn Baldwin	eor	r9,r9,r10
372bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
373bc3d5698SJohn Baldwin	eor	r10,r5,r6			@ F_xx_xx
374bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
375bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27			@ E+=ROR(A,27)
376bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
377bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
378bc3d5698SJohn Baldwin	and	r10,r4,r10,ror#2					@ F_xx_xx
379bc3d5698SJohn Baldwin	and	r11,r5,r6					@ F_xx_xx
380bc3d5698SJohn Baldwin	add	r7,r7,r9			@ E+=X[i]
381bc3d5698SJohn Baldwin	add	r7,r7,r10			@ E+=F_40_59(B,C,D)
382bc3d5698SJohn Baldwin	add	r7,r7,r11,ror#2
383bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
384bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
385bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
386bc3d5698SJohn Baldwin	add	r6,r8,r6,ror#2			@ E+=K_xx_xx
387bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
388bc3d5698SJohn Baldwin	eor	r9,r9,r10
389bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
390bc3d5698SJohn Baldwin	eor	r10,r4,r5			@ F_xx_xx
391bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
392bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27			@ E+=ROR(A,27)
393bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
394bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
395bc3d5698SJohn Baldwin	and	r10,r3,r10,ror#2					@ F_xx_xx
396bc3d5698SJohn Baldwin	and	r11,r4,r5					@ F_xx_xx
397bc3d5698SJohn Baldwin	add	r6,r6,r9			@ E+=X[i]
398bc3d5698SJohn Baldwin	add	r6,r6,r10			@ E+=F_40_59(B,C,D)
399bc3d5698SJohn Baldwin	add	r6,r6,r11,ror#2
400bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
401bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
402bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
403bc3d5698SJohn Baldwin	add	r5,r8,r5,ror#2			@ E+=K_xx_xx
404bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
405bc3d5698SJohn Baldwin	eor	r9,r9,r10
406bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
407bc3d5698SJohn Baldwin	eor	r10,r3,r4			@ F_xx_xx
408bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
409bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27			@ E+=ROR(A,27)
410bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
411bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
412bc3d5698SJohn Baldwin	and	r10,r7,r10,ror#2					@ F_xx_xx
413bc3d5698SJohn Baldwin	and	r11,r3,r4					@ F_xx_xx
414bc3d5698SJohn Baldwin	add	r5,r5,r9			@ E+=X[i]
415bc3d5698SJohn Baldwin	add	r5,r5,r10			@ E+=F_40_59(B,C,D)
416bc3d5698SJohn Baldwin	add	r5,r5,r11,ror#2
417bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
418bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
419bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
420bc3d5698SJohn Baldwin	add	r4,r8,r4,ror#2			@ E+=K_xx_xx
421bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
422bc3d5698SJohn Baldwin	eor	r9,r9,r10
423bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
424bc3d5698SJohn Baldwin	eor	r10,r7,r3			@ F_xx_xx
425bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
426bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27			@ E+=ROR(A,27)
427bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
428bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
429bc3d5698SJohn Baldwin	and	r10,r6,r10,ror#2					@ F_xx_xx
430bc3d5698SJohn Baldwin	and	r11,r7,r3					@ F_xx_xx
431bc3d5698SJohn Baldwin	add	r4,r4,r9			@ E+=X[i]
432bc3d5698SJohn Baldwin	add	r4,r4,r10			@ E+=F_40_59(B,C,D)
433bc3d5698SJohn Baldwin	add	r4,r4,r11,ror#2
434bc3d5698SJohn Baldwin	ldr	r9,[r14,#15*4]
435bc3d5698SJohn Baldwin	ldr	r10,[r14,#13*4]
436bc3d5698SJohn Baldwin	ldr	r11,[r14,#7*4]
437bc3d5698SJohn Baldwin	add	r3,r8,r3,ror#2			@ E+=K_xx_xx
438bc3d5698SJohn Baldwin	ldr	r12,[r14,#2*4]
439bc3d5698SJohn Baldwin	eor	r9,r9,r10
440bc3d5698SJohn Baldwin	eor	r11,r11,r12			@ 1 cycle stall
441bc3d5698SJohn Baldwin	eor	r10,r6,r7			@ F_xx_xx
442bc3d5698SJohn Baldwin	mov	r9,r9,ror#31
443bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27			@ E+=ROR(A,27)
444bc3d5698SJohn Baldwin	eor	r9,r9,r11,ror#31
445bc3d5698SJohn Baldwin	str	r9,[r14,#-4]!
446bc3d5698SJohn Baldwin	and	r10,r5,r10,ror#2					@ F_xx_xx
447bc3d5698SJohn Baldwin	and	r11,r6,r7					@ F_xx_xx
448bc3d5698SJohn Baldwin	add	r3,r3,r9			@ E+=X[i]
449bc3d5698SJohn Baldwin	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
450bc3d5698SJohn Baldwin	add	r3,r3,r11,ror#2
451bc3d5698SJohn Baldwin#if defined(__thumb2__)
452bc3d5698SJohn Baldwin	mov	r12,sp
453bc3d5698SJohn Baldwin	teq	r14,r12
454bc3d5698SJohn Baldwin#else
455bc3d5698SJohn Baldwin	teq	r14,sp
456bc3d5698SJohn Baldwin#endif
457bc3d5698SJohn Baldwin	bne	.L_40_59		@ [+((12+5)*5+2)*4]
458bc3d5698SJohn Baldwin
@ Final 20 rounds: the end marker moves to 80 words below the frame top, the
@ carry is set, and the shared loop is re-entered with K_60_79 in r8.
459bc3d5698SJohn Baldwin	ldr	r8,.LK_60_79
460bc3d5698SJohn Baldwin	sub	sp,sp,#20*4
461bc3d5698SJohn Baldwin	cmp	sp,#0			@ set carry to denote 60_79
462bc3d5698SJohn Baldwin	b	.L_20_39_or_60_79	@ [+4], spare 300 bytes
@ Block finished: put sp back at the frame top, add the working variables to
@ the state at r0 (C, D and E are un-rotated with ror#2 on the way) and store
@ the result. r3-r7 stay live as the starting values for the next block.
463bc3d5698SJohn Baldwin.L_done:
464bc3d5698SJohn Baldwin	add	sp,sp,#80*4		@ "deallocate" stack frame
465bc3d5698SJohn Baldwin	ldmia	r0,{r8,r9,r10,r11,r12}
466bc3d5698SJohn Baldwin	add	r3,r8,r3
467bc3d5698SJohn Baldwin	add	r4,r9,r4
468bc3d5698SJohn Baldwin	add	r5,r10,r5,ror#2
469bc3d5698SJohn Baldwin	add	r6,r11,r6,ror#2
470bc3d5698SJohn Baldwin	add	r7,r12,r7,ror#2
471bc3d5698SJohn Baldwin	stmia	r0,{r3,r4,r5,r6,r7}
@ another block unless the input pointer has reached the end pointer
472bc3d5698SJohn Baldwin	teq	r1,r2
473bc3d5698SJohn Baldwin	bne	.Lloop			@ [+18], total 1307
474bc3d5698SJohn Baldwin
@ Return. With __ARM_ARCH__>=5 the saved lr is popped straight into pc.
@ Otherwise lr is restored and mov pc,lr is used when bit 0 of lr is clear;
@ when it is set, execution falls into the hand-encoded word 0xe12fff1e,
@ which is the ARM encoding of bx lr.
475bc3d5698SJohn Baldwin#if __ARM_ARCH__>=5
476bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
477bc3d5698SJohn Baldwin#else
478bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
479bc3d5698SJohn Baldwin	tst	lr,#1
480bc3d5698SJohn Baldwin	moveq	pc,lr			@ be binary compatible with V4, yet
481bc3d5698SJohn Baldwin.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
482bc3d5698SJohn Baldwin#endif
483bc3d5698SJohn Baldwin.size	sha1_block_data_order,.-sha1_block_data_order
484bc3d5698SJohn Baldwin
@ Literal data used by the SHA-1 code in this file.
@ .LK_xx_yy hold one 32-bit round constant per 20-round group. The integer
@ path loads each with ldr r8,.LK_xx_yy; the NEON path takes the address of
@ .LK_00_19 and reads the entries in order, so the four words must stay
@ contiguous and in this order.
485bc3d5698SJohn Baldwin.align	5
486bc3d5698SJohn Baldwin.LK_00_19:.word	0x5a827999
487bc3d5698SJohn Baldwin.LK_20_39:.word	0x6ed9eba1
488bc3d5698SJohn Baldwin.LK_40_59:.word	0x8f1bbcdc
489bc3d5698SJohn Baldwin.LK_60_79:.word	0xca62c1d6
490bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7
@ Locator for OPENSSL_armcap_P, read by the capability check at function
@ entry: its absolute address on _WIN32, otherwise its offset from
@ .Lsha1_block so that the reference stays position-independent.
491bc3d5698SJohn Baldwin.LOPENSSL_armcap:
492*c0855eaaSJohn Baldwin# ifdef	_WIN32
493*c0855eaaSJohn Baldwin.word	OPENSSL_armcap_P
494*c0855eaaSJohn Baldwin# else
495bc3d5698SJohn Baldwin.word	OPENSSL_armcap_P-.Lsha1_block
496bc3d5698SJohn Baldwin# endif
497*c0855eaaSJohn Baldwin#endif
@ NUL-terminated ASCII identification string; the bytes spell
@ SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>
498bc3d5698SJohn Baldwin.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@ the string has an odd length, so re-align before whatever follows
499bc3d5698SJohn Baldwin.align	2
500bc3d5698SJohn Baldwin.align	5
501bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7
502bc3d5698SJohn Baldwin.arch	armv7-a
503bc3d5698SJohn Baldwin.fpu	neon
504bc3d5698SJohn Baldwin
505bc3d5698SJohn Baldwin.type	sha1_block_data_order_neon,%function
506bc3d5698SJohn Baldwin.align	4
507bc3d5698SJohn Baldwinsha1_block_data_order_neon:
508bc3d5698SJohn Baldwin.LNEON:
509bc3d5698SJohn Baldwin	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
510bc3d5698SJohn Baldwin	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
511bc3d5698SJohn Baldwin	@ dmb				@ errata #451034 on early Cortex A8
512bc3d5698SJohn Baldwin	@ vstmdb	sp!,{d8-d15}	@ ABI specification says so
513bc3d5698SJohn Baldwin	mov	r14,sp
514bc3d5698SJohn Baldwin	sub	r12,sp,#64
515bc3d5698SJohn Baldwin	adr	r8,.LK_00_19
516bc3d5698SJohn Baldwin	bic	r12,r12,#15		@ align for 128-bit stores
517bc3d5698SJohn Baldwin
518bc3d5698SJohn Baldwin	ldmia	r0,{r3,r4,r5,r6,r7}	@ load context
519bc3d5698SJohn Baldwin	mov	sp,r12		@ alloca
520bc3d5698SJohn Baldwin
521bc3d5698SJohn Baldwin	vld1.8	{q0,q1},[r1]!	@ handles unaligned
522bc3d5698SJohn Baldwin	veor	q15,q15,q15
523bc3d5698SJohn Baldwin	vld1.8	{q2,q3},[r1]!
524bc3d5698SJohn Baldwin	vld1.32	{d28[],d29[]},[r8,:32]!	@ load K_00_19
525bc3d5698SJohn Baldwin	vrev32.8	q0,q0		@ yes, even on
526bc3d5698SJohn Baldwin	vrev32.8	q1,q1		@ big-endian...
527bc3d5698SJohn Baldwin	vrev32.8	q2,q2
528bc3d5698SJohn Baldwin	vadd.i32	q8,q0,q14
529bc3d5698SJohn Baldwin	vrev32.8	q3,q3
530bc3d5698SJohn Baldwin	vadd.i32	q9,q1,q14
531bc3d5698SJohn Baldwin	vst1.32	{q8},[r12,:128]!
532bc3d5698SJohn Baldwin	vadd.i32	q10,q2,q14
533bc3d5698SJohn Baldwin	vst1.32	{q9},[r12,:128]!
534bc3d5698SJohn Baldwin	vst1.32	{q10},[r12,:128]!
535bc3d5698SJohn Baldwin	ldr	r9,[sp]			@ big RAW stall
536bc3d5698SJohn Baldwin
537bc3d5698SJohn Baldwin.Loop_neon:
538bc3d5698SJohn Baldwin	vext.8	q8,q0,q1,#8
539bc3d5698SJohn Baldwin	bic	r10,r6,r4
540bc3d5698SJohn Baldwin	add	r7,r7,r9
541bc3d5698SJohn Baldwin	and	r11,r5,r4
542bc3d5698SJohn Baldwin	vadd.i32	q13,q3,q14
543bc3d5698SJohn Baldwin	ldr	r9,[sp,#4]
544bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
545bc3d5698SJohn Baldwin	vext.8	q12,q3,q15,#4
546bc3d5698SJohn Baldwin	eor	r11,r11,r10
547bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
548bc3d5698SJohn Baldwin	add	r7,r7,r11
549bc3d5698SJohn Baldwin	veor	q8,q8,q0
550bc3d5698SJohn Baldwin	bic	r10,r5,r3
551bc3d5698SJohn Baldwin	add	r6,r6,r9
552bc3d5698SJohn Baldwin	veor	q12,q12,q2
553bc3d5698SJohn Baldwin	and	r11,r4,r3
554bc3d5698SJohn Baldwin	ldr	r9,[sp,#8]
555bc3d5698SJohn Baldwin	veor	q12,q12,q8
556bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
557bc3d5698SJohn Baldwin	eor	r11,r11,r10
558bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
559bc3d5698SJohn Baldwin	sub	r12,r12,#64
560bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
561bc3d5698SJohn Baldwin	add	r6,r6,r11
562bc3d5698SJohn Baldwin	vext.8	q13,q15,q12,#4
563bc3d5698SJohn Baldwin	bic	r10,r4,r7
564bc3d5698SJohn Baldwin	add	r5,r5,r9
565bc3d5698SJohn Baldwin	vadd.i32	q8,q12,q12
566bc3d5698SJohn Baldwin	and	r11,r3,r7
567bc3d5698SJohn Baldwin	ldr	r9,[sp,#12]
568bc3d5698SJohn Baldwin	vsri.32	q8,q12,#31
569bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
570bc3d5698SJohn Baldwin	eor	r11,r11,r10
571bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
572bc3d5698SJohn Baldwin	vshr.u32	q12,q13,#30
573bc3d5698SJohn Baldwin	add	r5,r5,r11
574bc3d5698SJohn Baldwin	bic	r10,r3,r6
575bc3d5698SJohn Baldwin	vshl.u32	q13,q13,#2
576bc3d5698SJohn Baldwin	add	r4,r4,r9
577bc3d5698SJohn Baldwin	and	r11,r7,r6
578bc3d5698SJohn Baldwin	veor	q8,q8,q12
579bc3d5698SJohn Baldwin	ldr	r9,[sp,#16]
580bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
581bc3d5698SJohn Baldwin	veor	q8,q8,q13
582bc3d5698SJohn Baldwin	eor	r11,r11,r10
583bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
584bc3d5698SJohn Baldwin	add	r4,r4,r11
585bc3d5698SJohn Baldwin	vext.8	q9,q1,q2,#8
586bc3d5698SJohn Baldwin	bic	r10,r7,r5
587bc3d5698SJohn Baldwin	add	r3,r3,r9
588bc3d5698SJohn Baldwin	and	r11,r6,r5
589bc3d5698SJohn Baldwin	vadd.i32	q13,q8,q14
590bc3d5698SJohn Baldwin	ldr	r9,[sp,#20]
591bc3d5698SJohn Baldwin	vld1.32	{d28[],d29[]},[r8,:32]!
592bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
593bc3d5698SJohn Baldwin	vext.8	q12,q8,q15,#4
594bc3d5698SJohn Baldwin	eor	r11,r11,r10
595bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
596bc3d5698SJohn Baldwin	add	r3,r3,r11
597bc3d5698SJohn Baldwin	veor	q9,q9,q1
598bc3d5698SJohn Baldwin	bic	r10,r6,r4
599bc3d5698SJohn Baldwin	add	r7,r7,r9
600bc3d5698SJohn Baldwin	veor	q12,q12,q3
601bc3d5698SJohn Baldwin	and	r11,r5,r4
602bc3d5698SJohn Baldwin	ldr	r9,[sp,#24]
603bc3d5698SJohn Baldwin	veor	q12,q12,q9
604bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
605bc3d5698SJohn Baldwin	eor	r11,r11,r10
606bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
607bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
608bc3d5698SJohn Baldwin	add	r7,r7,r11
609bc3d5698SJohn Baldwin	vext.8	q13,q15,q12,#4
610bc3d5698SJohn Baldwin	bic	r10,r5,r3
611bc3d5698SJohn Baldwin	add	r6,r6,r9
612bc3d5698SJohn Baldwin	vadd.i32	q9,q12,q12
613bc3d5698SJohn Baldwin	and	r11,r4,r3
614bc3d5698SJohn Baldwin	ldr	r9,[sp,#28]
615bc3d5698SJohn Baldwin	vsri.32	q9,q12,#31
616bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
617bc3d5698SJohn Baldwin	eor	r11,r11,r10
618bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
619bc3d5698SJohn Baldwin	vshr.u32	q12,q13,#30
620bc3d5698SJohn Baldwin	add	r6,r6,r11
621bc3d5698SJohn Baldwin	bic	r10,r4,r7
622bc3d5698SJohn Baldwin	vshl.u32	q13,q13,#2
623bc3d5698SJohn Baldwin	add	r5,r5,r9
624bc3d5698SJohn Baldwin	and	r11,r3,r7
625bc3d5698SJohn Baldwin	veor	q9,q9,q12
626bc3d5698SJohn Baldwin	ldr	r9,[sp,#32]
627bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
628bc3d5698SJohn Baldwin	veor	q9,q9,q13
629bc3d5698SJohn Baldwin	eor	r11,r11,r10
630bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
631bc3d5698SJohn Baldwin	add	r5,r5,r11
632bc3d5698SJohn Baldwin	vext.8	q10,q2,q3,#8
633bc3d5698SJohn Baldwin	bic	r10,r3,r6
634bc3d5698SJohn Baldwin	add	r4,r4,r9
635bc3d5698SJohn Baldwin	and	r11,r7,r6
636bc3d5698SJohn Baldwin	vadd.i32	q13,q9,q14
637bc3d5698SJohn Baldwin	ldr	r9,[sp,#36]
638bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
639bc3d5698SJohn Baldwin	vext.8	q12,q9,q15,#4
640bc3d5698SJohn Baldwin	eor	r11,r11,r10
641bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
642bc3d5698SJohn Baldwin	add	r4,r4,r11
643bc3d5698SJohn Baldwin	veor	q10,q10,q2
644bc3d5698SJohn Baldwin	bic	r10,r7,r5
645bc3d5698SJohn Baldwin	add	r3,r3,r9
646bc3d5698SJohn Baldwin	veor	q12,q12,q8
647bc3d5698SJohn Baldwin	and	r11,r6,r5
648bc3d5698SJohn Baldwin	ldr	r9,[sp,#40]
649bc3d5698SJohn Baldwin	veor	q12,q12,q10
650bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
651bc3d5698SJohn Baldwin	eor	r11,r11,r10
652bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
653bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
654bc3d5698SJohn Baldwin	add	r3,r3,r11
655bc3d5698SJohn Baldwin	vext.8	q13,q15,q12,#4
656bc3d5698SJohn Baldwin	bic	r10,r6,r4
657bc3d5698SJohn Baldwin	add	r7,r7,r9
658bc3d5698SJohn Baldwin	vadd.i32	q10,q12,q12
659bc3d5698SJohn Baldwin	and	r11,r5,r4
660bc3d5698SJohn Baldwin	ldr	r9,[sp,#44]
661bc3d5698SJohn Baldwin	vsri.32	q10,q12,#31
662bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
663bc3d5698SJohn Baldwin	eor	r11,r11,r10
664bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
665bc3d5698SJohn Baldwin	vshr.u32	q12,q13,#30
666bc3d5698SJohn Baldwin	add	r7,r7,r11
667bc3d5698SJohn Baldwin	bic	r10,r5,r3
668bc3d5698SJohn Baldwin	vshl.u32	q13,q13,#2
669bc3d5698SJohn Baldwin	add	r6,r6,r9
670bc3d5698SJohn Baldwin	and	r11,r4,r3
671bc3d5698SJohn Baldwin	veor	q10,q10,q12
672bc3d5698SJohn Baldwin	ldr	r9,[sp,#48]
673bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
674bc3d5698SJohn Baldwin	veor	q10,q10,q13
675bc3d5698SJohn Baldwin	eor	r11,r11,r10
676bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
677bc3d5698SJohn Baldwin	add	r6,r6,r11
678bc3d5698SJohn Baldwin	vext.8	q11,q3,q8,#8
679bc3d5698SJohn Baldwin	bic	r10,r4,r7
680bc3d5698SJohn Baldwin	add	r5,r5,r9
681bc3d5698SJohn Baldwin	and	r11,r3,r7
682bc3d5698SJohn Baldwin	vadd.i32	q13,q10,q14
683bc3d5698SJohn Baldwin	ldr	r9,[sp,#52]
684bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
685bc3d5698SJohn Baldwin	vext.8	q12,q10,q15,#4
686bc3d5698SJohn Baldwin	eor	r11,r11,r10
687bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
688bc3d5698SJohn Baldwin	add	r5,r5,r11
689bc3d5698SJohn Baldwin	veor	q11,q11,q3
690bc3d5698SJohn Baldwin	bic	r10,r3,r6
691bc3d5698SJohn Baldwin	add	r4,r4,r9
692bc3d5698SJohn Baldwin	veor	q12,q12,q9
693bc3d5698SJohn Baldwin	and	r11,r7,r6
694bc3d5698SJohn Baldwin	ldr	r9,[sp,#56]
695bc3d5698SJohn Baldwin	veor	q12,q12,q11
696bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
697bc3d5698SJohn Baldwin	eor	r11,r11,r10
698bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
699bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
700bc3d5698SJohn Baldwin	add	r4,r4,r11
701bc3d5698SJohn Baldwin	vext.8	q13,q15,q12,#4
702bc3d5698SJohn Baldwin	bic	r10,r7,r5
703bc3d5698SJohn Baldwin	add	r3,r3,r9
704bc3d5698SJohn Baldwin	vadd.i32	q11,q12,q12
705bc3d5698SJohn Baldwin	and	r11,r6,r5
706bc3d5698SJohn Baldwin	ldr	r9,[sp,#60]
707bc3d5698SJohn Baldwin	vsri.32	q11,q12,#31
708bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
709bc3d5698SJohn Baldwin	eor	r11,r11,r10
710bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
711bc3d5698SJohn Baldwin	vshr.u32	q12,q13,#30
712bc3d5698SJohn Baldwin	add	r3,r3,r11
713bc3d5698SJohn Baldwin	bic	r10,r6,r4
714bc3d5698SJohn Baldwin	vshl.u32	q13,q13,#2
715bc3d5698SJohn Baldwin	add	r7,r7,r9
716bc3d5698SJohn Baldwin	and	r11,r5,r4
717bc3d5698SJohn Baldwin	veor	q11,q11,q12
718bc3d5698SJohn Baldwin	ldr	r9,[sp,#0]
719bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
720bc3d5698SJohn Baldwin	veor	q11,q11,q13
721bc3d5698SJohn Baldwin	eor	r11,r11,r10
722bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
723bc3d5698SJohn Baldwin	add	r7,r7,r11
724bc3d5698SJohn Baldwin	vext.8	q12,q10,q11,#8
725bc3d5698SJohn Baldwin	bic	r10,r5,r3
726bc3d5698SJohn Baldwin	add	r6,r6,r9
727bc3d5698SJohn Baldwin	and	r11,r4,r3
728bc3d5698SJohn Baldwin	veor	q0,q0,q8
729bc3d5698SJohn Baldwin	ldr	r9,[sp,#4]
730bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
731bc3d5698SJohn Baldwin	veor	q0,q0,q1
732bc3d5698SJohn Baldwin	eor	r11,r11,r10
733bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
734bc3d5698SJohn Baldwin	vadd.i32	q13,q11,q14
735bc3d5698SJohn Baldwin	add	r6,r6,r11
736bc3d5698SJohn Baldwin	bic	r10,r4,r7
737bc3d5698SJohn Baldwin	veor	q12,q12,q0
738bc3d5698SJohn Baldwin	add	r5,r5,r9
739bc3d5698SJohn Baldwin	and	r11,r3,r7
740bc3d5698SJohn Baldwin	vshr.u32	q0,q12,#30
741bc3d5698SJohn Baldwin	ldr	r9,[sp,#8]
742bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
743bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
744bc3d5698SJohn Baldwin	sub	r12,r12,#64
745bc3d5698SJohn Baldwin	eor	r11,r11,r10
746bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
747bc3d5698SJohn Baldwin	vsli.32	q0,q12,#2
748bc3d5698SJohn Baldwin	add	r5,r5,r11
749bc3d5698SJohn Baldwin	bic	r10,r3,r6
750bc3d5698SJohn Baldwin	add	r4,r4,r9
751bc3d5698SJohn Baldwin	and	r11,r7,r6
752bc3d5698SJohn Baldwin	ldr	r9,[sp,#12]
753bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
754bc3d5698SJohn Baldwin	eor	r11,r11,r10
755bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
756bc3d5698SJohn Baldwin	add	r4,r4,r11
757bc3d5698SJohn Baldwin	bic	r10,r7,r5
758bc3d5698SJohn Baldwin	add	r3,r3,r9
759bc3d5698SJohn Baldwin	and	r11,r6,r5
760bc3d5698SJohn Baldwin	ldr	r9,[sp,#16]
761bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
762bc3d5698SJohn Baldwin	eor	r11,r11,r10
763bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
764bc3d5698SJohn Baldwin	add	r3,r3,r11
765bc3d5698SJohn Baldwin	vext.8	q12,q11,q0,#8
766bc3d5698SJohn Baldwin	eor	r10,r4,r6
767bc3d5698SJohn Baldwin	add	r7,r7,r9
768bc3d5698SJohn Baldwin	ldr	r9,[sp,#20]
769bc3d5698SJohn Baldwin	veor	q1,q1,q9
770bc3d5698SJohn Baldwin	eor	r11,r10,r5
771bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
772bc3d5698SJohn Baldwin	veor	q1,q1,q2
773bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
774bc3d5698SJohn Baldwin	add	r7,r7,r11
775bc3d5698SJohn Baldwin	vadd.i32	q13,q0,q14
776bc3d5698SJohn Baldwin	eor	r10,r3,r5
777bc3d5698SJohn Baldwin	add	r6,r6,r9
778bc3d5698SJohn Baldwin	veor	q12,q12,q1
779bc3d5698SJohn Baldwin	ldr	r9,[sp,#24]
780bc3d5698SJohn Baldwin	eor	r11,r10,r4
781bc3d5698SJohn Baldwin	vshr.u32	q1,q12,#30
782bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
783bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
784bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
785bc3d5698SJohn Baldwin	add	r6,r6,r11
786bc3d5698SJohn Baldwin	eor	r10,r7,r4
787bc3d5698SJohn Baldwin	vsli.32	q1,q12,#2
788bc3d5698SJohn Baldwin	add	r5,r5,r9
789bc3d5698SJohn Baldwin	ldr	r9,[sp,#28]
790bc3d5698SJohn Baldwin	eor	r11,r10,r3
791bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
792bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
793bc3d5698SJohn Baldwin	add	r5,r5,r11
794bc3d5698SJohn Baldwin	eor	r10,r6,r3
795bc3d5698SJohn Baldwin	add	r4,r4,r9
796bc3d5698SJohn Baldwin	ldr	r9,[sp,#32]
797bc3d5698SJohn Baldwin	eor	r11,r10,r7
798bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
799bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
800bc3d5698SJohn Baldwin	add	r4,r4,r11
801bc3d5698SJohn Baldwin	vext.8	q12,q0,q1,#8
802bc3d5698SJohn Baldwin	eor	r10,r5,r7
803bc3d5698SJohn Baldwin	add	r3,r3,r9
804bc3d5698SJohn Baldwin	ldr	r9,[sp,#36]
805bc3d5698SJohn Baldwin	veor	q2,q2,q10
806bc3d5698SJohn Baldwin	eor	r11,r10,r6
807bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
808bc3d5698SJohn Baldwin	veor	q2,q2,q3
809bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
810bc3d5698SJohn Baldwin	add	r3,r3,r11
811bc3d5698SJohn Baldwin	vadd.i32	q13,q1,q14
812bc3d5698SJohn Baldwin	eor	r10,r4,r6
813bc3d5698SJohn Baldwin	vld1.32	{d28[],d29[]},[r8,:32]!
814bc3d5698SJohn Baldwin	add	r7,r7,r9
815bc3d5698SJohn Baldwin	veor	q12,q12,q2
816bc3d5698SJohn Baldwin	ldr	r9,[sp,#40]
817bc3d5698SJohn Baldwin	eor	r11,r10,r5
818bc3d5698SJohn Baldwin	vshr.u32	q2,q12,#30
819bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
820bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
821bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
822bc3d5698SJohn Baldwin	add	r7,r7,r11
823bc3d5698SJohn Baldwin	eor	r10,r3,r5
824bc3d5698SJohn Baldwin	vsli.32	q2,q12,#2
825bc3d5698SJohn Baldwin	add	r6,r6,r9
826bc3d5698SJohn Baldwin	ldr	r9,[sp,#44]
827bc3d5698SJohn Baldwin	eor	r11,r10,r4
828bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
829bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
830bc3d5698SJohn Baldwin	add	r6,r6,r11
831bc3d5698SJohn Baldwin	eor	r10,r7,r4
832bc3d5698SJohn Baldwin	add	r5,r5,r9
833bc3d5698SJohn Baldwin	ldr	r9,[sp,#48]
834bc3d5698SJohn Baldwin	eor	r11,r10,r3
835bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
836bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
837bc3d5698SJohn Baldwin	add	r5,r5,r11
838bc3d5698SJohn Baldwin	vext.8	q12,q1,q2,#8
839bc3d5698SJohn Baldwin	eor	r10,r6,r3
840bc3d5698SJohn Baldwin	add	r4,r4,r9
841bc3d5698SJohn Baldwin	ldr	r9,[sp,#52]
842bc3d5698SJohn Baldwin	veor	q3,q3,q11
843bc3d5698SJohn Baldwin	eor	r11,r10,r7
844bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
845bc3d5698SJohn Baldwin	veor	q3,q3,q8
846bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
847bc3d5698SJohn Baldwin	add	r4,r4,r11
848bc3d5698SJohn Baldwin	vadd.i32	q13,q2,q14
849bc3d5698SJohn Baldwin	eor	r10,r5,r7
850bc3d5698SJohn Baldwin	add	r3,r3,r9
851bc3d5698SJohn Baldwin	veor	q12,q12,q3
852bc3d5698SJohn Baldwin	ldr	r9,[sp,#56]
853bc3d5698SJohn Baldwin	eor	r11,r10,r6
854bc3d5698SJohn Baldwin	vshr.u32	q3,q12,#30
855bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
856bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
857bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
858bc3d5698SJohn Baldwin	add	r3,r3,r11
859bc3d5698SJohn Baldwin	eor	r10,r4,r6
860bc3d5698SJohn Baldwin	vsli.32	q3,q12,#2
861bc3d5698SJohn Baldwin	add	r7,r7,r9
862bc3d5698SJohn Baldwin	ldr	r9,[sp,#60]
863bc3d5698SJohn Baldwin	eor	r11,r10,r5
864bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
865bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
866bc3d5698SJohn Baldwin	add	r7,r7,r11
867bc3d5698SJohn Baldwin	eor	r10,r3,r5
868bc3d5698SJohn Baldwin	add	r6,r6,r9
869bc3d5698SJohn Baldwin	ldr	r9,[sp,#0]
870bc3d5698SJohn Baldwin	eor	r11,r10,r4
871bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
872bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
873bc3d5698SJohn Baldwin	add	r6,r6,r11
874bc3d5698SJohn Baldwin	vext.8	q12,q2,q3,#8
875bc3d5698SJohn Baldwin	eor	r10,r7,r4
876bc3d5698SJohn Baldwin	add	r5,r5,r9
877bc3d5698SJohn Baldwin	ldr	r9,[sp,#4]
878bc3d5698SJohn Baldwin	veor	q8,q8,q0
879bc3d5698SJohn Baldwin	eor	r11,r10,r3
880bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
881bc3d5698SJohn Baldwin	veor	q8,q8,q9
882bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
883bc3d5698SJohn Baldwin	add	r5,r5,r11
884bc3d5698SJohn Baldwin	vadd.i32	q13,q3,q14
885bc3d5698SJohn Baldwin	eor	r10,r6,r3
886bc3d5698SJohn Baldwin	add	r4,r4,r9
887bc3d5698SJohn Baldwin	veor	q12,q12,q8
888bc3d5698SJohn Baldwin	ldr	r9,[sp,#8]
889bc3d5698SJohn Baldwin	eor	r11,r10,r7
890bc3d5698SJohn Baldwin	vshr.u32	q8,q12,#30
891bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
892bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
893bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
894bc3d5698SJohn Baldwin	sub	r12,r12,#64
895bc3d5698SJohn Baldwin	add	r4,r4,r11
896bc3d5698SJohn Baldwin	eor	r10,r5,r7
897bc3d5698SJohn Baldwin	vsli.32	q8,q12,#2
898bc3d5698SJohn Baldwin	add	r3,r3,r9
899bc3d5698SJohn Baldwin	ldr	r9,[sp,#12]
900bc3d5698SJohn Baldwin	eor	r11,r10,r6
901bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
902bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
903bc3d5698SJohn Baldwin	add	r3,r3,r11
904bc3d5698SJohn Baldwin	eor	r10,r4,r6
905bc3d5698SJohn Baldwin	add	r7,r7,r9
906bc3d5698SJohn Baldwin	ldr	r9,[sp,#16]
907bc3d5698SJohn Baldwin	eor	r11,r10,r5
908bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
909bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
910bc3d5698SJohn Baldwin	add	r7,r7,r11
911bc3d5698SJohn Baldwin	vext.8	q12,q3,q8,#8
912bc3d5698SJohn Baldwin	eor	r10,r3,r5
913bc3d5698SJohn Baldwin	add	r6,r6,r9
914bc3d5698SJohn Baldwin	ldr	r9,[sp,#20]
915bc3d5698SJohn Baldwin	veor	q9,q9,q1
916bc3d5698SJohn Baldwin	eor	r11,r10,r4
917bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
918bc3d5698SJohn Baldwin	veor	q9,q9,q10
919bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
920bc3d5698SJohn Baldwin	add	r6,r6,r11
921bc3d5698SJohn Baldwin	vadd.i32	q13,q8,q14
922bc3d5698SJohn Baldwin	eor	r10,r7,r4
923bc3d5698SJohn Baldwin	add	r5,r5,r9
924bc3d5698SJohn Baldwin	veor	q12,q12,q9
925bc3d5698SJohn Baldwin	ldr	r9,[sp,#24]
926bc3d5698SJohn Baldwin	eor	r11,r10,r3
927bc3d5698SJohn Baldwin	vshr.u32	q9,q12,#30
928bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
929bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
930bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
931bc3d5698SJohn Baldwin	add	r5,r5,r11
932bc3d5698SJohn Baldwin	eor	r10,r6,r3
933bc3d5698SJohn Baldwin	vsli.32	q9,q12,#2
934bc3d5698SJohn Baldwin	add	r4,r4,r9
935bc3d5698SJohn Baldwin	ldr	r9,[sp,#28]
936bc3d5698SJohn Baldwin	eor	r11,r10,r7
937bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
938bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
939bc3d5698SJohn Baldwin	add	r4,r4,r11
940bc3d5698SJohn Baldwin	eor	r10,r5,r7
941bc3d5698SJohn Baldwin	add	r3,r3,r9
942bc3d5698SJohn Baldwin	ldr	r9,[sp,#32]
943bc3d5698SJohn Baldwin	eor	r11,r10,r6
944bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
945bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
946bc3d5698SJohn Baldwin	add	r3,r3,r11
947bc3d5698SJohn Baldwin	vext.8	q12,q8,q9,#8
948bc3d5698SJohn Baldwin	add	r7,r7,r9
949bc3d5698SJohn Baldwin	and	r10,r5,r6
950bc3d5698SJohn Baldwin	ldr	r9,[sp,#36]
951bc3d5698SJohn Baldwin	veor	q10,q10,q2
952bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
953bc3d5698SJohn Baldwin	eor	r11,r5,r6
954bc3d5698SJohn Baldwin	veor	q10,q10,q11
955bc3d5698SJohn Baldwin	add	r7,r7,r10
956bc3d5698SJohn Baldwin	and	r11,r11,r4
957bc3d5698SJohn Baldwin	vadd.i32	q13,q9,q14
958bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
959bc3d5698SJohn Baldwin	add	r7,r7,r11
960bc3d5698SJohn Baldwin	veor	q12,q12,q10
961bc3d5698SJohn Baldwin	add	r6,r6,r9
962bc3d5698SJohn Baldwin	and	r10,r4,r5
963bc3d5698SJohn Baldwin	vshr.u32	q10,q12,#30
964bc3d5698SJohn Baldwin	ldr	r9,[sp,#40]
965bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
966bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
967bc3d5698SJohn Baldwin	eor	r11,r4,r5
968bc3d5698SJohn Baldwin	add	r6,r6,r10
969bc3d5698SJohn Baldwin	vsli.32	q10,q12,#2
970bc3d5698SJohn Baldwin	and	r11,r11,r3
971bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
972bc3d5698SJohn Baldwin	add	r6,r6,r11
973bc3d5698SJohn Baldwin	add	r5,r5,r9
974bc3d5698SJohn Baldwin	and	r10,r3,r4
975bc3d5698SJohn Baldwin	ldr	r9,[sp,#44]
976bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
977bc3d5698SJohn Baldwin	eor	r11,r3,r4
978bc3d5698SJohn Baldwin	add	r5,r5,r10
979bc3d5698SJohn Baldwin	and	r11,r11,r7
980bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
981bc3d5698SJohn Baldwin	add	r5,r5,r11
982bc3d5698SJohn Baldwin	add	r4,r4,r9
983bc3d5698SJohn Baldwin	and	r10,r7,r3
984bc3d5698SJohn Baldwin	ldr	r9,[sp,#48]
985bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
986bc3d5698SJohn Baldwin	eor	r11,r7,r3
987bc3d5698SJohn Baldwin	add	r4,r4,r10
988bc3d5698SJohn Baldwin	and	r11,r11,r6
989bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
990bc3d5698SJohn Baldwin	add	r4,r4,r11
991bc3d5698SJohn Baldwin	vext.8	q12,q9,q10,#8
992bc3d5698SJohn Baldwin	add	r3,r3,r9
993bc3d5698SJohn Baldwin	and	r10,r6,r7
994bc3d5698SJohn Baldwin	ldr	r9,[sp,#52]
995bc3d5698SJohn Baldwin	veor	q11,q11,q3
996bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
997bc3d5698SJohn Baldwin	eor	r11,r6,r7
998bc3d5698SJohn Baldwin	veor	q11,q11,q0
999bc3d5698SJohn Baldwin	add	r3,r3,r10
1000bc3d5698SJohn Baldwin	and	r11,r11,r5
1001bc3d5698SJohn Baldwin	vadd.i32	q13,q10,q14
1002bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
1003bc3d5698SJohn Baldwin	vld1.32	{d28[],d29[]},[r8,:32]!
1004bc3d5698SJohn Baldwin	add	r3,r3,r11
1005bc3d5698SJohn Baldwin	veor	q12,q12,q11
1006bc3d5698SJohn Baldwin	add	r7,r7,r9
1007bc3d5698SJohn Baldwin	and	r10,r5,r6
1008bc3d5698SJohn Baldwin	vshr.u32	q11,q12,#30
1009bc3d5698SJohn Baldwin	ldr	r9,[sp,#56]
1010bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
1011bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
1012bc3d5698SJohn Baldwin	eor	r11,r5,r6
1013bc3d5698SJohn Baldwin	add	r7,r7,r10
1014bc3d5698SJohn Baldwin	vsli.32	q11,q12,#2
1015bc3d5698SJohn Baldwin	and	r11,r11,r4
1016bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
1017bc3d5698SJohn Baldwin	add	r7,r7,r11
1018bc3d5698SJohn Baldwin	add	r6,r6,r9
1019bc3d5698SJohn Baldwin	and	r10,r4,r5
1020bc3d5698SJohn Baldwin	ldr	r9,[sp,#60]
1021bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
1022bc3d5698SJohn Baldwin	eor	r11,r4,r5
1023bc3d5698SJohn Baldwin	add	r6,r6,r10
1024bc3d5698SJohn Baldwin	and	r11,r11,r3
1025bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
1026bc3d5698SJohn Baldwin	add	r6,r6,r11
1027bc3d5698SJohn Baldwin	add	r5,r5,r9
1028bc3d5698SJohn Baldwin	and	r10,r3,r4
1029bc3d5698SJohn Baldwin	ldr	r9,[sp,#0]
1030bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
1031bc3d5698SJohn Baldwin	eor	r11,r3,r4
1032bc3d5698SJohn Baldwin	add	r5,r5,r10
1033bc3d5698SJohn Baldwin	and	r11,r11,r7
1034bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
1035bc3d5698SJohn Baldwin	add	r5,r5,r11
1036bc3d5698SJohn Baldwin	vext.8	q12,q10,q11,#8
1037bc3d5698SJohn Baldwin	add	r4,r4,r9
1038bc3d5698SJohn Baldwin	and	r10,r7,r3
1039bc3d5698SJohn Baldwin	ldr	r9,[sp,#4]
1040bc3d5698SJohn Baldwin	veor	q0,q0,q8
1041bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
1042bc3d5698SJohn Baldwin	eor	r11,r7,r3
1043bc3d5698SJohn Baldwin	veor	q0,q0,q1
1044bc3d5698SJohn Baldwin	add	r4,r4,r10
1045bc3d5698SJohn Baldwin	and	r11,r11,r6
1046bc3d5698SJohn Baldwin	vadd.i32	q13,q11,q14
1047bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
1048bc3d5698SJohn Baldwin	add	r4,r4,r11
1049bc3d5698SJohn Baldwin	veor	q12,q12,q0
1050bc3d5698SJohn Baldwin	add	r3,r3,r9
1051bc3d5698SJohn Baldwin	and	r10,r6,r7
1052bc3d5698SJohn Baldwin	vshr.u32	q0,q12,#30
1053bc3d5698SJohn Baldwin	ldr	r9,[sp,#8]
1054bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
1055bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
1056bc3d5698SJohn Baldwin	sub	r12,r12,#64
1057bc3d5698SJohn Baldwin	eor	r11,r6,r7
1058bc3d5698SJohn Baldwin	add	r3,r3,r10
1059bc3d5698SJohn Baldwin	vsli.32	q0,q12,#2
1060bc3d5698SJohn Baldwin	and	r11,r11,r5
1061bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
1062bc3d5698SJohn Baldwin	add	r3,r3,r11
1063bc3d5698SJohn Baldwin	add	r7,r7,r9
1064bc3d5698SJohn Baldwin	and	r10,r5,r6
1065bc3d5698SJohn Baldwin	ldr	r9,[sp,#12]
1066bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
1067bc3d5698SJohn Baldwin	eor	r11,r5,r6
1068bc3d5698SJohn Baldwin	add	r7,r7,r10
1069bc3d5698SJohn Baldwin	and	r11,r11,r4
1070bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
1071bc3d5698SJohn Baldwin	add	r7,r7,r11
1072bc3d5698SJohn Baldwin	add	r6,r6,r9
1073bc3d5698SJohn Baldwin	and	r10,r4,r5
1074bc3d5698SJohn Baldwin	ldr	r9,[sp,#16]
1075bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
1076bc3d5698SJohn Baldwin	eor	r11,r4,r5
1077bc3d5698SJohn Baldwin	add	r6,r6,r10
1078bc3d5698SJohn Baldwin	and	r11,r11,r3
1079bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
1080bc3d5698SJohn Baldwin	add	r6,r6,r11
1081bc3d5698SJohn Baldwin	vext.8	q12,q11,q0,#8
1082bc3d5698SJohn Baldwin	add	r5,r5,r9
1083bc3d5698SJohn Baldwin	and	r10,r3,r4
1084bc3d5698SJohn Baldwin	ldr	r9,[sp,#20]
1085bc3d5698SJohn Baldwin	veor	q1,q1,q9
1086bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
1087bc3d5698SJohn Baldwin	eor	r11,r3,r4
1088bc3d5698SJohn Baldwin	veor	q1,q1,q2
1089bc3d5698SJohn Baldwin	add	r5,r5,r10
1090bc3d5698SJohn Baldwin	and	r11,r11,r7
1091bc3d5698SJohn Baldwin	vadd.i32	q13,q0,q14
1092bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
1093bc3d5698SJohn Baldwin	add	r5,r5,r11
1094bc3d5698SJohn Baldwin	veor	q12,q12,q1
1095bc3d5698SJohn Baldwin	add	r4,r4,r9
1096bc3d5698SJohn Baldwin	and	r10,r7,r3
1097bc3d5698SJohn Baldwin	vshr.u32	q1,q12,#30
1098bc3d5698SJohn Baldwin	ldr	r9,[sp,#24]
1099bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
1100bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
1101bc3d5698SJohn Baldwin	eor	r11,r7,r3
1102bc3d5698SJohn Baldwin	add	r4,r4,r10
1103bc3d5698SJohn Baldwin	vsli.32	q1,q12,#2
1104bc3d5698SJohn Baldwin	and	r11,r11,r6
1105bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
1106bc3d5698SJohn Baldwin	add	r4,r4,r11
1107bc3d5698SJohn Baldwin	add	r3,r3,r9
1108bc3d5698SJohn Baldwin	and	r10,r6,r7
1109bc3d5698SJohn Baldwin	ldr	r9,[sp,#28]
1110bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
1111bc3d5698SJohn Baldwin	eor	r11,r6,r7
1112bc3d5698SJohn Baldwin	add	r3,r3,r10
1113bc3d5698SJohn Baldwin	and	r11,r11,r5
1114bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
1115bc3d5698SJohn Baldwin	add	r3,r3,r11
1116bc3d5698SJohn Baldwin	add	r7,r7,r9
1117bc3d5698SJohn Baldwin	and	r10,r5,r6
1118bc3d5698SJohn Baldwin	ldr	r9,[sp,#32]
1119bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
1120bc3d5698SJohn Baldwin	eor	r11,r5,r6
1121bc3d5698SJohn Baldwin	add	r7,r7,r10
1122bc3d5698SJohn Baldwin	and	r11,r11,r4
1123bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
1124bc3d5698SJohn Baldwin	add	r7,r7,r11
1125bc3d5698SJohn Baldwin	vext.8	q12,q0,q1,#8
1126bc3d5698SJohn Baldwin	add	r6,r6,r9
1127bc3d5698SJohn Baldwin	and	r10,r4,r5
1128bc3d5698SJohn Baldwin	ldr	r9,[sp,#36]
1129bc3d5698SJohn Baldwin	veor	q2,q2,q10
1130bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
1131bc3d5698SJohn Baldwin	eor	r11,r4,r5
1132bc3d5698SJohn Baldwin	veor	q2,q2,q3
1133bc3d5698SJohn Baldwin	add	r6,r6,r10
1134bc3d5698SJohn Baldwin	and	r11,r11,r3
1135bc3d5698SJohn Baldwin	vadd.i32	q13,q1,q14
1136bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
1137bc3d5698SJohn Baldwin	add	r6,r6,r11
1138bc3d5698SJohn Baldwin	veor	q12,q12,q2
1139bc3d5698SJohn Baldwin	add	r5,r5,r9
1140bc3d5698SJohn Baldwin	and	r10,r3,r4
1141bc3d5698SJohn Baldwin	vshr.u32	q2,q12,#30
1142bc3d5698SJohn Baldwin	ldr	r9,[sp,#40]
1143bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
1144bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
1145bc3d5698SJohn Baldwin	eor	r11,r3,r4
1146bc3d5698SJohn Baldwin	add	r5,r5,r10
1147bc3d5698SJohn Baldwin	vsli.32	q2,q12,#2
1148bc3d5698SJohn Baldwin	and	r11,r11,r7
1149bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
1150bc3d5698SJohn Baldwin	add	r5,r5,r11
1151bc3d5698SJohn Baldwin	add	r4,r4,r9
1152bc3d5698SJohn Baldwin	and	r10,r7,r3
1153bc3d5698SJohn Baldwin	ldr	r9,[sp,#44]
1154bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
1155bc3d5698SJohn Baldwin	eor	r11,r7,r3
1156bc3d5698SJohn Baldwin	add	r4,r4,r10
1157bc3d5698SJohn Baldwin	and	r11,r11,r6
1158bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
1159bc3d5698SJohn Baldwin	add	r4,r4,r11
1160bc3d5698SJohn Baldwin	add	r3,r3,r9
1161bc3d5698SJohn Baldwin	and	r10,r6,r7
1162bc3d5698SJohn Baldwin	ldr	r9,[sp,#48]
1163bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
1164bc3d5698SJohn Baldwin	eor	r11,r6,r7
1165bc3d5698SJohn Baldwin	add	r3,r3,r10
1166bc3d5698SJohn Baldwin	and	r11,r11,r5
1167bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
1168bc3d5698SJohn Baldwin	add	r3,r3,r11
1169bc3d5698SJohn Baldwin	vext.8	q12,q1,q2,#8
1170bc3d5698SJohn Baldwin	eor	r10,r4,r6
1171bc3d5698SJohn Baldwin	add	r7,r7,r9
1172bc3d5698SJohn Baldwin	ldr	r9,[sp,#52]
1173bc3d5698SJohn Baldwin	veor	q3,q3,q11
1174bc3d5698SJohn Baldwin	eor	r11,r10,r5
1175bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
1176bc3d5698SJohn Baldwin	veor	q3,q3,q8
1177bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
1178bc3d5698SJohn Baldwin	add	r7,r7,r11
1179bc3d5698SJohn Baldwin	vadd.i32	q13,q2,q14
1180bc3d5698SJohn Baldwin	eor	r10,r3,r5
1181bc3d5698SJohn Baldwin	add	r6,r6,r9
1182bc3d5698SJohn Baldwin	veor	q12,q12,q3
1183bc3d5698SJohn Baldwin	ldr	r9,[sp,#56]
1184bc3d5698SJohn Baldwin	eor	r11,r10,r4
1185bc3d5698SJohn Baldwin	vshr.u32	q3,q12,#30
1186bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
1187bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
1188bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
1189bc3d5698SJohn Baldwin	add	r6,r6,r11
1190bc3d5698SJohn Baldwin	eor	r10,r7,r4
1191bc3d5698SJohn Baldwin	vsli.32	q3,q12,#2
1192bc3d5698SJohn Baldwin	add	r5,r5,r9
1193bc3d5698SJohn Baldwin	ldr	r9,[sp,#60]
1194bc3d5698SJohn Baldwin	eor	r11,r10,r3
1195bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
1196bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
1197bc3d5698SJohn Baldwin	add	r5,r5,r11
1198bc3d5698SJohn Baldwin	eor	r10,r6,r3
1199bc3d5698SJohn Baldwin	add	r4,r4,r9
1200bc3d5698SJohn Baldwin	ldr	r9,[sp,#0]
1201bc3d5698SJohn Baldwin	eor	r11,r10,r7
1202bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
1203bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
1204bc3d5698SJohn Baldwin	add	r4,r4,r11
1205bc3d5698SJohn Baldwin	vadd.i32	q13,q3,q14
1206bc3d5698SJohn Baldwin	eor	r10,r5,r7
1207bc3d5698SJohn Baldwin	add	r3,r3,r9
1208bc3d5698SJohn Baldwin	vst1.32	{q13},[r12,:128]!
1209bc3d5698SJohn Baldwin	sub	r12,r12,#64
1210bc3d5698SJohn Baldwin	teq	r1,r2
1211bc3d5698SJohn Baldwin	sub	r8,r8,#16
1212bc3d5698SJohn Baldwin	it	eq
1213bc3d5698SJohn Baldwin	subeq	r1,r1,#64
1214bc3d5698SJohn Baldwin	vld1.8	{q0,q1},[r1]!
1215bc3d5698SJohn Baldwin	ldr	r9,[sp,#4]
1216bc3d5698SJohn Baldwin	eor	r11,r10,r6
1217bc3d5698SJohn Baldwin	vld1.8	{q2,q3},[r1]!
1218bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
1219bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
1220bc3d5698SJohn Baldwin	vld1.32	{d28[],d29[]},[r8,:32]!
1221bc3d5698SJohn Baldwin	add	r3,r3,r11
1222bc3d5698SJohn Baldwin	eor	r10,r4,r6
1223bc3d5698SJohn Baldwin	vrev32.8	q0,q0
1224bc3d5698SJohn Baldwin	add	r7,r7,r9
1225bc3d5698SJohn Baldwin	ldr	r9,[sp,#8]
1226bc3d5698SJohn Baldwin	eor	r11,r10,r5
1227bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
1228bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
1229bc3d5698SJohn Baldwin	add	r7,r7,r11
1230bc3d5698SJohn Baldwin	eor	r10,r3,r5
1231bc3d5698SJohn Baldwin	add	r6,r6,r9
1232bc3d5698SJohn Baldwin	ldr	r9,[sp,#12]
1233bc3d5698SJohn Baldwin	eor	r11,r10,r4
1234bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
1235bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
1236bc3d5698SJohn Baldwin	add	r6,r6,r11
1237bc3d5698SJohn Baldwin	eor	r10,r7,r4
1238bc3d5698SJohn Baldwin	add	r5,r5,r9
1239bc3d5698SJohn Baldwin	ldr	r9,[sp,#16]
1240bc3d5698SJohn Baldwin	eor	r11,r10,r3
1241bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
1242bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
1243bc3d5698SJohn Baldwin	add	r5,r5,r11
1244bc3d5698SJohn Baldwin	vrev32.8	q1,q1
1245bc3d5698SJohn Baldwin	eor	r10,r6,r3
1246bc3d5698SJohn Baldwin	add	r4,r4,r9
1247bc3d5698SJohn Baldwin	vadd.i32	q8,q0,q14
1248bc3d5698SJohn Baldwin	ldr	r9,[sp,#20]
1249bc3d5698SJohn Baldwin	eor	r11,r10,r7
1250bc3d5698SJohn Baldwin	vst1.32	{q8},[r12,:128]!
1251bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
1252bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
1253bc3d5698SJohn Baldwin	add	r4,r4,r11
1254bc3d5698SJohn Baldwin	eor	r10,r5,r7
1255bc3d5698SJohn Baldwin	add	r3,r3,r9
1256bc3d5698SJohn Baldwin	ldr	r9,[sp,#24]
1257bc3d5698SJohn Baldwin	eor	r11,r10,r6
1258bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
1259bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
1260bc3d5698SJohn Baldwin	add	r3,r3,r11
1261bc3d5698SJohn Baldwin	eor	r10,r4,r6
1262bc3d5698SJohn Baldwin	add	r7,r7,r9
1263bc3d5698SJohn Baldwin	ldr	r9,[sp,#28]
1264bc3d5698SJohn Baldwin	eor	r11,r10,r5
1265bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
1266bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
1267bc3d5698SJohn Baldwin	add	r7,r7,r11
1268bc3d5698SJohn Baldwin	eor	r10,r3,r5
1269bc3d5698SJohn Baldwin	add	r6,r6,r9
1270bc3d5698SJohn Baldwin	ldr	r9,[sp,#32]
1271bc3d5698SJohn Baldwin	eor	r11,r10,r4
1272bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
1273bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
1274bc3d5698SJohn Baldwin	add	r6,r6,r11
1275bc3d5698SJohn Baldwin	vrev32.8	q2,q2
1276bc3d5698SJohn Baldwin	eor	r10,r7,r4
1277bc3d5698SJohn Baldwin	add	r5,r5,r9
1278bc3d5698SJohn Baldwin	vadd.i32	q9,q1,q14
1279bc3d5698SJohn Baldwin	ldr	r9,[sp,#36]
1280bc3d5698SJohn Baldwin	eor	r11,r10,r3
1281bc3d5698SJohn Baldwin	vst1.32	{q9},[r12,:128]!
1282bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
1283bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
1284bc3d5698SJohn Baldwin	add	r5,r5,r11
1285bc3d5698SJohn Baldwin	eor	r10,r6,r3
1286bc3d5698SJohn Baldwin	add	r4,r4,r9
1287bc3d5698SJohn Baldwin	ldr	r9,[sp,#40]
1288bc3d5698SJohn Baldwin	eor	r11,r10,r7
1289bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
1290bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
1291bc3d5698SJohn Baldwin	add	r4,r4,r11
1292bc3d5698SJohn Baldwin	eor	r10,r5,r7
1293bc3d5698SJohn Baldwin	add	r3,r3,r9
1294bc3d5698SJohn Baldwin	ldr	r9,[sp,#44]
1295bc3d5698SJohn Baldwin	eor	r11,r10,r6
1296bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
1297bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
1298bc3d5698SJohn Baldwin	add	r3,r3,r11
1299bc3d5698SJohn Baldwin	eor	r10,r4,r6
1300bc3d5698SJohn Baldwin	add	r7,r7,r9
1301bc3d5698SJohn Baldwin	ldr	r9,[sp,#48]
1302bc3d5698SJohn Baldwin	eor	r11,r10,r5
1303bc3d5698SJohn Baldwin	add	r7,r7,r3,ror#27
1304bc3d5698SJohn Baldwin	mov	r4,r4,ror#2
1305bc3d5698SJohn Baldwin	add	r7,r7,r11
1306bc3d5698SJohn Baldwin	vrev32.8	q3,q3
1307bc3d5698SJohn Baldwin	eor	r10,r3,r5
1308bc3d5698SJohn Baldwin	add	r6,r6,r9
1309bc3d5698SJohn Baldwin	vadd.i32	q10,q2,q14
1310bc3d5698SJohn Baldwin	ldr	r9,[sp,#52]
1311bc3d5698SJohn Baldwin	eor	r11,r10,r4
1312bc3d5698SJohn Baldwin	vst1.32	{q10},[r12,:128]!
1313bc3d5698SJohn Baldwin	add	r6,r6,r7,ror#27
1314bc3d5698SJohn Baldwin	mov	r3,r3,ror#2
1315bc3d5698SJohn Baldwin	add	r6,r6,r11
1316bc3d5698SJohn Baldwin	eor	r10,r7,r4
1317bc3d5698SJohn Baldwin	add	r5,r5,r9
1318bc3d5698SJohn Baldwin	ldr	r9,[sp,#56]
1319bc3d5698SJohn Baldwin	eor	r11,r10,r3
1320bc3d5698SJohn Baldwin	add	r5,r5,r6,ror#27
1321bc3d5698SJohn Baldwin	mov	r7,r7,ror#2
1322bc3d5698SJohn Baldwin	add	r5,r5,r11
1323bc3d5698SJohn Baldwin	eor	r10,r6,r3
1324bc3d5698SJohn Baldwin	add	r4,r4,r9
1325bc3d5698SJohn Baldwin	ldr	r9,[sp,#60]
1326bc3d5698SJohn Baldwin	eor	r11,r10,r7
1327bc3d5698SJohn Baldwin	add	r4,r4,r5,ror#27
1328bc3d5698SJohn Baldwin	mov	r6,r6,ror#2
1329bc3d5698SJohn Baldwin	add	r4,r4,r11
1330bc3d5698SJohn Baldwin	eor	r10,r5,r7
1331bc3d5698SJohn Baldwin	add	r3,r3,r9
1332bc3d5698SJohn Baldwin	eor	r11,r10,r6
1333bc3d5698SJohn Baldwin	add	r3,r3,r4,ror#27
1334bc3d5698SJohn Baldwin	mov	r5,r5,ror#2
1335bc3d5698SJohn Baldwin	add	r3,r3,r11
1336bc3d5698SJohn Baldwin	ldmia	r0,{r9,r10,r11,r12}	@ accumulate context
1337bc3d5698SJohn Baldwin	add	r3,r3,r9
1338bc3d5698SJohn Baldwin	ldr	r9,[r0,#16]
1339bc3d5698SJohn Baldwin	add	r4,r4,r10
1340bc3d5698SJohn Baldwin	add	r5,r5,r11
1341bc3d5698SJohn Baldwin	add	r6,r6,r12
1342bc3d5698SJohn Baldwin	it	eq
1343bc3d5698SJohn Baldwin	moveq	sp,r14
1344bc3d5698SJohn Baldwin	add	r7,r7,r9
1345bc3d5698SJohn Baldwin	it	ne
1346bc3d5698SJohn Baldwin	ldrne	r9,[sp]
1347bc3d5698SJohn Baldwin	stmia	r0,{r3,r4,r5,r6,r7}
1348bc3d5698SJohn Baldwin	itt	ne
1349bc3d5698SJohn Baldwin	addne	r12,sp,#3*16
1350bc3d5698SJohn Baldwin	bne	.Loop_neon
1351bc3d5698SJohn Baldwin
1352bc3d5698SJohn Baldwin	@ vldmia	sp!,{d8-d15}
1353bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
1354bc3d5698SJohn Baldwin.size	sha1_block_data_order_neon,.-sha1_block_data_order_neon
1355bc3d5698SJohn Baldwin#endif
1356bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7
1357bc3d5698SJohn Baldwin
/*
 * INST(a,b,c,d): emit one 4-byte instruction word as raw .byte data.
 * Used below to hand-encode the sha1h / sha1c / sha1p / sha1m / sha1su0 /
 * sha1su1 instructions named in the trailing comment of each INST line.
 *
 *   Thumb-2 build: bytes are emitted in the order c, d|0xf, a, b
 *                  (the c,d pair is placed in front of the a,b pair).
 *   Otherwise:     bytes are emitted in the order a, b, c, d|0x10.
 *
 * With the d values used by the callers below (0xf3 and 0xe2) the last
 * argument becomes 0xff / 0xef in the Thumb-2 form and 0xf3 / 0xf2 in the
 * other form.
 *
 * NOTE(review): presumably raw bytes are used so the file still assembles
 * with toolchains that lack these mnemonics -- confirm against the
 * generator script.
 */
1358bc3d5698SJohn Baldwin# if defined(__thumb2__)
1359bc3d5698SJohn Baldwin#  define INST(a,b,c,d)	.byte	c,d|0xf,a,b
1360bc3d5698SJohn Baldwin# else
1361bc3d5698SJohn Baldwin#  define INST(a,b,c,d)	.byte	a,b,c,d|0x10
1362bc3d5698SJohn Baldwin# endif
1363bc3d5698SJohn Baldwin
1364bc3d5698SJohn Baldwin.type	sha1_block_data_order_armv8,%function
1365bc3d5698SJohn Baldwin.align	5
1366bc3d5698SJohn Baldwinsha1_block_data_order_armv8:
1367bc3d5698SJohn Baldwin.LARMv8:
1368bc3d5698SJohn Baldwin	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ ABI specification says so
1369bc3d5698SJohn Baldwin
1370bc3d5698SJohn Baldwin	veor	q1,q1,q1
1371bc3d5698SJohn Baldwin	adr	r3,.LK_00_19
1372bc3d5698SJohn Baldwin	vld1.32	{q0},[r0]!
1373bc3d5698SJohn Baldwin	vld1.32	{d2[0]},[r0]
1374bc3d5698SJohn Baldwin	sub	r0,r0,#16
1375bc3d5698SJohn Baldwin	vld1.32	{d16[],d17[]},[r3,:32]!
1376bc3d5698SJohn Baldwin	vld1.32	{d18[],d19[]},[r3,:32]!
1377bc3d5698SJohn Baldwin	vld1.32	{d20[],d21[]},[r3,:32]!
1378bc3d5698SJohn Baldwin	vld1.32	{d22[],d23[]},[r3,:32]
1379bc3d5698SJohn Baldwin
1380bc3d5698SJohn Baldwin.Loop_v8:
1381bc3d5698SJohn Baldwin	vld1.8	{q4,q5},[r1]!
1382bc3d5698SJohn Baldwin	vld1.8	{q6,q7},[r1]!
1383bc3d5698SJohn Baldwin	vrev32.8	q4,q4
1384bc3d5698SJohn Baldwin	vrev32.8	q5,q5
1385bc3d5698SJohn Baldwin
1386bc3d5698SJohn Baldwin	vadd.i32	q12,q8,q4
1387bc3d5698SJohn Baldwin	vrev32.8	q6,q6
1388bc3d5698SJohn Baldwin	vmov	q14,q0	@ offload
1389bc3d5698SJohn Baldwin	subs	r2,r2,#1
1390bc3d5698SJohn Baldwin
1391bc3d5698SJohn Baldwin	vadd.i32	q13,q8,q5
1392bc3d5698SJohn Baldwin	vrev32.8	q7,q7
1393bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 0
1394bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xe2)	@ sha1c q0,q1,q12
1395bc3d5698SJohn Baldwin	vadd.i32	q12,q8,q6
1396bc3d5698SJohn Baldwin	INST(0x4c,0x8c,0x3a,0xe2)	@ sha1su0 q4,q5,q6
1397bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 1
1398bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x06,0xe2)	@ sha1c q0,q3,q13
1399bc3d5698SJohn Baldwin	vadd.i32	q13,q8,q7
1400bc3d5698SJohn Baldwin	INST(0x8e,0x83,0xba,0xf3)	@ sha1su1 q4,q7
1401bc3d5698SJohn Baldwin	INST(0x4e,0xac,0x3c,0xe2)	@ sha1su0 q5,q6,q7
1402bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 2
1403bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x04,0xe2)	@ sha1c q0,q2,q12
1404bc3d5698SJohn Baldwin	vadd.i32	q12,q8,q4
1405bc3d5698SJohn Baldwin	INST(0x88,0xa3,0xba,0xf3)	@ sha1su1 q5,q4
1406bc3d5698SJohn Baldwin	INST(0x48,0xcc,0x3e,0xe2)	@ sha1su0 q6,q7,q4
1407bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 3
1408bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x06,0xe2)	@ sha1c q0,q3,q13
1409bc3d5698SJohn Baldwin	vadd.i32	q13,q9,q5
1410bc3d5698SJohn Baldwin	INST(0x8a,0xc3,0xba,0xf3)	@ sha1su1 q6,q5
1411bc3d5698SJohn Baldwin	INST(0x4a,0xec,0x38,0xe2)	@ sha1su0 q7,q4,q5
1412bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 4
1413bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x04,0xe2)	@ sha1c q0,q2,q12
1414bc3d5698SJohn Baldwin	vadd.i32	q12,q9,q6
1415bc3d5698SJohn Baldwin	INST(0x8c,0xe3,0xba,0xf3)	@ sha1su1 q7,q6
1416bc3d5698SJohn Baldwin	INST(0x4c,0x8c,0x3a,0xe2)	@ sha1su0 q4,q5,q6
1417bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 5
1418bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x16,0xe2)	@ sha1p q0,q3,q13
1419bc3d5698SJohn Baldwin	vadd.i32	q13,q9,q7
1420bc3d5698SJohn Baldwin	INST(0x8e,0x83,0xba,0xf3)	@ sha1su1 q4,q7
1421bc3d5698SJohn Baldwin	INST(0x4e,0xac,0x3c,0xe2)	@ sha1su0 q5,q6,q7
1422bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 6
1423bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x14,0xe2)	@ sha1p q0,q2,q12
1424bc3d5698SJohn Baldwin	vadd.i32	q12,q9,q4
1425bc3d5698SJohn Baldwin	INST(0x88,0xa3,0xba,0xf3)	@ sha1su1 q5,q4
1426bc3d5698SJohn Baldwin	INST(0x48,0xcc,0x3e,0xe2)	@ sha1su0 q6,q7,q4
1427bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 7
1428bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x16,0xe2)	@ sha1p q0,q3,q13
1429bc3d5698SJohn Baldwin	vadd.i32	q13,q9,q5
1430bc3d5698SJohn Baldwin	INST(0x8a,0xc3,0xba,0xf3)	@ sha1su1 q6,q5
1431bc3d5698SJohn Baldwin	INST(0x4a,0xec,0x38,0xe2)	@ sha1su0 q7,q4,q5
1432bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 8
1433bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x14,0xe2)	@ sha1p q0,q2,q12
1434bc3d5698SJohn Baldwin	vadd.i32	q12,q10,q6
1435bc3d5698SJohn Baldwin	INST(0x8c,0xe3,0xba,0xf3)	@ sha1su1 q7,q6
1436bc3d5698SJohn Baldwin	INST(0x4c,0x8c,0x3a,0xe2)	@ sha1su0 q4,q5,q6
1437bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 9
1438bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x16,0xe2)	@ sha1p q0,q3,q13
1439bc3d5698SJohn Baldwin	vadd.i32	q13,q10,q7
1440bc3d5698SJohn Baldwin	INST(0x8e,0x83,0xba,0xf3)	@ sha1su1 q4,q7
1441bc3d5698SJohn Baldwin	INST(0x4e,0xac,0x3c,0xe2)	@ sha1su0 q5,q6,q7
1442bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 10
1443bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x24,0xe2)	@ sha1m q0,q2,q12
1444bc3d5698SJohn Baldwin	vadd.i32	q12,q10,q4
1445bc3d5698SJohn Baldwin	INST(0x88,0xa3,0xba,0xf3)	@ sha1su1 q5,q4
1446bc3d5698SJohn Baldwin	INST(0x48,0xcc,0x3e,0xe2)	@ sha1su0 q6,q7,q4
1447bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 11
1448bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x26,0xe2)	@ sha1m q0,q3,q13
1449bc3d5698SJohn Baldwin	vadd.i32	q13,q10,q5
1450bc3d5698SJohn Baldwin	INST(0x8a,0xc3,0xba,0xf3)	@ sha1su1 q6,q5
1451bc3d5698SJohn Baldwin	INST(0x4a,0xec,0x38,0xe2)	@ sha1su0 q7,q4,q5
1452bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 12
1453bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x24,0xe2)	@ sha1m q0,q2,q12
1454bc3d5698SJohn Baldwin	vadd.i32	q12,q10,q6
1455bc3d5698SJohn Baldwin	INST(0x8c,0xe3,0xba,0xf3)	@ sha1su1 q7,q6
1456bc3d5698SJohn Baldwin	INST(0x4c,0x8c,0x3a,0xe2)	@ sha1su0 q4,q5,q6
1457bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 13
1458bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x26,0xe2)	@ sha1m q0,q3,q13
1459bc3d5698SJohn Baldwin	vadd.i32	q13,q11,q7
1460bc3d5698SJohn Baldwin	INST(0x8e,0x83,0xba,0xf3)	@ sha1su1 q4,q7
1461bc3d5698SJohn Baldwin	INST(0x4e,0xac,0x3c,0xe2)	@ sha1su0 q5,q6,q7
1462bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 14
1463bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x24,0xe2)	@ sha1m q0,q2,q12
1464bc3d5698SJohn Baldwin	vadd.i32	q12,q11,q4
1465bc3d5698SJohn Baldwin	INST(0x88,0xa3,0xba,0xf3)	@ sha1su1 q5,q4
1466bc3d5698SJohn Baldwin	INST(0x48,0xcc,0x3e,0xe2)	@ sha1su0 q6,q7,q4
1467bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 15
1468bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x16,0xe2)	@ sha1p q0,q3,q13
1469bc3d5698SJohn Baldwin	vadd.i32	q13,q11,q5
1470bc3d5698SJohn Baldwin	INST(0x8a,0xc3,0xba,0xf3)	@ sha1su1 q6,q5
1471bc3d5698SJohn Baldwin	INST(0x4a,0xec,0x38,0xe2)	@ sha1su0 q7,q4,q5
1472bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 16
1473bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x14,0xe2)	@ sha1p q0,q2,q12
1474bc3d5698SJohn Baldwin	vadd.i32	q12,q11,q6
1475bc3d5698SJohn Baldwin	INST(0x8c,0xe3,0xba,0xf3)	@ sha1su1 q7,q6
1476bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 17
1477bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x16,0xe2)	@ sha1p q0,q3,q13
1478bc3d5698SJohn Baldwin	vadd.i32	q13,q11,q7
1479bc3d5698SJohn Baldwin
1480bc3d5698SJohn Baldwin	INST(0xc0,0x62,0xb9,0xf3)	@ sha1h q3,q0		@ 18
1481bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x14,0xe2)	@ sha1p q0,q2,q12
1482bc3d5698SJohn Baldwin
1483bc3d5698SJohn Baldwin	INST(0xc0,0x42,0xb9,0xf3)	@ sha1h q2,q0		@ 19
1484bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x16,0xe2)	@ sha1p q0,q3,q13
1485bc3d5698SJohn Baldwin
1486bc3d5698SJohn Baldwin	vadd.i32	q1,q1,q2
1487bc3d5698SJohn Baldwin	vadd.i32	q0,q0,q14
1488bc3d5698SJohn Baldwin	bne	.Loop_v8
1489bc3d5698SJohn Baldwin
1490bc3d5698SJohn Baldwin	vst1.32	{q0},[r0]!
1491bc3d5698SJohn Baldwin	vst1.32	{d2[0]},[r0]
1492bc3d5698SJohn Baldwin
1493bc3d5698SJohn Baldwin	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
1494bc3d5698SJohn Baldwin	bx	lr					@ bx lr
1495bc3d5698SJohn Baldwin.size	sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
1496bc3d5698SJohn Baldwin#endif
/*
 * Declare OPENSSL_armcap_P as a 4-byte common symbol so the linker can
 * merge it with a same-named symbol from other objects.  This is the
 * word the sha1_block_data_order entry code loads and tests against
 * ARMV8_SHA1 and ARMV7_NEON to choose the .LARMv8 or .LNEON path.
 * The guard is the same __ARM_MAX_ARCH__>=7 condition that wraps that
 * dispatch code, so the symbol is only referenced when it is declared.
 * NOTE(review): the trailing 4 is presumably the alignment operand as
 * on ELF targets; its meaning is target-dependent - confirm for others.
 */
1497bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7
1498bc3d5698SJohn Baldwin.comm	OPENSSL_armcap_P,4,4
1499bc3d5698SJohn Baldwin#endif /* __ARM_MAX_ARCH__>=7 */
1500