xref: /freebsd/sys/crypto/openssl/arm/sha256-armv4.S (revision c0855eaa3ee9614804b6bd6a255aa9f71e095f43)
1bc3d5698SJohn Baldwin/* Do not modify. This file is auto-generated from sha256-armv4.pl. */
2bc3d5698SJohn Baldwin@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
3bc3d5698SJohn Baldwin@
4*c0855eaaSJohn Baldwin@ Licensed under the Apache License 2.0 (the "License").  You may not use
5bc3d5698SJohn Baldwin@ this file except in compliance with the License.  You can obtain a copy
6bc3d5698SJohn Baldwin@ in the file LICENSE in the source distribution or at
7bc3d5698SJohn Baldwin@ https://www.openssl.org/source/license.html
8bc3d5698SJohn Baldwin
9bc3d5698SJohn Baldwin
10bc3d5698SJohn Baldwin@ ====================================================================
11bc3d5698SJohn Baldwin@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12bc3d5698SJohn Baldwin@ project. The module is, however, dual licensed under OpenSSL and
13bc3d5698SJohn Baldwin@ CRYPTOGAMS licenses depending on where you obtain it. For further
14bc3d5698SJohn Baldwin@ details see http://www.openssl.org/~appro/cryptogams/.
15bc3d5698SJohn Baldwin@
16bc3d5698SJohn Baldwin@ Permission to use under GPL terms is granted.
17bc3d5698SJohn Baldwin@ ====================================================================
18bc3d5698SJohn Baldwin
19bc3d5698SJohn Baldwin@ SHA256 block procedure for ARMv4. May 2007.
20bc3d5698SJohn Baldwin
21bc3d5698SJohn Baldwin@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
22bc3d5698SJohn Baldwin@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
23bc3d5698SJohn Baldwin@ byte [on single-issue Xscale PXA250 core].
24bc3d5698SJohn Baldwin
25bc3d5698SJohn Baldwin@ July 2010.
26bc3d5698SJohn Baldwin@
27bc3d5698SJohn Baldwin@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
28bc3d5698SJohn Baldwin@ Cortex A8 core and ~20 cycles per processed byte.
29bc3d5698SJohn Baldwin
30bc3d5698SJohn Baldwin@ February 2011.
31bc3d5698SJohn Baldwin@
32bc3d5698SJohn Baldwin@ Profiler-assisted and platform-specific optimization resulted in 16%
33bc3d5698SJohn Baldwin@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
34bc3d5698SJohn Baldwin
35bc3d5698SJohn Baldwin@ September 2013.
36bc3d5698SJohn Baldwin@
37bc3d5698SJohn Baldwin@ Add NEON implementation. On Cortex A8 it was measured to process one
38bc3d5698SJohn Baldwin@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
39bc3d5698SJohn Baldwin@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
40bc3d5698SJohn Baldwin@ code (meaning that latter performs sub-optimally, nothing was done
41bc3d5698SJohn Baldwin@ about it).
42bc3d5698SJohn Baldwin
43bc3d5698SJohn Baldwin@ May 2014.
44bc3d5698SJohn Baldwin@
45bc3d5698SJohn Baldwin@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
46bc3d5698SJohn Baldwin
47*c0855eaaSJohn Baldwin@ $output is the last argument if it looks like a file (it has an extension)
48*c0855eaaSJohn Baldwin@ $flavour is the first argument if it doesn't look like a file
49bc3d5698SJohn Baldwin#ifndef __KERNEL__
50bc3d5698SJohn Baldwin# include "arm_arch.h"
51bc3d5698SJohn Baldwin#else
52bc3d5698SJohn Baldwin# define __ARM_ARCH__ __LINUX_ARM_ARCH__
53bc3d5698SJohn Baldwin# define __ARM_MAX_ARCH__ 7
54bc3d5698SJohn Baldwin#endif
55bc3d5698SJohn Baldwin
56bc3d5698SJohn Baldwin#if defined(__thumb2__)
57bc3d5698SJohn Baldwin.syntax	unified
58bc3d5698SJohn Baldwin.thumb
59bc3d5698SJohn Baldwin#else
60bc3d5698SJohn Baldwin.code	32
61bc3d5698SJohn Baldwin#endif
62bc3d5698SJohn Baldwin
63*c0855eaaSJohn Baldwin.text
64*c0855eaaSJohn Baldwin
@ K256: the 64 32-bit SHA-256 round constants, consumed one word per
@ round via the running pointer in r14 (see "ldr r.,[r14],#4 @ *K256++"
@ in sha256_block_data_order below).  Values are the standard FIPS 180-4
@ constants.  The code locates this table PC-relatively
@ ("sub r14,r3,#256+32 @ K256"), so it must stay immediately ahead of
@ .Lsha256_block_data_order at a fixed distance.
65bc3d5698SJohn Baldwin.type	K256,%object
66bc3d5698SJohn Baldwin.align	5
67bc3d5698SJohn BaldwinK256:
68bc3d5698SJohn Baldwin.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
69bc3d5698SJohn Baldwin.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
70bc3d5698SJohn Baldwin.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
71bc3d5698SJohn Baldwin.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
72bc3d5698SJohn Baldwin.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
73bc3d5698SJohn Baldwin.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
74bc3d5698SJohn Baldwin.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
75bc3d5698SJohn Baldwin.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
76bc3d5698SJohn Baldwin.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
77bc3d5698SJohn Baldwin.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
78bc3d5698SJohn Baldwin.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
79bc3d5698SJohn Baldwin.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
80bc3d5698SJohn Baldwin.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
81bc3d5698SJohn Baldwin.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
82bc3d5698SJohn Baldwin.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
83bc3d5698SJohn Baldwin.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
84bc3d5698SJohn Baldwin.size	K256,.-K256
@ Sentinel past the end of the table: the integer rounds detect
@ completion by comparing the low byte of the last constant fetched
@ ("cmp r.,#0xf2 @ done?"), and this extra word keeps the final
@ post-incremented prefetch ("ldr r.,[r14],#4 @ *K256++") in-bounds.
85bc3d5698SJohn Baldwin.word	0				@ terminator
86bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
87bc3d5698SJohn Baldwin.LOPENSSL_armcap:
88*c0855eaaSJohn Baldwin# ifdef	_WIN32
89*c0855eaaSJohn Baldwin.word	OPENSSL_armcap_P
90*c0855eaaSJohn Baldwin# else
91bc3d5698SJohn Baldwin.word	OPENSSL_armcap_P-.Lsha256_block_data_order
92bc3d5698SJohn Baldwin# endif
93*c0855eaaSJohn Baldwin#endif
94bc3d5698SJohn Baldwin.align	5
95bc3d5698SJohn Baldwin
96bc3d5698SJohn Baldwin.globl	sha256_block_data_order
97bc3d5698SJohn Baldwin.type	sha256_block_data_order,%function
98bc3d5698SJohn Baldwinsha256_block_data_order:
99bc3d5698SJohn Baldwin.Lsha256_block_data_order:
100bc3d5698SJohn Baldwin#if __ARM_ARCH__<7 && !defined(__thumb2__)
101bc3d5698SJohn Baldwin	sub	r3,pc,#8		@ sha256_block_data_order
102bc3d5698SJohn Baldwin#else
103bc3d5698SJohn Baldwin	adr	r3,.Lsha256_block_data_order
104bc3d5698SJohn Baldwin#endif
105bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
106bc3d5698SJohn Baldwin	ldr	r12,.LOPENSSL_armcap
107*c0855eaaSJohn Baldwin# if !defined(_WIN32)
108bc3d5698SJohn Baldwin	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
109*c0855eaaSJohn Baldwin# endif
110*c0855eaaSJohn Baldwin# if defined(__APPLE__) || defined(_WIN32)
111bc3d5698SJohn Baldwin	ldr	r12,[r12]
112bc3d5698SJohn Baldwin# endif
113bc3d5698SJohn Baldwin	tst	r12,#ARMV8_SHA256
114bc3d5698SJohn Baldwin	bne	.LARMv8
115bc3d5698SJohn Baldwin	tst	r12,#ARMV7_NEON
116bc3d5698SJohn Baldwin	bne	.LNEON
117bc3d5698SJohn Baldwin#endif
118bc3d5698SJohn Baldwin	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
119bc3d5698SJohn Baldwin	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
120bc3d5698SJohn Baldwin	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
121bc3d5698SJohn Baldwin	sub	r14,r3,#256+32	@ K256
122bc3d5698SJohn Baldwin	sub	sp,sp,#16*4		@ alloca(X[16])
123bc3d5698SJohn Baldwin.Loop:
124bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
125bc3d5698SJohn Baldwin	ldr	r2,[r1],#4
126bc3d5698SJohn Baldwin# else
127bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
128bc3d5698SJohn Baldwin# endif
129bc3d5698SJohn Baldwin	eor	r3,r5,r6		@ magic
130bc3d5698SJohn Baldwin	eor	r12,r12,r12
131bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
132bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 0
133bc3d5698SJohn Baldwin# if 0==15
134bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
135bc3d5698SJohn Baldwin# endif
136bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5
137bc3d5698SJohn Baldwin	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
138bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#19	@ Sigma1(e)
139bc3d5698SJohn Baldwin# ifndef __ARMEB__
140bc3d5698SJohn Baldwin	rev	r2,r2
141bc3d5698SJohn Baldwin# endif
142bc3d5698SJohn Baldwin#else
143bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 0
144bc3d5698SJohn Baldwin	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
145bc3d5698SJohn Baldwin	ldrb	r12,[r1,#2]
146bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
147bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#8
148bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
149bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
150bc3d5698SJohn Baldwin# if 0==15
151bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
152bc3d5698SJohn Baldwin# endif
153bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5
154bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#24
155bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#19	@ Sigma1(e)
156bc3d5698SJohn Baldwin#endif
157bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
158bc3d5698SJohn Baldwin	add	r11,r11,r2			@ h+=X[i]
159bc3d5698SJohn Baldwin	str	r2,[sp,#0*4]
160bc3d5698SJohn Baldwin	eor	r2,r9,r10
161bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
162bc3d5698SJohn Baldwin	and	r2,r2,r8
163bc3d5698SJohn Baldwin	add	r11,r11,r12			@ h+=K256[i]
164bc3d5698SJohn Baldwin	eor	r2,r2,r10			@ Ch(e,f,g)
165bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#11
166bc3d5698SJohn Baldwin	add	r11,r11,r2			@ h+=Ch(e,f,g)
167bc3d5698SJohn Baldwin#if 0==31
168bc3d5698SJohn Baldwin	and	r12,r12,#0xff
169bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
170bc3d5698SJohn Baldwin#endif
171bc3d5698SJohn Baldwin#if 0<15
172bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
173bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
174bc3d5698SJohn Baldwin# else
175bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
176bc3d5698SJohn Baldwin# endif
177bc3d5698SJohn Baldwin	eor	r12,r4,r5			@ a^b, b^c in next round
178bc3d5698SJohn Baldwin#else
179bc3d5698SJohn Baldwin	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
180bc3d5698SJohn Baldwin	eor	r12,r4,r5			@ a^b, b^c in next round
181bc3d5698SJohn Baldwin	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
182bc3d5698SJohn Baldwin#endif
183bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#20	@ Sigma0(a)
184bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
185bc3d5698SJohn Baldwin	add	r7,r7,r11			@ d+=h
186bc3d5698SJohn Baldwin	eor	r3,r3,r5			@ Maj(a,b,c)
187bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
188bc3d5698SJohn Baldwin	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
189bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
190bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 1
191bc3d5698SJohn Baldwin# if 1==15
192bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
193bc3d5698SJohn Baldwin# endif
194bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5
195bc3d5698SJohn Baldwin	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
196bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#19	@ Sigma1(e)
197bc3d5698SJohn Baldwin# ifndef __ARMEB__
198bc3d5698SJohn Baldwin	rev	r2,r2
199bc3d5698SJohn Baldwin# endif
200bc3d5698SJohn Baldwin#else
201bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 1
202bc3d5698SJohn Baldwin	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
203bc3d5698SJohn Baldwin	ldrb	r3,[r1,#2]
204bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
205bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#8
206bc3d5698SJohn Baldwin	ldrb	r3,[r1],#4
207bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
208bc3d5698SJohn Baldwin# if 1==15
209bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
210bc3d5698SJohn Baldwin# endif
211bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5
212bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#24
213bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#19	@ Sigma1(e)
214bc3d5698SJohn Baldwin#endif
215bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
216bc3d5698SJohn Baldwin	add	r10,r10,r2			@ h+=X[i]
217bc3d5698SJohn Baldwin	str	r2,[sp,#1*4]
218bc3d5698SJohn Baldwin	eor	r2,r8,r9
219bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
220bc3d5698SJohn Baldwin	and	r2,r2,r7
221bc3d5698SJohn Baldwin	add	r10,r10,r3			@ h+=K256[i]
222bc3d5698SJohn Baldwin	eor	r2,r2,r9			@ Ch(e,f,g)
223bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#11
224bc3d5698SJohn Baldwin	add	r10,r10,r2			@ h+=Ch(e,f,g)
225bc3d5698SJohn Baldwin#if 1==31
226bc3d5698SJohn Baldwin	and	r3,r3,#0xff
227bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
228bc3d5698SJohn Baldwin#endif
229bc3d5698SJohn Baldwin#if 1<15
230bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
231bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
232bc3d5698SJohn Baldwin# else
233bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
234bc3d5698SJohn Baldwin# endif
235bc3d5698SJohn Baldwin	eor	r3,r11,r4			@ a^b, b^c in next round
236bc3d5698SJohn Baldwin#else
237bc3d5698SJohn Baldwin	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
238bc3d5698SJohn Baldwin	eor	r3,r11,r4			@ a^b, b^c in next round
239bc3d5698SJohn Baldwin	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
240bc3d5698SJohn Baldwin#endif
241bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#20	@ Sigma0(a)
242bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
243bc3d5698SJohn Baldwin	add	r6,r6,r10			@ d+=h
244bc3d5698SJohn Baldwin	eor	r12,r12,r4			@ Maj(a,b,c)
245bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
246bc3d5698SJohn Baldwin	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
247bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
248bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 2
249bc3d5698SJohn Baldwin# if 2==15
250bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
251bc3d5698SJohn Baldwin# endif
252bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5
253bc3d5698SJohn Baldwin	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
254bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#19	@ Sigma1(e)
255bc3d5698SJohn Baldwin# ifndef __ARMEB__
256bc3d5698SJohn Baldwin	rev	r2,r2
257bc3d5698SJohn Baldwin# endif
258bc3d5698SJohn Baldwin#else
259bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 2
260bc3d5698SJohn Baldwin	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
261bc3d5698SJohn Baldwin	ldrb	r12,[r1,#2]
262bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
263bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#8
264bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
265bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
266bc3d5698SJohn Baldwin# if 2==15
267bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
268bc3d5698SJohn Baldwin# endif
269bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5
270bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#24
271bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#19	@ Sigma1(e)
272bc3d5698SJohn Baldwin#endif
273bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
274bc3d5698SJohn Baldwin	add	r9,r9,r2			@ h+=X[i]
275bc3d5698SJohn Baldwin	str	r2,[sp,#2*4]
276bc3d5698SJohn Baldwin	eor	r2,r7,r8
277bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
278bc3d5698SJohn Baldwin	and	r2,r2,r6
279bc3d5698SJohn Baldwin	add	r9,r9,r12			@ h+=K256[i]
280bc3d5698SJohn Baldwin	eor	r2,r2,r8			@ Ch(e,f,g)
281bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#11
282bc3d5698SJohn Baldwin	add	r9,r9,r2			@ h+=Ch(e,f,g)
283bc3d5698SJohn Baldwin#if 2==31
284bc3d5698SJohn Baldwin	and	r12,r12,#0xff
285bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
286bc3d5698SJohn Baldwin#endif
287bc3d5698SJohn Baldwin#if 2<15
288bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
289bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
290bc3d5698SJohn Baldwin# else
291bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
292bc3d5698SJohn Baldwin# endif
293bc3d5698SJohn Baldwin	eor	r12,r10,r11			@ a^b, b^c in next round
294bc3d5698SJohn Baldwin#else
295bc3d5698SJohn Baldwin	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
296bc3d5698SJohn Baldwin	eor	r12,r10,r11			@ a^b, b^c in next round
297bc3d5698SJohn Baldwin	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
298bc3d5698SJohn Baldwin#endif
299bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#20	@ Sigma0(a)
300bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
301bc3d5698SJohn Baldwin	add	r5,r5,r9			@ d+=h
302bc3d5698SJohn Baldwin	eor	r3,r3,r11			@ Maj(a,b,c)
303bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
304bc3d5698SJohn Baldwin	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
305bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
306bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 3
307bc3d5698SJohn Baldwin# if 3==15
308bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
309bc3d5698SJohn Baldwin# endif
310bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5
311bc3d5698SJohn Baldwin	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
312bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#19	@ Sigma1(e)
313bc3d5698SJohn Baldwin# ifndef __ARMEB__
314bc3d5698SJohn Baldwin	rev	r2,r2
315bc3d5698SJohn Baldwin# endif
316bc3d5698SJohn Baldwin#else
317bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 3
318bc3d5698SJohn Baldwin	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
319bc3d5698SJohn Baldwin	ldrb	r3,[r1,#2]
320bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
321bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#8
322bc3d5698SJohn Baldwin	ldrb	r3,[r1],#4
323bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
324bc3d5698SJohn Baldwin# if 3==15
325bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
326bc3d5698SJohn Baldwin# endif
327bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5
328bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#24
329bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#19	@ Sigma1(e)
330bc3d5698SJohn Baldwin#endif
331bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
332bc3d5698SJohn Baldwin	add	r8,r8,r2			@ h+=X[i]
333bc3d5698SJohn Baldwin	str	r2,[sp,#3*4]
334bc3d5698SJohn Baldwin	eor	r2,r6,r7
335bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
336bc3d5698SJohn Baldwin	and	r2,r2,r5
337bc3d5698SJohn Baldwin	add	r8,r8,r3			@ h+=K256[i]
338bc3d5698SJohn Baldwin	eor	r2,r2,r7			@ Ch(e,f,g)
339bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#11
340bc3d5698SJohn Baldwin	add	r8,r8,r2			@ h+=Ch(e,f,g)
341bc3d5698SJohn Baldwin#if 3==31
342bc3d5698SJohn Baldwin	and	r3,r3,#0xff
343bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
344bc3d5698SJohn Baldwin#endif
345bc3d5698SJohn Baldwin#if 3<15
346bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
347bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
348bc3d5698SJohn Baldwin# else
349bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
350bc3d5698SJohn Baldwin# endif
351bc3d5698SJohn Baldwin	eor	r3,r9,r10			@ a^b, b^c in next round
352bc3d5698SJohn Baldwin#else
353bc3d5698SJohn Baldwin	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
354bc3d5698SJohn Baldwin	eor	r3,r9,r10			@ a^b, b^c in next round
355bc3d5698SJohn Baldwin	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
356bc3d5698SJohn Baldwin#endif
357bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#20	@ Sigma0(a)
358bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
359bc3d5698SJohn Baldwin	add	r4,r4,r8			@ d+=h
360bc3d5698SJohn Baldwin	eor	r12,r12,r10			@ Maj(a,b,c)
361bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
362bc3d5698SJohn Baldwin	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
363bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
364bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 4
365bc3d5698SJohn Baldwin# if 4==15
366bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
367bc3d5698SJohn Baldwin# endif
368bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5
369bc3d5698SJohn Baldwin	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
370bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#19	@ Sigma1(e)
371bc3d5698SJohn Baldwin# ifndef __ARMEB__
372bc3d5698SJohn Baldwin	rev	r2,r2
373bc3d5698SJohn Baldwin# endif
374bc3d5698SJohn Baldwin#else
375bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 4
376bc3d5698SJohn Baldwin	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
377bc3d5698SJohn Baldwin	ldrb	r12,[r1,#2]
378bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
379bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#8
380bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
381bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
382bc3d5698SJohn Baldwin# if 4==15
383bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
384bc3d5698SJohn Baldwin# endif
385bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5
386bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#24
387bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#19	@ Sigma1(e)
388bc3d5698SJohn Baldwin#endif
389bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
390bc3d5698SJohn Baldwin	add	r7,r7,r2			@ h+=X[i]
391bc3d5698SJohn Baldwin	str	r2,[sp,#4*4]
392bc3d5698SJohn Baldwin	eor	r2,r5,r6
393bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
394bc3d5698SJohn Baldwin	and	r2,r2,r4
395bc3d5698SJohn Baldwin	add	r7,r7,r12			@ h+=K256[i]
396bc3d5698SJohn Baldwin	eor	r2,r2,r6			@ Ch(e,f,g)
397bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#11
398bc3d5698SJohn Baldwin	add	r7,r7,r2			@ h+=Ch(e,f,g)
399bc3d5698SJohn Baldwin#if 4==31
400bc3d5698SJohn Baldwin	and	r12,r12,#0xff
401bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
402bc3d5698SJohn Baldwin#endif
403bc3d5698SJohn Baldwin#if 4<15
404bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
405bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
406bc3d5698SJohn Baldwin# else
407bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
408bc3d5698SJohn Baldwin# endif
409bc3d5698SJohn Baldwin	eor	r12,r8,r9			@ a^b, b^c in next round
410bc3d5698SJohn Baldwin#else
411bc3d5698SJohn Baldwin	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
412bc3d5698SJohn Baldwin	eor	r12,r8,r9			@ a^b, b^c in next round
413bc3d5698SJohn Baldwin	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
414bc3d5698SJohn Baldwin#endif
415bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#20	@ Sigma0(a)
416bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
417bc3d5698SJohn Baldwin	add	r11,r11,r7			@ d+=h
418bc3d5698SJohn Baldwin	eor	r3,r3,r9			@ Maj(a,b,c)
419bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
420bc3d5698SJohn Baldwin	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
421bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
422bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 5
423bc3d5698SJohn Baldwin# if 5==15
424bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
425bc3d5698SJohn Baldwin# endif
426bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5
427bc3d5698SJohn Baldwin	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
428bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#19	@ Sigma1(e)
429bc3d5698SJohn Baldwin# ifndef __ARMEB__
430bc3d5698SJohn Baldwin	rev	r2,r2
431bc3d5698SJohn Baldwin# endif
432bc3d5698SJohn Baldwin#else
433bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 5
434bc3d5698SJohn Baldwin	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
435bc3d5698SJohn Baldwin	ldrb	r3,[r1,#2]
436bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
437bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#8
438bc3d5698SJohn Baldwin	ldrb	r3,[r1],#4
439bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
440bc3d5698SJohn Baldwin# if 5==15
441bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
442bc3d5698SJohn Baldwin# endif
443bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5
444bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#24
445bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#19	@ Sigma1(e)
446bc3d5698SJohn Baldwin#endif
447bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
448bc3d5698SJohn Baldwin	add	r6,r6,r2			@ h+=X[i]
449bc3d5698SJohn Baldwin	str	r2,[sp,#5*4]
450bc3d5698SJohn Baldwin	eor	r2,r4,r5
451bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
452bc3d5698SJohn Baldwin	and	r2,r2,r11
453bc3d5698SJohn Baldwin	add	r6,r6,r3			@ h+=K256[i]
454bc3d5698SJohn Baldwin	eor	r2,r2,r5			@ Ch(e,f,g)
455bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#11
456bc3d5698SJohn Baldwin	add	r6,r6,r2			@ h+=Ch(e,f,g)
457bc3d5698SJohn Baldwin#if 5==31
458bc3d5698SJohn Baldwin	and	r3,r3,#0xff
459bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
460bc3d5698SJohn Baldwin#endif
461bc3d5698SJohn Baldwin#if 5<15
462bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
463bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
464bc3d5698SJohn Baldwin# else
465bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
466bc3d5698SJohn Baldwin# endif
467bc3d5698SJohn Baldwin	eor	r3,r7,r8			@ a^b, b^c in next round
468bc3d5698SJohn Baldwin#else
469bc3d5698SJohn Baldwin	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
470bc3d5698SJohn Baldwin	eor	r3,r7,r8			@ a^b, b^c in next round
471bc3d5698SJohn Baldwin	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
472bc3d5698SJohn Baldwin#endif
473bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#20	@ Sigma0(a)
474bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
475bc3d5698SJohn Baldwin	add	r10,r10,r6			@ d+=h
476bc3d5698SJohn Baldwin	eor	r12,r12,r8			@ Maj(a,b,c)
477bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
478bc3d5698SJohn Baldwin	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
479bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
480bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 6
481bc3d5698SJohn Baldwin# if 6==15
482bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
483bc3d5698SJohn Baldwin# endif
484bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5
485bc3d5698SJohn Baldwin	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
486bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#19	@ Sigma1(e)
487bc3d5698SJohn Baldwin# ifndef __ARMEB__
488bc3d5698SJohn Baldwin	rev	r2,r2
489bc3d5698SJohn Baldwin# endif
490bc3d5698SJohn Baldwin#else
491bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 6
492bc3d5698SJohn Baldwin	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
493bc3d5698SJohn Baldwin	ldrb	r12,[r1,#2]
494bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
495bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#8
496bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
497bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
498bc3d5698SJohn Baldwin# if 6==15
499bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
500bc3d5698SJohn Baldwin# endif
501bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5
502bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#24
503bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#19	@ Sigma1(e)
504bc3d5698SJohn Baldwin#endif
505bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
506bc3d5698SJohn Baldwin	add	r5,r5,r2			@ h+=X[i]
507bc3d5698SJohn Baldwin	str	r2,[sp,#6*4]
508bc3d5698SJohn Baldwin	eor	r2,r11,r4
509bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
510bc3d5698SJohn Baldwin	and	r2,r2,r10
511bc3d5698SJohn Baldwin	add	r5,r5,r12			@ h+=K256[i]
512bc3d5698SJohn Baldwin	eor	r2,r2,r4			@ Ch(e,f,g)
513bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#11
514bc3d5698SJohn Baldwin	add	r5,r5,r2			@ h+=Ch(e,f,g)
515bc3d5698SJohn Baldwin#if 6==31
516bc3d5698SJohn Baldwin	and	r12,r12,#0xff
517bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
518bc3d5698SJohn Baldwin#endif
519bc3d5698SJohn Baldwin#if 6<15
520bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
521bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
522bc3d5698SJohn Baldwin# else
523bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
524bc3d5698SJohn Baldwin# endif
525bc3d5698SJohn Baldwin	eor	r12,r6,r7			@ a^b, b^c in next round
526bc3d5698SJohn Baldwin#else
527bc3d5698SJohn Baldwin	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
528bc3d5698SJohn Baldwin	eor	r12,r6,r7			@ a^b, b^c in next round
529bc3d5698SJohn Baldwin	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
530bc3d5698SJohn Baldwin#endif
531bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#20	@ Sigma0(a)
532bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
533bc3d5698SJohn Baldwin	add	r9,r9,r5			@ d+=h
534bc3d5698SJohn Baldwin	eor	r3,r3,r7			@ Maj(a,b,c)
535bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
536bc3d5698SJohn Baldwin	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
537bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
538bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 7
539bc3d5698SJohn Baldwin# if 7==15
540bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
541bc3d5698SJohn Baldwin# endif
542bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5
543bc3d5698SJohn Baldwin	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
544bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#19	@ Sigma1(e)
545bc3d5698SJohn Baldwin# ifndef __ARMEB__
546bc3d5698SJohn Baldwin	rev	r2,r2
547bc3d5698SJohn Baldwin# endif
548bc3d5698SJohn Baldwin#else
549bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 7
550bc3d5698SJohn Baldwin	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
551bc3d5698SJohn Baldwin	ldrb	r3,[r1,#2]
552bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
553bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#8
554bc3d5698SJohn Baldwin	ldrb	r3,[r1],#4
555bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
556bc3d5698SJohn Baldwin# if 7==15
557bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
558bc3d5698SJohn Baldwin# endif
559bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5
560bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#24
561bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#19	@ Sigma1(e)
562bc3d5698SJohn Baldwin#endif
563bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
564bc3d5698SJohn Baldwin	add	r4,r4,r2			@ h+=X[i]
565bc3d5698SJohn Baldwin	str	r2,[sp,#7*4]
566bc3d5698SJohn Baldwin	eor	r2,r10,r11
567bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
568bc3d5698SJohn Baldwin	and	r2,r2,r9
569bc3d5698SJohn Baldwin	add	r4,r4,r3			@ h+=K256[i]
570bc3d5698SJohn Baldwin	eor	r2,r2,r11			@ Ch(e,f,g)
571bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#11
572bc3d5698SJohn Baldwin	add	r4,r4,r2			@ h+=Ch(e,f,g)
573bc3d5698SJohn Baldwin#if 7==31
574bc3d5698SJohn Baldwin	and	r3,r3,#0xff
575bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
576bc3d5698SJohn Baldwin#endif
577bc3d5698SJohn Baldwin#if 7<15
578bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
579bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
580bc3d5698SJohn Baldwin# else
581bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
582bc3d5698SJohn Baldwin# endif
583bc3d5698SJohn Baldwin	eor	r3,r5,r6			@ a^b, b^c in next round
584bc3d5698SJohn Baldwin#else
585bc3d5698SJohn Baldwin	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
586bc3d5698SJohn Baldwin	eor	r3,r5,r6			@ a^b, b^c in next round
587bc3d5698SJohn Baldwin	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
588bc3d5698SJohn Baldwin#endif
589bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#20	@ Sigma0(a)
590bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
591bc3d5698SJohn Baldwin	add	r8,r8,r4			@ d+=h
592bc3d5698SJohn Baldwin	eor	r12,r12,r6			@ Maj(a,b,c)
593bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
594bc3d5698SJohn Baldwin	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
595bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
596bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 8
597bc3d5698SJohn Baldwin# if 8==15
598bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
599bc3d5698SJohn Baldwin# endif
600bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5
601bc3d5698SJohn Baldwin	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
602bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#19	@ Sigma1(e)
603bc3d5698SJohn Baldwin# ifndef __ARMEB__
604bc3d5698SJohn Baldwin	rev	r2,r2
605bc3d5698SJohn Baldwin# endif
606bc3d5698SJohn Baldwin#else
607bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 8
608bc3d5698SJohn Baldwin	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
609bc3d5698SJohn Baldwin	ldrb	r12,[r1,#2]
610bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
611bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#8
612bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
613bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
614bc3d5698SJohn Baldwin# if 8==15
615bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
616bc3d5698SJohn Baldwin# endif
617bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5
618bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#24
619bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#19	@ Sigma1(e)
620bc3d5698SJohn Baldwin#endif
621bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
622bc3d5698SJohn Baldwin	add	r11,r11,r2			@ h+=X[i]
623bc3d5698SJohn Baldwin	str	r2,[sp,#8*4]
624bc3d5698SJohn Baldwin	eor	r2,r9,r10
625bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
626bc3d5698SJohn Baldwin	and	r2,r2,r8
627bc3d5698SJohn Baldwin	add	r11,r11,r12			@ h+=K256[i]
628bc3d5698SJohn Baldwin	eor	r2,r2,r10			@ Ch(e,f,g)
629bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#11
630bc3d5698SJohn Baldwin	add	r11,r11,r2			@ h+=Ch(e,f,g)
631bc3d5698SJohn Baldwin#if 8==31
632bc3d5698SJohn Baldwin	and	r12,r12,#0xff
633bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
634bc3d5698SJohn Baldwin#endif
635bc3d5698SJohn Baldwin#if 8<15
636bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
637bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
638bc3d5698SJohn Baldwin# else
639bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
640bc3d5698SJohn Baldwin# endif
641bc3d5698SJohn Baldwin	eor	r12,r4,r5			@ a^b, b^c in next round
642bc3d5698SJohn Baldwin#else
643bc3d5698SJohn Baldwin	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
644bc3d5698SJohn Baldwin	eor	r12,r4,r5			@ a^b, b^c in next round
645bc3d5698SJohn Baldwin	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
646bc3d5698SJohn Baldwin#endif
647bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#20	@ Sigma0(a)
648bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
649bc3d5698SJohn Baldwin	add	r7,r7,r11			@ d+=h
650bc3d5698SJohn Baldwin	eor	r3,r3,r5			@ Maj(a,b,c)
651bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
652bc3d5698SJohn Baldwin	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
653bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
654bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 9
655bc3d5698SJohn Baldwin# if 9==15
656bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
657bc3d5698SJohn Baldwin# endif
658bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5
659bc3d5698SJohn Baldwin	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
660bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#19	@ Sigma1(e)
661bc3d5698SJohn Baldwin# ifndef __ARMEB__
662bc3d5698SJohn Baldwin	rev	r2,r2
663bc3d5698SJohn Baldwin# endif
664bc3d5698SJohn Baldwin#else
665bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 9
666bc3d5698SJohn Baldwin	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
667bc3d5698SJohn Baldwin	ldrb	r3,[r1,#2]
668bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
669bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#8
670bc3d5698SJohn Baldwin	ldrb	r3,[r1],#4
671bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
672bc3d5698SJohn Baldwin# if 9==15
673bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
674bc3d5698SJohn Baldwin# endif
675bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5
676bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#24
677bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#19	@ Sigma1(e)
678bc3d5698SJohn Baldwin#endif
679bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
680bc3d5698SJohn Baldwin	add	r10,r10,r2			@ h+=X[i]
681bc3d5698SJohn Baldwin	str	r2,[sp,#9*4]
682bc3d5698SJohn Baldwin	eor	r2,r8,r9
683bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
684bc3d5698SJohn Baldwin	and	r2,r2,r7
685bc3d5698SJohn Baldwin	add	r10,r10,r3			@ h+=K256[i]
686bc3d5698SJohn Baldwin	eor	r2,r2,r9			@ Ch(e,f,g)
687bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#11
688bc3d5698SJohn Baldwin	add	r10,r10,r2			@ h+=Ch(e,f,g)
689bc3d5698SJohn Baldwin#if 9==31
690bc3d5698SJohn Baldwin	and	r3,r3,#0xff
691bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
692bc3d5698SJohn Baldwin#endif
693bc3d5698SJohn Baldwin#if 9<15
694bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
695bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
696bc3d5698SJohn Baldwin# else
697bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
698bc3d5698SJohn Baldwin# endif
699bc3d5698SJohn Baldwin	eor	r3,r11,r4			@ a^b, b^c in next round
700bc3d5698SJohn Baldwin#else
701bc3d5698SJohn Baldwin	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
702bc3d5698SJohn Baldwin	eor	r3,r11,r4			@ a^b, b^c in next round
703bc3d5698SJohn Baldwin	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
704bc3d5698SJohn Baldwin#endif
705bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#20	@ Sigma0(a)
706bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
707bc3d5698SJohn Baldwin	add	r6,r6,r10			@ d+=h
708bc3d5698SJohn Baldwin	eor	r12,r12,r4			@ Maj(a,b,c)
709bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
710bc3d5698SJohn Baldwin	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
711bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
712bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 10
713bc3d5698SJohn Baldwin# if 10==15
714bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
715bc3d5698SJohn Baldwin# endif
716bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5
717bc3d5698SJohn Baldwin	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
718bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#19	@ Sigma1(e)
719bc3d5698SJohn Baldwin# ifndef __ARMEB__
720bc3d5698SJohn Baldwin	rev	r2,r2
721bc3d5698SJohn Baldwin# endif
722bc3d5698SJohn Baldwin#else
723bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 10
724bc3d5698SJohn Baldwin	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
725bc3d5698SJohn Baldwin	ldrb	r12,[r1,#2]
726bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
727bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#8
728bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
729bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
730bc3d5698SJohn Baldwin# if 10==15
731bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
732bc3d5698SJohn Baldwin# endif
733bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5
734bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#24
735bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#19	@ Sigma1(e)
736bc3d5698SJohn Baldwin#endif
737bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
738bc3d5698SJohn Baldwin	add	r9,r9,r2			@ h+=X[i]
739bc3d5698SJohn Baldwin	str	r2,[sp,#10*4]
740bc3d5698SJohn Baldwin	eor	r2,r7,r8
741bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
742bc3d5698SJohn Baldwin	and	r2,r2,r6
743bc3d5698SJohn Baldwin	add	r9,r9,r12			@ h+=K256[i]
744bc3d5698SJohn Baldwin	eor	r2,r2,r8			@ Ch(e,f,g)
745bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#11
746bc3d5698SJohn Baldwin	add	r9,r9,r2			@ h+=Ch(e,f,g)
747bc3d5698SJohn Baldwin#if 10==31
748bc3d5698SJohn Baldwin	and	r12,r12,#0xff
749bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
750bc3d5698SJohn Baldwin#endif
751bc3d5698SJohn Baldwin#if 10<15
752bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
753bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
754bc3d5698SJohn Baldwin# else
755bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
756bc3d5698SJohn Baldwin# endif
757bc3d5698SJohn Baldwin	eor	r12,r10,r11			@ a^b, b^c in next round
758bc3d5698SJohn Baldwin#else
759bc3d5698SJohn Baldwin	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
760bc3d5698SJohn Baldwin	eor	r12,r10,r11			@ a^b, b^c in next round
761bc3d5698SJohn Baldwin	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
762bc3d5698SJohn Baldwin#endif
763bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#20	@ Sigma0(a)
764bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
765bc3d5698SJohn Baldwin	add	r5,r5,r9			@ d+=h
766bc3d5698SJohn Baldwin	eor	r3,r3,r11			@ Maj(a,b,c)
767bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
768bc3d5698SJohn Baldwin	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
769bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
770bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 11
771bc3d5698SJohn Baldwin# if 11==15
772bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
773bc3d5698SJohn Baldwin# endif
774bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5
775bc3d5698SJohn Baldwin	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
776bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#19	@ Sigma1(e)
777bc3d5698SJohn Baldwin# ifndef __ARMEB__
778bc3d5698SJohn Baldwin	rev	r2,r2
779bc3d5698SJohn Baldwin# endif
780bc3d5698SJohn Baldwin#else
781bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 11
782bc3d5698SJohn Baldwin	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
783bc3d5698SJohn Baldwin	ldrb	r3,[r1,#2]
784bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
785bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#8
786bc3d5698SJohn Baldwin	ldrb	r3,[r1],#4
787bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
788bc3d5698SJohn Baldwin# if 11==15
789bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
790bc3d5698SJohn Baldwin# endif
791bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5
792bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#24
793bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#19	@ Sigma1(e)
794bc3d5698SJohn Baldwin#endif
795bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
796bc3d5698SJohn Baldwin	add	r8,r8,r2			@ h+=X[i]
797bc3d5698SJohn Baldwin	str	r2,[sp,#11*4]
798bc3d5698SJohn Baldwin	eor	r2,r6,r7
799bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
800bc3d5698SJohn Baldwin	and	r2,r2,r5
801bc3d5698SJohn Baldwin	add	r8,r8,r3			@ h+=K256[i]
802bc3d5698SJohn Baldwin	eor	r2,r2,r7			@ Ch(e,f,g)
803bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#11
804bc3d5698SJohn Baldwin	add	r8,r8,r2			@ h+=Ch(e,f,g)
805bc3d5698SJohn Baldwin#if 11==31
806bc3d5698SJohn Baldwin	and	r3,r3,#0xff
807bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
808bc3d5698SJohn Baldwin#endif
809bc3d5698SJohn Baldwin#if 11<15
810bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
811bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
812bc3d5698SJohn Baldwin# else
813bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
814bc3d5698SJohn Baldwin# endif
815bc3d5698SJohn Baldwin	eor	r3,r9,r10			@ a^b, b^c in next round
816bc3d5698SJohn Baldwin#else
817bc3d5698SJohn Baldwin	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
818bc3d5698SJohn Baldwin	eor	r3,r9,r10			@ a^b, b^c in next round
819bc3d5698SJohn Baldwin	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
820bc3d5698SJohn Baldwin#endif
821bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#20	@ Sigma0(a)
822bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
823bc3d5698SJohn Baldwin	add	r4,r4,r8			@ d+=h
824bc3d5698SJohn Baldwin	eor	r12,r12,r10			@ Maj(a,b,c)
825bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
826bc3d5698SJohn Baldwin	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
827bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
828bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 12
829bc3d5698SJohn Baldwin# if 12==15
830bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
831bc3d5698SJohn Baldwin# endif
832bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5
833bc3d5698SJohn Baldwin	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
834bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#19	@ Sigma1(e)
835bc3d5698SJohn Baldwin# ifndef __ARMEB__
836bc3d5698SJohn Baldwin	rev	r2,r2
837bc3d5698SJohn Baldwin# endif
838bc3d5698SJohn Baldwin#else
839bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 12
840bc3d5698SJohn Baldwin	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
841bc3d5698SJohn Baldwin	ldrb	r12,[r1,#2]
842bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
843bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#8
844bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
845bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
846bc3d5698SJohn Baldwin# if 12==15
847bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
848bc3d5698SJohn Baldwin# endif
849bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5
850bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#24
851bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#19	@ Sigma1(e)
852bc3d5698SJohn Baldwin#endif
853bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
854bc3d5698SJohn Baldwin	add	r7,r7,r2			@ h+=X[i]
855bc3d5698SJohn Baldwin	str	r2,[sp,#12*4]
856bc3d5698SJohn Baldwin	eor	r2,r5,r6
857bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
858bc3d5698SJohn Baldwin	and	r2,r2,r4
859bc3d5698SJohn Baldwin	add	r7,r7,r12			@ h+=K256[i]
860bc3d5698SJohn Baldwin	eor	r2,r2,r6			@ Ch(e,f,g)
861bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#11
862bc3d5698SJohn Baldwin	add	r7,r7,r2			@ h+=Ch(e,f,g)
863bc3d5698SJohn Baldwin#if 12==31
864bc3d5698SJohn Baldwin	and	r12,r12,#0xff
865bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
866bc3d5698SJohn Baldwin#endif
867bc3d5698SJohn Baldwin#if 12<15
868bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
869bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
870bc3d5698SJohn Baldwin# else
871bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
872bc3d5698SJohn Baldwin# endif
873bc3d5698SJohn Baldwin	eor	r12,r8,r9			@ a^b, b^c in next round
874bc3d5698SJohn Baldwin#else
875bc3d5698SJohn Baldwin	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
876bc3d5698SJohn Baldwin	eor	r12,r8,r9			@ a^b, b^c in next round
877bc3d5698SJohn Baldwin	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
878bc3d5698SJohn Baldwin#endif
879bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#20	@ Sigma0(a)
880bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
881bc3d5698SJohn Baldwin	add	r11,r11,r7			@ d+=h
882bc3d5698SJohn Baldwin	eor	r3,r3,r9			@ Maj(a,b,c)
883bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
884bc3d5698SJohn Baldwin	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
885bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
886bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 13
887bc3d5698SJohn Baldwin# if 13==15
888bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
889bc3d5698SJohn Baldwin# endif
890bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5
891bc3d5698SJohn Baldwin	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
892bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#19	@ Sigma1(e)
893bc3d5698SJohn Baldwin# ifndef __ARMEB__
894bc3d5698SJohn Baldwin	rev	r2,r2
895bc3d5698SJohn Baldwin# endif
896bc3d5698SJohn Baldwin#else
897bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 13
898bc3d5698SJohn Baldwin	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
899bc3d5698SJohn Baldwin	ldrb	r3,[r1,#2]
900bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
901bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#8
902bc3d5698SJohn Baldwin	ldrb	r3,[r1],#4
903bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
904bc3d5698SJohn Baldwin# if 13==15
905bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
906bc3d5698SJohn Baldwin# endif
907bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5
908bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#24
909bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#19	@ Sigma1(e)
910bc3d5698SJohn Baldwin#endif
911bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
912bc3d5698SJohn Baldwin	add	r6,r6,r2			@ h+=X[i]
913bc3d5698SJohn Baldwin	str	r2,[sp,#13*4]
914bc3d5698SJohn Baldwin	eor	r2,r4,r5
915bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
916bc3d5698SJohn Baldwin	and	r2,r2,r11
917bc3d5698SJohn Baldwin	add	r6,r6,r3			@ h+=K256[i]
918bc3d5698SJohn Baldwin	eor	r2,r2,r5			@ Ch(e,f,g)
919bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#11
920bc3d5698SJohn Baldwin	add	r6,r6,r2			@ h+=Ch(e,f,g)
921bc3d5698SJohn Baldwin#if 13==31
922bc3d5698SJohn Baldwin	and	r3,r3,#0xff
923bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
924bc3d5698SJohn Baldwin#endif
925bc3d5698SJohn Baldwin#if 13<15
926bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
927bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
928bc3d5698SJohn Baldwin# else
929bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
930bc3d5698SJohn Baldwin# endif
931bc3d5698SJohn Baldwin	eor	r3,r7,r8			@ a^b, b^c in next round
932bc3d5698SJohn Baldwin#else
933bc3d5698SJohn Baldwin	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
934bc3d5698SJohn Baldwin	eor	r3,r7,r8			@ a^b, b^c in next round
935bc3d5698SJohn Baldwin	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
936bc3d5698SJohn Baldwin#endif
937bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#20	@ Sigma0(a)
938bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
939bc3d5698SJohn Baldwin	add	r10,r10,r6			@ d+=h
940bc3d5698SJohn Baldwin	eor	r12,r12,r8			@ Maj(a,b,c)
941bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
942bc3d5698SJohn Baldwin	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
943bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
944bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 14
945bc3d5698SJohn Baldwin# if 14==15
946bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
947bc3d5698SJohn Baldwin# endif
948bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5
949bc3d5698SJohn Baldwin	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
950bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#19	@ Sigma1(e)
951bc3d5698SJohn Baldwin# ifndef __ARMEB__
952bc3d5698SJohn Baldwin	rev	r2,r2
953bc3d5698SJohn Baldwin# endif
954bc3d5698SJohn Baldwin#else
955bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 14
956bc3d5698SJohn Baldwin	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
957bc3d5698SJohn Baldwin	ldrb	r12,[r1,#2]
958bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
959bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#8
960bc3d5698SJohn Baldwin	ldrb	r12,[r1],#4
961bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
962bc3d5698SJohn Baldwin# if 14==15
963bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
964bc3d5698SJohn Baldwin# endif
965bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5
966bc3d5698SJohn Baldwin	orr	r2,r2,r12,lsl#24
967bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#19	@ Sigma1(e)
968bc3d5698SJohn Baldwin#endif
969bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
970bc3d5698SJohn Baldwin	add	r5,r5,r2			@ h+=X[i]
971bc3d5698SJohn Baldwin	str	r2,[sp,#14*4]
972bc3d5698SJohn Baldwin	eor	r2,r11,r4
973bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
974bc3d5698SJohn Baldwin	and	r2,r2,r10
975bc3d5698SJohn Baldwin	add	r5,r5,r12			@ h+=K256[i]
976bc3d5698SJohn Baldwin	eor	r2,r2,r4			@ Ch(e,f,g)
977bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#11
978bc3d5698SJohn Baldwin	add	r5,r5,r2			@ h+=Ch(e,f,g)
979bc3d5698SJohn Baldwin#if 14==31
980bc3d5698SJohn Baldwin	and	r12,r12,#0xff
981bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
982bc3d5698SJohn Baldwin#endif
983bc3d5698SJohn Baldwin#if 14<15
984bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
985bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
986bc3d5698SJohn Baldwin# else
987bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
988bc3d5698SJohn Baldwin# endif
989bc3d5698SJohn Baldwin	eor	r12,r6,r7			@ a^b, b^c in next round
990bc3d5698SJohn Baldwin#else
991bc3d5698SJohn Baldwin	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
992bc3d5698SJohn Baldwin	eor	r12,r6,r7			@ a^b, b^c in next round
993bc3d5698SJohn Baldwin	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
994bc3d5698SJohn Baldwin#endif
995bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#20	@ Sigma0(a)
996bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
997bc3d5698SJohn Baldwin	add	r9,r9,r5			@ d+=h
998bc3d5698SJohn Baldwin	eor	r3,r3,r7			@ Maj(a,b,c)
999bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1000bc3d5698SJohn Baldwin	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1001bc3d5698SJohn Baldwin#if __ARM_ARCH__>=7
1002bc3d5698SJohn Baldwin	@ ldr	r2,[r1],#4			@ 15
1003bc3d5698SJohn Baldwin# if 15==15
1004bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
1005bc3d5698SJohn Baldwin# endif
1006bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5
1007bc3d5698SJohn Baldwin	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1008bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1009bc3d5698SJohn Baldwin# ifndef __ARMEB__
1010bc3d5698SJohn Baldwin	rev	r2,r2
1011bc3d5698SJohn Baldwin# endif
1012bc3d5698SJohn Baldwin#else
1013bc3d5698SJohn Baldwin	@ ldrb	r2,[r1,#3]			@ 15
1014bc3d5698SJohn Baldwin	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1015bc3d5698SJohn Baldwin	ldrb	r3,[r1,#2]
1016bc3d5698SJohn Baldwin	ldrb	r0,[r1,#1]
1017bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#8
1018bc3d5698SJohn Baldwin	ldrb	r3,[r1],#4
1019bc3d5698SJohn Baldwin	orr	r2,r2,r0,lsl#16
1020bc3d5698SJohn Baldwin# if 15==15
1021bc3d5698SJohn Baldwin	str	r1,[sp,#17*4]			@ make room for r1
1022bc3d5698SJohn Baldwin# endif
1023bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5
1024bc3d5698SJohn Baldwin	orr	r2,r2,r3,lsl#24
1025bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1026bc3d5698SJohn Baldwin#endif
1027bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1028bc3d5698SJohn Baldwin	add	r4,r4,r2			@ h+=X[i]
1029bc3d5698SJohn Baldwin	str	r2,[sp,#15*4]
1030bc3d5698SJohn Baldwin	eor	r2,r10,r11
1031bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1032bc3d5698SJohn Baldwin	and	r2,r2,r9
1033bc3d5698SJohn Baldwin	add	r4,r4,r3			@ h+=K256[i]
1034bc3d5698SJohn Baldwin	eor	r2,r2,r11			@ Ch(e,f,g)
1035bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#11
1036bc3d5698SJohn Baldwin	add	r4,r4,r2			@ h+=Ch(e,f,g)
1037bc3d5698SJohn Baldwin#if 15==31
1038bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1039bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1040bc3d5698SJohn Baldwin#endif
1041bc3d5698SJohn Baldwin#if 15<15
1042bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1043bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1044bc3d5698SJohn Baldwin# else
1045bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1046bc3d5698SJohn Baldwin# endif
1047bc3d5698SJohn Baldwin	eor	r3,r5,r6			@ a^b, b^c in next round
1048bc3d5698SJohn Baldwin#else
1049bc3d5698SJohn Baldwin	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1050bc3d5698SJohn Baldwin	eor	r3,r5,r6			@ a^b, b^c in next round
1051bc3d5698SJohn Baldwin	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1052bc3d5698SJohn Baldwin#endif
1053bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1054bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1055bc3d5698SJohn Baldwin	add	r8,r8,r4			@ d+=h
1056bc3d5698SJohn Baldwin	eor	r12,r12,r6			@ Maj(a,b,c)
1057bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1058bc3d5698SJohn Baldwin	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1059bc3d5698SJohn Baldwin.Lrounds_16_xx:
1060bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#1*4]		@ 16
1061bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#14*4]
1062bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1063bc3d5698SJohn Baldwin	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1064bc3d5698SJohn Baldwin	mov	r12,r1,ror#17
1065bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1066bc3d5698SJohn Baldwin	eor	r12,r12,r1,ror#19
1067bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1068bc3d5698SJohn Baldwin	ldr	r2,[sp,#0*4]
1069bc3d5698SJohn Baldwin	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1070bc3d5698SJohn Baldwin	ldr	r1,[sp,#9*4]
1071bc3d5698SJohn Baldwin
1072bc3d5698SJohn Baldwin	add	r12,r12,r0
1073bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1074bc3d5698SJohn Baldwin	add	r2,r2,r12
1075bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1076bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1077bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
1078bc3d5698SJohn Baldwin	add	r11,r11,r2			@ h+=X[i]
1079bc3d5698SJohn Baldwin	str	r2,[sp,#0*4]
1080bc3d5698SJohn Baldwin	eor	r2,r9,r10
1081bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1082bc3d5698SJohn Baldwin	and	r2,r2,r8
1083bc3d5698SJohn Baldwin	add	r11,r11,r12			@ h+=K256[i]
1084bc3d5698SJohn Baldwin	eor	r2,r2,r10			@ Ch(e,f,g)
1085bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#11
1086bc3d5698SJohn Baldwin	add	r11,r11,r2			@ h+=Ch(e,f,g)
1087bc3d5698SJohn Baldwin#if 16==31
1088bc3d5698SJohn Baldwin	and	r12,r12,#0xff
1089bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
1090bc3d5698SJohn Baldwin#endif
1091bc3d5698SJohn Baldwin#if 16<15
1092bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1093bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1094bc3d5698SJohn Baldwin# else
1095bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1096bc3d5698SJohn Baldwin# endif
1097bc3d5698SJohn Baldwin	eor	r12,r4,r5			@ a^b, b^c in next round
1098bc3d5698SJohn Baldwin#else
1099bc3d5698SJohn Baldwin	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1100bc3d5698SJohn Baldwin	eor	r12,r4,r5			@ a^b, b^c in next round
1101bc3d5698SJohn Baldwin	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1102bc3d5698SJohn Baldwin#endif
1103bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1104bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
1105bc3d5698SJohn Baldwin	add	r7,r7,r11			@ d+=h
1106bc3d5698SJohn Baldwin	eor	r3,r3,r5			@ Maj(a,b,c)
1107bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1108bc3d5698SJohn Baldwin	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1109bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#2*4]		@ 17
1110bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#15*4]
1111bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1112bc3d5698SJohn Baldwin	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1113bc3d5698SJohn Baldwin	mov	r3,r1,ror#17
1114bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1115bc3d5698SJohn Baldwin	eor	r3,r3,r1,ror#19
1116bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1117bc3d5698SJohn Baldwin	ldr	r2,[sp,#1*4]
1118bc3d5698SJohn Baldwin	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1119bc3d5698SJohn Baldwin	ldr	r1,[sp,#10*4]
1120bc3d5698SJohn Baldwin
1121bc3d5698SJohn Baldwin	add	r3,r3,r0
1122bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1123bc3d5698SJohn Baldwin	add	r2,r2,r3
1124bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1125bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1126bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1127bc3d5698SJohn Baldwin	add	r10,r10,r2			@ h+=X[i]
1128bc3d5698SJohn Baldwin	str	r2,[sp,#1*4]
1129bc3d5698SJohn Baldwin	eor	r2,r8,r9
1130bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1131bc3d5698SJohn Baldwin	and	r2,r2,r7
1132bc3d5698SJohn Baldwin	add	r10,r10,r3			@ h+=K256[i]
1133bc3d5698SJohn Baldwin	eor	r2,r2,r9			@ Ch(e,f,g)
1134bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#11
1135bc3d5698SJohn Baldwin	add	r10,r10,r2			@ h+=Ch(e,f,g)
1136bc3d5698SJohn Baldwin#if 17==31
1137bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1138bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1139bc3d5698SJohn Baldwin#endif
1140bc3d5698SJohn Baldwin#if 17<15
1141bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1142bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1143bc3d5698SJohn Baldwin# else
1144bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1145bc3d5698SJohn Baldwin# endif
1146bc3d5698SJohn Baldwin	eor	r3,r11,r4			@ a^b, b^c in next round
1147bc3d5698SJohn Baldwin#else
1148bc3d5698SJohn Baldwin	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1149bc3d5698SJohn Baldwin	eor	r3,r11,r4			@ a^b, b^c in next round
1150bc3d5698SJohn Baldwin	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1151bc3d5698SJohn Baldwin#endif
1152bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1153bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1154bc3d5698SJohn Baldwin	add	r6,r6,r10			@ d+=h
1155bc3d5698SJohn Baldwin	eor	r12,r12,r4			@ Maj(a,b,c)
1156bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1157bc3d5698SJohn Baldwin	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1158bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#3*4]		@ 18
1159bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#0*4]
1160bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1161bc3d5698SJohn Baldwin	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1162bc3d5698SJohn Baldwin	mov	r12,r1,ror#17
1163bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1164bc3d5698SJohn Baldwin	eor	r12,r12,r1,ror#19
1165bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1166bc3d5698SJohn Baldwin	ldr	r2,[sp,#2*4]
1167bc3d5698SJohn Baldwin	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1168bc3d5698SJohn Baldwin	ldr	r1,[sp,#11*4]
1169bc3d5698SJohn Baldwin
1170bc3d5698SJohn Baldwin	add	r12,r12,r0
1171bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1172bc3d5698SJohn Baldwin	add	r2,r2,r12
1173bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1174bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1175bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
1176bc3d5698SJohn Baldwin	add	r9,r9,r2			@ h+=X[i]
1177bc3d5698SJohn Baldwin	str	r2,[sp,#2*4]
1178bc3d5698SJohn Baldwin	eor	r2,r7,r8
1179bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1180bc3d5698SJohn Baldwin	and	r2,r2,r6
1181bc3d5698SJohn Baldwin	add	r9,r9,r12			@ h+=K256[i]
1182bc3d5698SJohn Baldwin	eor	r2,r2,r8			@ Ch(e,f,g)
1183bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#11
1184bc3d5698SJohn Baldwin	add	r9,r9,r2			@ h+=Ch(e,f,g)
1185bc3d5698SJohn Baldwin#if 18==31
1186bc3d5698SJohn Baldwin	and	r12,r12,#0xff
1187bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
1188bc3d5698SJohn Baldwin#endif
1189bc3d5698SJohn Baldwin#if 18<15
1190bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1191bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1192bc3d5698SJohn Baldwin# else
1193bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1194bc3d5698SJohn Baldwin# endif
1195bc3d5698SJohn Baldwin	eor	r12,r10,r11			@ a^b, b^c in next round
1196bc3d5698SJohn Baldwin#else
1197bc3d5698SJohn Baldwin	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1198bc3d5698SJohn Baldwin	eor	r12,r10,r11			@ a^b, b^c in next round
1199bc3d5698SJohn Baldwin	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1200bc3d5698SJohn Baldwin#endif
1201bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1202bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
1203bc3d5698SJohn Baldwin	add	r5,r5,r9			@ d+=h
1204bc3d5698SJohn Baldwin	eor	r3,r3,r11			@ Maj(a,b,c)
1205bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1206bc3d5698SJohn Baldwin	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1207bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#4*4]		@ 19
1208bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#1*4]
1209bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1210bc3d5698SJohn Baldwin	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1211bc3d5698SJohn Baldwin	mov	r3,r1,ror#17
1212bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1213bc3d5698SJohn Baldwin	eor	r3,r3,r1,ror#19
1214bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1215bc3d5698SJohn Baldwin	ldr	r2,[sp,#3*4]
1216bc3d5698SJohn Baldwin	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1217bc3d5698SJohn Baldwin	ldr	r1,[sp,#12*4]
1218bc3d5698SJohn Baldwin
1219bc3d5698SJohn Baldwin	add	r3,r3,r0
1220bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1221bc3d5698SJohn Baldwin	add	r2,r2,r3
1222bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1223bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1224bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1225bc3d5698SJohn Baldwin	add	r8,r8,r2			@ h+=X[i]
1226bc3d5698SJohn Baldwin	str	r2,[sp,#3*4]
1227bc3d5698SJohn Baldwin	eor	r2,r6,r7
1228bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1229bc3d5698SJohn Baldwin	and	r2,r2,r5
1230bc3d5698SJohn Baldwin	add	r8,r8,r3			@ h+=K256[i]
1231bc3d5698SJohn Baldwin	eor	r2,r2,r7			@ Ch(e,f,g)
1232bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#11
1233bc3d5698SJohn Baldwin	add	r8,r8,r2			@ h+=Ch(e,f,g)
1234bc3d5698SJohn Baldwin#if 19==31
1235bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1236bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1237bc3d5698SJohn Baldwin#endif
1238bc3d5698SJohn Baldwin#if 19<15
1239bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1240bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1241bc3d5698SJohn Baldwin# else
1242bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1243bc3d5698SJohn Baldwin# endif
1244bc3d5698SJohn Baldwin	eor	r3,r9,r10			@ a^b, b^c in next round
1245bc3d5698SJohn Baldwin#else
1246bc3d5698SJohn Baldwin	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1247bc3d5698SJohn Baldwin	eor	r3,r9,r10			@ a^b, b^c in next round
1248bc3d5698SJohn Baldwin	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1249bc3d5698SJohn Baldwin#endif
1250bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1251bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1252bc3d5698SJohn Baldwin	add	r4,r4,r8			@ d+=h
1253bc3d5698SJohn Baldwin	eor	r12,r12,r10			@ Maj(a,b,c)
1254bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1255bc3d5698SJohn Baldwin	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1256bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#5*4]		@ 20
1257bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#2*4]
1258bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1259bc3d5698SJohn Baldwin	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1260bc3d5698SJohn Baldwin	mov	r12,r1,ror#17
1261bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1262bc3d5698SJohn Baldwin	eor	r12,r12,r1,ror#19
1263bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1264bc3d5698SJohn Baldwin	ldr	r2,[sp,#4*4]
1265bc3d5698SJohn Baldwin	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1266bc3d5698SJohn Baldwin	ldr	r1,[sp,#13*4]
1267bc3d5698SJohn Baldwin
1268bc3d5698SJohn Baldwin	add	r12,r12,r0
1269bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1270bc3d5698SJohn Baldwin	add	r2,r2,r12
1271bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1272bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1273bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
1274bc3d5698SJohn Baldwin	add	r7,r7,r2			@ h+=X[i]
1275bc3d5698SJohn Baldwin	str	r2,[sp,#4*4]
1276bc3d5698SJohn Baldwin	eor	r2,r5,r6
1277bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1278bc3d5698SJohn Baldwin	and	r2,r2,r4
1279bc3d5698SJohn Baldwin	add	r7,r7,r12			@ h+=K256[i]
1280bc3d5698SJohn Baldwin	eor	r2,r2,r6			@ Ch(e,f,g)
1281bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#11
1282bc3d5698SJohn Baldwin	add	r7,r7,r2			@ h+=Ch(e,f,g)
1283bc3d5698SJohn Baldwin#if 20==31
1284bc3d5698SJohn Baldwin	and	r12,r12,#0xff
1285bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
1286bc3d5698SJohn Baldwin#endif
1287bc3d5698SJohn Baldwin#if 20<15
1288bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1289bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1290bc3d5698SJohn Baldwin# else
1291bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1292bc3d5698SJohn Baldwin# endif
1293bc3d5698SJohn Baldwin	eor	r12,r8,r9			@ a^b, b^c in next round
1294bc3d5698SJohn Baldwin#else
1295bc3d5698SJohn Baldwin	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1296bc3d5698SJohn Baldwin	eor	r12,r8,r9			@ a^b, b^c in next round
1297bc3d5698SJohn Baldwin	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1298bc3d5698SJohn Baldwin#endif
1299bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1300bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
1301bc3d5698SJohn Baldwin	add	r11,r11,r7			@ d+=h
1302bc3d5698SJohn Baldwin	eor	r3,r3,r9			@ Maj(a,b,c)
1303bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1304bc3d5698SJohn Baldwin	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1305bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#6*4]		@ 21
1306bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#3*4]
1307bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1308bc3d5698SJohn Baldwin	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1309bc3d5698SJohn Baldwin	mov	r3,r1,ror#17
1310bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1311bc3d5698SJohn Baldwin	eor	r3,r3,r1,ror#19
1312bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1313bc3d5698SJohn Baldwin	ldr	r2,[sp,#5*4]
1314bc3d5698SJohn Baldwin	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1315bc3d5698SJohn Baldwin	ldr	r1,[sp,#14*4]
1316bc3d5698SJohn Baldwin
1317bc3d5698SJohn Baldwin	add	r3,r3,r0
1318bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1319bc3d5698SJohn Baldwin	add	r2,r2,r3
1320bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1321bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1322bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1323bc3d5698SJohn Baldwin	add	r6,r6,r2			@ h+=X[i]
1324bc3d5698SJohn Baldwin	str	r2,[sp,#5*4]
1325bc3d5698SJohn Baldwin	eor	r2,r4,r5
1326bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1327bc3d5698SJohn Baldwin	and	r2,r2,r11
1328bc3d5698SJohn Baldwin	add	r6,r6,r3			@ h+=K256[i]
1329bc3d5698SJohn Baldwin	eor	r2,r2,r5			@ Ch(e,f,g)
1330bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#11
1331bc3d5698SJohn Baldwin	add	r6,r6,r2			@ h+=Ch(e,f,g)
1332bc3d5698SJohn Baldwin#if 21==31
1333bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1334bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1335bc3d5698SJohn Baldwin#endif
1336bc3d5698SJohn Baldwin#if 21<15
1337bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1338bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1339bc3d5698SJohn Baldwin# else
1340bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1341bc3d5698SJohn Baldwin# endif
1342bc3d5698SJohn Baldwin	eor	r3,r7,r8			@ a^b, b^c in next round
1343bc3d5698SJohn Baldwin#else
1344bc3d5698SJohn Baldwin	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1345bc3d5698SJohn Baldwin	eor	r3,r7,r8			@ a^b, b^c in next round
1346bc3d5698SJohn Baldwin	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1347bc3d5698SJohn Baldwin#endif
1348bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1349bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1350bc3d5698SJohn Baldwin	add	r10,r10,r6			@ d+=h
1351bc3d5698SJohn Baldwin	eor	r12,r12,r8			@ Maj(a,b,c)
1352bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1353bc3d5698SJohn Baldwin	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1354bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#7*4]		@ 22
1355bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#4*4]
1356bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1357bc3d5698SJohn Baldwin	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1358bc3d5698SJohn Baldwin	mov	r12,r1,ror#17
1359bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1360bc3d5698SJohn Baldwin	eor	r12,r12,r1,ror#19
1361bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1362bc3d5698SJohn Baldwin	ldr	r2,[sp,#6*4]
1363bc3d5698SJohn Baldwin	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1364bc3d5698SJohn Baldwin	ldr	r1,[sp,#15*4]
1365bc3d5698SJohn Baldwin
1366bc3d5698SJohn Baldwin	add	r12,r12,r0
1367bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1368bc3d5698SJohn Baldwin	add	r2,r2,r12
1369bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1370bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1371bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
1372bc3d5698SJohn Baldwin	add	r5,r5,r2			@ h+=X[i]
1373bc3d5698SJohn Baldwin	str	r2,[sp,#6*4]
1374bc3d5698SJohn Baldwin	eor	r2,r11,r4
1375bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1376bc3d5698SJohn Baldwin	and	r2,r2,r10
1377bc3d5698SJohn Baldwin	add	r5,r5,r12			@ h+=K256[i]
1378bc3d5698SJohn Baldwin	eor	r2,r2,r4			@ Ch(e,f,g)
1379bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#11
1380bc3d5698SJohn Baldwin	add	r5,r5,r2			@ h+=Ch(e,f,g)
1381bc3d5698SJohn Baldwin#if 22==31
1382bc3d5698SJohn Baldwin	and	r12,r12,#0xff
1383bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
1384bc3d5698SJohn Baldwin#endif
1385bc3d5698SJohn Baldwin#if 22<15
1386bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1387bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1388bc3d5698SJohn Baldwin# else
1389bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1390bc3d5698SJohn Baldwin# endif
1391bc3d5698SJohn Baldwin	eor	r12,r6,r7			@ a^b, b^c in next round
1392bc3d5698SJohn Baldwin#else
1393bc3d5698SJohn Baldwin	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1394bc3d5698SJohn Baldwin	eor	r12,r6,r7			@ a^b, b^c in next round
1395bc3d5698SJohn Baldwin	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1396bc3d5698SJohn Baldwin#endif
1397bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1398bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
1399bc3d5698SJohn Baldwin	add	r9,r9,r5			@ d+=h
1400bc3d5698SJohn Baldwin	eor	r3,r3,r7			@ Maj(a,b,c)
1401bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1402bc3d5698SJohn Baldwin	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1403bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#8*4]		@ 23
1404bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#5*4]
1405bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1406bc3d5698SJohn Baldwin	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1407bc3d5698SJohn Baldwin	mov	r3,r1,ror#17
1408bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1409bc3d5698SJohn Baldwin	eor	r3,r3,r1,ror#19
1410bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1411bc3d5698SJohn Baldwin	ldr	r2,[sp,#7*4]
1412bc3d5698SJohn Baldwin	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1413bc3d5698SJohn Baldwin	ldr	r1,[sp,#0*4]
1414bc3d5698SJohn Baldwin
1415bc3d5698SJohn Baldwin	add	r3,r3,r0
1416bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1417bc3d5698SJohn Baldwin	add	r2,r2,r3
1418bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1419bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1420bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1421bc3d5698SJohn Baldwin	add	r4,r4,r2			@ h+=X[i]
1422bc3d5698SJohn Baldwin	str	r2,[sp,#7*4]
1423bc3d5698SJohn Baldwin	eor	r2,r10,r11
1424bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1425bc3d5698SJohn Baldwin	and	r2,r2,r9
1426bc3d5698SJohn Baldwin	add	r4,r4,r3			@ h+=K256[i]
1427bc3d5698SJohn Baldwin	eor	r2,r2,r11			@ Ch(e,f,g)
1428bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#11
1429bc3d5698SJohn Baldwin	add	r4,r4,r2			@ h+=Ch(e,f,g)
1430bc3d5698SJohn Baldwin#if 23==31
1431bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1432bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1433bc3d5698SJohn Baldwin#endif
1434bc3d5698SJohn Baldwin#if 23<15
1435bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1436bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1437bc3d5698SJohn Baldwin# else
1438bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1439bc3d5698SJohn Baldwin# endif
1440bc3d5698SJohn Baldwin	eor	r3,r5,r6			@ a^b, b^c in next round
1441bc3d5698SJohn Baldwin#else
1442bc3d5698SJohn Baldwin	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1443bc3d5698SJohn Baldwin	eor	r3,r5,r6			@ a^b, b^c in next round
1444bc3d5698SJohn Baldwin	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1445bc3d5698SJohn Baldwin#endif
1446bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1447bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1448bc3d5698SJohn Baldwin	add	r8,r8,r4			@ d+=h
1449bc3d5698SJohn Baldwin	eor	r12,r12,r6			@ Maj(a,b,c)
1450bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1451bc3d5698SJohn Baldwin	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1452bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#9*4]		@ 24
1453bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#6*4]
1454bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1455bc3d5698SJohn Baldwin	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1456bc3d5698SJohn Baldwin	mov	r12,r1,ror#17
1457bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1458bc3d5698SJohn Baldwin	eor	r12,r12,r1,ror#19
1459bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1460bc3d5698SJohn Baldwin	ldr	r2,[sp,#8*4]
1461bc3d5698SJohn Baldwin	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1462bc3d5698SJohn Baldwin	ldr	r1,[sp,#1*4]
1463bc3d5698SJohn Baldwin
1464bc3d5698SJohn Baldwin	add	r12,r12,r0
1465bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1466bc3d5698SJohn Baldwin	add	r2,r2,r12
1467bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1468bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1469bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
1470bc3d5698SJohn Baldwin	add	r11,r11,r2			@ h+=X[i]
1471bc3d5698SJohn Baldwin	str	r2,[sp,#8*4]
1472bc3d5698SJohn Baldwin	eor	r2,r9,r10
1473bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1474bc3d5698SJohn Baldwin	and	r2,r2,r8
1475bc3d5698SJohn Baldwin	add	r11,r11,r12			@ h+=K256[i]
1476bc3d5698SJohn Baldwin	eor	r2,r2,r10			@ Ch(e,f,g)
1477bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#11
1478bc3d5698SJohn Baldwin	add	r11,r11,r2			@ h+=Ch(e,f,g)
1479bc3d5698SJohn Baldwin#if 24==31
1480bc3d5698SJohn Baldwin	and	r12,r12,#0xff
1481bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
1482bc3d5698SJohn Baldwin#endif
1483bc3d5698SJohn Baldwin#if 24<15
1484bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1485bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1486bc3d5698SJohn Baldwin# else
1487bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1488bc3d5698SJohn Baldwin# endif
1489bc3d5698SJohn Baldwin	eor	r12,r4,r5			@ a^b, b^c in next round
1490bc3d5698SJohn Baldwin#else
1491bc3d5698SJohn Baldwin	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1492bc3d5698SJohn Baldwin	eor	r12,r4,r5			@ a^b, b^c in next round
1493bc3d5698SJohn Baldwin	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1494bc3d5698SJohn Baldwin#endif
1495bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1496bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
1497bc3d5698SJohn Baldwin	add	r7,r7,r11			@ d+=h
1498bc3d5698SJohn Baldwin	eor	r3,r3,r5			@ Maj(a,b,c)
1499bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1500bc3d5698SJohn Baldwin	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1501bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#10*4]		@ 25
1502bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#7*4]
1503bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1504bc3d5698SJohn Baldwin	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1505bc3d5698SJohn Baldwin	mov	r3,r1,ror#17
1506bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1507bc3d5698SJohn Baldwin	eor	r3,r3,r1,ror#19
1508bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1509bc3d5698SJohn Baldwin	ldr	r2,[sp,#9*4]
1510bc3d5698SJohn Baldwin	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1511bc3d5698SJohn Baldwin	ldr	r1,[sp,#2*4]
1512bc3d5698SJohn Baldwin
1513bc3d5698SJohn Baldwin	add	r3,r3,r0
1514bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1515bc3d5698SJohn Baldwin	add	r2,r2,r3
1516bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1517bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1518bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1519bc3d5698SJohn Baldwin	add	r10,r10,r2			@ h+=X[i]
1520bc3d5698SJohn Baldwin	str	r2,[sp,#9*4]
1521bc3d5698SJohn Baldwin	eor	r2,r8,r9
1522bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1523bc3d5698SJohn Baldwin	and	r2,r2,r7
1524bc3d5698SJohn Baldwin	add	r10,r10,r3			@ h+=K256[i]
1525bc3d5698SJohn Baldwin	eor	r2,r2,r9			@ Ch(e,f,g)
1526bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#11
1527bc3d5698SJohn Baldwin	add	r10,r10,r2			@ h+=Ch(e,f,g)
1528bc3d5698SJohn Baldwin#if 25==31
1529bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1530bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1531bc3d5698SJohn Baldwin#endif
1532bc3d5698SJohn Baldwin#if 25<15
1533bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1534bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1535bc3d5698SJohn Baldwin# else
1536bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1537bc3d5698SJohn Baldwin# endif
1538bc3d5698SJohn Baldwin	eor	r3,r11,r4			@ a^b, b^c in next round
1539bc3d5698SJohn Baldwin#else
1540bc3d5698SJohn Baldwin	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1541bc3d5698SJohn Baldwin	eor	r3,r11,r4			@ a^b, b^c in next round
1542bc3d5698SJohn Baldwin	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1543bc3d5698SJohn Baldwin#endif
1544bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1545bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1546bc3d5698SJohn Baldwin	add	r6,r6,r10			@ d+=h
1547bc3d5698SJohn Baldwin	eor	r12,r12,r4			@ Maj(a,b,c)
1548bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1549bc3d5698SJohn Baldwin	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1550bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#11*4]		@ 26
1551bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#8*4]
1552bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1553bc3d5698SJohn Baldwin	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1554bc3d5698SJohn Baldwin	mov	r12,r1,ror#17
1555bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1556bc3d5698SJohn Baldwin	eor	r12,r12,r1,ror#19
1557bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1558bc3d5698SJohn Baldwin	ldr	r2,[sp,#10*4]
1559bc3d5698SJohn Baldwin	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1560bc3d5698SJohn Baldwin	ldr	r1,[sp,#3*4]
1561bc3d5698SJohn Baldwin
1562bc3d5698SJohn Baldwin	add	r12,r12,r0
1563bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1564bc3d5698SJohn Baldwin	add	r2,r2,r12
1565bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1566bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1567bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
1568bc3d5698SJohn Baldwin	add	r9,r9,r2			@ h+=X[i]
1569bc3d5698SJohn Baldwin	str	r2,[sp,#10*4]
1570bc3d5698SJohn Baldwin	eor	r2,r7,r8
1571bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1572bc3d5698SJohn Baldwin	and	r2,r2,r6
1573bc3d5698SJohn Baldwin	add	r9,r9,r12			@ h+=K256[i]
1574bc3d5698SJohn Baldwin	eor	r2,r2,r8			@ Ch(e,f,g)
1575bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#11
1576bc3d5698SJohn Baldwin	add	r9,r9,r2			@ h+=Ch(e,f,g)
1577bc3d5698SJohn Baldwin#if 26==31
1578bc3d5698SJohn Baldwin	and	r12,r12,#0xff
1579bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
1580bc3d5698SJohn Baldwin#endif
1581bc3d5698SJohn Baldwin#if 26<15
1582bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1583bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1584bc3d5698SJohn Baldwin# else
1585bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1586bc3d5698SJohn Baldwin# endif
1587bc3d5698SJohn Baldwin	eor	r12,r10,r11			@ a^b, b^c in next round
1588bc3d5698SJohn Baldwin#else
1589bc3d5698SJohn Baldwin	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1590bc3d5698SJohn Baldwin	eor	r12,r10,r11			@ a^b, b^c in next round
1591bc3d5698SJohn Baldwin	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1592bc3d5698SJohn Baldwin#endif
1593bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1594bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
1595bc3d5698SJohn Baldwin	add	r5,r5,r9			@ d+=h
1596bc3d5698SJohn Baldwin	eor	r3,r3,r11			@ Maj(a,b,c)
1597bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1598bc3d5698SJohn Baldwin	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1599bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#12*4]		@ 27
1600bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#9*4]
1601bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1602bc3d5698SJohn Baldwin	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1603bc3d5698SJohn Baldwin	mov	r3,r1,ror#17
1604bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1605bc3d5698SJohn Baldwin	eor	r3,r3,r1,ror#19
1606bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1607bc3d5698SJohn Baldwin	ldr	r2,[sp,#11*4]
1608bc3d5698SJohn Baldwin	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1609bc3d5698SJohn Baldwin	ldr	r1,[sp,#4*4]
1610bc3d5698SJohn Baldwin
1611bc3d5698SJohn Baldwin	add	r3,r3,r0
1612bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1613bc3d5698SJohn Baldwin	add	r2,r2,r3
1614bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1615bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1616bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1617bc3d5698SJohn Baldwin	add	r8,r8,r2			@ h+=X[i]
1618bc3d5698SJohn Baldwin	str	r2,[sp,#11*4]
1619bc3d5698SJohn Baldwin	eor	r2,r6,r7
1620bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1621bc3d5698SJohn Baldwin	and	r2,r2,r5
1622bc3d5698SJohn Baldwin	add	r8,r8,r3			@ h+=K256[i]
1623bc3d5698SJohn Baldwin	eor	r2,r2,r7			@ Ch(e,f,g)
1624bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#11
1625bc3d5698SJohn Baldwin	add	r8,r8,r2			@ h+=Ch(e,f,g)
1626bc3d5698SJohn Baldwin#if 27==31
1627bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1628bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1629bc3d5698SJohn Baldwin#endif
1630bc3d5698SJohn Baldwin#if 27<15
1631bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1632bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1633bc3d5698SJohn Baldwin# else
1634bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1635bc3d5698SJohn Baldwin# endif
1636bc3d5698SJohn Baldwin	eor	r3,r9,r10			@ a^b, b^c in next round
1637bc3d5698SJohn Baldwin#else
1638bc3d5698SJohn Baldwin	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1639bc3d5698SJohn Baldwin	eor	r3,r9,r10			@ a^b, b^c in next round
1640bc3d5698SJohn Baldwin	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1641bc3d5698SJohn Baldwin#endif
1642bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1643bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1644bc3d5698SJohn Baldwin	add	r4,r4,r8			@ d+=h
1645bc3d5698SJohn Baldwin	eor	r12,r12,r10			@ Maj(a,b,c)
1646bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1647bc3d5698SJohn Baldwin	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1648bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#13*4]		@ 28
1649bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#10*4]
1650bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1651bc3d5698SJohn Baldwin	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1652bc3d5698SJohn Baldwin	mov	r12,r1,ror#17
1653bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1654bc3d5698SJohn Baldwin	eor	r12,r12,r1,ror#19
1655bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1656bc3d5698SJohn Baldwin	ldr	r2,[sp,#12*4]
1657bc3d5698SJohn Baldwin	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1658bc3d5698SJohn Baldwin	ldr	r1,[sp,#5*4]
1659bc3d5698SJohn Baldwin
1660bc3d5698SJohn Baldwin	add	r12,r12,r0
1661bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1662bc3d5698SJohn Baldwin	add	r2,r2,r12
1663bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1664bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1665bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
1666bc3d5698SJohn Baldwin	add	r7,r7,r2			@ h+=X[i]
1667bc3d5698SJohn Baldwin	str	r2,[sp,#12*4]
1668bc3d5698SJohn Baldwin	eor	r2,r5,r6
1669bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1670bc3d5698SJohn Baldwin	and	r2,r2,r4
1671bc3d5698SJohn Baldwin	add	r7,r7,r12			@ h+=K256[i]
1672bc3d5698SJohn Baldwin	eor	r2,r2,r6			@ Ch(e,f,g)
1673bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#11
1674bc3d5698SJohn Baldwin	add	r7,r7,r2			@ h+=Ch(e,f,g)
1675bc3d5698SJohn Baldwin#if 28==31
1676bc3d5698SJohn Baldwin	and	r12,r12,#0xff
1677bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
1678bc3d5698SJohn Baldwin#endif
1679bc3d5698SJohn Baldwin#if 28<15
1680bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1681bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1682bc3d5698SJohn Baldwin# else
1683bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1684bc3d5698SJohn Baldwin# endif
1685bc3d5698SJohn Baldwin	eor	r12,r8,r9			@ a^b, b^c in next round
1686bc3d5698SJohn Baldwin#else
1687bc3d5698SJohn Baldwin	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1688bc3d5698SJohn Baldwin	eor	r12,r8,r9			@ a^b, b^c in next round
1689bc3d5698SJohn Baldwin	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1690bc3d5698SJohn Baldwin#endif
1691bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1692bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
1693bc3d5698SJohn Baldwin	add	r11,r11,r7			@ d+=h
1694bc3d5698SJohn Baldwin	eor	r3,r3,r9			@ Maj(a,b,c)
1695bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1696bc3d5698SJohn Baldwin	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1697bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#14*4]		@ 29
1698bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#11*4]
1699bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1700bc3d5698SJohn Baldwin	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1701bc3d5698SJohn Baldwin	mov	r3,r1,ror#17
1702bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1703bc3d5698SJohn Baldwin	eor	r3,r3,r1,ror#19
1704bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1705bc3d5698SJohn Baldwin	ldr	r2,[sp,#13*4]
1706bc3d5698SJohn Baldwin	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1707bc3d5698SJohn Baldwin	ldr	r1,[sp,#6*4]
1708bc3d5698SJohn Baldwin
1709bc3d5698SJohn Baldwin	add	r3,r3,r0
1710bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1711bc3d5698SJohn Baldwin	add	r2,r2,r3
1712bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1713bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1714bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1715bc3d5698SJohn Baldwin	add	r6,r6,r2			@ h+=X[i]
1716bc3d5698SJohn Baldwin	str	r2,[sp,#13*4]
1717bc3d5698SJohn Baldwin	eor	r2,r4,r5
1718bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1719bc3d5698SJohn Baldwin	and	r2,r2,r11
1720bc3d5698SJohn Baldwin	add	r6,r6,r3			@ h+=K256[i]
1721bc3d5698SJohn Baldwin	eor	r2,r2,r5			@ Ch(e,f,g)
1722bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#11
1723bc3d5698SJohn Baldwin	add	r6,r6,r2			@ h+=Ch(e,f,g)
1724bc3d5698SJohn Baldwin#if 29==31
1725bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1726bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1727bc3d5698SJohn Baldwin#endif
1728bc3d5698SJohn Baldwin#if 29<15
1729bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1730bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1731bc3d5698SJohn Baldwin# else
1732bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1733bc3d5698SJohn Baldwin# endif
1734bc3d5698SJohn Baldwin	eor	r3,r7,r8			@ a^b, b^c in next round
1735bc3d5698SJohn Baldwin#else
1736bc3d5698SJohn Baldwin	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1737bc3d5698SJohn Baldwin	eor	r3,r7,r8			@ a^b, b^c in next round
1738bc3d5698SJohn Baldwin	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1739bc3d5698SJohn Baldwin#endif
1740bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1741bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1742bc3d5698SJohn Baldwin	add	r10,r10,r6			@ d+=h
1743bc3d5698SJohn Baldwin	eor	r12,r12,r8			@ Maj(a,b,c)
1744bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1745bc3d5698SJohn Baldwin	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1746bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#15*4]		@ 30
1747bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#12*4]
1748bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1749bc3d5698SJohn Baldwin	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1750bc3d5698SJohn Baldwin	mov	r12,r1,ror#17
1751bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1752bc3d5698SJohn Baldwin	eor	r12,r12,r1,ror#19
1753bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1754bc3d5698SJohn Baldwin	ldr	r2,[sp,#14*4]
1755bc3d5698SJohn Baldwin	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1756bc3d5698SJohn Baldwin	ldr	r1,[sp,#7*4]
1757bc3d5698SJohn Baldwin
1758bc3d5698SJohn Baldwin	add	r12,r12,r0
1759bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1760bc3d5698SJohn Baldwin	add	r2,r2,r12
1761bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1762bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1763bc3d5698SJohn Baldwin	ldr	r12,[r14],#4			@ *K256++
1764bc3d5698SJohn Baldwin	add	r5,r5,r2			@ h+=X[i]
1765bc3d5698SJohn Baldwin	str	r2,[sp,#14*4]
1766bc3d5698SJohn Baldwin	eor	r2,r11,r4
1767bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1768bc3d5698SJohn Baldwin	and	r2,r2,r10
1769bc3d5698SJohn Baldwin	add	r5,r5,r12			@ h+=K256[i]
1770bc3d5698SJohn Baldwin	eor	r2,r2,r4			@ Ch(e,f,g)
1771bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#11
1772bc3d5698SJohn Baldwin	add	r5,r5,r2			@ h+=Ch(e,f,g)
1773bc3d5698SJohn Baldwin#if 30==31
1774bc3d5698SJohn Baldwin	and	r12,r12,#0xff
1775bc3d5698SJohn Baldwin	cmp	r12,#0xf2			@ done?
1776bc3d5698SJohn Baldwin#endif
1777bc3d5698SJohn Baldwin#if 30<15
1778bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1779bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1780bc3d5698SJohn Baldwin# else
1781bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1782bc3d5698SJohn Baldwin# endif
1783bc3d5698SJohn Baldwin	eor	r12,r6,r7			@ a^b, b^c in next round
1784bc3d5698SJohn Baldwin#else
1785bc3d5698SJohn Baldwin	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1786bc3d5698SJohn Baldwin	eor	r12,r6,r7			@ a^b, b^c in next round
1787bc3d5698SJohn Baldwin	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1788bc3d5698SJohn Baldwin#endif
1789bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1790bc3d5698SJohn Baldwin	and	r3,r3,r12			@ (b^c)&=(a^b)
1791bc3d5698SJohn Baldwin	add	r9,r9,r5			@ d+=h
1792bc3d5698SJohn Baldwin	eor	r3,r3,r7			@ Maj(a,b,c)
1793bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1794bc3d5698SJohn Baldwin	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1795bc3d5698SJohn Baldwin	@ ldr	r2,[sp,#0*4]		@ 31
1796bc3d5698SJohn Baldwin	@ ldr	r1,[sp,#13*4]
1797bc3d5698SJohn Baldwin	mov	r0,r2,ror#7
1798bc3d5698SJohn Baldwin	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1799bc3d5698SJohn Baldwin	mov	r3,r1,ror#17
1800bc3d5698SJohn Baldwin	eor	r0,r0,r2,ror#18
1801bc3d5698SJohn Baldwin	eor	r3,r3,r1,ror#19
1802bc3d5698SJohn Baldwin	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1803bc3d5698SJohn Baldwin	ldr	r2,[sp,#15*4]
1804bc3d5698SJohn Baldwin	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1805bc3d5698SJohn Baldwin	ldr	r1,[sp,#8*4]
1806bc3d5698SJohn Baldwin
1807bc3d5698SJohn Baldwin	add	r3,r3,r0
1808bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1809bc3d5698SJohn Baldwin	add	r2,r2,r3
1810bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1811bc3d5698SJohn Baldwin	add	r2,r2,r1			@ X[i]
1812bc3d5698SJohn Baldwin	ldr	r3,[r14],#4			@ *K256++
1813bc3d5698SJohn Baldwin	add	r4,r4,r2			@ h+=X[i]
1814bc3d5698SJohn Baldwin	str	r2,[sp,#15*4]
1815bc3d5698SJohn Baldwin	eor	r2,r10,r11
1816bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1817bc3d5698SJohn Baldwin	and	r2,r2,r9
1818bc3d5698SJohn Baldwin	add	r4,r4,r3			@ h+=K256[i]
1819bc3d5698SJohn Baldwin	eor	r2,r2,r11			@ Ch(e,f,g)
1820bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#11
1821bc3d5698SJohn Baldwin	add	r4,r4,r2			@ h+=Ch(e,f,g)
1822bc3d5698SJohn Baldwin#if 31==31
1823bc3d5698SJohn Baldwin	and	r3,r3,#0xff
1824bc3d5698SJohn Baldwin	cmp	r3,#0xf2			@ done?
1825bc3d5698SJohn Baldwin#endif
1826bc3d5698SJohn Baldwin#if 31<15
1827bc3d5698SJohn Baldwin# if __ARM_ARCH__>=7
1828bc3d5698SJohn Baldwin	ldr	r2,[r1],#4			@ prefetch
1829bc3d5698SJohn Baldwin# else
1830bc3d5698SJohn Baldwin	ldrb	r2,[r1,#3]
1831bc3d5698SJohn Baldwin# endif
1832bc3d5698SJohn Baldwin	eor	r3,r5,r6			@ a^b, b^c in next round
1833bc3d5698SJohn Baldwin#else
1834bc3d5698SJohn Baldwin	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1835bc3d5698SJohn Baldwin	eor	r3,r5,r6			@ a^b, b^c in next round
1836bc3d5698SJohn Baldwin	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1837bc3d5698SJohn Baldwin#endif
1838bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1839bc3d5698SJohn Baldwin	and	r12,r12,r3			@ (b^c)&=(a^b)
1840bc3d5698SJohn Baldwin	add	r8,r8,r4			@ d+=h
1841bc3d5698SJohn Baldwin	eor	r12,r12,r6			@ Maj(a,b,c)
1842bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1843bc3d5698SJohn Baldwin	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1844bc3d5698SJohn Baldwin#ifdef	__thumb2__
1845bc3d5698SJohn Baldwin	ite	eq			@ Thumb2 thing, sanity check in ARM
1846bc3d5698SJohn Baldwin#endif
1847bc3d5698SJohn Baldwin	ldreq	r3,[sp,#16*4]		@ pull ctx
1848bc3d5698SJohn Baldwin	bne	.Lrounds_16_xx
1849bc3d5698SJohn Baldwin
1850bc3d5698SJohn Baldwin	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1851bc3d5698SJohn Baldwin	ldr	r0,[r3,#0]
1852bc3d5698SJohn Baldwin	ldr	r2,[r3,#4]
1853bc3d5698SJohn Baldwin	ldr	r12,[r3,#8]
1854bc3d5698SJohn Baldwin	add	r4,r4,r0
1855bc3d5698SJohn Baldwin	ldr	r0,[r3,#12]
1856bc3d5698SJohn Baldwin	add	r5,r5,r2
1857bc3d5698SJohn Baldwin	ldr	r2,[r3,#16]
1858bc3d5698SJohn Baldwin	add	r6,r6,r12
1859bc3d5698SJohn Baldwin	ldr	r12,[r3,#20]
1860bc3d5698SJohn Baldwin	add	r7,r7,r0
1861bc3d5698SJohn Baldwin	ldr	r0,[r3,#24]
1862bc3d5698SJohn Baldwin	add	r8,r8,r2
1863bc3d5698SJohn Baldwin	ldr	r2,[r3,#28]
1864bc3d5698SJohn Baldwin	add	r9,r9,r12
1865bc3d5698SJohn Baldwin	ldr	r1,[sp,#17*4]		@ pull inp
1866bc3d5698SJohn Baldwin	ldr	r12,[sp,#18*4]		@ pull inp+len
1867bc3d5698SJohn Baldwin	add	r10,r10,r0
1868bc3d5698SJohn Baldwin	add	r11,r11,r2
1869bc3d5698SJohn Baldwin	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1870bc3d5698SJohn Baldwin	cmp	r1,r12
1871bc3d5698SJohn Baldwin	sub	r14,r14,#256	@ rewind Ktbl
1872bc3d5698SJohn Baldwin	bne	.Loop
1873bc3d5698SJohn Baldwin
1874bc3d5698SJohn Baldwin	add	sp,sp,#19*4	@ destroy frame
1875bc3d5698SJohn Baldwin#if __ARM_ARCH__>=5
1876bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1877bc3d5698SJohn Baldwin#else
1878bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1879bc3d5698SJohn Baldwin	tst	lr,#1
1880bc3d5698SJohn Baldwin	moveq	pc,lr			@ be binary compatible with V4, yet
1881bc3d5698SJohn Baldwin.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1882bc3d5698SJohn Baldwin#endif
1883bc3d5698SJohn Baldwin.size	sha256_block_data_order,.-sha256_block_data_order
1884bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7
1885bc3d5698SJohn Baldwin.arch	armv7-a
1886bc3d5698SJohn Baldwin.fpu	neon
1887bc3d5698SJohn Baldwin
1888bc3d5698SJohn Baldwin.globl	sha256_block_data_order_neon
1889bc3d5698SJohn Baldwin.type	sha256_block_data_order_neon,%function
1890bc3d5698SJohn Baldwin.align	5
1891bc3d5698SJohn Baldwin.skip	16
1892bc3d5698SJohn Baldwinsha256_block_data_order_neon:
1893bc3d5698SJohn Baldwin.LNEON:
1894bc3d5698SJohn Baldwin	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1895bc3d5698SJohn Baldwin
1896bc3d5698SJohn Baldwin	sub	r11,sp,#16*4+16
1897bc3d5698SJohn Baldwin	adr	r14,K256
1898bc3d5698SJohn Baldwin	bic	r11,r11,#15		@ align for 128-bit stores
1899bc3d5698SJohn Baldwin	mov	r12,sp
1900bc3d5698SJohn Baldwin	mov	sp,r11			@ alloca
1901bc3d5698SJohn Baldwin	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1902bc3d5698SJohn Baldwin
1903bc3d5698SJohn Baldwin	vld1.8	{q0},[r1]!
1904bc3d5698SJohn Baldwin	vld1.8	{q1},[r1]!
1905bc3d5698SJohn Baldwin	vld1.8	{q2},[r1]!
1906bc3d5698SJohn Baldwin	vld1.8	{q3},[r1]!
1907bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
1908bc3d5698SJohn Baldwin	vld1.32	{q9},[r14,:128]!
1909bc3d5698SJohn Baldwin	vld1.32	{q10},[r14,:128]!
1910bc3d5698SJohn Baldwin	vld1.32	{q11},[r14,:128]!
1911bc3d5698SJohn Baldwin	vrev32.8	q0,q0		@ yes, even on
1912bc3d5698SJohn Baldwin	str	r0,[sp,#64]
1913bc3d5698SJohn Baldwin	vrev32.8	q1,q1		@ big-endian
1914bc3d5698SJohn Baldwin	str	r1,[sp,#68]
1915bc3d5698SJohn Baldwin	mov	r1,sp
1916bc3d5698SJohn Baldwin	vrev32.8	q2,q2
1917bc3d5698SJohn Baldwin	str	r2,[sp,#72]
1918bc3d5698SJohn Baldwin	vrev32.8	q3,q3
1919bc3d5698SJohn Baldwin	str	r12,[sp,#76]		@ save original sp
1920bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q0
1921bc3d5698SJohn Baldwin	vadd.i32	q9,q9,q1
1922bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
1923bc3d5698SJohn Baldwin	vadd.i32	q10,q10,q2
1924bc3d5698SJohn Baldwin	vst1.32	{q9},[r1,:128]!
1925bc3d5698SJohn Baldwin	vadd.i32	q11,q11,q3
1926bc3d5698SJohn Baldwin	vst1.32	{q10},[r1,:128]!
1927bc3d5698SJohn Baldwin	vst1.32	{q11},[r1,:128]!
1928bc3d5698SJohn Baldwin
1929bc3d5698SJohn Baldwin	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1930bc3d5698SJohn Baldwin	sub	r1,r1,#64
1931bc3d5698SJohn Baldwin	ldr	r2,[sp,#0]
1932bc3d5698SJohn Baldwin	eor	r12,r12,r12
1933bc3d5698SJohn Baldwin	eor	r3,r5,r6
1934bc3d5698SJohn Baldwin	b	.L_00_48
1935bc3d5698SJohn Baldwin
1936bc3d5698SJohn Baldwin.align	4
1937bc3d5698SJohn Baldwin.L_00_48:
1938bc3d5698SJohn Baldwin	vext.8	q8,q0,q1,#4
1939bc3d5698SJohn Baldwin	add	r11,r11,r2
1940bc3d5698SJohn Baldwin	eor	r2,r9,r10
1941bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5
1942bc3d5698SJohn Baldwin	vext.8	q9,q2,q3,#4
1943bc3d5698SJohn Baldwin	add	r4,r4,r12
1944bc3d5698SJohn Baldwin	and	r2,r2,r8
1945bc3d5698SJohn Baldwin	eor	r12,r0,r8,ror#19
1946bc3d5698SJohn Baldwin	vshr.u32	q10,q8,#7
1947bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#11
1948bc3d5698SJohn Baldwin	eor	r2,r2,r10
1949bc3d5698SJohn Baldwin	vadd.i32	q0,q0,q9
1950bc3d5698SJohn Baldwin	add	r11,r11,r12,ror#6
1951bc3d5698SJohn Baldwin	eor	r12,r4,r5
1952bc3d5698SJohn Baldwin	vshr.u32	q9,q8,#3
1953bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#20
1954bc3d5698SJohn Baldwin	add	r11,r11,r2
1955bc3d5698SJohn Baldwin	vsli.32	q10,q8,#25
1956bc3d5698SJohn Baldwin	ldr	r2,[sp,#4]
1957bc3d5698SJohn Baldwin	and	r3,r3,r12
1958bc3d5698SJohn Baldwin	vshr.u32	q11,q8,#18
1959bc3d5698SJohn Baldwin	add	r7,r7,r11
1960bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#2
1961bc3d5698SJohn Baldwin	eor	r3,r3,r5
1962bc3d5698SJohn Baldwin	veor	q9,q9,q10
1963bc3d5698SJohn Baldwin	add	r10,r10,r2
1964bc3d5698SJohn Baldwin	vsli.32	q11,q8,#14
1965bc3d5698SJohn Baldwin	eor	r2,r8,r9
1966bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5
1967bc3d5698SJohn Baldwin	vshr.u32	d24,d7,#17
1968bc3d5698SJohn Baldwin	add	r11,r11,r3
1969bc3d5698SJohn Baldwin	and	r2,r2,r7
1970bc3d5698SJohn Baldwin	veor	q9,q9,q11
1971bc3d5698SJohn Baldwin	eor	r3,r0,r7,ror#19
1972bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#11
1973bc3d5698SJohn Baldwin	vsli.32	d24,d7,#15
1974bc3d5698SJohn Baldwin	eor	r2,r2,r9
1975bc3d5698SJohn Baldwin	add	r10,r10,r3,ror#6
1976bc3d5698SJohn Baldwin	vshr.u32	d25,d7,#10
1977bc3d5698SJohn Baldwin	eor	r3,r11,r4
1978bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#20
1979bc3d5698SJohn Baldwin	vadd.i32	q0,q0,q9
1980bc3d5698SJohn Baldwin	add	r10,r10,r2
1981bc3d5698SJohn Baldwin	ldr	r2,[sp,#8]
1982bc3d5698SJohn Baldwin	veor	d25,d25,d24
1983bc3d5698SJohn Baldwin	and	r12,r12,r3
1984bc3d5698SJohn Baldwin	add	r6,r6,r10
1985bc3d5698SJohn Baldwin	vshr.u32	d24,d7,#19
1986bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#2
1987bc3d5698SJohn Baldwin	eor	r12,r12,r4
1988bc3d5698SJohn Baldwin	vsli.32	d24,d7,#13
1989bc3d5698SJohn Baldwin	add	r9,r9,r2
1990bc3d5698SJohn Baldwin	eor	r2,r7,r8
1991bc3d5698SJohn Baldwin	veor	d25,d25,d24
1992bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5
1993bc3d5698SJohn Baldwin	add	r10,r10,r12
1994bc3d5698SJohn Baldwin	vadd.i32	d0,d0,d25
1995bc3d5698SJohn Baldwin	and	r2,r2,r6
1996bc3d5698SJohn Baldwin	eor	r12,r0,r6,ror#19
1997bc3d5698SJohn Baldwin	vshr.u32	d24,d0,#17
1998bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#11
1999bc3d5698SJohn Baldwin	eor	r2,r2,r8
2000bc3d5698SJohn Baldwin	vsli.32	d24,d0,#15
2001bc3d5698SJohn Baldwin	add	r9,r9,r12,ror#6
2002bc3d5698SJohn Baldwin	eor	r12,r10,r11
2003bc3d5698SJohn Baldwin	vshr.u32	d25,d0,#10
2004bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#20
2005bc3d5698SJohn Baldwin	add	r9,r9,r2
2006bc3d5698SJohn Baldwin	veor	d25,d25,d24
2007bc3d5698SJohn Baldwin	ldr	r2,[sp,#12]
2008bc3d5698SJohn Baldwin	and	r3,r3,r12
2009bc3d5698SJohn Baldwin	vshr.u32	d24,d0,#19
2010bc3d5698SJohn Baldwin	add	r5,r5,r9
2011bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#2
2012bc3d5698SJohn Baldwin	eor	r3,r3,r11
2013bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
2014bc3d5698SJohn Baldwin	add	r8,r8,r2
2015bc3d5698SJohn Baldwin	vsli.32	d24,d0,#13
2016bc3d5698SJohn Baldwin	eor	r2,r6,r7
2017bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5
2018bc3d5698SJohn Baldwin	veor	d25,d25,d24
2019bc3d5698SJohn Baldwin	add	r9,r9,r3
2020bc3d5698SJohn Baldwin	and	r2,r2,r5
2021bc3d5698SJohn Baldwin	vadd.i32	d1,d1,d25
2022bc3d5698SJohn Baldwin	eor	r3,r0,r5,ror#19
2023bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#11
2024bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q0
2025bc3d5698SJohn Baldwin	eor	r2,r2,r7
2026bc3d5698SJohn Baldwin	add	r8,r8,r3,ror#6
2027bc3d5698SJohn Baldwin	eor	r3,r9,r10
2028bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#20
2029bc3d5698SJohn Baldwin	add	r8,r8,r2
2030bc3d5698SJohn Baldwin	ldr	r2,[sp,#16]
2031bc3d5698SJohn Baldwin	and	r12,r12,r3
2032bc3d5698SJohn Baldwin	add	r4,r4,r8
2033bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
2034bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#2
2035bc3d5698SJohn Baldwin	eor	r12,r12,r10
2036bc3d5698SJohn Baldwin	vext.8	q8,q1,q2,#4
2037bc3d5698SJohn Baldwin	add	r7,r7,r2
2038bc3d5698SJohn Baldwin	eor	r2,r5,r6
2039bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5
2040bc3d5698SJohn Baldwin	vext.8	q9,q3,q0,#4
2041bc3d5698SJohn Baldwin	add	r8,r8,r12
2042bc3d5698SJohn Baldwin	and	r2,r2,r4
2043bc3d5698SJohn Baldwin	eor	r12,r0,r4,ror#19
2044bc3d5698SJohn Baldwin	vshr.u32	q10,q8,#7
2045bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#11
2046bc3d5698SJohn Baldwin	eor	r2,r2,r6
2047bc3d5698SJohn Baldwin	vadd.i32	q1,q1,q9
2048bc3d5698SJohn Baldwin	add	r7,r7,r12,ror#6
2049bc3d5698SJohn Baldwin	eor	r12,r8,r9
2050bc3d5698SJohn Baldwin	vshr.u32	q9,q8,#3
2051bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#20
2052bc3d5698SJohn Baldwin	add	r7,r7,r2
2053bc3d5698SJohn Baldwin	vsli.32	q10,q8,#25
2054bc3d5698SJohn Baldwin	ldr	r2,[sp,#20]
2055bc3d5698SJohn Baldwin	and	r3,r3,r12
2056bc3d5698SJohn Baldwin	vshr.u32	q11,q8,#18
2057bc3d5698SJohn Baldwin	add	r11,r11,r7
2058bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#2
2059bc3d5698SJohn Baldwin	eor	r3,r3,r9
2060bc3d5698SJohn Baldwin	veor	q9,q9,q10
2061bc3d5698SJohn Baldwin	add	r6,r6,r2
2062bc3d5698SJohn Baldwin	vsli.32	q11,q8,#14
2063bc3d5698SJohn Baldwin	eor	r2,r4,r5
2064bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5
2065bc3d5698SJohn Baldwin	vshr.u32	d24,d1,#17
2066bc3d5698SJohn Baldwin	add	r7,r7,r3
2067bc3d5698SJohn Baldwin	and	r2,r2,r11
2068bc3d5698SJohn Baldwin	veor	q9,q9,q11
2069bc3d5698SJohn Baldwin	eor	r3,r0,r11,ror#19
2070bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#11
2071bc3d5698SJohn Baldwin	vsli.32	d24,d1,#15
2072bc3d5698SJohn Baldwin	eor	r2,r2,r5
2073bc3d5698SJohn Baldwin	add	r6,r6,r3,ror#6
2074bc3d5698SJohn Baldwin	vshr.u32	d25,d1,#10
2075bc3d5698SJohn Baldwin	eor	r3,r7,r8
2076bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#20
2077bc3d5698SJohn Baldwin	vadd.i32	q1,q1,q9
2078bc3d5698SJohn Baldwin	add	r6,r6,r2
2079bc3d5698SJohn Baldwin	ldr	r2,[sp,#24]
2080bc3d5698SJohn Baldwin	veor	d25,d25,d24
2081bc3d5698SJohn Baldwin	and	r12,r12,r3
2082bc3d5698SJohn Baldwin	add	r10,r10,r6
2083bc3d5698SJohn Baldwin	vshr.u32	d24,d1,#19
2084bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#2
2085bc3d5698SJohn Baldwin	eor	r12,r12,r8
2086bc3d5698SJohn Baldwin	vsli.32	d24,d1,#13
2087bc3d5698SJohn Baldwin	add	r5,r5,r2
2088bc3d5698SJohn Baldwin	eor	r2,r11,r4
2089bc3d5698SJohn Baldwin	veor	d25,d25,d24
2090bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5
2091bc3d5698SJohn Baldwin	add	r6,r6,r12
2092bc3d5698SJohn Baldwin	vadd.i32	d2,d2,d25
2093bc3d5698SJohn Baldwin	and	r2,r2,r10
2094bc3d5698SJohn Baldwin	eor	r12,r0,r10,ror#19
2095bc3d5698SJohn Baldwin	vshr.u32	d24,d2,#17
2096bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#11
2097bc3d5698SJohn Baldwin	eor	r2,r2,r4
2098bc3d5698SJohn Baldwin	vsli.32	d24,d2,#15
2099bc3d5698SJohn Baldwin	add	r5,r5,r12,ror#6
2100bc3d5698SJohn Baldwin	eor	r12,r6,r7
2101bc3d5698SJohn Baldwin	vshr.u32	d25,d2,#10
2102bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#20
2103bc3d5698SJohn Baldwin	add	r5,r5,r2
2104bc3d5698SJohn Baldwin	veor	d25,d25,d24
2105bc3d5698SJohn Baldwin	ldr	r2,[sp,#28]
2106bc3d5698SJohn Baldwin	and	r3,r3,r12
2107bc3d5698SJohn Baldwin	vshr.u32	d24,d2,#19
2108bc3d5698SJohn Baldwin	add	r9,r9,r5
2109bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#2
2110bc3d5698SJohn Baldwin	eor	r3,r3,r7
2111bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
2112bc3d5698SJohn Baldwin	add	r4,r4,r2
2113bc3d5698SJohn Baldwin	vsli.32	d24,d2,#13
2114bc3d5698SJohn Baldwin	eor	r2,r10,r11
2115bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5
2116bc3d5698SJohn Baldwin	veor	d25,d25,d24
2117bc3d5698SJohn Baldwin	add	r5,r5,r3
2118bc3d5698SJohn Baldwin	and	r2,r2,r9
2119bc3d5698SJohn Baldwin	vadd.i32	d3,d3,d25
2120bc3d5698SJohn Baldwin	eor	r3,r0,r9,ror#19
2121bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#11
2122bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q1
2123bc3d5698SJohn Baldwin	eor	r2,r2,r11
2124bc3d5698SJohn Baldwin	add	r4,r4,r3,ror#6
2125bc3d5698SJohn Baldwin	eor	r3,r5,r6
2126bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#20
2127bc3d5698SJohn Baldwin	add	r4,r4,r2
2128bc3d5698SJohn Baldwin	ldr	r2,[sp,#32]
2129bc3d5698SJohn Baldwin	and	r12,r12,r3
2130bc3d5698SJohn Baldwin	add	r8,r8,r4
2131bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
2132bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#2
2133bc3d5698SJohn Baldwin	eor	r12,r12,r6
2134bc3d5698SJohn Baldwin	vext.8	q8,q2,q3,#4
2135bc3d5698SJohn Baldwin	add	r11,r11,r2
2136bc3d5698SJohn Baldwin	eor	r2,r9,r10
2137bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5
2138bc3d5698SJohn Baldwin	vext.8	q9,q0,q1,#4
2139bc3d5698SJohn Baldwin	add	r4,r4,r12
2140bc3d5698SJohn Baldwin	and	r2,r2,r8
2141bc3d5698SJohn Baldwin	eor	r12,r0,r8,ror#19
2142bc3d5698SJohn Baldwin	vshr.u32	q10,q8,#7
2143bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#11
2144bc3d5698SJohn Baldwin	eor	r2,r2,r10
2145bc3d5698SJohn Baldwin	vadd.i32	q2,q2,q9
2146bc3d5698SJohn Baldwin	add	r11,r11,r12,ror#6
2147bc3d5698SJohn Baldwin	eor	r12,r4,r5
2148bc3d5698SJohn Baldwin	vshr.u32	q9,q8,#3
2149bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#20
2150bc3d5698SJohn Baldwin	add	r11,r11,r2
2151bc3d5698SJohn Baldwin	vsli.32	q10,q8,#25
2152bc3d5698SJohn Baldwin	ldr	r2,[sp,#36]
2153bc3d5698SJohn Baldwin	and	r3,r3,r12
2154bc3d5698SJohn Baldwin	vshr.u32	q11,q8,#18
2155bc3d5698SJohn Baldwin	add	r7,r7,r11
2156bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#2
2157bc3d5698SJohn Baldwin	eor	r3,r3,r5
2158bc3d5698SJohn Baldwin	veor	q9,q9,q10
2159bc3d5698SJohn Baldwin	add	r10,r10,r2
2160bc3d5698SJohn Baldwin	vsli.32	q11,q8,#14
2161bc3d5698SJohn Baldwin	eor	r2,r8,r9
2162bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5
2163bc3d5698SJohn Baldwin	vshr.u32	d24,d3,#17
2164bc3d5698SJohn Baldwin	add	r11,r11,r3
2165bc3d5698SJohn Baldwin	and	r2,r2,r7
2166bc3d5698SJohn Baldwin	veor	q9,q9,q11
2167bc3d5698SJohn Baldwin	eor	r3,r0,r7,ror#19
2168bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#11
2169bc3d5698SJohn Baldwin	vsli.32	d24,d3,#15
2170bc3d5698SJohn Baldwin	eor	r2,r2,r9
2171bc3d5698SJohn Baldwin	add	r10,r10,r3,ror#6
2172bc3d5698SJohn Baldwin	vshr.u32	d25,d3,#10
2173bc3d5698SJohn Baldwin	eor	r3,r11,r4
2174bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#20
2175bc3d5698SJohn Baldwin	vadd.i32	q2,q2,q9
2176bc3d5698SJohn Baldwin	add	r10,r10,r2
2177bc3d5698SJohn Baldwin	ldr	r2,[sp,#40]
2178bc3d5698SJohn Baldwin	veor	d25,d25,d24
2179bc3d5698SJohn Baldwin	and	r12,r12,r3
2180bc3d5698SJohn Baldwin	add	r6,r6,r10
2181bc3d5698SJohn Baldwin	vshr.u32	d24,d3,#19
2182bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#2
2183bc3d5698SJohn Baldwin	eor	r12,r12,r4
2184bc3d5698SJohn Baldwin	vsli.32	d24,d3,#13
2185bc3d5698SJohn Baldwin	add	r9,r9,r2
2186bc3d5698SJohn Baldwin	eor	r2,r7,r8
2187bc3d5698SJohn Baldwin	veor	d25,d25,d24
2188bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5
2189bc3d5698SJohn Baldwin	add	r10,r10,r12
2190bc3d5698SJohn Baldwin	vadd.i32	d4,d4,d25
2191bc3d5698SJohn Baldwin	and	r2,r2,r6
2192bc3d5698SJohn Baldwin	eor	r12,r0,r6,ror#19
2193bc3d5698SJohn Baldwin	vshr.u32	d24,d4,#17
2194bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#11
2195bc3d5698SJohn Baldwin	eor	r2,r2,r8
2196bc3d5698SJohn Baldwin	vsli.32	d24,d4,#15
2197bc3d5698SJohn Baldwin	add	r9,r9,r12,ror#6
2198bc3d5698SJohn Baldwin	eor	r12,r10,r11
2199bc3d5698SJohn Baldwin	vshr.u32	d25,d4,#10
2200bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#20
2201bc3d5698SJohn Baldwin	add	r9,r9,r2
2202bc3d5698SJohn Baldwin	veor	d25,d25,d24
2203bc3d5698SJohn Baldwin	ldr	r2,[sp,#44]
2204bc3d5698SJohn Baldwin	and	r3,r3,r12
2205bc3d5698SJohn Baldwin	vshr.u32	d24,d4,#19
2206bc3d5698SJohn Baldwin	add	r5,r5,r9
2207bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#2
2208bc3d5698SJohn Baldwin	eor	r3,r3,r11
2209bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
2210bc3d5698SJohn Baldwin	add	r8,r8,r2
2211bc3d5698SJohn Baldwin	vsli.32	d24,d4,#13
2212bc3d5698SJohn Baldwin	eor	r2,r6,r7
2213bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5
2214bc3d5698SJohn Baldwin	veor	d25,d25,d24
2215bc3d5698SJohn Baldwin	add	r9,r9,r3
2216bc3d5698SJohn Baldwin	and	r2,r2,r5
2217bc3d5698SJohn Baldwin	vadd.i32	d5,d5,d25
2218bc3d5698SJohn Baldwin	eor	r3,r0,r5,ror#19
2219bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#11
2220bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q2
2221bc3d5698SJohn Baldwin	eor	r2,r2,r7
2222bc3d5698SJohn Baldwin	add	r8,r8,r3,ror#6
2223bc3d5698SJohn Baldwin	eor	r3,r9,r10
2224bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#20
2225bc3d5698SJohn Baldwin	add	r8,r8,r2
2226bc3d5698SJohn Baldwin	ldr	r2,[sp,#48]
2227bc3d5698SJohn Baldwin	and	r12,r12,r3
2228bc3d5698SJohn Baldwin	add	r4,r4,r8
2229bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
2230bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#2
2231bc3d5698SJohn Baldwin	eor	r12,r12,r10
2232bc3d5698SJohn Baldwin	vext.8	q8,q3,q0,#4
2233bc3d5698SJohn Baldwin	add	r7,r7,r2
2234bc3d5698SJohn Baldwin	eor	r2,r5,r6
2235bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5
2236bc3d5698SJohn Baldwin	vext.8	q9,q1,q2,#4
2237bc3d5698SJohn Baldwin	add	r8,r8,r12
2238bc3d5698SJohn Baldwin	and	r2,r2,r4
2239bc3d5698SJohn Baldwin	eor	r12,r0,r4,ror#19
2240bc3d5698SJohn Baldwin	vshr.u32	q10,q8,#7
2241bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#11
2242bc3d5698SJohn Baldwin	eor	r2,r2,r6
2243bc3d5698SJohn Baldwin	vadd.i32	q3,q3,q9
2244bc3d5698SJohn Baldwin	add	r7,r7,r12,ror#6
2245bc3d5698SJohn Baldwin	eor	r12,r8,r9
2246bc3d5698SJohn Baldwin	vshr.u32	q9,q8,#3
2247bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#20
2248bc3d5698SJohn Baldwin	add	r7,r7,r2
2249bc3d5698SJohn Baldwin	vsli.32	q10,q8,#25
2250bc3d5698SJohn Baldwin	ldr	r2,[sp,#52]
2251bc3d5698SJohn Baldwin	and	r3,r3,r12
2252bc3d5698SJohn Baldwin	vshr.u32	q11,q8,#18
2253bc3d5698SJohn Baldwin	add	r11,r11,r7
2254bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#2
2255bc3d5698SJohn Baldwin	eor	r3,r3,r9
2256bc3d5698SJohn Baldwin	veor	q9,q9,q10
2257bc3d5698SJohn Baldwin	add	r6,r6,r2
2258bc3d5698SJohn Baldwin	vsli.32	q11,q8,#14
2259bc3d5698SJohn Baldwin	eor	r2,r4,r5
2260bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5
2261bc3d5698SJohn Baldwin	vshr.u32	d24,d5,#17
2262bc3d5698SJohn Baldwin	add	r7,r7,r3
2263bc3d5698SJohn Baldwin	and	r2,r2,r11
2264bc3d5698SJohn Baldwin	veor	q9,q9,q11
2265bc3d5698SJohn Baldwin	eor	r3,r0,r11,ror#19
2266bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#11
2267bc3d5698SJohn Baldwin	vsli.32	d24,d5,#15
2268bc3d5698SJohn Baldwin	eor	r2,r2,r5
2269bc3d5698SJohn Baldwin	add	r6,r6,r3,ror#6
2270bc3d5698SJohn Baldwin	vshr.u32	d25,d5,#10
2271bc3d5698SJohn Baldwin	eor	r3,r7,r8
2272bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#20
2273bc3d5698SJohn Baldwin	vadd.i32	q3,q3,q9
2274bc3d5698SJohn Baldwin	add	r6,r6,r2
2275bc3d5698SJohn Baldwin	ldr	r2,[sp,#56]
2276bc3d5698SJohn Baldwin	veor	d25,d25,d24
2277bc3d5698SJohn Baldwin	and	r12,r12,r3
2278bc3d5698SJohn Baldwin	add	r10,r10,r6
2279bc3d5698SJohn Baldwin	vshr.u32	d24,d5,#19
2280bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#2
2281bc3d5698SJohn Baldwin	eor	r12,r12,r8
2282bc3d5698SJohn Baldwin	vsli.32	d24,d5,#13
2283bc3d5698SJohn Baldwin	add	r5,r5,r2
2284bc3d5698SJohn Baldwin	eor	r2,r11,r4
2285bc3d5698SJohn Baldwin	veor	d25,d25,d24
2286bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5
2287bc3d5698SJohn Baldwin	add	r6,r6,r12
2288bc3d5698SJohn Baldwin	vadd.i32	d6,d6,d25
2289bc3d5698SJohn Baldwin	and	r2,r2,r10
2290bc3d5698SJohn Baldwin	eor	r12,r0,r10,ror#19
2291bc3d5698SJohn Baldwin	vshr.u32	d24,d6,#17
2292bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#11
2293bc3d5698SJohn Baldwin	eor	r2,r2,r4
2294bc3d5698SJohn Baldwin	vsli.32	d24,d6,#15
2295bc3d5698SJohn Baldwin	add	r5,r5,r12,ror#6
2296bc3d5698SJohn Baldwin	eor	r12,r6,r7
2297bc3d5698SJohn Baldwin	vshr.u32	d25,d6,#10
2298bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#20
2299bc3d5698SJohn Baldwin	add	r5,r5,r2
2300bc3d5698SJohn Baldwin	veor	d25,d25,d24
2301bc3d5698SJohn Baldwin	ldr	r2,[sp,#60]
2302bc3d5698SJohn Baldwin	and	r3,r3,r12
2303bc3d5698SJohn Baldwin	vshr.u32	d24,d6,#19
2304bc3d5698SJohn Baldwin	add	r9,r9,r5
2305bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#2
2306bc3d5698SJohn Baldwin	eor	r3,r3,r7
2307bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
2308bc3d5698SJohn Baldwin	add	r4,r4,r2
2309bc3d5698SJohn Baldwin	vsli.32	d24,d6,#13
2310bc3d5698SJohn Baldwin	eor	r2,r10,r11
2311bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5
2312bc3d5698SJohn Baldwin	veor	d25,d25,d24
2313bc3d5698SJohn Baldwin	add	r5,r5,r3
2314bc3d5698SJohn Baldwin	and	r2,r2,r9
2315bc3d5698SJohn Baldwin	vadd.i32	d7,d7,d25
2316bc3d5698SJohn Baldwin	eor	r3,r0,r9,ror#19
2317bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#11
2318bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q3
2319bc3d5698SJohn Baldwin	eor	r2,r2,r11
2320bc3d5698SJohn Baldwin	add	r4,r4,r3,ror#6
2321bc3d5698SJohn Baldwin	eor	r3,r5,r6
2322bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#20
2323bc3d5698SJohn Baldwin	add	r4,r4,r2
2324bc3d5698SJohn Baldwin	ldr	r2,[r14]
2325bc3d5698SJohn Baldwin	and	r12,r12,r3
2326bc3d5698SJohn Baldwin	add	r8,r8,r4
2327bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
2328bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#2
2329bc3d5698SJohn Baldwin	eor	r12,r12,r6
2330bc3d5698SJohn Baldwin	teq	r2,#0				@ check for K256 terminator
2331bc3d5698SJohn Baldwin	ldr	r2,[sp,#0]
2332bc3d5698SJohn Baldwin	sub	r1,r1,#64
2333bc3d5698SJohn Baldwin	bne	.L_00_48
2334bc3d5698SJohn Baldwin
2335bc3d5698SJohn Baldwin	ldr	r1,[sp,#68]
2336bc3d5698SJohn Baldwin	ldr	r0,[sp,#72]
2337bc3d5698SJohn Baldwin	sub	r14,r14,#256	@ rewind r14
2338bc3d5698SJohn Baldwin	teq	r1,r0
2339bc3d5698SJohn Baldwin	it	eq
2340bc3d5698SJohn Baldwin	subeq	r1,r1,#64		@ avoid SEGV
2341bc3d5698SJohn Baldwin	vld1.8	{q0},[r1]!		@ load next input block
2342bc3d5698SJohn Baldwin	vld1.8	{q1},[r1]!
2343bc3d5698SJohn Baldwin	vld1.8	{q2},[r1]!
2344bc3d5698SJohn Baldwin	vld1.8	{q3},[r1]!
2345bc3d5698SJohn Baldwin	it	ne
2346bc3d5698SJohn Baldwin	strne	r1,[sp,#68]
2347bc3d5698SJohn Baldwin	mov	r1,sp
2348bc3d5698SJohn Baldwin	add	r11,r11,r2
2349bc3d5698SJohn Baldwin	eor	r2,r9,r10
2350bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5
2351bc3d5698SJohn Baldwin	add	r4,r4,r12
2352bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
2353bc3d5698SJohn Baldwin	and	r2,r2,r8
2354bc3d5698SJohn Baldwin	eor	r12,r0,r8,ror#19
2355bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#11
2356bc3d5698SJohn Baldwin	eor	r2,r2,r10
2357bc3d5698SJohn Baldwin	vrev32.8	q0,q0
2358bc3d5698SJohn Baldwin	add	r11,r11,r12,ror#6
2359bc3d5698SJohn Baldwin	eor	r12,r4,r5
2360bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#20
2361bc3d5698SJohn Baldwin	add	r11,r11,r2
2362bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q0
2363bc3d5698SJohn Baldwin	ldr	r2,[sp,#4]
2364bc3d5698SJohn Baldwin	and	r3,r3,r12
2365bc3d5698SJohn Baldwin	add	r7,r7,r11
2366bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#2
2367bc3d5698SJohn Baldwin	eor	r3,r3,r5
2368bc3d5698SJohn Baldwin	add	r10,r10,r2
2369bc3d5698SJohn Baldwin	eor	r2,r8,r9
2370bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5
2371bc3d5698SJohn Baldwin	add	r11,r11,r3
2372bc3d5698SJohn Baldwin	and	r2,r2,r7
2373bc3d5698SJohn Baldwin	eor	r3,r0,r7,ror#19
2374bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#11
2375bc3d5698SJohn Baldwin	eor	r2,r2,r9
2376bc3d5698SJohn Baldwin	add	r10,r10,r3,ror#6
2377bc3d5698SJohn Baldwin	eor	r3,r11,r4
2378bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#20
2379bc3d5698SJohn Baldwin	add	r10,r10,r2
2380bc3d5698SJohn Baldwin	ldr	r2,[sp,#8]
2381bc3d5698SJohn Baldwin	and	r12,r12,r3
2382bc3d5698SJohn Baldwin	add	r6,r6,r10
2383bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#2
2384bc3d5698SJohn Baldwin	eor	r12,r12,r4
2385bc3d5698SJohn Baldwin	add	r9,r9,r2
2386bc3d5698SJohn Baldwin	eor	r2,r7,r8
2387bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5
2388bc3d5698SJohn Baldwin	add	r10,r10,r12
2389bc3d5698SJohn Baldwin	and	r2,r2,r6
2390bc3d5698SJohn Baldwin	eor	r12,r0,r6,ror#19
2391bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#11
2392bc3d5698SJohn Baldwin	eor	r2,r2,r8
2393bc3d5698SJohn Baldwin	add	r9,r9,r12,ror#6
2394bc3d5698SJohn Baldwin	eor	r12,r10,r11
2395bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#20
2396bc3d5698SJohn Baldwin	add	r9,r9,r2
2397bc3d5698SJohn Baldwin	ldr	r2,[sp,#12]
2398bc3d5698SJohn Baldwin	and	r3,r3,r12
2399bc3d5698SJohn Baldwin	add	r5,r5,r9
2400bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#2
2401bc3d5698SJohn Baldwin	eor	r3,r3,r11
2402bc3d5698SJohn Baldwin	add	r8,r8,r2
2403bc3d5698SJohn Baldwin	eor	r2,r6,r7
2404bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5
2405bc3d5698SJohn Baldwin	add	r9,r9,r3
2406bc3d5698SJohn Baldwin	and	r2,r2,r5
2407bc3d5698SJohn Baldwin	eor	r3,r0,r5,ror#19
2408bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#11
2409bc3d5698SJohn Baldwin	eor	r2,r2,r7
2410bc3d5698SJohn Baldwin	add	r8,r8,r3,ror#6
2411bc3d5698SJohn Baldwin	eor	r3,r9,r10
2412bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#20
2413bc3d5698SJohn Baldwin	add	r8,r8,r2
2414bc3d5698SJohn Baldwin	ldr	r2,[sp,#16]
2415bc3d5698SJohn Baldwin	and	r12,r12,r3
2416bc3d5698SJohn Baldwin	add	r4,r4,r8
2417bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#2
2418bc3d5698SJohn Baldwin	eor	r12,r12,r10
2419bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
2420bc3d5698SJohn Baldwin	add	r7,r7,r2
2421bc3d5698SJohn Baldwin	eor	r2,r5,r6
2422bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5
2423bc3d5698SJohn Baldwin	add	r8,r8,r12
2424bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
2425bc3d5698SJohn Baldwin	and	r2,r2,r4
2426bc3d5698SJohn Baldwin	eor	r12,r0,r4,ror#19
2427bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#11
2428bc3d5698SJohn Baldwin	eor	r2,r2,r6
2429bc3d5698SJohn Baldwin	vrev32.8	q1,q1
2430bc3d5698SJohn Baldwin	add	r7,r7,r12,ror#6
2431bc3d5698SJohn Baldwin	eor	r12,r8,r9
2432bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#20
2433bc3d5698SJohn Baldwin	add	r7,r7,r2
2434bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q1
2435bc3d5698SJohn Baldwin	ldr	r2,[sp,#20]
2436bc3d5698SJohn Baldwin	and	r3,r3,r12
2437bc3d5698SJohn Baldwin	add	r11,r11,r7
2438bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#2
2439bc3d5698SJohn Baldwin	eor	r3,r3,r9
2440bc3d5698SJohn Baldwin	add	r6,r6,r2
2441bc3d5698SJohn Baldwin	eor	r2,r4,r5
2442bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5
2443bc3d5698SJohn Baldwin	add	r7,r7,r3
2444bc3d5698SJohn Baldwin	and	r2,r2,r11
2445bc3d5698SJohn Baldwin	eor	r3,r0,r11,ror#19
2446bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#11
2447bc3d5698SJohn Baldwin	eor	r2,r2,r5
2448bc3d5698SJohn Baldwin	add	r6,r6,r3,ror#6
2449bc3d5698SJohn Baldwin	eor	r3,r7,r8
2450bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#20
2451bc3d5698SJohn Baldwin	add	r6,r6,r2
2452bc3d5698SJohn Baldwin	ldr	r2,[sp,#24]
2453bc3d5698SJohn Baldwin	and	r12,r12,r3
2454bc3d5698SJohn Baldwin	add	r10,r10,r6
2455bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#2
2456bc3d5698SJohn Baldwin	eor	r12,r12,r8
2457bc3d5698SJohn Baldwin	add	r5,r5,r2
2458bc3d5698SJohn Baldwin	eor	r2,r11,r4
2459bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5
2460bc3d5698SJohn Baldwin	add	r6,r6,r12
2461bc3d5698SJohn Baldwin	and	r2,r2,r10
2462bc3d5698SJohn Baldwin	eor	r12,r0,r10,ror#19
2463bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#11
2464bc3d5698SJohn Baldwin	eor	r2,r2,r4
2465bc3d5698SJohn Baldwin	add	r5,r5,r12,ror#6
2466bc3d5698SJohn Baldwin	eor	r12,r6,r7
2467bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#20
2468bc3d5698SJohn Baldwin	add	r5,r5,r2
2469bc3d5698SJohn Baldwin	ldr	r2,[sp,#28]
2470bc3d5698SJohn Baldwin	and	r3,r3,r12
2471bc3d5698SJohn Baldwin	add	r9,r9,r5
2472bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#2
2473bc3d5698SJohn Baldwin	eor	r3,r3,r7
2474bc3d5698SJohn Baldwin	add	r4,r4,r2
2475bc3d5698SJohn Baldwin	eor	r2,r10,r11
2476bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5
2477bc3d5698SJohn Baldwin	add	r5,r5,r3
2478bc3d5698SJohn Baldwin	and	r2,r2,r9
2479bc3d5698SJohn Baldwin	eor	r3,r0,r9,ror#19
2480bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#11
2481bc3d5698SJohn Baldwin	eor	r2,r2,r11
2482bc3d5698SJohn Baldwin	add	r4,r4,r3,ror#6
2483bc3d5698SJohn Baldwin	eor	r3,r5,r6
2484bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#20
2485bc3d5698SJohn Baldwin	add	r4,r4,r2
2486bc3d5698SJohn Baldwin	ldr	r2,[sp,#32]
2487bc3d5698SJohn Baldwin	and	r12,r12,r3
2488bc3d5698SJohn Baldwin	add	r8,r8,r4
2489bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#2
2490bc3d5698SJohn Baldwin	eor	r12,r12,r6
2491bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
2492bc3d5698SJohn Baldwin	add	r11,r11,r2
2493bc3d5698SJohn Baldwin	eor	r2,r9,r10
2494bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#5
2495bc3d5698SJohn Baldwin	add	r4,r4,r12
2496bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
2497bc3d5698SJohn Baldwin	and	r2,r2,r8
2498bc3d5698SJohn Baldwin	eor	r12,r0,r8,ror#19
2499bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#11
2500bc3d5698SJohn Baldwin	eor	r2,r2,r10
2501bc3d5698SJohn Baldwin	vrev32.8	q2,q2
2502bc3d5698SJohn Baldwin	add	r11,r11,r12,ror#6
2503bc3d5698SJohn Baldwin	eor	r12,r4,r5
2504bc3d5698SJohn Baldwin	eor	r0,r0,r4,ror#20
2505bc3d5698SJohn Baldwin	add	r11,r11,r2
2506bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q2
2507bc3d5698SJohn Baldwin	ldr	r2,[sp,#36]
2508bc3d5698SJohn Baldwin	and	r3,r3,r12
2509bc3d5698SJohn Baldwin	add	r7,r7,r11
2510bc3d5698SJohn Baldwin	add	r11,r11,r0,ror#2
2511bc3d5698SJohn Baldwin	eor	r3,r3,r5
2512bc3d5698SJohn Baldwin	add	r10,r10,r2
2513bc3d5698SJohn Baldwin	eor	r2,r8,r9
2514bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#5
2515bc3d5698SJohn Baldwin	add	r11,r11,r3
2516bc3d5698SJohn Baldwin	and	r2,r2,r7
2517bc3d5698SJohn Baldwin	eor	r3,r0,r7,ror#19
2518bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#11
2519bc3d5698SJohn Baldwin	eor	r2,r2,r9
2520bc3d5698SJohn Baldwin	add	r10,r10,r3,ror#6
2521bc3d5698SJohn Baldwin	eor	r3,r11,r4
2522bc3d5698SJohn Baldwin	eor	r0,r0,r11,ror#20
2523bc3d5698SJohn Baldwin	add	r10,r10,r2
2524bc3d5698SJohn Baldwin	ldr	r2,[sp,#40]
2525bc3d5698SJohn Baldwin	and	r12,r12,r3
2526bc3d5698SJohn Baldwin	add	r6,r6,r10
2527bc3d5698SJohn Baldwin	add	r10,r10,r0,ror#2
2528bc3d5698SJohn Baldwin	eor	r12,r12,r4
2529bc3d5698SJohn Baldwin	add	r9,r9,r2
2530bc3d5698SJohn Baldwin	eor	r2,r7,r8
2531bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#5
2532bc3d5698SJohn Baldwin	add	r10,r10,r12
2533bc3d5698SJohn Baldwin	and	r2,r2,r6
2534bc3d5698SJohn Baldwin	eor	r12,r0,r6,ror#19
2535bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#11
2536bc3d5698SJohn Baldwin	eor	r2,r2,r8
2537bc3d5698SJohn Baldwin	add	r9,r9,r12,ror#6
2538bc3d5698SJohn Baldwin	eor	r12,r10,r11
2539bc3d5698SJohn Baldwin	eor	r0,r0,r10,ror#20
2540bc3d5698SJohn Baldwin	add	r9,r9,r2
2541bc3d5698SJohn Baldwin	ldr	r2,[sp,#44]
2542bc3d5698SJohn Baldwin	and	r3,r3,r12
2543bc3d5698SJohn Baldwin	add	r5,r5,r9
2544bc3d5698SJohn Baldwin	add	r9,r9,r0,ror#2
2545bc3d5698SJohn Baldwin	eor	r3,r3,r11
2546bc3d5698SJohn Baldwin	add	r8,r8,r2
2547bc3d5698SJohn Baldwin	eor	r2,r6,r7
2548bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#5
2549bc3d5698SJohn Baldwin	add	r9,r9,r3
2550bc3d5698SJohn Baldwin	and	r2,r2,r5
2551bc3d5698SJohn Baldwin	eor	r3,r0,r5,ror#19
2552bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#11
2553bc3d5698SJohn Baldwin	eor	r2,r2,r7
2554bc3d5698SJohn Baldwin	add	r8,r8,r3,ror#6
2555bc3d5698SJohn Baldwin	eor	r3,r9,r10
2556bc3d5698SJohn Baldwin	eor	r0,r0,r9,ror#20
2557bc3d5698SJohn Baldwin	add	r8,r8,r2
2558bc3d5698SJohn Baldwin	ldr	r2,[sp,#48]
2559bc3d5698SJohn Baldwin	and	r12,r12,r3
2560bc3d5698SJohn Baldwin	add	r4,r4,r8
2561bc3d5698SJohn Baldwin	add	r8,r8,r0,ror#2
2562bc3d5698SJohn Baldwin	eor	r12,r12,r10
2563bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
2564bc3d5698SJohn Baldwin	add	r7,r7,r2
2565bc3d5698SJohn Baldwin	eor	r2,r5,r6
2566bc3d5698SJohn Baldwin	eor	r0,r4,r4,ror#5
2567bc3d5698SJohn Baldwin	add	r8,r8,r12
2568bc3d5698SJohn Baldwin	vld1.32	{q8},[r14,:128]!
2569bc3d5698SJohn Baldwin	and	r2,r2,r4
2570bc3d5698SJohn Baldwin	eor	r12,r0,r4,ror#19
2571bc3d5698SJohn Baldwin	eor	r0,r8,r8,ror#11
2572bc3d5698SJohn Baldwin	eor	r2,r2,r6
2573bc3d5698SJohn Baldwin	vrev32.8	q3,q3
2574bc3d5698SJohn Baldwin	add	r7,r7,r12,ror#6
2575bc3d5698SJohn Baldwin	eor	r12,r8,r9
2576bc3d5698SJohn Baldwin	eor	r0,r0,r8,ror#20
2577bc3d5698SJohn Baldwin	add	r7,r7,r2
2578bc3d5698SJohn Baldwin	vadd.i32	q8,q8,q3
2579bc3d5698SJohn Baldwin	ldr	r2,[sp,#52]
2580bc3d5698SJohn Baldwin	and	r3,r3,r12
2581bc3d5698SJohn Baldwin	add	r11,r11,r7
2582bc3d5698SJohn Baldwin	add	r7,r7,r0,ror#2
2583bc3d5698SJohn Baldwin	eor	r3,r3,r9
2584bc3d5698SJohn Baldwin	add	r6,r6,r2
2585bc3d5698SJohn Baldwin	eor	r2,r4,r5
2586bc3d5698SJohn Baldwin	eor	r0,r11,r11,ror#5
2587bc3d5698SJohn Baldwin	add	r7,r7,r3
2588bc3d5698SJohn Baldwin	and	r2,r2,r11
2589bc3d5698SJohn Baldwin	eor	r3,r0,r11,ror#19
2590bc3d5698SJohn Baldwin	eor	r0,r7,r7,ror#11
2591bc3d5698SJohn Baldwin	eor	r2,r2,r5
2592bc3d5698SJohn Baldwin	add	r6,r6,r3,ror#6
2593bc3d5698SJohn Baldwin	eor	r3,r7,r8
2594bc3d5698SJohn Baldwin	eor	r0,r0,r7,ror#20
2595bc3d5698SJohn Baldwin	add	r6,r6,r2
2596bc3d5698SJohn Baldwin	ldr	r2,[sp,#56]
2597bc3d5698SJohn Baldwin	and	r12,r12,r3
2598bc3d5698SJohn Baldwin	add	r10,r10,r6
2599bc3d5698SJohn Baldwin	add	r6,r6,r0,ror#2
2600bc3d5698SJohn Baldwin	eor	r12,r12,r8
2601bc3d5698SJohn Baldwin	add	r5,r5,r2
2602bc3d5698SJohn Baldwin	eor	r2,r11,r4
2603bc3d5698SJohn Baldwin	eor	r0,r10,r10,ror#5
2604bc3d5698SJohn Baldwin	add	r6,r6,r12
2605bc3d5698SJohn Baldwin	and	r2,r2,r10
2606bc3d5698SJohn Baldwin	eor	r12,r0,r10,ror#19
2607bc3d5698SJohn Baldwin	eor	r0,r6,r6,ror#11
2608bc3d5698SJohn Baldwin	eor	r2,r2,r4
2609bc3d5698SJohn Baldwin	add	r5,r5,r12,ror#6
2610bc3d5698SJohn Baldwin	eor	r12,r6,r7
2611bc3d5698SJohn Baldwin	eor	r0,r0,r6,ror#20
2612bc3d5698SJohn Baldwin	add	r5,r5,r2
2613bc3d5698SJohn Baldwin	ldr	r2,[sp,#60]
2614bc3d5698SJohn Baldwin	and	r3,r3,r12
2615bc3d5698SJohn Baldwin	add	r9,r9,r5
2616bc3d5698SJohn Baldwin	add	r5,r5,r0,ror#2
2617bc3d5698SJohn Baldwin	eor	r3,r3,r7
2618bc3d5698SJohn Baldwin	add	r4,r4,r2
2619bc3d5698SJohn Baldwin	eor	r2,r10,r11
2620bc3d5698SJohn Baldwin	eor	r0,r9,r9,ror#5
2621bc3d5698SJohn Baldwin	add	r5,r5,r3
2622bc3d5698SJohn Baldwin	and	r2,r2,r9
2623bc3d5698SJohn Baldwin	eor	r3,r0,r9,ror#19
2624bc3d5698SJohn Baldwin	eor	r0,r5,r5,ror#11
2625bc3d5698SJohn Baldwin	eor	r2,r2,r11
2626bc3d5698SJohn Baldwin	add	r4,r4,r3,ror#6
2627bc3d5698SJohn Baldwin	eor	r3,r5,r6
2628bc3d5698SJohn Baldwin	eor	r0,r0,r5,ror#20
2629bc3d5698SJohn Baldwin	add	r4,r4,r2
2630bc3d5698SJohn Baldwin	ldr	r2,[sp,#64]
2631bc3d5698SJohn Baldwin	and	r12,r12,r3
2632bc3d5698SJohn Baldwin	add	r8,r8,r4
2633bc3d5698SJohn Baldwin	add	r4,r4,r0,ror#2
2634bc3d5698SJohn Baldwin	eor	r12,r12,r6
2635bc3d5698SJohn Baldwin	vst1.32	{q8},[r1,:128]!
2636bc3d5698SJohn Baldwin	ldr	r0,[r2,#0]
2637bc3d5698SJohn Baldwin	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2638bc3d5698SJohn Baldwin	ldr	r12,[r2,#4]
2639bc3d5698SJohn Baldwin	ldr	r3,[r2,#8]
2640bc3d5698SJohn Baldwin	ldr	r1,[r2,#12]
2641bc3d5698SJohn Baldwin	add	r4,r4,r0			@ accumulate
2642bc3d5698SJohn Baldwin	ldr	r0,[r2,#16]
2643bc3d5698SJohn Baldwin	add	r5,r5,r12
2644bc3d5698SJohn Baldwin	ldr	r12,[r2,#20]
2645bc3d5698SJohn Baldwin	add	r6,r6,r3
2646bc3d5698SJohn Baldwin	ldr	r3,[r2,#24]
2647bc3d5698SJohn Baldwin	add	r7,r7,r1
2648bc3d5698SJohn Baldwin	ldr	r1,[r2,#28]
2649bc3d5698SJohn Baldwin	add	r8,r8,r0
2650bc3d5698SJohn Baldwin	str	r4,[r2],#4
2651bc3d5698SJohn Baldwin	add	r9,r9,r12
2652bc3d5698SJohn Baldwin	str	r5,[r2],#4
2653bc3d5698SJohn Baldwin	add	r10,r10,r3
2654bc3d5698SJohn Baldwin	str	r6,[r2],#4
2655bc3d5698SJohn Baldwin	add	r11,r11,r1
2656bc3d5698SJohn Baldwin	str	r7,[r2],#4
2657bc3d5698SJohn Baldwin	stmia	r2,{r8,r9,r10,r11}
2658bc3d5698SJohn Baldwin
2659bc3d5698SJohn Baldwin	ittte	ne
2660bc3d5698SJohn Baldwin	movne	r1,sp
2661bc3d5698SJohn Baldwin	ldrne	r2,[sp,#0]
2662bc3d5698SJohn Baldwin	eorne	r12,r12,r12
2663bc3d5698SJohn Baldwin	ldreq	sp,[sp,#76]			@ restore original sp
2664bc3d5698SJohn Baldwin	itt	ne
2665bc3d5698SJohn Baldwin	eorne	r3,r5,r6
2666bc3d5698SJohn Baldwin	bne	.L_00_48
2667bc3d5698SJohn Baldwin
2668bc3d5698SJohn Baldwin	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2669bc3d5698SJohn Baldwin.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2670bc3d5698SJohn Baldwin#endif
2671bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2672bc3d5698SJohn Baldwin
2673bc3d5698SJohn Baldwin# if defined(__thumb2__)
2674bc3d5698SJohn Baldwin#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2675bc3d5698SJohn Baldwin# else
2676bc3d5698SJohn Baldwin#  define INST(a,b,c,d)	.byte	a,b,c,d
2677bc3d5698SJohn Baldwin# endif
2678bc3d5698SJohn Baldwin
2679bc3d5698SJohn Baldwin.type	sha256_block_data_order_armv8,%function
2680bc3d5698SJohn Baldwin.align	5
2681bc3d5698SJohn Baldwinsha256_block_data_order_armv8:
2682bc3d5698SJohn Baldwin.LARMv8:
2683bc3d5698SJohn Baldwin	vld1.32	{q0,q1},[r0]
2684bc3d5698SJohn Baldwin	sub	r3,r3,#256+32
2685bc3d5698SJohn Baldwin	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
2686bc3d5698SJohn Baldwin	b	.Loop_v8
2687bc3d5698SJohn Baldwin
2688bc3d5698SJohn Baldwin.align	4
2689bc3d5698SJohn Baldwin.Loop_v8:
2690bc3d5698SJohn Baldwin	vld1.8	{q8,q9},[r1]!
2691bc3d5698SJohn Baldwin	vld1.8	{q10,q11},[r1]!
2692bc3d5698SJohn Baldwin	vld1.32	{q12},[r3]!
2693bc3d5698SJohn Baldwin	vrev32.8	q8,q8
2694bc3d5698SJohn Baldwin	vrev32.8	q9,q9
2695bc3d5698SJohn Baldwin	vrev32.8	q10,q10
2696bc3d5698SJohn Baldwin	vrev32.8	q11,q11
2697bc3d5698SJohn Baldwin	vmov	q14,q0	@ offload
2698bc3d5698SJohn Baldwin	vmov	q15,q1
2699bc3d5698SJohn Baldwin	teq	r1,r2
2700bc3d5698SJohn Baldwin	vld1.32	{q13},[r3]!
2701bc3d5698SJohn Baldwin	vadd.i32	q12,q12,q8
2702bc3d5698SJohn Baldwin	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2703bc3d5698SJohn Baldwin	vmov	q2,q0
2704bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2705bc3d5698SJohn Baldwin	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2706bc3d5698SJohn Baldwin	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2707bc3d5698SJohn Baldwin	vld1.32	{q12},[r3]!
2708bc3d5698SJohn Baldwin	vadd.i32	q13,q13,q9
2709bc3d5698SJohn Baldwin	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2710bc3d5698SJohn Baldwin	vmov	q2,q0
2711bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2712bc3d5698SJohn Baldwin	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2713bc3d5698SJohn Baldwin	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2714bc3d5698SJohn Baldwin	vld1.32	{q13},[r3]!
2715bc3d5698SJohn Baldwin	vadd.i32	q12,q12,q10
2716bc3d5698SJohn Baldwin	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2717bc3d5698SJohn Baldwin	vmov	q2,q0
2718bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2719bc3d5698SJohn Baldwin	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2720bc3d5698SJohn Baldwin	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2721bc3d5698SJohn Baldwin	vld1.32	{q12},[r3]!
2722bc3d5698SJohn Baldwin	vadd.i32	q13,q13,q11
2723bc3d5698SJohn Baldwin	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2724bc3d5698SJohn Baldwin	vmov	q2,q0
2725bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2726bc3d5698SJohn Baldwin	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2727bc3d5698SJohn Baldwin	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2728bc3d5698SJohn Baldwin	vld1.32	{q13},[r3]!
2729bc3d5698SJohn Baldwin	vadd.i32	q12,q12,q8
2730bc3d5698SJohn Baldwin	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2731bc3d5698SJohn Baldwin	vmov	q2,q0
2732bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2733bc3d5698SJohn Baldwin	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2734bc3d5698SJohn Baldwin	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2735bc3d5698SJohn Baldwin	vld1.32	{q12},[r3]!
2736bc3d5698SJohn Baldwin	vadd.i32	q13,q13,q9
2737bc3d5698SJohn Baldwin	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2738bc3d5698SJohn Baldwin	vmov	q2,q0
2739bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2740bc3d5698SJohn Baldwin	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2741bc3d5698SJohn Baldwin	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2742bc3d5698SJohn Baldwin	vld1.32	{q13},[r3]!
2743bc3d5698SJohn Baldwin	vadd.i32	q12,q12,q10
2744bc3d5698SJohn Baldwin	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2745bc3d5698SJohn Baldwin	vmov	q2,q0
2746bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2747bc3d5698SJohn Baldwin	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2748bc3d5698SJohn Baldwin	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2749bc3d5698SJohn Baldwin	vld1.32	{q12},[r3]!
2750bc3d5698SJohn Baldwin	vadd.i32	q13,q13,q11
2751bc3d5698SJohn Baldwin	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2752bc3d5698SJohn Baldwin	vmov	q2,q0
2753bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2754bc3d5698SJohn Baldwin	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2755bc3d5698SJohn Baldwin	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2756bc3d5698SJohn Baldwin	vld1.32	{q13},[r3]!
2757bc3d5698SJohn Baldwin	vadd.i32	q12,q12,q8
2758bc3d5698SJohn Baldwin	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2759bc3d5698SJohn Baldwin	vmov	q2,q0
2760bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2761bc3d5698SJohn Baldwin	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2762bc3d5698SJohn Baldwin	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2763bc3d5698SJohn Baldwin	vld1.32	{q12},[r3]!
2764bc3d5698SJohn Baldwin	vadd.i32	q13,q13,q9
2765bc3d5698SJohn Baldwin	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2766bc3d5698SJohn Baldwin	vmov	q2,q0
2767bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2768bc3d5698SJohn Baldwin	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2769bc3d5698SJohn Baldwin	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2770bc3d5698SJohn Baldwin	vld1.32	{q13},[r3]!
2771bc3d5698SJohn Baldwin	vadd.i32	q12,q12,q10
2772bc3d5698SJohn Baldwin	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2773bc3d5698SJohn Baldwin	vmov	q2,q0
2774bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2775bc3d5698SJohn Baldwin	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2776bc3d5698SJohn Baldwin	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2777bc3d5698SJohn Baldwin	vld1.32	{q12},[r3]!
2778bc3d5698SJohn Baldwin	vadd.i32	q13,q13,q11
2779bc3d5698SJohn Baldwin	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2780bc3d5698SJohn Baldwin	vmov	q2,q0
2781bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2782bc3d5698SJohn Baldwin	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2783bc3d5698SJohn Baldwin	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
2784bc3d5698SJohn Baldwin	vld1.32	{q13},[r3]!
2785bc3d5698SJohn Baldwin	vadd.i32	q12,q12,q8
2786bc3d5698SJohn Baldwin	vmov	q2,q0
2787bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2788bc3d5698SJohn Baldwin	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2789bc3d5698SJohn Baldwin
2790bc3d5698SJohn Baldwin	vld1.32	{q12},[r3]!
2791bc3d5698SJohn Baldwin	vadd.i32	q13,q13,q9
2792bc3d5698SJohn Baldwin	vmov	q2,q0
2793bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2794bc3d5698SJohn Baldwin	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2795bc3d5698SJohn Baldwin
2796bc3d5698SJohn Baldwin	vld1.32	{q13},[r3]
2797bc3d5698SJohn Baldwin	vadd.i32	q12,q12,q10
2798bc3d5698SJohn Baldwin	sub	r3,r3,#256-16	@ rewind
2799bc3d5698SJohn Baldwin	vmov	q2,q0
2800bc3d5698SJohn Baldwin	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2801bc3d5698SJohn Baldwin	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2802bc3d5698SJohn Baldwin
2803bc3d5698SJohn Baldwin	vadd.i32	q13,q13,q11
2804bc3d5698SJohn Baldwin	vmov	q2,q0
2805bc3d5698SJohn Baldwin	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2806bc3d5698SJohn Baldwin	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2807bc3d5698SJohn Baldwin
2808bc3d5698SJohn Baldwin	vadd.i32	q0,q0,q14
2809bc3d5698SJohn Baldwin	vadd.i32	q1,q1,q15
2810bc3d5698SJohn Baldwin	it	ne
2811bc3d5698SJohn Baldwin	bne	.Loop_v8
2812bc3d5698SJohn Baldwin
2813bc3d5698SJohn Baldwin	vst1.32	{q0,q1},[r0]
2814bc3d5698SJohn Baldwin
2815bc3d5698SJohn Baldwin	bx	lr		@ bx lr
2816bc3d5698SJohn Baldwin.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2817bc3d5698SJohn Baldwin#endif
2818bc3d5698SJohn Baldwin.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2819bc3d5698SJohn Baldwin.align	2
2820bc3d5698SJohn Baldwin.align	2
2821bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2822bc3d5698SJohn Baldwin.comm	OPENSSL_armcap_P,4,4
2823bc3d5698SJohn Baldwin#endif
2824