/* Do not modify. This file is auto-generated from sha256-armv4.pl. */
@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the Apache License 2.0 (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
@ per byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

@ $output is the last argument if it looks like a file (it has an extension)
@ $flavour is the first argument if it doesn't look like a file
#ifndef __KERNEL__
# include "arm_arch.h"
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.text

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
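@ K256 above holds the 64 standard SHA-256 round constants (the first
@ 32 bits of the fractional parts of the cube roots of the first 64
@ primes); the zero word is only a terminator. The integer loop below
@ detects the final round by comparing the low byte of the constant just
@ fetched against 0xf2, the low byte of the last constant 0xc67178f2
@ (the "done?" check).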
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
# ifdef	_WIN32
.word	OPENSSL_armcap_P
# else
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
# endif
#endif
.align	5

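@ Calling convention, as reconstructed from the code below:
@
@	void sha256_block_data_order(uint32_t state[8],
@				     const void *inp, size_t num);
@
@ r0 = state (the eight 32-bit chaining values a..h), r1 = inp,
@ r2 = num, the count of 64-byte blocks ("len to point at the end of
@ inp" below rescales r2 to r1+num*64). When OPENSSL_armcap_P
@ advertises ARMv8 SHA-256 or NEON, control diverts to .LARMv8 or
@ .LNEON instead of the integer-only code.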
.globl	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
# if !defined(_WIN32)
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
# endif
# if defined(__APPLE__) || defined(_WIN32)
	ldr	r12,[r12]
# endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
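@ Register map for the integer rounds: r4..r11 carry the working
@ variables a..h, r14 walks K256, r0/r2/r3/r12 are scratch, and the
@ stack frame holds the 16-word schedule X[0..15] at sp, with the saved
@ ctx, inp and end-of-input pointers at sp+16*4, sp+17*4 and sp+18*4.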
.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
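@ Rounds 16..63: X[] lives on the stack as a 16-word circular buffer
@ that is updated in place ahead of each round,
@
@	X[i&15] += sigma0(X[(i+1)&15]) + X[(i+9)&15] + sigma1(X[(i+14)&15])
@
@ which is why each round loads the next round's schedule words
@ ("from future BODY_16_xx") one round early.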
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#ifdef	__thumb2__
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	sha256_block_data_order,.-sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.globl	sha256_block_data_order_neon
.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
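@ NEON path, a sketch of the strategy as inferred from the code below:
@ the message schedule is advanced four words at a time in q0-q3 while
@ the round arithmetic stays in the ARM pipeline; each K[i]+X[i] group
@ is staged through a 16-byte-aligned stack area (hence the "align for
@ 128-bit stores" below) and consumed by the scalar rounds via
@ ldr r2,[sp,#...].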
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
2273	vadd.i32	q3,q3,q9
2274	add	r6,r6,r2
2275	ldr	r2,[sp,#56]
2276	veor	d25,d25,d24
2277	and	r12,r12,r3
2278	add	r10,r10,r6
2279	vshr.u32	d24,d5,#19
2280	add	r6,r6,r0,ror#2
2281	eor	r12,r12,r8
2282	vsli.32	d24,d5,#13
2283	add	r5,r5,r2
2284	eor	r2,r11,r4
2285	veor	d25,d25,d24
2286	eor	r0,r10,r10,ror#5
2287	add	r6,r6,r12
2288	vadd.i32	d6,d6,d25
2289	and	r2,r2,r10
2290	eor	r12,r0,r10,ror#19
2291	vshr.u32	d24,d6,#17
2292	eor	r0,r6,r6,ror#11
2293	eor	r2,r2,r4
2294	vsli.32	d24,d6,#15
2295	add	r5,r5,r12,ror#6
2296	eor	r12,r6,r7
2297	vshr.u32	d25,d6,#10
2298	eor	r0,r0,r6,ror#20
2299	add	r5,r5,r2
2300	veor	d25,d25,d24
2301	ldr	r2,[sp,#60]
2302	and	r3,r3,r12
2303	vshr.u32	d24,d6,#19
2304	add	r9,r9,r5
2305	add	r5,r5,r0,ror#2
2306	eor	r3,r3,r7
2307	vld1.32	{q8},[r14,:128]!
2308	add	r4,r4,r2
2309	vsli.32	d24,d6,#13
2310	eor	r2,r10,r11
2311	eor	r0,r9,r9,ror#5
2312	veor	d25,d25,d24
2313	add	r5,r5,r3
2314	and	r2,r2,r9
2315	vadd.i32	d7,d7,d25
2316	eor	r3,r0,r9,ror#19
2317	eor	r0,r5,r5,ror#11
2318	vadd.i32	q8,q8,q3
2319	eor	r2,r2,r11
2320	add	r4,r4,r3,ror#6
2321	eor	r3,r5,r6
2322	eor	r0,r0,r5,ror#20
2323	add	r4,r4,r2
2324	ldr	r2,[r14]
2325	and	r12,r12,r3
2326	add	r8,r8,r4
2327	vst1.32	{q8},[r1,:128]!
2328	add	r4,r4,r0,ror#2
2329	eor	r12,r12,r6
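@ The message-schedule rounds above interleave scalar rounds with
@ the NEON computation of the next message words, four at a time
@ (one q register per four scalar rounds), following the SHA-256
@ schedule recurrence (roughly):
@   W[i] = W[i-16] + sigma0(W[i-15]) + W[i-7] + sigma1(W[i-2])
@   sigma0(x) = ROR(x,7) ^ ROR(x,18) ^ (x>>3)
@   sigma1(x) = ROR(x,17) ^ ROR(x,19) ^ (x>>10)
@ K256 is followed by a zero terminator word; loading it below
@ signals that all 64 constants are consumed and ends the loop.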
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	.L_00_48

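@ Between blocks: [sp,#68] holds the input pointer and [sp,#72] the
@ end-of-input pointer; r14 is rewound to the start of K256.  On the
@ last block the two pointers are equal, so r1 is stepped back by 64
@ bytes first and the loads below become harmless rereads of the
@ final block rather than touching memory past the buffer (the
@ "avoid SEGV" case).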
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
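@ Rounds 48-63 of the current block follow, interleaved with
@ byte-swapping (vrev32.8) of the block just loaded and the
@ pre-addition of K256 constants for the next iteration.  Each
@ scalar round is the standard SHA-256 round (roughly):
@   T1 = h + Sigma1(e) + Ch(e,f,g) + K[i] + W[i]
@   d += T1;  h = T1 + Sigma0(a) + Maj(a,b,c)
@ with the Maj() addition deferred to the following round (see the
@ "from the past" comment further down).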
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
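@ r2 now holds the context pointer reloaded from [sp,#64]: add the
@ working variables a-h (r4-r11) into the eight hash words and store
@ the updated state back.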
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

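@ If more input remains (NE, still from the earlier teq r1,r0), point
@ r1 back at the stacked schedule, reload W[0], clear the deferred-Maj
@ register r12, re-seed r3 = b^c and loop; otherwise restore the
@ original sp and return through the ldmia below.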
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

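@ The ARMv8 path below uses the SHA-256 crypto-extension instructions
@ (sha256h, sha256h2, sha256su0, sha256su1), emitted as raw bytes via
@ INST() so the file still assembles with toolchains that do not know
@ the crypto extension.  In Thumb-2 the two halfwords of each 32-bit
@ encoding are swapped and the top byte is adjusted (d|0xc turns the
@ ARM NEON prefix 0xf3 into the Thumb one, 0xff).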
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
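@ Entry (presumably from the common dispatcher): r0 = context,
@ r1 = input, r2 = block count; r3 arrives pointing just past the
@ K256 table and the subtraction below rewinds it to the start.
@ q0/q1 hold the ABCD/EFGH halves of the hash state.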
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
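@ Twelve groups follow, one per four rounds of 0-47: load the next
@ four constants, add them to the current message words, then
@ sha256su0/sha256h/sha256h2/sha256su1 advance the schedule and both
@ state halves (q2 keeps a copy of q0 for sha256h2).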
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
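@ Final four groups (rounds 48-63): the message schedule is complete,
@ so only the hash updates remain; r3 is rewound to the start of K256
@ for the next block.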
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

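@ Davies-Meyer feed-forward: add the block's starting state
@ (offloaded to q14/q15) back into q0/q1, then loop while input
@ remains (the flags still hold the teq r1,r2 result from the top
@ of the loop).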
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
#endif
