xref: /freebsd/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S (revision e64fe029e9d3ce476e77a478318e0c3cd201ff08)
1/*
2 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     https://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
19 * - modified assembly to fit into OpenZFS
20 */
21
22#if defined(__arm__)
23
/*
 * Build configuration for this file.
 *
 * __ARM_ARCH__ is fixed at 7 here, so every "#if __ARM_ARCH__>=7" branch in
 * the round code below is the one that gets assembled (32-bit word loads
 * followed by "rev" on little-endian builds); the byte-by-byte "ldrb"
 * alternatives in the matching "#else" branches are compiled out.
 * __ARM_MAX_ARCH__ is not referenced in the portion of the file reviewed
 * here - presumably it gates code further down; verify before changing it.
 */
24#define	__ARM_ARCH__      7
25#define	__ARM_MAX_ARCH__  7
26
/*
 * Instruction-set selection: when the compiler targets Thumb-2, switch the
 * assembler to unified syntax and emit Thumb encodings; otherwise emit
 * classic 32-bit ARM encodings.
 */
27#if defined(__thumb2__)
28.syntax unified
29.thumb
30#else
31.code   32
32#endif
33
/*
 * Everything that follows - including the K256 constant table - is placed
 * in the text section, which lets the code locate the table with a
 * PC-relative computation instead of a relocation.
 */
34.text
35
/*
 * K256 - the 64 SHA-256 round constants K[0..63] (FIPS 180-4), stored as
 * 32-bit words in round order.  The round code walks the table
 * sequentially with post-incremented loads ("ldr rX,[r14],#4  @ *K256++").
 *
 * Layout contract - do not change without updating the function below:
 *   - The table is 32-byte aligned (.align 5) and exactly 64*4 = 256 bytes.
 *   - It is followed by one zero word, and the function entry point is
 *     then re-aligned to 32 bytes, so the entry point sits exactly
 *     256+32 bytes after K256.
 *   - zfs_sha256_block_armv7 relies on that distance: it takes its own
 *     address and computes the table address as "sub r14,r3,#256+32".
 *     Adding, removing or reordering anything between K256 and the
 *     function label breaks that computation.
 *
 * .size is emitted before the trailing zero word, so the K256 symbol size
 * covers only the 256 bytes of constants.
 */
36.type	K256,%object
37.align	5
38K256:
39.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
40.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
41.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
42.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
43.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
44.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
45.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
46.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
47.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
48.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
49.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
50.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
51.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
52.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
53.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
54.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
55.size	K256,.-K256
/*
 * NOTE(review): the consumer of this zero word is not in the portion of
 * the file reviewed here; the visible round code carries (preprocessor-
 * disabled) "done?" checks that compare the low byte of the loaded
 * constant against 0xf2, the low byte of the last entry above.  Confirm
 * how the end of the table is detected before removing the terminator -
 * it also contributes to the 256+32 offset described above.
 */
56.word	0				@ terminator, not counted in .size
57
58.align	5
59.globl	zfs_sha256_block_armv7
60.type	zfs_sha256_block_armv7,%function
61zfs_sha256_block_armv7:
62.Lzfs_sha256_block_armv7:
63
64#if __ARM_ARCH__<7 && !defined(__thumb2__)
65	sub	r3,pc,#8		@ zfs_sha256_block_armv7
66#else
67	adr	r3,.Lzfs_sha256_block_armv7
68#endif
69
70	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
71	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
72	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
73	sub	r14,r3,#256+32	@ K256
74	sub	sp,sp,#16*4		@ alloca(X[16])
75.Loop:
76# if __ARM_ARCH__>=7
77	ldr	r2,[r1],#4
78# else
79	ldrb	r2,[r1,#3]
80# endif
81	eor	r3,r5,r6		@ magic
82	eor	r12,r12,r12
83#if __ARM_ARCH__>=7
84	@ ldr	r2,[r1],#4			@ 0
85# if 0==15
86	str	r1,[sp,#17*4]			@ make room for r1
87# endif
88	eor	r0,r8,r8,ror#5
89	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
90	eor	r0,r0,r8,ror#19	@ Sigma1(e)
91# ifndef __ARMEB__
92	rev	r2,r2
93# endif
94#else
95	@ ldrb	r2,[r1,#3]			@ 0
96	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
97	ldrb	r12,[r1,#2]
98	ldrb	r0,[r1,#1]
99	orr	r2,r2,r12,lsl#8
100	ldrb	r12,[r1],#4
101	orr	r2,r2,r0,lsl#16
102# if 0==15
103	str	r1,[sp,#17*4]			@ make room for r1
104# endif
105	eor	r0,r8,r8,ror#5
106	orr	r2,r2,r12,lsl#24
107	eor	r0,r0,r8,ror#19	@ Sigma1(e)
108#endif
109	ldr	r12,[r14],#4			@ *K256++
110	add	r11,r11,r2			@ h+=X[i]
111	str	r2,[sp,#0*4]
112	eor	r2,r9,r10
113	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
114	and	r2,r2,r8
115	add	r11,r11,r12			@ h+=K256[i]
116	eor	r2,r2,r10			@ Ch(e,f,g)
117	eor	r0,r4,r4,ror#11
118	add	r11,r11,r2			@ h+=Ch(e,f,g)
119#if 0==31
120	and	r12,r12,#0xff
121	cmp	r12,#0xf2			@ done?
122#endif
123#if 0<15
124# if __ARM_ARCH__>=7
125	ldr	r2,[r1],#4			@ prefetch
126# else
127	ldrb	r2,[r1,#3]
128# endif
129	eor	r12,r4,r5			@ a^b, b^c in next round
130#else
131	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
132	eor	r12,r4,r5			@ a^b, b^c in next round
133	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
134#endif
135	eor	r0,r0,r4,ror#20	@ Sigma0(a)
136	and	r3,r3,r12			@ (b^c)&=(a^b)
137	add	r7,r7,r11			@ d+=h
138	eor	r3,r3,r5			@ Maj(a,b,c)
139	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
140	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
141#if __ARM_ARCH__>=7
142	@ ldr	r2,[r1],#4			@ 1
143# if 1==15
144	str	r1,[sp,#17*4]			@ make room for r1
145# endif
146	eor	r0,r7,r7,ror#5
147	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
148	eor	r0,r0,r7,ror#19	@ Sigma1(e)
149# ifndef __ARMEB__
150	rev	r2,r2
151# endif
152#else
153	@ ldrb	r2,[r1,#3]			@ 1
154	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
155	ldrb	r3,[r1,#2]
156	ldrb	r0,[r1,#1]
157	orr	r2,r2,r3,lsl#8
158	ldrb	r3,[r1],#4
159	orr	r2,r2,r0,lsl#16
160# if 1==15
161	str	r1,[sp,#17*4]			@ make room for r1
162# endif
163	eor	r0,r7,r7,ror#5
164	orr	r2,r2,r3,lsl#24
165	eor	r0,r0,r7,ror#19	@ Sigma1(e)
166#endif
167	ldr	r3,[r14],#4			@ *K256++
168	add	r10,r10,r2			@ h+=X[i]
169	str	r2,[sp,#1*4]
170	eor	r2,r8,r9
171	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
172	and	r2,r2,r7
173	add	r10,r10,r3			@ h+=K256[i]
174	eor	r2,r2,r9			@ Ch(e,f,g)
175	eor	r0,r11,r11,ror#11
176	add	r10,r10,r2			@ h+=Ch(e,f,g)
177#if 1==31
178	and	r3,r3,#0xff
179	cmp	r3,#0xf2			@ done?
180#endif
181#if 1<15
182# if __ARM_ARCH__>=7
183	ldr	r2,[r1],#4			@ prefetch
184# else
185	ldrb	r2,[r1,#3]
186# endif
187	eor	r3,r11,r4			@ a^b, b^c in next round
188#else
189	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
190	eor	r3,r11,r4			@ a^b, b^c in next round
191	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
192#endif
193	eor	r0,r0,r11,ror#20	@ Sigma0(a)
194	and	r12,r12,r3			@ (b^c)&=(a^b)
195	add	r6,r6,r10			@ d+=h
196	eor	r12,r12,r4			@ Maj(a,b,c)
197	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
198	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
199#if __ARM_ARCH__>=7
200	@ ldr	r2,[r1],#4			@ 2
201# if 2==15
202	str	r1,[sp,#17*4]			@ make room for r1
203# endif
204	eor	r0,r6,r6,ror#5
205	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
206	eor	r0,r0,r6,ror#19	@ Sigma1(e)
207# ifndef __ARMEB__
208	rev	r2,r2
209# endif
210#else
211	@ ldrb	r2,[r1,#3]			@ 2
212	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
213	ldrb	r12,[r1,#2]
214	ldrb	r0,[r1,#1]
215	orr	r2,r2,r12,lsl#8
216	ldrb	r12,[r1],#4
217	orr	r2,r2,r0,lsl#16
218# if 2==15
219	str	r1,[sp,#17*4]			@ make room for r1
220# endif
221	eor	r0,r6,r6,ror#5
222	orr	r2,r2,r12,lsl#24
223	eor	r0,r0,r6,ror#19	@ Sigma1(e)
224#endif
225	ldr	r12,[r14],#4			@ *K256++
226	add	r9,r9,r2			@ h+=X[i]
227	str	r2,[sp,#2*4]
228	eor	r2,r7,r8
229	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
230	and	r2,r2,r6
231	add	r9,r9,r12			@ h+=K256[i]
232	eor	r2,r2,r8			@ Ch(e,f,g)
233	eor	r0,r10,r10,ror#11
234	add	r9,r9,r2			@ h+=Ch(e,f,g)
235#if 2==31
236	and	r12,r12,#0xff
237	cmp	r12,#0xf2			@ done?
238#endif
239#if 2<15
240# if __ARM_ARCH__>=7
241	ldr	r2,[r1],#4			@ prefetch
242# else
243	ldrb	r2,[r1,#3]
244# endif
245	eor	r12,r10,r11			@ a^b, b^c in next round
246#else
247	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
248	eor	r12,r10,r11			@ a^b, b^c in next round
249	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
250#endif
251	eor	r0,r0,r10,ror#20	@ Sigma0(a)
252	and	r3,r3,r12			@ (b^c)&=(a^b)
253	add	r5,r5,r9			@ d+=h
254	eor	r3,r3,r11			@ Maj(a,b,c)
255	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
256	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
257#if __ARM_ARCH__>=7
258	@ ldr	r2,[r1],#4			@ 3
259# if 3==15
260	str	r1,[sp,#17*4]			@ make room for r1
261# endif
262	eor	r0,r5,r5,ror#5
263	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
264	eor	r0,r0,r5,ror#19	@ Sigma1(e)
265# ifndef __ARMEB__
266	rev	r2,r2
267# endif
268#else
269	@ ldrb	r2,[r1,#3]			@ 3
270	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
271	ldrb	r3,[r1,#2]
272	ldrb	r0,[r1,#1]
273	orr	r2,r2,r3,lsl#8
274	ldrb	r3,[r1],#4
275	orr	r2,r2,r0,lsl#16
276# if 3==15
277	str	r1,[sp,#17*4]			@ make room for r1
278# endif
279	eor	r0,r5,r5,ror#5
280	orr	r2,r2,r3,lsl#24
281	eor	r0,r0,r5,ror#19	@ Sigma1(e)
282#endif
283	ldr	r3,[r14],#4			@ *K256++
284	add	r8,r8,r2			@ h+=X[i]
285	str	r2,[sp,#3*4]
286	eor	r2,r6,r7
287	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
288	and	r2,r2,r5
289	add	r8,r8,r3			@ h+=K256[i]
290	eor	r2,r2,r7			@ Ch(e,f,g)
291	eor	r0,r9,r9,ror#11
292	add	r8,r8,r2			@ h+=Ch(e,f,g)
293#if 3==31
294	and	r3,r3,#0xff
295	cmp	r3,#0xf2			@ done?
296#endif
297#if 3<15
298# if __ARM_ARCH__>=7
299	ldr	r2,[r1],#4			@ prefetch
300# else
301	ldrb	r2,[r1,#3]
302# endif
303	eor	r3,r9,r10			@ a^b, b^c in next round
304#else
305	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
306	eor	r3,r9,r10			@ a^b, b^c in next round
307	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
308#endif
309	eor	r0,r0,r9,ror#20	@ Sigma0(a)
310	and	r12,r12,r3			@ (b^c)&=(a^b)
311	add	r4,r4,r8			@ d+=h
312	eor	r12,r12,r10			@ Maj(a,b,c)
313	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
314	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
315#if __ARM_ARCH__>=7
316	@ ldr	r2,[r1],#4			@ 4
317# if 4==15
318	str	r1,[sp,#17*4]			@ make room for r1
319# endif
320	eor	r0,r4,r4,ror#5
321	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
322	eor	r0,r0,r4,ror#19	@ Sigma1(e)
323# ifndef __ARMEB__
324	rev	r2,r2
325# endif
326#else
327	@ ldrb	r2,[r1,#3]			@ 4
328	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
329	ldrb	r12,[r1,#2]
330	ldrb	r0,[r1,#1]
331	orr	r2,r2,r12,lsl#8
332	ldrb	r12,[r1],#4
333	orr	r2,r2,r0,lsl#16
334# if 4==15
335	str	r1,[sp,#17*4]			@ make room for r1
336# endif
337	eor	r0,r4,r4,ror#5
338	orr	r2,r2,r12,lsl#24
339	eor	r0,r0,r4,ror#19	@ Sigma1(e)
340#endif
341	ldr	r12,[r14],#4			@ *K256++
342	add	r7,r7,r2			@ h+=X[i]
343	str	r2,[sp,#4*4]
344	eor	r2,r5,r6
345	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
346	and	r2,r2,r4
347	add	r7,r7,r12			@ h+=K256[i]
348	eor	r2,r2,r6			@ Ch(e,f,g)
349	eor	r0,r8,r8,ror#11
350	add	r7,r7,r2			@ h+=Ch(e,f,g)
351#if 4==31
352	and	r12,r12,#0xff
353	cmp	r12,#0xf2			@ done?
354#endif
355#if 4<15
356# if __ARM_ARCH__>=7
357	ldr	r2,[r1],#4			@ prefetch
358# else
359	ldrb	r2,[r1,#3]
360# endif
361	eor	r12,r8,r9			@ a^b, b^c in next round
362#else
363	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
364	eor	r12,r8,r9			@ a^b, b^c in next round
365	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
366#endif
367	eor	r0,r0,r8,ror#20	@ Sigma0(a)
368	and	r3,r3,r12			@ (b^c)&=(a^b)
369	add	r11,r11,r7			@ d+=h
370	eor	r3,r3,r9			@ Maj(a,b,c)
371	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
372	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
373#if __ARM_ARCH__>=7
374	@ ldr	r2,[r1],#4			@ 5
375# if 5==15
376	str	r1,[sp,#17*4]			@ make room for r1
377# endif
378	eor	r0,r11,r11,ror#5
379	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
380	eor	r0,r0,r11,ror#19	@ Sigma1(e)
381# ifndef __ARMEB__
382	rev	r2,r2
383# endif
384#else
385	@ ldrb	r2,[r1,#3]			@ 5
386	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
387	ldrb	r3,[r1,#2]
388	ldrb	r0,[r1,#1]
389	orr	r2,r2,r3,lsl#8
390	ldrb	r3,[r1],#4
391	orr	r2,r2,r0,lsl#16
392# if 5==15
393	str	r1,[sp,#17*4]			@ make room for r1
394# endif
395	eor	r0,r11,r11,ror#5
396	orr	r2,r2,r3,lsl#24
397	eor	r0,r0,r11,ror#19	@ Sigma1(e)
398#endif
399	ldr	r3,[r14],#4			@ *K256++
400	add	r6,r6,r2			@ h+=X[i]
401	str	r2,[sp,#5*4]
402	eor	r2,r4,r5
403	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
404	and	r2,r2,r11
405	add	r6,r6,r3			@ h+=K256[i]
406	eor	r2,r2,r5			@ Ch(e,f,g)
407	eor	r0,r7,r7,ror#11
408	add	r6,r6,r2			@ h+=Ch(e,f,g)
409#if 5==31
410	and	r3,r3,#0xff
411	cmp	r3,#0xf2			@ done?
412#endif
413#if 5<15
414# if __ARM_ARCH__>=7
415	ldr	r2,[r1],#4			@ prefetch
416# else
417	ldrb	r2,[r1,#3]
418# endif
419	eor	r3,r7,r8			@ a^b, b^c in next round
420#else
421	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
422	eor	r3,r7,r8			@ a^b, b^c in next round
423	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
424#endif
425	eor	r0,r0,r7,ror#20	@ Sigma0(a)
426	and	r12,r12,r3			@ (b^c)&=(a^b)
427	add	r10,r10,r6			@ d+=h
428	eor	r12,r12,r8			@ Maj(a,b,c)
429	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
430	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
431#if __ARM_ARCH__>=7
432	@ ldr	r2,[r1],#4			@ 6
433# if 6==15
434	str	r1,[sp,#17*4]			@ make room for r1
435# endif
436	eor	r0,r10,r10,ror#5
437	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
438	eor	r0,r0,r10,ror#19	@ Sigma1(e)
439# ifndef __ARMEB__
440	rev	r2,r2
441# endif
442#else
443	@ ldrb	r2,[r1,#3]			@ 6
444	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
445	ldrb	r12,[r1,#2]
446	ldrb	r0,[r1,#1]
447	orr	r2,r2,r12,lsl#8
448	ldrb	r12,[r1],#4
449	orr	r2,r2,r0,lsl#16
450# if 6==15
451	str	r1,[sp,#17*4]			@ make room for r1
452# endif
453	eor	r0,r10,r10,ror#5
454	orr	r2,r2,r12,lsl#24
455	eor	r0,r0,r10,ror#19	@ Sigma1(e)
456#endif
457	ldr	r12,[r14],#4			@ *K256++
458	add	r5,r5,r2			@ h+=X[i]
459	str	r2,[sp,#6*4]
460	eor	r2,r11,r4
461	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
462	and	r2,r2,r10
463	add	r5,r5,r12			@ h+=K256[i]
464	eor	r2,r2,r4			@ Ch(e,f,g)
465	eor	r0,r6,r6,ror#11
466	add	r5,r5,r2			@ h+=Ch(e,f,g)
467#if 6==31
468	and	r12,r12,#0xff
469	cmp	r12,#0xf2			@ done?
470#endif
471#if 6<15
472# if __ARM_ARCH__>=7
473	ldr	r2,[r1],#4			@ prefetch
474# else
475	ldrb	r2,[r1,#3]
476# endif
477	eor	r12,r6,r7			@ a^b, b^c in next round
478#else
479	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
480	eor	r12,r6,r7			@ a^b, b^c in next round
481	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
482#endif
483	eor	r0,r0,r6,ror#20	@ Sigma0(a)
484	and	r3,r3,r12			@ (b^c)&=(a^b)
485	add	r9,r9,r5			@ d+=h
486	eor	r3,r3,r7			@ Maj(a,b,c)
487	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
488	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
489#if __ARM_ARCH__>=7
490	@ ldr	r2,[r1],#4			@ 7
491# if 7==15
492	str	r1,[sp,#17*4]			@ make room for r1
493# endif
494	eor	r0,r9,r9,ror#5
495	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
496	eor	r0,r0,r9,ror#19	@ Sigma1(e)
497# ifndef __ARMEB__
498	rev	r2,r2
499# endif
500#else
501	@ ldrb	r2,[r1,#3]			@ 7
502	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
503	ldrb	r3,[r1,#2]
504	ldrb	r0,[r1,#1]
505	orr	r2,r2,r3,lsl#8
506	ldrb	r3,[r1],#4
507	orr	r2,r2,r0,lsl#16
508# if 7==15
509	str	r1,[sp,#17*4]			@ make room for r1
510# endif
511	eor	r0,r9,r9,ror#5
512	orr	r2,r2,r3,lsl#24
513	eor	r0,r0,r9,ror#19	@ Sigma1(e)
514#endif
515	ldr	r3,[r14],#4			@ *K256++
516	add	r4,r4,r2			@ h+=X[i]
517	str	r2,[sp,#7*4]
518	eor	r2,r10,r11
519	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
520	and	r2,r2,r9
521	add	r4,r4,r3			@ h+=K256[i]
522	eor	r2,r2,r11			@ Ch(e,f,g)
523	eor	r0,r5,r5,ror#11
524	add	r4,r4,r2			@ h+=Ch(e,f,g)
525#if 7==31
526	and	r3,r3,#0xff
527	cmp	r3,#0xf2			@ done?
528#endif
529#if 7<15
530# if __ARM_ARCH__>=7
531	ldr	r2,[r1],#4			@ prefetch
532# else
533	ldrb	r2,[r1,#3]
534# endif
535	eor	r3,r5,r6			@ a^b, b^c in next round
536#else
537	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
538	eor	r3,r5,r6			@ a^b, b^c in next round
539	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
540#endif
541	eor	r0,r0,r5,ror#20	@ Sigma0(a)
542	and	r12,r12,r3			@ (b^c)&=(a^b)
543	add	r8,r8,r4			@ d+=h
544	eor	r12,r12,r6			@ Maj(a,b,c)
545	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
546	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
547#if __ARM_ARCH__>=7
548	@ ldr	r2,[r1],#4			@ 8
549# if 8==15
550	str	r1,[sp,#17*4]			@ make room for r1
551# endif
552	eor	r0,r8,r8,ror#5
553	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
554	eor	r0,r0,r8,ror#19	@ Sigma1(e)
555# ifndef __ARMEB__
556	rev	r2,r2
557# endif
558#else
559	@ ldrb	r2,[r1,#3]			@ 8
560	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
561	ldrb	r12,[r1,#2]
562	ldrb	r0,[r1,#1]
563	orr	r2,r2,r12,lsl#8
564	ldrb	r12,[r1],#4
565	orr	r2,r2,r0,lsl#16
566# if 8==15
567	str	r1,[sp,#17*4]			@ make room for r1
568# endif
569	eor	r0,r8,r8,ror#5
570	orr	r2,r2,r12,lsl#24
571	eor	r0,r0,r8,ror#19	@ Sigma1(e)
572#endif
573	ldr	r12,[r14],#4			@ *K256++
574	add	r11,r11,r2			@ h+=X[i]
575	str	r2,[sp,#8*4]
576	eor	r2,r9,r10
577	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
578	and	r2,r2,r8
579	add	r11,r11,r12			@ h+=K256[i]
580	eor	r2,r2,r10			@ Ch(e,f,g)
581	eor	r0,r4,r4,ror#11
582	add	r11,r11,r2			@ h+=Ch(e,f,g)
583#if 8==31
584	and	r12,r12,#0xff
585	cmp	r12,#0xf2			@ done?
586#endif
587#if 8<15
588# if __ARM_ARCH__>=7
589	ldr	r2,[r1],#4			@ prefetch
590# else
591	ldrb	r2,[r1,#3]
592# endif
593	eor	r12,r4,r5			@ a^b, b^c in next round
594#else
595	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
596	eor	r12,r4,r5			@ a^b, b^c in next round
597	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
598#endif
599	eor	r0,r0,r4,ror#20	@ Sigma0(a)
600	and	r3,r3,r12			@ (b^c)&=(a^b)
601	add	r7,r7,r11			@ d+=h
602	eor	r3,r3,r5			@ Maj(a,b,c)
603	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
604	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
605#if __ARM_ARCH__>=7
606	@ ldr	r2,[r1],#4			@ 9
607# if 9==15
608	str	r1,[sp,#17*4]			@ make room for r1
609# endif
610	eor	r0,r7,r7,ror#5
611	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
612	eor	r0,r0,r7,ror#19	@ Sigma1(e)
613# ifndef __ARMEB__
614	rev	r2,r2
615# endif
616#else
617	@ ldrb	r2,[r1,#3]			@ 9
618	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
619	ldrb	r3,[r1,#2]
620	ldrb	r0,[r1,#1]
621	orr	r2,r2,r3,lsl#8
622	ldrb	r3,[r1],#4
623	orr	r2,r2,r0,lsl#16
624# if 9==15
625	str	r1,[sp,#17*4]			@ make room for r1
626# endif
627	eor	r0,r7,r7,ror#5
628	orr	r2,r2,r3,lsl#24
629	eor	r0,r0,r7,ror#19	@ Sigma1(e)
630#endif
631	ldr	r3,[r14],#4			@ *K256++
632	add	r10,r10,r2			@ h+=X[i]
633	str	r2,[sp,#9*4]
634	eor	r2,r8,r9
635	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
636	and	r2,r2,r7
637	add	r10,r10,r3			@ h+=K256[i]
638	eor	r2,r2,r9			@ Ch(e,f,g)
639	eor	r0,r11,r11,ror#11
640	add	r10,r10,r2			@ h+=Ch(e,f,g)
641#if 9==31
642	and	r3,r3,#0xff
643	cmp	r3,#0xf2			@ done?
644#endif
645#if 9<15
646# if __ARM_ARCH__>=7
647	ldr	r2,[r1],#4			@ prefetch
648# else
649	ldrb	r2,[r1,#3]
650# endif
651	eor	r3,r11,r4			@ a^b, b^c in next round
652#else
653	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
654	eor	r3,r11,r4			@ a^b, b^c in next round
655	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
656#endif
657	eor	r0,r0,r11,ror#20	@ Sigma0(a)
658	and	r12,r12,r3			@ (b^c)&=(a^b)
659	add	r6,r6,r10			@ d+=h
660	eor	r12,r12,r4			@ Maj(a,b,c)
661	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
662	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
663#if __ARM_ARCH__>=7
664	@ ldr	r2,[r1],#4			@ 10
665# if 10==15
666	str	r1,[sp,#17*4]			@ make room for r1
667# endif
668	eor	r0,r6,r6,ror#5
669	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
670	eor	r0,r0,r6,ror#19	@ Sigma1(e)
671# ifndef __ARMEB__
672	rev	r2,r2
673# endif
674#else
675	@ ldrb	r2,[r1,#3]			@ 10
676	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
677	ldrb	r12,[r1,#2]
678	ldrb	r0,[r1,#1]
679	orr	r2,r2,r12,lsl#8
680	ldrb	r12,[r1],#4
681	orr	r2,r2,r0,lsl#16
682# if 10==15
683	str	r1,[sp,#17*4]			@ make room for r1
684# endif
685	eor	r0,r6,r6,ror#5
686	orr	r2,r2,r12,lsl#24
687	eor	r0,r0,r6,ror#19	@ Sigma1(e)
688#endif
689	ldr	r12,[r14],#4			@ *K256++
690	add	r9,r9,r2			@ h+=X[i]
691	str	r2,[sp,#10*4]
692	eor	r2,r7,r8
693	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
694	and	r2,r2,r6
695	add	r9,r9,r12			@ h+=K256[i]
696	eor	r2,r2,r8			@ Ch(e,f,g)
697	eor	r0,r10,r10,ror#11
698	add	r9,r9,r2			@ h+=Ch(e,f,g)
699#if 10==31
700	and	r12,r12,#0xff
701	cmp	r12,#0xf2			@ done?
702#endif
703#if 10<15
704# if __ARM_ARCH__>=7
705	ldr	r2,[r1],#4			@ prefetch
706# else
707	ldrb	r2,[r1,#3]
708# endif
709	eor	r12,r10,r11			@ a^b, b^c in next round
710#else
711	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
712	eor	r12,r10,r11			@ a^b, b^c in next round
713	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
714#endif
715	eor	r0,r0,r10,ror#20	@ Sigma0(a)
716	and	r3,r3,r12			@ (b^c)&=(a^b)
717	add	r5,r5,r9			@ d+=h
718	eor	r3,r3,r11			@ Maj(a,b,c)
719	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
720	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
721#if __ARM_ARCH__>=7
722	@ ldr	r2,[r1],#4			@ 11
723# if 11==15
724	str	r1,[sp,#17*4]			@ make room for r1
725# endif
726	eor	r0,r5,r5,ror#5
727	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
728	eor	r0,r0,r5,ror#19	@ Sigma1(e)
729# ifndef __ARMEB__
730	rev	r2,r2
731# endif
732#else
733	@ ldrb	r2,[r1,#3]			@ 11
734	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
735	ldrb	r3,[r1,#2]
736	ldrb	r0,[r1,#1]
737	orr	r2,r2,r3,lsl#8
738	ldrb	r3,[r1],#4
739	orr	r2,r2,r0,lsl#16
740# if 11==15
741	str	r1,[sp,#17*4]			@ make room for r1
742# endif
743	eor	r0,r5,r5,ror#5
744	orr	r2,r2,r3,lsl#24
745	eor	r0,r0,r5,ror#19	@ Sigma1(e)
746#endif
747	ldr	r3,[r14],#4			@ *K256++
748	add	r8,r8,r2			@ h+=X[i]
749	str	r2,[sp,#11*4]
750	eor	r2,r6,r7
751	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
752	and	r2,r2,r5
753	add	r8,r8,r3			@ h+=K256[i]
754	eor	r2,r2,r7			@ Ch(e,f,g)
755	eor	r0,r9,r9,ror#11
756	add	r8,r8,r2			@ h+=Ch(e,f,g)
757#if 11==31
758	and	r3,r3,#0xff
759	cmp	r3,#0xf2			@ done?
760#endif
761#if 11<15
762# if __ARM_ARCH__>=7
763	ldr	r2,[r1],#4			@ prefetch
764# else
765	ldrb	r2,[r1,#3]
766# endif
767	eor	r3,r9,r10			@ a^b, b^c in next round
768#else
769	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
770	eor	r3,r9,r10			@ a^b, b^c in next round
771	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
772#endif
773	eor	r0,r0,r9,ror#20	@ Sigma0(a)
774	and	r12,r12,r3			@ (b^c)&=(a^b)
775	add	r4,r4,r8			@ d+=h
776	eor	r12,r12,r10			@ Maj(a,b,c)
777	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
778	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
779#if __ARM_ARCH__>=7
780	@ ldr	r2,[r1],#4			@ 12
781# if 12==15
782	str	r1,[sp,#17*4]			@ make room for r1
783# endif
784	eor	r0,r4,r4,ror#5
785	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
786	eor	r0,r0,r4,ror#19	@ Sigma1(e)
787# ifndef __ARMEB__
788	rev	r2,r2
789# endif
790#else
791	@ ldrb	r2,[r1,#3]			@ 12
792	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
793	ldrb	r12,[r1,#2]
794	ldrb	r0,[r1,#1]
795	orr	r2,r2,r12,lsl#8
796	ldrb	r12,[r1],#4
797	orr	r2,r2,r0,lsl#16
798# if 12==15
799	str	r1,[sp,#17*4]			@ make room for r1
800# endif
801	eor	r0,r4,r4,ror#5
802	orr	r2,r2,r12,lsl#24
803	eor	r0,r0,r4,ror#19	@ Sigma1(e)
804#endif
805	ldr	r12,[r14],#4			@ *K256++
806	add	r7,r7,r2			@ h+=X[i]
807	str	r2,[sp,#12*4]
808	eor	r2,r5,r6
809	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
810	and	r2,r2,r4
811	add	r7,r7,r12			@ h+=K256[i]
812	eor	r2,r2,r6			@ Ch(e,f,g)
813	eor	r0,r8,r8,ror#11
814	add	r7,r7,r2			@ h+=Ch(e,f,g)
815#if 12==31
816	and	r12,r12,#0xff
817	cmp	r12,#0xf2			@ done?
818#endif
819#if 12<15
820# if __ARM_ARCH__>=7
821	ldr	r2,[r1],#4			@ prefetch
822# else
823	ldrb	r2,[r1,#3]
824# endif
825	eor	r12,r8,r9			@ a^b, b^c in next round
826#else
827	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
828	eor	r12,r8,r9			@ a^b, b^c in next round
829	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
830#endif
831	eor	r0,r0,r8,ror#20	@ Sigma0(a)
832	and	r3,r3,r12			@ (b^c)&=(a^b)
833	add	r11,r11,r7			@ d+=h
834	eor	r3,r3,r9			@ Maj(a,b,c)
835	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
836	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
837#if __ARM_ARCH__>=7
838	@ ldr	r2,[r1],#4			@ 13
839# if 13==15
840	str	r1,[sp,#17*4]			@ make room for r1
841# endif
842	eor	r0,r11,r11,ror#5
843	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
844	eor	r0,r0,r11,ror#19	@ Sigma1(e)
845# ifndef __ARMEB__
846	rev	r2,r2
847# endif
848#else
849	@ ldrb	r2,[r1,#3]			@ 13
850	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
851	ldrb	r3,[r1,#2]
852	ldrb	r0,[r1,#1]
853	orr	r2,r2,r3,lsl#8
854	ldrb	r3,[r1],#4
855	orr	r2,r2,r0,lsl#16
856# if 13==15
857	str	r1,[sp,#17*4]			@ make room for r1
858# endif
859	eor	r0,r11,r11,ror#5
860	orr	r2,r2,r3,lsl#24
861	eor	r0,r0,r11,ror#19	@ Sigma1(e)
862#endif
863	ldr	r3,[r14],#4			@ *K256++
864	add	r6,r6,r2			@ h+=X[i]
865	str	r2,[sp,#13*4]
866	eor	r2,r4,r5
867	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
868	and	r2,r2,r11
869	add	r6,r6,r3			@ h+=K256[i]
870	eor	r2,r2,r5			@ Ch(e,f,g)
871	eor	r0,r7,r7,ror#11
872	add	r6,r6,r2			@ h+=Ch(e,f,g)
873#if 13==31
874	and	r3,r3,#0xff
875	cmp	r3,#0xf2			@ done?
876#endif
877#if 13<15
878# if __ARM_ARCH__>=7
879	ldr	r2,[r1],#4			@ prefetch
880# else
881	ldrb	r2,[r1,#3]
882# endif
883	eor	r3,r7,r8			@ a^b, b^c in next round
884#else
885	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
886	eor	r3,r7,r8			@ a^b, b^c in next round
887	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
888#endif
889	eor	r0,r0,r7,ror#20	@ Sigma0(a)
890	and	r12,r12,r3			@ (b^c)&=(a^b)
891	add	r10,r10,r6			@ d+=h
892	eor	r12,r12,r8			@ Maj(a,b,c)
893	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
894	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
895#if __ARM_ARCH__>=7
896	@ ldr	r2,[r1],#4			@ 14
897# if 14==15
898	str	r1,[sp,#17*4]			@ make room for r1
899# endif
900	eor	r0,r10,r10,ror#5
901	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
902	eor	r0,r0,r10,ror#19	@ Sigma1(e)
903# ifndef __ARMEB__
904	rev	r2,r2
905# endif
906#else
907	@ ldrb	r2,[r1,#3]			@ 14
908	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
909	ldrb	r12,[r1,#2]
910	ldrb	r0,[r1,#1]
911	orr	r2,r2,r12,lsl#8
912	ldrb	r12,[r1],#4
913	orr	r2,r2,r0,lsl#16
914# if 14==15
915	str	r1,[sp,#17*4]			@ make room for r1
916# endif
917	eor	r0,r10,r10,ror#5
918	orr	r2,r2,r12,lsl#24
919	eor	r0,r0,r10,ror#19	@ Sigma1(e)
920#endif
921	ldr	r12,[r14],#4			@ *K256++
922	add	r5,r5,r2			@ h+=X[i]
923	str	r2,[sp,#14*4]
924	eor	r2,r11,r4
925	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
926	and	r2,r2,r10
927	add	r5,r5,r12			@ h+=K256[i]
928	eor	r2,r2,r4			@ Ch(e,f,g)
929	eor	r0,r6,r6,ror#11
930	add	r5,r5,r2			@ h+=Ch(e,f,g)
931#if 14==31
932	and	r12,r12,#0xff
933	cmp	r12,#0xf2			@ done?
934#endif
935#if 14<15
936# if __ARM_ARCH__>=7
937	ldr	r2,[r1],#4			@ prefetch
938# else
939	ldrb	r2,[r1,#3]
940# endif
941	eor	r12,r6,r7			@ a^b, b^c in next round
942#else
943	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
944	eor	r12,r6,r7			@ a^b, b^c in next round
945	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
946#endif
947	eor	r0,r0,r6,ror#20	@ Sigma0(a)
948	and	r3,r3,r12			@ (b^c)&=(a^b)
949	add	r9,r9,r5			@ d+=h
950	eor	r3,r3,r7			@ Maj(a,b,c)
951	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
952	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
953#if __ARM_ARCH__>=7
954	@ ldr	r2,[r1],#4			@ 15
955# if 15==15
956	str	r1,[sp,#17*4]			@ make room for r1
957# endif
958	eor	r0,r9,r9,ror#5
959	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
960	eor	r0,r0,r9,ror#19	@ Sigma1(e)
961# ifndef __ARMEB__
962	rev	r2,r2
963# endif
964#else
965	@ ldrb	r2,[r1,#3]			@ 15
966	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
967	ldrb	r3,[r1,#2]
968	ldrb	r0,[r1,#1]
969	orr	r2,r2,r3,lsl#8
970	ldrb	r3,[r1],#4
971	orr	r2,r2,r0,lsl#16
972# if 15==15
973	str	r1,[sp,#17*4]			@ make room for r1
974# endif
975	eor	r0,r9,r9,ror#5
976	orr	r2,r2,r3,lsl#24
977	eor	r0,r0,r9,ror#19	@ Sigma1(e)
978#endif
979	ldr	r3,[r14],#4			@ *K256++
980	add	r4,r4,r2			@ h+=X[i]
981	str	r2,[sp,#15*4]
982	eor	r2,r10,r11
983	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
984	and	r2,r2,r9
985	add	r4,r4,r3			@ h+=K256[i]
986	eor	r2,r2,r11			@ Ch(e,f,g)
987	eor	r0,r5,r5,ror#11
988	add	r4,r4,r2			@ h+=Ch(e,f,g)
989#if 15==31
990	and	r3,r3,#0xff
991	cmp	r3,#0xf2			@ done?
992#endif
993#if 15<15
994# if __ARM_ARCH__>=7
995	ldr	r2,[r1],#4			@ prefetch
996# else
997	ldrb	r2,[r1,#3]
998# endif
999	eor	r3,r5,r6			@ a^b, b^c in next round
1000#else
1001	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1002	eor	r3,r5,r6			@ a^b, b^c in next round
1003	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1004#endif
1005	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1006	and	r12,r12,r3			@ (b^c)&=(a^b)
1007	add	r8,r8,r4			@ d+=h
1008	eor	r12,r12,r6			@ Maj(a,b,c)
1009	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1010	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1011.Lrounds_16_xx:
1012	@ ldr	r2,[sp,#1*4]		@ 16
1013	@ ldr	r1,[sp,#14*4]
1014	mov	r0,r2,ror#7
1015	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1016	mov	r12,r1,ror#17
1017	eor	r0,r0,r2,ror#18
1018	eor	r12,r12,r1,ror#19
1019	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1020	ldr	r2,[sp,#0*4]
1021	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1022	ldr	r1,[sp,#9*4]
1023
1024	add	r12,r12,r0
1025	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1026	add	r2,r2,r12
1027	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1028	add	r2,r2,r1			@ X[i]
1029	ldr	r12,[r14],#4			@ *K256++
1030	add	r11,r11,r2			@ h+=X[i]
1031	str	r2,[sp,#0*4]
1032	eor	r2,r9,r10
1033	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1034	and	r2,r2,r8
1035	add	r11,r11,r12			@ h+=K256[i]
1036	eor	r2,r2,r10			@ Ch(e,f,g)
1037	eor	r0,r4,r4,ror#11
1038	add	r11,r11,r2			@ h+=Ch(e,f,g)
1039#if 16==31
1040	and	r12,r12,#0xff
1041	cmp	r12,#0xf2			@ done?
1042#endif
1043#if 16<15
1044# if __ARM_ARCH__>=7
1045	ldr	r2,[r1],#4			@ prefetch
1046# else
1047	ldrb	r2,[r1,#3]
1048# endif
1049	eor	r12,r4,r5			@ a^b, b^c in next round
1050#else
1051	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1052	eor	r12,r4,r5			@ a^b, b^c in next round
1053	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1054#endif
1055	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1056	and	r3,r3,r12			@ (b^c)&=(a^b)
1057	add	r7,r7,r11			@ d+=h
1058	eor	r3,r3,r5			@ Maj(a,b,c)
1059	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1060	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1061	@ ldr	r2,[sp,#2*4]		@ 17
1062	@ ldr	r1,[sp,#15*4]
1063	mov	r0,r2,ror#7
1064	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1065	mov	r3,r1,ror#17
1066	eor	r0,r0,r2,ror#18
1067	eor	r3,r3,r1,ror#19
1068	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1069	ldr	r2,[sp,#1*4]
1070	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1071	ldr	r1,[sp,#10*4]
1072
1073	add	r3,r3,r0
1074	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1075	add	r2,r2,r3
1076	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1077	add	r2,r2,r1			@ X[i]
1078	ldr	r3,[r14],#4			@ *K256++
1079	add	r10,r10,r2			@ h+=X[i]
1080	str	r2,[sp,#1*4]
1081	eor	r2,r8,r9
1082	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1083	and	r2,r2,r7
1084	add	r10,r10,r3			@ h+=K256[i]
1085	eor	r2,r2,r9			@ Ch(e,f,g)
1086	eor	r0,r11,r11,ror#11
1087	add	r10,r10,r2			@ h+=Ch(e,f,g)
1088#if 17==31
1089	and	r3,r3,#0xff
1090	cmp	r3,#0xf2			@ done?
1091#endif
1092#if 17<15
1093# if __ARM_ARCH__>=7
1094	ldr	r2,[r1],#4			@ prefetch
1095# else
1096	ldrb	r2,[r1,#3]
1097# endif
1098	eor	r3,r11,r4			@ a^b, b^c in next round
1099#else
1100	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1101	eor	r3,r11,r4			@ a^b, b^c in next round
1102	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1103#endif
1104	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1105	and	r12,r12,r3			@ (b^c)&=(a^b)
1106	add	r6,r6,r10			@ d+=h
1107	eor	r12,r12,r4			@ Maj(a,b,c)
1108	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1109	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1110	@ ldr	r2,[sp,#3*4]		@ 18
1111	@ ldr	r1,[sp,#0*4]
1112	mov	r0,r2,ror#7
1113	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1114	mov	r12,r1,ror#17
1115	eor	r0,r0,r2,ror#18
1116	eor	r12,r12,r1,ror#19
1117	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1118	ldr	r2,[sp,#2*4]
1119	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1120	ldr	r1,[sp,#11*4]
1121
1122	add	r12,r12,r0
1123	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1124	add	r2,r2,r12
1125	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1126	add	r2,r2,r1			@ X[i]
1127	ldr	r12,[r14],#4			@ *K256++
1128	add	r9,r9,r2			@ h+=X[i]
1129	str	r2,[sp,#2*4]
1130	eor	r2,r7,r8
1131	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1132	and	r2,r2,r6
1133	add	r9,r9,r12			@ h+=K256[i]
1134	eor	r2,r2,r8			@ Ch(e,f,g)
1135	eor	r0,r10,r10,ror#11
1136	add	r9,r9,r2			@ h+=Ch(e,f,g)
1137#if 18==31
1138	and	r12,r12,#0xff
1139	cmp	r12,#0xf2			@ done?
1140#endif
1141#if 18<15
1142# if __ARM_ARCH__>=7
1143	ldr	r2,[r1],#4			@ prefetch
1144# else
1145	ldrb	r2,[r1,#3]
1146# endif
1147	eor	r12,r10,r11			@ a^b, b^c in next round
1148#else
1149	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1150	eor	r12,r10,r11			@ a^b, b^c in next round
1151	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1152#endif
1153	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1154	and	r3,r3,r12			@ (b^c)&=(a^b)
1155	add	r5,r5,r9			@ d+=h
1156	eor	r3,r3,r11			@ Maj(a,b,c)
1157	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1158	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1159	@ ldr	r2,[sp,#4*4]		@ 19
1160	@ ldr	r1,[sp,#1*4]
1161	mov	r0,r2,ror#7
1162	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1163	mov	r3,r1,ror#17
1164	eor	r0,r0,r2,ror#18
1165	eor	r3,r3,r1,ror#19
1166	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1167	ldr	r2,[sp,#3*4]
1168	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1169	ldr	r1,[sp,#12*4]
1170
1171	add	r3,r3,r0
1172	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1173	add	r2,r2,r3
1174	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1175	add	r2,r2,r1			@ X[i]
1176	ldr	r3,[r14],#4			@ *K256++
1177	add	r8,r8,r2			@ h+=X[i]
1178	str	r2,[sp,#3*4]
1179	eor	r2,r6,r7
1180	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1181	and	r2,r2,r5
1182	add	r8,r8,r3			@ h+=K256[i]
1183	eor	r2,r2,r7			@ Ch(e,f,g)
1184	eor	r0,r9,r9,ror#11
1185	add	r8,r8,r2			@ h+=Ch(e,f,g)
1186#if 19==31
1187	and	r3,r3,#0xff
1188	cmp	r3,#0xf2			@ done?
1189#endif
1190#if 19<15
1191# if __ARM_ARCH__>=7
1192	ldr	r2,[r1],#4			@ prefetch
1193# else
1194	ldrb	r2,[r1,#3]
1195# endif
1196	eor	r3,r9,r10			@ a^b, b^c in next round
1197#else
1198	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1199	eor	r3,r9,r10			@ a^b, b^c in next round
1200	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1201#endif
1202	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1203	and	r12,r12,r3			@ (b^c)&=(a^b)
1204	add	r4,r4,r8			@ d+=h
1205	eor	r12,r12,r10			@ Maj(a,b,c)
1206	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1207	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1208	@ ldr	r2,[sp,#5*4]		@ 20
1209	@ ldr	r1,[sp,#2*4]
1210	mov	r0,r2,ror#7
1211	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1212	mov	r12,r1,ror#17
1213	eor	r0,r0,r2,ror#18
1214	eor	r12,r12,r1,ror#19
1215	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1216	ldr	r2,[sp,#4*4]
1217	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1218	ldr	r1,[sp,#13*4]
1219
1220	add	r12,r12,r0
1221	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1222	add	r2,r2,r12
1223	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1224	add	r2,r2,r1			@ X[i]
1225	ldr	r12,[r14],#4			@ *K256++
1226	add	r7,r7,r2			@ h+=X[i]
1227	str	r2,[sp,#4*4]
1228	eor	r2,r5,r6
1229	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1230	and	r2,r2,r4
1231	add	r7,r7,r12			@ h+=K256[i]
1232	eor	r2,r2,r6			@ Ch(e,f,g)
1233	eor	r0,r8,r8,ror#11
1234	add	r7,r7,r2			@ h+=Ch(e,f,g)
1235#if 20==31
1236	and	r12,r12,#0xff
1237	cmp	r12,#0xf2			@ done?
1238#endif
1239#if 20<15
1240# if __ARM_ARCH__>=7
1241	ldr	r2,[r1],#4			@ prefetch
1242# else
1243	ldrb	r2,[r1,#3]
1244# endif
1245	eor	r12,r8,r9			@ a^b, b^c in next round
1246#else
1247	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1248	eor	r12,r8,r9			@ a^b, b^c in next round
1249	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1250#endif
1251	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1252	and	r3,r3,r12			@ (b^c)&=(a^b)
1253	add	r11,r11,r7			@ d+=h
1254	eor	r3,r3,r9			@ Maj(a,b,c)
1255	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1256	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1257	@ ldr	r2,[sp,#6*4]		@ 21
1258	@ ldr	r1,[sp,#3*4]
1259	mov	r0,r2,ror#7
1260	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1261	mov	r3,r1,ror#17
1262	eor	r0,r0,r2,ror#18
1263	eor	r3,r3,r1,ror#19
1264	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1265	ldr	r2,[sp,#5*4]
1266	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1267	ldr	r1,[sp,#14*4]
1268
1269	add	r3,r3,r0
1270	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1271	add	r2,r2,r3
1272	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1273	add	r2,r2,r1			@ X[i]
1274	ldr	r3,[r14],#4			@ *K256++
1275	add	r6,r6,r2			@ h+=X[i]
1276	str	r2,[sp,#5*4]
1277	eor	r2,r4,r5
1278	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1279	and	r2,r2,r11
1280	add	r6,r6,r3			@ h+=K256[i]
1281	eor	r2,r2,r5			@ Ch(e,f,g)
1282	eor	r0,r7,r7,ror#11
1283	add	r6,r6,r2			@ h+=Ch(e,f,g)
1284#if 21==31
1285	and	r3,r3,#0xff
1286	cmp	r3,#0xf2			@ done?
1287#endif
1288#if 21<15
1289# if __ARM_ARCH__>=7
1290	ldr	r2,[r1],#4			@ prefetch
1291# else
1292	ldrb	r2,[r1,#3]
1293# endif
1294	eor	r3,r7,r8			@ a^b, b^c in next round
1295#else
1296	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1297	eor	r3,r7,r8			@ a^b, b^c in next round
1298	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1299#endif
1300	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1301	and	r12,r12,r3			@ (b^c)&=(a^b)
1302	add	r10,r10,r6			@ d+=h
1303	eor	r12,r12,r8			@ Maj(a,b,c)
1304	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1305	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1306	@ ldr	r2,[sp,#7*4]		@ 22
1307	@ ldr	r1,[sp,#4*4]
1308	mov	r0,r2,ror#7
1309	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1310	mov	r12,r1,ror#17
1311	eor	r0,r0,r2,ror#18
1312	eor	r12,r12,r1,ror#19
1313	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1314	ldr	r2,[sp,#6*4]
1315	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1316	ldr	r1,[sp,#15*4]
1317
1318	add	r12,r12,r0
1319	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1320	add	r2,r2,r12
1321	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1322	add	r2,r2,r1			@ X[i]
1323	ldr	r12,[r14],#4			@ *K256++
1324	add	r5,r5,r2			@ h+=X[i]
1325	str	r2,[sp,#6*4]
1326	eor	r2,r11,r4
1327	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1328	and	r2,r2,r10
1329	add	r5,r5,r12			@ h+=K256[i]
1330	eor	r2,r2,r4			@ Ch(e,f,g)
1331	eor	r0,r6,r6,ror#11
1332	add	r5,r5,r2			@ h+=Ch(e,f,g)
1333#if 22==31
1334	and	r12,r12,#0xff
1335	cmp	r12,#0xf2			@ done?
1336#endif
1337#if 22<15
1338# if __ARM_ARCH__>=7
1339	ldr	r2,[r1],#4			@ prefetch
1340# else
1341	ldrb	r2,[r1,#3]
1342# endif
1343	eor	r12,r6,r7			@ a^b, b^c in next round
1344#else
1345	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1346	eor	r12,r6,r7			@ a^b, b^c in next round
1347	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1348#endif
1349	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1350	and	r3,r3,r12			@ (b^c)&=(a^b)
1351	add	r9,r9,r5			@ d+=h
1352	eor	r3,r3,r7			@ Maj(a,b,c)
1353	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1354	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1355	@ ldr	r2,[sp,#8*4]		@ 23
1356	@ ldr	r1,[sp,#5*4]
1357	mov	r0,r2,ror#7
1358	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1359	mov	r3,r1,ror#17
1360	eor	r0,r0,r2,ror#18
1361	eor	r3,r3,r1,ror#19
1362	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1363	ldr	r2,[sp,#7*4]
1364	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1365	ldr	r1,[sp,#0*4]
1366
1367	add	r3,r3,r0
1368	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1369	add	r2,r2,r3
1370	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1371	add	r2,r2,r1			@ X[i]
1372	ldr	r3,[r14],#4			@ *K256++
1373	add	r4,r4,r2			@ h+=X[i]
1374	str	r2,[sp,#7*4]
1375	eor	r2,r10,r11
1376	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1377	and	r2,r2,r9
1378	add	r4,r4,r3			@ h+=K256[i]
1379	eor	r2,r2,r11			@ Ch(e,f,g)
1380	eor	r0,r5,r5,ror#11
1381	add	r4,r4,r2			@ h+=Ch(e,f,g)
1382#if 23==31
1383	and	r3,r3,#0xff
1384	cmp	r3,#0xf2			@ done?
1385#endif
1386#if 23<15
1387# if __ARM_ARCH__>=7
1388	ldr	r2,[r1],#4			@ prefetch
1389# else
1390	ldrb	r2,[r1,#3]
1391# endif
1392	eor	r3,r5,r6			@ a^b, b^c in next round
1393#else
1394	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1395	eor	r3,r5,r6			@ a^b, b^c in next round
1396	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1397#endif
1398	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1399	and	r12,r12,r3			@ (b^c)&=(a^b)
1400	add	r8,r8,r4			@ d+=h
1401	eor	r12,r12,r6			@ Maj(a,b,c)
1402	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1403	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1404	@ ldr	r2,[sp,#9*4]		@ 24
1405	@ ldr	r1,[sp,#6*4]
1406	mov	r0,r2,ror#7
1407	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1408	mov	r12,r1,ror#17
1409	eor	r0,r0,r2,ror#18
1410	eor	r12,r12,r1,ror#19
1411	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1412	ldr	r2,[sp,#8*4]
1413	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1414	ldr	r1,[sp,#1*4]
1415
1416	add	r12,r12,r0
1417	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1418	add	r2,r2,r12
1419	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1420	add	r2,r2,r1			@ X[i]
1421	ldr	r12,[r14],#4			@ *K256++
1422	add	r11,r11,r2			@ h+=X[i]
1423	str	r2,[sp,#8*4]
1424	eor	r2,r9,r10
1425	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1426	and	r2,r2,r8
1427	add	r11,r11,r12			@ h+=K256[i]
1428	eor	r2,r2,r10			@ Ch(e,f,g)
1429	eor	r0,r4,r4,ror#11
1430	add	r11,r11,r2			@ h+=Ch(e,f,g)
1431#if 24==31
1432	and	r12,r12,#0xff
1433	cmp	r12,#0xf2			@ done?
1434#endif
1435#if 24<15
1436# if __ARM_ARCH__>=7
1437	ldr	r2,[r1],#4			@ prefetch
1438# else
1439	ldrb	r2,[r1,#3]
1440# endif
1441	eor	r12,r4,r5			@ a^b, b^c in next round
1442#else
1443	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1444	eor	r12,r4,r5			@ a^b, b^c in next round
1445	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1446#endif
1447	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1448	and	r3,r3,r12			@ (b^c)&=(a^b)
1449	add	r7,r7,r11			@ d+=h
1450	eor	r3,r3,r5			@ Maj(a,b,c)
1451	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1452	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1453	@ ldr	r2,[sp,#10*4]		@ 25
1454	@ ldr	r1,[sp,#7*4]
1455	mov	r0,r2,ror#7
1456	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1457	mov	r3,r1,ror#17
1458	eor	r0,r0,r2,ror#18
1459	eor	r3,r3,r1,ror#19
1460	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1461	ldr	r2,[sp,#9*4]
1462	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1463	ldr	r1,[sp,#2*4]
1464
1465	add	r3,r3,r0
1466	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1467	add	r2,r2,r3
1468	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1469	add	r2,r2,r1			@ X[i]
1470	ldr	r3,[r14],#4			@ *K256++
1471	add	r10,r10,r2			@ h+=X[i]
1472	str	r2,[sp,#9*4]
1473	eor	r2,r8,r9
1474	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1475	and	r2,r2,r7
1476	add	r10,r10,r3			@ h+=K256[i]
1477	eor	r2,r2,r9			@ Ch(e,f,g)
1478	eor	r0,r11,r11,ror#11
1479	add	r10,r10,r2			@ h+=Ch(e,f,g)
1480#if 25==31
1481	and	r3,r3,#0xff
1482	cmp	r3,#0xf2			@ done?
1483#endif
1484#if 25<15
1485# if __ARM_ARCH__>=7
1486	ldr	r2,[r1],#4			@ prefetch
1487# else
1488	ldrb	r2,[r1,#3]
1489# endif
1490	eor	r3,r11,r4			@ a^b, b^c in next round
1491#else
1492	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1493	eor	r3,r11,r4			@ a^b, b^c in next round
1494	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1495#endif
1496	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1497	and	r12,r12,r3			@ (b^c)&=(a^b)
1498	add	r6,r6,r10			@ d+=h
1499	eor	r12,r12,r4			@ Maj(a,b,c)
1500	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1501	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1502	@ ldr	r2,[sp,#11*4]		@ 26
1503	@ ldr	r1,[sp,#8*4]
1504	mov	r0,r2,ror#7
1505	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1506	mov	r12,r1,ror#17
1507	eor	r0,r0,r2,ror#18
1508	eor	r12,r12,r1,ror#19
1509	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1510	ldr	r2,[sp,#10*4]
1511	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1512	ldr	r1,[sp,#3*4]
1513
1514	add	r12,r12,r0
1515	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1516	add	r2,r2,r12
1517	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1518	add	r2,r2,r1			@ X[i]
1519	ldr	r12,[r14],#4			@ *K256++
1520	add	r9,r9,r2			@ h+=X[i]
1521	str	r2,[sp,#10*4]
1522	eor	r2,r7,r8
1523	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1524	and	r2,r2,r6
1525	add	r9,r9,r12			@ h+=K256[i]
1526	eor	r2,r2,r8			@ Ch(e,f,g)
1527	eor	r0,r10,r10,ror#11
1528	add	r9,r9,r2			@ h+=Ch(e,f,g)
1529#if 26==31
1530	and	r12,r12,#0xff
1531	cmp	r12,#0xf2			@ done?
1532#endif
1533#if 26<15
1534# if __ARM_ARCH__>=7
1535	ldr	r2,[r1],#4			@ prefetch
1536# else
1537	ldrb	r2,[r1,#3]
1538# endif
1539	eor	r12,r10,r11			@ a^b, b^c in next round
1540#else
1541	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1542	eor	r12,r10,r11			@ a^b, b^c in next round
1543	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1544#endif
1545	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1546	and	r3,r3,r12			@ (b^c)&=(a^b)
1547	add	r5,r5,r9			@ d+=h
1548	eor	r3,r3,r11			@ Maj(a,b,c)
1549	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1550	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1551	@ ldr	r2,[sp,#12*4]		@ 27
1552	@ ldr	r1,[sp,#9*4]
1553	mov	r0,r2,ror#7
1554	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1555	mov	r3,r1,ror#17
1556	eor	r0,r0,r2,ror#18
1557	eor	r3,r3,r1,ror#19
1558	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1559	ldr	r2,[sp,#11*4]
1560	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1561	ldr	r1,[sp,#4*4]
1562
1563	add	r3,r3,r0
1564	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1565	add	r2,r2,r3
1566	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1567	add	r2,r2,r1			@ X[i]
1568	ldr	r3,[r14],#4			@ *K256++
1569	add	r8,r8,r2			@ h+=X[i]
1570	str	r2,[sp,#11*4]
1571	eor	r2,r6,r7
1572	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1573	and	r2,r2,r5
1574	add	r8,r8,r3			@ h+=K256[i]
1575	eor	r2,r2,r7			@ Ch(e,f,g)
1576	eor	r0,r9,r9,ror#11
1577	add	r8,r8,r2			@ h+=Ch(e,f,g)
1578#if 27==31
1579	and	r3,r3,#0xff
1580	cmp	r3,#0xf2			@ done?
1581#endif
1582#if 27<15
1583# if __ARM_ARCH__>=7
1584	ldr	r2,[r1],#4			@ prefetch
1585# else
1586	ldrb	r2,[r1,#3]
1587# endif
1588	eor	r3,r9,r10			@ a^b, b^c in next round
1589#else
1590	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1591	eor	r3,r9,r10			@ a^b, b^c in next round
1592	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1593#endif
1594	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1595	and	r12,r12,r3			@ (b^c)&=(a^b)
1596	add	r4,r4,r8			@ d+=h
1597	eor	r12,r12,r10			@ Maj(a,b,c)
1598	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1599	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1600	@ ldr	r2,[sp,#13*4]		@ 28
1601	@ ldr	r1,[sp,#10*4]
1602	mov	r0,r2,ror#7
1603	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1604	mov	r12,r1,ror#17
1605	eor	r0,r0,r2,ror#18
1606	eor	r12,r12,r1,ror#19
1607	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1608	ldr	r2,[sp,#12*4]
1609	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1610	ldr	r1,[sp,#5*4]
1611
1612	add	r12,r12,r0
1613	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1614	add	r2,r2,r12
1615	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1616	add	r2,r2,r1			@ X[i]
1617	ldr	r12,[r14],#4			@ *K256++
1618	add	r7,r7,r2			@ h+=X[i]
1619	str	r2,[sp,#12*4]
1620	eor	r2,r5,r6
1621	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1622	and	r2,r2,r4
1623	add	r7,r7,r12			@ h+=K256[i]
1624	eor	r2,r2,r6			@ Ch(e,f,g)
1625	eor	r0,r8,r8,ror#11
1626	add	r7,r7,r2			@ h+=Ch(e,f,g)
1627#if 28==31
1628	and	r12,r12,#0xff
1629	cmp	r12,#0xf2			@ done?
1630#endif
1631#if 28<15
1632# if __ARM_ARCH__>=7
1633	ldr	r2,[r1],#4			@ prefetch
1634# else
1635	ldrb	r2,[r1,#3]
1636# endif
1637	eor	r12,r8,r9			@ a^b, b^c in next round
1638#else
1639	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1640	eor	r12,r8,r9			@ a^b, b^c in next round
1641	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1642#endif
1643	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1644	and	r3,r3,r12			@ (b^c)&=(a^b)
1645	add	r11,r11,r7			@ d+=h
1646	eor	r3,r3,r9			@ Maj(a,b,c)
1647	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1648	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1649	@ ldr	r2,[sp,#14*4]		@ 29
1650	@ ldr	r1,[sp,#11*4]
1651	mov	r0,r2,ror#7
1652	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1653	mov	r3,r1,ror#17
1654	eor	r0,r0,r2,ror#18
1655	eor	r3,r3,r1,ror#19
1656	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1657	ldr	r2,[sp,#13*4]
1658	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1659	ldr	r1,[sp,#6*4]
1660
1661	add	r3,r3,r0
1662	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1663	add	r2,r2,r3
1664	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1665	add	r2,r2,r1			@ X[i]
1666	ldr	r3,[r14],#4			@ *K256++
1667	add	r6,r6,r2			@ h+=X[i]
1668	str	r2,[sp,#13*4]
1669	eor	r2,r4,r5
1670	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1671	and	r2,r2,r11
1672	add	r6,r6,r3			@ h+=K256[i]
1673	eor	r2,r2,r5			@ Ch(e,f,g)
1674	eor	r0,r7,r7,ror#11
1675	add	r6,r6,r2			@ h+=Ch(e,f,g)
1676#if 29==31
1677	and	r3,r3,#0xff
1678	cmp	r3,#0xf2			@ done?
1679#endif
1680#if 29<15
1681# if __ARM_ARCH__>=7
1682	ldr	r2,[r1],#4			@ prefetch
1683# else
1684	ldrb	r2,[r1,#3]
1685# endif
1686	eor	r3,r7,r8			@ a^b, b^c in next round
1687#else
1688	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1689	eor	r3,r7,r8			@ a^b, b^c in next round
1690	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1691#endif
1692	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1693	and	r12,r12,r3			@ (b^c)&=(a^b)
1694	add	r10,r10,r6			@ d+=h
1695	eor	r12,r12,r8			@ Maj(a,b,c)
1696	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1697	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1698	@ ldr	r2,[sp,#15*4]		@ 30
1699	@ ldr	r1,[sp,#12*4]
1700	mov	r0,r2,ror#7
1701	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1702	mov	r12,r1,ror#17
1703	eor	r0,r0,r2,ror#18
1704	eor	r12,r12,r1,ror#19
1705	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1706	ldr	r2,[sp,#14*4]
1707	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1708	ldr	r1,[sp,#7*4]
1709
1710	add	r12,r12,r0
1711	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1712	add	r2,r2,r12
1713	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1714	add	r2,r2,r1			@ X[i]
1715	ldr	r12,[r14],#4			@ *K256++
1716	add	r5,r5,r2			@ h+=X[i]
1717	str	r2,[sp,#14*4]
1718	eor	r2,r11,r4
1719	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1720	and	r2,r2,r10
1721	add	r5,r5,r12			@ h+=K256[i]
1722	eor	r2,r2,r4			@ Ch(e,f,g)
1723	eor	r0,r6,r6,ror#11
1724	add	r5,r5,r2			@ h+=Ch(e,f,g)
1725#if 30==31
1726	and	r12,r12,#0xff
1727	cmp	r12,#0xf2			@ done?
1728#endif
1729#if 30<15
1730# if __ARM_ARCH__>=7
1731	ldr	r2,[r1],#4			@ prefetch
1732# else
1733	ldrb	r2,[r1,#3]
1734# endif
1735	eor	r12,r6,r7			@ a^b, b^c in next round
1736#else
1737	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1738	eor	r12,r6,r7			@ a^b, b^c in next round
1739	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1740#endif
1741	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1742	and	r3,r3,r12			@ (b^c)&=(a^b)
1743	add	r9,r9,r5			@ d+=h
1744	eor	r3,r3,r7			@ Maj(a,b,c)
1745	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1746	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1747	@ ldr	r2,[sp,#0*4]		@ 31
1748	@ ldr	r1,[sp,#13*4]
1749	mov	r0,r2,ror#7
1750	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1751	mov	r3,r1,ror#17
1752	eor	r0,r0,r2,ror#18
1753	eor	r3,r3,r1,ror#19
1754	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1755	ldr	r2,[sp,#15*4]
1756	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1757	ldr	r1,[sp,#8*4]
1758
1759	add	r3,r3,r0
1760	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1761	add	r2,r2,r3
1762	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1763	add	r2,r2,r1			@ X[i]
1764	ldr	r3,[r14],#4			@ *K256++
1765	add	r4,r4,r2			@ h+=X[i]
1766	str	r2,[sp,#15*4]
1767	eor	r2,r10,r11
1768	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1769	and	r2,r2,r9
1770	add	r4,r4,r3			@ h+=K256[i]
1771	eor	r2,r2,r11			@ Ch(e,f,g)
1772	eor	r0,r5,r5,ror#11
1773	add	r4,r4,r2			@ h+=Ch(e,f,g)
1774#if 31==31
1775	and	r3,r3,#0xff
1776	cmp	r3,#0xf2			@ done?
1777#endif
1778#if 31<15
1779# if __ARM_ARCH__>=7
1780	ldr	r2,[r1],#4			@ prefetch
1781# else
1782	ldrb	r2,[r1,#3]
1783# endif
1784	eor	r3,r5,r6			@ a^b, b^c in next round
1785#else
1786	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1787	eor	r3,r5,r6			@ a^b, b^c in next round
1788	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1789#endif
1790	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1791	and	r12,r12,r3			@ (b^c)&=(a^b)
1792	add	r8,r8,r4			@ d+=h
1793	eor	r12,r12,r6			@ Maj(a,b,c)
1794	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1795	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1796#ifdef	__thumb2__
1797	ite	eq			@ Thumb2 thing, sanity check in ARM
1798#endif
1799	ldreq	r3,[sp,#16*4]		@ pull ctx
1800	bne	.Lrounds_16_xx
1801
1802	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1803	ldr	r0,[r3,#0]
1804	ldr	r2,[r3,#4]
1805	ldr	r12,[r3,#8]
1806	add	r4,r4,r0
1807	ldr	r0,[r3,#12]
1808	add	r5,r5,r2
1809	ldr	r2,[r3,#16]
1810	add	r6,r6,r12
1811	ldr	r12,[r3,#20]
1812	add	r7,r7,r0
1813	ldr	r0,[r3,#24]
1814	add	r8,r8,r2
1815	ldr	r2,[r3,#28]
1816	add	r9,r9,r12
1817	ldr	r1,[sp,#17*4]		@ pull inp
1818	ldr	r12,[sp,#18*4]		@ pull inp+len
1819	add	r10,r10,r0
1820	add	r11,r11,r2
1821	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1822	cmp	r1,r12
1823	sub	r14,r14,#256	@ rewind Ktbl
1824	bne	.Loop
1825
1826	add	sp,sp,#19*4	@ destroy frame
1827#if __ARM_ARCH__>=5
1828	ldmia	sp!,{r4-r11,pc}
1829#else
1830	ldmia	sp!,{r4-r11,lr}
1831	tst	lr,#1
1832	moveq	pc,lr			@ be binary compatible with V4, yet
1833	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1834#endif
1835.size	zfs_sha256_block_armv7,.-zfs_sha256_block_armv7
1836
1837.arch	armv7-a		@ assemble the code that follows for ARMv7-A
1838.fpu	neon		@ accept the NEON (v*) mnemonics used by the routine below
1839
1840.globl	zfs_sha256_block_neon		@ make the entry point visible to the linker
1841.type	zfs_sha256_block_neon,%function	@ tag the symbol as a function
1842.align	5				@ power-of-two form on ARM: 2^5 = 32-byte boundary
1843.skip	16				@ 16 bytes of padding, so the entry label sits 16 bytes past that boundary
@ NOTE(review): the 32-byte alignment plus 16-byte skip presumably keeps the
@ distance used by "adr r14,K256" in the prologue encodable - TODO confirm.
1844zfs_sha256_block_neon:
1845.LNEON:
1846	stmdb	sp!,{r4-r12,lr}
1847
1848	sub	r11,sp,#16*4+16
1849	adr	r14,K256
1850	bic	r11,r11,#15		@ align for 128-bit stores
1851	mov	r12,sp
1852	mov	sp,r11			@ alloca
1853	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1854
1855	vld1.8		{q0},[r1]!
1856	vld1.8		{q1},[r1]!
1857	vld1.8		{q2},[r1]!
1858	vld1.8		{q3},[r1]!
1859	vld1.32		{q8},[r14,:128]!
1860	vld1.32		{q9},[r14,:128]!
1861	vld1.32		{q10},[r14,:128]!
1862	vld1.32		{q11},[r14,:128]!
1863	vrev32.8	q0,q0		@ yes, even on
1864	str		r0,[sp,#64]
1865	vrev32.8	q1,q1		@ big-endian
1866	str		r1,[sp,#68]
1867	mov		r1,sp
1868	vrev32.8	q2,q2
1869	str		r2,[sp,#72]
1870	vrev32.8	q3,q3
1871	str		r12,[sp,#76]		@ save original sp
1872	vadd.i32	q8,q8,q0
1873	vadd.i32	q9,q9,q1
1874	vst1.32		{q8},[r1,:128]!
1875	vadd.i32	q10,q10,q2
1876	vst1.32		{q9},[r1,:128]!
1877	vadd.i32	q11,q11,q3
1878	vst1.32		{q10},[r1,:128]!
1879	vst1.32		{q11},[r1,:128]!
1880
1881	ldmia		r0,{r4-r11}
1882	sub		r1,r1,#64
1883	ldr		r2,[sp,#0]
1884	eor		r12,r12,r12
1885	eor		r3,r5,r6
1886	b		.L_00_48
1887
1888.align	4
1889.L_00_48:
1890	vext.8	q8,q0,q1,#4
1891	add	r11,r11,r2
1892	eor	r2,r9,r10
1893	eor	r0,r8,r8,ror#5
1894	vext.8	q9,q2,q3,#4
1895	add	r4,r4,r12
1896	and	r2,r2,r8
1897	eor	r12,r0,r8,ror#19
1898	vshr.u32	q10,q8,#7
1899	eor	r0,r4,r4,ror#11
1900	eor	r2,r2,r10
1901	vadd.i32	q0,q0,q9
1902	add	r11,r11,r12,ror#6
1903	eor	r12,r4,r5
1904	vshr.u32	q9,q8,#3
1905	eor	r0,r0,r4,ror#20
1906	add	r11,r11,r2
1907	vsli.32	q10,q8,#25
1908	ldr	r2,[sp,#4]
1909	and	r3,r3,r12
1910	vshr.u32	q11,q8,#18
1911	add	r7,r7,r11
1912	add	r11,r11,r0,ror#2
1913	eor	r3,r3,r5
1914	veor	q9,q9,q10
1915	add	r10,r10,r2
1916	vsli.32	q11,q8,#14
1917	eor	r2,r8,r9
1918	eor	r0,r7,r7,ror#5
1919	vshr.u32	d24,d7,#17
1920	add	r11,r11,r3
1921	and	r2,r2,r7
1922	veor	q9,q9,q11
1923	eor	r3,r0,r7,ror#19
1924	eor	r0,r11,r11,ror#11
1925	vsli.32	d24,d7,#15
1926	eor	r2,r2,r9
1927	add	r10,r10,r3,ror#6
1928	vshr.u32	d25,d7,#10
1929	eor	r3,r11,r4
1930	eor	r0,r0,r11,ror#20
1931	vadd.i32	q0,q0,q9
1932	add	r10,r10,r2
1933	ldr	r2,[sp,#8]
1934	veor	d25,d25,d24
1935	and	r12,r12,r3
1936	add	r6,r6,r10
1937	vshr.u32	d24,d7,#19
1938	add	r10,r10,r0,ror#2
1939	eor	r12,r12,r4
1940	vsli.32	d24,d7,#13
1941	add	r9,r9,r2
1942	eor	r2,r7,r8
1943	veor	d25,d25,d24
1944	eor	r0,r6,r6,ror#5
1945	add	r10,r10,r12
1946	vadd.i32	d0,d0,d25
1947	and	r2,r2,r6
1948	eor	r12,r0,r6,ror#19
1949	vshr.u32	d24,d0,#17
1950	eor	r0,r10,r10,ror#11
1951	eor	r2,r2,r8
1952	vsli.32	d24,d0,#15
1953	add	r9,r9,r12,ror#6
1954	eor	r12,r10,r11
1955	vshr.u32	d25,d0,#10
1956	eor	r0,r0,r10,ror#20
1957	add	r9,r9,r2
1958	veor	d25,d25,d24
1959	ldr	r2,[sp,#12]
1960	and	r3,r3,r12
1961	vshr.u32	d24,d0,#19
1962	add	r5,r5,r9
1963	add	r9,r9,r0,ror#2
1964	eor	r3,r3,r11
1965	vld1.32	{q8},[r14,:128]!
1966	add	r8,r8,r2
1967	vsli.32	d24,d0,#13
1968	eor	r2,r6,r7
1969	eor	r0,r5,r5,ror#5
1970	veor	d25,d25,d24
1971	add	r9,r9,r3
1972	and	r2,r2,r5
1973	vadd.i32	d1,d1,d25
1974	eor	r3,r0,r5,ror#19
1975	eor	r0,r9,r9,ror#11
1976	vadd.i32	q8,q8,q0
1977	eor	r2,r2,r7
1978	add	r8,r8,r3,ror#6
1979	eor	r3,r9,r10
1980	eor	r0,r0,r9,ror#20
1981	add	r8,r8,r2
1982	ldr	r2,[sp,#16]
1983	and	r12,r12,r3
1984	add	r4,r4,r8
1985	vst1.32	{q8},[r1,:128]!
1986	add	r8,r8,r0,ror#2
1987	eor	r12,r12,r10
1988	vext.8	q8,q1,q2,#4
1989	add	r7,r7,r2
1990	eor	r2,r5,r6
1991	eor	r0,r4,r4,ror#5
1992	vext.8	q9,q3,q0,#4
1993	add	r8,r8,r12
1994	and	r2,r2,r4
1995	eor	r12,r0,r4,ror#19
1996	vshr.u32	q10,q8,#7
1997	eor	r0,r8,r8,ror#11
1998	eor	r2,r2,r6
1999	vadd.i32	q1,q1,q9
2000	add	r7,r7,r12,ror#6
2001	eor	r12,r8,r9
2002	vshr.u32	q9,q8,#3
2003	eor	r0,r0,r8,ror#20
2004	add	r7,r7,r2
2005	vsli.32	q10,q8,#25
2006	ldr	r2,[sp,#20]
2007	and	r3,r3,r12
2008	vshr.u32	q11,q8,#18
2009	add	r11,r11,r7
2010	add	r7,r7,r0,ror#2
2011	eor	r3,r3,r9
2012	veor	q9,q9,q10
2013	add	r6,r6,r2
2014	vsli.32	q11,q8,#14
2015	eor	r2,r4,r5
2016	eor	r0,r11,r11,ror#5
2017	vshr.u32	d24,d1,#17
2018	add	r7,r7,r3
2019	and	r2,r2,r11
2020	veor	q9,q9,q11
2021	eor	r3,r0,r11,ror#19
2022	eor	r0,r7,r7,ror#11
2023	vsli.32	d24,d1,#15
2024	eor	r2,r2,r5
2025	add	r6,r6,r3,ror#6
2026	vshr.u32	d25,d1,#10
2027	eor	r3,r7,r8
2028	eor	r0,r0,r7,ror#20
2029	vadd.i32	q1,q1,q9
2030	add	r6,r6,r2
2031	ldr	r2,[sp,#24]
2032	veor	d25,d25,d24
2033	and	r12,r12,r3
2034	add	r10,r10,r6
2035	vshr.u32	d24,d1,#19
2036	add	r6,r6,r0,ror#2
2037	eor	r12,r12,r8
2038	vsli.32	d24,d1,#13
2039	add	r5,r5,r2
2040	eor	r2,r11,r4
2041	veor	d25,d25,d24
2042	eor	r0,r10,r10,ror#5
2043	add	r6,r6,r12
2044	vadd.i32	d2,d2,d25
2045	and	r2,r2,r10
2046	eor	r12,r0,r10,ror#19
2047	vshr.u32	d24,d2,#17
2048	eor	r0,r6,r6,ror#11
2049	eor	r2,r2,r4
2050	vsli.32	d24,d2,#15
2051	add	r5,r5,r12,ror#6
2052	eor	r12,r6,r7
2053	vshr.u32	d25,d2,#10
2054	eor	r0,r0,r6,ror#20
2055	add	r5,r5,r2
2056	veor	d25,d25,d24
2057	ldr	r2,[sp,#28]
2058	and	r3,r3,r12
2059	vshr.u32	d24,d2,#19
2060	add	r9,r9,r5
2061	add	r5,r5,r0,ror#2
2062	eor	r3,r3,r7
2063	vld1.32	{q8},[r14,:128]!
2064	add	r4,r4,r2
2065	vsli.32	d24,d2,#13
2066	eor	r2,r10,r11
2067	eor	r0,r9,r9,ror#5
2068	veor	d25,d25,d24
2069	add	r5,r5,r3
2070	and	r2,r2,r9
2071	vadd.i32	d3,d3,d25
2072	eor	r3,r0,r9,ror#19
2073	eor	r0,r5,r5,ror#11
2074	vadd.i32	q8,q8,q1
2075	eor	r2,r2,r11
2076	add	r4,r4,r3,ror#6
2077	eor	r3,r5,r6
2078	eor	r0,r0,r5,ror#20
2079	add	r4,r4,r2
2080	ldr	r2,[sp,#32]
2081	and	r12,r12,r3
2082	add	r8,r8,r4
2083	vst1.32	{q8},[r1,:128]!
2084	add	r4,r4,r0,ror#2
2085	eor	r12,r12,r6
2086	vext.8	q8,q2,q3,#4
2087	add	r11,r11,r2
2088	eor	r2,r9,r10
2089	eor	r0,r8,r8,ror#5
2090	vext.8	q9,q0,q1,#4
2091	add	r4,r4,r12
2092	and	r2,r2,r8
2093	eor	r12,r0,r8,ror#19
2094	vshr.u32	q10,q8,#7
2095	eor	r0,r4,r4,ror#11
2096	eor	r2,r2,r10
2097	vadd.i32	q2,q2,q9
2098	add	r11,r11,r12,ror#6
2099	eor	r12,r4,r5
2100	vshr.u32	q9,q8,#3
2101	eor	r0,r0,r4,ror#20
2102	add	r11,r11,r2
2103	vsli.32	q10,q8,#25
2104	ldr	r2,[sp,#36]
2105	and	r3,r3,r12
2106	vshr.u32	q11,q8,#18
2107	add	r7,r7,r11
2108	add	r11,r11,r0,ror#2
2109	eor	r3,r3,r5
2110	veor	q9,q9,q10
2111	add	r10,r10,r2
2112	vsli.32	q11,q8,#14
2113	eor	r2,r8,r9
2114	eor	r0,r7,r7,ror#5
2115	vshr.u32	d24,d3,#17
2116	add	r11,r11,r3
2117	and	r2,r2,r7
2118	veor	q9,q9,q11
2119	eor	r3,r0,r7,ror#19
2120	eor	r0,r11,r11,ror#11
2121	vsli.32	d24,d3,#15
2122	eor	r2,r2,r9
2123	add	r10,r10,r3,ror#6
2124	vshr.u32	d25,d3,#10
2125	eor	r3,r11,r4
2126	eor	r0,r0,r11,ror#20
2127	vadd.i32	q2,q2,q9
2128	add	r10,r10,r2
2129	ldr	r2,[sp,#40]
2130	veor	d25,d25,d24
2131	and	r12,r12,r3
2132	add	r6,r6,r10
2133	vshr.u32	d24,d3,#19
2134	add	r10,r10,r0,ror#2
2135	eor	r12,r12,r4
2136	vsli.32	d24,d3,#13
2137	add	r9,r9,r2
2138	eor	r2,r7,r8
2139	veor	d25,d25,d24
2140	eor	r0,r6,r6,ror#5
2141	add	r10,r10,r12
2142	vadd.i32	d4,d4,d25
2143	and	r2,r2,r6
2144	eor	r12,r0,r6,ror#19
2145	vshr.u32	d24,d4,#17
2146	eor	r0,r10,r10,ror#11
2147	eor	r2,r2,r8
2148	vsli.32	d24,d4,#15
2149	add	r9,r9,r12,ror#6
2150	eor	r12,r10,r11
2151	vshr.u32	d25,d4,#10
2152	eor	r0,r0,r10,ror#20
2153	add	r9,r9,r2
2154	veor	d25,d25,d24
2155	ldr	r2,[sp,#44]
2156	and	r3,r3,r12
2157	vshr.u32	d24,d4,#19
2158	add	r5,r5,r9
2159	add	r9,r9,r0,ror#2
2160	eor	r3,r3,r11
2161	vld1.32	{q8},[r14,:128]!
2162	add	r8,r8,r2
2163	vsli.32	d24,d4,#13
2164	eor	r2,r6,r7
2165	eor	r0,r5,r5,ror#5
2166	veor	d25,d25,d24
2167	add	r9,r9,r3
2168	and	r2,r2,r5
2169	vadd.i32	d5,d5,d25
2170	eor	r3,r0,r5,ror#19
2171	eor	r0,r9,r9,ror#11
2172	vadd.i32	q8,q8,q2
2173	eor	r2,r2,r7
2174	add	r8,r8,r3,ror#6
2175	eor	r3,r9,r10
2176	eor	r0,r0,r9,ror#20
2177	add	r8,r8,r2
2178	ldr	r2,[sp,#48]
2179	and	r12,r12,r3
2180	add	r4,r4,r8
2181	vst1.32	{q8},[r1,:128]!
2182	add	r8,r8,r0,ror#2
2183	eor	r12,r12,r10
2184	vext.8	q8,q3,q0,#4
2185	add	r7,r7,r2
2186	eor	r2,r5,r6
2187	eor	r0,r4,r4,ror#5
2188	vext.8	q9,q1,q2,#4
2189	add	r8,r8,r12
2190	and	r2,r2,r4
2191	eor	r12,r0,r4,ror#19
2192	vshr.u32	q10,q8,#7
2193	eor	r0,r8,r8,ror#11
2194	eor	r2,r2,r6
2195	vadd.i32	q3,q3,q9
2196	add	r7,r7,r12,ror#6
2197	eor	r12,r8,r9
2198	vshr.u32	q9,q8,#3
2199	eor	r0,r0,r8,ror#20
2200	add	r7,r7,r2
2201	vsli.32	q10,q8,#25
2202	ldr	r2,[sp,#52]
2203	and	r3,r3,r12
2204	vshr.u32	q11,q8,#18
2205	add	r11,r11,r7
2206	add	r7,r7,r0,ror#2
2207	eor	r3,r3,r9
2208	veor	q9,q9,q10
2209	add	r6,r6,r2
2210	vsli.32	q11,q8,#14
2211	eor	r2,r4,r5
2212	eor	r0,r11,r11,ror#5
2213	vshr.u32	d24,d5,#17
2214	add	r7,r7,r3
2215	and	r2,r2,r11
2216	veor	q9,q9,q11
2217	eor	r3,r0,r11,ror#19
2218	eor	r0,r7,r7,ror#11
2219	vsli.32	d24,d5,#15
2220	eor	r2,r2,r5
2221	add	r6,r6,r3,ror#6
2222	vshr.u32	d25,d5,#10
2223	eor	r3,r7,r8
2224	eor	r0,r0,r7,ror#20
2225	vadd.i32	q3,q3,q9
2226	add	r6,r6,r2
2227	ldr	r2,[sp,#56]
2228	veor	d25,d25,d24
2229	and	r12,r12,r3
2230	add	r10,r10,r6
2231	vshr.u32	d24,d5,#19
2232	add	r6,r6,r0,ror#2
2233	eor	r12,r12,r8
2234	vsli.32	d24,d5,#13
2235	add	r5,r5,r2
2236	eor	r2,r11,r4
2237	veor	d25,d25,d24
2238	eor	r0,r10,r10,ror#5
2239	add	r6,r6,r12
2240	vadd.i32	d6,d6,d25
2241	and	r2,r2,r10
2242	eor	r12,r0,r10,ror#19
2243	vshr.u32	d24,d6,#17
2244	eor	r0,r6,r6,ror#11
2245	eor	r2,r2,r4
2246	vsli.32	d24,d6,#15
2247	add	r5,r5,r12,ror#6
2248	eor	r12,r6,r7
2249	vshr.u32	d25,d6,#10
2250	eor	r0,r0,r6,ror#20
2251	add	r5,r5,r2
2252	veor	d25,d25,d24
2253	ldr	r2,[sp,#60]
2254	and	r3,r3,r12
2255	vshr.u32	d24,d6,#19
2256	add	r9,r9,r5
2257	add	r5,r5,r0,ror#2
2258	eor	r3,r3,r7
2259	vld1.32	{q8},[r14,:128]!
2260	add	r4,r4,r2
2261	vsli.32	d24,d6,#13
2262	eor	r2,r10,r11
2263	eor	r0,r9,r9,ror#5
2264	veor	d25,d25,d24
2265	add	r5,r5,r3
2266	and	r2,r2,r9
2267	vadd.i32	d7,d7,d25
2268	eor	r3,r0,r9,ror#19
2269	eor	r0,r5,r5,ror#11
2270	vadd.i32	q8,q8,q3
2271	eor	r2,r2,r11
2272	add	r4,r4,r3,ror#6
2273	eor	r3,r5,r6
2274	eor	r0,r0,r5,ror#20
2275	add	r4,r4,r2
2276	ldr	r2,[r14]
2277	and	r12,r12,r3
2278	add	r8,r8,r4
2279	vst1.32	{q8},[r1,:128]!
2280	add	r4,r4,r0,ror#2
2281	eor	r12,r12,r6
2282	teq	r2,#0				@ check for K256 terminator
2283	ldr	r2,[sp,#0]
2284	sub	r1,r1,#64
2285	bne	.L_00_48
2286
2287	ldr		r1,[sp,#68]
2288	ldr		r0,[sp,#72]
2289	sub		r14,r14,#256	@ rewind r14
2290	teq		r1,r0
2291	it		eq
2292	subeq		r1,r1,#64		@ avoid SEGV
2293	vld1.8		{q0},[r1]!		@ load next input block
2294	vld1.8		{q1},[r1]!
2295	vld1.8		{q2},[r1]!
2296	vld1.8		{q3},[r1]!
2297	it		ne
2298	strne		r1,[sp,#68]
2299	mov		r1,sp
2300	add	r11,r11,r2
2301	eor	r2,r9,r10
2302	eor	r0,r8,r8,ror#5
2303	add	r4,r4,r12
2304	vld1.32	{q8},[r14,:128]!
2305	and	r2,r2,r8
2306	eor	r12,r0,r8,ror#19
2307	eor	r0,r4,r4,ror#11
2308	eor	r2,r2,r10
2309	vrev32.8	q0,q0
2310	add	r11,r11,r12,ror#6
2311	eor	r12,r4,r5
2312	eor	r0,r0,r4,ror#20
2313	add	r11,r11,r2
2314	vadd.i32	q8,q8,q0
2315	ldr	r2,[sp,#4]
2316	and	r3,r3,r12
2317	add	r7,r7,r11
2318	add	r11,r11,r0,ror#2
2319	eor	r3,r3,r5
2320	add	r10,r10,r2
2321	eor	r2,r8,r9
2322	eor	r0,r7,r7,ror#5
2323	add	r11,r11,r3
2324	and	r2,r2,r7
2325	eor	r3,r0,r7,ror#19
2326	eor	r0,r11,r11,ror#11
2327	eor	r2,r2,r9
2328	add	r10,r10,r3,ror#6
2329	eor	r3,r11,r4
2330	eor	r0,r0,r11,ror#20
2331	add	r10,r10,r2
2332	ldr	r2,[sp,#8]
2333	and	r12,r12,r3
2334	add	r6,r6,r10
2335	add	r10,r10,r0,ror#2
2336	eor	r12,r12,r4
2337	add	r9,r9,r2
2338	eor	r2,r7,r8
2339	eor	r0,r6,r6,ror#5
2340	add	r10,r10,r12
2341	and	r2,r2,r6
2342	eor	r12,r0,r6,ror#19
2343	eor	r0,r10,r10,ror#11
2344	eor	r2,r2,r8
2345	add	r9,r9,r12,ror#6
2346	eor	r12,r10,r11
2347	eor	r0,r0,r10,ror#20
2348	add	r9,r9,r2
2349	ldr	r2,[sp,#12]
2350	and	r3,r3,r12
2351	add	r5,r5,r9
2352	add	r9,r9,r0,ror#2
2353	eor	r3,r3,r11
2354	add	r8,r8,r2
2355	eor	r2,r6,r7
2356	eor	r0,r5,r5,ror#5
2357	add	r9,r9,r3
2358	and	r2,r2,r5
2359	eor	r3,r0,r5,ror#19
2360	eor	r0,r9,r9,ror#11
2361	eor	r2,r2,r7
2362	add	r8,r8,r3,ror#6
2363	eor	r3,r9,r10
2364	eor	r0,r0,r9,ror#20
2365	add	r8,r8,r2
2366	ldr	r2,[sp,#16]
2367	and	r12,r12,r3
2368	add	r4,r4,r8
2369	add	r8,r8,r0,ror#2
2370	eor	r12,r12,r10
2371	vst1.32	{q8},[r1,:128]!
2372	add	r7,r7,r2
2373	eor	r2,r5,r6
2374	eor	r0,r4,r4,ror#5
2375	add	r8,r8,r12
2376	vld1.32	{q8},[r14,:128]!
2377	and	r2,r2,r4
2378	eor	r12,r0,r4,ror#19
2379	eor	r0,r8,r8,ror#11
2380	eor	r2,r2,r6
2381	vrev32.8	q1,q1
2382	add	r7,r7,r12,ror#6
2383	eor	r12,r8,r9
2384	eor	r0,r0,r8,ror#20
2385	add	r7,r7,r2
2386	vadd.i32	q8,q8,q1
2387	ldr	r2,[sp,#20]
2388	and	r3,r3,r12
2389	add	r11,r11,r7
2390	add	r7,r7,r0,ror#2
2391	eor	r3,r3,r9
2392	add	r6,r6,r2
2393	eor	r2,r4,r5
2394	eor	r0,r11,r11,ror#5
2395	add	r7,r7,r3
2396	and	r2,r2,r11
2397	eor	r3,r0,r11,ror#19
2398	eor	r0,r7,r7,ror#11
2399	eor	r2,r2,r5
2400	add	r6,r6,r3,ror#6
2401	eor	r3,r7,r8
2402	eor	r0,r0,r7,ror#20
2403	add	r6,r6,r2
2404	ldr	r2,[sp,#24]
2405	and	r12,r12,r3
2406	add	r10,r10,r6
2407	add	r6,r6,r0,ror#2
2408	eor	r12,r12,r8
2409	add	r5,r5,r2
2410	eor	r2,r11,r4
2411	eor	r0,r10,r10,ror#5
2412	add	r6,r6,r12
2413	and	r2,r2,r10
2414	eor	r12,r0,r10,ror#19
2415	eor	r0,r6,r6,ror#11
2416	eor	r2,r2,r4
2417	add	r5,r5,r12,ror#6
2418	eor	r12,r6,r7
2419	eor	r0,r0,r6,ror#20
2420	add	r5,r5,r2
2421	ldr	r2,[sp,#28]
2422	and	r3,r3,r12
2423	add	r9,r9,r5
2424	add	r5,r5,r0,ror#2
2425	eor	r3,r3,r7
2426	add	r4,r4,r2
2427	eor	r2,r10,r11
2428	eor	r0,r9,r9,ror#5
2429	add	r5,r5,r3
2430	and	r2,r2,r9
2431	eor	r3,r0,r9,ror#19
2432	eor	r0,r5,r5,ror#11
2433	eor	r2,r2,r11
2434	add	r4,r4,r3,ror#6
2435	eor	r3,r5,r6
2436	eor	r0,r0,r5,ror#20
2437	add	r4,r4,r2
2438	ldr	r2,[sp,#32]
2439	and	r12,r12,r3
2440	add	r8,r8,r4
2441	add	r4,r4,r0,ror#2
2442	eor	r12,r12,r6
2443	vst1.32	{q8},[r1,:128]!
2444	add	r11,r11,r2
2445	eor	r2,r9,r10
2446	eor	r0,r8,r8,ror#5
2447	add	r4,r4,r12
2448	vld1.32	{q8},[r14,:128]!
2449	and	r2,r2,r8
2450	eor	r12,r0,r8,ror#19
2451	eor	r0,r4,r4,ror#11
2452	eor	r2,r2,r10
2453	vrev32.8	q2,q2
2454	add	r11,r11,r12,ror#6
2455	eor	r12,r4,r5
2456	eor	r0,r0,r4,ror#20
2457	add	r11,r11,r2
2458	vadd.i32	q8,q8,q2
2459	ldr	r2,[sp,#36]
2460	and	r3,r3,r12
2461	add	r7,r7,r11
2462	add	r11,r11,r0,ror#2
2463	eor	r3,r3,r5
2464	add	r10,r10,r2
2465	eor	r2,r8,r9
2466	eor	r0,r7,r7,ror#5
2467	add	r11,r11,r3
2468	and	r2,r2,r7
2469	eor	r3,r0,r7,ror#19
2470	eor	r0,r11,r11,ror#11
2471	eor	r2,r2,r9
2472	add	r10,r10,r3,ror#6
2473	eor	r3,r11,r4
2474	eor	r0,r0,r11,ror#20
2475	add	r10,r10,r2
2476	ldr	r2,[sp,#40]
2477	and	r12,r12,r3
2478	add	r6,r6,r10
2479	add	r10,r10,r0,ror#2
2480	eor	r12,r12,r4
2481	add	r9,r9,r2
2482	eor	r2,r7,r8
2483	eor	r0,r6,r6,ror#5
2484	add	r10,r10,r12
2485	and	r2,r2,r6
2486	eor	r12,r0,r6,ror#19
2487	eor	r0,r10,r10,ror#11
2488	eor	r2,r2,r8
2489	add	r9,r9,r12,ror#6
2490	eor	r12,r10,r11
2491	eor	r0,r0,r10,ror#20
2492	add	r9,r9,r2
2493	ldr	r2,[sp,#44]
2494	and	r3,r3,r12
2495	add	r5,r5,r9
2496	add	r9,r9,r0,ror#2
2497	eor	r3,r3,r11
2498	add	r8,r8,r2
2499	eor	r2,r6,r7
2500	eor	r0,r5,r5,ror#5
2501	add	r9,r9,r3
2502	and	r2,r2,r5
2503	eor	r3,r0,r5,ror#19
2504	eor	r0,r9,r9,ror#11
2505	eor	r2,r2,r7
2506	add	r8,r8,r3,ror#6
2507	eor	r3,r9,r10
2508	eor	r0,r0,r9,ror#20
2509	add	r8,r8,r2
2510	ldr	r2,[sp,#48]
2511	and	r12,r12,r3
2512	add	r4,r4,r8
2513	add	r8,r8,r0,ror#2
2514	eor	r12,r12,r10
2515	vst1.32	{q8},[r1,:128]!
2516	add	r7,r7,r2
2517	eor	r2,r5,r6
2518	eor	r0,r4,r4,ror#5
2519	add	r8,r8,r12
2520	vld1.32	{q8},[r14,:128]!
2521	and	r2,r2,r4
2522	eor	r12,r0,r4,ror#19
2523	eor	r0,r8,r8,ror#11
2524	eor	r2,r2,r6
2525	vrev32.8	q3,q3
2526	add	r7,r7,r12,ror#6
2527	eor	r12,r8,r9
2528	eor	r0,r0,r8,ror#20
2529	add	r7,r7,r2
2530	vadd.i32	q8,q8,q3
2531	ldr	r2,[sp,#52]
2532	and	r3,r3,r12
2533	add	r11,r11,r7
2534	add	r7,r7,r0,ror#2
2535	eor	r3,r3,r9
2536	add	r6,r6,r2
2537	eor	r2,r4,r5
2538	eor	r0,r11,r11,ror#5
2539	add	r7,r7,r3
2540	and	r2,r2,r11
2541	eor	r3,r0,r11,ror#19
2542	eor	r0,r7,r7,ror#11
2543	eor	r2,r2,r5
2544	add	r6,r6,r3,ror#6
2545	eor	r3,r7,r8
2546	eor	r0,r0,r7,ror#20
2547	add	r6,r6,r2
2548	ldr	r2,[sp,#56]
2549	and	r12,r12,r3
2550	add	r10,r10,r6
2551	add	r6,r6,r0,ror#2
2552	eor	r12,r12,r8
2553	add	r5,r5,r2
2554	eor	r2,r11,r4
2555	eor	r0,r10,r10,ror#5
2556	add	r6,r6,r12
2557	and	r2,r2,r10
2558	eor	r12,r0,r10,ror#19
2559	eor	r0,r6,r6,ror#11
2560	eor	r2,r2,r4
2561	add	r5,r5,r12,ror#6
2562	eor	r12,r6,r7
2563	eor	r0,r0,r6,ror#20
2564	add	r5,r5,r2
2565	ldr	r2,[sp,#60]
2566	and	r3,r3,r12
2567	add	r9,r9,r5
2568	add	r5,r5,r0,ror#2
2569	eor	r3,r3,r7
2570	add	r4,r4,r2
2571	eor	r2,r10,r11
2572	eor	r0,r9,r9,ror#5
2573	add	r5,r5,r3
2574	and	r2,r2,r9
2575	eor	r3,r0,r9,ror#19
2576	eor	r0,r5,r5,ror#11
2577	eor	r2,r2,r11
2578	add	r4,r4,r3,ror#6
2579	eor	r3,r5,r6
2580	eor	r0,r0,r5,ror#20
2581	add	r4,r4,r2
2582	ldr	r2,[sp,#64]
2583	and	r12,r12,r3
2584	add	r8,r8,r4
2585	add	r4,r4,r0,ror#2
2586	eor	r12,r12,r6
2587	vst1.32	{q8},[r1,:128]!
2588	ldr	r0,[r2,#0]
2589	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2590	ldr	r12,[r2,#4]
2591	ldr	r3,[r2,#8]
2592	ldr	r1,[r2,#12]
2593	add	r4,r4,r0			@ accumulate
2594	ldr	r0,[r2,#16]
2595	add	r5,r5,r12
2596	ldr	r12,[r2,#20]
2597	add	r6,r6,r3
2598	ldr	r3,[r2,#24]
2599	add	r7,r7,r1
2600	ldr	r1,[r2,#28]
2601	add	r8,r8,r0
2602	str	r4,[r2],#4
2603	add	r9,r9,r12
2604	str	r5,[r2],#4
2605	add	r10,r10,r3
2606	str	r6,[r2],#4
2607	add	r11,r11,r1
2608	str	r7,[r2],#4
2609	stmia	r2,{r8-r11}
2610
2611	ittte	ne
2612	movne	r1,sp
2613	ldrne	r2,[sp,#0]
2614	eorne	r12,r12,r12
2615	ldreq	sp,[sp,#76]			@ restore original sp
2616	itt	ne
2617	eorne	r3,r5,r6
2618	bne	.L_00_48
2619
2620	ldmia	sp!,{r4-r12,pc}
2621.size	zfs_sha256_block_neon,.-zfs_sha256_block_neon
2622
/*
 * INST(a,b,c,d) emits one 32-bit instruction as four raw bytes instead of
 * a mnemonic (presumably so that assemblers without the crypto-extension
 * mnemonics can still build this file; the intended instruction is given
 * in the comment at each use).
 *
 * Callers pass the bytes as a,b,c,d.  The non-Thumb-2 build emits them in
 * exactly that order.  The Thumb-2 build emits the two 16-bit halves in
 * swapped order (c,d first, then a,b) and ORs 0xc into byte d.
 */
#if defined(__thumb2__)
#define	INST(a,b,c,d)	.byte	c,d|0xc,a,b
#else
#define	INST(a,b,c,d)	.byte	a,b,c,d
#endif
2628
/*
 * zfs_sha256_block_armv8 - SHA-256 block function built on the
 * sha256h / sha256h2 / sha256su0 / sha256su1 instructions, which are
 * emitted as raw bytes through INST().
 *
 * In:    r0 = pointer to the eight 32-bit state words (loaded into q0,q1
 *             on entry and stored back from q0,q1 on exit)
 *        r1 = input pointer; 64 bytes are consumed per loop iteration
 *        r2 = number of 64-byte blocks (scaled by lsl#6 to form the end
 *             pointer).  The end test happens only after a block has been
 *             loaded, so the count must be nonzero.
 * Clobb: r1, r2, r3, r12, q0-q2, q8-q15, flags
 *
 * r3 is used as the running pointer into K256.  It is computed here from
 * a PC-relative offset word instead of being derived from the incoming
 * value of r3: this symbol is global and only r0-r2 are read as inputs,
 * so r3 holds no defined value on entry.
 * NOTE(review): the C prototype is not visible in this part of the file;
 * confirm that no caller relies on passing a code address in r3.
 */
.globl	zfs_sha256_block_armv8
.type	zfs_sha256_block_armv8,%function
.align	5
zfs_sha256_block_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	adr	r3,.LK256_rel
	ldr	r12,[r3]		@ r12 = K256 - .LK256_rel
	add	r3,r3,r12		@ r3 = address of K256
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	2
.LK256_rel:
.word	K256-.LK256_rel			@ distance from this word to K256

.align	4
.Loop_v8:
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2			@ flags stay live until the bne below
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8		@ + K256[0..3]
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9		@ + K256[4..7]
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10		@ + K256[8..11]
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11		@ + K256[12..15]
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8		@ + K256[16..19]
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9		@ + K256[20..23]
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10		@ + K256[24..27]
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11		@ + K256[28..31]
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8		@ + K256[32..35]
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9		@ + K256[36..39]
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10		@ + K256[40..43]
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11		@ + K256[44..47]
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8		@ + K256[48..51]
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9		@ + K256[52..55]
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]		@ last load, no writeback
	vadd.i32	q12,q12,q10		@ + K256[56..59]
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11		@ + K256[60..63]
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14		@ add back the state saved in q14,q15
	vadd.i32	q1,q1,q15
	it		ne			@ condition from teq r1,r2 at loop top
	bne		.Loop_v8

	vst1.32		{q0,q1},[r0]

	bx	lr		@ return
.size	zfs_sha256_block_armv8,.-zfs_sha256_block_armv8
2768
2769#endif
2770